{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0191822311963654, "eval_steps": 500, "global_step": 44000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.589050525446285e-05, "grad_norm": 1.3403533697128296, "learning_rate": 7.645259938837922e-09, "loss": 0.5188, "step": 1 }, { "epoch": 9.17810105089257e-05, "grad_norm": 1.7611314058303833, "learning_rate": 1.5290519877675843e-08, "loss": 0.5457, "step": 2 }, { "epoch": 0.00013767151576338856, "grad_norm": 0.9028812050819397, "learning_rate": 2.2935779816513765e-08, "loss": 0.4036, "step": 3 }, { "epoch": 0.0001835620210178514, "grad_norm": 0.4214492440223694, "learning_rate": 3.0581039755351686e-08, "loss": 0.2921, "step": 4 }, { "epoch": 0.00022945252627231425, "grad_norm": 0.8581069707870483, "learning_rate": 3.8226299694189604e-08, "loss": 0.4235, "step": 5 }, { "epoch": 0.0002753430315267771, "grad_norm": 0.9656532406806946, "learning_rate": 4.587155963302753e-08, "loss": 0.4195, "step": 6 }, { "epoch": 0.00032123353678124, "grad_norm": 2.2578439712524414, "learning_rate": 5.351681957186545e-08, "loss": 0.6178, "step": 7 }, { "epoch": 0.0003671240420357028, "grad_norm": 2.229050636291504, "learning_rate": 6.116207951070337e-08, "loss": 0.5685, "step": 8 }, { "epoch": 0.0004130145472901657, "grad_norm": 1.2387725114822388, "learning_rate": 6.88073394495413e-08, "loss": 0.5159, "step": 9 }, { "epoch": 0.0004589050525446285, "grad_norm": 1.74851393699646, "learning_rate": 7.645259938837921e-08, "loss": 0.5221, "step": 10 }, { "epoch": 0.0005047955577990914, "grad_norm": 3.0931506156921387, "learning_rate": 8.409785932721712e-08, "loss": 0.7711, "step": 11 }, { "epoch": 0.0005506860630535542, "grad_norm": 0.7829667925834656, "learning_rate": 9.174311926605506e-08, "loss": 0.4439, "step": 12 }, { "epoch": 0.0005965765683080171, "grad_norm": 2.237008571624756, "learning_rate": 9.938837920489297e-08, "loss": 0.5605, "step": 13 }, { "epoch": 0.00064246707356248, "grad_norm": 1.2005207538604736, "learning_rate": 1.070336391437309e-07, "loss": 0.5013, "step": 14 }, { "epoch": 0.0006883575788169427, "grad_norm": 1.6844494342803955, "learning_rate": 1.1467889908256882e-07, "loss": 0.551, "step": 15 }, { "epoch": 0.0007342480840714056, "grad_norm": 1.7097495794296265, "learning_rate": 1.2232415902140674e-07, "loss": 0.5089, "step": 16 }, { "epoch": 0.0007801385893258685, "grad_norm": 1.409144639968872, "learning_rate": 1.2996941896024464e-07, "loss": 0.4896, "step": 17 }, { "epoch": 0.0008260290945803313, "grad_norm": 3.0614712238311768, "learning_rate": 1.376146788990826e-07, "loss": 0.7174, "step": 18 }, { "epoch": 0.0008719195998347942, "grad_norm": 0.8959612250328064, "learning_rate": 1.452599388379205e-07, "loss": 0.4232, "step": 19 }, { "epoch": 0.000917810105089257, "grad_norm": 1.8896371126174927, "learning_rate": 1.5290519877675842e-07, "loss": 0.5492, "step": 20 }, { "epoch": 0.0009637006103437199, "grad_norm": 2.0700249671936035, "learning_rate": 1.6055045871559634e-07, "loss": 0.601, "step": 21 }, { "epoch": 0.0010095911155981827, "grad_norm": 2.7334303855895996, "learning_rate": 1.6819571865443424e-07, "loss": 0.6509, "step": 22 }, { "epoch": 0.0010554816208526456, "grad_norm": 1.7926756143569946, "learning_rate": 1.758409785932722e-07, "loss": 0.5735, "step": 23 }, { "epoch": 0.0011013721261071085, "grad_norm": 2.588320255279541, "learning_rate": 1.8348623853211012e-07, "loss": 0.6498, "step": 24 }, { "epoch": 0.0011472626313615713, "grad_norm": 0.9466708302497864, "learning_rate": 1.9113149847094802e-07, "loss": 0.4235, "step": 25 }, { "epoch": 0.0011931531366160342, "grad_norm": 2.035421371459961, "learning_rate": 1.9877675840978594e-07, "loss": 0.5886, "step": 26 }, { "epoch": 0.001239043641870497, "grad_norm": 1.3734371662139893, "learning_rate": 2.064220183486239e-07, "loss": 0.5093, "step": 27 }, { "epoch": 0.00128493414712496, "grad_norm": 2.2274961471557617, "learning_rate": 2.140672782874618e-07, "loss": 0.6198, "step": 28 }, { "epoch": 0.0013308246523794226, "grad_norm": 0.7679215669631958, "learning_rate": 2.2171253822629971e-07, "loss": 0.4081, "step": 29 }, { "epoch": 0.0013767151576338855, "grad_norm": 2.2280468940734863, "learning_rate": 2.2935779816513764e-07, "loss": 0.5711, "step": 30 }, { "epoch": 0.0014226056628883483, "grad_norm": 1.821223258972168, "learning_rate": 2.3700305810397554e-07, "loss": 0.5237, "step": 31 }, { "epoch": 0.0014684961681428112, "grad_norm": 1.4683890342712402, "learning_rate": 2.446483180428135e-07, "loss": 0.5886, "step": 32 }, { "epoch": 0.001514386673397274, "grad_norm": 1.8166035413742065, "learning_rate": 2.522935779816514e-07, "loss": 0.5846, "step": 33 }, { "epoch": 0.001560277178651737, "grad_norm": 2.1721227169036865, "learning_rate": 2.599388379204893e-07, "loss": 0.5962, "step": 34 }, { "epoch": 0.0016061676839061998, "grad_norm": 2.8068950176239014, "learning_rate": 2.6758409785932726e-07, "loss": 0.7705, "step": 35 }, { "epoch": 0.0016520581891606627, "grad_norm": 0.9062060117721558, "learning_rate": 2.752293577981652e-07, "loss": 0.3827, "step": 36 }, { "epoch": 0.0016979486944151256, "grad_norm": 2.020232677459717, "learning_rate": 2.8287461773700306e-07, "loss": 0.6352, "step": 37 }, { "epoch": 0.0017438391996695884, "grad_norm": 0.4772844612598419, "learning_rate": 2.90519877675841e-07, "loss": 0.3533, "step": 38 }, { "epoch": 0.0017897297049240513, "grad_norm": 1.5592962503433228, "learning_rate": 2.9816513761467896e-07, "loss": 0.5481, "step": 39 }, { "epoch": 0.001835620210178514, "grad_norm": 1.48556387424469, "learning_rate": 3.0581039755351683e-07, "loss": 0.519, "step": 40 }, { "epoch": 0.0018815107154329768, "grad_norm": 1.432593584060669, "learning_rate": 3.1345565749235476e-07, "loss": 0.5216, "step": 41 }, { "epoch": 0.0019274012206874397, "grad_norm": 1.971846580505371, "learning_rate": 3.211009174311927e-07, "loss": 0.5861, "step": 42 }, { "epoch": 0.001973291725941903, "grad_norm": 1.8548887968063354, "learning_rate": 3.287461773700306e-07, "loss": 0.5152, "step": 43 }, { "epoch": 0.0020191822311963654, "grad_norm": 1.3518904447555542, "learning_rate": 3.363914373088685e-07, "loss": 0.4928, "step": 44 }, { "epoch": 0.0020650727364508285, "grad_norm": 1.132017970085144, "learning_rate": 3.4403669724770646e-07, "loss": 0.4917, "step": 45 }, { "epoch": 0.002110963241705291, "grad_norm": 1.535001516342163, "learning_rate": 3.516819571865444e-07, "loss": 0.4899, "step": 46 }, { "epoch": 0.002156853746959754, "grad_norm": 0.6526742577552795, "learning_rate": 3.5932721712538226e-07, "loss": 0.4051, "step": 47 }, { "epoch": 0.002202744252214217, "grad_norm": 1.3923543691635132, "learning_rate": 3.6697247706422023e-07, "loss": 0.5483, "step": 48 }, { "epoch": 0.0022486347574686796, "grad_norm": 0.6318294405937195, "learning_rate": 3.7461773700305816e-07, "loss": 0.406, "step": 49 }, { "epoch": 0.0022945252627231427, "grad_norm": 2.7209126949310303, "learning_rate": 3.8226299694189603e-07, "loss": 0.687, "step": 50 }, { "epoch": 0.0023404157679776053, "grad_norm": 1.347872018814087, "learning_rate": 3.89908256880734e-07, "loss": 0.5432, "step": 51 }, { "epoch": 0.0023863062732320684, "grad_norm": 1.8943320512771606, "learning_rate": 3.975535168195719e-07, "loss": 0.6615, "step": 52 }, { "epoch": 0.002432196778486531, "grad_norm": 0.7322803139686584, "learning_rate": 4.051987767584098e-07, "loss": 0.4356, "step": 53 }, { "epoch": 0.002478087283740994, "grad_norm": 1.2317309379577637, "learning_rate": 4.128440366972478e-07, "loss": 0.547, "step": 54 }, { "epoch": 0.002523977788995457, "grad_norm": 2.214301586151123, "learning_rate": 4.2048929663608565e-07, "loss": 0.5578, "step": 55 }, { "epoch": 0.00256986829424992, "grad_norm": 1.2360845804214478, "learning_rate": 4.281345565749236e-07, "loss": 0.5632, "step": 56 }, { "epoch": 0.0026157587995043825, "grad_norm": 2.4533538818359375, "learning_rate": 4.357798165137615e-07, "loss": 0.6488, "step": 57 }, { "epoch": 0.002661649304758845, "grad_norm": 2.7331459522247314, "learning_rate": 4.4342507645259943e-07, "loss": 0.6527, "step": 58 }, { "epoch": 0.0027075398100133083, "grad_norm": 1.1070164442062378, "learning_rate": 4.510703363914373e-07, "loss": 0.4592, "step": 59 }, { "epoch": 0.002753430315267771, "grad_norm": 0.45862826704978943, "learning_rate": 4.587155963302753e-07, "loss": 0.3563, "step": 60 }, { "epoch": 0.002799320820522234, "grad_norm": 1.9747591018676758, "learning_rate": 4.663608562691132e-07, "loss": 0.6012, "step": 61 }, { "epoch": 0.0028452113257766967, "grad_norm": 1.7752922773361206, "learning_rate": 4.740061162079511e-07, "loss": 0.6039, "step": 62 }, { "epoch": 0.0028911018310311598, "grad_norm": 0.45932909846305847, "learning_rate": 4.816513761467891e-07, "loss": 0.339, "step": 63 }, { "epoch": 0.0029369923362856224, "grad_norm": 1.701511263847351, "learning_rate": 4.89296636085627e-07, "loss": 0.6563, "step": 64 }, { "epoch": 0.0029828828415400855, "grad_norm": 1.5318350791931152, "learning_rate": 4.969418960244648e-07, "loss": 0.5504, "step": 65 }, { "epoch": 0.003028773346794548, "grad_norm": 2.573740243911743, "learning_rate": 5.045871559633028e-07, "loss": 0.7237, "step": 66 }, { "epoch": 0.0030746638520490113, "grad_norm": 1.351897120475769, "learning_rate": 5.122324159021407e-07, "loss": 0.5439, "step": 67 }, { "epoch": 0.003120554357303474, "grad_norm": 1.41916024684906, "learning_rate": 5.198776758409786e-07, "loss": 0.5839, "step": 68 }, { "epoch": 0.0031664448625579366, "grad_norm": 0.7206918597221375, "learning_rate": 5.275229357798165e-07, "loss": 0.4373, "step": 69 }, { "epoch": 0.0032123353678123997, "grad_norm": 1.529764175415039, "learning_rate": 5.351681957186545e-07, "loss": 0.5101, "step": 70 }, { "epoch": 0.0032582258730668623, "grad_norm": 0.5984582304954529, "learning_rate": 5.428134556574924e-07, "loss": 0.3715, "step": 71 }, { "epoch": 0.0033041163783213254, "grad_norm": 0.830768346786499, "learning_rate": 5.504587155963304e-07, "loss": 0.4262, "step": 72 }, { "epoch": 0.003350006883575788, "grad_norm": 0.5834783315658569, "learning_rate": 5.581039755351682e-07, "loss": 0.4524, "step": 73 }, { "epoch": 0.003395897388830251, "grad_norm": 0.6707944273948669, "learning_rate": 5.657492354740061e-07, "loss": 0.3892, "step": 74 }, { "epoch": 0.003441787894084714, "grad_norm": 0.5306781530380249, "learning_rate": 5.733944954128441e-07, "loss": 0.3801, "step": 75 }, { "epoch": 0.003487678399339177, "grad_norm": 0.5925319194793701, "learning_rate": 5.81039755351682e-07, "loss": 0.4101, "step": 76 }, { "epoch": 0.0035335689045936395, "grad_norm": 0.5854651927947998, "learning_rate": 5.8868501529052e-07, "loss": 0.4071, "step": 77 }, { "epoch": 0.0035794594098481026, "grad_norm": 1.4466856718063354, "learning_rate": 5.963302752293579e-07, "loss": 0.5946, "step": 78 }, { "epoch": 0.0036253499151025653, "grad_norm": 2.0078835487365723, "learning_rate": 6.039755351681958e-07, "loss": 0.6802, "step": 79 }, { "epoch": 0.003671240420357028, "grad_norm": 0.6159325838088989, "learning_rate": 6.116207951070337e-07, "loss": 0.3883, "step": 80 }, { "epoch": 0.003717130925611491, "grad_norm": 0.44337207078933716, "learning_rate": 6.192660550458716e-07, "loss": 0.2945, "step": 81 }, { "epoch": 0.0037630214308659537, "grad_norm": 0.550020158290863, "learning_rate": 6.269113149847095e-07, "loss": 0.3542, "step": 82 }, { "epoch": 0.0038089119361204168, "grad_norm": 1.473622441291809, "learning_rate": 6.345565749235474e-07, "loss": 0.6035, "step": 83 }, { "epoch": 0.0038548024413748794, "grad_norm": 1.674747347831726, "learning_rate": 6.422018348623854e-07, "loss": 0.535, "step": 84 }, { "epoch": 0.0039006929466293425, "grad_norm": 1.0093748569488525, "learning_rate": 6.498470948012232e-07, "loss": 0.5078, "step": 85 }, { "epoch": 0.003946583451883806, "grad_norm": 0.6665521264076233, "learning_rate": 6.574923547400612e-07, "loss": 0.3994, "step": 86 }, { "epoch": 0.003992473957138268, "grad_norm": 1.0105335712432861, "learning_rate": 6.651376146788992e-07, "loss": 0.4706, "step": 87 }, { "epoch": 0.004038364462392731, "grad_norm": 0.9650800824165344, "learning_rate": 6.72782874617737e-07, "loss": 0.4861, "step": 88 }, { "epoch": 0.004084254967647194, "grad_norm": 0.7468169331550598, "learning_rate": 6.804281345565749e-07, "loss": 0.4496, "step": 89 }, { "epoch": 0.004130145472901657, "grad_norm": 0.9022457003593445, "learning_rate": 6.880733944954129e-07, "loss": 0.5108, "step": 90 }, { "epoch": 0.004176035978156119, "grad_norm": 0.5702598690986633, "learning_rate": 6.957186544342508e-07, "loss": 0.438, "step": 91 }, { "epoch": 0.004221926483410582, "grad_norm": 1.3481727838516235, "learning_rate": 7.033639143730888e-07, "loss": 0.5674, "step": 92 }, { "epoch": 0.0042678169886650455, "grad_norm": 0.4537692666053772, "learning_rate": 7.110091743119267e-07, "loss": 0.3574, "step": 93 }, { "epoch": 0.004313707493919508, "grad_norm": 1.1757780313491821, "learning_rate": 7.186544342507645e-07, "loss": 0.5351, "step": 94 }, { "epoch": 0.004359597999173971, "grad_norm": 1.3105450868606567, "learning_rate": 7.262996941896025e-07, "loss": 0.5779, "step": 95 }, { "epoch": 0.004405488504428434, "grad_norm": 0.6526632308959961, "learning_rate": 7.339449541284405e-07, "loss": 0.4459, "step": 96 }, { "epoch": 0.004451379009682897, "grad_norm": 0.807191789150238, "learning_rate": 7.415902140672783e-07, "loss": 0.5109, "step": 97 }, { "epoch": 0.004497269514937359, "grad_norm": 0.8935018181800842, "learning_rate": 7.492354740061163e-07, "loss": 0.4762, "step": 98 }, { "epoch": 0.004543160020191822, "grad_norm": 0.8139580488204956, "learning_rate": 7.568807339449542e-07, "loss": 0.4826, "step": 99 }, { "epoch": 0.004589050525446285, "grad_norm": 0.6782995462417603, "learning_rate": 7.645259938837921e-07, "loss": 0.4629, "step": 100 }, { "epoch": 0.004634941030700748, "grad_norm": 0.8879847526550293, "learning_rate": 7.7217125382263e-07, "loss": 0.5781, "step": 101 }, { "epoch": 0.004680831535955211, "grad_norm": 1.026992678642273, "learning_rate": 7.79816513761468e-07, "loss": 0.6328, "step": 102 }, { "epoch": 0.004726722041209674, "grad_norm": 0.5849438905715942, "learning_rate": 7.874617737003058e-07, "loss": 0.3326, "step": 103 }, { "epoch": 0.004772612546464137, "grad_norm": 0.5503666400909424, "learning_rate": 7.951070336391438e-07, "loss": 0.4173, "step": 104 }, { "epoch": 0.004818503051718599, "grad_norm": 0.5570368766784668, "learning_rate": 8.027522935779817e-07, "loss": 0.4654, "step": 105 }, { "epoch": 0.004864393556973062, "grad_norm": 0.661638081073761, "learning_rate": 8.103975535168196e-07, "loss": 0.5453, "step": 106 }, { "epoch": 0.004910284062227525, "grad_norm": 0.6135708093643188, "learning_rate": 8.180428134556576e-07, "loss": 0.5167, "step": 107 }, { "epoch": 0.004956174567481988, "grad_norm": 0.4996219873428345, "learning_rate": 8.256880733944956e-07, "loss": 0.3413, "step": 108 }, { "epoch": 0.0050020650727364505, "grad_norm": 0.6201441287994385, "learning_rate": 8.333333333333333e-07, "loss": 0.506, "step": 109 }, { "epoch": 0.005047955577990914, "grad_norm": 0.6837106943130493, "learning_rate": 8.409785932721713e-07, "loss": 0.5386, "step": 110 }, { "epoch": 0.005093846083245377, "grad_norm": 0.6054534316062927, "learning_rate": 8.486238532110093e-07, "loss": 0.4014, "step": 111 }, { "epoch": 0.00513973658849984, "grad_norm": 0.874347448348999, "learning_rate": 8.562691131498472e-07, "loss": 0.6087, "step": 112 }, { "epoch": 0.005185627093754302, "grad_norm": 0.5703540444374084, "learning_rate": 8.639143730886851e-07, "loss": 0.4674, "step": 113 }, { "epoch": 0.005231517599008765, "grad_norm": 0.5193082094192505, "learning_rate": 8.71559633027523e-07, "loss": 0.3745, "step": 114 }, { "epoch": 0.005277408104263228, "grad_norm": 0.5911059379577637, "learning_rate": 8.792048929663609e-07, "loss": 0.4971, "step": 115 }, { "epoch": 0.00532329860951769, "grad_norm": 0.6464957594871521, "learning_rate": 8.868501529051989e-07, "loss": 0.5544, "step": 116 }, { "epoch": 0.0053691891147721535, "grad_norm": 0.5820969343185425, "learning_rate": 8.944954128440368e-07, "loss": 0.4245, "step": 117 }, { "epoch": 0.005415079620026617, "grad_norm": 0.6394116878509521, "learning_rate": 9.021406727828746e-07, "loss": 0.3375, "step": 118 }, { "epoch": 0.00546097012528108, "grad_norm": 0.568695604801178, "learning_rate": 9.097859327217126e-07, "loss": 0.4163, "step": 119 }, { "epoch": 0.005506860630535542, "grad_norm": 0.7871958613395691, "learning_rate": 9.174311926605506e-07, "loss": 0.52, "step": 120 }, { "epoch": 0.005552751135790005, "grad_norm": 0.5058826804161072, "learning_rate": 9.250764525993884e-07, "loss": 0.4539, "step": 121 }, { "epoch": 0.005598641641044468, "grad_norm": 0.6198352575302124, "learning_rate": 9.327217125382264e-07, "loss": 0.5701, "step": 122 }, { "epoch": 0.005644532146298931, "grad_norm": 0.6333051919937134, "learning_rate": 9.403669724770644e-07, "loss": 0.5923, "step": 123 }, { "epoch": 0.005690422651553393, "grad_norm": 0.49544772505760193, "learning_rate": 9.480122324159022e-07, "loss": 0.3582, "step": 124 }, { "epoch": 0.0057363131568078565, "grad_norm": 0.6826655268669128, "learning_rate": 9.556574923547401e-07, "loss": 0.4117, "step": 125 }, { "epoch": 0.0057822036620623195, "grad_norm": 0.49462851881980896, "learning_rate": 9.633027522935782e-07, "loss": 0.4171, "step": 126 }, { "epoch": 0.005828094167316782, "grad_norm": 0.5096755027770996, "learning_rate": 9.709480122324159e-07, "loss": 0.4312, "step": 127 }, { "epoch": 0.005873984672571245, "grad_norm": 0.5185785293579102, "learning_rate": 9.78593272171254e-07, "loss": 0.4836, "step": 128 }, { "epoch": 0.005919875177825708, "grad_norm": 0.4647371768951416, "learning_rate": 9.862385321100918e-07, "loss": 0.3823, "step": 129 }, { "epoch": 0.005965765683080171, "grad_norm": 0.4872678518295288, "learning_rate": 9.938837920489297e-07, "loss": 0.4208, "step": 130 }, { "epoch": 0.006011656188334633, "grad_norm": 0.5332506895065308, "learning_rate": 1.0015290519877676e-06, "loss": 0.5208, "step": 131 }, { "epoch": 0.006057546693589096, "grad_norm": 0.4972846806049347, "learning_rate": 1.0091743119266057e-06, "loss": 0.4175, "step": 132 }, { "epoch": 0.006103437198843559, "grad_norm": 0.47214367985725403, "learning_rate": 1.0168195718654435e-06, "loss": 0.4308, "step": 133 }, { "epoch": 0.0061493277040980225, "grad_norm": 0.6262720823287964, "learning_rate": 1.0244648318042814e-06, "loss": 0.5003, "step": 134 }, { "epoch": 0.006195218209352485, "grad_norm": 0.4969519376754761, "learning_rate": 1.0321100917431195e-06, "loss": 0.4497, "step": 135 }, { "epoch": 0.006241108714606948, "grad_norm": 0.5266798734664917, "learning_rate": 1.0397553516819571e-06, "loss": 0.4646, "step": 136 }, { "epoch": 0.006286999219861411, "grad_norm": 0.5022170543670654, "learning_rate": 1.0474006116207952e-06, "loss": 0.5034, "step": 137 }, { "epoch": 0.006332889725115873, "grad_norm": 0.6192761063575745, "learning_rate": 1.055045871559633e-06, "loss": 0.4547, "step": 138 }, { "epoch": 0.006378780230370336, "grad_norm": 0.456028550863266, "learning_rate": 1.062691131498471e-06, "loss": 0.3919, "step": 139 }, { "epoch": 0.006424670735624799, "grad_norm": 0.5489574670791626, "learning_rate": 1.070336391437309e-06, "loss": 0.4536, "step": 140 }, { "epoch": 0.006470561240879262, "grad_norm": 0.6313809156417847, "learning_rate": 1.077981651376147e-06, "loss": 0.5674, "step": 141 }, { "epoch": 0.006516451746133725, "grad_norm": 0.5987564921379089, "learning_rate": 1.0856269113149848e-06, "loss": 0.6084, "step": 142 }, { "epoch": 0.006562342251388188, "grad_norm": 0.4713725745677948, "learning_rate": 1.0932721712538227e-06, "loss": 0.3861, "step": 143 }, { "epoch": 0.006608232756642651, "grad_norm": 0.5778293609619141, "learning_rate": 1.1009174311926608e-06, "loss": 0.4871, "step": 144 }, { "epoch": 0.006654123261897114, "grad_norm": 0.4850349426269531, "learning_rate": 1.1085626911314986e-06, "loss": 0.3813, "step": 145 }, { "epoch": 0.006700013767151576, "grad_norm": 0.4866754710674286, "learning_rate": 1.1162079510703365e-06, "loss": 0.4618, "step": 146 }, { "epoch": 0.006745904272406039, "grad_norm": 0.6613770723342896, "learning_rate": 1.1238532110091744e-06, "loss": 0.5501, "step": 147 }, { "epoch": 0.006791794777660502, "grad_norm": 0.4495118260383606, "learning_rate": 1.1314984709480122e-06, "loss": 0.3879, "step": 148 }, { "epoch": 0.0068376852829149645, "grad_norm": 0.6176930069923401, "learning_rate": 1.1391437308868503e-06, "loss": 0.4349, "step": 149 }, { "epoch": 0.006883575788169428, "grad_norm": 0.5159872174263, "learning_rate": 1.1467889908256882e-06, "loss": 0.4828, "step": 150 }, { "epoch": 0.006929466293423891, "grad_norm": 0.4736764430999756, "learning_rate": 1.154434250764526e-06, "loss": 0.3564, "step": 151 }, { "epoch": 0.006975356798678354, "grad_norm": 0.6415404081344604, "learning_rate": 1.162079510703364e-06, "loss": 0.6007, "step": 152 }, { "epoch": 0.007021247303932816, "grad_norm": 0.5175949335098267, "learning_rate": 1.169724770642202e-06, "loss": 0.5334, "step": 153 }, { "epoch": 0.007067137809187279, "grad_norm": 0.46300917863845825, "learning_rate": 1.17737003058104e-06, "loss": 0.413, "step": 154 }, { "epoch": 0.007113028314441742, "grad_norm": 0.5009661912918091, "learning_rate": 1.1850152905198778e-06, "loss": 0.5139, "step": 155 }, { "epoch": 0.007158918819696205, "grad_norm": 0.5458509922027588, "learning_rate": 1.1926605504587159e-06, "loss": 0.4877, "step": 156 }, { "epoch": 0.0072048093249506675, "grad_norm": 0.5963668823242188, "learning_rate": 1.2003058103975535e-06, "loss": 0.5502, "step": 157 }, { "epoch": 0.0072506998302051305, "grad_norm": 0.4685239791870117, "learning_rate": 1.2079510703363916e-06, "loss": 0.396, "step": 158 }, { "epoch": 0.007296590335459594, "grad_norm": 0.47896337509155273, "learning_rate": 1.2155963302752295e-06, "loss": 0.481, "step": 159 }, { "epoch": 0.007342480840714056, "grad_norm": 0.5194567441940308, "learning_rate": 1.2232415902140673e-06, "loss": 0.5662, "step": 160 }, { "epoch": 0.007388371345968519, "grad_norm": 0.48354044556617737, "learning_rate": 1.2308868501529054e-06, "loss": 0.402, "step": 161 }, { "epoch": 0.007434261851222982, "grad_norm": 0.4972841441631317, "learning_rate": 1.2385321100917433e-06, "loss": 0.4685, "step": 162 }, { "epoch": 0.007480152356477445, "grad_norm": 0.4714357256889343, "learning_rate": 1.2461773700305812e-06, "loss": 0.4032, "step": 163 }, { "epoch": 0.007526042861731907, "grad_norm": 0.5029146075248718, "learning_rate": 1.253822629969419e-06, "loss": 0.4139, "step": 164 }, { "epoch": 0.00757193336698637, "grad_norm": 0.4872240126132965, "learning_rate": 1.261467889908257e-06, "loss": 0.4753, "step": 165 }, { "epoch": 0.0076178238722408335, "grad_norm": 0.464991956949234, "learning_rate": 1.2691131498470948e-06, "loss": 0.3451, "step": 166 }, { "epoch": 0.007663714377495297, "grad_norm": 0.535402774810791, "learning_rate": 1.2767584097859329e-06, "loss": 0.5317, "step": 167 }, { "epoch": 0.007709604882749759, "grad_norm": 0.5238930583000183, "learning_rate": 1.2844036697247707e-06, "loss": 0.5113, "step": 168 }, { "epoch": 0.007755495388004222, "grad_norm": 0.4925292730331421, "learning_rate": 1.2920489296636088e-06, "loss": 0.4153, "step": 169 }, { "epoch": 0.007801385893258685, "grad_norm": 0.5098105669021606, "learning_rate": 1.2996941896024465e-06, "loss": 0.4934, "step": 170 }, { "epoch": 0.007847276398513148, "grad_norm": 0.5790193676948547, "learning_rate": 1.3073394495412844e-06, "loss": 0.3271, "step": 171 }, { "epoch": 0.007893166903767611, "grad_norm": 0.5277330875396729, "learning_rate": 1.3149847094801224e-06, "loss": 0.4385, "step": 172 }, { "epoch": 0.007939057409022073, "grad_norm": 0.45835790038108826, "learning_rate": 1.3226299694189603e-06, "loss": 0.4397, "step": 173 }, { "epoch": 0.007984947914276536, "grad_norm": 0.46754586696624756, "learning_rate": 1.3302752293577984e-06, "loss": 0.417, "step": 174 }, { "epoch": 0.008030838419530999, "grad_norm": 0.6357254981994629, "learning_rate": 1.3379204892966363e-06, "loss": 0.7048, "step": 175 }, { "epoch": 0.008076728924785462, "grad_norm": 0.4879402220249176, "learning_rate": 1.345565749235474e-06, "loss": 0.3694, "step": 176 }, { "epoch": 0.008122619430039925, "grad_norm": 0.5214935541152954, "learning_rate": 1.353211009174312e-06, "loss": 0.4588, "step": 177 }, { "epoch": 0.008168509935294388, "grad_norm": 0.5096695423126221, "learning_rate": 1.3608562691131499e-06, "loss": 0.4498, "step": 178 }, { "epoch": 0.008214400440548851, "grad_norm": 0.44374576210975647, "learning_rate": 1.368501529051988e-06, "loss": 0.3125, "step": 179 }, { "epoch": 0.008260290945803314, "grad_norm": 0.49549832940101624, "learning_rate": 1.3761467889908258e-06, "loss": 0.4494, "step": 180 }, { "epoch": 0.008306181451057775, "grad_norm": 0.4328993558883667, "learning_rate": 1.383792048929664e-06, "loss": 0.3694, "step": 181 }, { "epoch": 0.008352071956312239, "grad_norm": 0.4350336790084839, "learning_rate": 1.3914373088685016e-06, "loss": 0.3625, "step": 182 }, { "epoch": 0.008397962461566702, "grad_norm": 0.46577188372612, "learning_rate": 1.3990825688073395e-06, "loss": 0.4098, "step": 183 }, { "epoch": 0.008443852966821165, "grad_norm": 0.553417444229126, "learning_rate": 1.4067278287461775e-06, "loss": 0.5746, "step": 184 }, { "epoch": 0.008489743472075628, "grad_norm": 0.4803412854671478, "learning_rate": 1.4143730886850154e-06, "loss": 0.4149, "step": 185 }, { "epoch": 0.008535633977330091, "grad_norm": 0.42834556102752686, "learning_rate": 1.4220183486238535e-06, "loss": 0.3503, "step": 186 }, { "epoch": 0.008581524482584554, "grad_norm": 0.500589907169342, "learning_rate": 1.4296636085626914e-06, "loss": 0.4777, "step": 187 }, { "epoch": 0.008627414987839015, "grad_norm": 0.4265582859516144, "learning_rate": 1.437308868501529e-06, "loss": 0.3326, "step": 188 }, { "epoch": 0.008673305493093478, "grad_norm": 0.6069602370262146, "learning_rate": 1.4449541284403671e-06, "loss": 0.6107, "step": 189 }, { "epoch": 0.008719195998347942, "grad_norm": 0.4830178916454315, "learning_rate": 1.452599388379205e-06, "loss": 0.4705, "step": 190 }, { "epoch": 0.008765086503602405, "grad_norm": 0.5238247513771057, "learning_rate": 1.460244648318043e-06, "loss": 0.5562, "step": 191 }, { "epoch": 0.008810977008856868, "grad_norm": 0.4872019588947296, "learning_rate": 1.467889908256881e-06, "loss": 0.4079, "step": 192 }, { "epoch": 0.00885686751411133, "grad_norm": 0.5381096005439758, "learning_rate": 1.4755351681957188e-06, "loss": 0.4264, "step": 193 }, { "epoch": 0.008902758019365794, "grad_norm": 0.5009563565254211, "learning_rate": 1.4831804281345567e-06, "loss": 0.4761, "step": 194 }, { "epoch": 0.008948648524620255, "grad_norm": 0.5903949737548828, "learning_rate": 1.4908256880733945e-06, "loss": 0.6371, "step": 195 }, { "epoch": 0.008994539029874718, "grad_norm": 0.5457831621170044, "learning_rate": 1.4984709480122326e-06, "loss": 0.4981, "step": 196 }, { "epoch": 0.009040429535129181, "grad_norm": 0.5037556290626526, "learning_rate": 1.5061162079510705e-06, "loss": 0.429, "step": 197 }, { "epoch": 0.009086320040383645, "grad_norm": 0.4773487150669098, "learning_rate": 1.5137614678899084e-06, "loss": 0.4345, "step": 198 }, { "epoch": 0.009132210545638108, "grad_norm": 0.5097615718841553, "learning_rate": 1.5214067278287465e-06, "loss": 0.5077, "step": 199 }, { "epoch": 0.00917810105089257, "grad_norm": 0.4838576912879944, "learning_rate": 1.5290519877675841e-06, "loss": 0.4384, "step": 200 }, { "epoch": 0.009223991556147034, "grad_norm": 0.4566458761692047, "learning_rate": 1.536697247706422e-06, "loss": 0.3858, "step": 201 }, { "epoch": 0.009269882061401497, "grad_norm": 0.6419386863708496, "learning_rate": 1.54434250764526e-06, "loss": 0.5308, "step": 202 }, { "epoch": 0.009315772566655958, "grad_norm": 0.5053452849388123, "learning_rate": 1.551987767584098e-06, "loss": 0.5171, "step": 203 }, { "epoch": 0.009361663071910421, "grad_norm": 0.4292317032814026, "learning_rate": 1.559633027522936e-06, "loss": 0.3162, "step": 204 }, { "epoch": 0.009407553577164884, "grad_norm": 0.5141228437423706, "learning_rate": 1.567278287461774e-06, "loss": 0.4805, "step": 205 }, { "epoch": 0.009453444082419347, "grad_norm": 0.5513278841972351, "learning_rate": 1.5749235474006116e-06, "loss": 0.5149, "step": 206 }, { "epoch": 0.00949933458767381, "grad_norm": 0.46986624598503113, "learning_rate": 1.5825688073394496e-06, "loss": 0.4009, "step": 207 }, { "epoch": 0.009545225092928274, "grad_norm": 0.46508821845054626, "learning_rate": 1.5902140672782875e-06, "loss": 0.4486, "step": 208 }, { "epoch": 0.009591115598182737, "grad_norm": 0.5256361961364746, "learning_rate": 1.5978593272171256e-06, "loss": 0.4443, "step": 209 }, { "epoch": 0.009637006103437198, "grad_norm": 0.4129773676395416, "learning_rate": 1.6055045871559635e-06, "loss": 0.2691, "step": 210 }, { "epoch": 0.009682896608691661, "grad_norm": 0.5132619738578796, "learning_rate": 1.6131498470948016e-06, "loss": 0.4946, "step": 211 }, { "epoch": 0.009728787113946124, "grad_norm": 0.6025416254997253, "learning_rate": 1.6207951070336392e-06, "loss": 0.4538, "step": 212 }, { "epoch": 0.009774677619200587, "grad_norm": 0.5104288458824158, "learning_rate": 1.628440366972477e-06, "loss": 0.512, "step": 213 }, { "epoch": 0.00982056812445505, "grad_norm": 0.4239733815193176, "learning_rate": 1.6360856269113152e-06, "loss": 0.3331, "step": 214 }, { "epoch": 0.009866458629709514, "grad_norm": 0.5097137093544006, "learning_rate": 1.643730886850153e-06, "loss": 0.518, "step": 215 }, { "epoch": 0.009912349134963977, "grad_norm": 0.5095618963241577, "learning_rate": 1.6513761467889911e-06, "loss": 0.5671, "step": 216 }, { "epoch": 0.009958239640218438, "grad_norm": 0.5642290115356445, "learning_rate": 1.659021406727829e-06, "loss": 0.5115, "step": 217 }, { "epoch": 0.010004130145472901, "grad_norm": 0.4698328375816345, "learning_rate": 1.6666666666666667e-06, "loss": 0.4595, "step": 218 }, { "epoch": 0.010050020650727364, "grad_norm": 0.42536523938179016, "learning_rate": 1.6743119266055047e-06, "loss": 0.3607, "step": 219 }, { "epoch": 0.010095911155981827, "grad_norm": 0.4711594879627228, "learning_rate": 1.6819571865443426e-06, "loss": 0.4012, "step": 220 }, { "epoch": 0.01014180166123629, "grad_norm": 0.4453487694263458, "learning_rate": 1.6896024464831807e-06, "loss": 0.3845, "step": 221 }, { "epoch": 0.010187692166490753, "grad_norm": 0.4480073153972626, "learning_rate": 1.6972477064220186e-06, "loss": 0.4128, "step": 222 }, { "epoch": 0.010233582671745216, "grad_norm": 0.4990222454071045, "learning_rate": 1.7048929663608562e-06, "loss": 0.4794, "step": 223 }, { "epoch": 0.01027947317699968, "grad_norm": 0.5442683696746826, "learning_rate": 1.7125382262996943e-06, "loss": 0.537, "step": 224 }, { "epoch": 0.010325363682254141, "grad_norm": 0.4410986304283142, "learning_rate": 1.7201834862385322e-06, "loss": 0.4166, "step": 225 }, { "epoch": 0.010371254187508604, "grad_norm": 0.43682900071144104, "learning_rate": 1.7278287461773703e-06, "loss": 0.3537, "step": 226 }, { "epoch": 0.010417144692763067, "grad_norm": 0.4223477840423584, "learning_rate": 1.7354740061162081e-06, "loss": 0.3292, "step": 227 }, { "epoch": 0.01046303519801753, "grad_norm": 0.4678100347518921, "learning_rate": 1.743119266055046e-06, "loss": 0.467, "step": 228 }, { "epoch": 0.010508925703271993, "grad_norm": 0.49103376269340515, "learning_rate": 1.7507645259938839e-06, "loss": 0.4286, "step": 229 }, { "epoch": 0.010554816208526456, "grad_norm": 0.5196321606636047, "learning_rate": 1.7584097859327218e-06, "loss": 0.4914, "step": 230 }, { "epoch": 0.01060070671378092, "grad_norm": 0.46310898661613464, "learning_rate": 1.7660550458715596e-06, "loss": 0.3717, "step": 231 }, { "epoch": 0.01064659721903538, "grad_norm": 0.4803982079029083, "learning_rate": 1.7737003058103977e-06, "loss": 0.5182, "step": 232 }, { "epoch": 0.010692487724289844, "grad_norm": 0.49229687452316284, "learning_rate": 1.7813455657492356e-06, "loss": 0.4578, "step": 233 }, { "epoch": 0.010738378229544307, "grad_norm": 0.5128414034843445, "learning_rate": 1.7889908256880737e-06, "loss": 0.473, "step": 234 }, { "epoch": 0.01078426873479877, "grad_norm": 0.5010553598403931, "learning_rate": 1.7966360856269113e-06, "loss": 0.475, "step": 235 }, { "epoch": 0.010830159240053233, "grad_norm": 0.4786660075187683, "learning_rate": 1.8042813455657492e-06, "loss": 0.4771, "step": 236 }, { "epoch": 0.010876049745307696, "grad_norm": 0.4521401822566986, "learning_rate": 1.8119266055045873e-06, "loss": 0.3874, "step": 237 }, { "epoch": 0.01092194025056216, "grad_norm": 0.4972446858882904, "learning_rate": 1.8195718654434252e-06, "loss": 0.5038, "step": 238 }, { "epoch": 0.01096783075581662, "grad_norm": 0.4640282094478607, "learning_rate": 1.8272171253822632e-06, "loss": 0.4197, "step": 239 }, { "epoch": 0.011013721261071084, "grad_norm": 0.5040390491485596, "learning_rate": 1.8348623853211011e-06, "loss": 0.4846, "step": 240 }, { "epoch": 0.011059611766325547, "grad_norm": 0.43553870916366577, "learning_rate": 1.8425076452599388e-06, "loss": 0.3355, "step": 241 }, { "epoch": 0.01110550227158001, "grad_norm": 0.47265294194221497, "learning_rate": 1.8501529051987769e-06, "loss": 0.4562, "step": 242 }, { "epoch": 0.011151392776834473, "grad_norm": 0.4227467179298401, "learning_rate": 1.8577981651376147e-06, "loss": 0.3322, "step": 243 }, { "epoch": 0.011197283282088936, "grad_norm": 0.5771182179450989, "learning_rate": 1.8654434250764528e-06, "loss": 0.5562, "step": 244 }, { "epoch": 0.0112431737873434, "grad_norm": 0.42815539240837097, "learning_rate": 1.8730886850152907e-06, "loss": 0.3546, "step": 245 }, { "epoch": 0.011289064292597862, "grad_norm": 0.5375799536705017, "learning_rate": 1.8807339449541288e-06, "loss": 0.4165, "step": 246 }, { "epoch": 0.011334954797852324, "grad_norm": 0.4885975122451782, "learning_rate": 1.8883792048929664e-06, "loss": 0.4749, "step": 247 }, { "epoch": 0.011380845303106787, "grad_norm": 0.43277713656425476, "learning_rate": 1.8960244648318043e-06, "loss": 0.3485, "step": 248 }, { "epoch": 0.01142673580836125, "grad_norm": 0.42765310406684875, "learning_rate": 1.9036697247706424e-06, "loss": 0.3523, "step": 249 }, { "epoch": 0.011472626313615713, "grad_norm": 0.4612452983856201, "learning_rate": 1.9113149847094803e-06, "loss": 0.4482, "step": 250 }, { "epoch": 0.011518516818870176, "grad_norm": 0.43229803442955017, "learning_rate": 1.918960244648318e-06, "loss": 0.3545, "step": 251 }, { "epoch": 0.011564407324124639, "grad_norm": 0.46931692957878113, "learning_rate": 1.9266055045871564e-06, "loss": 0.4344, "step": 252 }, { "epoch": 0.011610297829379102, "grad_norm": 0.428820937871933, "learning_rate": 1.934250764525994e-06, "loss": 0.3404, "step": 253 }, { "epoch": 0.011656188334633564, "grad_norm": 0.5306540727615356, "learning_rate": 1.9418960244648317e-06, "loss": 0.5357, "step": 254 }, { "epoch": 0.011702078839888027, "grad_norm": 0.4210246801376343, "learning_rate": 1.94954128440367e-06, "loss": 0.304, "step": 255 }, { "epoch": 0.01174796934514249, "grad_norm": 0.5046725869178772, "learning_rate": 1.957186544342508e-06, "loss": 0.4961, "step": 256 }, { "epoch": 0.011793859850396953, "grad_norm": 0.45517218112945557, "learning_rate": 1.9648318042813458e-06, "loss": 0.4069, "step": 257 }, { "epoch": 0.011839750355651416, "grad_norm": 0.4402482807636261, "learning_rate": 1.9724770642201837e-06, "loss": 0.419, "step": 258 }, { "epoch": 0.011885640860905879, "grad_norm": 0.4569067656993866, "learning_rate": 1.9801223241590215e-06, "loss": 0.4055, "step": 259 }, { "epoch": 0.011931531366160342, "grad_norm": 0.4946267604827881, "learning_rate": 1.9877675840978594e-06, "loss": 0.407, "step": 260 }, { "epoch": 0.011977421871414803, "grad_norm": 0.44158419966697693, "learning_rate": 1.9954128440366973e-06, "loss": 0.4159, "step": 261 }, { "epoch": 0.012023312376669266, "grad_norm": 0.4439694881439209, "learning_rate": 2.003058103975535e-06, "loss": 0.3569, "step": 262 }, { "epoch": 0.01206920288192373, "grad_norm": 0.48543256521224976, "learning_rate": 2.0107033639143734e-06, "loss": 0.4774, "step": 263 }, { "epoch": 0.012115093387178193, "grad_norm": 0.501968264579773, "learning_rate": 2.0183486238532113e-06, "loss": 0.5504, "step": 264 }, { "epoch": 0.012160983892432656, "grad_norm": 0.45566222071647644, "learning_rate": 2.025993883792049e-06, "loss": 0.3634, "step": 265 }, { "epoch": 0.012206874397687119, "grad_norm": 0.565933346748352, "learning_rate": 2.033639143730887e-06, "loss": 0.5008, "step": 266 }, { "epoch": 0.012252764902941582, "grad_norm": 0.5437031984329224, "learning_rate": 2.041284403669725e-06, "loss": 0.537, "step": 267 }, { "epoch": 0.012298655408196045, "grad_norm": 0.443162739276886, "learning_rate": 2.048929663608563e-06, "loss": 0.3894, "step": 268 }, { "epoch": 0.012344545913450506, "grad_norm": 0.533871054649353, "learning_rate": 2.0565749235474007e-06, "loss": 0.5715, "step": 269 }, { "epoch": 0.01239043641870497, "grad_norm": 0.4236867427825928, "learning_rate": 2.064220183486239e-06, "loss": 0.3289, "step": 270 }, { "epoch": 0.012436326923959433, "grad_norm": 0.5032171607017517, "learning_rate": 2.0718654434250764e-06, "loss": 0.5618, "step": 271 }, { "epoch": 0.012482217429213896, "grad_norm": 0.5658868551254272, "learning_rate": 2.0795107033639143e-06, "loss": 0.5318, "step": 272 }, { "epoch": 0.012528107934468359, "grad_norm": 0.6331881880760193, "learning_rate": 2.0871559633027526e-06, "loss": 0.5536, "step": 273 }, { "epoch": 0.012573998439722822, "grad_norm": 0.4568878710269928, "learning_rate": 2.0948012232415905e-06, "loss": 0.407, "step": 274 }, { "epoch": 0.012619888944977285, "grad_norm": 0.4961564540863037, "learning_rate": 2.1024464831804283e-06, "loss": 0.4665, "step": 275 }, { "epoch": 0.012665779450231746, "grad_norm": 0.48572826385498047, "learning_rate": 2.110091743119266e-06, "loss": 0.4487, "step": 276 }, { "epoch": 0.01271166995548621, "grad_norm": 0.47618433833122253, "learning_rate": 2.117737003058104e-06, "loss": 0.4825, "step": 277 }, { "epoch": 0.012757560460740672, "grad_norm": 0.4763575494289398, "learning_rate": 2.125382262996942e-06, "loss": 0.4608, "step": 278 }, { "epoch": 0.012803450965995136, "grad_norm": 0.43774715065956116, "learning_rate": 2.13302752293578e-06, "loss": 0.4085, "step": 279 }, { "epoch": 0.012849341471249599, "grad_norm": 0.47849181294441223, "learning_rate": 2.140672782874618e-06, "loss": 0.391, "step": 280 }, { "epoch": 0.012895231976504062, "grad_norm": 0.5424180626869202, "learning_rate": 2.148318042813456e-06, "loss": 0.5967, "step": 281 }, { "epoch": 0.012941122481758525, "grad_norm": 0.4189923405647278, "learning_rate": 2.155963302752294e-06, "loss": 0.3116, "step": 282 }, { "epoch": 0.012987012987012988, "grad_norm": 0.48549145460128784, "learning_rate": 2.1636085626911317e-06, "loss": 0.513, "step": 283 }, { "epoch": 0.01303290349226745, "grad_norm": 0.5055712461471558, "learning_rate": 2.1712538226299696e-06, "loss": 0.4366, "step": 284 }, { "epoch": 0.013078793997521912, "grad_norm": 0.46339482069015503, "learning_rate": 2.1788990825688075e-06, "loss": 0.4272, "step": 285 }, { "epoch": 0.013124684502776375, "grad_norm": 0.42901337146759033, "learning_rate": 2.1865443425076453e-06, "loss": 0.3895, "step": 286 }, { "epoch": 0.013170575008030838, "grad_norm": 0.46788305044174194, "learning_rate": 2.1941896024464836e-06, "loss": 0.4193, "step": 287 }, { "epoch": 0.013216465513285302, "grad_norm": 0.43930676579475403, "learning_rate": 2.2018348623853215e-06, "loss": 0.3832, "step": 288 }, { "epoch": 0.013262356018539765, "grad_norm": 0.5346788763999939, "learning_rate": 2.209480122324159e-06, "loss": 0.4841, "step": 289 }, { "epoch": 0.013308246523794228, "grad_norm": 0.41882237792015076, "learning_rate": 2.2171253822629973e-06, "loss": 0.3196, "step": 290 }, { "epoch": 0.013354137029048689, "grad_norm": 0.45628708600997925, "learning_rate": 2.224770642201835e-06, "loss": 0.4657, "step": 291 }, { "epoch": 0.013400027534303152, "grad_norm": 0.49518153071403503, "learning_rate": 2.232415902140673e-06, "loss": 0.5638, "step": 292 }, { "epoch": 0.013445918039557615, "grad_norm": 0.4087590277194977, "learning_rate": 2.240061162079511e-06, "loss": 0.3098, "step": 293 }, { "epoch": 0.013491808544812078, "grad_norm": 0.5218579769134521, "learning_rate": 2.2477064220183487e-06, "loss": 0.4815, "step": 294 }, { "epoch": 0.013537699050066541, "grad_norm": 0.44696488976478577, "learning_rate": 2.2553516819571866e-06, "loss": 0.4104, "step": 295 }, { "epoch": 0.013583589555321005, "grad_norm": 0.4772244691848755, "learning_rate": 2.2629969418960245e-06, "loss": 0.4269, "step": 296 }, { "epoch": 0.013629480060575468, "grad_norm": 0.45414552092552185, "learning_rate": 2.2706422018348624e-06, "loss": 0.4195, "step": 297 }, { "epoch": 0.013675370565829929, "grad_norm": 0.4902088940143585, "learning_rate": 2.2782874617737006e-06, "loss": 0.3963, "step": 298 }, { "epoch": 0.013721261071084392, "grad_norm": 0.4744234085083008, "learning_rate": 2.2859327217125385e-06, "loss": 0.4487, "step": 299 }, { "epoch": 0.013767151576338855, "grad_norm": 0.46651050448417664, "learning_rate": 2.2935779816513764e-06, "loss": 0.4291, "step": 300 }, { "epoch": 0.013813042081593318, "grad_norm": 0.4345714747905731, "learning_rate": 2.3012232415902143e-06, "loss": 0.3359, "step": 301 }, { "epoch": 0.013858932586847781, "grad_norm": 0.5217494368553162, "learning_rate": 2.308868501529052e-06, "loss": 0.5498, "step": 302 }, { "epoch": 0.013904823092102244, "grad_norm": 0.4570966362953186, "learning_rate": 2.31651376146789e-06, "loss": 0.4328, "step": 303 }, { "epoch": 0.013950713597356708, "grad_norm": 0.48730015754699707, "learning_rate": 2.324159021406728e-06, "loss": 0.4382, "step": 304 }, { "epoch": 0.01399660410261117, "grad_norm": 0.41138121485710144, "learning_rate": 2.331804281345566e-06, "loss": 0.3028, "step": 305 }, { "epoch": 0.014042494607865632, "grad_norm": 0.46273642778396606, "learning_rate": 2.339449541284404e-06, "loss": 0.4055, "step": 306 }, { "epoch": 0.014088385113120095, "grad_norm": 0.6203993558883667, "learning_rate": 2.3470948012232415e-06, "loss": 0.4562, "step": 307 }, { "epoch": 0.014134275618374558, "grad_norm": 0.45870843529701233, "learning_rate": 2.35474006116208e-06, "loss": 0.4438, "step": 308 }, { "epoch": 0.014180166123629021, "grad_norm": 0.4719725251197815, "learning_rate": 2.3623853211009177e-06, "loss": 0.3947, "step": 309 }, { "epoch": 0.014226056628883484, "grad_norm": 0.48564717173576355, "learning_rate": 2.3700305810397555e-06, "loss": 0.4698, "step": 310 }, { "epoch": 0.014271947134137947, "grad_norm": 0.49916261434555054, "learning_rate": 2.3776758409785934e-06, "loss": 0.5144, "step": 311 }, { "epoch": 0.01431783763939241, "grad_norm": 0.48892831802368164, "learning_rate": 2.3853211009174317e-06, "loss": 0.4824, "step": 312 }, { "epoch": 0.014363728144646872, "grad_norm": 0.5027477145195007, "learning_rate": 2.392966360856269e-06, "loss": 0.4271, "step": 313 }, { "epoch": 0.014409618649901335, "grad_norm": 0.43566134572029114, "learning_rate": 2.400611620795107e-06, "loss": 0.3792, "step": 314 }, { "epoch": 0.014455509155155798, "grad_norm": 0.46502846479415894, "learning_rate": 2.4082568807339453e-06, "loss": 0.3786, "step": 315 }, { "epoch": 0.014501399660410261, "grad_norm": 0.46683400869369507, "learning_rate": 2.415902140672783e-06, "loss": 0.4447, "step": 316 }, { "epoch": 0.014547290165664724, "grad_norm": 0.5267835855484009, "learning_rate": 2.423547400611621e-06, "loss": 0.5284, "step": 317 }, { "epoch": 0.014593180670919187, "grad_norm": 0.48118987679481506, "learning_rate": 2.431192660550459e-06, "loss": 0.4114, "step": 318 }, { "epoch": 0.01463907117617365, "grad_norm": 0.4025678038597107, "learning_rate": 2.438837920489297e-06, "loss": 0.2934, "step": 319 }, { "epoch": 0.014684961681428112, "grad_norm": 0.3976534307003021, "learning_rate": 2.4464831804281347e-06, "loss": 0.28, "step": 320 }, { "epoch": 0.014730852186682575, "grad_norm": 0.4465334415435791, "learning_rate": 2.4541284403669725e-06, "loss": 0.3942, "step": 321 }, { "epoch": 0.014776742691937038, "grad_norm": 0.48250025510787964, "learning_rate": 2.461773700305811e-06, "loss": 0.4059, "step": 322 }, { "epoch": 0.014822633197191501, "grad_norm": 0.5287503600120544, "learning_rate": 2.4694189602446487e-06, "loss": 0.5658, "step": 323 }, { "epoch": 0.014868523702445964, "grad_norm": 0.45731842517852783, "learning_rate": 2.4770642201834866e-06, "loss": 0.4149, "step": 324 }, { "epoch": 0.014914414207700427, "grad_norm": 0.4629984200000763, "learning_rate": 2.4847094801223245e-06, "loss": 0.3735, "step": 325 }, { "epoch": 0.01496030471295489, "grad_norm": 0.5468152761459351, "learning_rate": 2.4923547400611623e-06, "loss": 0.5926, "step": 326 }, { "epoch": 0.015006195218209353, "grad_norm": 0.45142504572868347, "learning_rate": 2.5e-06, "loss": 0.3972, "step": 327 }, { "epoch": 0.015052085723463815, "grad_norm": 1.0734608173370361, "learning_rate": 2.507645259938838e-06, "loss": 0.5062, "step": 328 }, { "epoch": 0.015097976228718278, "grad_norm": 0.4371100664138794, "learning_rate": 2.515290519877676e-06, "loss": 0.3681, "step": 329 }, { "epoch": 0.01514386673397274, "grad_norm": 0.4611198604106903, "learning_rate": 2.522935779816514e-06, "loss": 0.3832, "step": 330 }, { "epoch": 0.015189757239227204, "grad_norm": 0.5871808528900146, "learning_rate": 2.530581039755352e-06, "loss": 0.3449, "step": 331 }, { "epoch": 0.015235647744481667, "grad_norm": 0.43380486965179443, "learning_rate": 2.5382262996941896e-06, "loss": 0.3395, "step": 332 }, { "epoch": 0.01528153824973613, "grad_norm": 0.4677191972732544, "learning_rate": 2.5458715596330274e-06, "loss": 0.4951, "step": 333 }, { "epoch": 0.015327428754990593, "grad_norm": 0.48182302713394165, "learning_rate": 2.5535168195718657e-06, "loss": 0.5163, "step": 334 }, { "epoch": 0.015373319260245055, "grad_norm": 0.563805878162384, "learning_rate": 2.5611620795107036e-06, "loss": 0.5234, "step": 335 }, { "epoch": 0.015419209765499518, "grad_norm": 0.512263834476471, "learning_rate": 2.5688073394495415e-06, "loss": 0.5025, "step": 336 }, { "epoch": 0.01546510027075398, "grad_norm": 0.43746107816696167, "learning_rate": 2.5764525993883793e-06, "loss": 0.3421, "step": 337 }, { "epoch": 0.015510990776008444, "grad_norm": 0.6869626641273499, "learning_rate": 2.5840978593272176e-06, "loss": 0.5717, "step": 338 }, { "epoch": 0.015556881281262907, "grad_norm": 0.44178250432014465, "learning_rate": 2.591743119266055e-06, "loss": 0.3735, "step": 339 }, { "epoch": 0.01560277178651737, "grad_norm": 0.4314460754394531, "learning_rate": 2.599388379204893e-06, "loss": 0.3144, "step": 340 }, { "epoch": 0.015648662291771833, "grad_norm": 0.48751088976860046, "learning_rate": 2.6070336391437313e-06, "loss": 0.476, "step": 341 }, { "epoch": 0.015694552797026296, "grad_norm": 0.4440633952617645, "learning_rate": 2.6146788990825687e-06, "loss": 0.3794, "step": 342 }, { "epoch": 0.01574044330228076, "grad_norm": 0.932782769203186, "learning_rate": 2.622324159021407e-06, "loss": 0.5274, "step": 343 }, { "epoch": 0.015786333807535222, "grad_norm": 0.50304114818573, "learning_rate": 2.629969418960245e-06, "loss": 0.4773, "step": 344 }, { "epoch": 0.015832224312789685, "grad_norm": 0.4992387890815735, "learning_rate": 2.6376146788990823e-06, "loss": 0.5398, "step": 345 }, { "epoch": 0.015878114818044145, "grad_norm": 0.4897677004337311, "learning_rate": 2.6452599388379206e-06, "loss": 0.4276, "step": 346 }, { "epoch": 0.015924005323298608, "grad_norm": 0.441156268119812, "learning_rate": 2.6529051987767585e-06, "loss": 0.3767, "step": 347 }, { "epoch": 0.01596989582855307, "grad_norm": 0.4375210702419281, "learning_rate": 2.6605504587155968e-06, "loss": 0.3923, "step": 348 }, { "epoch": 0.016015786333807534, "grad_norm": 0.4848385453224182, "learning_rate": 2.6681957186544342e-06, "loss": 0.3759, "step": 349 }, { "epoch": 0.016061676839061997, "grad_norm": 0.48047348856925964, "learning_rate": 2.6758409785932725e-06, "loss": 0.4541, "step": 350 }, { "epoch": 0.01610756734431646, "grad_norm": 0.48511138558387756, "learning_rate": 2.6834862385321104e-06, "loss": 0.4383, "step": 351 }, { "epoch": 0.016153457849570924, "grad_norm": 0.44000443816185, "learning_rate": 2.691131498470948e-06, "loss": 0.3572, "step": 352 }, { "epoch": 0.016199348354825387, "grad_norm": 0.5879843235015869, "learning_rate": 2.698776758409786e-06, "loss": 0.5802, "step": 353 }, { "epoch": 0.01624523886007985, "grad_norm": 0.5100506544113159, "learning_rate": 2.706422018348624e-06, "loss": 0.4733, "step": 354 }, { "epoch": 0.016291129365334313, "grad_norm": 0.4778331518173218, "learning_rate": 2.7140672782874623e-06, "loss": 0.4595, "step": 355 }, { "epoch": 0.016337019870588776, "grad_norm": 0.4732493460178375, "learning_rate": 2.7217125382262998e-06, "loss": 0.4049, "step": 356 }, { "epoch": 0.01638291037584324, "grad_norm": 0.4520963728427887, "learning_rate": 2.7293577981651376e-06, "loss": 0.4055, "step": 357 }, { "epoch": 0.016428800881097702, "grad_norm": 0.4668768346309662, "learning_rate": 2.737003058103976e-06, "loss": 0.4399, "step": 358 }, { "epoch": 0.016474691386352165, "grad_norm": 0.46692126989364624, "learning_rate": 2.7446483180428134e-06, "loss": 0.4235, "step": 359 }, { "epoch": 0.01652058189160663, "grad_norm": 0.5359144806861877, "learning_rate": 2.7522935779816517e-06, "loss": 0.5436, "step": 360 }, { "epoch": 0.016566472396861088, "grad_norm": 0.4567144811153412, "learning_rate": 2.7599388379204895e-06, "loss": 0.3992, "step": 361 }, { "epoch": 0.01661236290211555, "grad_norm": 0.4288550317287445, "learning_rate": 2.767584097859328e-06, "loss": 0.3498, "step": 362 }, { "epoch": 0.016658253407370014, "grad_norm": 0.4583730399608612, "learning_rate": 2.7752293577981653e-06, "loss": 0.3955, "step": 363 }, { "epoch": 0.016704143912624477, "grad_norm": 0.49381697177886963, "learning_rate": 2.782874617737003e-06, "loss": 0.4863, "step": 364 }, { "epoch": 0.01675003441787894, "grad_norm": 0.48270758986473083, "learning_rate": 2.7905198776758415e-06, "loss": 0.4584, "step": 365 }, { "epoch": 0.016795924923133403, "grad_norm": 0.4611847400665283, "learning_rate": 2.798165137614679e-06, "loss": 0.3522, "step": 366 }, { "epoch": 0.016841815428387866, "grad_norm": 0.4648805260658264, "learning_rate": 2.805810397553517e-06, "loss": 0.4502, "step": 367 }, { "epoch": 0.01688770593364233, "grad_norm": 0.49289172887802124, "learning_rate": 2.813455657492355e-06, "loss": 0.4656, "step": 368 }, { "epoch": 0.016933596438896793, "grad_norm": 0.4598466157913208, "learning_rate": 2.8211009174311925e-06, "loss": 0.3899, "step": 369 }, { "epoch": 0.016979486944151256, "grad_norm": 0.5077979564666748, "learning_rate": 2.828746177370031e-06, "loss": 0.3977, "step": 370 }, { "epoch": 0.01702537744940572, "grad_norm": 0.45774930715560913, "learning_rate": 2.8363914373088687e-06, "loss": 0.3997, "step": 371 }, { "epoch": 0.017071267954660182, "grad_norm": 0.43321484327316284, "learning_rate": 2.844036697247707e-06, "loss": 0.3554, "step": 372 }, { "epoch": 0.017117158459914645, "grad_norm": 0.43385207653045654, "learning_rate": 2.8516819571865444e-06, "loss": 0.341, "step": 373 }, { "epoch": 0.017163048965169108, "grad_norm": 0.5218827128410339, "learning_rate": 2.8593272171253827e-06, "loss": 0.5407, "step": 374 }, { "epoch": 0.017208939470423568, "grad_norm": 0.5167555809020996, "learning_rate": 2.8669724770642206e-06, "loss": 0.4346, "step": 375 }, { "epoch": 0.01725482997567803, "grad_norm": 0.510474443435669, "learning_rate": 2.874617737003058e-06, "loss": 0.5492, "step": 376 }, { "epoch": 0.017300720480932494, "grad_norm": 0.4777190685272217, "learning_rate": 2.8822629969418963e-06, "loss": 0.4308, "step": 377 }, { "epoch": 0.017346610986186957, "grad_norm": 0.48560985922813416, "learning_rate": 2.8899082568807342e-06, "loss": 0.4206, "step": 378 }, { "epoch": 0.01739250149144142, "grad_norm": 0.4789966642856598, "learning_rate": 2.8975535168195725e-06, "loss": 0.4558, "step": 379 }, { "epoch": 0.017438391996695883, "grad_norm": 0.4180901348590851, "learning_rate": 2.90519877675841e-06, "loss": 0.3134, "step": 380 }, { "epoch": 0.017484282501950346, "grad_norm": 0.4317328631877899, "learning_rate": 2.912844036697248e-06, "loss": 0.3532, "step": 381 }, { "epoch": 0.01753017300720481, "grad_norm": 0.5050603151321411, "learning_rate": 2.920489296636086e-06, "loss": 0.5576, "step": 382 }, { "epoch": 0.017576063512459272, "grad_norm": 0.49497556686401367, "learning_rate": 2.9281345565749236e-06, "loss": 0.5347, "step": 383 }, { "epoch": 0.017621954017713735, "grad_norm": 0.5131199359893799, "learning_rate": 2.935779816513762e-06, "loss": 0.4114, "step": 384 }, { "epoch": 0.0176678445229682, "grad_norm": 0.4344429671764374, "learning_rate": 2.9434250764525997e-06, "loss": 0.3587, "step": 385 }, { "epoch": 0.01771373502822266, "grad_norm": 0.45279866456985474, "learning_rate": 2.9510703363914376e-06, "loss": 0.3789, "step": 386 }, { "epoch": 0.017759625533477125, "grad_norm": 0.5409918427467346, "learning_rate": 2.9587155963302755e-06, "loss": 0.3814, "step": 387 }, { "epoch": 0.017805516038731588, "grad_norm": 0.5497090220451355, "learning_rate": 2.9663608562691134e-06, "loss": 0.5602, "step": 388 }, { "epoch": 0.01785140654398605, "grad_norm": 0.47578442096710205, "learning_rate": 2.9740061162079512e-06, "loss": 0.4324, "step": 389 }, { "epoch": 0.01789729704924051, "grad_norm": 0.47323742508888245, "learning_rate": 2.981651376146789e-06, "loss": 0.4478, "step": 390 }, { "epoch": 0.017943187554494974, "grad_norm": 0.4371729791164398, "learning_rate": 2.9892966360856274e-06, "loss": 0.3612, "step": 391 }, { "epoch": 0.017989078059749437, "grad_norm": 0.6900670528411865, "learning_rate": 2.9969418960244653e-06, "loss": 0.6762, "step": 392 }, { "epoch": 0.0180349685650039, "grad_norm": 0.5032302141189575, "learning_rate": 3.0045871559633027e-06, "loss": 0.5237, "step": 393 }, { "epoch": 0.018080859070258363, "grad_norm": 0.45847564935684204, "learning_rate": 3.012232415902141e-06, "loss": 0.357, "step": 394 }, { "epoch": 0.018126749575512826, "grad_norm": 0.48517248034477234, "learning_rate": 3.019877675840979e-06, "loss": 0.4645, "step": 395 }, { "epoch": 0.01817264008076729, "grad_norm": 0.47780027985572815, "learning_rate": 3.0275229357798168e-06, "loss": 0.4434, "step": 396 }, { "epoch": 0.018218530586021752, "grad_norm": 0.455100417137146, "learning_rate": 3.0351681957186546e-06, "loss": 0.4364, "step": 397 }, { "epoch": 0.018264421091276215, "grad_norm": 0.43008488416671753, "learning_rate": 3.042813455657493e-06, "loss": 0.3327, "step": 398 }, { "epoch": 0.01831031159653068, "grad_norm": 0.5193257331848145, "learning_rate": 3.0504587155963304e-06, "loss": 0.4522, "step": 399 }, { "epoch": 0.01835620210178514, "grad_norm": 0.45401597023010254, "learning_rate": 3.0581039755351682e-06, "loss": 0.4047, "step": 400 }, { "epoch": 0.018402092607039604, "grad_norm": 0.43108049035072327, "learning_rate": 3.0657492354740065e-06, "loss": 0.3774, "step": 401 }, { "epoch": 0.018447983112294068, "grad_norm": 0.46105724573135376, "learning_rate": 3.073394495412844e-06, "loss": 0.3984, "step": 402 }, { "epoch": 0.01849387361754853, "grad_norm": 0.502966582775116, "learning_rate": 3.0810397553516823e-06, "loss": 0.4595, "step": 403 }, { "epoch": 0.018539764122802994, "grad_norm": 0.48311692476272583, "learning_rate": 3.08868501529052e-06, "loss": 0.4275, "step": 404 }, { "epoch": 0.018585654628057453, "grad_norm": 0.4644903838634491, "learning_rate": 3.0963302752293576e-06, "loss": 0.3781, "step": 405 }, { "epoch": 0.018631545133311916, "grad_norm": 0.5004028081893921, "learning_rate": 3.103975535168196e-06, "loss": 0.4339, "step": 406 }, { "epoch": 0.01867743563856638, "grad_norm": 0.4505075514316559, "learning_rate": 3.1116207951070338e-06, "loss": 0.3918, "step": 407 }, { "epoch": 0.018723326143820843, "grad_norm": 0.4375948905944824, "learning_rate": 3.119266055045872e-06, "loss": 0.362, "step": 408 }, { "epoch": 0.018769216649075306, "grad_norm": 0.4854259490966797, "learning_rate": 3.1269113149847095e-06, "loss": 0.4676, "step": 409 }, { "epoch": 0.01881510715432977, "grad_norm": 0.4625663161277771, "learning_rate": 3.134556574923548e-06, "loss": 0.4561, "step": 410 }, { "epoch": 0.018860997659584232, "grad_norm": 0.46960076689720154, "learning_rate": 3.1422018348623857e-06, "loss": 0.4868, "step": 411 }, { "epoch": 0.018906888164838695, "grad_norm": 0.5641589164733887, "learning_rate": 3.149847094801223e-06, "loss": 0.4322, "step": 412 }, { "epoch": 0.018952778670093158, "grad_norm": 0.5606700778007507, "learning_rate": 3.1574923547400614e-06, "loss": 0.4834, "step": 413 }, { "epoch": 0.01899866917534762, "grad_norm": 0.576024055480957, "learning_rate": 3.1651376146788993e-06, "loss": 0.5227, "step": 414 }, { "epoch": 0.019044559680602084, "grad_norm": 0.4867952764034271, "learning_rate": 3.1727828746177376e-06, "loss": 0.4878, "step": 415 }, { "epoch": 0.019090450185856547, "grad_norm": 0.4988221228122711, "learning_rate": 3.180428134556575e-06, "loss": 0.4424, "step": 416 }, { "epoch": 0.01913634069111101, "grad_norm": 0.4742036461830139, "learning_rate": 3.188073394495413e-06, "loss": 0.4811, "step": 417 }, { "epoch": 0.019182231196365473, "grad_norm": 0.4699513614177704, "learning_rate": 3.195718654434251e-06, "loss": 0.4354, "step": 418 }, { "epoch": 0.019228121701619937, "grad_norm": 0.46931612491607666, "learning_rate": 3.2033639143730887e-06, "loss": 0.3925, "step": 419 }, { "epoch": 0.019274012206874396, "grad_norm": 0.4854743182659149, "learning_rate": 3.211009174311927e-06, "loss": 0.3773, "step": 420 }, { "epoch": 0.01931990271212886, "grad_norm": 0.5192313194274902, "learning_rate": 3.218654434250765e-06, "loss": 0.5547, "step": 421 }, { "epoch": 0.019365793217383322, "grad_norm": 0.47898295521736145, "learning_rate": 3.226299694189603e-06, "loss": 0.4937, "step": 422 }, { "epoch": 0.019411683722637785, "grad_norm": 0.46824750304222107, "learning_rate": 3.2339449541284406e-06, "loss": 0.4389, "step": 423 }, { "epoch": 0.01945757422789225, "grad_norm": 0.5154852867126465, "learning_rate": 3.2415902140672784e-06, "loss": 0.4951, "step": 424 }, { "epoch": 0.01950346473314671, "grad_norm": 0.4472064971923828, "learning_rate": 3.2492354740061167e-06, "loss": 0.3962, "step": 425 }, { "epoch": 0.019549355238401175, "grad_norm": 0.4869037866592407, "learning_rate": 3.256880733944954e-06, "loss": 0.448, "step": 426 }, { "epoch": 0.019595245743655638, "grad_norm": 0.436694473028183, "learning_rate": 3.2645259938837925e-06, "loss": 0.3469, "step": 427 }, { "epoch": 0.0196411362489101, "grad_norm": 0.5162097215652466, "learning_rate": 3.2721712538226303e-06, "loss": 0.4408, "step": 428 }, { "epoch": 0.019687026754164564, "grad_norm": 0.5034533143043518, "learning_rate": 3.279816513761468e-06, "loss": 0.4719, "step": 429 }, { "epoch": 0.019732917259419027, "grad_norm": 0.4772058427333832, "learning_rate": 3.287461773700306e-06, "loss": 0.417, "step": 430 }, { "epoch": 0.01977880776467349, "grad_norm": 0.5042054057121277, "learning_rate": 3.295107033639144e-06, "loss": 0.4666, "step": 431 }, { "epoch": 0.019824698269927953, "grad_norm": 0.4831267297267914, "learning_rate": 3.3027522935779823e-06, "loss": 0.4666, "step": 432 }, { "epoch": 0.019870588775182416, "grad_norm": 0.462446004152298, "learning_rate": 3.3103975535168197e-06, "loss": 0.3896, "step": 433 }, { "epoch": 0.019916479280436876, "grad_norm": 0.4908536374568939, "learning_rate": 3.318042813455658e-06, "loss": 0.5126, "step": 434 }, { "epoch": 0.01996236978569134, "grad_norm": 0.5101654529571533, "learning_rate": 3.325688073394496e-06, "loss": 0.5114, "step": 435 }, { "epoch": 0.020008260290945802, "grad_norm": 0.4652755856513977, "learning_rate": 3.3333333333333333e-06, "loss": 0.4303, "step": 436 }, { "epoch": 0.020054150796200265, "grad_norm": 0.4511263370513916, "learning_rate": 3.3409785932721716e-06, "loss": 0.4182, "step": 437 }, { "epoch": 0.02010004130145473, "grad_norm": 0.4430512487888336, "learning_rate": 3.3486238532110095e-06, "loss": 0.3437, "step": 438 }, { "epoch": 0.02014593180670919, "grad_norm": 0.5460695624351501, "learning_rate": 3.3562691131498478e-06, "loss": 0.5211, "step": 439 }, { "epoch": 0.020191822311963654, "grad_norm": 0.46814751625061035, "learning_rate": 3.3639143730886852e-06, "loss": 0.4979, "step": 440 }, { "epoch": 0.020237712817218118, "grad_norm": 0.451825886964798, "learning_rate": 3.371559633027523e-06, "loss": 0.358, "step": 441 }, { "epoch": 0.02028360332247258, "grad_norm": 0.48225319385528564, "learning_rate": 3.3792048929663614e-06, "loss": 0.4531, "step": 442 }, { "epoch": 0.020329493827727044, "grad_norm": 0.441541850566864, "learning_rate": 3.386850152905199e-06, "loss": 0.3809, "step": 443 }, { "epoch": 0.020375384332981507, "grad_norm": 0.9595709443092346, "learning_rate": 3.394495412844037e-06, "loss": 0.4303, "step": 444 }, { "epoch": 0.02042127483823597, "grad_norm": 0.6199988126754761, "learning_rate": 3.402140672782875e-06, "loss": 0.6407, "step": 445 }, { "epoch": 0.020467165343490433, "grad_norm": 0.5241382718086243, "learning_rate": 3.4097859327217125e-06, "loss": 0.5388, "step": 446 }, { "epoch": 0.020513055848744896, "grad_norm": 0.4802880883216858, "learning_rate": 3.4174311926605508e-06, "loss": 0.4601, "step": 447 }, { "epoch": 0.02055894635399936, "grad_norm": 0.462083101272583, "learning_rate": 3.4250764525993886e-06, "loss": 0.4682, "step": 448 }, { "epoch": 0.02060483685925382, "grad_norm": 0.43879276514053345, "learning_rate": 3.432721712538227e-06, "loss": 0.3861, "step": 449 }, { "epoch": 0.020650727364508282, "grad_norm": 0.5051079988479614, "learning_rate": 3.4403669724770644e-06, "loss": 0.5445, "step": 450 }, { "epoch": 0.020696617869762745, "grad_norm": 0.46442314982414246, "learning_rate": 3.4480122324159027e-06, "loss": 0.4028, "step": 451 }, { "epoch": 0.020742508375017208, "grad_norm": 0.44685596227645874, "learning_rate": 3.4556574923547405e-06, "loss": 0.3928, "step": 452 }, { "epoch": 0.02078839888027167, "grad_norm": 0.4670540988445282, "learning_rate": 3.463302752293578e-06, "loss": 0.3846, "step": 453 }, { "epoch": 0.020834289385526134, "grad_norm": 0.5065159797668457, "learning_rate": 3.4709480122324163e-06, "loss": 0.426, "step": 454 }, { "epoch": 0.020880179890780597, "grad_norm": 0.45759421586990356, "learning_rate": 3.478593272171254e-06, "loss": 0.3778, "step": 455 }, { "epoch": 0.02092607039603506, "grad_norm": 0.526719331741333, "learning_rate": 3.486238532110092e-06, "loss": 0.4252, "step": 456 }, { "epoch": 0.020971960901289523, "grad_norm": 0.4285305142402649, "learning_rate": 3.49388379204893e-06, "loss": 0.3334, "step": 457 }, { "epoch": 0.021017851406543987, "grad_norm": 0.5938563346862793, "learning_rate": 3.5015290519877678e-06, "loss": 0.4861, "step": 458 }, { "epoch": 0.02106374191179845, "grad_norm": 0.5004566311836243, "learning_rate": 3.5091743119266056e-06, "loss": 0.4418, "step": 459 }, { "epoch": 0.021109632417052913, "grad_norm": 0.497659832239151, "learning_rate": 3.5168195718654435e-06, "loss": 0.4686, "step": 460 }, { "epoch": 0.021155522922307376, "grad_norm": 0.4815540909767151, "learning_rate": 3.524464831804282e-06, "loss": 0.4217, "step": 461 }, { "epoch": 0.02120141342756184, "grad_norm": 0.43580955266952515, "learning_rate": 3.5321100917431193e-06, "loss": 0.295, "step": 462 }, { "epoch": 0.021247303932816302, "grad_norm": 0.46454644203186035, "learning_rate": 3.5397553516819576e-06, "loss": 0.4484, "step": 463 }, { "epoch": 0.02129319443807076, "grad_norm": 0.5093151926994324, "learning_rate": 3.5474006116207954e-06, "loss": 0.3842, "step": 464 }, { "epoch": 0.021339084943325225, "grad_norm": 0.4791293144226074, "learning_rate": 3.5550458715596333e-06, "loss": 0.4639, "step": 465 }, { "epoch": 0.021384975448579688, "grad_norm": 0.47072818875312805, "learning_rate": 3.562691131498471e-06, "loss": 0.4182, "step": 466 }, { "epoch": 0.02143086595383415, "grad_norm": 0.49291640520095825, "learning_rate": 3.570336391437309e-06, "loss": 0.4588, "step": 467 }, { "epoch": 0.021476756459088614, "grad_norm": 0.46959802508354187, "learning_rate": 3.5779816513761473e-06, "loss": 0.4093, "step": 468 }, { "epoch": 0.021522646964343077, "grad_norm": 0.4998926818370819, "learning_rate": 3.585626911314985e-06, "loss": 0.4976, "step": 469 }, { "epoch": 0.02156853746959754, "grad_norm": 0.43465107679367065, "learning_rate": 3.5932721712538227e-06, "loss": 0.3593, "step": 470 }, { "epoch": 0.021614427974852003, "grad_norm": 0.5242964029312134, "learning_rate": 3.600917431192661e-06, "loss": 0.5659, "step": 471 }, { "epoch": 0.021660318480106466, "grad_norm": 0.4925539493560791, "learning_rate": 3.6085626911314984e-06, "loss": 0.4773, "step": 472 }, { "epoch": 0.02170620898536093, "grad_norm": 0.44556525349617004, "learning_rate": 3.6162079510703367e-06, "loss": 0.3939, "step": 473 }, { "epoch": 0.021752099490615392, "grad_norm": 0.46809321641921997, "learning_rate": 3.6238532110091746e-06, "loss": 0.3871, "step": 474 }, { "epoch": 0.021797989995869856, "grad_norm": 0.5198566317558289, "learning_rate": 3.631498470948013e-06, "loss": 0.4929, "step": 475 }, { "epoch": 0.02184388050112432, "grad_norm": 0.4656904339790344, "learning_rate": 3.6391437308868503e-06, "loss": 0.3858, "step": 476 }, { "epoch": 0.021889771006378782, "grad_norm": 0.6396805047988892, "learning_rate": 3.646788990825688e-06, "loss": 0.5824, "step": 477 }, { "epoch": 0.02193566151163324, "grad_norm": 0.5012964606285095, "learning_rate": 3.6544342507645265e-06, "loss": 0.4931, "step": 478 }, { "epoch": 0.021981552016887704, "grad_norm": 0.5147992372512817, "learning_rate": 3.662079510703364e-06, "loss": 0.557, "step": 479 }, { "epoch": 0.022027442522142168, "grad_norm": 0.5051838159561157, "learning_rate": 3.6697247706422022e-06, "loss": 0.5182, "step": 480 }, { "epoch": 0.02207333302739663, "grad_norm": 0.4526509940624237, "learning_rate": 3.67737003058104e-06, "loss": 0.3788, "step": 481 }, { "epoch": 0.022119223532651094, "grad_norm": 0.4833812713623047, "learning_rate": 3.6850152905198775e-06, "loss": 0.4043, "step": 482 }, { "epoch": 0.022165114037905557, "grad_norm": 0.450137197971344, "learning_rate": 3.692660550458716e-06, "loss": 0.3674, "step": 483 }, { "epoch": 0.02221100454316002, "grad_norm": 0.472493976354599, "learning_rate": 3.7003058103975537e-06, "loss": 0.4229, "step": 484 }, { "epoch": 0.022256895048414483, "grad_norm": 0.460699200630188, "learning_rate": 3.707951070336392e-06, "loss": 0.386, "step": 485 }, { "epoch": 0.022302785553668946, "grad_norm": 0.43607982993125916, "learning_rate": 3.7155963302752295e-06, "loss": 0.3474, "step": 486 }, { "epoch": 0.02234867605892341, "grad_norm": 0.4386248290538788, "learning_rate": 3.7232415902140678e-06, "loss": 0.4147, "step": 487 }, { "epoch": 0.022394566564177872, "grad_norm": 0.4909244775772095, "learning_rate": 3.7308868501529056e-06, "loss": 0.5338, "step": 488 }, { "epoch": 0.022440457069432335, "grad_norm": 0.45259836316108704, "learning_rate": 3.738532110091743e-06, "loss": 0.3678, "step": 489 }, { "epoch": 0.0224863475746868, "grad_norm": 0.5172081589698792, "learning_rate": 3.7461773700305814e-06, "loss": 0.5185, "step": 490 }, { "epoch": 0.02253223807994126, "grad_norm": 0.45696645975112915, "learning_rate": 3.7538226299694192e-06, "loss": 0.4019, "step": 491 }, { "epoch": 0.022578128585195725, "grad_norm": 0.489422082901001, "learning_rate": 3.7614678899082575e-06, "loss": 0.4353, "step": 492 }, { "epoch": 0.022624019090450184, "grad_norm": 0.4883096516132355, "learning_rate": 3.769113149847095e-06, "loss": 0.4958, "step": 493 }, { "epoch": 0.022669909595704647, "grad_norm": 0.48254308104515076, "learning_rate": 3.776758409785933e-06, "loss": 0.4422, "step": 494 }, { "epoch": 0.02271580010095911, "grad_norm": 0.4923512041568756, "learning_rate": 3.784403669724771e-06, "loss": 0.4321, "step": 495 }, { "epoch": 0.022761690606213573, "grad_norm": 0.5353966355323792, "learning_rate": 3.7920489296636086e-06, "loss": 0.5279, "step": 496 }, { "epoch": 0.022807581111468037, "grad_norm": 0.49044910073280334, "learning_rate": 3.799694189602447e-06, "loss": 0.4661, "step": 497 }, { "epoch": 0.0228534716167225, "grad_norm": 0.44412946701049805, "learning_rate": 3.8073394495412848e-06, "loss": 0.359, "step": 498 }, { "epoch": 0.022899362121976963, "grad_norm": 0.5708341002464294, "learning_rate": 3.814984709480123e-06, "loss": 0.4945, "step": 499 }, { "epoch": 0.022945252627231426, "grad_norm": 0.45246854424476624, "learning_rate": 3.8226299694189605e-06, "loss": 0.4112, "step": 500 }, { "epoch": 0.02299114313248589, "grad_norm": 0.4905444085597992, "learning_rate": 3.830275229357798e-06, "loss": 0.4717, "step": 501 }, { "epoch": 0.023037033637740352, "grad_norm": 0.447233110666275, "learning_rate": 3.837920489296636e-06, "loss": 0.367, "step": 502 }, { "epoch": 0.023082924142994815, "grad_norm": 0.5000691413879395, "learning_rate": 3.8455657492354746e-06, "loss": 0.4311, "step": 503 }, { "epoch": 0.023128814648249278, "grad_norm": 0.4629581570625305, "learning_rate": 3.853211009174313e-06, "loss": 0.4196, "step": 504 }, { "epoch": 0.02317470515350374, "grad_norm": 0.42086851596832275, "learning_rate": 3.86085626911315e-06, "loss": 0.3129, "step": 505 }, { "epoch": 0.023220595658758204, "grad_norm": 1.1491636037826538, "learning_rate": 3.868501529051988e-06, "loss": 0.5884, "step": 506 }, { "epoch": 0.023266486164012667, "grad_norm": 0.4791019856929779, "learning_rate": 3.876146788990826e-06, "loss": 0.4413, "step": 507 }, { "epoch": 0.023312376669267127, "grad_norm": 0.5129106640815735, "learning_rate": 3.8837920489296635e-06, "loss": 0.5112, "step": 508 }, { "epoch": 0.02335826717452159, "grad_norm": 0.4540897309780121, "learning_rate": 3.891437308868502e-06, "loss": 0.4214, "step": 509 }, { "epoch": 0.023404157679776053, "grad_norm": 0.4633723795413971, "learning_rate": 3.89908256880734e-06, "loss": 0.4294, "step": 510 }, { "epoch": 0.023450048185030516, "grad_norm": 0.45631659030914307, "learning_rate": 3.906727828746178e-06, "loss": 0.355, "step": 511 }, { "epoch": 0.02349593869028498, "grad_norm": 0.7669097185134888, "learning_rate": 3.914373088685016e-06, "loss": 0.527, "step": 512 }, { "epoch": 0.023541829195539443, "grad_norm": 0.5041212439537048, "learning_rate": 3.922018348623853e-06, "loss": 0.4816, "step": 513 }, { "epoch": 0.023587719700793906, "grad_norm": 0.49600949883461, "learning_rate": 3.9296636085626916e-06, "loss": 0.5172, "step": 514 }, { "epoch": 0.02363361020604837, "grad_norm": 0.43761149048805237, "learning_rate": 3.937308868501529e-06, "loss": 0.3567, "step": 515 }, { "epoch": 0.023679500711302832, "grad_norm": 0.44402939081192017, "learning_rate": 3.944954128440367e-06, "loss": 0.4105, "step": 516 }, { "epoch": 0.023725391216557295, "grad_norm": 0.4629693925380707, "learning_rate": 3.952599388379206e-06, "loss": 0.3719, "step": 517 }, { "epoch": 0.023771281721811758, "grad_norm": 0.4541211426258087, "learning_rate": 3.960244648318043e-06, "loss": 0.4061, "step": 518 }, { "epoch": 0.02381717222706622, "grad_norm": 0.5160866379737854, "learning_rate": 3.967889908256881e-06, "loss": 0.4723, "step": 519 }, { "epoch": 0.023863062732320684, "grad_norm": 0.4838929772377014, "learning_rate": 3.975535168195719e-06, "loss": 0.4531, "step": 520 }, { "epoch": 0.023908953237575147, "grad_norm": 0.4735890030860901, "learning_rate": 3.983180428134557e-06, "loss": 0.461, "step": 521 }, { "epoch": 0.023954843742829607, "grad_norm": 0.46348604559898376, "learning_rate": 3.9908256880733945e-06, "loss": 0.4249, "step": 522 }, { "epoch": 0.02400073424808407, "grad_norm": 0.6115723848342896, "learning_rate": 3.998470948012233e-06, "loss": 0.6942, "step": 523 }, { "epoch": 0.024046624753338533, "grad_norm": 0.5280125737190247, "learning_rate": 4.00611620795107e-06, "loss": 0.5101, "step": 524 }, { "epoch": 0.024092515258592996, "grad_norm": 0.4911775290966034, "learning_rate": 4.013761467889909e-06, "loss": 0.403, "step": 525 }, { "epoch": 0.02413840576384746, "grad_norm": 0.5467319488525391, "learning_rate": 4.021406727828747e-06, "loss": 0.4585, "step": 526 }, { "epoch": 0.024184296269101922, "grad_norm": 0.4691978096961975, "learning_rate": 4.029051987767584e-06, "loss": 0.4003, "step": 527 }, { "epoch": 0.024230186774356385, "grad_norm": 0.523053765296936, "learning_rate": 4.036697247706423e-06, "loss": 0.5734, "step": 528 }, { "epoch": 0.02427607727961085, "grad_norm": 0.45209935307502747, "learning_rate": 4.04434250764526e-06, "loss": 0.3715, "step": 529 }, { "epoch": 0.02432196778486531, "grad_norm": 0.48233136534690857, "learning_rate": 4.051987767584098e-06, "loss": 0.4148, "step": 530 }, { "epoch": 0.024367858290119775, "grad_norm": 0.4820379316806793, "learning_rate": 4.059633027522936e-06, "loss": 0.3949, "step": 531 }, { "epoch": 0.024413748795374238, "grad_norm": 0.5382937788963318, "learning_rate": 4.067278287461774e-06, "loss": 0.5764, "step": 532 }, { "epoch": 0.0244596393006287, "grad_norm": 0.4763702154159546, "learning_rate": 4.074923547400612e-06, "loss": 0.4179, "step": 533 }, { "epoch": 0.024505529805883164, "grad_norm": 0.5012703537940979, "learning_rate": 4.08256880733945e-06, "loss": 0.4761, "step": 534 }, { "epoch": 0.024551420311137627, "grad_norm": 0.5251275300979614, "learning_rate": 4.090214067278288e-06, "loss": 0.4383, "step": 535 }, { "epoch": 0.02459731081639209, "grad_norm": 0.4684962332248688, "learning_rate": 4.097859327217126e-06, "loss": 0.401, "step": 536 }, { "epoch": 0.02464320132164655, "grad_norm": 0.5177205801010132, "learning_rate": 4.105504587155963e-06, "loss": 0.5061, "step": 537 }, { "epoch": 0.024689091826901013, "grad_norm": 0.4861942529678345, "learning_rate": 4.113149847094801e-06, "loss": 0.4672, "step": 538 }, { "epoch": 0.024734982332155476, "grad_norm": 0.5345531702041626, "learning_rate": 4.12079510703364e-06, "loss": 0.4102, "step": 539 }, { "epoch": 0.02478087283740994, "grad_norm": 0.506618857383728, "learning_rate": 4.128440366972478e-06, "loss": 0.4757, "step": 540 }, { "epoch": 0.024826763342664402, "grad_norm": 0.4715940058231354, "learning_rate": 4.136085626911315e-06, "loss": 0.3911, "step": 541 }, { "epoch": 0.024872653847918865, "grad_norm": 0.4640718400478363, "learning_rate": 4.143730886850153e-06, "loss": 0.3523, "step": 542 }, { "epoch": 0.024918544353173328, "grad_norm": 0.5020986199378967, "learning_rate": 4.151376146788991e-06, "loss": 0.4585, "step": 543 }, { "epoch": 0.02496443485842779, "grad_norm": 0.4291110634803772, "learning_rate": 4.1590214067278286e-06, "loss": 0.329, "step": 544 }, { "epoch": 0.025010325363682254, "grad_norm": 0.4956706762313843, "learning_rate": 4.166666666666667e-06, "loss": 0.4538, "step": 545 }, { "epoch": 0.025056215868936717, "grad_norm": 0.4470667839050293, "learning_rate": 4.174311926605505e-06, "loss": 0.3776, "step": 546 }, { "epoch": 0.02510210637419118, "grad_norm": 0.4709324538707733, "learning_rate": 4.181957186544343e-06, "loss": 0.4152, "step": 547 }, { "epoch": 0.025147996879445644, "grad_norm": 0.47205275297164917, "learning_rate": 4.189602446483181e-06, "loss": 0.3729, "step": 548 }, { "epoch": 0.025193887384700107, "grad_norm": 0.48534488677978516, "learning_rate": 4.197247706422018e-06, "loss": 0.4385, "step": 549 }, { "epoch": 0.02523977788995457, "grad_norm": 0.498604416847229, "learning_rate": 4.204892966360857e-06, "loss": 0.4402, "step": 550 }, { "epoch": 0.025285668395209033, "grad_norm": 0.49241143465042114, "learning_rate": 4.212538226299694e-06, "loss": 0.4474, "step": 551 }, { "epoch": 0.025331558900463493, "grad_norm": 0.4935033321380615, "learning_rate": 4.220183486238532e-06, "loss": 0.4455, "step": 552 }, { "epoch": 0.025377449405717956, "grad_norm": 0.5777140259742737, "learning_rate": 4.227828746177371e-06, "loss": 0.5521, "step": 553 }, { "epoch": 0.02542333991097242, "grad_norm": 0.4227786064147949, "learning_rate": 4.235474006116208e-06, "loss": 0.2976, "step": 554 }, { "epoch": 0.025469230416226882, "grad_norm": 0.41813260316848755, "learning_rate": 4.2431192660550464e-06, "loss": 0.2944, "step": 555 }, { "epoch": 0.025515120921481345, "grad_norm": 0.4449165165424347, "learning_rate": 4.250764525993884e-06, "loss": 0.3895, "step": 556 }, { "epoch": 0.025561011426735808, "grad_norm": 0.5028536915779114, "learning_rate": 4.258409785932722e-06, "loss": 0.4556, "step": 557 }, { "epoch": 0.02560690193199027, "grad_norm": 0.4760664999485016, "learning_rate": 4.26605504587156e-06, "loss": 0.433, "step": 558 }, { "epoch": 0.025652792437244734, "grad_norm": 0.4297028183937073, "learning_rate": 4.273700305810398e-06, "loss": 0.3351, "step": 559 }, { "epoch": 0.025698682942499197, "grad_norm": 0.48132580518722534, "learning_rate": 4.281345565749236e-06, "loss": 0.3972, "step": 560 }, { "epoch": 0.02574457344775366, "grad_norm": 0.46537891030311584, "learning_rate": 4.288990825688074e-06, "loss": 0.3974, "step": 561 }, { "epoch": 0.025790463953008123, "grad_norm": 0.43812325596809387, "learning_rate": 4.296636085626912e-06, "loss": 0.3828, "step": 562 }, { "epoch": 0.025836354458262586, "grad_norm": 0.4738851487636566, "learning_rate": 4.304281345565749e-06, "loss": 0.3836, "step": 563 }, { "epoch": 0.02588224496351705, "grad_norm": 0.5260022878646851, "learning_rate": 4.311926605504588e-06, "loss": 0.5371, "step": 564 }, { "epoch": 0.025928135468771513, "grad_norm": 0.4480191767215729, "learning_rate": 4.319571865443425e-06, "loss": 0.3949, "step": 565 }, { "epoch": 0.025974025974025976, "grad_norm": 0.5282284617424011, "learning_rate": 4.3272171253822634e-06, "loss": 0.4548, "step": 566 }, { "epoch": 0.026019916479280435, "grad_norm": 0.44939708709716797, "learning_rate": 4.334862385321102e-06, "loss": 0.3321, "step": 567 }, { "epoch": 0.0260658069845349, "grad_norm": 0.4996342062950134, "learning_rate": 4.342507645259939e-06, "loss": 0.4423, "step": 568 }, { "epoch": 0.02611169748978936, "grad_norm": 0.5004560947418213, "learning_rate": 4.3501529051987775e-06, "loss": 0.4239, "step": 569 }, { "epoch": 0.026157587995043825, "grad_norm": 0.5284087061882019, "learning_rate": 4.357798165137615e-06, "loss": 0.4846, "step": 570 }, { "epoch": 0.026203478500298288, "grad_norm": 0.4797920882701874, "learning_rate": 4.365443425076452e-06, "loss": 0.4571, "step": 571 }, { "epoch": 0.02624936900555275, "grad_norm": 0.4827975630760193, "learning_rate": 4.373088685015291e-06, "loss": 0.3913, "step": 572 }, { "epoch": 0.026295259510807214, "grad_norm": 0.4639168679714203, "learning_rate": 4.380733944954129e-06, "loss": 0.4207, "step": 573 }, { "epoch": 0.026341150016061677, "grad_norm": 0.44913971424102783, "learning_rate": 4.388379204892967e-06, "loss": 0.3854, "step": 574 }, { "epoch": 0.02638704052131614, "grad_norm": 0.44848018884658813, "learning_rate": 4.396024464831805e-06, "loss": 0.3791, "step": 575 }, { "epoch": 0.026432931026570603, "grad_norm": 0.4293183386325836, "learning_rate": 4.403669724770643e-06, "loss": 0.3616, "step": 576 }, { "epoch": 0.026478821531825066, "grad_norm": 0.47283944487571716, "learning_rate": 4.4113149847094805e-06, "loss": 0.4652, "step": 577 }, { "epoch": 0.02652471203707953, "grad_norm": 0.5692253112792969, "learning_rate": 4.418960244648318e-06, "loss": 0.5863, "step": 578 }, { "epoch": 0.026570602542333992, "grad_norm": 0.504953145980835, "learning_rate": 4.426605504587156e-06, "loss": 0.5659, "step": 579 }, { "epoch": 0.026616493047588455, "grad_norm": 0.510410726070404, "learning_rate": 4.4342507645259945e-06, "loss": 0.4835, "step": 580 }, { "epoch": 0.026662383552842915, "grad_norm": 0.5205755829811096, "learning_rate": 4.441896024464832e-06, "loss": 0.5909, "step": 581 }, { "epoch": 0.026708274058097378, "grad_norm": 0.4615248441696167, "learning_rate": 4.44954128440367e-06, "loss": 0.4298, "step": 582 }, { "epoch": 0.02675416456335184, "grad_norm": 0.48772287368774414, "learning_rate": 4.457186544342508e-06, "loss": 0.4506, "step": 583 }, { "epoch": 0.026800055068606304, "grad_norm": 0.4559618830680847, "learning_rate": 4.464831804281346e-06, "loss": 0.4039, "step": 584 }, { "epoch": 0.026845945573860767, "grad_norm": 0.4673352539539337, "learning_rate": 4.4724770642201834e-06, "loss": 0.4271, "step": 585 }, { "epoch": 0.02689183607911523, "grad_norm": 0.46606773138046265, "learning_rate": 4.480122324159022e-06, "loss": 0.4269, "step": 586 }, { "epoch": 0.026937726584369694, "grad_norm": 0.4551379382610321, "learning_rate": 4.48776758409786e-06, "loss": 0.3897, "step": 587 }, { "epoch": 0.026983617089624157, "grad_norm": 0.4167824983596802, "learning_rate": 4.4954128440366975e-06, "loss": 0.3339, "step": 588 }, { "epoch": 0.02702950759487862, "grad_norm": 0.4326254427433014, "learning_rate": 4.503058103975536e-06, "loss": 0.335, "step": 589 }, { "epoch": 0.027075398100133083, "grad_norm": 0.4331264793872833, "learning_rate": 4.510703363914373e-06, "loss": 0.3609, "step": 590 }, { "epoch": 0.027121288605387546, "grad_norm": 0.5396191477775574, "learning_rate": 4.5183486238532115e-06, "loss": 0.4927, "step": 591 }, { "epoch": 0.02716717911064201, "grad_norm": 0.4724072217941284, "learning_rate": 4.525993883792049e-06, "loss": 0.4395, "step": 592 }, { "epoch": 0.027213069615896472, "grad_norm": 0.43360739946365356, "learning_rate": 4.533639143730887e-06, "loss": 0.3339, "step": 593 }, { "epoch": 0.027258960121150935, "grad_norm": 0.5597114562988281, "learning_rate": 4.541284403669725e-06, "loss": 0.5304, "step": 594 }, { "epoch": 0.0273048506264054, "grad_norm": 0.45619672536849976, "learning_rate": 4.548929663608563e-06, "loss": 0.3814, "step": 595 }, { "epoch": 0.027350741131659858, "grad_norm": 0.4712444543838501, "learning_rate": 4.556574923547401e-06, "loss": 0.408, "step": 596 }, { "epoch": 0.02739663163691432, "grad_norm": 0.4540334641933441, "learning_rate": 4.564220183486239e-06, "loss": 0.3752, "step": 597 }, { "epoch": 0.027442522142168784, "grad_norm": 0.4880044460296631, "learning_rate": 4.571865443425077e-06, "loss": 0.4574, "step": 598 }, { "epoch": 0.027488412647423247, "grad_norm": 0.444128155708313, "learning_rate": 4.5795107033639145e-06, "loss": 0.3243, "step": 599 }, { "epoch": 0.02753430315267771, "grad_norm": 0.43998053669929504, "learning_rate": 4.587155963302753e-06, "loss": 0.3342, "step": 600 }, { "epoch": 0.027580193657932173, "grad_norm": 0.5074039101600647, "learning_rate": 4.59480122324159e-06, "loss": 0.4704, "step": 601 }, { "epoch": 0.027626084163186636, "grad_norm": 0.5305527448654175, "learning_rate": 4.6024464831804285e-06, "loss": 0.4954, "step": 602 }, { "epoch": 0.0276719746684411, "grad_norm": 0.5054502487182617, "learning_rate": 4.610091743119267e-06, "loss": 0.422, "step": 603 }, { "epoch": 0.027717865173695563, "grad_norm": 0.5841851234436035, "learning_rate": 4.617737003058104e-06, "loss": 0.4957, "step": 604 }, { "epoch": 0.027763755678950026, "grad_norm": 0.4930340349674225, "learning_rate": 4.6253822629969426e-06, "loss": 0.3941, "step": 605 }, { "epoch": 0.02780964618420449, "grad_norm": 0.5147585868835449, "learning_rate": 4.63302752293578e-06, "loss": 0.4628, "step": 606 }, { "epoch": 0.027855536689458952, "grad_norm": 0.5381789207458496, "learning_rate": 4.6406727828746175e-06, "loss": 0.4589, "step": 607 }, { "epoch": 0.027901427194713415, "grad_norm": 0.4610876739025116, "learning_rate": 4.648318042813456e-06, "loss": 0.317, "step": 608 }, { "epoch": 0.027947317699967878, "grad_norm": 0.5606909990310669, "learning_rate": 4.655963302752294e-06, "loss": 0.4998, "step": 609 }, { "epoch": 0.02799320820522234, "grad_norm": 0.4477992057800293, "learning_rate": 4.663608562691132e-06, "loss": 0.3569, "step": 610 }, { "epoch": 0.0280390987104768, "grad_norm": 0.4859997034072876, "learning_rate": 4.67125382262997e-06, "loss": 0.4474, "step": 611 }, { "epoch": 0.028084989215731264, "grad_norm": 0.5378237962722778, "learning_rate": 4.678899082568808e-06, "loss": 0.5315, "step": 612 }, { "epoch": 0.028130879720985727, "grad_norm": 0.48265835642814636, "learning_rate": 4.6865443425076455e-06, "loss": 0.4979, "step": 613 }, { "epoch": 0.02817677022624019, "grad_norm": 0.4947357773780823, "learning_rate": 4.694189602446483e-06, "loss": 0.5025, "step": 614 }, { "epoch": 0.028222660731494653, "grad_norm": 0.5121883153915405, "learning_rate": 4.701834862385321e-06, "loss": 0.5109, "step": 615 }, { "epoch": 0.028268551236749116, "grad_norm": 0.5024587512016296, "learning_rate": 4.70948012232416e-06, "loss": 0.444, "step": 616 }, { "epoch": 0.02831444174200358, "grad_norm": 0.48505645990371704, "learning_rate": 4.717125382262998e-06, "loss": 0.4286, "step": 617 }, { "epoch": 0.028360332247258042, "grad_norm": 0.5032019019126892, "learning_rate": 4.724770642201835e-06, "loss": 0.5055, "step": 618 }, { "epoch": 0.028406222752512506, "grad_norm": 0.5626063942909241, "learning_rate": 4.732415902140673e-06, "loss": 0.534, "step": 619 }, { "epoch": 0.02845211325776697, "grad_norm": 0.47117555141448975, "learning_rate": 4.740061162079511e-06, "loss": 0.3794, "step": 620 }, { "epoch": 0.02849800376302143, "grad_norm": 0.5280769467353821, "learning_rate": 4.7477064220183485e-06, "loss": 0.5664, "step": 621 }, { "epoch": 0.028543894268275895, "grad_norm": 0.47849786281585693, "learning_rate": 4.755351681957187e-06, "loss": 0.3908, "step": 622 }, { "epoch": 0.028589784773530358, "grad_norm": 0.4524434506893158, "learning_rate": 4.762996941896025e-06, "loss": 0.3959, "step": 623 }, { "epoch": 0.02863567527878482, "grad_norm": 0.5147923231124878, "learning_rate": 4.770642201834863e-06, "loss": 0.4882, "step": 624 }, { "epoch": 0.02868156578403928, "grad_norm": 0.47584009170532227, "learning_rate": 4.778287461773701e-06, "loss": 0.4213, "step": 625 }, { "epoch": 0.028727456289293744, "grad_norm": 0.47318753600120544, "learning_rate": 4.785932721712538e-06, "loss": 0.4043, "step": 626 }, { "epoch": 0.028773346794548207, "grad_norm": 0.442760169506073, "learning_rate": 4.793577981651377e-06, "loss": 0.3256, "step": 627 }, { "epoch": 0.02881923729980267, "grad_norm": 0.4752501845359802, "learning_rate": 4.801223241590214e-06, "loss": 0.3863, "step": 628 }, { "epoch": 0.028865127805057133, "grad_norm": 0.4469601809978485, "learning_rate": 4.808868501529052e-06, "loss": 0.3454, "step": 629 }, { "epoch": 0.028911018310311596, "grad_norm": 0.5374414920806885, "learning_rate": 4.816513761467891e-06, "loss": 0.5175, "step": 630 }, { "epoch": 0.02895690881556606, "grad_norm": 0.44401299953460693, "learning_rate": 4.824159021406728e-06, "loss": 0.375, "step": 631 }, { "epoch": 0.029002799320820522, "grad_norm": 0.5041106939315796, "learning_rate": 4.831804281345566e-06, "loss": 0.4823, "step": 632 }, { "epoch": 0.029048689826074985, "grad_norm": 0.5558406114578247, "learning_rate": 4.839449541284404e-06, "loss": 0.6037, "step": 633 }, { "epoch": 0.02909458033132945, "grad_norm": 0.5479883551597595, "learning_rate": 4.847094801223242e-06, "loss": 0.4702, "step": 634 }, { "epoch": 0.02914047083658391, "grad_norm": 0.47405314445495605, "learning_rate": 4.8547400611620796e-06, "loss": 0.3948, "step": 635 }, { "epoch": 0.029186361341838375, "grad_norm": 0.4791420102119446, "learning_rate": 4.862385321100918e-06, "loss": 0.3764, "step": 636 }, { "epoch": 0.029232251847092838, "grad_norm": 0.47440847754478455, "learning_rate": 4.870030581039756e-06, "loss": 0.3936, "step": 637 }, { "epoch": 0.0292781423523473, "grad_norm": 0.47720280289649963, "learning_rate": 4.877675840978594e-06, "loss": 0.4285, "step": 638 }, { "epoch": 0.029324032857601764, "grad_norm": 0.48758217692375183, "learning_rate": 4.885321100917432e-06, "loss": 0.4051, "step": 639 }, { "epoch": 0.029369923362856223, "grad_norm": 0.5307623744010925, "learning_rate": 4.892966360856269e-06, "loss": 0.4783, "step": 640 }, { "epoch": 0.029415813868110686, "grad_norm": 0.47263261675834656, "learning_rate": 4.900611620795108e-06, "loss": 0.4024, "step": 641 }, { "epoch": 0.02946170437336515, "grad_norm": 0.49153900146484375, "learning_rate": 4.908256880733945e-06, "loss": 0.4511, "step": 642 }, { "epoch": 0.029507594878619613, "grad_norm": 0.4701794385910034, "learning_rate": 4.915902140672783e-06, "loss": 0.3819, "step": 643 }, { "epoch": 0.029553485383874076, "grad_norm": 0.47918131947517395, "learning_rate": 4.923547400611622e-06, "loss": 0.3981, "step": 644 }, { "epoch": 0.02959937588912854, "grad_norm": 0.51798015832901, "learning_rate": 4.931192660550459e-06, "loss": 0.4983, "step": 645 }, { "epoch": 0.029645266394383002, "grad_norm": 0.4869624078273773, "learning_rate": 4.9388379204892974e-06, "loss": 0.4772, "step": 646 }, { "epoch": 0.029691156899637465, "grad_norm": 0.4917526841163635, "learning_rate": 4.946483180428135e-06, "loss": 0.427, "step": 647 }, { "epoch": 0.029737047404891928, "grad_norm": 0.5127080678939819, "learning_rate": 4.954128440366973e-06, "loss": 0.434, "step": 648 }, { "epoch": 0.02978293791014639, "grad_norm": 0.4668504297733307, "learning_rate": 4.961773700305811e-06, "loss": 0.4045, "step": 649 }, { "epoch": 0.029828828415400854, "grad_norm": 0.44504514336586, "learning_rate": 4.969418960244649e-06, "loss": 0.3242, "step": 650 }, { "epoch": 0.029874718920655317, "grad_norm": 0.4559177756309509, "learning_rate": 4.977064220183486e-06, "loss": 0.3539, "step": 651 }, { "epoch": 0.02992060942590978, "grad_norm": 0.4884553551673889, "learning_rate": 4.984709480122325e-06, "loss": 0.4261, "step": 652 }, { "epoch": 0.029966499931164244, "grad_norm": 0.5162047743797302, "learning_rate": 4.992354740061163e-06, "loss": 0.4672, "step": 653 }, { "epoch": 0.030012390436418707, "grad_norm": 0.46461427211761475, "learning_rate": 5e-06, "loss": 0.3678, "step": 654 }, { "epoch": 0.030058280941673166, "grad_norm": 0.44757315516471863, "learning_rate": 5.007645259938838e-06, "loss": 0.3639, "step": 655 }, { "epoch": 0.03010417144692763, "grad_norm": 0.45951172709465027, "learning_rate": 5.015290519877676e-06, "loss": 0.3985, "step": 656 }, { "epoch": 0.030150061952182092, "grad_norm": 0.47510138154029846, "learning_rate": 5.0229357798165144e-06, "loss": 0.3896, "step": 657 }, { "epoch": 0.030195952457436556, "grad_norm": 0.5010815858840942, "learning_rate": 5.030581039755352e-06, "loss": 0.4675, "step": 658 }, { "epoch": 0.03024184296269102, "grad_norm": 0.49433425068855286, "learning_rate": 5.03822629969419e-06, "loss": 0.3865, "step": 659 }, { "epoch": 0.03028773346794548, "grad_norm": 0.4810725450515747, "learning_rate": 5.045871559633028e-06, "loss": 0.3971, "step": 660 }, { "epoch": 0.030333623973199945, "grad_norm": 0.5787946581840515, "learning_rate": 5.053516819571865e-06, "loss": 0.5114, "step": 661 }, { "epoch": 0.030379514478454408, "grad_norm": 0.4574304521083832, "learning_rate": 5.061162079510704e-06, "loss": 0.3848, "step": 662 }, { "epoch": 0.03042540498370887, "grad_norm": 0.48120027780532837, "learning_rate": 5.068807339449542e-06, "loss": 0.4127, "step": 663 }, { "epoch": 0.030471295488963334, "grad_norm": 0.4440484046936035, "learning_rate": 5.076452599388379e-06, "loss": 0.3293, "step": 664 }, { "epoch": 0.030517185994217797, "grad_norm": 0.44411030411720276, "learning_rate": 5.084097859327217e-06, "loss": 0.306, "step": 665 }, { "epoch": 0.03056307649947226, "grad_norm": 0.5139580965042114, "learning_rate": 5.091743119266055e-06, "loss": 0.4569, "step": 666 }, { "epoch": 0.030608967004726723, "grad_norm": 0.5118184089660645, "learning_rate": 5.099388379204894e-06, "loss": 0.4527, "step": 667 }, { "epoch": 0.030654857509981186, "grad_norm": 0.504111647605896, "learning_rate": 5.1070336391437315e-06, "loss": 0.4701, "step": 668 }, { "epoch": 0.03070074801523565, "grad_norm": 0.45596060156822205, "learning_rate": 5.114678899082569e-06, "loss": 0.2945, "step": 669 }, { "epoch": 0.03074663852049011, "grad_norm": 0.5010163187980652, "learning_rate": 5.122324159021407e-06, "loss": 0.3896, "step": 670 }, { "epoch": 0.030792529025744572, "grad_norm": 0.47708919644355774, "learning_rate": 5.1299694189602455e-06, "loss": 0.3956, "step": 671 }, { "epoch": 0.030838419530999035, "grad_norm": 0.47731465101242065, "learning_rate": 5.137614678899083e-06, "loss": 0.4272, "step": 672 }, { "epoch": 0.0308843100362535, "grad_norm": 0.5047810673713684, "learning_rate": 5.145259938837921e-06, "loss": 0.4789, "step": 673 }, { "epoch": 0.03093020054150796, "grad_norm": 0.4855414032936096, "learning_rate": 5.152905198776759e-06, "loss": 0.402, "step": 674 }, { "epoch": 0.030976091046762425, "grad_norm": 0.48568686842918396, "learning_rate": 5.160550458715596e-06, "loss": 0.4531, "step": 675 }, { "epoch": 0.031021981552016888, "grad_norm": 0.4904922842979431, "learning_rate": 5.168195718654435e-06, "loss": 0.4363, "step": 676 }, { "epoch": 0.03106787205727135, "grad_norm": 0.45971164107322693, "learning_rate": 5.175840978593273e-06, "loss": 0.3517, "step": 677 }, { "epoch": 0.031113762562525814, "grad_norm": 0.4890196621417999, "learning_rate": 5.18348623853211e-06, "loss": 0.3709, "step": 678 }, { "epoch": 0.031159653067780277, "grad_norm": 0.4717274010181427, "learning_rate": 5.1911314984709485e-06, "loss": 0.4312, "step": 679 }, { "epoch": 0.03120554357303474, "grad_norm": 0.45426157116889954, "learning_rate": 5.198776758409786e-06, "loss": 0.4003, "step": 680 }, { "epoch": 0.0312514340782892, "grad_norm": 0.4568299949169159, "learning_rate": 5.206422018348625e-06, "loss": 0.398, "step": 681 }, { "epoch": 0.031297324583543666, "grad_norm": 0.4921552538871765, "learning_rate": 5.2140672782874625e-06, "loss": 0.4375, "step": 682 }, { "epoch": 0.03134321508879813, "grad_norm": 0.511883556842804, "learning_rate": 5.2217125382263e-06, "loss": 0.4425, "step": 683 }, { "epoch": 0.03138910559405259, "grad_norm": 0.47729989886283875, "learning_rate": 5.229357798165137e-06, "loss": 0.4291, "step": 684 }, { "epoch": 0.031434996099307055, "grad_norm": 0.5338758826255798, "learning_rate": 5.237003058103976e-06, "loss": 0.4765, "step": 685 }, { "epoch": 0.03148088660456152, "grad_norm": 0.5013654828071594, "learning_rate": 5.244648318042814e-06, "loss": 0.4556, "step": 686 }, { "epoch": 0.03152677710981598, "grad_norm": 0.48483702540397644, "learning_rate": 5.252293577981652e-06, "loss": 0.389, "step": 687 }, { "epoch": 0.031572667615070445, "grad_norm": 0.4565930962562561, "learning_rate": 5.25993883792049e-06, "loss": 0.3939, "step": 688 }, { "epoch": 0.03161855812032491, "grad_norm": 0.4636811912059784, "learning_rate": 5.267584097859327e-06, "loss": 0.3682, "step": 689 }, { "epoch": 0.03166444862557937, "grad_norm": 0.44330793619155884, "learning_rate": 5.275229357798165e-06, "loss": 0.3359, "step": 690 }, { "epoch": 0.031710339130833834, "grad_norm": 0.47419053316116333, "learning_rate": 5.282874617737004e-06, "loss": 0.4596, "step": 691 }, { "epoch": 0.03175622963608829, "grad_norm": 0.41946354508399963, "learning_rate": 5.290519877675841e-06, "loss": 0.3305, "step": 692 }, { "epoch": 0.03180212014134275, "grad_norm": 0.510080099105835, "learning_rate": 5.2981651376146795e-06, "loss": 0.4226, "step": 693 }, { "epoch": 0.031848010646597216, "grad_norm": 0.4508179724216461, "learning_rate": 5.305810397553517e-06, "loss": 0.4033, "step": 694 }, { "epoch": 0.03189390115185168, "grad_norm": 0.47535669803619385, "learning_rate": 5.313455657492356e-06, "loss": 0.4062, "step": 695 }, { "epoch": 0.03193979165710614, "grad_norm": 0.4734593629837036, "learning_rate": 5.3211009174311936e-06, "loss": 0.3946, "step": 696 }, { "epoch": 0.031985682162360606, "grad_norm": 0.41853395104408264, "learning_rate": 5.328746177370031e-06, "loss": 0.2861, "step": 697 }, { "epoch": 0.03203157266761507, "grad_norm": 0.5116884708404541, "learning_rate": 5.3363914373088685e-06, "loss": 0.4876, "step": 698 }, { "epoch": 0.03207746317286953, "grad_norm": 0.43127205967903137, "learning_rate": 5.344036697247707e-06, "loss": 0.3202, "step": 699 }, { "epoch": 0.032123353678123995, "grad_norm": 0.45206716656684875, "learning_rate": 5.351681957186545e-06, "loss": 0.3521, "step": 700 }, { "epoch": 0.03216924418337846, "grad_norm": 0.49567705392837524, "learning_rate": 5.359327217125383e-06, "loss": 0.4851, "step": 701 }, { "epoch": 0.03221513468863292, "grad_norm": 0.4594910740852356, "learning_rate": 5.366972477064221e-06, "loss": 0.3726, "step": 702 }, { "epoch": 0.032261025193887384, "grad_norm": 0.4929613769054413, "learning_rate": 5.374617737003058e-06, "loss": 0.402, "step": 703 }, { "epoch": 0.03230691569914185, "grad_norm": 0.5152514576911926, "learning_rate": 5.382262996941896e-06, "loss": 0.4337, "step": 704 }, { "epoch": 0.03235280620439631, "grad_norm": 0.4473576545715332, "learning_rate": 5.389908256880735e-06, "loss": 0.3202, "step": 705 }, { "epoch": 0.03239869670965077, "grad_norm": 0.4397395849227905, "learning_rate": 5.397553516819572e-06, "loss": 0.3618, "step": 706 }, { "epoch": 0.032444587214905236, "grad_norm": 0.4591197073459625, "learning_rate": 5.405198776758411e-06, "loss": 0.3665, "step": 707 }, { "epoch": 0.0324904777201597, "grad_norm": 0.4718942940235138, "learning_rate": 5.412844036697248e-06, "loss": 0.4327, "step": 708 }, { "epoch": 0.03253636822541416, "grad_norm": 0.4826303720474243, "learning_rate": 5.4204892966360855e-06, "loss": 0.4296, "step": 709 }, { "epoch": 0.032582258730668626, "grad_norm": 0.48603740334510803, "learning_rate": 5.428134556574925e-06, "loss": 0.4174, "step": 710 }, { "epoch": 0.03262814923592309, "grad_norm": 0.4769875109195709, "learning_rate": 5.435779816513762e-06, "loss": 0.4157, "step": 711 }, { "epoch": 0.03267403974117755, "grad_norm": 0.5343371629714966, "learning_rate": 5.4434250764525995e-06, "loss": 0.5321, "step": 712 }, { "epoch": 0.032719930246432015, "grad_norm": 0.5503456592559814, "learning_rate": 5.451070336391438e-06, "loss": 0.5236, "step": 713 }, { "epoch": 0.03276582075168648, "grad_norm": 0.4854121804237366, "learning_rate": 5.458715596330275e-06, "loss": 0.4325, "step": 714 }, { "epoch": 0.03281171125694094, "grad_norm": 0.43479588627815247, "learning_rate": 5.4663608562691136e-06, "loss": 0.3029, "step": 715 }, { "epoch": 0.032857601762195404, "grad_norm": 0.5901136994361877, "learning_rate": 5.474006116207952e-06, "loss": 0.5026, "step": 716 }, { "epoch": 0.03290349226744987, "grad_norm": 0.502284049987793, "learning_rate": 5.481651376146789e-06, "loss": 0.4487, "step": 717 }, { "epoch": 0.03294938277270433, "grad_norm": 0.5088910460472107, "learning_rate": 5.489296636085627e-06, "loss": 0.5032, "step": 718 }, { "epoch": 0.03299527327795879, "grad_norm": 0.4692699611186981, "learning_rate": 5.496941896024466e-06, "loss": 0.4325, "step": 719 }, { "epoch": 0.03304116378321326, "grad_norm": 0.4397878348827362, "learning_rate": 5.504587155963303e-06, "loss": 0.3201, "step": 720 }, { "epoch": 0.03308705428846771, "grad_norm": 0.4541064500808716, "learning_rate": 5.512232415902141e-06, "loss": 0.3928, "step": 721 }, { "epoch": 0.033132944793722176, "grad_norm": 0.4665283262729645, "learning_rate": 5.519877675840979e-06, "loss": 0.3678, "step": 722 }, { "epoch": 0.03317883529897664, "grad_norm": 0.5041980147361755, "learning_rate": 5.5275229357798165e-06, "loss": 0.5136, "step": 723 }, { "epoch": 0.0332247258042311, "grad_norm": 0.4926072061061859, "learning_rate": 5.535168195718656e-06, "loss": 0.417, "step": 724 }, { "epoch": 0.033270616309485565, "grad_norm": 0.4644889235496521, "learning_rate": 5.542813455657493e-06, "loss": 0.4224, "step": 725 }, { "epoch": 0.03331650681474003, "grad_norm": 0.5296972393989563, "learning_rate": 5.5504587155963306e-06, "loss": 0.4644, "step": 726 }, { "epoch": 0.03336239731999449, "grad_norm": 0.47378745675086975, "learning_rate": 5.558103975535169e-06, "loss": 0.3869, "step": 727 }, { "epoch": 0.033408287825248954, "grad_norm": 0.4840254485607147, "learning_rate": 5.565749235474006e-06, "loss": 0.4706, "step": 728 }, { "epoch": 0.03345417833050342, "grad_norm": 0.4293925166130066, "learning_rate": 5.573394495412845e-06, "loss": 0.3216, "step": 729 }, { "epoch": 0.03350006883575788, "grad_norm": 0.4357646405696869, "learning_rate": 5.581039755351683e-06, "loss": 0.3394, "step": 730 }, { "epoch": 0.033545959341012344, "grad_norm": 0.5204983353614807, "learning_rate": 5.58868501529052e-06, "loss": 0.4744, "step": 731 }, { "epoch": 0.03359184984626681, "grad_norm": 0.4658423066139221, "learning_rate": 5.596330275229358e-06, "loss": 0.39, "step": 732 }, { "epoch": 0.03363774035152127, "grad_norm": 0.47554606199264526, "learning_rate": 5.603975535168196e-06, "loss": 0.3871, "step": 733 }, { "epoch": 0.03368363085677573, "grad_norm": 0.4657820463180542, "learning_rate": 5.611620795107034e-06, "loss": 0.378, "step": 734 }, { "epoch": 0.033729521362030196, "grad_norm": 0.46907272934913635, "learning_rate": 5.619266055045872e-06, "loss": 0.4027, "step": 735 }, { "epoch": 0.03377541186728466, "grad_norm": 0.4753458797931671, "learning_rate": 5.62691131498471e-06, "loss": 0.466, "step": 736 }, { "epoch": 0.03382130237253912, "grad_norm": 0.5024524927139282, "learning_rate": 5.634556574923548e-06, "loss": 0.4678, "step": 737 }, { "epoch": 0.033867192877793585, "grad_norm": 0.40047597885131836, "learning_rate": 5.642201834862385e-06, "loss": 0.2765, "step": 738 }, { "epoch": 0.03391308338304805, "grad_norm": 0.4640674293041229, "learning_rate": 5.649847094801224e-06, "loss": 0.4219, "step": 739 }, { "epoch": 0.03395897388830251, "grad_norm": 0.5290677547454834, "learning_rate": 5.657492354740062e-06, "loss": 0.4627, "step": 740 }, { "epoch": 0.034004864393556974, "grad_norm": 0.4202752709388733, "learning_rate": 5.665137614678899e-06, "loss": 0.3275, "step": 741 }, { "epoch": 0.03405075489881144, "grad_norm": 0.5070269107818604, "learning_rate": 5.672782874617737e-06, "loss": 0.5058, "step": 742 }, { "epoch": 0.0340966454040659, "grad_norm": 0.544164776802063, "learning_rate": 5.680428134556576e-06, "loss": 0.5202, "step": 743 }, { "epoch": 0.034142535909320364, "grad_norm": 0.4559119641780853, "learning_rate": 5.688073394495414e-06, "loss": 0.39, "step": 744 }, { "epoch": 0.03418842641457483, "grad_norm": 0.5321412682533264, "learning_rate": 5.695718654434251e-06, "loss": 0.5447, "step": 745 }, { "epoch": 0.03423431691982929, "grad_norm": 0.47550147771835327, "learning_rate": 5.703363914373089e-06, "loss": 0.4263, "step": 746 }, { "epoch": 0.03428020742508375, "grad_norm": 0.4727621078491211, "learning_rate": 5.711009174311926e-06, "loss": 0.3601, "step": 747 }, { "epoch": 0.034326097930338216, "grad_norm": 0.4610954821109772, "learning_rate": 5.7186544342507654e-06, "loss": 0.3784, "step": 748 }, { "epoch": 0.03437198843559268, "grad_norm": 0.48836562037467957, "learning_rate": 5.726299694189603e-06, "loss": 0.4206, "step": 749 }, { "epoch": 0.034417878940847135, "grad_norm": 0.507480800151825, "learning_rate": 5.733944954128441e-06, "loss": 0.4779, "step": 750 }, { "epoch": 0.0344637694461016, "grad_norm": 0.552983820438385, "learning_rate": 5.741590214067279e-06, "loss": 0.5358, "step": 751 }, { "epoch": 0.03450965995135606, "grad_norm": 0.4471736252307892, "learning_rate": 5.749235474006116e-06, "loss": 0.3624, "step": 752 }, { "epoch": 0.034555550456610525, "grad_norm": 0.4889674484729767, "learning_rate": 5.756880733944955e-06, "loss": 0.4329, "step": 753 }, { "epoch": 0.03460144096186499, "grad_norm": 0.505480945110321, "learning_rate": 5.764525993883793e-06, "loss": 0.4043, "step": 754 }, { "epoch": 0.03464733146711945, "grad_norm": 0.4882463812828064, "learning_rate": 5.77217125382263e-06, "loss": 0.4151, "step": 755 }, { "epoch": 0.034693221972373914, "grad_norm": 0.4720045328140259, "learning_rate": 5.7798165137614684e-06, "loss": 0.3869, "step": 756 }, { "epoch": 0.03473911247762838, "grad_norm": 0.5206193327903748, "learning_rate": 5.787461773700306e-06, "loss": 0.5136, "step": 757 }, { "epoch": 0.03478500298288284, "grad_norm": 0.46404632925987244, "learning_rate": 5.795107033639145e-06, "loss": 0.3856, "step": 758 }, { "epoch": 0.0348308934881373, "grad_norm": 0.4719366431236267, "learning_rate": 5.8027522935779825e-06, "loss": 0.3643, "step": 759 }, { "epoch": 0.034876783993391766, "grad_norm": 0.4552661180496216, "learning_rate": 5.81039755351682e-06, "loss": 0.328, "step": 760 }, { "epoch": 0.03492267449864623, "grad_norm": 0.503520667552948, "learning_rate": 5.818042813455657e-06, "loss": 0.4676, "step": 761 }, { "epoch": 0.03496856500390069, "grad_norm": 0.490029513835907, "learning_rate": 5.825688073394496e-06, "loss": 0.4581, "step": 762 }, { "epoch": 0.035014455509155155, "grad_norm": 0.42828088998794556, "learning_rate": 5.833333333333334e-06, "loss": 0.3307, "step": 763 }, { "epoch": 0.03506034601440962, "grad_norm": 0.5580511689186096, "learning_rate": 5.840978593272172e-06, "loss": 0.3878, "step": 764 }, { "epoch": 0.03510623651966408, "grad_norm": 0.4630361497402191, "learning_rate": 5.84862385321101e-06, "loss": 0.4286, "step": 765 }, { "epoch": 0.035152127024918545, "grad_norm": 0.4680016040802002, "learning_rate": 5.856269113149847e-06, "loss": 0.4324, "step": 766 }, { "epoch": 0.03519801753017301, "grad_norm": 0.4838612675666809, "learning_rate": 5.863914373088685e-06, "loss": 0.491, "step": 767 }, { "epoch": 0.03524390803542747, "grad_norm": 0.47573021054267883, "learning_rate": 5.871559633027524e-06, "loss": 0.4217, "step": 768 }, { "epoch": 0.035289798540681934, "grad_norm": 0.6087118983268738, "learning_rate": 5.879204892966361e-06, "loss": 0.4436, "step": 769 }, { "epoch": 0.0353356890459364, "grad_norm": 0.4900757968425751, "learning_rate": 5.8868501529051995e-06, "loss": 0.406, "step": 770 }, { "epoch": 0.03538157955119086, "grad_norm": 0.44781285524368286, "learning_rate": 5.894495412844037e-06, "loss": 0.377, "step": 771 }, { "epoch": 0.03542747005644532, "grad_norm": 0.4651029407978058, "learning_rate": 5.902140672782875e-06, "loss": 0.3313, "step": 772 }, { "epoch": 0.035473360561699786, "grad_norm": 0.46212464570999146, "learning_rate": 5.9097859327217135e-06, "loss": 0.3985, "step": 773 }, { "epoch": 0.03551925106695425, "grad_norm": 0.49656569957733154, "learning_rate": 5.917431192660551e-06, "loss": 0.4624, "step": 774 }, { "epoch": 0.03556514157220871, "grad_norm": 0.514012336730957, "learning_rate": 5.925076452599388e-06, "loss": 0.5059, "step": 775 }, { "epoch": 0.035611032077463176, "grad_norm": 0.48806434869766235, "learning_rate": 5.932721712538227e-06, "loss": 0.4592, "step": 776 }, { "epoch": 0.03565692258271764, "grad_norm": 0.4882490038871765, "learning_rate": 5.940366972477065e-06, "loss": 0.4285, "step": 777 }, { "epoch": 0.0357028130879721, "grad_norm": 0.46129822731018066, "learning_rate": 5.9480122324159025e-06, "loss": 0.3848, "step": 778 }, { "epoch": 0.035748703593226565, "grad_norm": 0.4811941385269165, "learning_rate": 5.955657492354741e-06, "loss": 0.4294, "step": 779 }, { "epoch": 0.03579459409848102, "grad_norm": 0.47969749569892883, "learning_rate": 5.963302752293578e-06, "loss": 0.4201, "step": 780 }, { "epoch": 0.035840484603735484, "grad_norm": 0.5206419229507446, "learning_rate": 5.970948012232416e-06, "loss": 0.4679, "step": 781 }, { "epoch": 0.03588637510898995, "grad_norm": 0.5180150270462036, "learning_rate": 5.978593272171255e-06, "loss": 0.5091, "step": 782 }, { "epoch": 0.03593226561424441, "grad_norm": 0.4841292202472687, "learning_rate": 5.986238532110092e-06, "loss": 0.4233, "step": 783 }, { "epoch": 0.03597815611949887, "grad_norm": 0.43384209275245667, "learning_rate": 5.9938837920489305e-06, "loss": 0.3337, "step": 784 }, { "epoch": 0.036024046624753336, "grad_norm": 0.45960533618927, "learning_rate": 6.001529051987768e-06, "loss": 0.3717, "step": 785 }, { "epoch": 0.0360699371300078, "grad_norm": 0.4859628677368164, "learning_rate": 6.0091743119266054e-06, "loss": 0.45, "step": 786 }, { "epoch": 0.03611582763526226, "grad_norm": 0.41695070266723633, "learning_rate": 6.0168195718654446e-06, "loss": 0.3128, "step": 787 }, { "epoch": 0.036161718140516726, "grad_norm": 0.4612447917461395, "learning_rate": 6.024464831804282e-06, "loss": 0.3968, "step": 788 }, { "epoch": 0.03620760864577119, "grad_norm": 0.5911262035369873, "learning_rate": 6.0321100917431195e-06, "loss": 0.5441, "step": 789 }, { "epoch": 0.03625349915102565, "grad_norm": 0.43984946608543396, "learning_rate": 6.039755351681958e-06, "loss": 0.3262, "step": 790 }, { "epoch": 0.036299389656280115, "grad_norm": 0.4176672101020813, "learning_rate": 6.047400611620795e-06, "loss": 0.2796, "step": 791 }, { "epoch": 0.03634528016153458, "grad_norm": 0.49719345569610596, "learning_rate": 6.0550458715596335e-06, "loss": 0.4723, "step": 792 }, { "epoch": 0.03639117066678904, "grad_norm": 0.4714670479297638, "learning_rate": 6.062691131498472e-06, "loss": 0.4089, "step": 793 }, { "epoch": 0.036437061172043504, "grad_norm": 0.5643766522407532, "learning_rate": 6.070336391437309e-06, "loss": 0.5232, "step": 794 }, { "epoch": 0.03648295167729797, "grad_norm": 0.5200351476669312, "learning_rate": 6.077981651376147e-06, "loss": 0.432, "step": 795 }, { "epoch": 0.03652884218255243, "grad_norm": 0.48379260301589966, "learning_rate": 6.085626911314986e-06, "loss": 0.4007, "step": 796 }, { "epoch": 0.03657473268780689, "grad_norm": 0.450309693813324, "learning_rate": 6.093272171253823e-06, "loss": 0.3764, "step": 797 }, { "epoch": 0.03662062319306136, "grad_norm": 0.4852868318557739, "learning_rate": 6.100917431192661e-06, "loss": 0.4812, "step": 798 }, { "epoch": 0.03666651369831582, "grad_norm": 0.48230257630348206, "learning_rate": 6.108562691131499e-06, "loss": 0.4429, "step": 799 }, { "epoch": 0.03671240420357028, "grad_norm": 0.4992111623287201, "learning_rate": 6.1162079510703365e-06, "loss": 0.4462, "step": 800 }, { "epoch": 0.036758294708824746, "grad_norm": 0.45000842213630676, "learning_rate": 6.123853211009176e-06, "loss": 0.3441, "step": 801 }, { "epoch": 0.03680418521407921, "grad_norm": 0.5243483185768127, "learning_rate": 6.131498470948013e-06, "loss": 0.4953, "step": 802 }, { "epoch": 0.03685007571933367, "grad_norm": 0.5090179443359375, "learning_rate": 6.1391437308868505e-06, "loss": 0.4669, "step": 803 }, { "epoch": 0.036895966224588135, "grad_norm": 0.465063214302063, "learning_rate": 6.146788990825688e-06, "loss": 0.3666, "step": 804 }, { "epoch": 0.0369418567298426, "grad_norm": 0.5193577408790588, "learning_rate": 6.154434250764526e-06, "loss": 0.4688, "step": 805 }, { "epoch": 0.03698774723509706, "grad_norm": 0.5085948705673218, "learning_rate": 6.1620795107033646e-06, "loss": 0.4257, "step": 806 }, { "epoch": 0.037033637740351524, "grad_norm": 0.44270625710487366, "learning_rate": 6.169724770642203e-06, "loss": 0.3476, "step": 807 }, { "epoch": 0.03707952824560599, "grad_norm": 0.4461106061935425, "learning_rate": 6.17737003058104e-06, "loss": 0.375, "step": 808 }, { "epoch": 0.037125418750860444, "grad_norm": 0.4571780264377594, "learning_rate": 6.185015290519878e-06, "loss": 0.3259, "step": 809 }, { "epoch": 0.03717130925611491, "grad_norm": 0.44006818532943726, "learning_rate": 6.192660550458715e-06, "loss": 0.3335, "step": 810 }, { "epoch": 0.03721719976136937, "grad_norm": 0.4510287046432495, "learning_rate": 6.200305810397554e-06, "loss": 0.3969, "step": 811 }, { "epoch": 0.03726309026662383, "grad_norm": 0.4678497612476349, "learning_rate": 6.207951070336392e-06, "loss": 0.4056, "step": 812 }, { "epoch": 0.037308980771878296, "grad_norm": 0.5567219257354736, "learning_rate": 6.21559633027523e-06, "loss": 0.4224, "step": 813 }, { "epoch": 0.03735487127713276, "grad_norm": 0.48783227801322937, "learning_rate": 6.2232415902140675e-06, "loss": 0.4979, "step": 814 }, { "epoch": 0.03740076178238722, "grad_norm": 0.4996022582054138, "learning_rate": 6.230886850152905e-06, "loss": 0.4312, "step": 815 }, { "epoch": 0.037446652287641685, "grad_norm": 0.648202657699585, "learning_rate": 6.238532110091744e-06, "loss": 0.6078, "step": 816 }, { "epoch": 0.03749254279289615, "grad_norm": 0.49291545152664185, "learning_rate": 6.2461773700305816e-06, "loss": 0.4469, "step": 817 }, { "epoch": 0.03753843329815061, "grad_norm": 0.45443353056907654, "learning_rate": 6.253822629969419e-06, "loss": 0.3117, "step": 818 }, { "epoch": 0.037584323803405074, "grad_norm": 0.4558636546134949, "learning_rate": 6.261467889908257e-06, "loss": 0.3608, "step": 819 }, { "epoch": 0.03763021430865954, "grad_norm": 0.4326160252094269, "learning_rate": 6.269113149847096e-06, "loss": 0.3597, "step": 820 }, { "epoch": 0.037676104813914, "grad_norm": 0.514356255531311, "learning_rate": 6.276758409785934e-06, "loss": 0.4521, "step": 821 }, { "epoch": 0.037721995319168464, "grad_norm": 0.5081436038017273, "learning_rate": 6.284403669724771e-06, "loss": 0.4222, "step": 822 }, { "epoch": 0.03776788582442293, "grad_norm": 0.474962055683136, "learning_rate": 6.292048929663609e-06, "loss": 0.386, "step": 823 }, { "epoch": 0.03781377632967739, "grad_norm": 0.5017170310020447, "learning_rate": 6.299694189602446e-06, "loss": 0.4958, "step": 824 }, { "epoch": 0.03785966683493185, "grad_norm": 0.4880979359149933, "learning_rate": 6.307339449541285e-06, "loss": 0.4353, "step": 825 }, { "epoch": 0.037905557340186316, "grad_norm": 0.45421984791755676, "learning_rate": 6.314984709480123e-06, "loss": 0.3656, "step": 826 }, { "epoch": 0.03795144784544078, "grad_norm": 0.5236946940422058, "learning_rate": 6.322629969418961e-06, "loss": 0.4898, "step": 827 }, { "epoch": 0.03799733835069524, "grad_norm": 0.4378328323364258, "learning_rate": 6.330275229357799e-06, "loss": 0.3456, "step": 828 }, { "epoch": 0.038043228855949705, "grad_norm": 0.4880081117153168, "learning_rate": 6.337920489296636e-06, "loss": 0.4003, "step": 829 }, { "epoch": 0.03808911936120417, "grad_norm": 0.5049906969070435, "learning_rate": 6.345565749235475e-06, "loss": 0.4344, "step": 830 }, { "epoch": 0.03813500986645863, "grad_norm": 0.47188711166381836, "learning_rate": 6.353211009174313e-06, "loss": 0.3879, "step": 831 }, { "epoch": 0.038180900371713095, "grad_norm": 0.5207975506782532, "learning_rate": 6.36085626911315e-06, "loss": 0.5027, "step": 832 }, { "epoch": 0.03822679087696756, "grad_norm": 0.4640098512172699, "learning_rate": 6.368501529051988e-06, "loss": 0.3845, "step": 833 }, { "epoch": 0.03827268138222202, "grad_norm": 0.49735915660858154, "learning_rate": 6.376146788990826e-06, "loss": 0.4481, "step": 834 }, { "epoch": 0.038318571887476484, "grad_norm": 0.4384276866912842, "learning_rate": 6.383792048929664e-06, "loss": 0.3684, "step": 835 }, { "epoch": 0.03836446239273095, "grad_norm": 0.5249605178833008, "learning_rate": 6.391437308868502e-06, "loss": 0.5716, "step": 836 }, { "epoch": 0.03841035289798541, "grad_norm": 0.4621300995349884, "learning_rate": 6.39908256880734e-06, "loss": 0.4427, "step": 837 }, { "epoch": 0.03845624340323987, "grad_norm": 0.488831490278244, "learning_rate": 6.406727828746177e-06, "loss": 0.4785, "step": 838 }, { "epoch": 0.03850213390849433, "grad_norm": 0.5020730495452881, "learning_rate": 6.414373088685016e-06, "loss": 0.4396, "step": 839 }, { "epoch": 0.03854802441374879, "grad_norm": 0.508812665939331, "learning_rate": 6.422018348623854e-06, "loss": 0.3891, "step": 840 }, { "epoch": 0.038593914919003255, "grad_norm": 0.5025983452796936, "learning_rate": 6.429663608562692e-06, "loss": 0.4385, "step": 841 }, { "epoch": 0.03863980542425772, "grad_norm": 0.45254746079444885, "learning_rate": 6.43730886850153e-06, "loss": 0.3807, "step": 842 }, { "epoch": 0.03868569592951218, "grad_norm": 0.4926454424858093, "learning_rate": 6.444954128440367e-06, "loss": 0.3821, "step": 843 }, { "epoch": 0.038731586434766645, "grad_norm": 0.5796123147010803, "learning_rate": 6.452599388379206e-06, "loss": 0.5965, "step": 844 }, { "epoch": 0.03877747694002111, "grad_norm": 0.45580214262008667, "learning_rate": 6.460244648318044e-06, "loss": 0.3457, "step": 845 }, { "epoch": 0.03882336744527557, "grad_norm": 0.4710889756679535, "learning_rate": 6.467889908256881e-06, "loss": 0.3665, "step": 846 }, { "epoch": 0.038869257950530034, "grad_norm": 0.4428875744342804, "learning_rate": 6.4755351681957194e-06, "loss": 0.3398, "step": 847 }, { "epoch": 0.0389151484557845, "grad_norm": 0.4860277473926544, "learning_rate": 6.483180428134557e-06, "loss": 0.4059, "step": 848 }, { "epoch": 0.03896103896103896, "grad_norm": 0.519835352897644, "learning_rate": 6.490825688073395e-06, "loss": 0.466, "step": 849 }, { "epoch": 0.03900692946629342, "grad_norm": 0.5390611290931702, "learning_rate": 6.4984709480122335e-06, "loss": 0.5381, "step": 850 }, { "epoch": 0.039052819971547886, "grad_norm": 0.4673818349838257, "learning_rate": 6.506116207951071e-06, "loss": 0.3762, "step": 851 }, { "epoch": 0.03909871047680235, "grad_norm": 0.5004668235778809, "learning_rate": 6.513761467889908e-06, "loss": 0.4299, "step": 852 }, { "epoch": 0.03914460098205681, "grad_norm": 0.4638606905937195, "learning_rate": 6.521406727828747e-06, "loss": 0.3703, "step": 853 }, { "epoch": 0.039190491487311276, "grad_norm": 0.47947144508361816, "learning_rate": 6.529051987767585e-06, "loss": 0.4012, "step": 854 }, { "epoch": 0.03923638199256574, "grad_norm": 0.5222530364990234, "learning_rate": 6.536697247706422e-06, "loss": 0.4813, "step": 855 }, { "epoch": 0.0392822724978202, "grad_norm": 0.48211199045181274, "learning_rate": 6.544342507645261e-06, "loss": 0.3946, "step": 856 }, { "epoch": 0.039328163003074665, "grad_norm": 0.48552441596984863, "learning_rate": 6.551987767584098e-06, "loss": 0.3975, "step": 857 }, { "epoch": 0.03937405350832913, "grad_norm": 0.4373522996902466, "learning_rate": 6.559633027522936e-06, "loss": 0.3136, "step": 858 }, { "epoch": 0.03941994401358359, "grad_norm": 0.45665863156318665, "learning_rate": 6.567278287461775e-06, "loss": 0.4093, "step": 859 }, { "epoch": 0.039465834518838054, "grad_norm": 0.48821836709976196, "learning_rate": 6.574923547400612e-06, "loss": 0.4146, "step": 860 }, { "epoch": 0.03951172502409252, "grad_norm": 0.44661468267440796, "learning_rate": 6.58256880733945e-06, "loss": 0.3337, "step": 861 }, { "epoch": 0.03955761552934698, "grad_norm": 0.46406564116477966, "learning_rate": 6.590214067278288e-06, "loss": 0.3904, "step": 862 }, { "epoch": 0.03960350603460144, "grad_norm": 0.4877103567123413, "learning_rate": 6.597859327217125e-06, "loss": 0.4084, "step": 863 }, { "epoch": 0.039649396539855906, "grad_norm": 0.46920427680015564, "learning_rate": 6.6055045871559645e-06, "loss": 0.3958, "step": 864 }, { "epoch": 0.03969528704511037, "grad_norm": 0.5075839757919312, "learning_rate": 6.613149847094802e-06, "loss": 0.4671, "step": 865 }, { "epoch": 0.03974117755036483, "grad_norm": 0.5123674273490906, "learning_rate": 6.620795107033639e-06, "loss": 0.4602, "step": 866 }, { "epoch": 0.039787068055619296, "grad_norm": 0.5872593522071838, "learning_rate": 6.628440366972477e-06, "loss": 0.4539, "step": 867 }, { "epoch": 0.03983295856087375, "grad_norm": 0.5298520922660828, "learning_rate": 6.636085626911316e-06, "loss": 0.4833, "step": 868 }, { "epoch": 0.039878849066128215, "grad_norm": 0.5340838432312012, "learning_rate": 6.6437308868501535e-06, "loss": 0.5307, "step": 869 }, { "epoch": 0.03992473957138268, "grad_norm": 0.46776753664016724, "learning_rate": 6.651376146788992e-06, "loss": 0.3792, "step": 870 }, { "epoch": 0.03997063007663714, "grad_norm": 0.5008481740951538, "learning_rate": 6.659021406727829e-06, "loss": 0.4161, "step": 871 }, { "epoch": 0.040016520581891604, "grad_norm": 0.5117496252059937, "learning_rate": 6.666666666666667e-06, "loss": 0.4842, "step": 872 }, { "epoch": 0.04006241108714607, "grad_norm": 0.4765634834766388, "learning_rate": 6.674311926605506e-06, "loss": 0.3831, "step": 873 }, { "epoch": 0.04010830159240053, "grad_norm": 0.421880304813385, "learning_rate": 6.681957186544343e-06, "loss": 0.3088, "step": 874 }, { "epoch": 0.040154192097654993, "grad_norm": 0.4803207516670227, "learning_rate": 6.689602446483181e-06, "loss": 0.4326, "step": 875 }, { "epoch": 0.04020008260290946, "grad_norm": 0.4485001266002655, "learning_rate": 6.697247706422019e-06, "loss": 0.3705, "step": 876 }, { "epoch": 0.04024597310816392, "grad_norm": 0.6779983639717102, "learning_rate": 6.7048929663608564e-06, "loss": 0.4751, "step": 877 }, { "epoch": 0.04029186361341838, "grad_norm": 0.48208385705947876, "learning_rate": 6.7125382262996956e-06, "loss": 0.3827, "step": 878 }, { "epoch": 0.040337754118672846, "grad_norm": 0.4891941547393799, "learning_rate": 6.720183486238533e-06, "loss": 0.4127, "step": 879 }, { "epoch": 0.04038364462392731, "grad_norm": 0.5813407897949219, "learning_rate": 6.7278287461773705e-06, "loss": 0.5615, "step": 880 }, { "epoch": 0.04042953512918177, "grad_norm": 0.48525410890579224, "learning_rate": 6.735474006116208e-06, "loss": 0.4173, "step": 881 }, { "epoch": 0.040475425634436235, "grad_norm": 0.4815974831581116, "learning_rate": 6.743119266055046e-06, "loss": 0.4394, "step": 882 }, { "epoch": 0.0405213161396907, "grad_norm": 0.4465784430503845, "learning_rate": 6.7507645259938845e-06, "loss": 0.3633, "step": 883 }, { "epoch": 0.04056720664494516, "grad_norm": 0.5458753705024719, "learning_rate": 6.758409785932723e-06, "loss": 0.4662, "step": 884 }, { "epoch": 0.040613097150199624, "grad_norm": 0.5036852955818176, "learning_rate": 6.76605504587156e-06, "loss": 0.4756, "step": 885 }, { "epoch": 0.04065898765545409, "grad_norm": 0.5536975860595703, "learning_rate": 6.773700305810398e-06, "loss": 0.5082, "step": 886 }, { "epoch": 0.04070487816070855, "grad_norm": 0.5013907551765442, "learning_rate": 6.781345565749235e-06, "loss": 0.4929, "step": 887 }, { "epoch": 0.040750768665963014, "grad_norm": 0.6268038749694824, "learning_rate": 6.788990825688074e-06, "loss": 0.5717, "step": 888 }, { "epoch": 0.04079665917121748, "grad_norm": 0.5077654123306274, "learning_rate": 6.796636085626912e-06, "loss": 0.508, "step": 889 }, { "epoch": 0.04084254967647194, "grad_norm": 0.5034418106079102, "learning_rate": 6.80428134556575e-06, "loss": 0.4734, "step": 890 }, { "epoch": 0.0408884401817264, "grad_norm": 0.5471553802490234, "learning_rate": 6.8119266055045875e-06, "loss": 0.4197, "step": 891 }, { "epoch": 0.040934330686980866, "grad_norm": 0.503756582736969, "learning_rate": 6.819571865443425e-06, "loss": 0.4441, "step": 892 }, { "epoch": 0.04098022119223533, "grad_norm": 0.5133985877037048, "learning_rate": 6.827217125382264e-06, "loss": 0.4309, "step": 893 }, { "epoch": 0.04102611169748979, "grad_norm": 0.5441486835479736, "learning_rate": 6.8348623853211015e-06, "loss": 0.5227, "step": 894 }, { "epoch": 0.041072002202744255, "grad_norm": 0.48359623551368713, "learning_rate": 6.842507645259939e-06, "loss": 0.3764, "step": 895 }, { "epoch": 0.04111789270799872, "grad_norm": 0.46526890993118286, "learning_rate": 6.850152905198777e-06, "loss": 0.3697, "step": 896 }, { "epoch": 0.041163783213253174, "grad_norm": 0.4271526038646698, "learning_rate": 6.8577981651376156e-06, "loss": 0.3467, "step": 897 }, { "epoch": 0.04120967371850764, "grad_norm": 0.4491538107395172, "learning_rate": 6.865443425076454e-06, "loss": 0.3525, "step": 898 }, { "epoch": 0.0412555642237621, "grad_norm": 0.4674430191516876, "learning_rate": 6.873088685015291e-06, "loss": 0.376, "step": 899 }, { "epoch": 0.041301454729016564, "grad_norm": 0.4818445146083832, "learning_rate": 6.880733944954129e-06, "loss": 0.398, "step": 900 }, { "epoch": 0.04134734523427103, "grad_norm": 0.4773513674736023, "learning_rate": 6.888379204892966e-06, "loss": 0.4041, "step": 901 }, { "epoch": 0.04139323573952549, "grad_norm": 0.5189515352249146, "learning_rate": 6.896024464831805e-06, "loss": 0.5368, "step": 902 }, { "epoch": 0.04143912624477995, "grad_norm": 0.5067550539970398, "learning_rate": 6.903669724770643e-06, "loss": 0.4413, "step": 903 }, { "epoch": 0.041485016750034416, "grad_norm": 0.4592413306236267, "learning_rate": 6.911314984709481e-06, "loss": 0.3912, "step": 904 }, { "epoch": 0.04153090725528888, "grad_norm": 0.451252818107605, "learning_rate": 6.9189602446483185e-06, "loss": 0.3298, "step": 905 }, { "epoch": 0.04157679776054334, "grad_norm": 0.4569626748561859, "learning_rate": 6.926605504587156e-06, "loss": 0.3555, "step": 906 }, { "epoch": 0.041622688265797805, "grad_norm": 0.5089961886405945, "learning_rate": 6.934250764525995e-06, "loss": 0.4276, "step": 907 }, { "epoch": 0.04166857877105227, "grad_norm": 0.6535539627075195, "learning_rate": 6.941896024464833e-06, "loss": 0.6558, "step": 908 }, { "epoch": 0.04171446927630673, "grad_norm": 0.4889644980430603, "learning_rate": 6.94954128440367e-06, "loss": 0.384, "step": 909 }, { "epoch": 0.041760359781561195, "grad_norm": 0.49032333493232727, "learning_rate": 6.957186544342508e-06, "loss": 0.4779, "step": 910 }, { "epoch": 0.04180625028681566, "grad_norm": 0.5432859659194946, "learning_rate": 6.964831804281346e-06, "loss": 0.4565, "step": 911 }, { "epoch": 0.04185214079207012, "grad_norm": 0.4391240179538727, "learning_rate": 6.972477064220184e-06, "loss": 0.3484, "step": 912 }, { "epoch": 0.041898031297324584, "grad_norm": 0.5433114767074585, "learning_rate": 6.980122324159022e-06, "loss": 0.542, "step": 913 }, { "epoch": 0.04194392180257905, "grad_norm": 0.5047166347503662, "learning_rate": 6.98776758409786e-06, "loss": 0.4734, "step": 914 }, { "epoch": 0.04198981230783351, "grad_norm": 0.4411572515964508, "learning_rate": 6.995412844036697e-06, "loss": 0.3259, "step": 915 }, { "epoch": 0.04203570281308797, "grad_norm": 0.476979523897171, "learning_rate": 7.0030581039755356e-06, "loss": 0.4139, "step": 916 }, { "epoch": 0.042081593318342436, "grad_norm": 0.5029655694961548, "learning_rate": 7.010703363914374e-06, "loss": 0.4258, "step": 917 }, { "epoch": 0.0421274838235969, "grad_norm": 0.4646511375904083, "learning_rate": 7.018348623853211e-06, "loss": 0.3987, "step": 918 }, { "epoch": 0.04217337432885136, "grad_norm": 0.5069194436073303, "learning_rate": 7.02599388379205e-06, "loss": 0.3741, "step": 919 }, { "epoch": 0.042219264834105825, "grad_norm": 0.5072588920593262, "learning_rate": 7.033639143730887e-06, "loss": 0.4191, "step": 920 }, { "epoch": 0.04226515533936029, "grad_norm": 0.5100658535957336, "learning_rate": 7.041284403669726e-06, "loss": 0.4241, "step": 921 }, { "epoch": 0.04231104584461475, "grad_norm": 0.4676949679851532, "learning_rate": 7.048929663608564e-06, "loss": 0.4464, "step": 922 }, { "epoch": 0.042356936349869215, "grad_norm": 0.48504266142845154, "learning_rate": 7.056574923547401e-06, "loss": 0.3581, "step": 923 }, { "epoch": 0.04240282685512368, "grad_norm": 0.46454373002052307, "learning_rate": 7.0642201834862385e-06, "loss": 0.3942, "step": 924 }, { "epoch": 0.04244871736037814, "grad_norm": 0.44111472368240356, "learning_rate": 7.071865443425077e-06, "loss": 0.3617, "step": 925 }, { "epoch": 0.042494607865632604, "grad_norm": 0.4450927972793579, "learning_rate": 7.079510703363915e-06, "loss": 0.3347, "step": 926 }, { "epoch": 0.04254049837088706, "grad_norm": 0.5157008171081543, "learning_rate": 7.087155963302753e-06, "loss": 0.5074, "step": 927 }, { "epoch": 0.04258638887614152, "grad_norm": 0.4555475413799286, "learning_rate": 7.094801223241591e-06, "loss": 0.3985, "step": 928 }, { "epoch": 0.042632279381395986, "grad_norm": 0.5189474821090698, "learning_rate": 7.102446483180428e-06, "loss": 0.4656, "step": 929 }, { "epoch": 0.04267816988665045, "grad_norm": 0.44826528429985046, "learning_rate": 7.110091743119267e-06, "loss": 0.3747, "step": 930 }, { "epoch": 0.04272406039190491, "grad_norm": 0.4239514172077179, "learning_rate": 7.117737003058105e-06, "loss": 0.3316, "step": 931 }, { "epoch": 0.042769950897159376, "grad_norm": 0.5833219289779663, "learning_rate": 7.125382262996942e-06, "loss": 0.5697, "step": 932 }, { "epoch": 0.04281584140241384, "grad_norm": 0.45834407210350037, "learning_rate": 7.133027522935781e-06, "loss": 0.3672, "step": 933 }, { "epoch": 0.0428617319076683, "grad_norm": 0.5336815714836121, "learning_rate": 7.140672782874618e-06, "loss": 0.506, "step": 934 }, { "epoch": 0.042907622412922765, "grad_norm": 0.4766632318496704, "learning_rate": 7.1483180428134555e-06, "loss": 0.4341, "step": 935 }, { "epoch": 0.04295351291817723, "grad_norm": 0.4502406716346741, "learning_rate": 7.155963302752295e-06, "loss": 0.3182, "step": 936 }, { "epoch": 0.04299940342343169, "grad_norm": 0.5229837894439697, "learning_rate": 7.163608562691132e-06, "loss": 0.4083, "step": 937 }, { "epoch": 0.043045293928686154, "grad_norm": 0.510887086391449, "learning_rate": 7.17125382262997e-06, "loss": 0.4283, "step": 938 }, { "epoch": 0.04309118443394062, "grad_norm": 0.5132585167884827, "learning_rate": 7.178899082568808e-06, "loss": 0.4038, "step": 939 }, { "epoch": 0.04313707493919508, "grad_norm": 0.5235181450843811, "learning_rate": 7.186544342507645e-06, "loss": 0.5071, "step": 940 }, { "epoch": 0.04318296544444954, "grad_norm": 0.40967270731925964, "learning_rate": 7.1941896024464845e-06, "loss": 0.3069, "step": 941 }, { "epoch": 0.043228855949704006, "grad_norm": 0.5366747975349426, "learning_rate": 7.201834862385322e-06, "loss": 0.4964, "step": 942 }, { "epoch": 0.04327474645495847, "grad_norm": 0.4693954586982727, "learning_rate": 7.209480122324159e-06, "loss": 0.3575, "step": 943 }, { "epoch": 0.04332063696021293, "grad_norm": 0.48921796679496765, "learning_rate": 7.217125382262997e-06, "loss": 0.395, "step": 944 }, { "epoch": 0.043366527465467396, "grad_norm": 0.40874385833740234, "learning_rate": 7.224770642201836e-06, "loss": 0.2886, "step": 945 }, { "epoch": 0.04341241797072186, "grad_norm": 0.48684173822402954, "learning_rate": 7.232415902140673e-06, "loss": 0.3695, "step": 946 }, { "epoch": 0.04345830847597632, "grad_norm": 0.6032975912094116, "learning_rate": 7.240061162079512e-06, "loss": 0.4136, "step": 947 }, { "epoch": 0.043504198981230785, "grad_norm": 0.48327934741973877, "learning_rate": 7.247706422018349e-06, "loss": 0.3671, "step": 948 }, { "epoch": 0.04355008948648525, "grad_norm": 0.4743063151836395, "learning_rate": 7.255351681957187e-06, "loss": 0.4433, "step": 949 }, { "epoch": 0.04359597999173971, "grad_norm": 0.4645388126373291, "learning_rate": 7.262996941896026e-06, "loss": 0.3776, "step": 950 }, { "epoch": 0.043641870496994174, "grad_norm": 0.5633676648139954, "learning_rate": 7.270642201834863e-06, "loss": 0.3596, "step": 951 }, { "epoch": 0.04368776100224864, "grad_norm": 0.5181424021720886, "learning_rate": 7.278287461773701e-06, "loss": 0.5116, "step": 952 }, { "epoch": 0.0437336515075031, "grad_norm": 0.4617640972137451, "learning_rate": 7.285932721712539e-06, "loss": 0.3885, "step": 953 }, { "epoch": 0.043779542012757564, "grad_norm": 0.4297509789466858, "learning_rate": 7.293577981651376e-06, "loss": 0.3777, "step": 954 }, { "epoch": 0.04382543251801203, "grad_norm": 0.48908212780952454, "learning_rate": 7.3012232415902155e-06, "loss": 0.3916, "step": 955 }, { "epoch": 0.04387132302326648, "grad_norm": 0.4835954010486603, "learning_rate": 7.308868501529053e-06, "loss": 0.442, "step": 956 }, { "epoch": 0.043917213528520946, "grad_norm": 0.6452763080596924, "learning_rate": 7.31651376146789e-06, "loss": 0.4267, "step": 957 }, { "epoch": 0.04396310403377541, "grad_norm": 0.48812511563301086, "learning_rate": 7.324159021406728e-06, "loss": 0.397, "step": 958 }, { "epoch": 0.04400899453902987, "grad_norm": 0.552649974822998, "learning_rate": 7.331804281345566e-06, "loss": 0.4923, "step": 959 }, { "epoch": 0.044054885044284335, "grad_norm": 0.5882543921470642, "learning_rate": 7.3394495412844045e-06, "loss": 0.4857, "step": 960 }, { "epoch": 0.0441007755495388, "grad_norm": 0.47771885991096497, "learning_rate": 7.347094801223243e-06, "loss": 0.3856, "step": 961 }, { "epoch": 0.04414666605479326, "grad_norm": 0.47342777252197266, "learning_rate": 7.35474006116208e-06, "loss": 0.4174, "step": 962 }, { "epoch": 0.044192556560047724, "grad_norm": 0.47401317954063416, "learning_rate": 7.362385321100918e-06, "loss": 0.4041, "step": 963 }, { "epoch": 0.04423844706530219, "grad_norm": 0.4642093777656555, "learning_rate": 7.370030581039755e-06, "loss": 0.3239, "step": 964 }, { "epoch": 0.04428433757055665, "grad_norm": 0.4429576098918915, "learning_rate": 7.377675840978594e-06, "loss": 0.3217, "step": 965 }, { "epoch": 0.044330228075811114, "grad_norm": 0.5023338794708252, "learning_rate": 7.385321100917432e-06, "loss": 0.4977, "step": 966 }, { "epoch": 0.04437611858106558, "grad_norm": 0.463932603597641, "learning_rate": 7.39296636085627e-06, "loss": 0.3722, "step": 967 }, { "epoch": 0.04442200908632004, "grad_norm": 0.4808919131755829, "learning_rate": 7.4006116207951074e-06, "loss": 0.4459, "step": 968 }, { "epoch": 0.0444678995915745, "grad_norm": 0.5225597023963928, "learning_rate": 7.408256880733946e-06, "loss": 0.4114, "step": 969 }, { "epoch": 0.044513790096828966, "grad_norm": 0.49161815643310547, "learning_rate": 7.415902140672784e-06, "loss": 0.3953, "step": 970 }, { "epoch": 0.04455968060208343, "grad_norm": 0.46979379653930664, "learning_rate": 7.4235474006116215e-06, "loss": 0.3508, "step": 971 }, { "epoch": 0.04460557110733789, "grad_norm": 0.4807756543159485, "learning_rate": 7.431192660550459e-06, "loss": 0.4344, "step": 972 }, { "epoch": 0.044651461612592355, "grad_norm": 0.45674464106559753, "learning_rate": 7.438837920489297e-06, "loss": 0.3935, "step": 973 }, { "epoch": 0.04469735211784682, "grad_norm": 0.4198451340198517, "learning_rate": 7.4464831804281355e-06, "loss": 0.3077, "step": 974 }, { "epoch": 0.04474324262310128, "grad_norm": 0.4758407473564148, "learning_rate": 7.454128440366973e-06, "loss": 0.4641, "step": 975 }, { "epoch": 0.044789133128355745, "grad_norm": 0.44405484199523926, "learning_rate": 7.461773700305811e-06, "loss": 0.3356, "step": 976 }, { "epoch": 0.04483502363361021, "grad_norm": 0.492264062166214, "learning_rate": 7.469418960244649e-06, "loss": 0.3786, "step": 977 }, { "epoch": 0.04488091413886467, "grad_norm": 0.5459489226341248, "learning_rate": 7.477064220183486e-06, "loss": 0.5428, "step": 978 }, { "epoch": 0.044926804644119134, "grad_norm": 0.4653685688972473, "learning_rate": 7.484709480122325e-06, "loss": 0.4453, "step": 979 }, { "epoch": 0.0449726951493736, "grad_norm": 0.44355902075767517, "learning_rate": 7.492354740061163e-06, "loss": 0.351, "step": 980 }, { "epoch": 0.04501858565462806, "grad_norm": 0.47885799407958984, "learning_rate": 7.500000000000001e-06, "loss": 0.3902, "step": 981 }, { "epoch": 0.04506447615988252, "grad_norm": 0.48619183897972107, "learning_rate": 7.5076452599388385e-06, "loss": 0.4266, "step": 982 }, { "epoch": 0.045110366665136986, "grad_norm": 0.5208702683448792, "learning_rate": 7.515290519877676e-06, "loss": 0.4965, "step": 983 }, { "epoch": 0.04515625717039145, "grad_norm": 0.48644906282424927, "learning_rate": 7.522935779816515e-06, "loss": 0.4389, "step": 984 }, { "epoch": 0.04520214767564591, "grad_norm": 0.5237307548522949, "learning_rate": 7.5305810397553525e-06, "loss": 0.5208, "step": 985 }, { "epoch": 0.04524803818090037, "grad_norm": 0.5199766159057617, "learning_rate": 7.53822629969419e-06, "loss": 0.5159, "step": 986 }, { "epoch": 0.04529392868615483, "grad_norm": 0.4719480276107788, "learning_rate": 7.545871559633028e-06, "loss": 0.4001, "step": 987 }, { "epoch": 0.045339819191409295, "grad_norm": 0.5043044090270996, "learning_rate": 7.553516819571866e-06, "loss": 0.4523, "step": 988 }, { "epoch": 0.04538570969666376, "grad_norm": 0.5083537101745605, "learning_rate": 7.561162079510704e-06, "loss": 0.4894, "step": 989 }, { "epoch": 0.04543160020191822, "grad_norm": 0.4714929163455963, "learning_rate": 7.568807339449542e-06, "loss": 0.3466, "step": 990 }, { "epoch": 0.045477490707172684, "grad_norm": 0.5016339421272278, "learning_rate": 7.57645259938838e-06, "loss": 0.4748, "step": 991 }, { "epoch": 0.04552338121242715, "grad_norm": 0.496962308883667, "learning_rate": 7.584097859327217e-06, "loss": 0.4705, "step": 992 }, { "epoch": 0.04556927171768161, "grad_norm": 0.4911184310913086, "learning_rate": 7.5917431192660555e-06, "loss": 0.4765, "step": 993 }, { "epoch": 0.04561516222293607, "grad_norm": 0.5251611471176147, "learning_rate": 7.599388379204894e-06, "loss": 0.507, "step": 994 }, { "epoch": 0.045661052728190536, "grad_norm": 0.5036035776138306, "learning_rate": 7.607033639143731e-06, "loss": 0.4383, "step": 995 }, { "epoch": 0.045706943233445, "grad_norm": 0.5988549590110779, "learning_rate": 7.6146788990825695e-06, "loss": 0.5687, "step": 996 }, { "epoch": 0.04575283373869946, "grad_norm": 0.519762396812439, "learning_rate": 7.622324159021407e-06, "loss": 0.3997, "step": 997 }, { "epoch": 0.045798724243953925, "grad_norm": 0.4208580553531647, "learning_rate": 7.629969418960246e-06, "loss": 0.3384, "step": 998 }, { "epoch": 0.04584461474920839, "grad_norm": 0.5738177299499512, "learning_rate": 7.637614678899084e-06, "loss": 0.5505, "step": 999 }, { "epoch": 0.04589050525446285, "grad_norm": 0.4675780236721039, "learning_rate": 7.645259938837921e-06, "loss": 0.4085, "step": 1000 }, { "epoch": 0.045936395759717315, "grad_norm": 0.5079315900802612, "learning_rate": 7.652905198776758e-06, "loss": 0.4242, "step": 1001 }, { "epoch": 0.04598228626497178, "grad_norm": 0.45595890283584595, "learning_rate": 7.660550458715596e-06, "loss": 0.351, "step": 1002 }, { "epoch": 0.04602817677022624, "grad_norm": 0.4902017414569855, "learning_rate": 7.668195718654435e-06, "loss": 0.412, "step": 1003 }, { "epoch": 0.046074067275480704, "grad_norm": 0.506516695022583, "learning_rate": 7.675840978593273e-06, "loss": 0.4548, "step": 1004 }, { "epoch": 0.04611995778073517, "grad_norm": 0.4780597984790802, "learning_rate": 7.68348623853211e-06, "loss": 0.4037, "step": 1005 }, { "epoch": 0.04616584828598963, "grad_norm": 0.5067412257194519, "learning_rate": 7.691131498470949e-06, "loss": 0.433, "step": 1006 }, { "epoch": 0.04621173879124409, "grad_norm": 0.46468278765678406, "learning_rate": 7.698776758409787e-06, "loss": 0.3715, "step": 1007 }, { "epoch": 0.046257629296498556, "grad_norm": 0.4942983090877533, "learning_rate": 7.706422018348626e-06, "loss": 0.4499, "step": 1008 }, { "epoch": 0.04630351980175302, "grad_norm": 0.46590471267700195, "learning_rate": 7.714067278287463e-06, "loss": 0.3844, "step": 1009 }, { "epoch": 0.04634941030700748, "grad_norm": 0.5226325988769531, "learning_rate": 7.7217125382263e-06, "loss": 0.4663, "step": 1010 }, { "epoch": 0.046395300812261946, "grad_norm": 0.5061750411987305, "learning_rate": 7.729357798165138e-06, "loss": 0.4606, "step": 1011 }, { "epoch": 0.04644119131751641, "grad_norm": 0.5064273476600647, "learning_rate": 7.737003058103975e-06, "loss": 0.4086, "step": 1012 }, { "epoch": 0.04648708182277087, "grad_norm": 0.46252965927124023, "learning_rate": 7.744648318042815e-06, "loss": 0.4199, "step": 1013 }, { "epoch": 0.046532972328025335, "grad_norm": 0.4614937901496887, "learning_rate": 7.752293577981652e-06, "loss": 0.396, "step": 1014 }, { "epoch": 0.04657886283327979, "grad_norm": 0.4680635929107666, "learning_rate": 7.75993883792049e-06, "loss": 0.3866, "step": 1015 }, { "epoch": 0.046624753338534254, "grad_norm": 0.45811694860458374, "learning_rate": 7.767584097859327e-06, "loss": 0.354, "step": 1016 }, { "epoch": 0.04667064384378872, "grad_norm": 0.4698444604873657, "learning_rate": 7.775229357798164e-06, "loss": 0.381, "step": 1017 }, { "epoch": 0.04671653434904318, "grad_norm": 0.4797396659851074, "learning_rate": 7.782874617737004e-06, "loss": 0.3426, "step": 1018 }, { "epoch": 0.04676242485429764, "grad_norm": 0.4425613284111023, "learning_rate": 7.790519877675841e-06, "loss": 0.3473, "step": 1019 }, { "epoch": 0.046808315359552106, "grad_norm": 0.4574027359485626, "learning_rate": 7.79816513761468e-06, "loss": 0.3769, "step": 1020 }, { "epoch": 0.04685420586480657, "grad_norm": 0.4499163031578064, "learning_rate": 7.805810397553518e-06, "loss": 0.3748, "step": 1021 }, { "epoch": 0.04690009637006103, "grad_norm": 0.48446786403656006, "learning_rate": 7.813455657492357e-06, "loss": 0.447, "step": 1022 }, { "epoch": 0.046945986875315496, "grad_norm": 0.49225109815597534, "learning_rate": 7.821100917431194e-06, "loss": 0.3767, "step": 1023 }, { "epoch": 0.04699187738056996, "grad_norm": 0.4963989555835724, "learning_rate": 7.828746177370032e-06, "loss": 0.4705, "step": 1024 }, { "epoch": 0.04703776788582442, "grad_norm": 0.4896707236766815, "learning_rate": 7.836391437308869e-06, "loss": 0.4356, "step": 1025 }, { "epoch": 0.047083658391078885, "grad_norm": 0.4789886474609375, "learning_rate": 7.844036697247707e-06, "loss": 0.343, "step": 1026 }, { "epoch": 0.04712954889633335, "grad_norm": 0.46077170968055725, "learning_rate": 7.851681957186546e-06, "loss": 0.343, "step": 1027 }, { "epoch": 0.04717543940158781, "grad_norm": 0.4612993597984314, "learning_rate": 7.859327217125383e-06, "loss": 0.3912, "step": 1028 }, { "epoch": 0.047221329906842274, "grad_norm": 0.447415828704834, "learning_rate": 7.86697247706422e-06, "loss": 0.3323, "step": 1029 }, { "epoch": 0.04726722041209674, "grad_norm": 0.4708919823169708, "learning_rate": 7.874617737003058e-06, "loss": 0.3577, "step": 1030 }, { "epoch": 0.0473131109173512, "grad_norm": 0.448274165391922, "learning_rate": 7.882262996941895e-06, "loss": 0.3848, "step": 1031 }, { "epoch": 0.047359001422605664, "grad_norm": 0.4808759093284607, "learning_rate": 7.889908256880735e-06, "loss": 0.4683, "step": 1032 }, { "epoch": 0.04740489192786013, "grad_norm": 0.5238559246063232, "learning_rate": 7.897553516819572e-06, "loss": 0.4138, "step": 1033 }, { "epoch": 0.04745078243311459, "grad_norm": 0.44173386693000793, "learning_rate": 7.905198776758411e-06, "loss": 0.3381, "step": 1034 }, { "epoch": 0.04749667293836905, "grad_norm": 0.42944467067718506, "learning_rate": 7.912844036697249e-06, "loss": 0.3466, "step": 1035 }, { "epoch": 0.047542563443623516, "grad_norm": 0.48287513852119446, "learning_rate": 7.920489296636086e-06, "loss": 0.3962, "step": 1036 }, { "epoch": 0.04758845394887798, "grad_norm": 0.5754697918891907, "learning_rate": 7.928134556574925e-06, "loss": 0.5859, "step": 1037 }, { "epoch": 0.04763434445413244, "grad_norm": 0.5029233694076538, "learning_rate": 7.935779816513763e-06, "loss": 0.5072, "step": 1038 }, { "epoch": 0.047680234959386905, "grad_norm": 0.476535439491272, "learning_rate": 7.9434250764526e-06, "loss": 0.3594, "step": 1039 }, { "epoch": 0.04772612546464137, "grad_norm": 0.4771139621734619, "learning_rate": 7.951070336391438e-06, "loss": 0.4406, "step": 1040 }, { "epoch": 0.04777201596989583, "grad_norm": 0.4652896821498871, "learning_rate": 7.958715596330275e-06, "loss": 0.3709, "step": 1041 }, { "epoch": 0.047817906475150294, "grad_norm": 0.4450419247150421, "learning_rate": 7.966360856269114e-06, "loss": 0.3827, "step": 1042 }, { "epoch": 0.04786379698040476, "grad_norm": 0.48332735896110535, "learning_rate": 7.974006116207952e-06, "loss": 0.4561, "step": 1043 }, { "epoch": 0.047909687485659214, "grad_norm": 0.5330381989479065, "learning_rate": 7.981651376146789e-06, "loss": 0.4423, "step": 1044 }, { "epoch": 0.04795557799091368, "grad_norm": 0.45897090435028076, "learning_rate": 7.989296636085627e-06, "loss": 0.3191, "step": 1045 }, { "epoch": 0.04800146849616814, "grad_norm": 0.47465670108795166, "learning_rate": 7.996941896024466e-06, "loss": 0.4216, "step": 1046 }, { "epoch": 0.0480473590014226, "grad_norm": 0.48171624541282654, "learning_rate": 8.004587155963303e-06, "loss": 0.4003, "step": 1047 }, { "epoch": 0.048093249506677066, "grad_norm": 0.47438323497772217, "learning_rate": 8.01223241590214e-06, "loss": 0.388, "step": 1048 }, { "epoch": 0.04813914001193153, "grad_norm": 0.6740774512290955, "learning_rate": 8.01987767584098e-06, "loss": 0.4373, "step": 1049 }, { "epoch": 0.04818503051718599, "grad_norm": 0.4899618327617645, "learning_rate": 8.027522935779817e-06, "loss": 0.4915, "step": 1050 }, { "epoch": 0.048230921022440455, "grad_norm": 0.4588882625102997, "learning_rate": 8.035168195718656e-06, "loss": 0.4083, "step": 1051 }, { "epoch": 0.04827681152769492, "grad_norm": 0.4788903594017029, "learning_rate": 8.042813455657494e-06, "loss": 0.4449, "step": 1052 }, { "epoch": 0.04832270203294938, "grad_norm": 0.44267287850379944, "learning_rate": 8.050458715596331e-06, "loss": 0.3238, "step": 1053 }, { "epoch": 0.048368592538203845, "grad_norm": 0.48027700185775757, "learning_rate": 8.058103975535169e-06, "loss": 0.3214, "step": 1054 }, { "epoch": 0.04841448304345831, "grad_norm": 0.4961937963962555, "learning_rate": 8.065749235474006e-06, "loss": 0.4385, "step": 1055 }, { "epoch": 0.04846037354871277, "grad_norm": 0.45443207025527954, "learning_rate": 8.073394495412845e-06, "loss": 0.3663, "step": 1056 }, { "epoch": 0.048506264053967234, "grad_norm": 0.4791746437549591, "learning_rate": 8.081039755351683e-06, "loss": 0.4246, "step": 1057 }, { "epoch": 0.0485521545592217, "grad_norm": 0.41660791635513306, "learning_rate": 8.08868501529052e-06, "loss": 0.2961, "step": 1058 }, { "epoch": 0.04859804506447616, "grad_norm": 0.6231920123100281, "learning_rate": 8.096330275229358e-06, "loss": 0.4865, "step": 1059 }, { "epoch": 0.04864393556973062, "grad_norm": 0.4434593617916107, "learning_rate": 8.103975535168197e-06, "loss": 0.328, "step": 1060 }, { "epoch": 0.048689826074985086, "grad_norm": 0.5092295408248901, "learning_rate": 8.111620795107034e-06, "loss": 0.4893, "step": 1061 }, { "epoch": 0.04873571658023955, "grad_norm": 1.117873191833496, "learning_rate": 8.119266055045872e-06, "loss": 0.3935, "step": 1062 }, { "epoch": 0.04878160708549401, "grad_norm": 0.5893769860267639, "learning_rate": 8.12691131498471e-06, "loss": 0.5127, "step": 1063 }, { "epoch": 0.048827497590748475, "grad_norm": 0.4874710440635681, "learning_rate": 8.134556574923548e-06, "loss": 0.369, "step": 1064 }, { "epoch": 0.04887338809600294, "grad_norm": 0.4883679747581482, "learning_rate": 8.142201834862386e-06, "loss": 0.3504, "step": 1065 }, { "epoch": 0.0489192786012574, "grad_norm": 0.6136423945426941, "learning_rate": 8.149847094801225e-06, "loss": 0.5323, "step": 1066 }, { "epoch": 0.048965169106511865, "grad_norm": 0.526439368724823, "learning_rate": 8.157492354740062e-06, "loss": 0.469, "step": 1067 }, { "epoch": 0.04901105961176633, "grad_norm": 0.4777195155620575, "learning_rate": 8.1651376146789e-06, "loss": 0.3699, "step": 1068 }, { "epoch": 0.04905695011702079, "grad_norm": 0.57276451587677, "learning_rate": 8.172782874617737e-06, "loss": 0.5136, "step": 1069 }, { "epoch": 0.049102840622275254, "grad_norm": 0.48893672227859497, "learning_rate": 8.180428134556576e-06, "loss": 0.4174, "step": 1070 }, { "epoch": 0.04914873112752972, "grad_norm": 0.5125523805618286, "learning_rate": 8.188073394495414e-06, "loss": 0.4925, "step": 1071 }, { "epoch": 0.04919462163278418, "grad_norm": 0.463162362575531, "learning_rate": 8.195718654434251e-06, "loss": 0.4295, "step": 1072 }, { "epoch": 0.04924051213803864, "grad_norm": 0.434839129447937, "learning_rate": 8.203363914373089e-06, "loss": 0.3128, "step": 1073 }, { "epoch": 0.0492864026432931, "grad_norm": 0.45718950033187866, "learning_rate": 8.211009174311926e-06, "loss": 0.3063, "step": 1074 }, { "epoch": 0.04933229314854756, "grad_norm": 0.6839661002159119, "learning_rate": 8.218654434250765e-06, "loss": 0.6405, "step": 1075 }, { "epoch": 0.049378183653802026, "grad_norm": 0.49980488419532776, "learning_rate": 8.226299694189603e-06, "loss": 0.465, "step": 1076 }, { "epoch": 0.04942407415905649, "grad_norm": 0.46241894364356995, "learning_rate": 8.233944954128442e-06, "loss": 0.3392, "step": 1077 }, { "epoch": 0.04946996466431095, "grad_norm": 0.4796869456768036, "learning_rate": 8.24159021406728e-06, "loss": 0.4357, "step": 1078 }, { "epoch": 0.049515855169565415, "grad_norm": 0.4757649600505829, "learning_rate": 8.249235474006117e-06, "loss": 0.3805, "step": 1079 }, { "epoch": 0.04956174567481988, "grad_norm": 0.49290862679481506, "learning_rate": 8.256880733944956e-06, "loss": 0.4157, "step": 1080 }, { "epoch": 0.04960763618007434, "grad_norm": 0.5197950005531311, "learning_rate": 8.264525993883793e-06, "loss": 0.4078, "step": 1081 }, { "epoch": 0.049653526685328804, "grad_norm": 0.4865533709526062, "learning_rate": 8.27217125382263e-06, "loss": 0.4001, "step": 1082 }, { "epoch": 0.04969941719058327, "grad_norm": 0.5275459289550781, "learning_rate": 8.279816513761468e-06, "loss": 0.4909, "step": 1083 }, { "epoch": 0.04974530769583773, "grad_norm": 0.5157485008239746, "learning_rate": 8.287461773700306e-06, "loss": 0.4823, "step": 1084 }, { "epoch": 0.04979119820109219, "grad_norm": 0.44135355949401855, "learning_rate": 8.295107033639145e-06, "loss": 0.3462, "step": 1085 }, { "epoch": 0.049837088706346656, "grad_norm": 0.47907930612564087, "learning_rate": 8.302752293577982e-06, "loss": 0.4662, "step": 1086 }, { "epoch": 0.04988297921160112, "grad_norm": 0.4304979741573334, "learning_rate": 8.31039755351682e-06, "loss": 0.3433, "step": 1087 }, { "epoch": 0.04992886971685558, "grad_norm": 0.5039060115814209, "learning_rate": 8.318042813455657e-06, "loss": 0.4455, "step": 1088 }, { "epoch": 0.049974760222110046, "grad_norm": 0.48326313495635986, "learning_rate": 8.325688073394496e-06, "loss": 0.4794, "step": 1089 }, { "epoch": 0.05002065072736451, "grad_norm": 0.4881344437599182, "learning_rate": 8.333333333333334e-06, "loss": 0.3763, "step": 1090 }, { "epoch": 0.05006654123261897, "grad_norm": 0.5004485845565796, "learning_rate": 8.340978593272173e-06, "loss": 0.5007, "step": 1091 }, { "epoch": 0.050112431737873435, "grad_norm": 0.4826316833496094, "learning_rate": 8.34862385321101e-06, "loss": 0.3967, "step": 1092 }, { "epoch": 0.0501583222431279, "grad_norm": 0.49310603737831116, "learning_rate": 8.356269113149848e-06, "loss": 0.3985, "step": 1093 }, { "epoch": 0.05020421274838236, "grad_norm": 0.45969587564468384, "learning_rate": 8.363914373088685e-06, "loss": 0.3666, "step": 1094 }, { "epoch": 0.050250103253636824, "grad_norm": 0.49299341440200806, "learning_rate": 8.371559633027524e-06, "loss": 0.4684, "step": 1095 }, { "epoch": 0.05029599375889129, "grad_norm": 0.5132526159286499, "learning_rate": 8.379204892966362e-06, "loss": 0.5313, "step": 1096 }, { "epoch": 0.05034188426414575, "grad_norm": 0.45509016513824463, "learning_rate": 8.3868501529052e-06, "loss": 0.4106, "step": 1097 }, { "epoch": 0.05038777476940021, "grad_norm": 0.48494666814804077, "learning_rate": 8.394495412844037e-06, "loss": 0.4712, "step": 1098 }, { "epoch": 0.050433665274654677, "grad_norm": 0.5069655179977417, "learning_rate": 8.402140672782876e-06, "loss": 0.5029, "step": 1099 }, { "epoch": 0.05047955577990914, "grad_norm": 0.4696100354194641, "learning_rate": 8.409785932721713e-06, "loss": 0.4046, "step": 1100 }, { "epoch": 0.0505254462851636, "grad_norm": 0.4602081775665283, "learning_rate": 8.41743119266055e-06, "loss": 0.3858, "step": 1101 }, { "epoch": 0.050571336790418066, "grad_norm": 0.44474631547927856, "learning_rate": 8.425076452599388e-06, "loss": 0.3764, "step": 1102 }, { "epoch": 0.05061722729567252, "grad_norm": 0.42140090465545654, "learning_rate": 8.432721712538227e-06, "loss": 0.3266, "step": 1103 }, { "epoch": 0.050663117800926985, "grad_norm": 0.5296817421913147, "learning_rate": 8.440366972477065e-06, "loss": 0.4808, "step": 1104 }, { "epoch": 0.05070900830618145, "grad_norm": 0.5089552402496338, "learning_rate": 8.448012232415902e-06, "loss": 0.5265, "step": 1105 }, { "epoch": 0.05075489881143591, "grad_norm": 0.6396613717079163, "learning_rate": 8.455657492354741e-06, "loss": 0.5338, "step": 1106 }, { "epoch": 0.050800789316690374, "grad_norm": 0.5080808997154236, "learning_rate": 8.463302752293579e-06, "loss": 0.427, "step": 1107 }, { "epoch": 0.05084667982194484, "grad_norm": 0.5155501961708069, "learning_rate": 8.470948012232416e-06, "loss": 0.4954, "step": 1108 }, { "epoch": 0.0508925703271993, "grad_norm": 0.4518275558948517, "learning_rate": 8.478593272171255e-06, "loss": 0.3507, "step": 1109 }, { "epoch": 0.050938460832453764, "grad_norm": 0.5265688300132751, "learning_rate": 8.486238532110093e-06, "loss": 0.4542, "step": 1110 }, { "epoch": 0.05098435133770823, "grad_norm": 0.46242907643318176, "learning_rate": 8.49388379204893e-06, "loss": 0.3573, "step": 1111 }, { "epoch": 0.05103024184296269, "grad_norm": 0.535705029964447, "learning_rate": 8.501529051987768e-06, "loss": 0.5078, "step": 1112 }, { "epoch": 0.05107613234821715, "grad_norm": 0.532334566116333, "learning_rate": 8.509174311926605e-06, "loss": 0.5033, "step": 1113 }, { "epoch": 0.051122022853471616, "grad_norm": 0.4832940697669983, "learning_rate": 8.516819571865444e-06, "loss": 0.494, "step": 1114 }, { "epoch": 0.05116791335872608, "grad_norm": 0.5339627265930176, "learning_rate": 8.524464831804282e-06, "loss": 0.4468, "step": 1115 }, { "epoch": 0.05121380386398054, "grad_norm": 0.4926060140132904, "learning_rate": 8.53211009174312e-06, "loss": 0.4503, "step": 1116 }, { "epoch": 0.051259694369235005, "grad_norm": 0.4966335594654083, "learning_rate": 8.539755351681958e-06, "loss": 0.3704, "step": 1117 }, { "epoch": 0.05130558487448947, "grad_norm": 0.5581846237182617, "learning_rate": 8.547400611620796e-06, "loss": 0.5624, "step": 1118 }, { "epoch": 0.05135147537974393, "grad_norm": 0.5431637167930603, "learning_rate": 8.555045871559633e-06, "loss": 0.5087, "step": 1119 }, { "epoch": 0.051397365884998394, "grad_norm": 0.5035592913627625, "learning_rate": 8.562691131498472e-06, "loss": 0.43, "step": 1120 }, { "epoch": 0.05144325639025286, "grad_norm": 0.5313109755516052, "learning_rate": 8.57033639143731e-06, "loss": 0.4949, "step": 1121 }, { "epoch": 0.05148914689550732, "grad_norm": 0.5008397102355957, "learning_rate": 8.577981651376147e-06, "loss": 0.5005, "step": 1122 }, { "epoch": 0.051535037400761784, "grad_norm": 0.48177602887153625, "learning_rate": 8.585626911314986e-06, "loss": 0.4194, "step": 1123 }, { "epoch": 0.05158092790601625, "grad_norm": 0.48903730511665344, "learning_rate": 8.593272171253824e-06, "loss": 0.5026, "step": 1124 }, { "epoch": 0.05162681841127071, "grad_norm": 0.561829149723053, "learning_rate": 8.600917431192661e-06, "loss": 0.4368, "step": 1125 }, { "epoch": 0.05167270891652517, "grad_norm": 0.508267343044281, "learning_rate": 8.608562691131499e-06, "loss": 0.4451, "step": 1126 }, { "epoch": 0.051718599421779636, "grad_norm": 0.4770166575908661, "learning_rate": 8.616207951070336e-06, "loss": 0.4039, "step": 1127 }, { "epoch": 0.0517644899270341, "grad_norm": 0.458354115486145, "learning_rate": 8.623853211009175e-06, "loss": 0.379, "step": 1128 }, { "epoch": 0.05181038043228856, "grad_norm": 0.5431506037712097, "learning_rate": 8.631498470948013e-06, "loss": 0.6012, "step": 1129 }, { "epoch": 0.051856270937543025, "grad_norm": 0.5154648423194885, "learning_rate": 8.63914373088685e-06, "loss": 0.4835, "step": 1130 }, { "epoch": 0.05190216144279749, "grad_norm": 0.494040310382843, "learning_rate": 8.646788990825688e-06, "loss": 0.3786, "step": 1131 }, { "epoch": 0.05194805194805195, "grad_norm": 0.48181861639022827, "learning_rate": 8.654434250764527e-06, "loss": 0.4693, "step": 1132 }, { "epoch": 0.05199394245330641, "grad_norm": 0.45175430178642273, "learning_rate": 8.662079510703364e-06, "loss": 0.329, "step": 1133 }, { "epoch": 0.05203983295856087, "grad_norm": 0.48362353444099426, "learning_rate": 8.669724770642203e-06, "loss": 0.4633, "step": 1134 }, { "epoch": 0.052085723463815334, "grad_norm": 0.532863199710846, "learning_rate": 8.677370030581041e-06, "loss": 0.5269, "step": 1135 }, { "epoch": 0.0521316139690698, "grad_norm": 0.46966904401779175, "learning_rate": 8.685015290519878e-06, "loss": 0.3886, "step": 1136 }, { "epoch": 0.05217750447432426, "grad_norm": 0.7479985356330872, "learning_rate": 8.692660550458716e-06, "loss": 0.4961, "step": 1137 }, { "epoch": 0.05222339497957872, "grad_norm": 0.49896538257598877, "learning_rate": 8.700305810397555e-06, "loss": 0.4537, "step": 1138 }, { "epoch": 0.052269285484833186, "grad_norm": 0.45643138885498047, "learning_rate": 8.707951070336392e-06, "loss": 0.3589, "step": 1139 }, { "epoch": 0.05231517599008765, "grad_norm": 0.5456955432891846, "learning_rate": 8.71559633027523e-06, "loss": 0.4759, "step": 1140 }, { "epoch": 0.05236106649534211, "grad_norm": 0.47742560505867004, "learning_rate": 8.723241590214067e-06, "loss": 0.4256, "step": 1141 }, { "epoch": 0.052406957000596575, "grad_norm": 0.4854537546634674, "learning_rate": 8.730886850152905e-06, "loss": 0.4826, "step": 1142 }, { "epoch": 0.05245284750585104, "grad_norm": 0.4848230183124542, "learning_rate": 8.738532110091744e-06, "loss": 0.3266, "step": 1143 }, { "epoch": 0.0524987380111055, "grad_norm": 0.4757491648197174, "learning_rate": 8.746177370030581e-06, "loss": 0.421, "step": 1144 }, { "epoch": 0.052544628516359965, "grad_norm": 0.45205485820770264, "learning_rate": 8.753822629969419e-06, "loss": 0.3446, "step": 1145 }, { "epoch": 0.05259051902161443, "grad_norm": 0.514741063117981, "learning_rate": 8.761467889908258e-06, "loss": 0.4837, "step": 1146 }, { "epoch": 0.05263640952686889, "grad_norm": 0.5387755632400513, "learning_rate": 8.769113149847095e-06, "loss": 0.477, "step": 1147 }, { "epoch": 0.052682300032123354, "grad_norm": 0.500912070274353, "learning_rate": 8.776758409785935e-06, "loss": 0.5217, "step": 1148 }, { "epoch": 0.05272819053737782, "grad_norm": 0.5131441354751587, "learning_rate": 8.784403669724772e-06, "loss": 0.4837, "step": 1149 }, { "epoch": 0.05277408104263228, "grad_norm": 0.4618057608604431, "learning_rate": 8.79204892966361e-06, "loss": 0.3989, "step": 1150 }, { "epoch": 0.05281997154788674, "grad_norm": 0.4999014735221863, "learning_rate": 8.799694189602447e-06, "loss": 0.4462, "step": 1151 }, { "epoch": 0.052865862053141206, "grad_norm": 0.5007792115211487, "learning_rate": 8.807339449541286e-06, "loss": 0.4841, "step": 1152 }, { "epoch": 0.05291175255839567, "grad_norm": 0.4365804195404053, "learning_rate": 8.814984709480123e-06, "loss": 0.3408, "step": 1153 }, { "epoch": 0.05295764306365013, "grad_norm": 0.46871837973594666, "learning_rate": 8.822629969418961e-06, "loss": 0.3868, "step": 1154 }, { "epoch": 0.053003533568904596, "grad_norm": 0.4836746156215668, "learning_rate": 8.830275229357798e-06, "loss": 0.3902, "step": 1155 }, { "epoch": 0.05304942407415906, "grad_norm": 0.48468664288520813, "learning_rate": 8.837920489296636e-06, "loss": 0.412, "step": 1156 }, { "epoch": 0.05309531457941352, "grad_norm": 0.49635201692581177, "learning_rate": 8.845565749235475e-06, "loss": 0.3932, "step": 1157 }, { "epoch": 0.053141205084667985, "grad_norm": 0.4699406921863556, "learning_rate": 8.853211009174312e-06, "loss": 0.4021, "step": 1158 }, { "epoch": 0.05318709558992245, "grad_norm": 0.4714055359363556, "learning_rate": 8.86085626911315e-06, "loss": 0.4062, "step": 1159 }, { "epoch": 0.05323298609517691, "grad_norm": 0.48148754239082336, "learning_rate": 8.868501529051989e-06, "loss": 0.3957, "step": 1160 }, { "epoch": 0.053278876600431374, "grad_norm": 0.4738316535949707, "learning_rate": 8.876146788990826e-06, "loss": 0.3604, "step": 1161 }, { "epoch": 0.05332476710568583, "grad_norm": 0.5054348111152649, "learning_rate": 8.883792048929664e-06, "loss": 0.4233, "step": 1162 }, { "epoch": 0.05337065761094029, "grad_norm": 0.48546209931373596, "learning_rate": 8.891437308868503e-06, "loss": 0.4192, "step": 1163 }, { "epoch": 0.053416548116194756, "grad_norm": 0.4655562937259674, "learning_rate": 8.89908256880734e-06, "loss": 0.3959, "step": 1164 }, { "epoch": 0.05346243862144922, "grad_norm": 0.4764060080051422, "learning_rate": 8.906727828746178e-06, "loss": 0.4215, "step": 1165 }, { "epoch": 0.05350832912670368, "grad_norm": 0.5141769051551819, "learning_rate": 8.914373088685015e-06, "loss": 0.4131, "step": 1166 }, { "epoch": 0.053554219631958146, "grad_norm": 0.48103976249694824, "learning_rate": 8.922018348623855e-06, "loss": 0.3503, "step": 1167 }, { "epoch": 0.05360011013721261, "grad_norm": 0.4506582021713257, "learning_rate": 8.929663608562692e-06, "loss": 0.3688, "step": 1168 }, { "epoch": 0.05364600064246707, "grad_norm": 0.5111414194107056, "learning_rate": 8.93730886850153e-06, "loss": 0.4357, "step": 1169 }, { "epoch": 0.053691891147721535, "grad_norm": 0.4592117667198181, "learning_rate": 8.944954128440367e-06, "loss": 0.3724, "step": 1170 }, { "epoch": 0.053737781652976, "grad_norm": 0.45227017998695374, "learning_rate": 8.952599388379206e-06, "loss": 0.3607, "step": 1171 }, { "epoch": 0.05378367215823046, "grad_norm": 0.6733904480934143, "learning_rate": 8.960244648318043e-06, "loss": 0.5963, "step": 1172 }, { "epoch": 0.053829562663484924, "grad_norm": 0.517734169960022, "learning_rate": 8.967889908256881e-06, "loss": 0.5069, "step": 1173 }, { "epoch": 0.05387545316873939, "grad_norm": 0.5107107758522034, "learning_rate": 8.97553516819572e-06, "loss": 0.4816, "step": 1174 }, { "epoch": 0.05392134367399385, "grad_norm": 0.48223477602005005, "learning_rate": 8.983180428134558e-06, "loss": 0.341, "step": 1175 }, { "epoch": 0.05396723417924831, "grad_norm": 0.5055661201477051, "learning_rate": 8.990825688073395e-06, "loss": 0.4068, "step": 1176 }, { "epoch": 0.05401312468450278, "grad_norm": 0.44902464747428894, "learning_rate": 8.998470948012234e-06, "loss": 0.3577, "step": 1177 }, { "epoch": 0.05405901518975724, "grad_norm": 0.47143155336380005, "learning_rate": 9.006116207951072e-06, "loss": 0.3626, "step": 1178 }, { "epoch": 0.0541049056950117, "grad_norm": 0.4975210726261139, "learning_rate": 9.013761467889909e-06, "loss": 0.3911, "step": 1179 }, { "epoch": 0.054150796200266166, "grad_norm": 0.46615511178970337, "learning_rate": 9.021406727828746e-06, "loss": 0.3652, "step": 1180 }, { "epoch": 0.05419668670552063, "grad_norm": 0.5239560008049011, "learning_rate": 9.029051987767586e-06, "loss": 0.4994, "step": 1181 }, { "epoch": 0.05424257721077509, "grad_norm": 0.46002134680747986, "learning_rate": 9.036697247706423e-06, "loss": 0.346, "step": 1182 }, { "epoch": 0.054288467716029555, "grad_norm": 0.5116428136825562, "learning_rate": 9.04434250764526e-06, "loss": 0.4643, "step": 1183 }, { "epoch": 0.05433435822128402, "grad_norm": 0.5150168538093567, "learning_rate": 9.051987767584098e-06, "loss": 0.4952, "step": 1184 }, { "epoch": 0.05438024872653848, "grad_norm": 0.5286275148391724, "learning_rate": 9.059633027522935e-06, "loss": 0.476, "step": 1185 }, { "epoch": 0.054426139231792944, "grad_norm": 0.5359267592430115, "learning_rate": 9.067278287461775e-06, "loss": 0.544, "step": 1186 }, { "epoch": 0.05447202973704741, "grad_norm": 0.5676963329315186, "learning_rate": 9.074923547400612e-06, "loss": 0.5112, "step": 1187 }, { "epoch": 0.05451792024230187, "grad_norm": 0.48615550994873047, "learning_rate": 9.08256880733945e-06, "loss": 0.4037, "step": 1188 }, { "epoch": 0.054563810747556334, "grad_norm": 0.46548187732696533, "learning_rate": 9.090214067278289e-06, "loss": 0.3574, "step": 1189 }, { "epoch": 0.0546097012528108, "grad_norm": 0.552119791507721, "learning_rate": 9.097859327217126e-06, "loss": 0.4812, "step": 1190 }, { "epoch": 0.05465559175806526, "grad_norm": 0.5597963333129883, "learning_rate": 9.105504587155965e-06, "loss": 0.4233, "step": 1191 }, { "epoch": 0.054701482263319716, "grad_norm": 0.5239797234535217, "learning_rate": 9.113149847094803e-06, "loss": 0.4471, "step": 1192 }, { "epoch": 0.05474737276857418, "grad_norm": 0.58037269115448, "learning_rate": 9.12079510703364e-06, "loss": 0.5173, "step": 1193 }, { "epoch": 0.05479326327382864, "grad_norm": 0.5209234356880188, "learning_rate": 9.128440366972477e-06, "loss": 0.4571, "step": 1194 }, { "epoch": 0.054839153779083105, "grad_norm": 0.6197280287742615, "learning_rate": 9.136085626911317e-06, "loss": 0.444, "step": 1195 }, { "epoch": 0.05488504428433757, "grad_norm": 0.49763816595077515, "learning_rate": 9.143730886850154e-06, "loss": 0.3774, "step": 1196 }, { "epoch": 0.05493093478959203, "grad_norm": 0.48769843578338623, "learning_rate": 9.151376146788992e-06, "loss": 0.4328, "step": 1197 }, { "epoch": 0.054976825294846494, "grad_norm": 0.47927042841911316, "learning_rate": 9.159021406727829e-06, "loss": 0.3704, "step": 1198 }, { "epoch": 0.05502271580010096, "grad_norm": 0.47584062814712524, "learning_rate": 9.166666666666666e-06, "loss": 0.4078, "step": 1199 }, { "epoch": 0.05506860630535542, "grad_norm": 0.5033342838287354, "learning_rate": 9.174311926605506e-06, "loss": 0.4563, "step": 1200 }, { "epoch": 0.055114496810609884, "grad_norm": 0.5032165050506592, "learning_rate": 9.181957186544343e-06, "loss": 0.3943, "step": 1201 }, { "epoch": 0.05516038731586435, "grad_norm": 0.4666723310947418, "learning_rate": 9.18960244648318e-06, "loss": 0.4284, "step": 1202 }, { "epoch": 0.05520627782111881, "grad_norm": 0.543606162071228, "learning_rate": 9.19724770642202e-06, "loss": 0.472, "step": 1203 }, { "epoch": 0.05525216832637327, "grad_norm": 0.46025291085243225, "learning_rate": 9.204892966360857e-06, "loss": 0.3949, "step": 1204 }, { "epoch": 0.055298058831627736, "grad_norm": 0.4508424997329712, "learning_rate": 9.212538226299696e-06, "loss": 0.3562, "step": 1205 }, { "epoch": 0.0553439493368822, "grad_norm": 0.4536934494972229, "learning_rate": 9.220183486238534e-06, "loss": 0.35, "step": 1206 }, { "epoch": 0.05538983984213666, "grad_norm": 0.502681314945221, "learning_rate": 9.227828746177371e-06, "loss": 0.431, "step": 1207 }, { "epoch": 0.055435730347391125, "grad_norm": 0.4763748049736023, "learning_rate": 9.235474006116209e-06, "loss": 0.4265, "step": 1208 }, { "epoch": 0.05548162085264559, "grad_norm": 0.4737585186958313, "learning_rate": 9.243119266055046e-06, "loss": 0.4079, "step": 1209 }, { "epoch": 0.05552751135790005, "grad_norm": 0.5160509347915649, "learning_rate": 9.250764525993885e-06, "loss": 0.4793, "step": 1210 }, { "epoch": 0.055573401863154515, "grad_norm": 0.44092419743537903, "learning_rate": 9.258409785932723e-06, "loss": 0.3411, "step": 1211 }, { "epoch": 0.05561929236840898, "grad_norm": 0.5281226634979248, "learning_rate": 9.26605504587156e-06, "loss": 0.4619, "step": 1212 }, { "epoch": 0.05566518287366344, "grad_norm": 0.5943772792816162, "learning_rate": 9.273700305810397e-06, "loss": 0.3918, "step": 1213 }, { "epoch": 0.055711073378917904, "grad_norm": 0.49317190051078796, "learning_rate": 9.281345565749235e-06, "loss": 0.4246, "step": 1214 }, { "epoch": 0.05575696388417237, "grad_norm": 0.496288925409317, "learning_rate": 9.288990825688074e-06, "loss": 0.4581, "step": 1215 }, { "epoch": 0.05580285438942683, "grad_norm": 0.4898359775543213, "learning_rate": 9.296636085626912e-06, "loss": 0.3975, "step": 1216 }, { "epoch": 0.05584874489468129, "grad_norm": 0.5074936747550964, "learning_rate": 9.30428134556575e-06, "loss": 0.422, "step": 1217 }, { "epoch": 0.055894635399935756, "grad_norm": 0.6084580421447754, "learning_rate": 9.311926605504588e-06, "loss": 0.5219, "step": 1218 }, { "epoch": 0.05594052590519022, "grad_norm": 0.4942367970943451, "learning_rate": 9.319571865443426e-06, "loss": 0.5041, "step": 1219 }, { "epoch": 0.05598641641044468, "grad_norm": 0.5235260725021362, "learning_rate": 9.327217125382265e-06, "loss": 0.4732, "step": 1220 }, { "epoch": 0.05603230691569914, "grad_norm": 0.46256786584854126, "learning_rate": 9.334862385321102e-06, "loss": 0.3409, "step": 1221 }, { "epoch": 0.0560781974209536, "grad_norm": 0.49697116017341614, "learning_rate": 9.34250764525994e-06, "loss": 0.3842, "step": 1222 }, { "epoch": 0.056124087926208065, "grad_norm": 0.5227659344673157, "learning_rate": 9.350152905198777e-06, "loss": 0.47, "step": 1223 }, { "epoch": 0.05616997843146253, "grad_norm": 0.5589193105697632, "learning_rate": 9.357798165137616e-06, "loss": 0.5295, "step": 1224 }, { "epoch": 0.05621586893671699, "grad_norm": 0.44184815883636475, "learning_rate": 9.365443425076454e-06, "loss": 0.3563, "step": 1225 }, { "epoch": 0.056261759441971454, "grad_norm": 0.48488152027130127, "learning_rate": 9.373088685015291e-06, "loss": 0.4578, "step": 1226 }, { "epoch": 0.05630764994722592, "grad_norm": 0.598098874092102, "learning_rate": 9.380733944954129e-06, "loss": 0.4705, "step": 1227 }, { "epoch": 0.05635354045248038, "grad_norm": 0.48798874020576477, "learning_rate": 9.388379204892966e-06, "loss": 0.3858, "step": 1228 }, { "epoch": 0.05639943095773484, "grad_norm": 0.5060775876045227, "learning_rate": 9.396024464831805e-06, "loss": 0.4714, "step": 1229 }, { "epoch": 0.056445321462989306, "grad_norm": 0.44378143548965454, "learning_rate": 9.403669724770643e-06, "loss": 0.358, "step": 1230 }, { "epoch": 0.05649121196824377, "grad_norm": 0.472260981798172, "learning_rate": 9.411314984709482e-06, "loss": 0.3976, "step": 1231 }, { "epoch": 0.05653710247349823, "grad_norm": 0.5407238602638245, "learning_rate": 9.41896024464832e-06, "loss": 0.44, "step": 1232 }, { "epoch": 0.056582992978752696, "grad_norm": 0.5374550819396973, "learning_rate": 9.426605504587157e-06, "loss": 0.5074, "step": 1233 }, { "epoch": 0.05662888348400716, "grad_norm": 0.48143428564071655, "learning_rate": 9.434250764525996e-06, "loss": 0.4651, "step": 1234 }, { "epoch": 0.05667477398926162, "grad_norm": 0.5040050148963928, "learning_rate": 9.441896024464833e-06, "loss": 0.3929, "step": 1235 }, { "epoch": 0.056720664494516085, "grad_norm": 0.4760933518409729, "learning_rate": 9.44954128440367e-06, "loss": 0.3841, "step": 1236 }, { "epoch": 0.05676655499977055, "grad_norm": 0.48517096042633057, "learning_rate": 9.457186544342508e-06, "loss": 0.3814, "step": 1237 }, { "epoch": 0.05681244550502501, "grad_norm": 0.4878256320953369, "learning_rate": 9.464831804281346e-06, "loss": 0.3942, "step": 1238 }, { "epoch": 0.056858336010279474, "grad_norm": 0.48731502890586853, "learning_rate": 9.472477064220185e-06, "loss": 0.4196, "step": 1239 }, { "epoch": 0.05690422651553394, "grad_norm": 0.572546124458313, "learning_rate": 9.480122324159022e-06, "loss": 0.4353, "step": 1240 }, { "epoch": 0.0569501170207884, "grad_norm": 0.5423152446746826, "learning_rate": 9.48776758409786e-06, "loss": 0.5241, "step": 1241 }, { "epoch": 0.05699600752604286, "grad_norm": 0.4827650785446167, "learning_rate": 9.495412844036697e-06, "loss": 0.356, "step": 1242 }, { "epoch": 0.057041898031297326, "grad_norm": 0.5505901575088501, "learning_rate": 9.503058103975536e-06, "loss": 0.5216, "step": 1243 }, { "epoch": 0.05708778853655179, "grad_norm": 0.4636831283569336, "learning_rate": 9.510703363914374e-06, "loss": 0.3311, "step": 1244 }, { "epoch": 0.05713367904180625, "grad_norm": 0.4764443039894104, "learning_rate": 9.518348623853211e-06, "loss": 0.3514, "step": 1245 }, { "epoch": 0.057179569547060716, "grad_norm": 0.47547975182533264, "learning_rate": 9.52599388379205e-06, "loss": 0.4474, "step": 1246 }, { "epoch": 0.05722546005231518, "grad_norm": 0.4269947409629822, "learning_rate": 9.533639143730888e-06, "loss": 0.3651, "step": 1247 }, { "epoch": 0.05727135055756964, "grad_norm": 0.4777385890483856, "learning_rate": 9.541284403669727e-06, "loss": 0.3895, "step": 1248 }, { "epoch": 0.057317241062824105, "grad_norm": 0.5050618052482605, "learning_rate": 9.548929663608564e-06, "loss": 0.4435, "step": 1249 }, { "epoch": 0.05736313156807856, "grad_norm": 0.4309498965740204, "learning_rate": 9.556574923547402e-06, "loss": 0.3661, "step": 1250 }, { "epoch": 0.057409022073333024, "grad_norm": 0.4313931465148926, "learning_rate": 9.56422018348624e-06, "loss": 0.3227, "step": 1251 }, { "epoch": 0.05745491257858749, "grad_norm": 0.5016030073165894, "learning_rate": 9.571865443425077e-06, "loss": 0.4767, "step": 1252 }, { "epoch": 0.05750080308384195, "grad_norm": 0.49609053134918213, "learning_rate": 9.579510703363916e-06, "loss": 0.4073, "step": 1253 }, { "epoch": 0.05754669358909641, "grad_norm": 0.42334234714508057, "learning_rate": 9.587155963302753e-06, "loss": 0.3122, "step": 1254 }, { "epoch": 0.05759258409435088, "grad_norm": 0.45502781867980957, "learning_rate": 9.59480122324159e-06, "loss": 0.4212, "step": 1255 }, { "epoch": 0.05763847459960534, "grad_norm": 0.47657495737075806, "learning_rate": 9.602446483180428e-06, "loss": 0.3769, "step": 1256 }, { "epoch": 0.0576843651048598, "grad_norm": 0.4626166522502899, "learning_rate": 9.610091743119267e-06, "loss": 0.3816, "step": 1257 }, { "epoch": 0.057730255610114266, "grad_norm": 0.4420531392097473, "learning_rate": 9.617737003058105e-06, "loss": 0.3481, "step": 1258 }, { "epoch": 0.05777614611536873, "grad_norm": 0.47691357135772705, "learning_rate": 9.625382262996942e-06, "loss": 0.4452, "step": 1259 }, { "epoch": 0.05782203662062319, "grad_norm": 0.5204956531524658, "learning_rate": 9.633027522935781e-06, "loss": 0.505, "step": 1260 }, { "epoch": 0.057867927125877655, "grad_norm": 0.4837752878665924, "learning_rate": 9.640672782874619e-06, "loss": 0.3862, "step": 1261 }, { "epoch": 0.05791381763113212, "grad_norm": 0.49503791332244873, "learning_rate": 9.648318042813456e-06, "loss": 0.3766, "step": 1262 }, { "epoch": 0.05795970813638658, "grad_norm": 0.46857956051826477, "learning_rate": 9.655963302752295e-06, "loss": 0.3909, "step": 1263 }, { "epoch": 0.058005598641641044, "grad_norm": 0.49671974778175354, "learning_rate": 9.663608562691133e-06, "loss": 0.404, "step": 1264 }, { "epoch": 0.05805148914689551, "grad_norm": 0.4362523555755615, "learning_rate": 9.67125382262997e-06, "loss": 0.3093, "step": 1265 }, { "epoch": 0.05809737965214997, "grad_norm": 0.4402255713939667, "learning_rate": 9.678899082568808e-06, "loss": 0.3176, "step": 1266 }, { "epoch": 0.058143270157404434, "grad_norm": 0.516869843006134, "learning_rate": 9.686544342507645e-06, "loss": 0.4923, "step": 1267 }, { "epoch": 0.0581891606626589, "grad_norm": 0.5135446190834045, "learning_rate": 9.694189602446484e-06, "loss": 0.4981, "step": 1268 }, { "epoch": 0.05823505116791336, "grad_norm": 0.4470340609550476, "learning_rate": 9.701834862385322e-06, "loss": 0.3615, "step": 1269 }, { "epoch": 0.05828094167316782, "grad_norm": 0.43490803241729736, "learning_rate": 9.709480122324159e-06, "loss": 0.3296, "step": 1270 }, { "epoch": 0.058326832178422286, "grad_norm": 0.4599528908729553, "learning_rate": 9.717125382262997e-06, "loss": 0.4019, "step": 1271 }, { "epoch": 0.05837272268367675, "grad_norm": 0.4902098476886749, "learning_rate": 9.724770642201836e-06, "loss": 0.4476, "step": 1272 }, { "epoch": 0.05841861318893121, "grad_norm": 0.458825945854187, "learning_rate": 9.732415902140673e-06, "loss": 0.3687, "step": 1273 }, { "epoch": 0.058464503694185675, "grad_norm": 0.497462660074234, "learning_rate": 9.740061162079512e-06, "loss": 0.4179, "step": 1274 }, { "epoch": 0.05851039419944014, "grad_norm": 0.49030202627182007, "learning_rate": 9.74770642201835e-06, "loss": 0.4212, "step": 1275 }, { "epoch": 0.0585562847046946, "grad_norm": 0.5183060169219971, "learning_rate": 9.755351681957187e-06, "loss": 0.4335, "step": 1276 }, { "epoch": 0.058602175209949064, "grad_norm": 0.4406938850879669, "learning_rate": 9.762996941896026e-06, "loss": 0.3798, "step": 1277 }, { "epoch": 0.05864806571520353, "grad_norm": 0.49757298827171326, "learning_rate": 9.770642201834864e-06, "loss": 0.4439, "step": 1278 }, { "epoch": 0.05869395622045799, "grad_norm": 0.5338001847267151, "learning_rate": 9.778287461773701e-06, "loss": 0.4791, "step": 1279 }, { "epoch": 0.05873984672571245, "grad_norm": 0.451894611120224, "learning_rate": 9.785932721712539e-06, "loss": 0.3487, "step": 1280 }, { "epoch": 0.05878573723096691, "grad_norm": 0.5234604477882385, "learning_rate": 9.793577981651376e-06, "loss": 0.4761, "step": 1281 }, { "epoch": 0.05883162773622137, "grad_norm": 0.46757015585899353, "learning_rate": 9.801223241590215e-06, "loss": 0.3682, "step": 1282 }, { "epoch": 0.058877518241475836, "grad_norm": 0.45961517095565796, "learning_rate": 9.808868501529053e-06, "loss": 0.3523, "step": 1283 }, { "epoch": 0.0589234087467303, "grad_norm": 0.4827962815761566, "learning_rate": 9.81651376146789e-06, "loss": 0.3985, "step": 1284 }, { "epoch": 0.05896929925198476, "grad_norm": 0.48937731981277466, "learning_rate": 9.824159021406728e-06, "loss": 0.4421, "step": 1285 }, { "epoch": 0.059015189757239225, "grad_norm": 0.48009026050567627, "learning_rate": 9.831804281345567e-06, "loss": 0.4754, "step": 1286 }, { "epoch": 0.05906108026249369, "grad_norm": 0.4757213592529297, "learning_rate": 9.839449541284404e-06, "loss": 0.4376, "step": 1287 }, { "epoch": 0.05910697076774815, "grad_norm": 0.5103703141212463, "learning_rate": 9.847094801223243e-06, "loss": 0.4093, "step": 1288 }, { "epoch": 0.059152861273002615, "grad_norm": 0.4713591933250427, "learning_rate": 9.85474006116208e-06, "loss": 0.3903, "step": 1289 }, { "epoch": 0.05919875177825708, "grad_norm": 0.4589911699295044, "learning_rate": 9.862385321100918e-06, "loss": 0.4012, "step": 1290 }, { "epoch": 0.05924464228351154, "grad_norm": 0.4538578391075134, "learning_rate": 9.870030581039756e-06, "loss": 0.3765, "step": 1291 }, { "epoch": 0.059290532788766004, "grad_norm": 0.5259283781051636, "learning_rate": 9.877675840978595e-06, "loss": 0.5194, "step": 1292 }, { "epoch": 0.05933642329402047, "grad_norm": 0.47437596321105957, "learning_rate": 9.885321100917432e-06, "loss": 0.4035, "step": 1293 }, { "epoch": 0.05938231379927493, "grad_norm": 0.4400242865085602, "learning_rate": 9.89296636085627e-06, "loss": 0.3607, "step": 1294 }, { "epoch": 0.05942820430452939, "grad_norm": 0.5560067892074585, "learning_rate": 9.900611620795107e-06, "loss": 0.4591, "step": 1295 }, { "epoch": 0.059474094809783856, "grad_norm": 0.5343872308731079, "learning_rate": 9.908256880733946e-06, "loss": 0.4959, "step": 1296 }, { "epoch": 0.05951998531503832, "grad_norm": 0.5153381824493408, "learning_rate": 9.915902140672784e-06, "loss": 0.4717, "step": 1297 }, { "epoch": 0.05956587582029278, "grad_norm": 0.44272804260253906, "learning_rate": 9.923547400611621e-06, "loss": 0.3202, "step": 1298 }, { "epoch": 0.059611766325547245, "grad_norm": 0.4977368712425232, "learning_rate": 9.931192660550459e-06, "loss": 0.4114, "step": 1299 }, { "epoch": 0.05965765683080171, "grad_norm": 0.46729445457458496, "learning_rate": 9.938837920489298e-06, "loss": 0.4007, "step": 1300 }, { "epoch": 0.05970354733605617, "grad_norm": 0.4667612910270691, "learning_rate": 9.946483180428135e-06, "loss": 0.3873, "step": 1301 }, { "epoch": 0.059749437841310635, "grad_norm": 0.5385662317276001, "learning_rate": 9.954128440366973e-06, "loss": 0.4531, "step": 1302 }, { "epoch": 0.0597953283465651, "grad_norm": 0.4886770248413086, "learning_rate": 9.961773700305812e-06, "loss": 0.4461, "step": 1303 }, { "epoch": 0.05984121885181956, "grad_norm": 0.4952675700187683, "learning_rate": 9.96941896024465e-06, "loss": 0.3985, "step": 1304 }, { "epoch": 0.059887109357074024, "grad_norm": 0.46684184670448303, "learning_rate": 9.977064220183487e-06, "loss": 0.3868, "step": 1305 }, { "epoch": 0.05993299986232849, "grad_norm": 0.453785240650177, "learning_rate": 9.984709480122326e-06, "loss": 0.3615, "step": 1306 }, { "epoch": 0.05997889036758295, "grad_norm": 0.4815285801887512, "learning_rate": 9.992354740061163e-06, "loss": 0.3839, "step": 1307 }, { "epoch": 0.06002478087283741, "grad_norm": 0.4813125431537628, "learning_rate": 1e-05, "loss": 0.4371, "step": 1308 }, { "epoch": 0.06007067137809187, "grad_norm": 0.5038076639175415, "learning_rate": 9.99999999398829e-06, "loss": 0.4438, "step": 1309 }, { "epoch": 0.06011656188334633, "grad_norm": 0.4282680153846741, "learning_rate": 9.999999975953156e-06, "loss": 0.3444, "step": 1310 }, { "epoch": 0.060162452388600796, "grad_norm": 0.44697362184524536, "learning_rate": 9.999999945894602e-06, "loss": 0.305, "step": 1311 }, { "epoch": 0.06020834289385526, "grad_norm": 0.484749972820282, "learning_rate": 9.999999903812625e-06, "loss": 0.4041, "step": 1312 }, { "epoch": 0.06025423339910972, "grad_norm": 0.4960935115814209, "learning_rate": 9.999999849707227e-06, "loss": 0.4118, "step": 1313 }, { "epoch": 0.060300123904364185, "grad_norm": 0.4719673991203308, "learning_rate": 9.999999783578407e-06, "loss": 0.4299, "step": 1314 }, { "epoch": 0.06034601440961865, "grad_norm": 0.4409143924713135, "learning_rate": 9.999999705426165e-06, "loss": 0.3409, "step": 1315 }, { "epoch": 0.06039190491487311, "grad_norm": 0.5070991516113281, "learning_rate": 9.999999615250502e-06, "loss": 0.4469, "step": 1316 }, { "epoch": 0.060437795420127574, "grad_norm": 0.47174718976020813, "learning_rate": 9.99999951305142e-06, "loss": 0.4125, "step": 1317 }, { "epoch": 0.06048368592538204, "grad_norm": 0.47255584597587585, "learning_rate": 9.999999398828914e-06, "loss": 0.4568, "step": 1318 }, { "epoch": 0.0605295764306365, "grad_norm": 0.45447489619255066, "learning_rate": 9.99999927258299e-06, "loss": 0.3368, "step": 1319 }, { "epoch": 0.06057546693589096, "grad_norm": 0.45103153586387634, "learning_rate": 9.999999134313645e-06, "loss": 0.3629, "step": 1320 }, { "epoch": 0.060621357441145426, "grad_norm": 0.4483513832092285, "learning_rate": 9.99999898402088e-06, "loss": 0.3971, "step": 1321 }, { "epoch": 0.06066724794639989, "grad_norm": 0.47159498929977417, "learning_rate": 9.999998821704696e-06, "loss": 0.4108, "step": 1322 }, { "epoch": 0.06071313845165435, "grad_norm": 0.4768999516963959, "learning_rate": 9.999998647365093e-06, "loss": 0.3864, "step": 1323 }, { "epoch": 0.060759028956908816, "grad_norm": 0.48006561398506165, "learning_rate": 9.99999846100207e-06, "loss": 0.3576, "step": 1324 }, { "epoch": 0.06080491946216328, "grad_norm": 0.5075549483299255, "learning_rate": 9.999998262615628e-06, "loss": 0.4452, "step": 1325 }, { "epoch": 0.06085080996741774, "grad_norm": 0.4682963788509369, "learning_rate": 9.99999805220577e-06, "loss": 0.3618, "step": 1326 }, { "epoch": 0.060896700472672205, "grad_norm": 0.49834227561950684, "learning_rate": 9.999997829772494e-06, "loss": 0.4272, "step": 1327 }, { "epoch": 0.06094259097792667, "grad_norm": 0.5462979674339294, "learning_rate": 9.999997595315802e-06, "loss": 0.4809, "step": 1328 }, { "epoch": 0.06098848148318113, "grad_norm": 0.4988521933555603, "learning_rate": 9.999997348835694e-06, "loss": 0.4348, "step": 1329 }, { "epoch": 0.061034371988435594, "grad_norm": 0.4651349186897278, "learning_rate": 9.99999709033217e-06, "loss": 0.3178, "step": 1330 }, { "epoch": 0.06108026249369006, "grad_norm": 0.4642215371131897, "learning_rate": 9.99999681980523e-06, "loss": 0.3708, "step": 1331 }, { "epoch": 0.06112615299894452, "grad_norm": 0.4974886476993561, "learning_rate": 9.999996537254877e-06, "loss": 0.4474, "step": 1332 }, { "epoch": 0.061172043504198984, "grad_norm": 0.5218479633331299, "learning_rate": 9.999996242681109e-06, "loss": 0.5018, "step": 1333 }, { "epoch": 0.06121793400945345, "grad_norm": 0.4917214512825012, "learning_rate": 9.99999593608393e-06, "loss": 0.4078, "step": 1334 }, { "epoch": 0.06126382451470791, "grad_norm": 0.5320242643356323, "learning_rate": 9.999995617463338e-06, "loss": 0.5073, "step": 1335 }, { "epoch": 0.06130971501996237, "grad_norm": 0.5273686051368713, "learning_rate": 9.999995286819334e-06, "loss": 0.4865, "step": 1336 }, { "epoch": 0.061355605525216836, "grad_norm": 0.5050492882728577, "learning_rate": 9.99999494415192e-06, "loss": 0.3848, "step": 1337 }, { "epoch": 0.0614014960304713, "grad_norm": 0.5004304051399231, "learning_rate": 9.999994589461097e-06, "loss": 0.5303, "step": 1338 }, { "epoch": 0.061447386535725755, "grad_norm": 0.59356689453125, "learning_rate": 9.999994222746862e-06, "loss": 0.4351, "step": 1339 }, { "epoch": 0.06149327704098022, "grad_norm": 0.47034984827041626, "learning_rate": 9.999993844009221e-06, "loss": 0.3963, "step": 1340 }, { "epoch": 0.06153916754623468, "grad_norm": 0.47136425971984863, "learning_rate": 9.999993453248174e-06, "loss": 0.4423, "step": 1341 }, { "epoch": 0.061585058051489144, "grad_norm": 0.5834726095199585, "learning_rate": 9.99999305046372e-06, "loss": 0.5866, "step": 1342 }, { "epoch": 0.06163094855674361, "grad_norm": 0.4277462661266327, "learning_rate": 9.999992635655859e-06, "loss": 0.3772, "step": 1343 }, { "epoch": 0.06167683906199807, "grad_norm": 0.4777684211730957, "learning_rate": 9.999992208824596e-06, "loss": 0.4126, "step": 1344 }, { "epoch": 0.061722729567252534, "grad_norm": 0.520283043384552, "learning_rate": 9.999991769969928e-06, "loss": 0.5277, "step": 1345 }, { "epoch": 0.061768620072507, "grad_norm": 0.5171157717704773, "learning_rate": 9.99999131909186e-06, "loss": 0.4842, "step": 1346 }, { "epoch": 0.06181451057776146, "grad_norm": 0.5312116742134094, "learning_rate": 9.999990856190388e-06, "loss": 0.5218, "step": 1347 }, { "epoch": 0.06186040108301592, "grad_norm": 0.4500662386417389, "learning_rate": 9.999990381265518e-06, "loss": 0.345, "step": 1348 }, { "epoch": 0.061906291588270386, "grad_norm": 0.4788573682308197, "learning_rate": 9.99998989431725e-06, "loss": 0.4695, "step": 1349 }, { "epoch": 0.06195218209352485, "grad_norm": 0.520769476890564, "learning_rate": 9.999989395345583e-06, "loss": 0.4041, "step": 1350 }, { "epoch": 0.06199807259877931, "grad_norm": 0.4782469570636749, "learning_rate": 9.99998888435052e-06, "loss": 0.3781, "step": 1351 }, { "epoch": 0.062043963104033775, "grad_norm": 0.46850380301475525, "learning_rate": 9.999988361332063e-06, "loss": 0.3572, "step": 1352 }, { "epoch": 0.06208985360928824, "grad_norm": 0.4841806888580322, "learning_rate": 9.99998782629021e-06, "loss": 0.4375, "step": 1353 }, { "epoch": 0.0621357441145427, "grad_norm": 0.4694962203502655, "learning_rate": 9.999987279224964e-06, "loss": 0.4303, "step": 1354 }, { "epoch": 0.062181634619797164, "grad_norm": 0.44295835494995117, "learning_rate": 9.999986720136328e-06, "loss": 0.3407, "step": 1355 }, { "epoch": 0.06222752512505163, "grad_norm": 0.4706135392189026, "learning_rate": 9.999986149024301e-06, "loss": 0.449, "step": 1356 }, { "epoch": 0.06227341563030609, "grad_norm": 0.5196651816368103, "learning_rate": 9.999985565888885e-06, "loss": 0.5021, "step": 1357 }, { "epoch": 0.062319306135560554, "grad_norm": 0.5165014863014221, "learning_rate": 9.999984970730083e-06, "loss": 0.4387, "step": 1358 }, { "epoch": 0.06236519664081502, "grad_norm": 0.5116204023361206, "learning_rate": 9.999984363547895e-06, "loss": 0.4128, "step": 1359 }, { "epoch": 0.06241108714606948, "grad_norm": 0.47467002272605896, "learning_rate": 9.999983744342323e-06, "loss": 0.3345, "step": 1360 }, { "epoch": 0.06245697765132394, "grad_norm": 0.49090775847435, "learning_rate": 9.999983113113367e-06, "loss": 0.4033, "step": 1361 }, { "epoch": 0.0625028681565784, "grad_norm": 0.5150254368782043, "learning_rate": 9.99998246986103e-06, "loss": 0.4605, "step": 1362 }, { "epoch": 0.06254875866183286, "grad_norm": 0.48796579241752625, "learning_rate": 9.999981814585314e-06, "loss": 0.4349, "step": 1363 }, { "epoch": 0.06259464916708733, "grad_norm": 0.5245981812477112, "learning_rate": 9.99998114728622e-06, "loss": 0.4697, "step": 1364 }, { "epoch": 0.06264053967234179, "grad_norm": 0.4576440751552582, "learning_rate": 9.999980467963748e-06, "loss": 0.4114, "step": 1365 }, { "epoch": 0.06268643017759626, "grad_norm": 0.6070294976234436, "learning_rate": 9.999979776617902e-06, "loss": 0.4911, "step": 1366 }, { "epoch": 0.06273232068285071, "grad_norm": 0.4996192753314972, "learning_rate": 9.999979073248682e-06, "loss": 0.525, "step": 1367 }, { "epoch": 0.06277821118810518, "grad_norm": 0.4975182116031647, "learning_rate": 9.999978357856091e-06, "loss": 0.4964, "step": 1368 }, { "epoch": 0.06282410169335964, "grad_norm": 0.4779447019100189, "learning_rate": 9.99997763044013e-06, "loss": 0.342, "step": 1369 }, { "epoch": 0.06286999219861411, "grad_norm": 0.4856947362422943, "learning_rate": 9.9999768910008e-06, "loss": 0.3754, "step": 1370 }, { "epoch": 0.06291588270386857, "grad_norm": 0.4655861556529999, "learning_rate": 9.999976139538104e-06, "loss": 0.4096, "step": 1371 }, { "epoch": 0.06296177320912304, "grad_norm": 0.4430520832538605, "learning_rate": 9.999975376052045e-06, "loss": 0.3258, "step": 1372 }, { "epoch": 0.0630076637143775, "grad_norm": 0.4377870559692383, "learning_rate": 9.99997460054262e-06, "loss": 0.3093, "step": 1373 }, { "epoch": 0.06305355421963196, "grad_norm": 0.4862578809261322, "learning_rate": 9.999973813009838e-06, "loss": 0.4123, "step": 1374 }, { "epoch": 0.06309944472488642, "grad_norm": 0.4473819434642792, "learning_rate": 9.999973013453694e-06, "loss": 0.3162, "step": 1375 }, { "epoch": 0.06314533523014089, "grad_norm": 0.4855145215988159, "learning_rate": 9.999972201874193e-06, "loss": 0.4235, "step": 1376 }, { "epoch": 0.06319122573539535, "grad_norm": 0.46401524543762207, "learning_rate": 9.99997137827134e-06, "loss": 0.3717, "step": 1377 }, { "epoch": 0.06323711624064982, "grad_norm": 0.5210104584693909, "learning_rate": 9.99997054264513e-06, "loss": 0.482, "step": 1378 }, { "epoch": 0.06328300674590427, "grad_norm": 0.4522438049316406, "learning_rate": 9.999969694995572e-06, "loss": 0.3534, "step": 1379 }, { "epoch": 0.06332889725115874, "grad_norm": 0.49172359704971313, "learning_rate": 9.999968835322662e-06, "loss": 0.5015, "step": 1380 }, { "epoch": 0.0633747877564132, "grad_norm": 0.49448615312576294, "learning_rate": 9.999967963626406e-06, "loss": 0.3983, "step": 1381 }, { "epoch": 0.06342067826166767, "grad_norm": 0.4784165620803833, "learning_rate": 9.999967079906807e-06, "loss": 0.4444, "step": 1382 }, { "epoch": 0.06346656876692212, "grad_norm": 0.4576626121997833, "learning_rate": 9.999966184163865e-06, "loss": 0.4187, "step": 1383 }, { "epoch": 0.06351245927217658, "grad_norm": 0.4514203369617462, "learning_rate": 9.999965276397581e-06, "loss": 0.3193, "step": 1384 }, { "epoch": 0.06355834977743105, "grad_norm": 0.4750826060771942, "learning_rate": 9.999964356607958e-06, "loss": 0.3896, "step": 1385 }, { "epoch": 0.0636042402826855, "grad_norm": 0.6570529937744141, "learning_rate": 9.999963424795e-06, "loss": 0.4737, "step": 1386 }, { "epoch": 0.06365013078793998, "grad_norm": 0.47984176874160767, "learning_rate": 9.999962480958708e-06, "loss": 0.4061, "step": 1387 }, { "epoch": 0.06369602129319443, "grad_norm": 0.5021207332611084, "learning_rate": 9.999961525099083e-06, "loss": 0.4647, "step": 1388 }, { "epoch": 0.0637419117984489, "grad_norm": 0.5036116242408752, "learning_rate": 9.999960557216128e-06, "loss": 0.4665, "step": 1389 }, { "epoch": 0.06378780230370336, "grad_norm": 0.4725930392742157, "learning_rate": 9.999959577309847e-06, "loss": 0.3911, "step": 1390 }, { "epoch": 0.06383369280895783, "grad_norm": 0.5347336530685425, "learning_rate": 9.999958585380242e-06, "loss": 0.572, "step": 1391 }, { "epoch": 0.06387958331421228, "grad_norm": 0.4795195460319519, "learning_rate": 9.999957581427315e-06, "loss": 0.388, "step": 1392 }, { "epoch": 0.06392547381946675, "grad_norm": 0.5054691433906555, "learning_rate": 9.999956565451066e-06, "loss": 0.4247, "step": 1393 }, { "epoch": 0.06397136432472121, "grad_norm": 0.46716243028640747, "learning_rate": 9.999955537451502e-06, "loss": 0.3481, "step": 1394 }, { "epoch": 0.06401725482997568, "grad_norm": 0.5037909746170044, "learning_rate": 9.999954497428621e-06, "loss": 0.3781, "step": 1395 }, { "epoch": 0.06406314533523014, "grad_norm": 0.481431782245636, "learning_rate": 9.999953445382428e-06, "loss": 0.4591, "step": 1396 }, { "epoch": 0.06410903584048461, "grad_norm": 0.4764237105846405, "learning_rate": 9.999952381312924e-06, "loss": 0.4518, "step": 1397 }, { "epoch": 0.06415492634573906, "grad_norm": 0.5006119608879089, "learning_rate": 9.999951305220115e-06, "loss": 0.4727, "step": 1398 }, { "epoch": 0.06420081685099353, "grad_norm": 0.4904876947402954, "learning_rate": 9.999950217104e-06, "loss": 0.4317, "step": 1399 }, { "epoch": 0.06424670735624799, "grad_norm": 0.49891409277915955, "learning_rate": 9.999949116964582e-06, "loss": 0.4191, "step": 1400 }, { "epoch": 0.06429259786150246, "grad_norm": 0.48268988728523254, "learning_rate": 9.999948004801866e-06, "loss": 0.4, "step": 1401 }, { "epoch": 0.06433848836675692, "grad_norm": 0.4835261404514313, "learning_rate": 9.99994688061585e-06, "loss": 0.43, "step": 1402 }, { "epoch": 0.06438437887201139, "grad_norm": 0.4731764495372772, "learning_rate": 9.999945744406542e-06, "loss": 0.4271, "step": 1403 }, { "epoch": 0.06443026937726584, "grad_norm": 0.5208236575126648, "learning_rate": 9.999944596173943e-06, "loss": 0.4904, "step": 1404 }, { "epoch": 0.06447615988252031, "grad_norm": 0.48828527331352234, "learning_rate": 9.999943435918054e-06, "loss": 0.395, "step": 1405 }, { "epoch": 0.06452205038777477, "grad_norm": 0.4636908173561096, "learning_rate": 9.99994226363888e-06, "loss": 0.4136, "step": 1406 }, { "epoch": 0.06456794089302924, "grad_norm": 0.486735999584198, "learning_rate": 9.999941079336422e-06, "loss": 0.3574, "step": 1407 }, { "epoch": 0.0646138313982837, "grad_norm": 0.4457509219646454, "learning_rate": 9.999939883010686e-06, "loss": 0.3711, "step": 1408 }, { "epoch": 0.06465972190353816, "grad_norm": 0.5154923796653748, "learning_rate": 9.99993867466167e-06, "loss": 0.4917, "step": 1409 }, { "epoch": 0.06470561240879262, "grad_norm": 0.5085795521736145, "learning_rate": 9.99993745428938e-06, "loss": 0.5071, "step": 1410 }, { "epoch": 0.06475150291404709, "grad_norm": 0.45585259795188904, "learning_rate": 9.999936221893819e-06, "loss": 0.346, "step": 1411 }, { "epoch": 0.06479739341930155, "grad_norm": 0.5325887799263, "learning_rate": 9.999934977474988e-06, "loss": 0.4801, "step": 1412 }, { "epoch": 0.064843283924556, "grad_norm": 0.48388898372650146, "learning_rate": 9.999933721032892e-06, "loss": 0.3864, "step": 1413 }, { "epoch": 0.06488917442981047, "grad_norm": 0.46632200479507446, "learning_rate": 9.999932452567535e-06, "loss": 0.3283, "step": 1414 }, { "epoch": 0.06493506493506493, "grad_norm": 0.49091312289237976, "learning_rate": 9.999931172078918e-06, "loss": 0.3574, "step": 1415 }, { "epoch": 0.0649809554403194, "grad_norm": 0.4777570962905884, "learning_rate": 9.999929879567044e-06, "loss": 0.3807, "step": 1416 }, { "epoch": 0.06502684594557386, "grad_norm": 0.6797553300857544, "learning_rate": 9.999928575031917e-06, "loss": 0.6496, "step": 1417 }, { "epoch": 0.06507273645082833, "grad_norm": 0.48289597034454346, "learning_rate": 9.99992725847354e-06, "loss": 0.414, "step": 1418 }, { "epoch": 0.06511862695608278, "grad_norm": 0.49677640199661255, "learning_rate": 9.999925929891915e-06, "loss": 0.4638, "step": 1419 }, { "epoch": 0.06516451746133725, "grad_norm": 0.5317786335945129, "learning_rate": 9.999924589287047e-06, "loss": 0.3898, "step": 1420 }, { "epoch": 0.06521040796659171, "grad_norm": 0.48046281933784485, "learning_rate": 9.999923236658937e-06, "loss": 0.3705, "step": 1421 }, { "epoch": 0.06525629847184618, "grad_norm": 0.5125747919082642, "learning_rate": 9.999921872007591e-06, "loss": 0.44, "step": 1422 }, { "epoch": 0.06530218897710063, "grad_norm": 0.4760970175266266, "learning_rate": 9.999920495333011e-06, "loss": 0.3075, "step": 1423 }, { "epoch": 0.0653480794823551, "grad_norm": 0.49629834294319153, "learning_rate": 9.9999191066352e-06, "loss": 0.4229, "step": 1424 }, { "epoch": 0.06539396998760956, "grad_norm": 0.5172837972640991, "learning_rate": 9.999917705914162e-06, "loss": 0.4229, "step": 1425 }, { "epoch": 0.06543986049286403, "grad_norm": 0.5421844720840454, "learning_rate": 9.999916293169898e-06, "loss": 0.4791, "step": 1426 }, { "epoch": 0.06548575099811849, "grad_norm": 0.566855788230896, "learning_rate": 9.999914868402415e-06, "loss": 0.556, "step": 1427 }, { "epoch": 0.06553164150337296, "grad_norm": 0.5387606620788574, "learning_rate": 9.999913431611714e-06, "loss": 0.4659, "step": 1428 }, { "epoch": 0.06557753200862741, "grad_norm": 0.5093755722045898, "learning_rate": 9.999911982797801e-06, "loss": 0.3959, "step": 1429 }, { "epoch": 0.06562342251388188, "grad_norm": 0.4817783236503601, "learning_rate": 9.999910521960677e-06, "loss": 0.3666, "step": 1430 }, { "epoch": 0.06566931301913634, "grad_norm": 0.4898487329483032, "learning_rate": 9.999909049100345e-06, "loss": 0.4227, "step": 1431 }, { "epoch": 0.06571520352439081, "grad_norm": 0.45877912640571594, "learning_rate": 9.999907564216808e-06, "loss": 0.3669, "step": 1432 }, { "epoch": 0.06576109402964526, "grad_norm": 0.5108855366706848, "learning_rate": 9.999906067310075e-06, "loss": 0.4327, "step": 1433 }, { "epoch": 0.06580698453489973, "grad_norm": 0.4705347716808319, "learning_rate": 9.999904558380145e-06, "loss": 0.3935, "step": 1434 }, { "epoch": 0.06585287504015419, "grad_norm": 0.4982197880744934, "learning_rate": 9.999903037427022e-06, "loss": 0.4312, "step": 1435 }, { "epoch": 0.06589876554540866, "grad_norm": 0.5360764861106873, "learning_rate": 9.99990150445071e-06, "loss": 0.4989, "step": 1436 }, { "epoch": 0.06594465605066312, "grad_norm": 0.44408050179481506, "learning_rate": 9.999899959451214e-06, "loss": 0.3254, "step": 1437 }, { "epoch": 0.06599054655591759, "grad_norm": 0.44298574328422546, "learning_rate": 9.999898402428535e-06, "loss": 0.365, "step": 1438 }, { "epoch": 0.06603643706117204, "grad_norm": 0.48327207565307617, "learning_rate": 9.999896833382679e-06, "loss": 0.4417, "step": 1439 }, { "epoch": 0.06608232756642651, "grad_norm": 0.4832613170146942, "learning_rate": 9.99989525231365e-06, "loss": 0.357, "step": 1440 }, { "epoch": 0.06612821807168097, "grad_norm": 0.452497273683548, "learning_rate": 9.99989365922145e-06, "loss": 0.3956, "step": 1441 }, { "epoch": 0.06617410857693543, "grad_norm": 0.5015016794204712, "learning_rate": 9.999892054106083e-06, "loss": 0.4752, "step": 1442 }, { "epoch": 0.0662199990821899, "grad_norm": 0.541102945804596, "learning_rate": 9.999890436967555e-06, "loss": 0.4629, "step": 1443 }, { "epoch": 0.06626588958744435, "grad_norm": 0.44886717200279236, "learning_rate": 9.999888807805868e-06, "loss": 0.33, "step": 1444 }, { "epoch": 0.06631178009269882, "grad_norm": 0.4742789566516876, "learning_rate": 9.999887166621027e-06, "loss": 0.4154, "step": 1445 }, { "epoch": 0.06635767059795328, "grad_norm": 0.522352933883667, "learning_rate": 9.999885513413035e-06, "loss": 0.4191, "step": 1446 }, { "epoch": 0.06640356110320775, "grad_norm": 0.5095981955528259, "learning_rate": 9.999883848181896e-06, "loss": 0.4686, "step": 1447 }, { "epoch": 0.0664494516084622, "grad_norm": 0.5130565762519836, "learning_rate": 9.999882170927615e-06, "loss": 0.4693, "step": 1448 }, { "epoch": 0.06649534211371667, "grad_norm": 0.5398648977279663, "learning_rate": 9.999880481650196e-06, "loss": 0.4936, "step": 1449 }, { "epoch": 0.06654123261897113, "grad_norm": 0.47104206681251526, "learning_rate": 9.999878780349642e-06, "loss": 0.4079, "step": 1450 }, { "epoch": 0.0665871231242256, "grad_norm": 0.5229426622390747, "learning_rate": 9.999877067025955e-06, "loss": 0.4416, "step": 1451 }, { "epoch": 0.06663301362948006, "grad_norm": 0.4794468283653259, "learning_rate": 9.999875341679144e-06, "loss": 0.34, "step": 1452 }, { "epoch": 0.06667890413473453, "grad_norm": 0.5375478267669678, "learning_rate": 9.99987360430921e-06, "loss": 0.5231, "step": 1453 }, { "epoch": 0.06672479463998898, "grad_norm": 0.48184657096862793, "learning_rate": 9.999871854916159e-06, "loss": 0.3922, "step": 1454 }, { "epoch": 0.06677068514524345, "grad_norm": 0.5267612934112549, "learning_rate": 9.999870093499993e-06, "loss": 0.4748, "step": 1455 }, { "epoch": 0.06681657565049791, "grad_norm": 0.5034180283546448, "learning_rate": 9.99986832006072e-06, "loss": 0.4159, "step": 1456 }, { "epoch": 0.06686246615575238, "grad_norm": 0.49324777722358704, "learning_rate": 9.999866534598339e-06, "loss": 0.3985, "step": 1457 }, { "epoch": 0.06690835666100683, "grad_norm": 0.522338330745697, "learning_rate": 9.999864737112857e-06, "loss": 0.4403, "step": 1458 }, { "epoch": 0.0669542471662613, "grad_norm": 0.4822441637516022, "learning_rate": 9.99986292760428e-06, "loss": 0.4341, "step": 1459 }, { "epoch": 0.06700013767151576, "grad_norm": 0.4854448437690735, "learning_rate": 9.999861106072608e-06, "loss": 0.4203, "step": 1460 }, { "epoch": 0.06704602817677023, "grad_norm": 0.4899885356426239, "learning_rate": 9.999859272517852e-06, "loss": 0.4546, "step": 1461 }, { "epoch": 0.06709191868202469, "grad_norm": 0.489889919757843, "learning_rate": 9.99985742694001e-06, "loss": 0.3858, "step": 1462 }, { "epoch": 0.06713780918727916, "grad_norm": 0.50341796875, "learning_rate": 9.999855569339088e-06, "loss": 0.436, "step": 1463 }, { "epoch": 0.06718369969253361, "grad_norm": 0.4324839413166046, "learning_rate": 9.999853699715094e-06, "loss": 0.3398, "step": 1464 }, { "epoch": 0.06722959019778808, "grad_norm": 0.4206226170063019, "learning_rate": 9.999851818068027e-06, "loss": 0.3115, "step": 1465 }, { "epoch": 0.06727548070304254, "grad_norm": 0.4473482668399811, "learning_rate": 9.999849924397895e-06, "loss": 0.3311, "step": 1466 }, { "epoch": 0.06732137120829701, "grad_norm": 0.5379679203033447, "learning_rate": 9.999848018704703e-06, "loss": 0.5851, "step": 1467 }, { "epoch": 0.06736726171355147, "grad_norm": 0.46014630794525146, "learning_rate": 9.999846100988455e-06, "loss": 0.3913, "step": 1468 }, { "epoch": 0.06741315221880594, "grad_norm": 0.4650583565235138, "learning_rate": 9.999844171249153e-06, "loss": 0.425, "step": 1469 }, { "epoch": 0.06745904272406039, "grad_norm": 0.4622094929218292, "learning_rate": 9.999842229486806e-06, "loss": 0.3487, "step": 1470 }, { "epoch": 0.06750493322931485, "grad_norm": 0.50398850440979, "learning_rate": 9.999840275701416e-06, "loss": 0.4285, "step": 1471 }, { "epoch": 0.06755082373456932, "grad_norm": 0.5965123176574707, "learning_rate": 9.999838309892988e-06, "loss": 0.5413, "step": 1472 }, { "epoch": 0.06759671423982377, "grad_norm": 0.49533459544181824, "learning_rate": 9.999836332061526e-06, "loss": 0.4413, "step": 1473 }, { "epoch": 0.06764260474507824, "grad_norm": 0.5203729867935181, "learning_rate": 9.999834342207035e-06, "loss": 0.4864, "step": 1474 }, { "epoch": 0.0676884952503327, "grad_norm": 0.46969038248062134, "learning_rate": 9.999832340329523e-06, "loss": 0.3856, "step": 1475 }, { "epoch": 0.06773438575558717, "grad_norm": 0.4709130525588989, "learning_rate": 9.99983032642899e-06, "loss": 0.3594, "step": 1476 }, { "epoch": 0.06778027626084163, "grad_norm": 0.5098454356193542, "learning_rate": 9.999828300505443e-06, "loss": 0.3973, "step": 1477 }, { "epoch": 0.0678261667660961, "grad_norm": 0.5962406396865845, "learning_rate": 9.999826262558889e-06, "loss": 0.4966, "step": 1478 }, { "epoch": 0.06787205727135055, "grad_norm": 0.48592934012413025, "learning_rate": 9.999824212589328e-06, "loss": 0.445, "step": 1479 }, { "epoch": 0.06791794777660502, "grad_norm": 0.5435962080955505, "learning_rate": 9.99982215059677e-06, "loss": 0.482, "step": 1480 }, { "epoch": 0.06796383828185948, "grad_norm": 0.5200150609016418, "learning_rate": 9.999820076581218e-06, "loss": 0.3642, "step": 1481 }, { "epoch": 0.06800972878711395, "grad_norm": 0.4944203197956085, "learning_rate": 9.999817990542675e-06, "loss": 0.3487, "step": 1482 }, { "epoch": 0.0680556192923684, "grad_norm": 0.4612996280193329, "learning_rate": 9.999815892481149e-06, "loss": 0.3388, "step": 1483 }, { "epoch": 0.06810150979762288, "grad_norm": 0.5335448384284973, "learning_rate": 9.999813782396642e-06, "loss": 0.4182, "step": 1484 }, { "epoch": 0.06814740030287733, "grad_norm": 0.6675297021865845, "learning_rate": 9.999811660289163e-06, "loss": 0.6561, "step": 1485 }, { "epoch": 0.0681932908081318, "grad_norm": 0.49340584874153137, "learning_rate": 9.999809526158713e-06, "loss": 0.4091, "step": 1486 }, { "epoch": 0.06823918131338626, "grad_norm": 0.5457292199134827, "learning_rate": 9.9998073800053e-06, "loss": 0.3674, "step": 1487 }, { "epoch": 0.06828507181864073, "grad_norm": 0.48735201358795166, "learning_rate": 9.999805221828929e-06, "loss": 0.3718, "step": 1488 }, { "epoch": 0.06833096232389518, "grad_norm": 0.5068523287773132, "learning_rate": 9.999803051629604e-06, "loss": 0.3869, "step": 1489 }, { "epoch": 0.06837685282914965, "grad_norm": 0.5415776371955872, "learning_rate": 9.99980086940733e-06, "loss": 0.5328, "step": 1490 }, { "epoch": 0.06842274333440411, "grad_norm": 0.4694725275039673, "learning_rate": 9.999798675162114e-06, "loss": 0.4555, "step": 1491 }, { "epoch": 0.06846863383965858, "grad_norm": 0.4799397587776184, "learning_rate": 9.99979646889396e-06, "loss": 0.3815, "step": 1492 }, { "epoch": 0.06851452434491304, "grad_norm": 0.5082115530967712, "learning_rate": 9.999794250602872e-06, "loss": 0.3927, "step": 1493 }, { "epoch": 0.0685604148501675, "grad_norm": 0.4930780827999115, "learning_rate": 9.999792020288857e-06, "loss": 0.4178, "step": 1494 }, { "epoch": 0.06860630535542196, "grad_norm": 0.509013295173645, "learning_rate": 9.99978977795192e-06, "loss": 0.3992, "step": 1495 }, { "epoch": 0.06865219586067643, "grad_norm": 0.5124661326408386, "learning_rate": 9.999787523592068e-06, "loss": 0.4708, "step": 1496 }, { "epoch": 0.06869808636593089, "grad_norm": 0.5059641003608704, "learning_rate": 9.999785257209304e-06, "loss": 0.4204, "step": 1497 }, { "epoch": 0.06874397687118536, "grad_norm": 0.5025946497917175, "learning_rate": 9.999782978803636e-06, "loss": 0.4101, "step": 1498 }, { "epoch": 0.06878986737643981, "grad_norm": 0.501882016658783, "learning_rate": 9.999780688375068e-06, "loss": 0.4057, "step": 1499 }, { "epoch": 0.06883575788169427, "grad_norm": 0.5084762573242188, "learning_rate": 9.999778385923602e-06, "loss": 0.4067, "step": 1500 }, { "epoch": 0.06888164838694874, "grad_norm": 0.5197412967681885, "learning_rate": 9.99977607144925e-06, "loss": 0.4178, "step": 1501 }, { "epoch": 0.0689275388922032, "grad_norm": 0.4817604720592499, "learning_rate": 9.999773744952013e-06, "loss": 0.378, "step": 1502 }, { "epoch": 0.06897342939745767, "grad_norm": 0.5243045687675476, "learning_rate": 9.9997714064319e-06, "loss": 0.5335, "step": 1503 }, { "epoch": 0.06901931990271212, "grad_norm": 0.4980696439743042, "learning_rate": 9.999769055888913e-06, "loss": 0.4237, "step": 1504 }, { "epoch": 0.06906521040796659, "grad_norm": 0.57636958360672, "learning_rate": 9.999766693323058e-06, "loss": 0.5621, "step": 1505 }, { "epoch": 0.06911110091322105, "grad_norm": 0.47423747181892395, "learning_rate": 9.999764318734344e-06, "loss": 0.3721, "step": 1506 }, { "epoch": 0.06915699141847552, "grad_norm": 0.5168173909187317, "learning_rate": 9.999761932122776e-06, "loss": 0.5384, "step": 1507 }, { "epoch": 0.06920288192372998, "grad_norm": 0.47357746958732605, "learning_rate": 9.999759533488359e-06, "loss": 0.418, "step": 1508 }, { "epoch": 0.06924877242898445, "grad_norm": 0.5696466565132141, "learning_rate": 9.999757122831095e-06, "loss": 0.4814, "step": 1509 }, { "epoch": 0.0692946629342389, "grad_norm": 0.45018795132637024, "learning_rate": 9.999754700150995e-06, "loss": 0.3467, "step": 1510 }, { "epoch": 0.06934055343949337, "grad_norm": 0.4679853320121765, "learning_rate": 9.999752265448061e-06, "loss": 0.4159, "step": 1511 }, { "epoch": 0.06938644394474783, "grad_norm": 0.48860302567481995, "learning_rate": 9.999749818722303e-06, "loss": 0.4303, "step": 1512 }, { "epoch": 0.0694323344500023, "grad_norm": 0.4883895218372345, "learning_rate": 9.999747359973724e-06, "loss": 0.3779, "step": 1513 }, { "epoch": 0.06947822495525675, "grad_norm": 0.6044162511825562, "learning_rate": 9.99974488920233e-06, "loss": 0.5678, "step": 1514 }, { "epoch": 0.06952411546051122, "grad_norm": 0.46854501962661743, "learning_rate": 9.999742406408128e-06, "loss": 0.4126, "step": 1515 }, { "epoch": 0.06957000596576568, "grad_norm": 0.5346769690513611, "learning_rate": 9.999739911591124e-06, "loss": 0.5383, "step": 1516 }, { "epoch": 0.06961589647102015, "grad_norm": 0.4709138572216034, "learning_rate": 9.999737404751324e-06, "loss": 0.4031, "step": 1517 }, { "epoch": 0.0696617869762746, "grad_norm": 0.5270324945449829, "learning_rate": 9.999734885888732e-06, "loss": 0.4744, "step": 1518 }, { "epoch": 0.06970767748152908, "grad_norm": 0.534546971321106, "learning_rate": 9.999732355003356e-06, "loss": 0.4899, "step": 1519 }, { "epoch": 0.06975356798678353, "grad_norm": 0.47926053404808044, "learning_rate": 9.999729812095203e-06, "loss": 0.4319, "step": 1520 }, { "epoch": 0.069799458492038, "grad_norm": 0.5181357264518738, "learning_rate": 9.999727257164276e-06, "loss": 0.4303, "step": 1521 }, { "epoch": 0.06984534899729246, "grad_norm": 0.4206531345844269, "learning_rate": 9.999724690210583e-06, "loss": 0.3178, "step": 1522 }, { "epoch": 0.06989123950254693, "grad_norm": 0.6375095248222351, "learning_rate": 9.99972211123413e-06, "loss": 0.5252, "step": 1523 }, { "epoch": 0.06993713000780138, "grad_norm": 0.48132389783859253, "learning_rate": 9.999719520234924e-06, "loss": 0.4108, "step": 1524 }, { "epoch": 0.06998302051305585, "grad_norm": 0.48003053665161133, "learning_rate": 9.99971691721297e-06, "loss": 0.4281, "step": 1525 }, { "epoch": 0.07002891101831031, "grad_norm": 0.478013277053833, "learning_rate": 9.999714302168276e-06, "loss": 0.4039, "step": 1526 }, { "epoch": 0.07007480152356478, "grad_norm": 0.45703446865081787, "learning_rate": 9.999711675100845e-06, "loss": 0.3867, "step": 1527 }, { "epoch": 0.07012069202881924, "grad_norm": 0.4633644223213196, "learning_rate": 9.999709036010687e-06, "loss": 0.3881, "step": 1528 }, { "epoch": 0.07016658253407371, "grad_norm": 0.5113197565078735, "learning_rate": 9.999706384897805e-06, "loss": 0.4683, "step": 1529 }, { "epoch": 0.07021247303932816, "grad_norm": 0.45382463932037354, "learning_rate": 9.999703721762209e-06, "loss": 0.3756, "step": 1530 }, { "epoch": 0.07025836354458262, "grad_norm": 0.47707948088645935, "learning_rate": 9.999701046603903e-06, "loss": 0.3911, "step": 1531 }, { "epoch": 0.07030425404983709, "grad_norm": 0.5260950922966003, "learning_rate": 9.999698359422893e-06, "loss": 0.5267, "step": 1532 }, { "epoch": 0.07035014455509155, "grad_norm": 0.5094505548477173, "learning_rate": 9.999695660219188e-06, "loss": 0.4628, "step": 1533 }, { "epoch": 0.07039603506034602, "grad_norm": 0.507024347782135, "learning_rate": 9.999692948992792e-06, "loss": 0.4448, "step": 1534 }, { "epoch": 0.07044192556560047, "grad_norm": 0.4874345064163208, "learning_rate": 9.999690225743712e-06, "loss": 0.374, "step": 1535 }, { "epoch": 0.07048781607085494, "grad_norm": 0.4903397560119629, "learning_rate": 9.999687490471956e-06, "loss": 0.4031, "step": 1536 }, { "epoch": 0.0705337065761094, "grad_norm": 0.4693142771720886, "learning_rate": 9.99968474317753e-06, "loss": 0.3749, "step": 1537 }, { "epoch": 0.07057959708136387, "grad_norm": 0.4820583760738373, "learning_rate": 9.999681983860439e-06, "loss": 0.4276, "step": 1538 }, { "epoch": 0.07062548758661832, "grad_norm": 0.49902161955833435, "learning_rate": 9.99967921252069e-06, "loss": 0.4251, "step": 1539 }, { "epoch": 0.0706713780918728, "grad_norm": 0.542457103729248, "learning_rate": 9.999676429158292e-06, "loss": 0.4836, "step": 1540 }, { "epoch": 0.07071726859712725, "grad_norm": 0.516501247882843, "learning_rate": 9.999673633773248e-06, "loss": 0.5087, "step": 1541 }, { "epoch": 0.07076315910238172, "grad_norm": 0.4509405493736267, "learning_rate": 9.999670826365568e-06, "loss": 0.3612, "step": 1542 }, { "epoch": 0.07080904960763618, "grad_norm": 0.4723970293998718, "learning_rate": 9.99966800693526e-06, "loss": 0.4065, "step": 1543 }, { "epoch": 0.07085494011289065, "grad_norm": 0.46913012862205505, "learning_rate": 9.999665175482326e-06, "loss": 0.3502, "step": 1544 }, { "epoch": 0.0709008306181451, "grad_norm": 0.5791012644767761, "learning_rate": 9.999662332006774e-06, "loss": 0.4714, "step": 1545 }, { "epoch": 0.07094672112339957, "grad_norm": 0.4996468126773834, "learning_rate": 9.999659476508615e-06, "loss": 0.4715, "step": 1546 }, { "epoch": 0.07099261162865403, "grad_norm": 0.4880896806716919, "learning_rate": 9.99965660898785e-06, "loss": 0.4798, "step": 1547 }, { "epoch": 0.0710385021339085, "grad_norm": 0.480820894241333, "learning_rate": 9.99965372944449e-06, "loss": 0.3996, "step": 1548 }, { "epoch": 0.07108439263916295, "grad_norm": 0.5169656276702881, "learning_rate": 9.99965083787854e-06, "loss": 0.4567, "step": 1549 }, { "epoch": 0.07113028314441742, "grad_norm": 0.44148287177085876, "learning_rate": 9.99964793429001e-06, "loss": 0.3508, "step": 1550 }, { "epoch": 0.07117617364967188, "grad_norm": 0.45293569564819336, "learning_rate": 9.999645018678902e-06, "loss": 0.3841, "step": 1551 }, { "epoch": 0.07122206415492635, "grad_norm": 0.4716408848762512, "learning_rate": 9.999642091045227e-06, "loss": 0.3899, "step": 1552 }, { "epoch": 0.07126795466018081, "grad_norm": 0.4980045258998871, "learning_rate": 9.99963915138899e-06, "loss": 0.3948, "step": 1553 }, { "epoch": 0.07131384516543528, "grad_norm": 0.4896238446235657, "learning_rate": 9.999636199710201e-06, "loss": 0.4369, "step": 1554 }, { "epoch": 0.07135973567068973, "grad_norm": 0.4903663396835327, "learning_rate": 9.999633236008863e-06, "loss": 0.3711, "step": 1555 }, { "epoch": 0.0714056261759442, "grad_norm": 0.5232402682304382, "learning_rate": 9.999630260284985e-06, "loss": 0.4723, "step": 1556 }, { "epoch": 0.07145151668119866, "grad_norm": 0.4787043333053589, "learning_rate": 9.999627272538574e-06, "loss": 0.3576, "step": 1557 }, { "epoch": 0.07149740718645313, "grad_norm": 0.4703802466392517, "learning_rate": 9.999624272769638e-06, "loss": 0.381, "step": 1558 }, { "epoch": 0.07154329769170759, "grad_norm": 0.47272223234176636, "learning_rate": 9.999621260978184e-06, "loss": 0.4201, "step": 1559 }, { "epoch": 0.07158918819696204, "grad_norm": 0.4860653579235077, "learning_rate": 9.999618237164217e-06, "loss": 0.3986, "step": 1560 }, { "epoch": 0.07163507870221651, "grad_norm": 0.43770676851272583, "learning_rate": 9.999615201327747e-06, "loss": 0.3059, "step": 1561 }, { "epoch": 0.07168096920747097, "grad_norm": 0.5277722477912903, "learning_rate": 9.999612153468781e-06, "loss": 0.4428, "step": 1562 }, { "epoch": 0.07172685971272544, "grad_norm": 0.5587084889411926, "learning_rate": 9.999609093587325e-06, "loss": 0.5141, "step": 1563 }, { "epoch": 0.0717727502179799, "grad_norm": 0.47455084323883057, "learning_rate": 9.999606021683388e-06, "loss": 0.4299, "step": 1564 }, { "epoch": 0.07181864072323436, "grad_norm": 0.4605918228626251, "learning_rate": 9.999602937756975e-06, "loss": 0.4025, "step": 1565 }, { "epoch": 0.07186453122848882, "grad_norm": 0.5553957223892212, "learning_rate": 9.999599841808095e-06, "loss": 0.5459, "step": 1566 }, { "epoch": 0.07191042173374329, "grad_norm": 0.4999491572380066, "learning_rate": 9.999596733836756e-06, "loss": 0.4325, "step": 1567 }, { "epoch": 0.07195631223899775, "grad_norm": 0.5644165277481079, "learning_rate": 9.999593613842963e-06, "loss": 0.4809, "step": 1568 }, { "epoch": 0.07200220274425222, "grad_norm": 0.5210996866226196, "learning_rate": 9.99959048182673e-06, "loss": 0.4506, "step": 1569 }, { "epoch": 0.07204809324950667, "grad_norm": 0.5275651216506958, "learning_rate": 9.999587337788055e-06, "loss": 0.4472, "step": 1570 }, { "epoch": 0.07209398375476114, "grad_norm": 0.46705251932144165, "learning_rate": 9.99958418172695e-06, "loss": 0.3575, "step": 1571 }, { "epoch": 0.0721398742600156, "grad_norm": 0.496692955493927, "learning_rate": 9.999581013643427e-06, "loss": 0.3859, "step": 1572 }, { "epoch": 0.07218576476527007, "grad_norm": 0.5298336744308472, "learning_rate": 9.999577833537487e-06, "loss": 0.41, "step": 1573 }, { "epoch": 0.07223165527052453, "grad_norm": 0.4614573121070862, "learning_rate": 9.99957464140914e-06, "loss": 0.3891, "step": 1574 }, { "epoch": 0.072277545775779, "grad_norm": 0.5605151653289795, "learning_rate": 9.999571437258395e-06, "loss": 0.514, "step": 1575 }, { "epoch": 0.07232343628103345, "grad_norm": 0.4622315466403961, "learning_rate": 9.999568221085258e-06, "loss": 0.4008, "step": 1576 }, { "epoch": 0.07236932678628792, "grad_norm": 0.49091848731040955, "learning_rate": 9.999564992889739e-06, "loss": 0.4182, "step": 1577 }, { "epoch": 0.07241521729154238, "grad_norm": 0.5226359367370605, "learning_rate": 9.999561752671842e-06, "loss": 0.4755, "step": 1578 }, { "epoch": 0.07246110779679685, "grad_norm": 0.5172597765922546, "learning_rate": 9.999558500431578e-06, "loss": 0.39, "step": 1579 }, { "epoch": 0.0725069983020513, "grad_norm": 0.4999792277812958, "learning_rate": 9.999555236168954e-06, "loss": 0.4659, "step": 1580 }, { "epoch": 0.07255288880730577, "grad_norm": 0.5266298055648804, "learning_rate": 9.999551959883976e-06, "loss": 0.4457, "step": 1581 }, { "epoch": 0.07259877931256023, "grad_norm": 0.44880470633506775, "learning_rate": 9.999548671576655e-06, "loss": 0.3589, "step": 1582 }, { "epoch": 0.0726446698178147, "grad_norm": 0.49409666657447815, "learning_rate": 9.999545371246997e-06, "loss": 0.4132, "step": 1583 }, { "epoch": 0.07269056032306916, "grad_norm": 0.49254390597343445, "learning_rate": 9.999542058895009e-06, "loss": 0.3911, "step": 1584 }, { "epoch": 0.07273645082832363, "grad_norm": 0.5089792609214783, "learning_rate": 9.999538734520702e-06, "loss": 0.3997, "step": 1585 }, { "epoch": 0.07278234133357808, "grad_norm": 0.5009763836860657, "learning_rate": 9.99953539812408e-06, "loss": 0.3708, "step": 1586 }, { "epoch": 0.07282823183883255, "grad_norm": 0.4541475474834442, "learning_rate": 9.999532049705156e-06, "loss": 0.3192, "step": 1587 }, { "epoch": 0.07287412234408701, "grad_norm": 0.4755299687385559, "learning_rate": 9.999528689263933e-06, "loss": 0.4193, "step": 1588 }, { "epoch": 0.07292001284934146, "grad_norm": 0.4521019756793976, "learning_rate": 9.999525316800422e-06, "loss": 0.3407, "step": 1589 }, { "epoch": 0.07296590335459593, "grad_norm": 0.4645295739173889, "learning_rate": 9.999521932314632e-06, "loss": 0.3181, "step": 1590 }, { "epoch": 0.07301179385985039, "grad_norm": 0.500056266784668, "learning_rate": 9.999518535806568e-06, "loss": 0.4333, "step": 1591 }, { "epoch": 0.07305768436510486, "grad_norm": 0.4169314205646515, "learning_rate": 9.99951512727624e-06, "loss": 0.3031, "step": 1592 }, { "epoch": 0.07310357487035932, "grad_norm": 0.5232287645339966, "learning_rate": 9.999511706723657e-06, "loss": 0.4911, "step": 1593 }, { "epoch": 0.07314946537561379, "grad_norm": 0.5228270888328552, "learning_rate": 9.999508274148825e-06, "loss": 0.5135, "step": 1594 }, { "epoch": 0.07319535588086824, "grad_norm": 0.5087874531745911, "learning_rate": 9.999504829551755e-06, "loss": 0.4426, "step": 1595 }, { "epoch": 0.07324124638612271, "grad_norm": 0.5080674886703491, "learning_rate": 9.999501372932453e-06, "loss": 0.4309, "step": 1596 }, { "epoch": 0.07328713689137717, "grad_norm": 0.5303704142570496, "learning_rate": 9.999497904290928e-06, "loss": 0.5957, "step": 1597 }, { "epoch": 0.07333302739663164, "grad_norm": 0.5428318977355957, "learning_rate": 9.99949442362719e-06, "loss": 0.4981, "step": 1598 }, { "epoch": 0.0733789179018861, "grad_norm": 0.49112948775291443, "learning_rate": 9.999490930941243e-06, "loss": 0.4595, "step": 1599 }, { "epoch": 0.07342480840714057, "grad_norm": 0.4157106280326843, "learning_rate": 9.999487426233099e-06, "loss": 0.3169, "step": 1600 }, { "epoch": 0.07347069891239502, "grad_norm": 0.4652913510799408, "learning_rate": 9.999483909502767e-06, "loss": 0.392, "step": 1601 }, { "epoch": 0.07351658941764949, "grad_norm": 0.4506971538066864, "learning_rate": 9.999480380750253e-06, "loss": 0.3363, "step": 1602 }, { "epoch": 0.07356247992290395, "grad_norm": 0.5362367630004883, "learning_rate": 9.999476839975566e-06, "loss": 0.5811, "step": 1603 }, { "epoch": 0.07360837042815842, "grad_norm": 0.5057291388511658, "learning_rate": 9.999473287178715e-06, "loss": 0.4575, "step": 1604 }, { "epoch": 0.07365426093341287, "grad_norm": 0.48182326555252075, "learning_rate": 9.99946972235971e-06, "loss": 0.4301, "step": 1605 }, { "epoch": 0.07370015143866734, "grad_norm": 0.4803471565246582, "learning_rate": 9.999466145518558e-06, "loss": 0.4645, "step": 1606 }, { "epoch": 0.0737460419439218, "grad_norm": 0.5345284342765808, "learning_rate": 9.999462556655266e-06, "loss": 0.388, "step": 1607 }, { "epoch": 0.07379193244917627, "grad_norm": 0.47404131293296814, "learning_rate": 9.999458955769848e-06, "loss": 0.4262, "step": 1608 }, { "epoch": 0.07383782295443073, "grad_norm": 0.4565417468547821, "learning_rate": 9.999455342862305e-06, "loss": 0.3921, "step": 1609 }, { "epoch": 0.0738837134596852, "grad_norm": 0.40013477206230164, "learning_rate": 9.999451717932652e-06, "loss": 0.297, "step": 1610 }, { "epoch": 0.07392960396493965, "grad_norm": 0.4709477424621582, "learning_rate": 9.999448080980896e-06, "loss": 0.4011, "step": 1611 }, { "epoch": 0.07397549447019412, "grad_norm": 0.494823157787323, "learning_rate": 9.999444432007044e-06, "loss": 0.4454, "step": 1612 }, { "epoch": 0.07402138497544858, "grad_norm": 0.4898694157600403, "learning_rate": 9.999440771011106e-06, "loss": 0.4206, "step": 1613 }, { "epoch": 0.07406727548070305, "grad_norm": 0.4711502492427826, "learning_rate": 9.999437097993092e-06, "loss": 0.4097, "step": 1614 }, { "epoch": 0.0741131659859575, "grad_norm": 0.4736449420452118, "learning_rate": 9.999433412953009e-06, "loss": 0.4286, "step": 1615 }, { "epoch": 0.07415905649121197, "grad_norm": 0.48827502131462097, "learning_rate": 9.999429715890867e-06, "loss": 0.4054, "step": 1616 }, { "epoch": 0.07420494699646643, "grad_norm": 0.44157078862190247, "learning_rate": 9.999426006806671e-06, "loss": 0.3429, "step": 1617 }, { "epoch": 0.07425083750172089, "grad_norm": 0.4292319416999817, "learning_rate": 9.999422285700437e-06, "loss": 0.3686, "step": 1618 }, { "epoch": 0.07429672800697536, "grad_norm": 0.5420536398887634, "learning_rate": 9.99941855257217e-06, "loss": 0.4866, "step": 1619 }, { "epoch": 0.07434261851222981, "grad_norm": 0.488696426153183, "learning_rate": 9.99941480742188e-06, "loss": 0.4742, "step": 1620 }, { "epoch": 0.07438850901748428, "grad_norm": 0.4898790121078491, "learning_rate": 9.999411050249572e-06, "loss": 0.4159, "step": 1621 }, { "epoch": 0.07443439952273874, "grad_norm": 0.49666571617126465, "learning_rate": 9.99940728105526e-06, "loss": 0.3826, "step": 1622 }, { "epoch": 0.07448029002799321, "grad_norm": 0.5466395020484924, "learning_rate": 9.999403499838953e-06, "loss": 0.4834, "step": 1623 }, { "epoch": 0.07452618053324767, "grad_norm": 0.4694845676422119, "learning_rate": 9.999399706600656e-06, "loss": 0.383, "step": 1624 }, { "epoch": 0.07457207103850214, "grad_norm": 0.45946910977363586, "learning_rate": 9.99939590134038e-06, "loss": 0.4152, "step": 1625 }, { "epoch": 0.07461796154375659, "grad_norm": 0.47266894578933716, "learning_rate": 9.999392084058138e-06, "loss": 0.3846, "step": 1626 }, { "epoch": 0.07466385204901106, "grad_norm": 0.4746292233467102, "learning_rate": 9.999388254753934e-06, "loss": 0.4409, "step": 1627 }, { "epoch": 0.07470974255426552, "grad_norm": 0.44414758682250977, "learning_rate": 9.99938441342778e-06, "loss": 0.3678, "step": 1628 }, { "epoch": 0.07475563305951999, "grad_norm": 0.4766943156719208, "learning_rate": 9.999380560079682e-06, "loss": 0.4166, "step": 1629 }, { "epoch": 0.07480152356477444, "grad_norm": 0.7866554856300354, "learning_rate": 9.999376694709654e-06, "loss": 0.6537, "step": 1630 }, { "epoch": 0.07484741407002891, "grad_norm": 0.44019708037376404, "learning_rate": 9.999372817317701e-06, "loss": 0.3407, "step": 1631 }, { "epoch": 0.07489330457528337, "grad_norm": 0.5445822477340698, "learning_rate": 9.999368927903837e-06, "loss": 0.4334, "step": 1632 }, { "epoch": 0.07493919508053784, "grad_norm": 0.49358266592025757, "learning_rate": 9.999365026468066e-06, "loss": 0.4076, "step": 1633 }, { "epoch": 0.0749850855857923, "grad_norm": 0.5387043952941895, "learning_rate": 9.9993611130104e-06, "loss": 0.4127, "step": 1634 }, { "epoch": 0.07503097609104677, "grad_norm": 0.47857046127319336, "learning_rate": 9.99935718753085e-06, "loss": 0.4418, "step": 1635 }, { "epoch": 0.07507686659630122, "grad_norm": 0.5949978828430176, "learning_rate": 9.999353250029422e-06, "loss": 0.5505, "step": 1636 }, { "epoch": 0.07512275710155569, "grad_norm": 0.5482997894287109, "learning_rate": 9.999349300506129e-06, "loss": 0.4237, "step": 1637 }, { "epoch": 0.07516864760681015, "grad_norm": 0.47320905327796936, "learning_rate": 9.999345338960977e-06, "loss": 0.3847, "step": 1638 }, { "epoch": 0.07521453811206462, "grad_norm": 0.4918878674507141, "learning_rate": 9.999341365393979e-06, "loss": 0.3908, "step": 1639 }, { "epoch": 0.07526042861731908, "grad_norm": 0.5108606815338135, "learning_rate": 9.999337379805142e-06, "loss": 0.3645, "step": 1640 }, { "epoch": 0.07530631912257355, "grad_norm": 0.5490548014640808, "learning_rate": 9.999333382194476e-06, "loss": 0.4629, "step": 1641 }, { "epoch": 0.075352209627828, "grad_norm": 0.466717392206192, "learning_rate": 9.999329372561992e-06, "loss": 0.3738, "step": 1642 }, { "epoch": 0.07539810013308247, "grad_norm": 0.4765077233314514, "learning_rate": 9.999325350907697e-06, "loss": 0.3859, "step": 1643 }, { "epoch": 0.07544399063833693, "grad_norm": 0.5266830325126648, "learning_rate": 9.999321317231604e-06, "loss": 0.415, "step": 1644 }, { "epoch": 0.0754898811435914, "grad_norm": 0.690089762210846, "learning_rate": 9.999317271533722e-06, "loss": 0.4348, "step": 1645 }, { "epoch": 0.07553577164884585, "grad_norm": 0.47478124499320984, "learning_rate": 9.999313213814058e-06, "loss": 0.4176, "step": 1646 }, { "epoch": 0.07558166215410031, "grad_norm": 0.46994373202323914, "learning_rate": 9.999309144072624e-06, "loss": 0.3919, "step": 1647 }, { "epoch": 0.07562755265935478, "grad_norm": 0.500823438167572, "learning_rate": 9.99930506230943e-06, "loss": 0.3488, "step": 1648 }, { "epoch": 0.07567344316460924, "grad_norm": 0.5512404441833496, "learning_rate": 9.999300968524485e-06, "loss": 0.5014, "step": 1649 }, { "epoch": 0.0757193336698637, "grad_norm": 0.5216124057769775, "learning_rate": 9.999296862717799e-06, "loss": 0.423, "step": 1650 }, { "epoch": 0.07576522417511816, "grad_norm": 0.4777539372444153, "learning_rate": 9.99929274488938e-06, "loss": 0.3984, "step": 1651 }, { "epoch": 0.07581111468037263, "grad_norm": 0.47649621963500977, "learning_rate": 9.999288615039242e-06, "loss": 0.3922, "step": 1652 }, { "epoch": 0.07585700518562709, "grad_norm": 0.4488726556301117, "learning_rate": 9.999284473167392e-06, "loss": 0.376, "step": 1653 }, { "epoch": 0.07590289569088156, "grad_norm": 0.5228418111801147, "learning_rate": 9.999280319273841e-06, "loss": 0.4731, "step": 1654 }, { "epoch": 0.07594878619613601, "grad_norm": 0.5244832038879395, "learning_rate": 9.999276153358599e-06, "loss": 0.5163, "step": 1655 }, { "epoch": 0.07599467670139048, "grad_norm": 0.5477701425552368, "learning_rate": 9.999271975421673e-06, "loss": 0.4904, "step": 1656 }, { "epoch": 0.07604056720664494, "grad_norm": 0.5430203676223755, "learning_rate": 9.999267785463078e-06, "loss": 0.5424, "step": 1657 }, { "epoch": 0.07608645771189941, "grad_norm": 0.4881824553012848, "learning_rate": 9.999263583482821e-06, "loss": 0.3996, "step": 1658 }, { "epoch": 0.07613234821715387, "grad_norm": 0.5316872596740723, "learning_rate": 9.999259369480914e-06, "loss": 0.4728, "step": 1659 }, { "epoch": 0.07617823872240834, "grad_norm": 0.4986788034439087, "learning_rate": 9.999255143457365e-06, "loss": 0.4431, "step": 1660 }, { "epoch": 0.07622412922766279, "grad_norm": 0.4756495952606201, "learning_rate": 9.999250905412185e-06, "loss": 0.4193, "step": 1661 }, { "epoch": 0.07627001973291726, "grad_norm": 0.5116938352584839, "learning_rate": 9.999246655345386e-06, "loss": 0.4547, "step": 1662 }, { "epoch": 0.07631591023817172, "grad_norm": 0.46892058849334717, "learning_rate": 9.999242393256976e-06, "loss": 0.3562, "step": 1663 }, { "epoch": 0.07636180074342619, "grad_norm": 0.542378842830658, "learning_rate": 9.999238119146964e-06, "loss": 0.5119, "step": 1664 }, { "epoch": 0.07640769124868065, "grad_norm": 0.44915884733200073, "learning_rate": 9.999233833015364e-06, "loss": 0.3532, "step": 1665 }, { "epoch": 0.07645358175393512, "grad_norm": 0.4248468577861786, "learning_rate": 9.999229534862185e-06, "loss": 0.3035, "step": 1666 }, { "epoch": 0.07649947225918957, "grad_norm": 0.4087550938129425, "learning_rate": 9.999225224687435e-06, "loss": 0.3022, "step": 1667 }, { "epoch": 0.07654536276444404, "grad_norm": 0.4679374694824219, "learning_rate": 9.999220902491127e-06, "loss": 0.4054, "step": 1668 }, { "epoch": 0.0765912532696985, "grad_norm": 0.501705527305603, "learning_rate": 9.99921656827327e-06, "loss": 0.4336, "step": 1669 }, { "epoch": 0.07663714377495297, "grad_norm": 0.5003342628479004, "learning_rate": 9.999212222033877e-06, "loss": 0.5029, "step": 1670 }, { "epoch": 0.07668303428020742, "grad_norm": 0.5207168459892273, "learning_rate": 9.999207863772952e-06, "loss": 0.4937, "step": 1671 }, { "epoch": 0.0767289247854619, "grad_norm": 0.44004499912261963, "learning_rate": 9.999203493490513e-06, "loss": 0.3677, "step": 1672 }, { "epoch": 0.07677481529071635, "grad_norm": 0.46720001101493835, "learning_rate": 9.999199111186569e-06, "loss": 0.3987, "step": 1673 }, { "epoch": 0.07682070579597082, "grad_norm": 0.5164360404014587, "learning_rate": 9.999194716861125e-06, "loss": 0.4235, "step": 1674 }, { "epoch": 0.07686659630122528, "grad_norm": 0.448982298374176, "learning_rate": 9.999190310514198e-06, "loss": 0.3607, "step": 1675 }, { "epoch": 0.07691248680647975, "grad_norm": 0.44107311964035034, "learning_rate": 9.999185892145795e-06, "loss": 0.3519, "step": 1676 }, { "epoch": 0.0769583773117342, "grad_norm": 0.5092818737030029, "learning_rate": 9.999181461755929e-06, "loss": 0.4563, "step": 1677 }, { "epoch": 0.07700426781698866, "grad_norm": 0.5030384659767151, "learning_rate": 9.999177019344607e-06, "loss": 0.5062, "step": 1678 }, { "epoch": 0.07705015832224313, "grad_norm": 0.5244572758674622, "learning_rate": 9.999172564911845e-06, "loss": 0.4991, "step": 1679 }, { "epoch": 0.07709604882749758, "grad_norm": 0.46394649147987366, "learning_rate": 9.99916809845765e-06, "loss": 0.435, "step": 1680 }, { "epoch": 0.07714193933275205, "grad_norm": 0.4839397370815277, "learning_rate": 9.999163619982033e-06, "loss": 0.3944, "step": 1681 }, { "epoch": 0.07718782983800651, "grad_norm": 0.4912411868572235, "learning_rate": 9.999159129485004e-06, "loss": 0.4847, "step": 1682 }, { "epoch": 0.07723372034326098, "grad_norm": 0.5226150751113892, "learning_rate": 9.999154626966576e-06, "loss": 0.4746, "step": 1683 }, { "epoch": 0.07727961084851544, "grad_norm": 0.540511965751648, "learning_rate": 9.99915011242676e-06, "loss": 0.493, "step": 1684 }, { "epoch": 0.07732550135376991, "grad_norm": 0.5299343466758728, "learning_rate": 9.999145585865565e-06, "loss": 0.4382, "step": 1685 }, { "epoch": 0.07737139185902436, "grad_norm": 0.5273798704147339, "learning_rate": 9.999141047283003e-06, "loss": 0.5291, "step": 1686 }, { "epoch": 0.07741728236427883, "grad_norm": 0.5626891851425171, "learning_rate": 9.999136496679082e-06, "loss": 0.531, "step": 1687 }, { "epoch": 0.07746317286953329, "grad_norm": 0.5069789290428162, "learning_rate": 9.999131934053818e-06, "loss": 0.5268, "step": 1688 }, { "epoch": 0.07750906337478776, "grad_norm": 0.4648505747318268, "learning_rate": 9.99912735940722e-06, "loss": 0.3655, "step": 1689 }, { "epoch": 0.07755495388004222, "grad_norm": 0.4760192334651947, "learning_rate": 9.999122772739299e-06, "loss": 0.4099, "step": 1690 }, { "epoch": 0.07760084438529669, "grad_norm": 0.47898706793785095, "learning_rate": 9.999118174050063e-06, "loss": 0.418, "step": 1691 }, { "epoch": 0.07764673489055114, "grad_norm": 0.5100760459899902, "learning_rate": 9.999113563339527e-06, "loss": 0.5028, "step": 1692 }, { "epoch": 0.07769262539580561, "grad_norm": 0.4605445861816406, "learning_rate": 9.9991089406077e-06, "loss": 0.4021, "step": 1693 }, { "epoch": 0.07773851590106007, "grad_norm": 0.5177266001701355, "learning_rate": 9.999104305854595e-06, "loss": 0.4719, "step": 1694 }, { "epoch": 0.07778440640631454, "grad_norm": 0.474923700094223, "learning_rate": 9.99909965908022e-06, "loss": 0.347, "step": 1695 }, { "epoch": 0.077830296911569, "grad_norm": 0.4658982753753662, "learning_rate": 9.999095000284588e-06, "loss": 0.3567, "step": 1696 }, { "epoch": 0.07787618741682346, "grad_norm": 0.4969722628593445, "learning_rate": 9.999090329467712e-06, "loss": 0.3779, "step": 1697 }, { "epoch": 0.07792207792207792, "grad_norm": 0.4199715256690979, "learning_rate": 9.9990856466296e-06, "loss": 0.3473, "step": 1698 }, { "epoch": 0.07796796842733239, "grad_norm": 0.5353632569313049, "learning_rate": 9.999080951770266e-06, "loss": 0.5526, "step": 1699 }, { "epoch": 0.07801385893258685, "grad_norm": 0.5019856095314026, "learning_rate": 9.99907624488972e-06, "loss": 0.4686, "step": 1700 }, { "epoch": 0.07805974943784132, "grad_norm": 0.48264744877815247, "learning_rate": 9.999071525987973e-06, "loss": 0.356, "step": 1701 }, { "epoch": 0.07810563994309577, "grad_norm": 0.4562687277793884, "learning_rate": 9.999066795065037e-06, "loss": 0.3239, "step": 1702 }, { "epoch": 0.07815153044835024, "grad_norm": 0.5122276544570923, "learning_rate": 9.999062052120922e-06, "loss": 0.4815, "step": 1703 }, { "epoch": 0.0781974209536047, "grad_norm": 0.505256175994873, "learning_rate": 9.999057297155642e-06, "loss": 0.4016, "step": 1704 }, { "epoch": 0.07824331145885917, "grad_norm": 0.46977514028549194, "learning_rate": 9.999052530169207e-06, "loss": 0.3919, "step": 1705 }, { "epoch": 0.07828920196411362, "grad_norm": 0.4590289890766144, "learning_rate": 9.999047751161629e-06, "loss": 0.3469, "step": 1706 }, { "epoch": 0.07833509246936808, "grad_norm": 0.4907650649547577, "learning_rate": 9.999042960132917e-06, "loss": 0.3969, "step": 1707 }, { "epoch": 0.07838098297462255, "grad_norm": 0.421985387802124, "learning_rate": 9.999038157083084e-06, "loss": 0.2934, "step": 1708 }, { "epoch": 0.07842687347987701, "grad_norm": 0.5014013648033142, "learning_rate": 9.999033342012143e-06, "loss": 0.3988, "step": 1709 }, { "epoch": 0.07847276398513148, "grad_norm": 0.43560871481895447, "learning_rate": 9.999028514920104e-06, "loss": 0.3443, "step": 1710 }, { "epoch": 0.07851865449038593, "grad_norm": 0.49493759870529175, "learning_rate": 9.99902367580698e-06, "loss": 0.443, "step": 1711 }, { "epoch": 0.0785645449956404, "grad_norm": 0.4802602529525757, "learning_rate": 9.999018824672781e-06, "loss": 0.4454, "step": 1712 }, { "epoch": 0.07861043550089486, "grad_norm": 0.4706229269504547, "learning_rate": 9.999013961517521e-06, "loss": 0.3732, "step": 1713 }, { "epoch": 0.07865632600614933, "grad_norm": 0.5777989029884338, "learning_rate": 9.999009086341209e-06, "loss": 0.5767, "step": 1714 }, { "epoch": 0.07870221651140379, "grad_norm": 0.4825112819671631, "learning_rate": 9.999004199143858e-06, "loss": 0.3597, "step": 1715 }, { "epoch": 0.07874810701665826, "grad_norm": 0.5652618408203125, "learning_rate": 9.99899929992548e-06, "loss": 0.5087, "step": 1716 }, { "epoch": 0.07879399752191271, "grad_norm": 0.5355689525604248, "learning_rate": 9.998994388686086e-06, "loss": 0.4749, "step": 1717 }, { "epoch": 0.07883988802716718, "grad_norm": 0.4698161482810974, "learning_rate": 9.998989465425689e-06, "loss": 0.3432, "step": 1718 }, { "epoch": 0.07888577853242164, "grad_norm": 0.4778725802898407, "learning_rate": 9.9989845301443e-06, "loss": 0.4306, "step": 1719 }, { "epoch": 0.07893166903767611, "grad_norm": 0.4972658157348633, "learning_rate": 9.99897958284193e-06, "loss": 0.4225, "step": 1720 }, { "epoch": 0.07897755954293056, "grad_norm": 0.48646584153175354, "learning_rate": 9.998974623518592e-06, "loss": 0.4428, "step": 1721 }, { "epoch": 0.07902345004818503, "grad_norm": 0.5065861940383911, "learning_rate": 9.998969652174297e-06, "loss": 0.4162, "step": 1722 }, { "epoch": 0.07906934055343949, "grad_norm": 0.5074966549873352, "learning_rate": 9.998964668809058e-06, "loss": 0.4307, "step": 1723 }, { "epoch": 0.07911523105869396, "grad_norm": 0.5008343458175659, "learning_rate": 9.998959673422887e-06, "loss": 0.4689, "step": 1724 }, { "epoch": 0.07916112156394842, "grad_norm": 0.5037351250648499, "learning_rate": 9.998954666015797e-06, "loss": 0.4365, "step": 1725 }, { "epoch": 0.07920701206920289, "grad_norm": 0.4351848363876343, "learning_rate": 9.998949646587798e-06, "loss": 0.3675, "step": 1726 }, { "epoch": 0.07925290257445734, "grad_norm": 0.5463142991065979, "learning_rate": 9.998944615138903e-06, "loss": 0.4844, "step": 1727 }, { "epoch": 0.07929879307971181, "grad_norm": 0.5145556926727295, "learning_rate": 9.998939571669123e-06, "loss": 0.5189, "step": 1728 }, { "epoch": 0.07934468358496627, "grad_norm": 0.493591845035553, "learning_rate": 9.998934516178471e-06, "loss": 0.4383, "step": 1729 }, { "epoch": 0.07939057409022074, "grad_norm": 0.4987589120864868, "learning_rate": 9.998929448666962e-06, "loss": 0.4852, "step": 1730 }, { "epoch": 0.0794364645954752, "grad_norm": 0.46000444889068604, "learning_rate": 9.998924369134601e-06, "loss": 0.3748, "step": 1731 }, { "epoch": 0.07948235510072967, "grad_norm": 0.4645262062549591, "learning_rate": 9.998919277581406e-06, "loss": 0.3641, "step": 1732 }, { "epoch": 0.07952824560598412, "grad_norm": 0.46884673833847046, "learning_rate": 9.99891417400739e-06, "loss": 0.3751, "step": 1733 }, { "epoch": 0.07957413611123859, "grad_norm": 0.51549232006073, "learning_rate": 9.99890905841256e-06, "loss": 0.4764, "step": 1734 }, { "epoch": 0.07962002661649305, "grad_norm": 0.45478355884552, "learning_rate": 9.998903930796935e-06, "loss": 0.368, "step": 1735 }, { "epoch": 0.0796659171217475, "grad_norm": 0.5080839395523071, "learning_rate": 9.99889879116052e-06, "loss": 0.4487, "step": 1736 }, { "epoch": 0.07971180762700197, "grad_norm": 0.4810938835144043, "learning_rate": 9.998893639503334e-06, "loss": 0.4262, "step": 1737 }, { "epoch": 0.07975769813225643, "grad_norm": 0.4891236424446106, "learning_rate": 9.998888475825385e-06, "loss": 0.3694, "step": 1738 }, { "epoch": 0.0798035886375109, "grad_norm": 0.506584107875824, "learning_rate": 9.998883300126687e-06, "loss": 0.4447, "step": 1739 }, { "epoch": 0.07984947914276536, "grad_norm": 0.5336281061172485, "learning_rate": 9.998878112407254e-06, "loss": 0.4713, "step": 1740 }, { "epoch": 0.07989536964801983, "grad_norm": 0.5062115788459778, "learning_rate": 9.998872912667094e-06, "loss": 0.5001, "step": 1741 }, { "epoch": 0.07994126015327428, "grad_norm": 0.4527471363544464, "learning_rate": 9.998867700906225e-06, "loss": 0.3423, "step": 1742 }, { "epoch": 0.07998715065852875, "grad_norm": 0.4902063310146332, "learning_rate": 9.998862477124656e-06, "loss": 0.4471, "step": 1743 }, { "epoch": 0.08003304116378321, "grad_norm": 0.5510109663009644, "learning_rate": 9.9988572413224e-06, "loss": 0.4737, "step": 1744 }, { "epoch": 0.08007893166903768, "grad_norm": 0.4556294083595276, "learning_rate": 9.99885199349947e-06, "loss": 0.3612, "step": 1745 }, { "epoch": 0.08012482217429213, "grad_norm": 0.4711126387119293, "learning_rate": 9.998846733655879e-06, "loss": 0.4123, "step": 1746 }, { "epoch": 0.0801707126795466, "grad_norm": 0.5133438110351562, "learning_rate": 9.99884146179164e-06, "loss": 0.437, "step": 1747 }, { "epoch": 0.08021660318480106, "grad_norm": 0.5058325529098511, "learning_rate": 9.998836177906763e-06, "loss": 0.385, "step": 1748 }, { "epoch": 0.08026249369005553, "grad_norm": 0.45551466941833496, "learning_rate": 9.998830882001263e-06, "loss": 0.3553, "step": 1749 }, { "epoch": 0.08030838419530999, "grad_norm": 0.4666111469268799, "learning_rate": 9.998825574075154e-06, "loss": 0.3602, "step": 1750 }, { "epoch": 0.08035427470056446, "grad_norm": 0.45222869515419006, "learning_rate": 9.998820254128446e-06, "loss": 0.3608, "step": 1751 }, { "epoch": 0.08040016520581891, "grad_norm": 0.4758358299732208, "learning_rate": 9.998814922161153e-06, "loss": 0.4731, "step": 1752 }, { "epoch": 0.08044605571107338, "grad_norm": 0.45364636182785034, "learning_rate": 9.998809578173288e-06, "loss": 0.3556, "step": 1753 }, { "epoch": 0.08049194621632784, "grad_norm": 0.4644677937030792, "learning_rate": 9.998804222164864e-06, "loss": 0.3477, "step": 1754 }, { "epoch": 0.08053783672158231, "grad_norm": 0.5056809782981873, "learning_rate": 9.998798854135894e-06, "loss": 0.4897, "step": 1755 }, { "epoch": 0.08058372722683677, "grad_norm": 0.4946051836013794, "learning_rate": 9.99879347408639e-06, "loss": 0.4263, "step": 1756 }, { "epoch": 0.08062961773209124, "grad_norm": 0.4351504445075989, "learning_rate": 9.998788082016364e-06, "loss": 0.3642, "step": 1757 }, { "epoch": 0.08067550823734569, "grad_norm": 0.48808377981185913, "learning_rate": 9.998782677925832e-06, "loss": 0.398, "step": 1758 }, { "epoch": 0.08072139874260016, "grad_norm": 0.4811568558216095, "learning_rate": 9.998777261814804e-06, "loss": 0.4454, "step": 1759 }, { "epoch": 0.08076728924785462, "grad_norm": 0.46869370341300964, "learning_rate": 9.998771833683294e-06, "loss": 0.3588, "step": 1760 }, { "epoch": 0.08081317975310909, "grad_norm": 0.45088866353034973, "learning_rate": 9.998766393531318e-06, "loss": 0.3422, "step": 1761 }, { "epoch": 0.08085907025836354, "grad_norm": 0.5257999897003174, "learning_rate": 9.998760941358884e-06, "loss": 0.5018, "step": 1762 }, { "epoch": 0.08090496076361801, "grad_norm": 0.49018165469169617, "learning_rate": 9.99875547716601e-06, "loss": 0.4011, "step": 1763 }, { "epoch": 0.08095085126887247, "grad_norm": 0.47185319662094116, "learning_rate": 9.998750000952703e-06, "loss": 0.4002, "step": 1764 }, { "epoch": 0.08099674177412693, "grad_norm": 0.4556969404220581, "learning_rate": 9.998744512718984e-06, "loss": 0.3721, "step": 1765 }, { "epoch": 0.0810426322793814, "grad_norm": 0.496563583612442, "learning_rate": 9.998739012464859e-06, "loss": 0.4128, "step": 1766 }, { "epoch": 0.08108852278463585, "grad_norm": 0.4539939761161804, "learning_rate": 9.998733500190345e-06, "loss": 0.3447, "step": 1767 }, { "epoch": 0.08113441328989032, "grad_norm": 0.4825995862483978, "learning_rate": 9.998727975895454e-06, "loss": 0.4731, "step": 1768 }, { "epoch": 0.08118030379514478, "grad_norm": 0.47168925404548645, "learning_rate": 9.998722439580203e-06, "loss": 0.4643, "step": 1769 }, { "epoch": 0.08122619430039925, "grad_norm": 0.5266677141189575, "learning_rate": 9.9987168912446e-06, "loss": 0.5071, "step": 1770 }, { "epoch": 0.0812720848056537, "grad_norm": 0.4776930809020996, "learning_rate": 9.99871133088866e-06, "loss": 0.4359, "step": 1771 }, { "epoch": 0.08131797531090817, "grad_norm": 0.476874440908432, "learning_rate": 9.998705758512397e-06, "loss": 0.453, "step": 1772 }, { "epoch": 0.08136386581616263, "grad_norm": 0.48035696148872375, "learning_rate": 9.998700174115825e-06, "loss": 0.4577, "step": 1773 }, { "epoch": 0.0814097563214171, "grad_norm": 0.5677808523178101, "learning_rate": 9.998694577698957e-06, "loss": 0.3808, "step": 1774 }, { "epoch": 0.08145564682667156, "grad_norm": 0.5057392120361328, "learning_rate": 9.998688969261806e-06, "loss": 0.4445, "step": 1775 }, { "epoch": 0.08150153733192603, "grad_norm": 0.46602535247802734, "learning_rate": 9.998683348804386e-06, "loss": 0.3993, "step": 1776 }, { "epoch": 0.08154742783718048, "grad_norm": 0.5494076609611511, "learning_rate": 9.998677716326707e-06, "loss": 0.5327, "step": 1777 }, { "epoch": 0.08159331834243495, "grad_norm": 0.4570852220058441, "learning_rate": 9.99867207182879e-06, "loss": 0.3816, "step": 1778 }, { "epoch": 0.08163920884768941, "grad_norm": 0.5257952809333801, "learning_rate": 9.998666415310642e-06, "loss": 0.5145, "step": 1779 }, { "epoch": 0.08168509935294388, "grad_norm": 0.5287887454032898, "learning_rate": 9.99866074677228e-06, "loss": 0.525, "step": 1780 }, { "epoch": 0.08173098985819834, "grad_norm": 0.46291640400886536, "learning_rate": 9.998655066213716e-06, "loss": 0.339, "step": 1781 }, { "epoch": 0.0817768803634528, "grad_norm": 0.48628246784210205, "learning_rate": 9.998649373634963e-06, "loss": 0.4235, "step": 1782 }, { "epoch": 0.08182277086870726, "grad_norm": 0.46783190965652466, "learning_rate": 9.998643669036038e-06, "loss": 0.4047, "step": 1783 }, { "epoch": 0.08186866137396173, "grad_norm": 0.4377851188182831, "learning_rate": 9.998637952416952e-06, "loss": 0.3505, "step": 1784 }, { "epoch": 0.08191455187921619, "grad_norm": 0.5219147801399231, "learning_rate": 9.998632223777718e-06, "loss": 0.4853, "step": 1785 }, { "epoch": 0.08196044238447066, "grad_norm": 0.5514872670173645, "learning_rate": 9.998626483118352e-06, "loss": 0.5059, "step": 1786 }, { "epoch": 0.08200633288972511, "grad_norm": 0.48154398798942566, "learning_rate": 9.998620730438868e-06, "loss": 0.3723, "step": 1787 }, { "epoch": 0.08205222339497958, "grad_norm": 0.4764332175254822, "learning_rate": 9.998614965739278e-06, "loss": 0.4291, "step": 1788 }, { "epoch": 0.08209811390023404, "grad_norm": 0.49761006236076355, "learning_rate": 9.998609189019594e-06, "loss": 0.4565, "step": 1789 }, { "epoch": 0.08214400440548851, "grad_norm": 0.49671006202697754, "learning_rate": 9.998603400279837e-06, "loss": 0.4208, "step": 1790 }, { "epoch": 0.08218989491074297, "grad_norm": 0.47068002820014954, "learning_rate": 9.998597599520013e-06, "loss": 0.3703, "step": 1791 }, { "epoch": 0.08223578541599744, "grad_norm": 0.4687693417072296, "learning_rate": 9.998591786740142e-06, "loss": 0.4059, "step": 1792 }, { "epoch": 0.08228167592125189, "grad_norm": 0.46863317489624023, "learning_rate": 9.998585961940232e-06, "loss": 0.3568, "step": 1793 }, { "epoch": 0.08232756642650635, "grad_norm": 0.453643798828125, "learning_rate": 9.998580125120303e-06, "loss": 0.3677, "step": 1794 }, { "epoch": 0.08237345693176082, "grad_norm": 0.4481407403945923, "learning_rate": 9.998574276280367e-06, "loss": 0.3202, "step": 1795 }, { "epoch": 0.08241934743701528, "grad_norm": 0.5013203024864197, "learning_rate": 9.998568415420435e-06, "loss": 0.4275, "step": 1796 }, { "epoch": 0.08246523794226975, "grad_norm": 0.5677851438522339, "learning_rate": 9.998562542540526e-06, "loss": 0.5211, "step": 1797 }, { "epoch": 0.0825111284475242, "grad_norm": 0.47754719853401184, "learning_rate": 9.99855665764065e-06, "loss": 0.4155, "step": 1798 }, { "epoch": 0.08255701895277867, "grad_norm": 0.49698135256767273, "learning_rate": 9.998550760720823e-06, "loss": 0.391, "step": 1799 }, { "epoch": 0.08260290945803313, "grad_norm": 0.5295330286026001, "learning_rate": 9.99854485178106e-06, "loss": 0.4011, "step": 1800 }, { "epoch": 0.0826487999632876, "grad_norm": 0.4918323755264282, "learning_rate": 9.998538930821373e-06, "loss": 0.3698, "step": 1801 }, { "epoch": 0.08269469046854205, "grad_norm": 0.5112568736076355, "learning_rate": 9.998532997841778e-06, "loss": 0.425, "step": 1802 }, { "epoch": 0.08274058097379652, "grad_norm": 0.5109966993331909, "learning_rate": 9.998527052842289e-06, "loss": 0.4505, "step": 1803 }, { "epoch": 0.08278647147905098, "grad_norm": 0.5352508425712585, "learning_rate": 9.99852109582292e-06, "loss": 0.4678, "step": 1804 }, { "epoch": 0.08283236198430545, "grad_norm": 0.5146759748458862, "learning_rate": 9.998515126783685e-06, "loss": 0.4879, "step": 1805 }, { "epoch": 0.0828782524895599, "grad_norm": 0.4801282286643982, "learning_rate": 9.998509145724597e-06, "loss": 0.4034, "step": 1806 }, { "epoch": 0.08292414299481438, "grad_norm": 0.48487892746925354, "learning_rate": 9.998503152645675e-06, "loss": 0.4516, "step": 1807 }, { "epoch": 0.08297003350006883, "grad_norm": 0.5353267788887024, "learning_rate": 9.99849714754693e-06, "loss": 0.4417, "step": 1808 }, { "epoch": 0.0830159240053233, "grad_norm": 0.5028082132339478, "learning_rate": 9.998491130428376e-06, "loss": 0.4558, "step": 1809 }, { "epoch": 0.08306181451057776, "grad_norm": 0.49690622091293335, "learning_rate": 9.998485101290028e-06, "loss": 0.4304, "step": 1810 }, { "epoch": 0.08310770501583223, "grad_norm": 0.4832867980003357, "learning_rate": 9.9984790601319e-06, "loss": 0.3484, "step": 1811 }, { "epoch": 0.08315359552108668, "grad_norm": 0.46231919527053833, "learning_rate": 9.99847300695401e-06, "loss": 0.3558, "step": 1812 }, { "epoch": 0.08319948602634115, "grad_norm": 0.4444623589515686, "learning_rate": 9.99846694175637e-06, "loss": 0.3532, "step": 1813 }, { "epoch": 0.08324537653159561, "grad_norm": 0.493055135011673, "learning_rate": 9.998460864538992e-06, "loss": 0.4317, "step": 1814 }, { "epoch": 0.08329126703685008, "grad_norm": 0.47260966897010803, "learning_rate": 9.998454775301896e-06, "loss": 0.402, "step": 1815 }, { "epoch": 0.08333715754210454, "grad_norm": 0.49794045090675354, "learning_rate": 9.998448674045093e-06, "loss": 0.4521, "step": 1816 }, { "epoch": 0.083383048047359, "grad_norm": 0.47017616033554077, "learning_rate": 9.998442560768597e-06, "loss": 0.401, "step": 1817 }, { "epoch": 0.08342893855261346, "grad_norm": 0.5197916030883789, "learning_rate": 9.998436435472426e-06, "loss": 0.4634, "step": 1818 }, { "epoch": 0.08347482905786793, "grad_norm": 0.4560147821903229, "learning_rate": 9.998430298156591e-06, "loss": 0.4195, "step": 1819 }, { "epoch": 0.08352071956312239, "grad_norm": 0.49685657024383545, "learning_rate": 9.99842414882111e-06, "loss": 0.4512, "step": 1820 }, { "epoch": 0.08356661006837686, "grad_norm": 0.49072790145874023, "learning_rate": 9.998417987465998e-06, "loss": 0.4078, "step": 1821 }, { "epoch": 0.08361250057363132, "grad_norm": 0.5176905393600464, "learning_rate": 9.998411814091266e-06, "loss": 0.4949, "step": 1822 }, { "epoch": 0.08365839107888579, "grad_norm": 0.4655018150806427, "learning_rate": 9.998405628696931e-06, "loss": 0.3251, "step": 1823 }, { "epoch": 0.08370428158414024, "grad_norm": 0.48832565546035767, "learning_rate": 9.99839943128301e-06, "loss": 0.4302, "step": 1824 }, { "epoch": 0.0837501720893947, "grad_norm": 0.5239647030830383, "learning_rate": 9.998393221849514e-06, "loss": 0.4541, "step": 1825 }, { "epoch": 0.08379606259464917, "grad_norm": 0.5271238684654236, "learning_rate": 9.99838700039646e-06, "loss": 0.5473, "step": 1826 }, { "epoch": 0.08384195309990362, "grad_norm": 0.47936439514160156, "learning_rate": 9.998380766923864e-06, "loss": 0.4519, "step": 1827 }, { "epoch": 0.0838878436051581, "grad_norm": 0.4982088506221771, "learning_rate": 9.998374521431739e-06, "loss": 0.3711, "step": 1828 }, { "epoch": 0.08393373411041255, "grad_norm": 0.5050623416900635, "learning_rate": 9.9983682639201e-06, "loss": 0.402, "step": 1829 }, { "epoch": 0.08397962461566702, "grad_norm": 0.42724063992500305, "learning_rate": 9.998361994388964e-06, "loss": 0.358, "step": 1830 }, { "epoch": 0.08402551512092148, "grad_norm": 0.4541013836860657, "learning_rate": 9.998355712838345e-06, "loss": 0.3718, "step": 1831 }, { "epoch": 0.08407140562617595, "grad_norm": 0.42599621415138245, "learning_rate": 9.998349419268257e-06, "loss": 0.3117, "step": 1832 }, { "epoch": 0.0841172961314304, "grad_norm": 0.5127098560333252, "learning_rate": 9.998343113678716e-06, "loss": 0.4095, "step": 1833 }, { "epoch": 0.08416318663668487, "grad_norm": 0.5799052119255066, "learning_rate": 9.99833679606974e-06, "loss": 0.4023, "step": 1834 }, { "epoch": 0.08420907714193933, "grad_norm": 0.5320877432823181, "learning_rate": 9.99833046644134e-06, "loss": 0.458, "step": 1835 }, { "epoch": 0.0842549676471938, "grad_norm": 0.5162080526351929, "learning_rate": 9.998324124793531e-06, "loss": 0.5065, "step": 1836 }, { "epoch": 0.08430085815244825, "grad_norm": 0.5115873217582703, "learning_rate": 9.998317771126332e-06, "loss": 0.4371, "step": 1837 }, { "epoch": 0.08434674865770272, "grad_norm": 0.5874608159065247, "learning_rate": 9.998311405439756e-06, "loss": 0.4531, "step": 1838 }, { "epoch": 0.08439263916295718, "grad_norm": 0.4815157651901245, "learning_rate": 9.99830502773382e-06, "loss": 0.429, "step": 1839 }, { "epoch": 0.08443852966821165, "grad_norm": 0.48297929763793945, "learning_rate": 9.998298638008535e-06, "loss": 0.4392, "step": 1840 }, { "epoch": 0.08448442017346611, "grad_norm": 0.4752102792263031, "learning_rate": 9.99829223626392e-06, "loss": 0.3882, "step": 1841 }, { "epoch": 0.08453031067872058, "grad_norm": 0.4992143213748932, "learning_rate": 9.998285822499991e-06, "loss": 0.3998, "step": 1842 }, { "epoch": 0.08457620118397503, "grad_norm": 0.4586814343929291, "learning_rate": 9.99827939671676e-06, "loss": 0.4086, "step": 1843 }, { "epoch": 0.0846220916892295, "grad_norm": 0.46860653162002563, "learning_rate": 9.998272958914245e-06, "loss": 0.4088, "step": 1844 }, { "epoch": 0.08466798219448396, "grad_norm": 0.45195892453193665, "learning_rate": 9.998266509092464e-06, "loss": 0.342, "step": 1845 }, { "epoch": 0.08471387269973843, "grad_norm": 0.5009403228759766, "learning_rate": 9.998260047251427e-06, "loss": 0.4964, "step": 1846 }, { "epoch": 0.08475976320499289, "grad_norm": 0.5115389227867126, "learning_rate": 9.998253573391153e-06, "loss": 0.4795, "step": 1847 }, { "epoch": 0.08480565371024736, "grad_norm": 0.5265485644340515, "learning_rate": 9.998247087511655e-06, "loss": 0.5418, "step": 1848 }, { "epoch": 0.08485154421550181, "grad_norm": 0.4805338680744171, "learning_rate": 9.998240589612951e-06, "loss": 0.3822, "step": 1849 }, { "epoch": 0.08489743472075628, "grad_norm": 0.614960253238678, "learning_rate": 9.998234079695056e-06, "loss": 0.4183, "step": 1850 }, { "epoch": 0.08494332522601074, "grad_norm": 0.45898082852363586, "learning_rate": 9.998227557757986e-06, "loss": 0.32, "step": 1851 }, { "epoch": 0.08498921573126521, "grad_norm": 0.48527857661247253, "learning_rate": 9.998221023801757e-06, "loss": 0.4043, "step": 1852 }, { "epoch": 0.08503510623651966, "grad_norm": 0.4501403272151947, "learning_rate": 9.998214477826382e-06, "loss": 0.3178, "step": 1853 }, { "epoch": 0.08508099674177412, "grad_norm": 0.4642013609409332, "learning_rate": 9.998207919831881e-06, "loss": 0.3559, "step": 1854 }, { "epoch": 0.08512688724702859, "grad_norm": 0.5030194520950317, "learning_rate": 9.998201349818266e-06, "loss": 0.4838, "step": 1855 }, { "epoch": 0.08517277775228305, "grad_norm": 0.7251027226448059, "learning_rate": 9.998194767785553e-06, "loss": 0.4935, "step": 1856 }, { "epoch": 0.08521866825753752, "grad_norm": 0.4636625647544861, "learning_rate": 9.99818817373376e-06, "loss": 0.3633, "step": 1857 }, { "epoch": 0.08526455876279197, "grad_norm": 0.48684799671173096, "learning_rate": 9.998181567662904e-06, "loss": 0.4078, "step": 1858 }, { "epoch": 0.08531044926804644, "grad_norm": 0.5248304605484009, "learning_rate": 9.998174949572997e-06, "loss": 0.4351, "step": 1859 }, { "epoch": 0.0853563397733009, "grad_norm": 0.4800574779510498, "learning_rate": 9.998168319464057e-06, "loss": 0.4359, "step": 1860 }, { "epoch": 0.08540223027855537, "grad_norm": 0.436203271150589, "learning_rate": 9.998161677336098e-06, "loss": 0.3411, "step": 1861 }, { "epoch": 0.08544812078380983, "grad_norm": 0.5083044767379761, "learning_rate": 9.99815502318914e-06, "loss": 0.4054, "step": 1862 }, { "epoch": 0.0854940112890643, "grad_norm": 0.4430050849914551, "learning_rate": 9.998148357023196e-06, "loss": 0.3311, "step": 1863 }, { "epoch": 0.08553990179431875, "grad_norm": 0.543315052986145, "learning_rate": 9.998141678838283e-06, "loss": 0.497, "step": 1864 }, { "epoch": 0.08558579229957322, "grad_norm": 0.5219607353210449, "learning_rate": 9.998134988634414e-06, "loss": 0.5208, "step": 1865 }, { "epoch": 0.08563168280482768, "grad_norm": 0.4644080698490143, "learning_rate": 9.998128286411611e-06, "loss": 0.3786, "step": 1866 }, { "epoch": 0.08567757331008215, "grad_norm": 0.4564545750617981, "learning_rate": 9.998121572169886e-06, "loss": 0.3495, "step": 1867 }, { "epoch": 0.0857234638153366, "grad_norm": 0.4753549098968506, "learning_rate": 9.998114845909255e-06, "loss": 0.3941, "step": 1868 }, { "epoch": 0.08576935432059107, "grad_norm": 0.42606431245803833, "learning_rate": 9.998108107629736e-06, "loss": 0.3202, "step": 1869 }, { "epoch": 0.08581524482584553, "grad_norm": 0.4720016419887543, "learning_rate": 9.998101357331345e-06, "loss": 0.4031, "step": 1870 }, { "epoch": 0.0858611353311, "grad_norm": 0.5052058100700378, "learning_rate": 9.998094595014098e-06, "loss": 0.4904, "step": 1871 }, { "epoch": 0.08590702583635446, "grad_norm": 0.5914888978004456, "learning_rate": 9.99808782067801e-06, "loss": 0.3381, "step": 1872 }, { "epoch": 0.08595291634160893, "grad_norm": 0.4664536714553833, "learning_rate": 9.998081034323098e-06, "loss": 0.4378, "step": 1873 }, { "epoch": 0.08599880684686338, "grad_norm": 0.4502689838409424, "learning_rate": 9.99807423594938e-06, "loss": 0.3745, "step": 1874 }, { "epoch": 0.08604469735211785, "grad_norm": 0.4514409303665161, "learning_rate": 9.998067425556869e-06, "loss": 0.3836, "step": 1875 }, { "epoch": 0.08609058785737231, "grad_norm": 0.509328305721283, "learning_rate": 9.998060603145584e-06, "loss": 0.5113, "step": 1876 }, { "epoch": 0.08613647836262678, "grad_norm": 0.4698704779148102, "learning_rate": 9.99805376871554e-06, "loss": 0.3526, "step": 1877 }, { "epoch": 0.08618236886788123, "grad_norm": 0.5010122656822205, "learning_rate": 9.998046922266755e-06, "loss": 0.4637, "step": 1878 }, { "epoch": 0.0862282593731357, "grad_norm": 0.5294129848480225, "learning_rate": 9.998040063799246e-06, "loss": 0.5403, "step": 1879 }, { "epoch": 0.08627414987839016, "grad_norm": 0.5025519728660583, "learning_rate": 9.998033193313026e-06, "loss": 0.5209, "step": 1880 }, { "epoch": 0.08632004038364463, "grad_norm": 0.5287100076675415, "learning_rate": 9.998026310808114e-06, "loss": 0.4951, "step": 1881 }, { "epoch": 0.08636593088889909, "grad_norm": 0.4639873802661896, "learning_rate": 9.998019416284527e-06, "loss": 0.4193, "step": 1882 }, { "epoch": 0.08641182139415354, "grad_norm": 0.5831707119941711, "learning_rate": 9.99801250974228e-06, "loss": 0.5465, "step": 1883 }, { "epoch": 0.08645771189940801, "grad_norm": 0.46959373354911804, "learning_rate": 9.99800559118139e-06, "loss": 0.3839, "step": 1884 }, { "epoch": 0.08650360240466247, "grad_norm": 0.4728989005088806, "learning_rate": 9.997998660601876e-06, "loss": 0.4195, "step": 1885 }, { "epoch": 0.08654949290991694, "grad_norm": 0.4978037178516388, "learning_rate": 9.99799171800375e-06, "loss": 0.4834, "step": 1886 }, { "epoch": 0.0865953834151714, "grad_norm": 0.5176402926445007, "learning_rate": 9.997984763387032e-06, "loss": 0.4728, "step": 1887 }, { "epoch": 0.08664127392042587, "grad_norm": 0.4842921495437622, "learning_rate": 9.997977796751739e-06, "loss": 0.4632, "step": 1888 }, { "epoch": 0.08668716442568032, "grad_norm": 0.4526640474796295, "learning_rate": 9.997970818097886e-06, "loss": 0.374, "step": 1889 }, { "epoch": 0.08673305493093479, "grad_norm": 0.5068642497062683, "learning_rate": 9.99796382742549e-06, "loss": 0.4516, "step": 1890 }, { "epoch": 0.08677894543618925, "grad_norm": 0.4842502176761627, "learning_rate": 9.997956824734569e-06, "loss": 0.4543, "step": 1891 }, { "epoch": 0.08682483594144372, "grad_norm": 0.4825424253940582, "learning_rate": 9.99794981002514e-06, "loss": 0.4322, "step": 1892 }, { "epoch": 0.08687072644669817, "grad_norm": 0.4785364866256714, "learning_rate": 9.997942783297219e-06, "loss": 0.4509, "step": 1893 }, { "epoch": 0.08691661695195264, "grad_norm": 0.47149842977523804, "learning_rate": 9.997935744550823e-06, "loss": 0.396, "step": 1894 }, { "epoch": 0.0869625074572071, "grad_norm": 0.51544189453125, "learning_rate": 9.997928693785967e-06, "loss": 0.5019, "step": 1895 }, { "epoch": 0.08700839796246157, "grad_norm": 0.4829908609390259, "learning_rate": 9.997921631002671e-06, "loss": 0.479, "step": 1896 }, { "epoch": 0.08705428846771603, "grad_norm": 0.48497772216796875, "learning_rate": 9.997914556200952e-06, "loss": 0.3736, "step": 1897 }, { "epoch": 0.0871001789729705, "grad_norm": 0.5008928775787354, "learning_rate": 9.997907469380825e-06, "loss": 0.416, "step": 1898 }, { "epoch": 0.08714606947822495, "grad_norm": 0.525834858417511, "learning_rate": 9.997900370542308e-06, "loss": 0.4766, "step": 1899 }, { "epoch": 0.08719195998347942, "grad_norm": 0.5160179734230042, "learning_rate": 9.997893259685418e-06, "loss": 0.5139, "step": 1900 }, { "epoch": 0.08723785048873388, "grad_norm": 0.5106474161148071, "learning_rate": 9.997886136810173e-06, "loss": 0.4794, "step": 1901 }, { "epoch": 0.08728374099398835, "grad_norm": 0.45013463497161865, "learning_rate": 9.997879001916589e-06, "loss": 0.3253, "step": 1902 }, { "epoch": 0.0873296314992428, "grad_norm": 0.4547373056411743, "learning_rate": 9.997871855004681e-06, "loss": 0.3554, "step": 1903 }, { "epoch": 0.08737552200449727, "grad_norm": 0.5522216558456421, "learning_rate": 9.99786469607447e-06, "loss": 0.4373, "step": 1904 }, { "epoch": 0.08742141250975173, "grad_norm": 0.5390852093696594, "learning_rate": 9.997857525125974e-06, "loss": 0.4526, "step": 1905 }, { "epoch": 0.0874673030150062, "grad_norm": 0.4166499674320221, "learning_rate": 9.997850342159206e-06, "loss": 0.3042, "step": 1906 }, { "epoch": 0.08751319352026066, "grad_norm": 0.5154944658279419, "learning_rate": 9.997843147174184e-06, "loss": 0.4139, "step": 1907 }, { "epoch": 0.08755908402551513, "grad_norm": 0.4787823259830475, "learning_rate": 9.997835940170928e-06, "loss": 0.4075, "step": 1908 }, { "epoch": 0.08760497453076958, "grad_norm": 0.48952722549438477, "learning_rate": 9.997828721149454e-06, "loss": 0.414, "step": 1909 }, { "epoch": 0.08765086503602405, "grad_norm": 0.517336368560791, "learning_rate": 9.997821490109779e-06, "loss": 0.4964, "step": 1910 }, { "epoch": 0.08769675554127851, "grad_norm": 0.47285598516464233, "learning_rate": 9.99781424705192e-06, "loss": 0.4142, "step": 1911 }, { "epoch": 0.08774264604653297, "grad_norm": 0.4789881110191345, "learning_rate": 9.997806991975896e-06, "loss": 0.4791, "step": 1912 }, { "epoch": 0.08778853655178744, "grad_norm": 0.44119635224342346, "learning_rate": 9.997799724881725e-06, "loss": 0.336, "step": 1913 }, { "epoch": 0.08783442705704189, "grad_norm": 0.48285871744155884, "learning_rate": 9.99779244576942e-06, "loss": 0.4268, "step": 1914 }, { "epoch": 0.08788031756229636, "grad_norm": 0.4547489881515503, "learning_rate": 9.997785154639004e-06, "loss": 0.3245, "step": 1915 }, { "epoch": 0.08792620806755082, "grad_norm": 0.5265078544616699, "learning_rate": 9.997777851490492e-06, "loss": 0.3546, "step": 1916 }, { "epoch": 0.08797209857280529, "grad_norm": 0.4521884322166443, "learning_rate": 9.9977705363239e-06, "loss": 0.3176, "step": 1917 }, { "epoch": 0.08801798907805974, "grad_norm": 0.47659745812416077, "learning_rate": 9.997763209139248e-06, "loss": 0.3128, "step": 1918 }, { "epoch": 0.08806387958331421, "grad_norm": 0.45969158411026, "learning_rate": 9.997755869936552e-06, "loss": 0.4127, "step": 1919 }, { "epoch": 0.08810977008856867, "grad_norm": 0.46879997849464417, "learning_rate": 9.99774851871583e-06, "loss": 0.3375, "step": 1920 }, { "epoch": 0.08815566059382314, "grad_norm": 0.4860617220401764, "learning_rate": 9.997741155477103e-06, "loss": 0.4345, "step": 1921 }, { "epoch": 0.0882015510990776, "grad_norm": 0.4547886252403259, "learning_rate": 9.997733780220384e-06, "loss": 0.3451, "step": 1922 }, { "epoch": 0.08824744160433207, "grad_norm": 0.5249549150466919, "learning_rate": 9.997726392945693e-06, "loss": 0.5316, "step": 1923 }, { "epoch": 0.08829333210958652, "grad_norm": 0.5182896256446838, "learning_rate": 9.997718993653048e-06, "loss": 0.5236, "step": 1924 }, { "epoch": 0.08833922261484099, "grad_norm": 0.6074200868606567, "learning_rate": 9.997711582342464e-06, "loss": 0.4571, "step": 1925 }, { "epoch": 0.08838511312009545, "grad_norm": 0.4656011760234833, "learning_rate": 9.997704159013961e-06, "loss": 0.3757, "step": 1926 }, { "epoch": 0.08843100362534992, "grad_norm": 0.46809640526771545, "learning_rate": 9.997696723667559e-06, "loss": 0.4061, "step": 1927 }, { "epoch": 0.08847689413060437, "grad_norm": 0.46747127175331116, "learning_rate": 9.997689276303273e-06, "loss": 0.4115, "step": 1928 }, { "epoch": 0.08852278463585884, "grad_norm": 0.56339430809021, "learning_rate": 9.997681816921121e-06, "loss": 0.5307, "step": 1929 }, { "epoch": 0.0885686751411133, "grad_norm": 0.4788985848426819, "learning_rate": 9.997674345521122e-06, "loss": 0.4362, "step": 1930 }, { "epoch": 0.08861456564636777, "grad_norm": 0.48429176211357117, "learning_rate": 9.997666862103294e-06, "loss": 0.4138, "step": 1931 }, { "epoch": 0.08866045615162223, "grad_norm": 0.42795562744140625, "learning_rate": 9.997659366667653e-06, "loss": 0.3495, "step": 1932 }, { "epoch": 0.0887063466568767, "grad_norm": 0.5259376764297485, "learning_rate": 9.99765185921422e-06, "loss": 0.4898, "step": 1933 }, { "epoch": 0.08875223716213115, "grad_norm": 0.5304051637649536, "learning_rate": 9.997644339743012e-06, "loss": 0.4647, "step": 1934 }, { "epoch": 0.08879812766738562, "grad_norm": 0.5148341655731201, "learning_rate": 9.997636808254045e-06, "loss": 0.495, "step": 1935 }, { "epoch": 0.08884401817264008, "grad_norm": 0.4492471218109131, "learning_rate": 9.997629264747339e-06, "loss": 0.4242, "step": 1936 }, { "epoch": 0.08888990867789455, "grad_norm": 0.48096388578414917, "learning_rate": 9.997621709222912e-06, "loss": 0.3927, "step": 1937 }, { "epoch": 0.088935799183149, "grad_norm": 0.5156227946281433, "learning_rate": 9.997614141680783e-06, "loss": 0.4571, "step": 1938 }, { "epoch": 0.08898168968840348, "grad_norm": 0.45808881521224976, "learning_rate": 9.997606562120968e-06, "loss": 0.3624, "step": 1939 }, { "epoch": 0.08902758019365793, "grad_norm": 0.5129602551460266, "learning_rate": 9.997598970543487e-06, "loss": 0.4317, "step": 1940 }, { "epoch": 0.08907347069891239, "grad_norm": 0.4733436107635498, "learning_rate": 9.997591366948358e-06, "loss": 0.4045, "step": 1941 }, { "epoch": 0.08911936120416686, "grad_norm": 0.45796629786491394, "learning_rate": 9.9975837513356e-06, "loss": 0.4217, "step": 1942 }, { "epoch": 0.08916525170942131, "grad_norm": 0.4778556823730469, "learning_rate": 9.997576123705228e-06, "loss": 0.3812, "step": 1943 }, { "epoch": 0.08921114221467578, "grad_norm": 0.4669647812843323, "learning_rate": 9.997568484057266e-06, "loss": 0.3546, "step": 1944 }, { "epoch": 0.08925703271993024, "grad_norm": 0.4887997508049011, "learning_rate": 9.997560832391726e-06, "loss": 0.4292, "step": 1945 }, { "epoch": 0.08930292322518471, "grad_norm": 0.48549383878707886, "learning_rate": 9.997553168708631e-06, "loss": 0.393, "step": 1946 }, { "epoch": 0.08934881373043917, "grad_norm": 0.48522454500198364, "learning_rate": 9.997545493007997e-06, "loss": 0.482, "step": 1947 }, { "epoch": 0.08939470423569364, "grad_norm": 0.5656195878982544, "learning_rate": 9.997537805289844e-06, "loss": 0.6164, "step": 1948 }, { "epoch": 0.08944059474094809, "grad_norm": 0.47381073236465454, "learning_rate": 9.997530105554189e-06, "loss": 0.3774, "step": 1949 }, { "epoch": 0.08948648524620256, "grad_norm": 0.4426414966583252, "learning_rate": 9.997522393801054e-06, "loss": 0.3824, "step": 1950 }, { "epoch": 0.08953237575145702, "grad_norm": 0.4970719516277313, "learning_rate": 9.997514670030453e-06, "loss": 0.497, "step": 1951 }, { "epoch": 0.08957826625671149, "grad_norm": 0.4999612867832184, "learning_rate": 9.997506934242406e-06, "loss": 0.4459, "step": 1952 }, { "epoch": 0.08962415676196595, "grad_norm": 0.522359311580658, "learning_rate": 9.997499186436932e-06, "loss": 0.5015, "step": 1953 }, { "epoch": 0.08967004726722042, "grad_norm": 0.482906699180603, "learning_rate": 9.99749142661405e-06, "loss": 0.429, "step": 1954 }, { "epoch": 0.08971593777247487, "grad_norm": 0.47550684213638306, "learning_rate": 9.997483654773779e-06, "loss": 0.3637, "step": 1955 }, { "epoch": 0.08976182827772934, "grad_norm": 0.45095688104629517, "learning_rate": 9.997475870916138e-06, "loss": 0.3617, "step": 1956 }, { "epoch": 0.0898077187829838, "grad_norm": 0.5475599765777588, "learning_rate": 9.997468075041142e-06, "loss": 0.5413, "step": 1957 }, { "epoch": 0.08985360928823827, "grad_norm": 0.4633029103279114, "learning_rate": 9.997460267148816e-06, "loss": 0.4015, "step": 1958 }, { "epoch": 0.08989949979349272, "grad_norm": 0.46834853291511536, "learning_rate": 9.997452447239172e-06, "loss": 0.4489, "step": 1959 }, { "epoch": 0.0899453902987472, "grad_norm": 0.5147649645805359, "learning_rate": 9.997444615312233e-06, "loss": 0.4254, "step": 1960 }, { "epoch": 0.08999128080400165, "grad_norm": 0.4903944432735443, "learning_rate": 9.997436771368018e-06, "loss": 0.4514, "step": 1961 }, { "epoch": 0.09003717130925612, "grad_norm": 0.5154668092727661, "learning_rate": 9.997428915406545e-06, "loss": 0.3971, "step": 1962 }, { "epoch": 0.09008306181451058, "grad_norm": 0.5075410008430481, "learning_rate": 9.99742104742783e-06, "loss": 0.4101, "step": 1963 }, { "epoch": 0.09012895231976505, "grad_norm": 0.49641481041908264, "learning_rate": 9.997413167431897e-06, "loss": 0.4142, "step": 1964 }, { "epoch": 0.0901748428250195, "grad_norm": 0.4650253355503082, "learning_rate": 9.997405275418762e-06, "loss": 0.4014, "step": 1965 }, { "epoch": 0.09022073333027397, "grad_norm": 0.4805956482887268, "learning_rate": 9.997397371388443e-06, "loss": 0.3422, "step": 1966 }, { "epoch": 0.09026662383552843, "grad_norm": 0.5101807713508606, "learning_rate": 9.997389455340965e-06, "loss": 0.436, "step": 1967 }, { "epoch": 0.0903125143407829, "grad_norm": 0.5229566097259521, "learning_rate": 9.99738152727634e-06, "loss": 0.4953, "step": 1968 }, { "epoch": 0.09035840484603735, "grad_norm": 0.4792730212211609, "learning_rate": 9.997373587194587e-06, "loss": 0.4073, "step": 1969 }, { "epoch": 0.09040429535129182, "grad_norm": 0.519981324672699, "learning_rate": 9.997365635095732e-06, "loss": 0.3603, "step": 1970 }, { "epoch": 0.09045018585654628, "grad_norm": 0.5242164731025696, "learning_rate": 9.997357670979788e-06, "loss": 0.5586, "step": 1971 }, { "epoch": 0.09049607636180074, "grad_norm": 0.48712316155433655, "learning_rate": 9.997349694846775e-06, "loss": 0.4531, "step": 1972 }, { "epoch": 0.0905419668670552, "grad_norm": 0.49038171768188477, "learning_rate": 9.997341706696715e-06, "loss": 0.5264, "step": 1973 }, { "epoch": 0.09058785737230966, "grad_norm": 0.45785385370254517, "learning_rate": 9.997333706529624e-06, "loss": 0.3475, "step": 1974 }, { "epoch": 0.09063374787756413, "grad_norm": 0.46331408619880676, "learning_rate": 9.997325694345523e-06, "loss": 0.4144, "step": 1975 }, { "epoch": 0.09067963838281859, "grad_norm": 0.4868534207344055, "learning_rate": 9.997317670144431e-06, "loss": 0.4183, "step": 1976 }, { "epoch": 0.09072552888807306, "grad_norm": 0.4846562147140503, "learning_rate": 9.997309633926367e-06, "loss": 0.4039, "step": 1977 }, { "epoch": 0.09077141939332752, "grad_norm": 0.5201089978218079, "learning_rate": 9.997301585691351e-06, "loss": 0.5501, "step": 1978 }, { "epoch": 0.09081730989858199, "grad_norm": 0.5357211232185364, "learning_rate": 9.997293525439402e-06, "loss": 0.4765, "step": 1979 }, { "epoch": 0.09086320040383644, "grad_norm": 0.4953075051307678, "learning_rate": 9.99728545317054e-06, "loss": 0.411, "step": 1980 }, { "epoch": 0.09090909090909091, "grad_norm": 0.4356902539730072, "learning_rate": 9.997277368884782e-06, "loss": 0.3312, "step": 1981 }, { "epoch": 0.09095498141434537, "grad_norm": 0.49398893117904663, "learning_rate": 9.99726927258215e-06, "loss": 0.4406, "step": 1982 }, { "epoch": 0.09100087191959984, "grad_norm": 0.5011243224143982, "learning_rate": 9.997261164262662e-06, "loss": 0.4356, "step": 1983 }, { "epoch": 0.0910467624248543, "grad_norm": 0.4967057704925537, "learning_rate": 9.997253043926339e-06, "loss": 0.4436, "step": 1984 }, { "epoch": 0.09109265293010876, "grad_norm": 0.44922012090682983, "learning_rate": 9.997244911573199e-06, "loss": 0.3491, "step": 1985 }, { "epoch": 0.09113854343536322, "grad_norm": 0.5510815978050232, "learning_rate": 9.997236767203261e-06, "loss": 0.5051, "step": 1986 }, { "epoch": 0.09118443394061769, "grad_norm": 0.4843643009662628, "learning_rate": 9.997228610816547e-06, "loss": 0.3963, "step": 1987 }, { "epoch": 0.09123032444587215, "grad_norm": 0.5763829946517944, "learning_rate": 9.997220442413075e-06, "loss": 0.4622, "step": 1988 }, { "epoch": 0.09127621495112662, "grad_norm": 0.5644519329071045, "learning_rate": 9.997212261992865e-06, "loss": 0.562, "step": 1989 }, { "epoch": 0.09132210545638107, "grad_norm": 0.4677918255329132, "learning_rate": 9.997204069555939e-06, "loss": 0.3484, "step": 1990 }, { "epoch": 0.09136799596163554, "grad_norm": 0.5098165273666382, "learning_rate": 9.997195865102311e-06, "loss": 0.361, "step": 1991 }, { "epoch": 0.09141388646689, "grad_norm": 0.6736876368522644, "learning_rate": 9.997187648632006e-06, "loss": 0.4992, "step": 1992 }, { "epoch": 0.09145977697214447, "grad_norm": 0.440369576215744, "learning_rate": 9.997179420145043e-06, "loss": 0.3183, "step": 1993 }, { "epoch": 0.09150566747739892, "grad_norm": 0.4916366934776306, "learning_rate": 9.997171179641438e-06, "loss": 0.3439, "step": 1994 }, { "epoch": 0.0915515579826534, "grad_norm": 0.5069000124931335, "learning_rate": 9.997162927121213e-06, "loss": 0.4629, "step": 1995 }, { "epoch": 0.09159744848790785, "grad_norm": 0.4618993401527405, "learning_rate": 9.99715466258439e-06, "loss": 0.3542, "step": 1996 }, { "epoch": 0.09164333899316232, "grad_norm": 0.5310156345367432, "learning_rate": 9.997146386030989e-06, "loss": 0.5221, "step": 1997 }, { "epoch": 0.09168922949841678, "grad_norm": 0.5467902421951294, "learning_rate": 9.997138097461026e-06, "loss": 0.4811, "step": 1998 }, { "epoch": 0.09173512000367125, "grad_norm": 0.5695247650146484, "learning_rate": 9.997129796874521e-06, "loss": 0.4704, "step": 1999 }, { "epoch": 0.0917810105089257, "grad_norm": 0.45439305901527405, "learning_rate": 9.9971214842715e-06, "loss": 0.372, "step": 2000 }, { "epoch": 0.09182690101418016, "grad_norm": 0.5147867798805237, "learning_rate": 9.997113159651977e-06, "loss": 0.4309, "step": 2001 }, { "epoch": 0.09187279151943463, "grad_norm": 0.452332466840744, "learning_rate": 9.997104823015972e-06, "loss": 0.3857, "step": 2002 }, { "epoch": 0.09191868202468909, "grad_norm": 0.4936886429786682, "learning_rate": 9.99709647436351e-06, "loss": 0.442, "step": 2003 }, { "epoch": 0.09196457252994356, "grad_norm": 0.5347589254379272, "learning_rate": 9.997088113694607e-06, "loss": 0.4539, "step": 2004 }, { "epoch": 0.09201046303519801, "grad_norm": 0.5151339173316956, "learning_rate": 9.997079741009285e-06, "loss": 0.4736, "step": 2005 }, { "epoch": 0.09205635354045248, "grad_norm": 0.5258619785308838, "learning_rate": 9.997071356307562e-06, "loss": 0.4598, "step": 2006 }, { "epoch": 0.09210224404570694, "grad_norm": 0.45975634455680847, "learning_rate": 9.99706295958946e-06, "loss": 0.3403, "step": 2007 }, { "epoch": 0.09214813455096141, "grad_norm": 0.47857576608657837, "learning_rate": 9.997054550855e-06, "loss": 0.4264, "step": 2008 }, { "epoch": 0.09219402505621586, "grad_norm": 0.6320284605026245, "learning_rate": 9.9970461301042e-06, "loss": 0.5996, "step": 2009 }, { "epoch": 0.09223991556147033, "grad_norm": 0.46689409017562866, "learning_rate": 9.99703769733708e-06, "loss": 0.3845, "step": 2010 }, { "epoch": 0.09228580606672479, "grad_norm": 0.4759824573993683, "learning_rate": 9.997029252553663e-06, "loss": 0.417, "step": 2011 }, { "epoch": 0.09233169657197926, "grad_norm": 0.4591982066631317, "learning_rate": 9.997020795753966e-06, "loss": 0.3989, "step": 2012 }, { "epoch": 0.09237758707723372, "grad_norm": 0.4557945132255554, "learning_rate": 9.997012326938013e-06, "loss": 0.389, "step": 2013 }, { "epoch": 0.09242347758248819, "grad_norm": 0.49194812774658203, "learning_rate": 9.997003846105821e-06, "loss": 0.4788, "step": 2014 }, { "epoch": 0.09246936808774264, "grad_norm": 0.4524982273578644, "learning_rate": 9.996995353257414e-06, "loss": 0.3415, "step": 2015 }, { "epoch": 0.09251525859299711, "grad_norm": 0.48286381363868713, "learning_rate": 9.996986848392809e-06, "loss": 0.4012, "step": 2016 }, { "epoch": 0.09256114909825157, "grad_norm": 0.47071242332458496, "learning_rate": 9.996978331512027e-06, "loss": 0.3686, "step": 2017 }, { "epoch": 0.09260703960350604, "grad_norm": 0.49137750267982483, "learning_rate": 9.99696980261509e-06, "loss": 0.4518, "step": 2018 }, { "epoch": 0.0926529301087605, "grad_norm": 0.45813873410224915, "learning_rate": 9.996961261702018e-06, "loss": 0.3902, "step": 2019 }, { "epoch": 0.09269882061401497, "grad_norm": 0.5015468597412109, "learning_rate": 9.99695270877283e-06, "loss": 0.4394, "step": 2020 }, { "epoch": 0.09274471111926942, "grad_norm": 0.5339084267616272, "learning_rate": 9.99694414382755e-06, "loss": 0.5767, "step": 2021 }, { "epoch": 0.09279060162452389, "grad_norm": 0.5212705135345459, "learning_rate": 9.996935566866193e-06, "loss": 0.4934, "step": 2022 }, { "epoch": 0.09283649212977835, "grad_norm": 0.47062548995018005, "learning_rate": 9.996926977888786e-06, "loss": 0.4076, "step": 2023 }, { "epoch": 0.09288238263503282, "grad_norm": 0.4327714443206787, "learning_rate": 9.996918376895347e-06, "loss": 0.3372, "step": 2024 }, { "epoch": 0.09292827314028727, "grad_norm": 0.4736381471157074, "learning_rate": 9.996909763885893e-06, "loss": 0.4838, "step": 2025 }, { "epoch": 0.09297416364554174, "grad_norm": 0.46666479110717773, "learning_rate": 9.99690113886045e-06, "loss": 0.3819, "step": 2026 }, { "epoch": 0.0930200541507962, "grad_norm": 0.4831838309764862, "learning_rate": 9.996892501819037e-06, "loss": 0.4729, "step": 2027 }, { "epoch": 0.09306594465605067, "grad_norm": 0.45430436730384827, "learning_rate": 9.996883852761676e-06, "loss": 0.3528, "step": 2028 }, { "epoch": 0.09311183516130513, "grad_norm": 0.5162645578384399, "learning_rate": 9.996875191688385e-06, "loss": 0.4576, "step": 2029 }, { "epoch": 0.09315772566655958, "grad_norm": 0.48196762800216675, "learning_rate": 9.996866518599186e-06, "loss": 0.4552, "step": 2030 }, { "epoch": 0.09320361617181405, "grad_norm": 0.4665124714374542, "learning_rate": 9.9968578334941e-06, "loss": 0.4446, "step": 2031 }, { "epoch": 0.09324950667706851, "grad_norm": 0.45105865597724915, "learning_rate": 9.996849136373148e-06, "loss": 0.3795, "step": 2032 }, { "epoch": 0.09329539718232298, "grad_norm": 0.4254264533519745, "learning_rate": 9.996840427236351e-06, "loss": 0.3304, "step": 2033 }, { "epoch": 0.09334128768757743, "grad_norm": 0.46485844254493713, "learning_rate": 9.996831706083731e-06, "loss": 0.4124, "step": 2034 }, { "epoch": 0.0933871781928319, "grad_norm": 0.4854772388935089, "learning_rate": 9.996822972915305e-06, "loss": 0.4252, "step": 2035 }, { "epoch": 0.09343306869808636, "grad_norm": 0.47920531034469604, "learning_rate": 9.9968142277311e-06, "loss": 0.486, "step": 2036 }, { "epoch": 0.09347895920334083, "grad_norm": 0.47636234760284424, "learning_rate": 9.996805470531133e-06, "loss": 0.4332, "step": 2037 }, { "epoch": 0.09352484970859529, "grad_norm": 0.46059149503707886, "learning_rate": 9.996796701315424e-06, "loss": 0.4391, "step": 2038 }, { "epoch": 0.09357074021384976, "grad_norm": 0.4492204487323761, "learning_rate": 9.996787920083998e-06, "loss": 0.3421, "step": 2039 }, { "epoch": 0.09361663071910421, "grad_norm": 0.5381326675415039, "learning_rate": 9.996779126836873e-06, "loss": 0.4947, "step": 2040 }, { "epoch": 0.09366252122435868, "grad_norm": 0.6390119194984436, "learning_rate": 9.996770321574071e-06, "loss": 0.358, "step": 2041 }, { "epoch": 0.09370841172961314, "grad_norm": 0.4528900682926178, "learning_rate": 9.996761504295614e-06, "loss": 0.4001, "step": 2042 }, { "epoch": 0.09375430223486761, "grad_norm": 0.49994057416915894, "learning_rate": 9.996752675001524e-06, "loss": 0.3863, "step": 2043 }, { "epoch": 0.09380019274012207, "grad_norm": 0.46004506945610046, "learning_rate": 9.996743833691817e-06, "loss": 0.3099, "step": 2044 }, { "epoch": 0.09384608324537654, "grad_norm": 0.48346641659736633, "learning_rate": 9.996734980366523e-06, "loss": 0.3334, "step": 2045 }, { "epoch": 0.09389197375063099, "grad_norm": 0.48080554604530334, "learning_rate": 9.996726115025654e-06, "loss": 0.4218, "step": 2046 }, { "epoch": 0.09393786425588546, "grad_norm": 0.5070620775222778, "learning_rate": 9.996717237669237e-06, "loss": 0.4295, "step": 2047 }, { "epoch": 0.09398375476113992, "grad_norm": 0.4773145318031311, "learning_rate": 9.996708348297296e-06, "loss": 0.3935, "step": 2048 }, { "epoch": 0.09402964526639439, "grad_norm": 0.5115299820899963, "learning_rate": 9.996699446909844e-06, "loss": 0.4566, "step": 2049 }, { "epoch": 0.09407553577164884, "grad_norm": 0.4422728419303894, "learning_rate": 9.996690533506908e-06, "loss": 0.3483, "step": 2050 }, { "epoch": 0.09412142627690331, "grad_norm": 0.47135084867477417, "learning_rate": 9.996681608088507e-06, "loss": 0.3561, "step": 2051 }, { "epoch": 0.09416731678215777, "grad_norm": 0.4875592887401581, "learning_rate": 9.996672670654666e-06, "loss": 0.4294, "step": 2052 }, { "epoch": 0.09421320728741224, "grad_norm": 0.5143119692802429, "learning_rate": 9.996663721205402e-06, "loss": 0.5118, "step": 2053 }, { "epoch": 0.0942590977926667, "grad_norm": 0.4918496608734131, "learning_rate": 9.99665475974074e-06, "loss": 0.5125, "step": 2054 }, { "epoch": 0.09430498829792117, "grad_norm": 0.47512781620025635, "learning_rate": 9.9966457862607e-06, "loss": 0.3891, "step": 2055 }, { "epoch": 0.09435087880317562, "grad_norm": 0.4893255829811096, "learning_rate": 9.996636800765304e-06, "loss": 0.4618, "step": 2056 }, { "epoch": 0.09439676930843009, "grad_norm": 0.4645330011844635, "learning_rate": 9.996627803254574e-06, "loss": 0.3717, "step": 2057 }, { "epoch": 0.09444265981368455, "grad_norm": 0.480774462223053, "learning_rate": 9.99661879372853e-06, "loss": 0.4081, "step": 2058 }, { "epoch": 0.094488550318939, "grad_norm": 0.6540485620498657, "learning_rate": 9.996609772187196e-06, "loss": 0.5681, "step": 2059 }, { "epoch": 0.09453444082419347, "grad_norm": 0.499470591545105, "learning_rate": 9.996600738630592e-06, "loss": 0.4294, "step": 2060 }, { "epoch": 0.09458033132944793, "grad_norm": 0.50464928150177, "learning_rate": 9.99659169305874e-06, "loss": 0.4999, "step": 2061 }, { "epoch": 0.0946262218347024, "grad_norm": 0.5615844130516052, "learning_rate": 9.996582635471662e-06, "loss": 0.3745, "step": 2062 }, { "epoch": 0.09467211233995686, "grad_norm": 0.5331128239631653, "learning_rate": 9.99657356586938e-06, "loss": 0.5311, "step": 2063 }, { "epoch": 0.09471800284521133, "grad_norm": 0.5935415625572205, "learning_rate": 9.996564484251916e-06, "loss": 0.4087, "step": 2064 }, { "epoch": 0.09476389335046578, "grad_norm": 0.49952229857444763, "learning_rate": 9.99655539061929e-06, "loss": 0.4899, "step": 2065 }, { "epoch": 0.09480978385572025, "grad_norm": 0.4531458616256714, "learning_rate": 9.996546284971525e-06, "loss": 0.3466, "step": 2066 }, { "epoch": 0.09485567436097471, "grad_norm": 0.4564852714538574, "learning_rate": 9.996537167308644e-06, "loss": 0.3841, "step": 2067 }, { "epoch": 0.09490156486622918, "grad_norm": 0.529405415058136, "learning_rate": 9.996528037630668e-06, "loss": 0.4519, "step": 2068 }, { "epoch": 0.09494745537148364, "grad_norm": 0.4793223440647125, "learning_rate": 9.996518895937618e-06, "loss": 0.3978, "step": 2069 }, { "epoch": 0.0949933458767381, "grad_norm": 0.512132465839386, "learning_rate": 9.99650974222952e-06, "loss": 0.4337, "step": 2070 }, { "epoch": 0.09503923638199256, "grad_norm": 0.4214874804019928, "learning_rate": 9.996500576506392e-06, "loss": 0.3104, "step": 2071 }, { "epoch": 0.09508512688724703, "grad_norm": 0.49968063831329346, "learning_rate": 9.996491398768255e-06, "loss": 0.4654, "step": 2072 }, { "epoch": 0.09513101739250149, "grad_norm": 0.4648069143295288, "learning_rate": 9.996482209015135e-06, "loss": 0.4839, "step": 2073 }, { "epoch": 0.09517690789775596, "grad_norm": 0.513130247592926, "learning_rate": 9.996473007247052e-06, "loss": 0.4561, "step": 2074 }, { "epoch": 0.09522279840301041, "grad_norm": 0.4773569703102112, "learning_rate": 9.996463793464026e-06, "loss": 0.4036, "step": 2075 }, { "epoch": 0.09526868890826488, "grad_norm": 0.4816969335079193, "learning_rate": 9.996454567666085e-06, "loss": 0.4538, "step": 2076 }, { "epoch": 0.09531457941351934, "grad_norm": 0.49606969952583313, "learning_rate": 9.996445329853245e-06, "loss": 0.4317, "step": 2077 }, { "epoch": 0.09536046991877381, "grad_norm": 0.4426336884498596, "learning_rate": 9.996436080025533e-06, "loss": 0.367, "step": 2078 }, { "epoch": 0.09540636042402827, "grad_norm": 0.4468148350715637, "learning_rate": 9.996426818182967e-06, "loss": 0.3741, "step": 2079 }, { "epoch": 0.09545225092928274, "grad_norm": 0.5305752754211426, "learning_rate": 9.996417544325574e-06, "loss": 0.499, "step": 2080 }, { "epoch": 0.09549814143453719, "grad_norm": 0.5080432295799255, "learning_rate": 9.996408258453372e-06, "loss": 0.3881, "step": 2081 }, { "epoch": 0.09554403193979166, "grad_norm": 0.4637645184993744, "learning_rate": 9.996398960566385e-06, "loss": 0.3281, "step": 2082 }, { "epoch": 0.09558992244504612, "grad_norm": 0.4604986310005188, "learning_rate": 9.996389650664635e-06, "loss": 0.3345, "step": 2083 }, { "epoch": 0.09563581295030059, "grad_norm": 0.5082672238349915, "learning_rate": 9.996380328748147e-06, "loss": 0.4351, "step": 2084 }, { "epoch": 0.09568170345555504, "grad_norm": 0.4816417396068573, "learning_rate": 9.99637099481694e-06, "loss": 0.4196, "step": 2085 }, { "epoch": 0.09572759396080951, "grad_norm": 0.4824964702129364, "learning_rate": 9.996361648871036e-06, "loss": 0.4231, "step": 2086 }, { "epoch": 0.09577348446606397, "grad_norm": 0.4349449574947357, "learning_rate": 9.99635229091046e-06, "loss": 0.2979, "step": 2087 }, { "epoch": 0.09581937497131843, "grad_norm": 0.5159413814544678, "learning_rate": 9.996342920935236e-06, "loss": 0.5134, "step": 2088 }, { "epoch": 0.0958652654765729, "grad_norm": 0.5171272158622742, "learning_rate": 9.996333538945382e-06, "loss": 0.5119, "step": 2089 }, { "epoch": 0.09591115598182735, "grad_norm": 0.4492175579071045, "learning_rate": 9.996324144940924e-06, "loss": 0.3254, "step": 2090 }, { "epoch": 0.09595704648708182, "grad_norm": 0.4594343602657318, "learning_rate": 9.996314738921881e-06, "loss": 0.3976, "step": 2091 }, { "epoch": 0.09600293699233628, "grad_norm": 0.5824548006057739, "learning_rate": 9.99630532088828e-06, "loss": 0.4315, "step": 2092 }, { "epoch": 0.09604882749759075, "grad_norm": 0.4677094519138336, "learning_rate": 9.99629589084014e-06, "loss": 0.3412, "step": 2093 }, { "epoch": 0.0960947180028452, "grad_norm": 0.4852971136569977, "learning_rate": 9.996286448777486e-06, "loss": 0.3897, "step": 2094 }, { "epoch": 0.09614060850809968, "grad_norm": 0.4925045073032379, "learning_rate": 9.99627699470034e-06, "loss": 0.4314, "step": 2095 }, { "epoch": 0.09618649901335413, "grad_norm": 0.4866385757923126, "learning_rate": 9.996267528608725e-06, "loss": 0.4103, "step": 2096 }, { "epoch": 0.0962323895186086, "grad_norm": 0.48117971420288086, "learning_rate": 9.996258050502664e-06, "loss": 0.3878, "step": 2097 }, { "epoch": 0.09627828002386306, "grad_norm": 0.5057356357574463, "learning_rate": 9.996248560382177e-06, "loss": 0.4645, "step": 2098 }, { "epoch": 0.09632417052911753, "grad_norm": 0.48927298188209534, "learning_rate": 9.996239058247292e-06, "loss": 0.4218, "step": 2099 }, { "epoch": 0.09637006103437198, "grad_norm": 0.4779609441757202, "learning_rate": 9.996229544098028e-06, "loss": 0.3482, "step": 2100 }, { "epoch": 0.09641595153962645, "grad_norm": 0.5371790528297424, "learning_rate": 9.996220017934409e-06, "loss": 0.5009, "step": 2101 }, { "epoch": 0.09646184204488091, "grad_norm": 0.4955594539642334, "learning_rate": 9.996210479756456e-06, "loss": 0.3913, "step": 2102 }, { "epoch": 0.09650773255013538, "grad_norm": 0.4752846360206604, "learning_rate": 9.996200929564195e-06, "loss": 0.392, "step": 2103 }, { "epoch": 0.09655362305538984, "grad_norm": 0.4950645864009857, "learning_rate": 9.996191367357647e-06, "loss": 0.4475, "step": 2104 }, { "epoch": 0.0965995135606443, "grad_norm": 0.5252650380134583, "learning_rate": 9.996181793136836e-06, "loss": 0.4614, "step": 2105 }, { "epoch": 0.09664540406589876, "grad_norm": 0.47076714038848877, "learning_rate": 9.996172206901785e-06, "loss": 0.3954, "step": 2106 }, { "epoch": 0.09669129457115323, "grad_norm": 0.5345096588134766, "learning_rate": 9.996162608652516e-06, "loss": 0.469, "step": 2107 }, { "epoch": 0.09673718507640769, "grad_norm": 0.44931167364120483, "learning_rate": 9.996152998389052e-06, "loss": 0.3419, "step": 2108 }, { "epoch": 0.09678307558166216, "grad_norm": 0.5248048901557922, "learning_rate": 9.996143376111418e-06, "loss": 0.4938, "step": 2109 }, { "epoch": 0.09682896608691662, "grad_norm": 0.5095524787902832, "learning_rate": 9.996133741819635e-06, "loss": 0.4481, "step": 2110 }, { "epoch": 0.09687485659217109, "grad_norm": 0.46126413345336914, "learning_rate": 9.996124095513729e-06, "loss": 0.3248, "step": 2111 }, { "epoch": 0.09692074709742554, "grad_norm": 0.4844275712966919, "learning_rate": 9.99611443719372e-06, "loss": 0.4377, "step": 2112 }, { "epoch": 0.09696663760268001, "grad_norm": 0.4996603727340698, "learning_rate": 9.996104766859634e-06, "loss": 0.4815, "step": 2113 }, { "epoch": 0.09701252810793447, "grad_norm": 0.4958064556121826, "learning_rate": 9.996095084511491e-06, "loss": 0.4438, "step": 2114 }, { "epoch": 0.09705841861318894, "grad_norm": 0.4917685091495514, "learning_rate": 9.996085390149316e-06, "loss": 0.3995, "step": 2115 }, { "epoch": 0.0971043091184434, "grad_norm": 0.482188880443573, "learning_rate": 9.996075683773134e-06, "loss": 0.4524, "step": 2116 }, { "epoch": 0.09715019962369786, "grad_norm": 0.47922563552856445, "learning_rate": 9.996065965382967e-06, "loss": 0.3988, "step": 2117 }, { "epoch": 0.09719609012895232, "grad_norm": 0.5032529234886169, "learning_rate": 9.996056234978837e-06, "loss": 0.4015, "step": 2118 }, { "epoch": 0.09724198063420678, "grad_norm": 0.4822005033493042, "learning_rate": 9.99604649256077e-06, "loss": 0.3739, "step": 2119 }, { "epoch": 0.09728787113946125, "grad_norm": 0.5190449357032776, "learning_rate": 9.996036738128787e-06, "loss": 0.4614, "step": 2120 }, { "epoch": 0.0973337616447157, "grad_norm": 0.5280289053916931, "learning_rate": 9.996026971682912e-06, "loss": 0.5566, "step": 2121 }, { "epoch": 0.09737965214997017, "grad_norm": 0.4400560259819031, "learning_rate": 9.996017193223169e-06, "loss": 0.3012, "step": 2122 }, { "epoch": 0.09742554265522463, "grad_norm": 0.46748727560043335, "learning_rate": 9.996007402749582e-06, "loss": 0.3512, "step": 2123 }, { "epoch": 0.0974714331604791, "grad_norm": 0.5129647850990295, "learning_rate": 9.995997600262172e-06, "loss": 0.4995, "step": 2124 }, { "epoch": 0.09751732366573355, "grad_norm": 0.5150854587554932, "learning_rate": 9.995987785760967e-06, "loss": 0.5015, "step": 2125 }, { "epoch": 0.09756321417098802, "grad_norm": 0.46464788913726807, "learning_rate": 9.995977959245988e-06, "loss": 0.409, "step": 2126 }, { "epoch": 0.09760910467624248, "grad_norm": 0.46803930401802063, "learning_rate": 9.995968120717258e-06, "loss": 0.3685, "step": 2127 }, { "epoch": 0.09765499518149695, "grad_norm": 0.5460236668586731, "learning_rate": 9.995958270174801e-06, "loss": 0.462, "step": 2128 }, { "epoch": 0.09770088568675141, "grad_norm": 0.4722246825695038, "learning_rate": 9.995948407618643e-06, "loss": 0.3668, "step": 2129 }, { "epoch": 0.09774677619200588, "grad_norm": 0.497059166431427, "learning_rate": 9.995938533048805e-06, "loss": 0.4719, "step": 2130 }, { "epoch": 0.09779266669726033, "grad_norm": 0.5201273560523987, "learning_rate": 9.99592864646531e-06, "loss": 0.4974, "step": 2131 }, { "epoch": 0.0978385572025148, "grad_norm": 0.44994765520095825, "learning_rate": 9.995918747868186e-06, "loss": 0.3272, "step": 2132 }, { "epoch": 0.09788444770776926, "grad_norm": 0.5128187537193298, "learning_rate": 9.995908837257452e-06, "loss": 0.4933, "step": 2133 }, { "epoch": 0.09793033821302373, "grad_norm": 0.5345513820648193, "learning_rate": 9.995898914633135e-06, "loss": 0.4939, "step": 2134 }, { "epoch": 0.09797622871827819, "grad_norm": 0.4516834318637848, "learning_rate": 9.995888979995257e-06, "loss": 0.3654, "step": 2135 }, { "epoch": 0.09802211922353266, "grad_norm": 0.4768800139427185, "learning_rate": 9.995879033343844e-06, "loss": 0.4255, "step": 2136 }, { "epoch": 0.09806800972878711, "grad_norm": 0.5497641563415527, "learning_rate": 9.995869074678918e-06, "loss": 0.4979, "step": 2137 }, { "epoch": 0.09811390023404158, "grad_norm": 0.5317090153694153, "learning_rate": 9.995859104000502e-06, "loss": 0.5097, "step": 2138 }, { "epoch": 0.09815979073929604, "grad_norm": 0.5085233449935913, "learning_rate": 9.995849121308626e-06, "loss": 0.4393, "step": 2139 }, { "epoch": 0.09820568124455051, "grad_norm": 0.4718397557735443, "learning_rate": 9.995839126603306e-06, "loss": 0.4096, "step": 2140 }, { "epoch": 0.09825157174980496, "grad_norm": 0.47357964515686035, "learning_rate": 9.99582911988457e-06, "loss": 0.3901, "step": 2141 }, { "epoch": 0.09829746225505943, "grad_norm": 0.49856793880462646, "learning_rate": 9.995819101152441e-06, "loss": 0.4724, "step": 2142 }, { "epoch": 0.09834335276031389, "grad_norm": 0.5811507701873779, "learning_rate": 9.995809070406945e-06, "loss": 0.5367, "step": 2143 }, { "epoch": 0.09838924326556836, "grad_norm": 0.5023209452629089, "learning_rate": 9.995799027648105e-06, "loss": 0.4118, "step": 2144 }, { "epoch": 0.09843513377082282, "grad_norm": 0.5006213784217834, "learning_rate": 9.995788972875947e-06, "loss": 0.4306, "step": 2145 }, { "epoch": 0.09848102427607729, "grad_norm": 0.4715560972690582, "learning_rate": 9.99577890609049e-06, "loss": 0.3177, "step": 2146 }, { "epoch": 0.09852691478133174, "grad_norm": 0.47375479340553284, "learning_rate": 9.995768827291763e-06, "loss": 0.3851, "step": 2147 }, { "epoch": 0.0985728052865862, "grad_norm": 0.5320106148719788, "learning_rate": 9.995758736479788e-06, "loss": 0.5271, "step": 2148 }, { "epoch": 0.09861869579184067, "grad_norm": 0.4591202437877655, "learning_rate": 9.99574863365459e-06, "loss": 0.3905, "step": 2149 }, { "epoch": 0.09866458629709512, "grad_norm": 0.4693296253681183, "learning_rate": 9.995738518816193e-06, "loss": 0.4113, "step": 2150 }, { "epoch": 0.0987104768023496, "grad_norm": 0.4697159230709076, "learning_rate": 9.995728391964623e-06, "loss": 0.4133, "step": 2151 }, { "epoch": 0.09875636730760405, "grad_norm": 0.6639912128448486, "learning_rate": 9.995718253099902e-06, "loss": 0.4733, "step": 2152 }, { "epoch": 0.09880225781285852, "grad_norm": 0.9481196403503418, "learning_rate": 9.995708102222055e-06, "loss": 0.4575, "step": 2153 }, { "epoch": 0.09884814831811298, "grad_norm": 0.4139403700828552, "learning_rate": 9.995697939331105e-06, "loss": 0.2906, "step": 2154 }, { "epoch": 0.09889403882336745, "grad_norm": 0.4694135785102844, "learning_rate": 9.995687764427082e-06, "loss": 0.4363, "step": 2155 }, { "epoch": 0.0989399293286219, "grad_norm": 0.5318163633346558, "learning_rate": 9.995677577510005e-06, "loss": 0.5405, "step": 2156 }, { "epoch": 0.09898581983387637, "grad_norm": 0.4968397915363312, "learning_rate": 9.995667378579898e-06, "loss": 0.5101, "step": 2157 }, { "epoch": 0.09903171033913083, "grad_norm": 0.5037608742713928, "learning_rate": 9.995657167636791e-06, "loss": 0.4637, "step": 2158 }, { "epoch": 0.0990776008443853, "grad_norm": 0.462295800447464, "learning_rate": 9.995646944680703e-06, "loss": 0.3995, "step": 2159 }, { "epoch": 0.09912349134963976, "grad_norm": 0.5383294224739075, "learning_rate": 9.995636709711662e-06, "loss": 0.4927, "step": 2160 }, { "epoch": 0.09916938185489423, "grad_norm": 0.4859579801559448, "learning_rate": 9.99562646272969e-06, "loss": 0.4202, "step": 2161 }, { "epoch": 0.09921527236014868, "grad_norm": 0.5066967010498047, "learning_rate": 9.995616203734816e-06, "loss": 0.451, "step": 2162 }, { "epoch": 0.09926116286540315, "grad_norm": 0.5146127939224243, "learning_rate": 9.995605932727057e-06, "loss": 0.4443, "step": 2163 }, { "epoch": 0.09930705337065761, "grad_norm": 0.48541831970214844, "learning_rate": 9.995595649706445e-06, "loss": 0.4243, "step": 2164 }, { "epoch": 0.09935294387591208, "grad_norm": 0.4704222083091736, "learning_rate": 9.995585354673004e-06, "loss": 0.4556, "step": 2165 }, { "epoch": 0.09939883438116653, "grad_norm": 0.47955968976020813, "learning_rate": 9.995575047626753e-06, "loss": 0.3764, "step": 2166 }, { "epoch": 0.099444724886421, "grad_norm": 0.5205246210098267, "learning_rate": 9.995564728567723e-06, "loss": 0.5056, "step": 2167 }, { "epoch": 0.09949061539167546, "grad_norm": 0.4942520260810852, "learning_rate": 9.995554397495937e-06, "loss": 0.517, "step": 2168 }, { "epoch": 0.09953650589692993, "grad_norm": 0.4720578193664551, "learning_rate": 9.995544054411418e-06, "loss": 0.4101, "step": 2169 }, { "epoch": 0.09958239640218439, "grad_norm": 0.4484683573246002, "learning_rate": 9.995533699314192e-06, "loss": 0.3906, "step": 2170 }, { "epoch": 0.09962828690743886, "grad_norm": 0.4514347016811371, "learning_rate": 9.995523332204285e-06, "loss": 0.4033, "step": 2171 }, { "epoch": 0.09967417741269331, "grad_norm": 0.5216149687767029, "learning_rate": 9.99551295308172e-06, "loss": 0.536, "step": 2172 }, { "epoch": 0.09972006791794778, "grad_norm": 0.4413753151893616, "learning_rate": 9.995502561946524e-06, "loss": 0.406, "step": 2173 }, { "epoch": 0.09976595842320224, "grad_norm": 0.44576913118362427, "learning_rate": 9.99549215879872e-06, "loss": 0.4069, "step": 2174 }, { "epoch": 0.09981184892845671, "grad_norm": 0.4599571228027344, "learning_rate": 9.995481743638335e-06, "loss": 0.3899, "step": 2175 }, { "epoch": 0.09985773943371117, "grad_norm": 0.48537591099739075, "learning_rate": 9.995471316465392e-06, "loss": 0.4125, "step": 2176 }, { "epoch": 0.09990362993896562, "grad_norm": 0.4500022530555725, "learning_rate": 9.995460877279918e-06, "loss": 0.4163, "step": 2177 }, { "epoch": 0.09994952044422009, "grad_norm": 0.4591972529888153, "learning_rate": 9.995450426081936e-06, "loss": 0.439, "step": 2178 }, { "epoch": 0.09999541094947455, "grad_norm": 0.5077624917030334, "learning_rate": 9.995439962871473e-06, "loss": 0.4161, "step": 2179 }, { "epoch": 0.10004130145472902, "grad_norm": 0.45666274428367615, "learning_rate": 9.995429487648555e-06, "loss": 0.436, "step": 2180 }, { "epoch": 0.10008719195998347, "grad_norm": 0.4778783321380615, "learning_rate": 9.995419000413204e-06, "loss": 0.4074, "step": 2181 }, { "epoch": 0.10013308246523794, "grad_norm": 0.47730910778045654, "learning_rate": 9.995408501165447e-06, "loss": 0.4468, "step": 2182 }, { "epoch": 0.1001789729704924, "grad_norm": 0.4342380166053772, "learning_rate": 9.99539798990531e-06, "loss": 0.3603, "step": 2183 }, { "epoch": 0.10022486347574687, "grad_norm": 0.4938841760158539, "learning_rate": 9.995387466632816e-06, "loss": 0.4539, "step": 2184 }, { "epoch": 0.10027075398100133, "grad_norm": 0.5060190558433533, "learning_rate": 9.995376931347993e-06, "loss": 0.3947, "step": 2185 }, { "epoch": 0.1003166444862558, "grad_norm": 0.4662055969238281, "learning_rate": 9.995366384050865e-06, "loss": 0.4299, "step": 2186 }, { "epoch": 0.10036253499151025, "grad_norm": 0.45805132389068604, "learning_rate": 9.995355824741457e-06, "loss": 0.3403, "step": 2187 }, { "epoch": 0.10040842549676472, "grad_norm": 0.506393313407898, "learning_rate": 9.995345253419795e-06, "loss": 0.335, "step": 2188 }, { "epoch": 0.10045431600201918, "grad_norm": 0.4764336347579956, "learning_rate": 9.995334670085905e-06, "loss": 0.3535, "step": 2189 }, { "epoch": 0.10050020650727365, "grad_norm": 0.4943844676017761, "learning_rate": 9.99532407473981e-06, "loss": 0.4677, "step": 2190 }, { "epoch": 0.1005460970125281, "grad_norm": 0.47232797741889954, "learning_rate": 9.995313467381538e-06, "loss": 0.3643, "step": 2191 }, { "epoch": 0.10059198751778257, "grad_norm": 0.49308115243911743, "learning_rate": 9.995302848011114e-06, "loss": 0.4105, "step": 2192 }, { "epoch": 0.10063787802303703, "grad_norm": 0.45486244559288025, "learning_rate": 9.995292216628563e-06, "loss": 0.4052, "step": 2193 }, { "epoch": 0.1006837685282915, "grad_norm": 0.5041335821151733, "learning_rate": 9.99528157323391e-06, "loss": 0.4164, "step": 2194 }, { "epoch": 0.10072965903354596, "grad_norm": 0.47231435775756836, "learning_rate": 9.995270917827182e-06, "loss": 0.4312, "step": 2195 }, { "epoch": 0.10077554953880043, "grad_norm": 0.4928642213344574, "learning_rate": 9.995260250408406e-06, "loss": 0.4261, "step": 2196 }, { "epoch": 0.10082144004405488, "grad_norm": 0.7827200889587402, "learning_rate": 9.995249570977603e-06, "loss": 0.4144, "step": 2197 }, { "epoch": 0.10086733054930935, "grad_norm": 0.5145460963249207, "learning_rate": 9.9952388795348e-06, "loss": 0.4065, "step": 2198 }, { "epoch": 0.10091322105456381, "grad_norm": 0.4500555992126465, "learning_rate": 9.995228176080027e-06, "loss": 0.3574, "step": 2199 }, { "epoch": 0.10095911155981828, "grad_norm": 0.5063720345497131, "learning_rate": 9.995217460613307e-06, "loss": 0.456, "step": 2200 }, { "epoch": 0.10100500206507274, "grad_norm": 0.47892463207244873, "learning_rate": 9.995206733134663e-06, "loss": 0.4054, "step": 2201 }, { "epoch": 0.1010508925703272, "grad_norm": 0.4448998272418976, "learning_rate": 9.995195993644125e-06, "loss": 0.3883, "step": 2202 }, { "epoch": 0.10109678307558166, "grad_norm": 0.4948670566082001, "learning_rate": 9.995185242141718e-06, "loss": 0.4721, "step": 2203 }, { "epoch": 0.10114267358083613, "grad_norm": 0.541731595993042, "learning_rate": 9.995174478627465e-06, "loss": 0.5679, "step": 2204 }, { "epoch": 0.10118856408609059, "grad_norm": 0.39994311332702637, "learning_rate": 9.995163703101395e-06, "loss": 0.2985, "step": 2205 }, { "epoch": 0.10123445459134504, "grad_norm": 0.5325403213500977, "learning_rate": 9.995152915563533e-06, "loss": 0.5079, "step": 2206 }, { "epoch": 0.10128034509659951, "grad_norm": 0.47382909059524536, "learning_rate": 9.995142116013903e-06, "loss": 0.4287, "step": 2207 }, { "epoch": 0.10132623560185397, "grad_norm": 0.46792659163475037, "learning_rate": 9.995131304452535e-06, "loss": 0.3958, "step": 2208 }, { "epoch": 0.10137212610710844, "grad_norm": 0.46098190546035767, "learning_rate": 9.995120480879452e-06, "loss": 0.3795, "step": 2209 }, { "epoch": 0.1014180166123629, "grad_norm": 0.5036364197731018, "learning_rate": 9.99510964529468e-06, "loss": 0.4578, "step": 2210 }, { "epoch": 0.10146390711761737, "grad_norm": 0.470348596572876, "learning_rate": 9.995098797698247e-06, "loss": 0.3892, "step": 2211 }, { "epoch": 0.10150979762287182, "grad_norm": 0.4469543397426605, "learning_rate": 9.995087938090178e-06, "loss": 0.3101, "step": 2212 }, { "epoch": 0.10155568812812629, "grad_norm": 0.4983699321746826, "learning_rate": 9.995077066470498e-06, "loss": 0.4663, "step": 2213 }, { "epoch": 0.10160157863338075, "grad_norm": 0.4524115025997162, "learning_rate": 9.995066182839233e-06, "loss": 0.3654, "step": 2214 }, { "epoch": 0.10164746913863522, "grad_norm": 0.4622112512588501, "learning_rate": 9.995055287196413e-06, "loss": 0.3746, "step": 2215 }, { "epoch": 0.10169335964388967, "grad_norm": 0.44869089126586914, "learning_rate": 9.99504437954206e-06, "loss": 0.3533, "step": 2216 }, { "epoch": 0.10173925014914414, "grad_norm": 0.4604414999485016, "learning_rate": 9.995033459876202e-06, "loss": 0.4453, "step": 2217 }, { "epoch": 0.1017851406543986, "grad_norm": 0.4404522776603699, "learning_rate": 9.995022528198865e-06, "loss": 0.3684, "step": 2218 }, { "epoch": 0.10183103115965307, "grad_norm": 0.4539918303489685, "learning_rate": 9.995011584510074e-06, "loss": 0.3738, "step": 2219 }, { "epoch": 0.10187692166490753, "grad_norm": 0.5033751130104065, "learning_rate": 9.995000628809859e-06, "loss": 0.4241, "step": 2220 }, { "epoch": 0.101922812170162, "grad_norm": 0.5245758891105652, "learning_rate": 9.994989661098243e-06, "loss": 0.3629, "step": 2221 }, { "epoch": 0.10196870267541645, "grad_norm": 0.4730580449104309, "learning_rate": 9.994978681375254e-06, "loss": 0.3834, "step": 2222 }, { "epoch": 0.10201459318067092, "grad_norm": 0.46525928378105164, "learning_rate": 9.994967689640915e-06, "loss": 0.3916, "step": 2223 }, { "epoch": 0.10206048368592538, "grad_norm": 0.46430933475494385, "learning_rate": 9.994956685895259e-06, "loss": 0.3255, "step": 2224 }, { "epoch": 0.10210637419117985, "grad_norm": 0.67746901512146, "learning_rate": 9.994945670138305e-06, "loss": 0.3333, "step": 2225 }, { "epoch": 0.1021522646964343, "grad_norm": 0.5671274662017822, "learning_rate": 9.994934642370085e-06, "loss": 0.4657, "step": 2226 }, { "epoch": 0.10219815520168878, "grad_norm": 0.48215028643608093, "learning_rate": 9.994923602590626e-06, "loss": 0.4336, "step": 2227 }, { "epoch": 0.10224404570694323, "grad_norm": 0.5110505819320679, "learning_rate": 9.994912550799948e-06, "loss": 0.4637, "step": 2228 }, { "epoch": 0.1022899362121977, "grad_norm": 0.5171206593513489, "learning_rate": 9.994901486998084e-06, "loss": 0.4985, "step": 2229 }, { "epoch": 0.10233582671745216, "grad_norm": 0.5020104646682739, "learning_rate": 9.994890411185057e-06, "loss": 0.4134, "step": 2230 }, { "epoch": 0.10238171722270663, "grad_norm": 0.515200138092041, "learning_rate": 9.994879323360895e-06, "loss": 0.392, "step": 2231 }, { "epoch": 0.10242760772796108, "grad_norm": 0.5204353928565979, "learning_rate": 9.994868223525627e-06, "loss": 0.4528, "step": 2232 }, { "epoch": 0.10247349823321555, "grad_norm": 0.439077764749527, "learning_rate": 9.994857111679275e-06, "loss": 0.3284, "step": 2233 }, { "epoch": 0.10251938873847001, "grad_norm": 0.4522359371185303, "learning_rate": 9.994845987821869e-06, "loss": 0.3765, "step": 2234 }, { "epoch": 0.10256527924372447, "grad_norm": 0.5172908306121826, "learning_rate": 9.994834851953434e-06, "loss": 0.4447, "step": 2235 }, { "epoch": 0.10261116974897894, "grad_norm": 0.5548928380012512, "learning_rate": 9.994823704073999e-06, "loss": 0.5318, "step": 2236 }, { "epoch": 0.10265706025423339, "grad_norm": 0.45871788263320923, "learning_rate": 9.994812544183587e-06, "loss": 0.3571, "step": 2237 }, { "epoch": 0.10270295075948786, "grad_norm": 0.47312793135643005, "learning_rate": 9.99480137228223e-06, "loss": 0.3511, "step": 2238 }, { "epoch": 0.10274884126474232, "grad_norm": 0.4848744571208954, "learning_rate": 9.994790188369952e-06, "loss": 0.4607, "step": 2239 }, { "epoch": 0.10279473176999679, "grad_norm": 0.47760719060897827, "learning_rate": 9.994778992446779e-06, "loss": 0.3909, "step": 2240 }, { "epoch": 0.10284062227525124, "grad_norm": 0.47932136058807373, "learning_rate": 9.994767784512738e-06, "loss": 0.3861, "step": 2241 }, { "epoch": 0.10288651278050572, "grad_norm": 0.49739232659339905, "learning_rate": 9.994756564567858e-06, "loss": 0.394, "step": 2242 }, { "epoch": 0.10293240328576017, "grad_norm": 0.47455066442489624, "learning_rate": 9.994745332612165e-06, "loss": 0.379, "step": 2243 }, { "epoch": 0.10297829379101464, "grad_norm": 0.5340999364852905, "learning_rate": 9.994734088645685e-06, "loss": 0.4073, "step": 2244 }, { "epoch": 0.1030241842962691, "grad_norm": 0.5435153841972351, "learning_rate": 9.994722832668447e-06, "loss": 0.5173, "step": 2245 }, { "epoch": 0.10307007480152357, "grad_norm": 0.5073051452636719, "learning_rate": 9.994711564680475e-06, "loss": 0.465, "step": 2246 }, { "epoch": 0.10311596530677802, "grad_norm": 0.505617082118988, "learning_rate": 9.9947002846818e-06, "loss": 0.4141, "step": 2247 }, { "epoch": 0.1031618558120325, "grad_norm": 0.5151702165603638, "learning_rate": 9.994688992672446e-06, "loss": 0.4138, "step": 2248 }, { "epoch": 0.10320774631728695, "grad_norm": 0.4773789942264557, "learning_rate": 9.994677688652443e-06, "loss": 0.3628, "step": 2249 }, { "epoch": 0.10325363682254142, "grad_norm": 0.48059847950935364, "learning_rate": 9.994666372621816e-06, "loss": 0.4127, "step": 2250 }, { "epoch": 0.10329952732779588, "grad_norm": 0.4865242540836334, "learning_rate": 9.99465504458059e-06, "loss": 0.4151, "step": 2251 }, { "epoch": 0.10334541783305035, "grad_norm": 0.5266616344451904, "learning_rate": 9.994643704528799e-06, "loss": 0.3796, "step": 2252 }, { "epoch": 0.1033913083383048, "grad_norm": 0.47762367129325867, "learning_rate": 9.994632352466464e-06, "loss": 0.3826, "step": 2253 }, { "epoch": 0.10343719884355927, "grad_norm": 0.5382362008094788, "learning_rate": 9.994620988393616e-06, "loss": 0.5113, "step": 2254 }, { "epoch": 0.10348308934881373, "grad_norm": 0.4840710461139679, "learning_rate": 9.994609612310279e-06, "loss": 0.4081, "step": 2255 }, { "epoch": 0.1035289798540682, "grad_norm": 0.4215961694717407, "learning_rate": 9.994598224216482e-06, "loss": 0.3472, "step": 2256 }, { "epoch": 0.10357487035932265, "grad_norm": 0.4542910158634186, "learning_rate": 9.994586824112255e-06, "loss": 0.3611, "step": 2257 }, { "epoch": 0.10362076086457712, "grad_norm": 0.4572446644306183, "learning_rate": 9.99457541199762e-06, "loss": 0.4029, "step": 2258 }, { "epoch": 0.10366665136983158, "grad_norm": 0.4768829345703125, "learning_rate": 9.99456398787261e-06, "loss": 0.4332, "step": 2259 }, { "epoch": 0.10371254187508605, "grad_norm": 0.474031001329422, "learning_rate": 9.99455255173725e-06, "loss": 0.4178, "step": 2260 }, { "epoch": 0.1037584323803405, "grad_norm": 0.48735129833221436, "learning_rate": 9.994541103591566e-06, "loss": 0.4297, "step": 2261 }, { "epoch": 0.10380432288559498, "grad_norm": 0.48689502477645874, "learning_rate": 9.994529643435588e-06, "loss": 0.3712, "step": 2262 }, { "epoch": 0.10385021339084943, "grad_norm": 0.44529426097869873, "learning_rate": 9.994518171269342e-06, "loss": 0.3496, "step": 2263 }, { "epoch": 0.1038961038961039, "grad_norm": 0.49490877985954285, "learning_rate": 9.994506687092856e-06, "loss": 0.4297, "step": 2264 }, { "epoch": 0.10394199440135836, "grad_norm": 0.473677396774292, "learning_rate": 9.994495190906158e-06, "loss": 0.4025, "step": 2265 }, { "epoch": 0.10398788490661282, "grad_norm": 0.49198609590530396, "learning_rate": 9.994483682709275e-06, "loss": 0.466, "step": 2266 }, { "epoch": 0.10403377541186729, "grad_norm": 0.6021363735198975, "learning_rate": 9.994472162502235e-06, "loss": 0.4573, "step": 2267 }, { "epoch": 0.10407966591712174, "grad_norm": 0.4464619755744934, "learning_rate": 9.994460630285066e-06, "loss": 0.327, "step": 2268 }, { "epoch": 0.10412555642237621, "grad_norm": 0.5164344310760498, "learning_rate": 9.994449086057796e-06, "loss": 0.4582, "step": 2269 }, { "epoch": 0.10417144692763067, "grad_norm": 0.5113261938095093, "learning_rate": 9.994437529820452e-06, "loss": 0.4951, "step": 2270 }, { "epoch": 0.10421733743288514, "grad_norm": 0.4675207734107971, "learning_rate": 9.994425961573063e-06, "loss": 0.3984, "step": 2271 }, { "epoch": 0.1042632279381396, "grad_norm": 0.483003169298172, "learning_rate": 9.994414381315654e-06, "loss": 0.457, "step": 2272 }, { "epoch": 0.10430911844339406, "grad_norm": 0.5005264282226562, "learning_rate": 9.994402789048254e-06, "loss": 0.4435, "step": 2273 }, { "epoch": 0.10435500894864852, "grad_norm": 0.45094045996665955, "learning_rate": 9.994391184770894e-06, "loss": 0.3642, "step": 2274 }, { "epoch": 0.10440089945390299, "grad_norm": 0.4513227343559265, "learning_rate": 9.994379568483599e-06, "loss": 0.3488, "step": 2275 }, { "epoch": 0.10444678995915745, "grad_norm": 0.45290058851242065, "learning_rate": 9.994367940186395e-06, "loss": 0.3874, "step": 2276 }, { "epoch": 0.10449268046441192, "grad_norm": 0.46660661697387695, "learning_rate": 9.994356299879315e-06, "loss": 0.4454, "step": 2277 }, { "epoch": 0.10453857096966637, "grad_norm": 0.5158165693283081, "learning_rate": 9.994344647562384e-06, "loss": 0.4185, "step": 2278 }, { "epoch": 0.10458446147492084, "grad_norm": 0.45892372727394104, "learning_rate": 9.99433298323563e-06, "loss": 0.3642, "step": 2279 }, { "epoch": 0.1046303519801753, "grad_norm": 0.48131415247917175, "learning_rate": 9.99432130689908e-06, "loss": 0.4475, "step": 2280 }, { "epoch": 0.10467624248542977, "grad_norm": 0.45579615235328674, "learning_rate": 9.994309618552767e-06, "loss": 0.3903, "step": 2281 }, { "epoch": 0.10472213299068422, "grad_norm": 0.4769909679889679, "learning_rate": 9.994297918196712e-06, "loss": 0.3563, "step": 2282 }, { "epoch": 0.1047680234959387, "grad_norm": 0.5031367540359497, "learning_rate": 9.99428620583095e-06, "loss": 0.4604, "step": 2283 }, { "epoch": 0.10481391400119315, "grad_norm": 0.499083012342453, "learning_rate": 9.994274481455503e-06, "loss": 0.4311, "step": 2284 }, { "epoch": 0.10485980450644762, "grad_norm": 0.45621082186698914, "learning_rate": 9.994262745070404e-06, "loss": 0.3708, "step": 2285 }, { "epoch": 0.10490569501170208, "grad_norm": 0.5472161173820496, "learning_rate": 9.994250996675678e-06, "loss": 0.481, "step": 2286 }, { "epoch": 0.10495158551695655, "grad_norm": 0.47954538464546204, "learning_rate": 9.994239236271355e-06, "loss": 0.4298, "step": 2287 }, { "epoch": 0.104997476022211, "grad_norm": 0.48620352149009705, "learning_rate": 9.994227463857462e-06, "loss": 0.41, "step": 2288 }, { "epoch": 0.10504336652746547, "grad_norm": 0.4758157432079315, "learning_rate": 9.994215679434029e-06, "loss": 0.4177, "step": 2289 }, { "epoch": 0.10508925703271993, "grad_norm": 0.46810030937194824, "learning_rate": 9.994203883001086e-06, "loss": 0.3778, "step": 2290 }, { "epoch": 0.1051351475379744, "grad_norm": 0.46195679903030396, "learning_rate": 9.994192074558655e-06, "loss": 0.3535, "step": 2291 }, { "epoch": 0.10518103804322886, "grad_norm": 0.47153595089912415, "learning_rate": 9.99418025410677e-06, "loss": 0.4338, "step": 2292 }, { "epoch": 0.10522692854848333, "grad_norm": 0.4545343816280365, "learning_rate": 9.994168421645458e-06, "loss": 0.3429, "step": 2293 }, { "epoch": 0.10527281905373778, "grad_norm": 0.499397337436676, "learning_rate": 9.994156577174747e-06, "loss": 0.4304, "step": 2294 }, { "epoch": 0.10531870955899224, "grad_norm": 0.5031590461730957, "learning_rate": 9.994144720694665e-06, "loss": 0.4586, "step": 2295 }, { "epoch": 0.10536460006424671, "grad_norm": 0.5150892734527588, "learning_rate": 9.994132852205241e-06, "loss": 0.5245, "step": 2296 }, { "epoch": 0.10541049056950116, "grad_norm": 0.5018406510353088, "learning_rate": 9.994120971706505e-06, "loss": 0.4291, "step": 2297 }, { "epoch": 0.10545638107475563, "grad_norm": 0.48349466919898987, "learning_rate": 9.994109079198485e-06, "loss": 0.4885, "step": 2298 }, { "epoch": 0.10550227158001009, "grad_norm": 0.4850239157676697, "learning_rate": 9.994097174681206e-06, "loss": 0.4081, "step": 2299 }, { "epoch": 0.10554816208526456, "grad_norm": 0.5362891554832458, "learning_rate": 9.9940852581547e-06, "loss": 0.4581, "step": 2300 }, { "epoch": 0.10559405259051902, "grad_norm": 0.46285656094551086, "learning_rate": 9.994073329618996e-06, "loss": 0.3888, "step": 2301 }, { "epoch": 0.10563994309577349, "grad_norm": 0.5105830430984497, "learning_rate": 9.994061389074121e-06, "loss": 0.5244, "step": 2302 }, { "epoch": 0.10568583360102794, "grad_norm": 0.46407097578048706, "learning_rate": 9.994049436520105e-06, "loss": 0.3871, "step": 2303 }, { "epoch": 0.10573172410628241, "grad_norm": 0.492532342672348, "learning_rate": 9.994037471956977e-06, "loss": 0.4071, "step": 2304 }, { "epoch": 0.10577761461153687, "grad_norm": 0.4274005591869354, "learning_rate": 9.994025495384764e-06, "loss": 0.3901, "step": 2305 }, { "epoch": 0.10582350511679134, "grad_norm": 0.4736476242542267, "learning_rate": 9.994013506803497e-06, "loss": 0.4085, "step": 2306 }, { "epoch": 0.1058693956220458, "grad_norm": 0.45817533135414124, "learning_rate": 9.994001506213204e-06, "loss": 0.3445, "step": 2307 }, { "epoch": 0.10591528612730026, "grad_norm": 0.5188115239143372, "learning_rate": 9.993989493613913e-06, "loss": 0.5088, "step": 2308 }, { "epoch": 0.10596117663255472, "grad_norm": 0.46297457814216614, "learning_rate": 9.993977469005653e-06, "loss": 0.3574, "step": 2309 }, { "epoch": 0.10600706713780919, "grad_norm": 0.5024999976158142, "learning_rate": 9.993965432388453e-06, "loss": 0.4564, "step": 2310 }, { "epoch": 0.10605295764306365, "grad_norm": 0.4582570791244507, "learning_rate": 9.993953383762342e-06, "loss": 0.3533, "step": 2311 }, { "epoch": 0.10609884814831812, "grad_norm": 0.46465012431144714, "learning_rate": 9.99394132312735e-06, "loss": 0.3691, "step": 2312 }, { "epoch": 0.10614473865357257, "grad_norm": 0.4867328405380249, "learning_rate": 9.993929250483504e-06, "loss": 0.4135, "step": 2313 }, { "epoch": 0.10619062915882704, "grad_norm": 0.5091094374656677, "learning_rate": 9.993917165830837e-06, "loss": 0.4449, "step": 2314 }, { "epoch": 0.1062365196640815, "grad_norm": 0.4904663562774658, "learning_rate": 9.993905069169374e-06, "loss": 0.3909, "step": 2315 }, { "epoch": 0.10628241016933597, "grad_norm": 0.4993605613708496, "learning_rate": 9.993892960499146e-06, "loss": 0.402, "step": 2316 }, { "epoch": 0.10632830067459043, "grad_norm": 0.5261427164077759, "learning_rate": 9.99388083982018e-06, "loss": 0.509, "step": 2317 }, { "epoch": 0.1063741911798449, "grad_norm": 0.5238061547279358, "learning_rate": 9.993868707132508e-06, "loss": 0.4949, "step": 2318 }, { "epoch": 0.10642008168509935, "grad_norm": 0.4810526967048645, "learning_rate": 9.993856562436158e-06, "loss": 0.3733, "step": 2319 }, { "epoch": 0.10646597219035382, "grad_norm": 0.5121590495109558, "learning_rate": 9.993844405731156e-06, "loss": 0.4591, "step": 2320 }, { "epoch": 0.10651186269560828, "grad_norm": 0.5276305079460144, "learning_rate": 9.993832237017539e-06, "loss": 0.5269, "step": 2321 }, { "epoch": 0.10655775320086275, "grad_norm": 0.4516938328742981, "learning_rate": 9.993820056295328e-06, "loss": 0.3456, "step": 2322 }, { "epoch": 0.1066036437061172, "grad_norm": 0.4453715980052948, "learning_rate": 9.993807863564557e-06, "loss": 0.3187, "step": 2323 }, { "epoch": 0.10664953421137166, "grad_norm": 0.4927120804786682, "learning_rate": 9.993795658825254e-06, "loss": 0.4846, "step": 2324 }, { "epoch": 0.10669542471662613, "grad_norm": 0.4254423975944519, "learning_rate": 9.99378344207745e-06, "loss": 0.3249, "step": 2325 }, { "epoch": 0.10674131522188059, "grad_norm": 0.48758989572525024, "learning_rate": 9.99377121332117e-06, "loss": 0.4594, "step": 2326 }, { "epoch": 0.10678720572713506, "grad_norm": 0.459878146648407, "learning_rate": 9.99375897255645e-06, "loss": 0.3656, "step": 2327 }, { "epoch": 0.10683309623238951, "grad_norm": 0.5259808301925659, "learning_rate": 9.993746719783313e-06, "loss": 0.5098, "step": 2328 }, { "epoch": 0.10687898673764398, "grad_norm": 0.4410202205181122, "learning_rate": 9.993734455001791e-06, "loss": 0.3947, "step": 2329 }, { "epoch": 0.10692487724289844, "grad_norm": 0.4721528887748718, "learning_rate": 9.993722178211916e-06, "loss": 0.4093, "step": 2330 }, { "epoch": 0.10697076774815291, "grad_norm": 0.48999449610710144, "learning_rate": 9.993709889413714e-06, "loss": 0.3596, "step": 2331 }, { "epoch": 0.10701665825340737, "grad_norm": 0.45347708463668823, "learning_rate": 9.993697588607216e-06, "loss": 0.3595, "step": 2332 }, { "epoch": 0.10706254875866184, "grad_norm": 0.4915441572666168, "learning_rate": 9.99368527579245e-06, "loss": 0.4159, "step": 2333 }, { "epoch": 0.10710843926391629, "grad_norm": 0.5159887075424194, "learning_rate": 9.993672950969449e-06, "loss": 0.4007, "step": 2334 }, { "epoch": 0.10715432976917076, "grad_norm": 0.4823332130908966, "learning_rate": 9.99366061413824e-06, "loss": 0.3842, "step": 2335 }, { "epoch": 0.10720022027442522, "grad_norm": 0.5124900341033936, "learning_rate": 9.993648265298852e-06, "loss": 0.5253, "step": 2336 }, { "epoch": 0.10724611077967969, "grad_norm": 0.5371134877204895, "learning_rate": 9.993635904451318e-06, "loss": 0.4226, "step": 2337 }, { "epoch": 0.10729200128493414, "grad_norm": 0.45335957407951355, "learning_rate": 9.993623531595663e-06, "loss": 0.3406, "step": 2338 }, { "epoch": 0.10733789179018861, "grad_norm": 0.4715803563594818, "learning_rate": 9.993611146731921e-06, "loss": 0.3892, "step": 2339 }, { "epoch": 0.10738378229544307, "grad_norm": 0.5375993847846985, "learning_rate": 9.993598749860121e-06, "loss": 0.4932, "step": 2340 }, { "epoch": 0.10742967280069754, "grad_norm": 0.5276365876197815, "learning_rate": 9.993586340980291e-06, "loss": 0.4471, "step": 2341 }, { "epoch": 0.107475563305952, "grad_norm": 0.5309797525405884, "learning_rate": 9.993573920092463e-06, "loss": 0.4651, "step": 2342 }, { "epoch": 0.10752145381120647, "grad_norm": 0.5680058598518372, "learning_rate": 9.993561487196663e-06, "loss": 0.4458, "step": 2343 }, { "epoch": 0.10756734431646092, "grad_norm": 0.4932131767272949, "learning_rate": 9.993549042292927e-06, "loss": 0.386, "step": 2344 }, { "epoch": 0.10761323482171539, "grad_norm": 0.47958359122276306, "learning_rate": 9.99353658538128e-06, "loss": 0.4351, "step": 2345 }, { "epoch": 0.10765912532696985, "grad_norm": 0.4573841989040375, "learning_rate": 9.993524116461752e-06, "loss": 0.3812, "step": 2346 }, { "epoch": 0.10770501583222432, "grad_norm": 0.4970970153808594, "learning_rate": 9.993511635534377e-06, "loss": 0.3835, "step": 2347 }, { "epoch": 0.10775090633747877, "grad_norm": 0.4605258107185364, "learning_rate": 9.993499142599182e-06, "loss": 0.3937, "step": 2348 }, { "epoch": 0.10779679684273324, "grad_norm": 0.4642235338687897, "learning_rate": 9.993486637656196e-06, "loss": 0.4232, "step": 2349 }, { "epoch": 0.1078426873479877, "grad_norm": 0.43981191515922546, "learning_rate": 9.993474120705453e-06, "loss": 0.3081, "step": 2350 }, { "epoch": 0.10788857785324217, "grad_norm": 0.5256198644638062, "learning_rate": 9.993461591746978e-06, "loss": 0.3888, "step": 2351 }, { "epoch": 0.10793446835849663, "grad_norm": 0.5696247220039368, "learning_rate": 9.993449050780807e-06, "loss": 0.4899, "step": 2352 }, { "epoch": 0.10798035886375108, "grad_norm": 0.4480229616165161, "learning_rate": 9.993436497806965e-06, "loss": 0.3523, "step": 2353 }, { "epoch": 0.10802624936900555, "grad_norm": 0.4560874402523041, "learning_rate": 9.993423932825485e-06, "loss": 0.294, "step": 2354 }, { "epoch": 0.10807213987426001, "grad_norm": 0.509881854057312, "learning_rate": 9.993411355836395e-06, "loss": 0.4791, "step": 2355 }, { "epoch": 0.10811803037951448, "grad_norm": 0.4819093942642212, "learning_rate": 9.993398766839728e-06, "loss": 0.4125, "step": 2356 }, { "epoch": 0.10816392088476894, "grad_norm": 0.489335834980011, "learning_rate": 9.993386165835512e-06, "loss": 0.4444, "step": 2357 }, { "epoch": 0.1082098113900234, "grad_norm": 0.475861519575119, "learning_rate": 9.993373552823779e-06, "loss": 0.429, "step": 2358 }, { "epoch": 0.10825570189527786, "grad_norm": 0.5080271363258362, "learning_rate": 9.993360927804558e-06, "loss": 0.4447, "step": 2359 }, { "epoch": 0.10830159240053233, "grad_norm": 0.49017783999443054, "learning_rate": 9.99334829077788e-06, "loss": 0.4698, "step": 2360 }, { "epoch": 0.10834748290578679, "grad_norm": 0.4478916525840759, "learning_rate": 9.993335641743776e-06, "loss": 0.3644, "step": 2361 }, { "epoch": 0.10839337341104126, "grad_norm": 0.45496076345443726, "learning_rate": 9.993322980702276e-06, "loss": 0.3563, "step": 2362 }, { "epoch": 0.10843926391629571, "grad_norm": 0.5220422148704529, "learning_rate": 9.99331030765341e-06, "loss": 0.5412, "step": 2363 }, { "epoch": 0.10848515442155018, "grad_norm": 0.5577587485313416, "learning_rate": 9.993297622597207e-06, "loss": 0.5075, "step": 2364 }, { "epoch": 0.10853104492680464, "grad_norm": 0.49074456095695496, "learning_rate": 9.9932849255337e-06, "loss": 0.4133, "step": 2365 }, { "epoch": 0.10857693543205911, "grad_norm": 0.4843331575393677, "learning_rate": 9.993272216462918e-06, "loss": 0.4323, "step": 2366 }, { "epoch": 0.10862282593731357, "grad_norm": 0.4688464105129242, "learning_rate": 9.993259495384894e-06, "loss": 0.3456, "step": 2367 }, { "epoch": 0.10866871644256804, "grad_norm": 0.4599507451057434, "learning_rate": 9.993246762299656e-06, "loss": 0.352, "step": 2368 }, { "epoch": 0.10871460694782249, "grad_norm": 0.5474652647972107, "learning_rate": 9.993234017207236e-06, "loss": 0.391, "step": 2369 }, { "epoch": 0.10876049745307696, "grad_norm": 0.5002933740615845, "learning_rate": 9.993221260107664e-06, "loss": 0.3981, "step": 2370 }, { "epoch": 0.10880638795833142, "grad_norm": 0.5205653309822083, "learning_rate": 9.993208491000969e-06, "loss": 0.4138, "step": 2371 }, { "epoch": 0.10885227846358589, "grad_norm": 0.44890275597572327, "learning_rate": 9.993195709887186e-06, "loss": 0.3429, "step": 2372 }, { "epoch": 0.10889816896884034, "grad_norm": 0.4605308771133423, "learning_rate": 9.993182916766343e-06, "loss": 0.3621, "step": 2373 }, { "epoch": 0.10894405947409481, "grad_norm": 0.4829280376434326, "learning_rate": 9.993170111638469e-06, "loss": 0.4611, "step": 2374 }, { "epoch": 0.10898994997934927, "grad_norm": 0.4615125060081482, "learning_rate": 9.993157294503596e-06, "loss": 0.4265, "step": 2375 }, { "epoch": 0.10903584048460374, "grad_norm": 0.48623043298721313, "learning_rate": 9.99314446536176e-06, "loss": 0.4458, "step": 2376 }, { "epoch": 0.1090817309898582, "grad_norm": 0.48533710837364197, "learning_rate": 9.993131624212984e-06, "loss": 0.449, "step": 2377 }, { "epoch": 0.10912762149511267, "grad_norm": 0.4932247996330261, "learning_rate": 9.993118771057302e-06, "loss": 0.392, "step": 2378 }, { "epoch": 0.10917351200036712, "grad_norm": 0.44672098755836487, "learning_rate": 9.993105905894747e-06, "loss": 0.3543, "step": 2379 }, { "epoch": 0.1092194025056216, "grad_norm": 0.46219930052757263, "learning_rate": 9.993093028725349e-06, "loss": 0.3476, "step": 2380 }, { "epoch": 0.10926529301087605, "grad_norm": 0.49842169880867004, "learning_rate": 9.993080139549137e-06, "loss": 0.372, "step": 2381 }, { "epoch": 0.10931118351613052, "grad_norm": 0.47868072986602783, "learning_rate": 9.993067238366141e-06, "loss": 0.3983, "step": 2382 }, { "epoch": 0.10935707402138498, "grad_norm": 0.4512401819229126, "learning_rate": 9.993054325176397e-06, "loss": 0.3671, "step": 2383 }, { "epoch": 0.10940296452663943, "grad_norm": 0.5758407115936279, "learning_rate": 9.993041399979931e-06, "loss": 0.4839, "step": 2384 }, { "epoch": 0.1094488550318939, "grad_norm": 0.4671958088874817, "learning_rate": 9.993028462776777e-06, "loss": 0.3999, "step": 2385 }, { "epoch": 0.10949474553714836, "grad_norm": 0.4457875192165375, "learning_rate": 9.993015513566965e-06, "loss": 0.3492, "step": 2386 }, { "epoch": 0.10954063604240283, "grad_norm": 0.49180343747138977, "learning_rate": 9.99300255235053e-06, "loss": 0.4583, "step": 2387 }, { "epoch": 0.10958652654765728, "grad_norm": 0.5329729318618774, "learning_rate": 9.992989579127497e-06, "loss": 0.5426, "step": 2388 }, { "epoch": 0.10963241705291175, "grad_norm": 0.454342782497406, "learning_rate": 9.992976593897898e-06, "loss": 0.3241, "step": 2389 }, { "epoch": 0.10967830755816621, "grad_norm": 0.466134637594223, "learning_rate": 9.99296359666177e-06, "loss": 0.381, "step": 2390 }, { "epoch": 0.10972419806342068, "grad_norm": 0.5188995599746704, "learning_rate": 9.992950587419137e-06, "loss": 0.4044, "step": 2391 }, { "epoch": 0.10977008856867514, "grad_norm": 0.5016509294509888, "learning_rate": 9.992937566170036e-06, "loss": 0.438, "step": 2392 }, { "epoch": 0.1098159790739296, "grad_norm": 0.4966766834259033, "learning_rate": 9.992924532914493e-06, "loss": 0.4513, "step": 2393 }, { "epoch": 0.10986186957918406, "grad_norm": 0.480093777179718, "learning_rate": 9.992911487652545e-06, "loss": 0.4445, "step": 2394 }, { "epoch": 0.10990776008443853, "grad_norm": 0.45770004391670227, "learning_rate": 9.992898430384222e-06, "loss": 0.3784, "step": 2395 }, { "epoch": 0.10995365058969299, "grad_norm": 0.46298423409461975, "learning_rate": 9.99288536110955e-06, "loss": 0.3823, "step": 2396 }, { "epoch": 0.10999954109494746, "grad_norm": 0.5011987686157227, "learning_rate": 9.992872279828566e-06, "loss": 0.4484, "step": 2397 }, { "epoch": 0.11004543160020192, "grad_norm": 0.4758690893650055, "learning_rate": 9.992859186541302e-06, "loss": 0.4126, "step": 2398 }, { "epoch": 0.11009132210545639, "grad_norm": 0.4889727830886841, "learning_rate": 9.992846081247786e-06, "loss": 0.4645, "step": 2399 }, { "epoch": 0.11013721261071084, "grad_norm": 0.4708193838596344, "learning_rate": 9.992832963948052e-06, "loss": 0.3602, "step": 2400 }, { "epoch": 0.11018310311596531, "grad_norm": 0.42878010869026184, "learning_rate": 9.99281983464213e-06, "loss": 0.3197, "step": 2401 }, { "epoch": 0.11022899362121977, "grad_norm": 0.5145881175994873, "learning_rate": 9.99280669333005e-06, "loss": 0.4186, "step": 2402 }, { "epoch": 0.11027488412647424, "grad_norm": 0.5047385692596436, "learning_rate": 9.992793540011847e-06, "loss": 0.3386, "step": 2403 }, { "epoch": 0.1103207746317287, "grad_norm": 0.504069447517395, "learning_rate": 9.992780374687552e-06, "loss": 0.4812, "step": 2404 }, { "epoch": 0.11036666513698316, "grad_norm": 0.49924415349960327, "learning_rate": 9.992767197357196e-06, "loss": 0.4426, "step": 2405 }, { "epoch": 0.11041255564223762, "grad_norm": 0.45040181279182434, "learning_rate": 9.99275400802081e-06, "loss": 0.3412, "step": 2406 }, { "epoch": 0.11045844614749209, "grad_norm": 0.46537142992019653, "learning_rate": 9.992740806678427e-06, "loss": 0.4099, "step": 2407 }, { "epoch": 0.11050433665274655, "grad_norm": 0.521116316318512, "learning_rate": 9.992727593330077e-06, "loss": 0.5319, "step": 2408 }, { "epoch": 0.11055022715800102, "grad_norm": 0.49834680557250977, "learning_rate": 9.992714367975793e-06, "loss": 0.4444, "step": 2409 }, { "epoch": 0.11059611766325547, "grad_norm": 0.4632551074028015, "learning_rate": 9.992701130615607e-06, "loss": 0.3695, "step": 2410 }, { "epoch": 0.11064200816850994, "grad_norm": 0.5194320678710938, "learning_rate": 9.992687881249552e-06, "loss": 0.5201, "step": 2411 }, { "epoch": 0.1106878986737644, "grad_norm": 0.5251150131225586, "learning_rate": 9.992674619877656e-06, "loss": 0.494, "step": 2412 }, { "epoch": 0.11073378917901885, "grad_norm": 0.5041511654853821, "learning_rate": 9.992661346499956e-06, "loss": 0.48, "step": 2413 }, { "epoch": 0.11077967968427332, "grad_norm": 0.5411491394042969, "learning_rate": 9.99264806111648e-06, "loss": 0.5058, "step": 2414 }, { "epoch": 0.11082557018952778, "grad_norm": 0.529249370098114, "learning_rate": 9.99263476372726e-06, "loss": 0.4878, "step": 2415 }, { "epoch": 0.11087146069478225, "grad_norm": 0.5474193096160889, "learning_rate": 9.992621454332329e-06, "loss": 0.4853, "step": 2416 }, { "epoch": 0.1109173512000367, "grad_norm": 0.5019587874412537, "learning_rate": 9.992608132931721e-06, "loss": 0.5294, "step": 2417 }, { "epoch": 0.11096324170529118, "grad_norm": 0.46557125449180603, "learning_rate": 9.992594799525466e-06, "loss": 0.3972, "step": 2418 }, { "epoch": 0.11100913221054563, "grad_norm": 0.4534105956554413, "learning_rate": 9.992581454113595e-06, "loss": 0.3619, "step": 2419 }, { "epoch": 0.1110550227158001, "grad_norm": 0.45725610852241516, "learning_rate": 9.99256809669614e-06, "loss": 0.3296, "step": 2420 }, { "epoch": 0.11110091322105456, "grad_norm": 0.4957942068576813, "learning_rate": 9.992554727273136e-06, "loss": 0.3492, "step": 2421 }, { "epoch": 0.11114680372630903, "grad_norm": 0.4696333408355713, "learning_rate": 9.992541345844615e-06, "loss": 0.4115, "step": 2422 }, { "epoch": 0.11119269423156349, "grad_norm": 0.45193302631378174, "learning_rate": 9.992527952410607e-06, "loss": 0.4146, "step": 2423 }, { "epoch": 0.11123858473681796, "grad_norm": 0.5438513159751892, "learning_rate": 9.992514546971144e-06, "loss": 0.5345, "step": 2424 }, { "epoch": 0.11128447524207241, "grad_norm": 0.4902760088443756, "learning_rate": 9.99250112952626e-06, "loss": 0.46, "step": 2425 }, { "epoch": 0.11133036574732688, "grad_norm": 0.5059555172920227, "learning_rate": 9.992487700075986e-06, "loss": 0.4454, "step": 2426 }, { "epoch": 0.11137625625258134, "grad_norm": 0.48046404123306274, "learning_rate": 9.992474258620353e-06, "loss": 0.4067, "step": 2427 }, { "epoch": 0.11142214675783581, "grad_norm": 0.4960084557533264, "learning_rate": 9.9924608051594e-06, "loss": 0.4689, "step": 2428 }, { "epoch": 0.11146803726309026, "grad_norm": 0.4739789664745331, "learning_rate": 9.99244733969315e-06, "loss": 0.3776, "step": 2429 }, { "epoch": 0.11151392776834473, "grad_norm": 0.4622792601585388, "learning_rate": 9.99243386222164e-06, "loss": 0.3687, "step": 2430 }, { "epoch": 0.11155981827359919, "grad_norm": 0.46889933943748474, "learning_rate": 9.992420372744905e-06, "loss": 0.3734, "step": 2431 }, { "epoch": 0.11160570877885366, "grad_norm": 0.5245449542999268, "learning_rate": 9.992406871262971e-06, "loss": 0.4792, "step": 2432 }, { "epoch": 0.11165159928410812, "grad_norm": 0.500846266746521, "learning_rate": 9.992393357775877e-06, "loss": 0.4371, "step": 2433 }, { "epoch": 0.11169748978936259, "grad_norm": 0.4570404887199402, "learning_rate": 9.992379832283653e-06, "loss": 0.3525, "step": 2434 }, { "epoch": 0.11174338029461704, "grad_norm": 0.506668210029602, "learning_rate": 9.992366294786329e-06, "loss": 0.4516, "step": 2435 }, { "epoch": 0.11178927079987151, "grad_norm": 0.5264373421669006, "learning_rate": 9.99235274528394e-06, "loss": 0.4056, "step": 2436 }, { "epoch": 0.11183516130512597, "grad_norm": 0.4368688464164734, "learning_rate": 9.992339183776518e-06, "loss": 0.3442, "step": 2437 }, { "epoch": 0.11188105181038044, "grad_norm": 0.5090033411979675, "learning_rate": 9.992325610264097e-06, "loss": 0.5376, "step": 2438 }, { "epoch": 0.1119269423156349, "grad_norm": 0.5053332448005676, "learning_rate": 9.992312024746708e-06, "loss": 0.5406, "step": 2439 }, { "epoch": 0.11197283282088936, "grad_norm": 0.511529266834259, "learning_rate": 9.992298427224384e-06, "loss": 0.4448, "step": 2440 }, { "epoch": 0.11201872332614382, "grad_norm": 0.46909859776496887, "learning_rate": 9.99228481769716e-06, "loss": 0.3173, "step": 2441 }, { "epoch": 0.11206461383139828, "grad_norm": 0.4972430169582367, "learning_rate": 9.992271196165065e-06, "loss": 0.4328, "step": 2442 }, { "epoch": 0.11211050433665275, "grad_norm": 0.48291459679603577, "learning_rate": 9.992257562628133e-06, "loss": 0.4198, "step": 2443 }, { "epoch": 0.1121563948419072, "grad_norm": 0.4549403786659241, "learning_rate": 9.992243917086397e-06, "loss": 0.3854, "step": 2444 }, { "epoch": 0.11220228534716167, "grad_norm": 0.43886083364486694, "learning_rate": 9.992230259539892e-06, "loss": 0.357, "step": 2445 }, { "epoch": 0.11224817585241613, "grad_norm": 0.5201106071472168, "learning_rate": 9.992216589988647e-06, "loss": 0.5056, "step": 2446 }, { "epoch": 0.1122940663576706, "grad_norm": 0.4949498176574707, "learning_rate": 9.992202908432696e-06, "loss": 0.4781, "step": 2447 }, { "epoch": 0.11233995686292506, "grad_norm": 0.4632495641708374, "learning_rate": 9.992189214872074e-06, "loss": 0.3345, "step": 2448 }, { "epoch": 0.11238584736817953, "grad_norm": 0.5333968997001648, "learning_rate": 9.992175509306812e-06, "loss": 0.5341, "step": 2449 }, { "epoch": 0.11243173787343398, "grad_norm": 0.5001994371414185, "learning_rate": 9.992161791736945e-06, "loss": 0.3955, "step": 2450 }, { "epoch": 0.11247762837868845, "grad_norm": 0.5235245227813721, "learning_rate": 9.992148062162503e-06, "loss": 0.5332, "step": 2451 }, { "epoch": 0.11252351888394291, "grad_norm": 0.5011329054832458, "learning_rate": 9.99213432058352e-06, "loss": 0.4945, "step": 2452 }, { "epoch": 0.11256940938919738, "grad_norm": 0.529055655002594, "learning_rate": 9.99212056700003e-06, "loss": 0.5399, "step": 2453 }, { "epoch": 0.11261529989445183, "grad_norm": 0.4809805452823639, "learning_rate": 9.992106801412065e-06, "loss": 0.3989, "step": 2454 }, { "epoch": 0.1126611903997063, "grad_norm": 0.4782566428184509, "learning_rate": 9.99209302381966e-06, "loss": 0.3819, "step": 2455 }, { "epoch": 0.11270708090496076, "grad_norm": 0.49292415380477905, "learning_rate": 9.992079234222846e-06, "loss": 0.4182, "step": 2456 }, { "epoch": 0.11275297141021523, "grad_norm": 0.45130375027656555, "learning_rate": 9.992065432621657e-06, "loss": 0.3879, "step": 2457 }, { "epoch": 0.11279886191546969, "grad_norm": 0.46563634276390076, "learning_rate": 9.992051619016126e-06, "loss": 0.4124, "step": 2458 }, { "epoch": 0.11284475242072416, "grad_norm": 0.43359073996543884, "learning_rate": 9.992037793406287e-06, "loss": 0.3345, "step": 2459 }, { "epoch": 0.11289064292597861, "grad_norm": 0.5066606402397156, "learning_rate": 9.992023955792172e-06, "loss": 0.4848, "step": 2460 }, { "epoch": 0.11293653343123308, "grad_norm": 0.4598350524902344, "learning_rate": 9.992010106173816e-06, "loss": 0.3666, "step": 2461 }, { "epoch": 0.11298242393648754, "grad_norm": 0.46526703238487244, "learning_rate": 9.99199624455125e-06, "loss": 0.4273, "step": 2462 }, { "epoch": 0.11302831444174201, "grad_norm": 0.4622974693775177, "learning_rate": 9.991982370924509e-06, "loss": 0.3942, "step": 2463 }, { "epoch": 0.11307420494699646, "grad_norm": 0.450998991727829, "learning_rate": 9.991968485293627e-06, "loss": 0.3573, "step": 2464 }, { "epoch": 0.11312009545225093, "grad_norm": 0.447102814912796, "learning_rate": 9.991954587658634e-06, "loss": 0.3541, "step": 2465 }, { "epoch": 0.11316598595750539, "grad_norm": 0.4572606086730957, "learning_rate": 9.991940678019569e-06, "loss": 0.3263, "step": 2466 }, { "epoch": 0.11321187646275986, "grad_norm": 0.5254818201065063, "learning_rate": 9.99192675637646e-06, "loss": 0.479, "step": 2467 }, { "epoch": 0.11325776696801432, "grad_norm": 0.4464569389820099, "learning_rate": 9.991912822729342e-06, "loss": 0.3968, "step": 2468 }, { "epoch": 0.11330365747326879, "grad_norm": 0.48612359166145325, "learning_rate": 9.99189887707825e-06, "loss": 0.3998, "step": 2469 }, { "epoch": 0.11334954797852324, "grad_norm": 0.49350661039352417, "learning_rate": 9.991884919423217e-06, "loss": 0.4847, "step": 2470 }, { "epoch": 0.1133954384837777, "grad_norm": 0.470751017332077, "learning_rate": 9.991870949764276e-06, "loss": 0.4253, "step": 2471 }, { "epoch": 0.11344132898903217, "grad_norm": 0.4237576723098755, "learning_rate": 9.991856968101462e-06, "loss": 0.2862, "step": 2472 }, { "epoch": 0.11348721949428663, "grad_norm": 0.448932021856308, "learning_rate": 9.991842974434805e-06, "loss": 0.3259, "step": 2473 }, { "epoch": 0.1135331099995411, "grad_norm": 0.49851247668266296, "learning_rate": 9.991828968764343e-06, "loss": 0.4279, "step": 2474 }, { "epoch": 0.11357900050479555, "grad_norm": 0.48448216915130615, "learning_rate": 9.991814951090106e-06, "loss": 0.4553, "step": 2475 }, { "epoch": 0.11362489101005002, "grad_norm": 0.5331076383590698, "learning_rate": 9.991800921412132e-06, "loss": 0.4624, "step": 2476 }, { "epoch": 0.11367078151530448, "grad_norm": 0.47850388288497925, "learning_rate": 9.99178687973045e-06, "loss": 0.4941, "step": 2477 }, { "epoch": 0.11371667202055895, "grad_norm": 0.5628966689109802, "learning_rate": 9.991772826045097e-06, "loss": 0.526, "step": 2478 }, { "epoch": 0.1137625625258134, "grad_norm": 0.44776520133018494, "learning_rate": 9.991758760356105e-06, "loss": 0.368, "step": 2479 }, { "epoch": 0.11380845303106787, "grad_norm": 0.4841279983520508, "learning_rate": 9.99174468266351e-06, "loss": 0.4209, "step": 2480 }, { "epoch": 0.11385434353632233, "grad_norm": 0.4831261932849884, "learning_rate": 9.991730592967344e-06, "loss": 0.4023, "step": 2481 }, { "epoch": 0.1139002340415768, "grad_norm": 0.47709277272224426, "learning_rate": 9.99171649126764e-06, "loss": 0.441, "step": 2482 }, { "epoch": 0.11394612454683126, "grad_norm": 0.5035306811332703, "learning_rate": 9.991702377564435e-06, "loss": 0.543, "step": 2483 }, { "epoch": 0.11399201505208573, "grad_norm": 0.45184990763664246, "learning_rate": 9.991688251857761e-06, "loss": 0.3407, "step": 2484 }, { "epoch": 0.11403790555734018, "grad_norm": 0.4857518970966339, "learning_rate": 9.991674114147651e-06, "loss": 0.432, "step": 2485 }, { "epoch": 0.11408379606259465, "grad_norm": 0.45240625739097595, "learning_rate": 9.991659964434142e-06, "loss": 0.4001, "step": 2486 }, { "epoch": 0.11412968656784911, "grad_norm": 0.5007497668266296, "learning_rate": 9.991645802717265e-06, "loss": 0.4381, "step": 2487 }, { "epoch": 0.11417557707310358, "grad_norm": 0.4801659882068634, "learning_rate": 9.991631628997055e-06, "loss": 0.3593, "step": 2488 }, { "epoch": 0.11422146757835804, "grad_norm": 0.4322071969509125, "learning_rate": 9.991617443273547e-06, "loss": 0.3531, "step": 2489 }, { "epoch": 0.1142673580836125, "grad_norm": 0.49512311816215515, "learning_rate": 9.991603245546775e-06, "loss": 0.4317, "step": 2490 }, { "epoch": 0.11431324858886696, "grad_norm": 0.4706094563007355, "learning_rate": 9.99158903581677e-06, "loss": 0.4298, "step": 2491 }, { "epoch": 0.11435913909412143, "grad_norm": 0.49296361207962036, "learning_rate": 9.991574814083572e-06, "loss": 0.5042, "step": 2492 }, { "epoch": 0.11440502959937589, "grad_norm": 0.46887606382369995, "learning_rate": 9.99156058034721e-06, "loss": 0.3861, "step": 2493 }, { "epoch": 0.11445092010463036, "grad_norm": 0.529515266418457, "learning_rate": 9.991546334607721e-06, "loss": 0.4387, "step": 2494 }, { "epoch": 0.11449681060988481, "grad_norm": 0.49518662691116333, "learning_rate": 9.991532076865138e-06, "loss": 0.4588, "step": 2495 }, { "epoch": 0.11454270111513928, "grad_norm": 0.6056028008460999, "learning_rate": 9.991517807119497e-06, "loss": 0.5964, "step": 2496 }, { "epoch": 0.11458859162039374, "grad_norm": 0.46793216466903687, "learning_rate": 9.99150352537083e-06, "loss": 0.383, "step": 2497 }, { "epoch": 0.11463448212564821, "grad_norm": 0.47859108448028564, "learning_rate": 9.991489231619171e-06, "loss": 0.4231, "step": 2498 }, { "epoch": 0.11468037263090267, "grad_norm": 0.5051490664482117, "learning_rate": 9.991474925864557e-06, "loss": 0.4568, "step": 2499 }, { "epoch": 0.11472626313615712, "grad_norm": 0.4955484867095947, "learning_rate": 9.991460608107023e-06, "loss": 0.4417, "step": 2500 }, { "epoch": 0.11477215364141159, "grad_norm": 0.49946221709251404, "learning_rate": 9.9914462783466e-06, "loss": 0.3824, "step": 2501 }, { "epoch": 0.11481804414666605, "grad_norm": 0.46977555751800537, "learning_rate": 9.991431936583322e-06, "loss": 0.3355, "step": 2502 }, { "epoch": 0.11486393465192052, "grad_norm": 0.5135558247566223, "learning_rate": 9.991417582817229e-06, "loss": 0.4024, "step": 2503 }, { "epoch": 0.11490982515717497, "grad_norm": 0.4645470082759857, "learning_rate": 9.99140321704835e-06, "loss": 0.4053, "step": 2504 }, { "epoch": 0.11495571566242944, "grad_norm": 0.4802762567996979, "learning_rate": 9.991388839276722e-06, "loss": 0.3575, "step": 2505 }, { "epoch": 0.1150016061676839, "grad_norm": 0.4936521351337433, "learning_rate": 9.991374449502378e-06, "loss": 0.4517, "step": 2506 }, { "epoch": 0.11504749667293837, "grad_norm": 0.48894599080085754, "learning_rate": 9.991360047725356e-06, "loss": 0.404, "step": 2507 }, { "epoch": 0.11509338717819283, "grad_norm": 0.4959702491760254, "learning_rate": 9.991345633945689e-06, "loss": 0.4437, "step": 2508 }, { "epoch": 0.1151392776834473, "grad_norm": 0.4830079972743988, "learning_rate": 9.991331208163409e-06, "loss": 0.3637, "step": 2509 }, { "epoch": 0.11518516818870175, "grad_norm": 0.5489052534103394, "learning_rate": 9.991316770378552e-06, "loss": 0.5557, "step": 2510 }, { "epoch": 0.11523105869395622, "grad_norm": 0.5327038764953613, "learning_rate": 9.991302320591155e-06, "loss": 0.4937, "step": 2511 }, { "epoch": 0.11527694919921068, "grad_norm": 0.5096604228019714, "learning_rate": 9.99128785880125e-06, "loss": 0.4203, "step": 2512 }, { "epoch": 0.11532283970446515, "grad_norm": 0.483519583940506, "learning_rate": 9.991273385008875e-06, "loss": 0.3665, "step": 2513 }, { "epoch": 0.1153687302097196, "grad_norm": 0.4577787518501282, "learning_rate": 9.991258899214062e-06, "loss": 0.3305, "step": 2514 }, { "epoch": 0.11541462071497408, "grad_norm": 0.44032642245292664, "learning_rate": 9.991244401416846e-06, "loss": 0.349, "step": 2515 }, { "epoch": 0.11546051122022853, "grad_norm": 0.46372753381729126, "learning_rate": 9.991229891617263e-06, "loss": 0.3502, "step": 2516 }, { "epoch": 0.115506401725483, "grad_norm": 0.5140289068222046, "learning_rate": 9.991215369815347e-06, "loss": 0.4978, "step": 2517 }, { "epoch": 0.11555229223073746, "grad_norm": 0.4673903286457062, "learning_rate": 9.991200836011133e-06, "loss": 0.3771, "step": 2518 }, { "epoch": 0.11559818273599193, "grad_norm": 0.5397522449493408, "learning_rate": 9.991186290204657e-06, "loss": 0.4595, "step": 2519 }, { "epoch": 0.11564407324124638, "grad_norm": 0.5108731985092163, "learning_rate": 9.991171732395953e-06, "loss": 0.4213, "step": 2520 }, { "epoch": 0.11568996374650085, "grad_norm": 0.5284456014633179, "learning_rate": 9.991157162585056e-06, "loss": 0.4834, "step": 2521 }, { "epoch": 0.11573585425175531, "grad_norm": 0.5100253820419312, "learning_rate": 9.991142580772003e-06, "loss": 0.5323, "step": 2522 }, { "epoch": 0.11578174475700978, "grad_norm": 0.6847053170204163, "learning_rate": 9.991127986956826e-06, "loss": 0.3686, "step": 2523 }, { "epoch": 0.11582763526226424, "grad_norm": 0.493842750787735, "learning_rate": 9.991113381139561e-06, "loss": 0.3763, "step": 2524 }, { "epoch": 0.1158735257675187, "grad_norm": 0.4907994568347931, "learning_rate": 9.991098763320243e-06, "loss": 0.4254, "step": 2525 }, { "epoch": 0.11591941627277316, "grad_norm": 0.4895491302013397, "learning_rate": 9.991084133498909e-06, "loss": 0.4286, "step": 2526 }, { "epoch": 0.11596530677802763, "grad_norm": 0.4673796594142914, "learning_rate": 9.991069491675592e-06, "loss": 0.4089, "step": 2527 }, { "epoch": 0.11601119728328209, "grad_norm": 0.4665570855140686, "learning_rate": 9.99105483785033e-06, "loss": 0.3868, "step": 2528 }, { "epoch": 0.11605708778853656, "grad_norm": 0.4687078893184662, "learning_rate": 9.991040172023155e-06, "loss": 0.3428, "step": 2529 }, { "epoch": 0.11610297829379101, "grad_norm": 0.588813304901123, "learning_rate": 9.991025494194102e-06, "loss": 0.5647, "step": 2530 }, { "epoch": 0.11614886879904547, "grad_norm": 0.500828742980957, "learning_rate": 9.99101080436321e-06, "loss": 0.425, "step": 2531 }, { "epoch": 0.11619475930429994, "grad_norm": 0.540797233581543, "learning_rate": 9.990996102530511e-06, "loss": 0.4804, "step": 2532 }, { "epoch": 0.1162406498095544, "grad_norm": 0.45883938670158386, "learning_rate": 9.990981388696043e-06, "loss": 0.4273, "step": 2533 }, { "epoch": 0.11628654031480887, "grad_norm": 0.5134001970291138, "learning_rate": 9.99096666285984e-06, "loss": 0.4881, "step": 2534 }, { "epoch": 0.11633243082006332, "grad_norm": 0.5219642519950867, "learning_rate": 9.990951925021936e-06, "loss": 0.4852, "step": 2535 }, { "epoch": 0.1163783213253178, "grad_norm": 0.49288153648376465, "learning_rate": 9.990937175182367e-06, "loss": 0.4311, "step": 2536 }, { "epoch": 0.11642421183057225, "grad_norm": 0.5010160803794861, "learning_rate": 9.990922413341172e-06, "loss": 0.4391, "step": 2537 }, { "epoch": 0.11647010233582672, "grad_norm": 0.5057856440544128, "learning_rate": 9.990907639498381e-06, "loss": 0.3806, "step": 2538 }, { "epoch": 0.11651599284108118, "grad_norm": 0.5091994404792786, "learning_rate": 9.990892853654035e-06, "loss": 0.3987, "step": 2539 }, { "epoch": 0.11656188334633565, "grad_norm": 0.49505552649497986, "learning_rate": 9.990878055808166e-06, "loss": 0.4096, "step": 2540 }, { "epoch": 0.1166077738515901, "grad_norm": 0.5190210342407227, "learning_rate": 9.990863245960809e-06, "loss": 0.4279, "step": 2541 }, { "epoch": 0.11665366435684457, "grad_norm": 0.5601308345794678, "learning_rate": 9.990848424112003e-06, "loss": 0.4448, "step": 2542 }, { "epoch": 0.11669955486209903, "grad_norm": 0.4967178404331207, "learning_rate": 9.99083359026178e-06, "loss": 0.3949, "step": 2543 }, { "epoch": 0.1167454453673535, "grad_norm": 0.4323314428329468, "learning_rate": 9.990818744410178e-06, "loss": 0.3785, "step": 2544 }, { "epoch": 0.11679133587260795, "grad_norm": 0.47798097133636475, "learning_rate": 9.990803886557232e-06, "loss": 0.4007, "step": 2545 }, { "epoch": 0.11683722637786242, "grad_norm": 0.43997934460639954, "learning_rate": 9.990789016702979e-06, "loss": 0.3193, "step": 2546 }, { "epoch": 0.11688311688311688, "grad_norm": 0.44810405373573303, "learning_rate": 9.990774134847452e-06, "loss": 0.3171, "step": 2547 }, { "epoch": 0.11692900738837135, "grad_norm": 0.5047373175621033, "learning_rate": 9.99075924099069e-06, "loss": 0.4914, "step": 2548 }, { "epoch": 0.1169748978936258, "grad_norm": 0.4475233852863312, "learning_rate": 9.990744335132725e-06, "loss": 0.3559, "step": 2549 }, { "epoch": 0.11702078839888028, "grad_norm": 0.4465727210044861, "learning_rate": 9.990729417273597e-06, "loss": 0.3767, "step": 2550 }, { "epoch": 0.11706667890413473, "grad_norm": 0.4620459973812103, "learning_rate": 9.990714487413338e-06, "loss": 0.3695, "step": 2551 }, { "epoch": 0.1171125694093892, "grad_norm": 0.5415677428245544, "learning_rate": 9.990699545551988e-06, "loss": 0.505, "step": 2552 }, { "epoch": 0.11715845991464366, "grad_norm": 0.4978041648864746, "learning_rate": 9.990684591689578e-06, "loss": 0.4121, "step": 2553 }, { "epoch": 0.11720435041989813, "grad_norm": 0.4712114632129669, "learning_rate": 9.990669625826149e-06, "loss": 0.392, "step": 2554 }, { "epoch": 0.11725024092515259, "grad_norm": 0.5263304114341736, "learning_rate": 9.990654647961732e-06, "loss": 0.4467, "step": 2555 }, { "epoch": 0.11729613143040706, "grad_norm": 0.46179741621017456, "learning_rate": 9.990639658096369e-06, "loss": 0.3736, "step": 2556 }, { "epoch": 0.11734202193566151, "grad_norm": 0.46357646584510803, "learning_rate": 9.99062465623009e-06, "loss": 0.3265, "step": 2557 }, { "epoch": 0.11738791244091598, "grad_norm": 0.5010532140731812, "learning_rate": 9.990609642362936e-06, "loss": 0.4042, "step": 2558 }, { "epoch": 0.11743380294617044, "grad_norm": 0.44892188906669617, "learning_rate": 9.99059461649494e-06, "loss": 0.3619, "step": 2559 }, { "epoch": 0.1174796934514249, "grad_norm": 0.5023994445800781, "learning_rate": 9.990579578626138e-06, "loss": 0.4238, "step": 2560 }, { "epoch": 0.11752558395667936, "grad_norm": 0.47415891289711, "learning_rate": 9.990564528756567e-06, "loss": 0.419, "step": 2561 }, { "epoch": 0.11757147446193382, "grad_norm": 0.4921308755874634, "learning_rate": 9.990549466886266e-06, "loss": 0.4009, "step": 2562 }, { "epoch": 0.11761736496718829, "grad_norm": 0.4659929871559143, "learning_rate": 9.990534393015266e-06, "loss": 0.3781, "step": 2563 }, { "epoch": 0.11766325547244275, "grad_norm": 0.4787804186344147, "learning_rate": 9.990519307143607e-06, "loss": 0.4515, "step": 2564 }, { "epoch": 0.11770914597769722, "grad_norm": 0.4983351230621338, "learning_rate": 9.990504209271324e-06, "loss": 0.4243, "step": 2565 }, { "epoch": 0.11775503648295167, "grad_norm": 0.4443513751029968, "learning_rate": 9.990489099398454e-06, "loss": 0.3249, "step": 2566 }, { "epoch": 0.11780092698820614, "grad_norm": 0.532058835029602, "learning_rate": 9.990473977525032e-06, "loss": 0.4121, "step": 2567 }, { "epoch": 0.1178468174934606, "grad_norm": 0.5136082768440247, "learning_rate": 9.990458843651096e-06, "loss": 0.4726, "step": 2568 }, { "epoch": 0.11789270799871507, "grad_norm": 0.4879734516143799, "learning_rate": 9.99044369777668e-06, "loss": 0.4131, "step": 2569 }, { "epoch": 0.11793859850396952, "grad_norm": 0.5160858035087585, "learning_rate": 9.990428539901821e-06, "loss": 0.4291, "step": 2570 }, { "epoch": 0.117984489009224, "grad_norm": 0.49275532364845276, "learning_rate": 9.990413370026559e-06, "loss": 0.3801, "step": 2571 }, { "epoch": 0.11803037951447845, "grad_norm": 0.4487593173980713, "learning_rate": 9.990398188150927e-06, "loss": 0.3677, "step": 2572 }, { "epoch": 0.11807627001973292, "grad_norm": 0.453101247549057, "learning_rate": 9.990382994274964e-06, "loss": 0.3956, "step": 2573 }, { "epoch": 0.11812216052498738, "grad_norm": 0.44491493701934814, "learning_rate": 9.990367788398702e-06, "loss": 0.3494, "step": 2574 }, { "epoch": 0.11816805103024185, "grad_norm": 0.4446192681789398, "learning_rate": 9.990352570522181e-06, "loss": 0.3465, "step": 2575 }, { "epoch": 0.1182139415354963, "grad_norm": 0.4731144905090332, "learning_rate": 9.990337340645438e-06, "loss": 0.3828, "step": 2576 }, { "epoch": 0.11825983204075077, "grad_norm": 0.5044993758201599, "learning_rate": 9.990322098768509e-06, "loss": 0.4299, "step": 2577 }, { "epoch": 0.11830572254600523, "grad_norm": 0.48940330743789673, "learning_rate": 9.990306844891429e-06, "loss": 0.3868, "step": 2578 }, { "epoch": 0.1183516130512597, "grad_norm": 0.4931536018848419, "learning_rate": 9.990291579014237e-06, "loss": 0.4411, "step": 2579 }, { "epoch": 0.11839750355651416, "grad_norm": 0.48019281029701233, "learning_rate": 9.99027630113697e-06, "loss": 0.3771, "step": 2580 }, { "epoch": 0.11844339406176863, "grad_norm": 0.4962136447429657, "learning_rate": 9.990261011259661e-06, "loss": 0.406, "step": 2581 }, { "epoch": 0.11848928456702308, "grad_norm": 0.47679248452186584, "learning_rate": 9.990245709382351e-06, "loss": 0.4009, "step": 2582 }, { "epoch": 0.11853517507227755, "grad_norm": 0.4609931409358978, "learning_rate": 9.990230395505074e-06, "loss": 0.3559, "step": 2583 }, { "epoch": 0.11858106557753201, "grad_norm": 0.4899750053882599, "learning_rate": 9.990215069627869e-06, "loss": 0.4558, "step": 2584 }, { "epoch": 0.11862695608278648, "grad_norm": 0.5094980001449585, "learning_rate": 9.990199731750769e-06, "loss": 0.5164, "step": 2585 }, { "epoch": 0.11867284658804093, "grad_norm": 0.48061394691467285, "learning_rate": 9.990184381873816e-06, "loss": 0.3473, "step": 2586 }, { "epoch": 0.1187187370932954, "grad_norm": 0.44875308871269226, "learning_rate": 9.990169019997045e-06, "loss": 0.3931, "step": 2587 }, { "epoch": 0.11876462759854986, "grad_norm": 0.4578744173049927, "learning_rate": 9.99015364612049e-06, "loss": 0.4115, "step": 2588 }, { "epoch": 0.11881051810380432, "grad_norm": 0.4992080628871918, "learning_rate": 9.990138260244193e-06, "loss": 0.4382, "step": 2589 }, { "epoch": 0.11885640860905879, "grad_norm": 0.4635515809059143, "learning_rate": 9.990122862368187e-06, "loss": 0.3502, "step": 2590 }, { "epoch": 0.11890229911431324, "grad_norm": 0.45318344235420227, "learning_rate": 9.99010745249251e-06, "loss": 0.3755, "step": 2591 }, { "epoch": 0.11894818961956771, "grad_norm": 0.4907990097999573, "learning_rate": 9.990092030617201e-06, "loss": 0.4112, "step": 2592 }, { "epoch": 0.11899408012482217, "grad_norm": 0.527769923210144, "learning_rate": 9.990076596742294e-06, "loss": 0.5634, "step": 2593 }, { "epoch": 0.11903997063007664, "grad_norm": 0.40479812026023865, "learning_rate": 9.99006115086783e-06, "loss": 0.309, "step": 2594 }, { "epoch": 0.1190858611353311, "grad_norm": 0.4663656949996948, "learning_rate": 9.990045692993842e-06, "loss": 0.4433, "step": 2595 }, { "epoch": 0.11913175164058556, "grad_norm": 0.46576741337776184, "learning_rate": 9.990030223120369e-06, "loss": 0.3417, "step": 2596 }, { "epoch": 0.11917764214584002, "grad_norm": 0.49073708057403564, "learning_rate": 9.990014741247449e-06, "loss": 0.4298, "step": 2597 }, { "epoch": 0.11922353265109449, "grad_norm": 0.49052125215530396, "learning_rate": 9.989999247375117e-06, "loss": 0.4487, "step": 2598 }, { "epoch": 0.11926942315634895, "grad_norm": 0.4916822910308838, "learning_rate": 9.989983741503412e-06, "loss": 0.4126, "step": 2599 }, { "epoch": 0.11931531366160342, "grad_norm": 0.46278244256973267, "learning_rate": 9.989968223632373e-06, "loss": 0.3623, "step": 2600 }, { "epoch": 0.11936120416685787, "grad_norm": 0.509976863861084, "learning_rate": 9.989952693762033e-06, "loss": 0.4513, "step": 2601 }, { "epoch": 0.11940709467211234, "grad_norm": 0.5073480010032654, "learning_rate": 9.989937151892434e-06, "loss": 0.477, "step": 2602 }, { "epoch": 0.1194529851773668, "grad_norm": 0.5486426949501038, "learning_rate": 9.989921598023609e-06, "loss": 0.4428, "step": 2603 }, { "epoch": 0.11949887568262127, "grad_norm": 0.4678741991519928, "learning_rate": 9.989906032155599e-06, "loss": 0.4013, "step": 2604 }, { "epoch": 0.11954476618787573, "grad_norm": 0.5263512134552002, "learning_rate": 9.989890454288439e-06, "loss": 0.5139, "step": 2605 }, { "epoch": 0.1195906566931302, "grad_norm": 0.4683694839477539, "learning_rate": 9.989874864422167e-06, "loss": 0.4018, "step": 2606 }, { "epoch": 0.11963654719838465, "grad_norm": 0.45887887477874756, "learning_rate": 9.98985926255682e-06, "loss": 0.4354, "step": 2607 }, { "epoch": 0.11968243770363912, "grad_norm": 0.6490362882614136, "learning_rate": 9.989843648692437e-06, "loss": 0.5797, "step": 2608 }, { "epoch": 0.11972832820889358, "grad_norm": 0.4808391034603119, "learning_rate": 9.989828022829056e-06, "loss": 0.3606, "step": 2609 }, { "epoch": 0.11977421871414805, "grad_norm": 0.46937552094459534, "learning_rate": 9.989812384966713e-06, "loss": 0.4311, "step": 2610 }, { "epoch": 0.1198201092194025, "grad_norm": 0.5063654184341431, "learning_rate": 9.989796735105446e-06, "loss": 0.4181, "step": 2611 }, { "epoch": 0.11986599972465697, "grad_norm": 0.4380093216896057, "learning_rate": 9.989781073245292e-06, "loss": 0.3394, "step": 2612 }, { "epoch": 0.11991189022991143, "grad_norm": 0.4629599452018738, "learning_rate": 9.98976539938629e-06, "loss": 0.3732, "step": 2613 }, { "epoch": 0.1199577807351659, "grad_norm": 0.45319634675979614, "learning_rate": 9.989749713528475e-06, "loss": 0.3796, "step": 2614 }, { "epoch": 0.12000367124042036, "grad_norm": 0.47024860978126526, "learning_rate": 9.98973401567189e-06, "loss": 0.3873, "step": 2615 }, { "epoch": 0.12004956174567483, "grad_norm": 0.47099870443344116, "learning_rate": 9.989718305816568e-06, "loss": 0.3683, "step": 2616 }, { "epoch": 0.12009545225092928, "grad_norm": 0.5088024139404297, "learning_rate": 9.989702583962548e-06, "loss": 0.4209, "step": 2617 }, { "epoch": 0.12014134275618374, "grad_norm": 0.44578009843826294, "learning_rate": 9.98968685010987e-06, "loss": 0.3672, "step": 2618 }, { "epoch": 0.12018723326143821, "grad_norm": 0.48774299025535583, "learning_rate": 9.989671104258567e-06, "loss": 0.4517, "step": 2619 }, { "epoch": 0.12023312376669266, "grad_norm": 0.4851672649383545, "learning_rate": 9.989655346408681e-06, "loss": 0.4784, "step": 2620 }, { "epoch": 0.12027901427194714, "grad_norm": 0.46987080574035645, "learning_rate": 9.989639576560248e-06, "loss": 0.3549, "step": 2621 }, { "epoch": 0.12032490477720159, "grad_norm": 0.4889931380748749, "learning_rate": 9.989623794713307e-06, "loss": 0.4615, "step": 2622 }, { "epoch": 0.12037079528245606, "grad_norm": 0.4562816023826599, "learning_rate": 9.989608000867896e-06, "loss": 0.3668, "step": 2623 }, { "epoch": 0.12041668578771052, "grad_norm": 0.5622779726982117, "learning_rate": 9.989592195024054e-06, "loss": 0.5258, "step": 2624 }, { "epoch": 0.12046257629296499, "grad_norm": 0.4577435255050659, "learning_rate": 9.989576377181816e-06, "loss": 0.3997, "step": 2625 }, { "epoch": 0.12050846679821944, "grad_norm": 0.47942185401916504, "learning_rate": 9.98956054734122e-06, "loss": 0.4302, "step": 2626 }, { "epoch": 0.12055435730347391, "grad_norm": 0.4641333222389221, "learning_rate": 9.989544705502307e-06, "loss": 0.39, "step": 2627 }, { "epoch": 0.12060024780872837, "grad_norm": 0.4787479639053345, "learning_rate": 9.989528851665114e-06, "loss": 0.3977, "step": 2628 }, { "epoch": 0.12064613831398284, "grad_norm": 0.47346392273902893, "learning_rate": 9.98951298582968e-06, "loss": 0.4281, "step": 2629 }, { "epoch": 0.1206920288192373, "grad_norm": 0.4877626299858093, "learning_rate": 9.98949710799604e-06, "loss": 0.4898, "step": 2630 }, { "epoch": 0.12073791932449177, "grad_norm": 0.4503246247768402, "learning_rate": 9.989481218164235e-06, "loss": 0.3747, "step": 2631 }, { "epoch": 0.12078380982974622, "grad_norm": 0.4832894206047058, "learning_rate": 9.989465316334303e-06, "loss": 0.4471, "step": 2632 }, { "epoch": 0.12082970033500069, "grad_norm": 0.465067595243454, "learning_rate": 9.98944940250628e-06, "loss": 0.3676, "step": 2633 }, { "epoch": 0.12087559084025515, "grad_norm": 0.43929052352905273, "learning_rate": 9.989433476680207e-06, "loss": 0.3595, "step": 2634 }, { "epoch": 0.12092148134550962, "grad_norm": 0.5205087661743164, "learning_rate": 9.989417538856121e-06, "loss": 0.478, "step": 2635 }, { "epoch": 0.12096737185076407, "grad_norm": 0.496156245470047, "learning_rate": 9.98940158903406e-06, "loss": 0.4612, "step": 2636 }, { "epoch": 0.12101326235601854, "grad_norm": 0.47536590695381165, "learning_rate": 9.989385627214064e-06, "loss": 0.3773, "step": 2637 }, { "epoch": 0.121059152861273, "grad_norm": 0.4678027629852295, "learning_rate": 9.98936965339617e-06, "loss": 0.423, "step": 2638 }, { "epoch": 0.12110504336652747, "grad_norm": 0.5094507932662964, "learning_rate": 9.989353667580418e-06, "loss": 0.4631, "step": 2639 }, { "epoch": 0.12115093387178193, "grad_norm": 0.4905732572078705, "learning_rate": 9.989337669766841e-06, "loss": 0.3958, "step": 2640 }, { "epoch": 0.1211968243770364, "grad_norm": 0.45689842104911804, "learning_rate": 9.989321659955484e-06, "loss": 0.3389, "step": 2641 }, { "epoch": 0.12124271488229085, "grad_norm": 0.46184709668159485, "learning_rate": 9.989305638146384e-06, "loss": 0.4432, "step": 2642 }, { "epoch": 0.12128860538754532, "grad_norm": 0.48346012830734253, "learning_rate": 9.989289604339577e-06, "loss": 0.4496, "step": 2643 }, { "epoch": 0.12133449589279978, "grad_norm": 0.4688853919506073, "learning_rate": 9.989273558535105e-06, "loss": 0.4094, "step": 2644 }, { "epoch": 0.12138038639805425, "grad_norm": 0.49808549880981445, "learning_rate": 9.989257500733003e-06, "loss": 0.4532, "step": 2645 }, { "epoch": 0.1214262769033087, "grad_norm": 0.43277788162231445, "learning_rate": 9.989241430933312e-06, "loss": 0.3305, "step": 2646 }, { "epoch": 0.12147216740856316, "grad_norm": 0.4733690619468689, "learning_rate": 9.98922534913607e-06, "loss": 0.4502, "step": 2647 }, { "epoch": 0.12151805791381763, "grad_norm": 0.4406627118587494, "learning_rate": 9.989209255341316e-06, "loss": 0.3209, "step": 2648 }, { "epoch": 0.12156394841907209, "grad_norm": 0.4449348747730255, "learning_rate": 9.989193149549088e-06, "loss": 0.3765, "step": 2649 }, { "epoch": 0.12160983892432656, "grad_norm": 0.4798898994922638, "learning_rate": 9.989177031759425e-06, "loss": 0.4042, "step": 2650 }, { "epoch": 0.12165572942958101, "grad_norm": 0.4864308834075928, "learning_rate": 9.989160901972366e-06, "loss": 0.3935, "step": 2651 }, { "epoch": 0.12170161993483548, "grad_norm": 0.46155428886413574, "learning_rate": 9.98914476018795e-06, "loss": 0.2942, "step": 2652 }, { "epoch": 0.12174751044008994, "grad_norm": 0.4384247958660126, "learning_rate": 9.989128606406215e-06, "loss": 0.3186, "step": 2653 }, { "epoch": 0.12179340094534441, "grad_norm": 0.45776841044425964, "learning_rate": 9.989112440627202e-06, "loss": 0.3595, "step": 2654 }, { "epoch": 0.12183929145059887, "grad_norm": 0.5123647451400757, "learning_rate": 9.989096262850945e-06, "loss": 0.4416, "step": 2655 }, { "epoch": 0.12188518195585334, "grad_norm": 0.42523425817489624, "learning_rate": 9.989080073077489e-06, "loss": 0.2779, "step": 2656 }, { "epoch": 0.12193107246110779, "grad_norm": 0.46643584966659546, "learning_rate": 9.98906387130687e-06, "loss": 0.3834, "step": 2657 }, { "epoch": 0.12197696296636226, "grad_norm": 0.44290754199028015, "learning_rate": 9.989047657539125e-06, "loss": 0.3423, "step": 2658 }, { "epoch": 0.12202285347161672, "grad_norm": 0.4556252360343933, "learning_rate": 9.989031431774295e-06, "loss": 0.3335, "step": 2659 }, { "epoch": 0.12206874397687119, "grad_norm": 0.5164615511894226, "learning_rate": 9.98901519401242e-06, "loss": 0.4986, "step": 2660 }, { "epoch": 0.12211463448212564, "grad_norm": 0.4679234027862549, "learning_rate": 9.988998944253539e-06, "loss": 0.4101, "step": 2661 }, { "epoch": 0.12216052498738011, "grad_norm": 0.522869348526001, "learning_rate": 9.988982682497688e-06, "loss": 0.4316, "step": 2662 }, { "epoch": 0.12220641549263457, "grad_norm": 0.48769786953926086, "learning_rate": 9.98896640874491e-06, "loss": 0.4261, "step": 2663 }, { "epoch": 0.12225230599788904, "grad_norm": 0.4540833532810211, "learning_rate": 9.98895012299524e-06, "loss": 0.4039, "step": 2664 }, { "epoch": 0.1222981965031435, "grad_norm": 0.4306601881980896, "learning_rate": 9.98893382524872e-06, "loss": 0.335, "step": 2665 }, { "epoch": 0.12234408700839797, "grad_norm": 0.534954309463501, "learning_rate": 9.988917515505392e-06, "loss": 0.4612, "step": 2666 }, { "epoch": 0.12238997751365242, "grad_norm": 0.47996991872787476, "learning_rate": 9.988901193765288e-06, "loss": 0.4145, "step": 2667 }, { "epoch": 0.1224358680189069, "grad_norm": 0.4783177077770233, "learning_rate": 9.988884860028453e-06, "loss": 0.3981, "step": 2668 }, { "epoch": 0.12248175852416135, "grad_norm": 0.4888615906238556, "learning_rate": 9.988868514294925e-06, "loss": 0.4878, "step": 2669 }, { "epoch": 0.12252764902941582, "grad_norm": 0.5233306884765625, "learning_rate": 9.98885215656474e-06, "loss": 0.4703, "step": 2670 }, { "epoch": 0.12257353953467028, "grad_norm": 0.47517794370651245, "learning_rate": 9.988835786837945e-06, "loss": 0.3881, "step": 2671 }, { "epoch": 0.12261943003992475, "grad_norm": 0.4643119275569916, "learning_rate": 9.98881940511457e-06, "loss": 0.3692, "step": 2672 }, { "epoch": 0.1226653205451792, "grad_norm": 0.4417555630207062, "learning_rate": 9.988803011394661e-06, "loss": 0.3455, "step": 2673 }, { "epoch": 0.12271121105043367, "grad_norm": 0.47155094146728516, "learning_rate": 9.988786605678254e-06, "loss": 0.3854, "step": 2674 }, { "epoch": 0.12275710155568813, "grad_norm": 0.4446570873260498, "learning_rate": 9.988770187965391e-06, "loss": 0.369, "step": 2675 }, { "epoch": 0.1228029920609426, "grad_norm": 0.473347932100296, "learning_rate": 9.988753758256108e-06, "loss": 0.4111, "step": 2676 }, { "epoch": 0.12284888256619705, "grad_norm": 0.555674135684967, "learning_rate": 9.98873731655045e-06, "loss": 0.5086, "step": 2677 }, { "epoch": 0.12289477307145151, "grad_norm": 0.6599460244178772, "learning_rate": 9.988720862848451e-06, "loss": 0.4837, "step": 2678 }, { "epoch": 0.12294066357670598, "grad_norm": 0.5217191576957703, "learning_rate": 9.988704397150155e-06, "loss": 0.4923, "step": 2679 }, { "epoch": 0.12298655408196044, "grad_norm": 0.5135862827301025, "learning_rate": 9.988687919455597e-06, "loss": 0.4923, "step": 2680 }, { "epoch": 0.1230324445872149, "grad_norm": 0.5184143781661987, "learning_rate": 9.988671429764821e-06, "loss": 0.4831, "step": 2681 }, { "epoch": 0.12307833509246936, "grad_norm": 0.5583066940307617, "learning_rate": 9.988654928077862e-06, "loss": 0.4151, "step": 2682 }, { "epoch": 0.12312422559772383, "grad_norm": 0.4416000247001648, "learning_rate": 9.988638414394764e-06, "loss": 0.371, "step": 2683 }, { "epoch": 0.12317011610297829, "grad_norm": 0.5057492256164551, "learning_rate": 9.988621888715564e-06, "loss": 0.4177, "step": 2684 }, { "epoch": 0.12321600660823276, "grad_norm": 0.5240790247917175, "learning_rate": 9.988605351040304e-06, "loss": 0.4063, "step": 2685 }, { "epoch": 0.12326189711348721, "grad_norm": 0.4482614994049072, "learning_rate": 9.988588801369023e-06, "loss": 0.3951, "step": 2686 }, { "epoch": 0.12330778761874168, "grad_norm": 0.427959144115448, "learning_rate": 9.988572239701758e-06, "loss": 0.3258, "step": 2687 }, { "epoch": 0.12335367812399614, "grad_norm": 0.5692232847213745, "learning_rate": 9.988555666038556e-06, "loss": 0.5131, "step": 2688 }, { "epoch": 0.12339956862925061, "grad_norm": 0.4918581247329712, "learning_rate": 9.988539080379447e-06, "loss": 0.4351, "step": 2689 }, { "epoch": 0.12344545913450507, "grad_norm": 0.45957711338996887, "learning_rate": 9.98852248272448e-06, "loss": 0.4242, "step": 2690 }, { "epoch": 0.12349134963975954, "grad_norm": 0.480080783367157, "learning_rate": 9.988505873073686e-06, "loss": 0.3804, "step": 2691 }, { "epoch": 0.123537240145014, "grad_norm": 0.4867088198661804, "learning_rate": 9.988489251427113e-06, "loss": 0.4472, "step": 2692 }, { "epoch": 0.12358313065026846, "grad_norm": 0.49736061692237854, "learning_rate": 9.988472617784797e-06, "loss": 0.4333, "step": 2693 }, { "epoch": 0.12362902115552292, "grad_norm": 0.4782455265522003, "learning_rate": 9.98845597214678e-06, "loss": 0.37, "step": 2694 }, { "epoch": 0.12367491166077739, "grad_norm": 0.4965982437133789, "learning_rate": 9.9884393145131e-06, "loss": 0.4735, "step": 2695 }, { "epoch": 0.12372080216603185, "grad_norm": 0.4879864454269409, "learning_rate": 9.988422644883797e-06, "loss": 0.433, "step": 2696 }, { "epoch": 0.12376669267128632, "grad_norm": 0.4590333104133606, "learning_rate": 9.988405963258913e-06, "loss": 0.4145, "step": 2697 }, { "epoch": 0.12381258317654077, "grad_norm": 0.4210829436779022, "learning_rate": 9.988389269638487e-06, "loss": 0.2989, "step": 2698 }, { "epoch": 0.12385847368179524, "grad_norm": 0.4543023407459259, "learning_rate": 9.988372564022559e-06, "loss": 0.3524, "step": 2699 }, { "epoch": 0.1239043641870497, "grad_norm": 0.7114877104759216, "learning_rate": 9.988355846411169e-06, "loss": 0.5302, "step": 2700 }, { "epoch": 0.12395025469230417, "grad_norm": 0.4773704707622528, "learning_rate": 9.988339116804358e-06, "loss": 0.3911, "step": 2701 }, { "epoch": 0.12399614519755862, "grad_norm": 0.488718718290329, "learning_rate": 9.988322375202166e-06, "loss": 0.4934, "step": 2702 }, { "epoch": 0.1240420357028131, "grad_norm": 0.47661206126213074, "learning_rate": 9.988305621604631e-06, "loss": 0.4276, "step": 2703 }, { "epoch": 0.12408792620806755, "grad_norm": 0.48617932200431824, "learning_rate": 9.988288856011797e-06, "loss": 0.3923, "step": 2704 }, { "epoch": 0.12413381671332202, "grad_norm": 0.5129133462905884, "learning_rate": 9.988272078423703e-06, "loss": 0.4549, "step": 2705 }, { "epoch": 0.12417970721857648, "grad_norm": 0.484331876039505, "learning_rate": 9.988255288840389e-06, "loss": 0.4466, "step": 2706 }, { "epoch": 0.12422559772383093, "grad_norm": 0.464389443397522, "learning_rate": 9.988238487261894e-06, "loss": 0.3958, "step": 2707 }, { "epoch": 0.1242714882290854, "grad_norm": 0.5085384249687195, "learning_rate": 9.98822167368826e-06, "loss": 0.5681, "step": 2708 }, { "epoch": 0.12431737873433986, "grad_norm": 0.493242084980011, "learning_rate": 9.98820484811953e-06, "loss": 0.4812, "step": 2709 }, { "epoch": 0.12436326923959433, "grad_norm": 0.48836490511894226, "learning_rate": 9.988188010555739e-06, "loss": 0.4301, "step": 2710 }, { "epoch": 0.12440915974484879, "grad_norm": 0.41648826003074646, "learning_rate": 9.98817116099693e-06, "loss": 0.3083, "step": 2711 }, { "epoch": 0.12445505025010326, "grad_norm": 0.4782598614692688, "learning_rate": 9.988154299443145e-06, "loss": 0.4008, "step": 2712 }, { "epoch": 0.12450094075535771, "grad_norm": 0.4725387394428253, "learning_rate": 9.988137425894422e-06, "loss": 0.4191, "step": 2713 }, { "epoch": 0.12454683126061218, "grad_norm": 0.5006836652755737, "learning_rate": 9.988120540350804e-06, "loss": 0.4333, "step": 2714 }, { "epoch": 0.12459272176586664, "grad_norm": 0.4685843288898468, "learning_rate": 9.98810364281233e-06, "loss": 0.4105, "step": 2715 }, { "epoch": 0.12463861227112111, "grad_norm": 0.4412849545478821, "learning_rate": 9.988086733279042e-06, "loss": 0.3268, "step": 2716 }, { "epoch": 0.12468450277637556, "grad_norm": 0.579803466796875, "learning_rate": 9.988069811750978e-06, "loss": 0.564, "step": 2717 }, { "epoch": 0.12473039328163003, "grad_norm": 0.49471402168273926, "learning_rate": 9.98805287822818e-06, "loss": 0.4621, "step": 2718 }, { "epoch": 0.12477628378688449, "grad_norm": 0.4630710482597351, "learning_rate": 9.98803593271069e-06, "loss": 0.3895, "step": 2719 }, { "epoch": 0.12482217429213896, "grad_norm": 0.47090473771095276, "learning_rate": 9.988018975198549e-06, "loss": 0.3714, "step": 2720 }, { "epoch": 0.12486806479739342, "grad_norm": 0.46573033928871155, "learning_rate": 9.988002005691794e-06, "loss": 0.3511, "step": 2721 }, { "epoch": 0.12491395530264789, "grad_norm": 0.5408806800842285, "learning_rate": 9.98798502419047e-06, "loss": 0.4806, "step": 2722 }, { "epoch": 0.12495984580790234, "grad_norm": 0.44541439414024353, "learning_rate": 9.987968030694616e-06, "loss": 0.3738, "step": 2723 }, { "epoch": 0.1250057363131568, "grad_norm": 0.46675705909729004, "learning_rate": 9.987951025204274e-06, "loss": 0.4484, "step": 2724 }, { "epoch": 0.12505162681841128, "grad_norm": 0.47927436232566833, "learning_rate": 9.987934007719485e-06, "loss": 0.4382, "step": 2725 }, { "epoch": 0.12509751732366572, "grad_norm": 0.46207964420318604, "learning_rate": 9.987916978240286e-06, "loss": 0.4188, "step": 2726 }, { "epoch": 0.1251434078289202, "grad_norm": 0.4308772385120392, "learning_rate": 9.987899936766724e-06, "loss": 0.3191, "step": 2727 }, { "epoch": 0.12518929833417466, "grad_norm": 0.45977357029914856, "learning_rate": 9.987882883298834e-06, "loss": 0.3623, "step": 2728 }, { "epoch": 0.12523518883942913, "grad_norm": 0.44675368070602417, "learning_rate": 9.987865817836659e-06, "loss": 0.3188, "step": 2729 }, { "epoch": 0.12528107934468358, "grad_norm": 0.508658230304718, "learning_rate": 9.987848740380243e-06, "loss": 0.4487, "step": 2730 }, { "epoch": 0.12532696984993805, "grad_norm": 0.5319099426269531, "learning_rate": 9.987831650929625e-06, "loss": 0.4188, "step": 2731 }, { "epoch": 0.12537286035519252, "grad_norm": 0.5304564237594604, "learning_rate": 9.987814549484846e-06, "loss": 0.5167, "step": 2732 }, { "epoch": 0.125418750860447, "grad_norm": 0.5082815289497375, "learning_rate": 9.987797436045949e-06, "loss": 0.504, "step": 2733 }, { "epoch": 0.12546464136570143, "grad_norm": 0.5002908110618591, "learning_rate": 9.987780310612972e-06, "loss": 0.4142, "step": 2734 }, { "epoch": 0.1255105318709559, "grad_norm": 0.4971759021282196, "learning_rate": 9.987763173185956e-06, "loss": 0.3695, "step": 2735 }, { "epoch": 0.12555642237621037, "grad_norm": 0.48756933212280273, "learning_rate": 9.987746023764944e-06, "loss": 0.3668, "step": 2736 }, { "epoch": 0.1256023128814648, "grad_norm": 0.45884984731674194, "learning_rate": 9.987728862349979e-06, "loss": 0.3895, "step": 2737 }, { "epoch": 0.12564820338671928, "grad_norm": 0.5038509964942932, "learning_rate": 9.987711688941098e-06, "loss": 0.4321, "step": 2738 }, { "epoch": 0.12569409389197375, "grad_norm": 0.5076050162315369, "learning_rate": 9.987694503538347e-06, "loss": 0.463, "step": 2739 }, { "epoch": 0.12573998439722822, "grad_norm": 0.44172585010528564, "learning_rate": 9.987677306141763e-06, "loss": 0.3518, "step": 2740 }, { "epoch": 0.12578587490248266, "grad_norm": 0.5345978736877441, "learning_rate": 9.98766009675139e-06, "loss": 0.5107, "step": 2741 }, { "epoch": 0.12583176540773713, "grad_norm": 0.4749714136123657, "learning_rate": 9.987642875367269e-06, "loss": 0.3612, "step": 2742 }, { "epoch": 0.1258776559129916, "grad_norm": 0.5910397171974182, "learning_rate": 9.98762564198944e-06, "loss": 0.4715, "step": 2743 }, { "epoch": 0.12592354641824607, "grad_norm": 0.4772932529449463, "learning_rate": 9.987608396617946e-06, "loss": 0.3904, "step": 2744 }, { "epoch": 0.12596943692350052, "grad_norm": 0.460406631231308, "learning_rate": 9.987591139252828e-06, "loss": 0.3764, "step": 2745 }, { "epoch": 0.126015327428755, "grad_norm": 0.44393283128738403, "learning_rate": 9.987573869894128e-06, "loss": 0.3805, "step": 2746 }, { "epoch": 0.12606121793400946, "grad_norm": 0.5282798409461975, "learning_rate": 9.987556588541884e-06, "loss": 0.4113, "step": 2747 }, { "epoch": 0.12610710843926393, "grad_norm": 0.46061545610427856, "learning_rate": 9.987539295196144e-06, "loss": 0.3199, "step": 2748 }, { "epoch": 0.12615299894451837, "grad_norm": 0.48865440487861633, "learning_rate": 9.987521989856944e-06, "loss": 0.3675, "step": 2749 }, { "epoch": 0.12619888944977284, "grad_norm": 0.47560811042785645, "learning_rate": 9.98750467252433e-06, "loss": 0.4288, "step": 2750 }, { "epoch": 0.1262447799550273, "grad_norm": 0.48677363991737366, "learning_rate": 9.98748734319834e-06, "loss": 0.4112, "step": 2751 }, { "epoch": 0.12629067046028178, "grad_norm": 0.44393691420555115, "learning_rate": 9.987470001879016e-06, "loss": 0.3558, "step": 2752 }, { "epoch": 0.12633656096553622, "grad_norm": 0.5380996465682983, "learning_rate": 9.987452648566401e-06, "loss": 0.418, "step": 2753 }, { "epoch": 0.1263824514707907, "grad_norm": 0.5498951077461243, "learning_rate": 9.987435283260538e-06, "loss": 0.4486, "step": 2754 }, { "epoch": 0.12642834197604516, "grad_norm": 0.48008909821510315, "learning_rate": 9.987417905961467e-06, "loss": 0.3517, "step": 2755 }, { "epoch": 0.12647423248129963, "grad_norm": 0.5095076560974121, "learning_rate": 9.987400516669227e-06, "loss": 0.4982, "step": 2756 }, { "epoch": 0.12652012298655407, "grad_norm": 0.420227587223053, "learning_rate": 9.987383115383867e-06, "loss": 0.2704, "step": 2757 }, { "epoch": 0.12656601349180854, "grad_norm": 0.44439947605133057, "learning_rate": 9.987365702105423e-06, "loss": 0.344, "step": 2758 }, { "epoch": 0.126611903997063, "grad_norm": 0.44056349992752075, "learning_rate": 9.987348276833938e-06, "loss": 0.3255, "step": 2759 }, { "epoch": 0.12665779450231748, "grad_norm": 0.4823189675807953, "learning_rate": 9.987330839569456e-06, "loss": 0.3681, "step": 2760 }, { "epoch": 0.12670368500757193, "grad_norm": 0.4581528604030609, "learning_rate": 9.987313390312015e-06, "loss": 0.3541, "step": 2761 }, { "epoch": 0.1267495755128264, "grad_norm": 0.48997363448143005, "learning_rate": 9.98729592906166e-06, "loss": 0.3867, "step": 2762 }, { "epoch": 0.12679546601808087, "grad_norm": 0.5013061165809631, "learning_rate": 9.987278455818434e-06, "loss": 0.4401, "step": 2763 }, { "epoch": 0.12684135652333534, "grad_norm": 0.48018598556518555, "learning_rate": 9.987260970582376e-06, "loss": 0.3603, "step": 2764 }, { "epoch": 0.12688724702858978, "grad_norm": 0.5071067214012146, "learning_rate": 9.98724347335353e-06, "loss": 0.4919, "step": 2765 }, { "epoch": 0.12693313753384425, "grad_norm": 0.4889163076877594, "learning_rate": 9.987225964131937e-06, "loss": 0.4011, "step": 2766 }, { "epoch": 0.12697902803909872, "grad_norm": 0.47455519437789917, "learning_rate": 9.98720844291764e-06, "loss": 0.4128, "step": 2767 }, { "epoch": 0.12702491854435316, "grad_norm": 0.5203969478607178, "learning_rate": 9.98719090971068e-06, "loss": 0.4459, "step": 2768 }, { "epoch": 0.12707080904960763, "grad_norm": 0.4268333911895752, "learning_rate": 9.9871733645111e-06, "loss": 0.3224, "step": 2769 }, { "epoch": 0.1271166995548621, "grad_norm": 0.4883555769920349, "learning_rate": 9.987155807318942e-06, "loss": 0.3919, "step": 2770 }, { "epoch": 0.12716259006011657, "grad_norm": 0.47102999687194824, "learning_rate": 9.98713823813425e-06, "loss": 0.4466, "step": 2771 }, { "epoch": 0.127208480565371, "grad_norm": 0.5064966082572937, "learning_rate": 9.987120656957063e-06, "loss": 0.4034, "step": 2772 }, { "epoch": 0.12725437107062548, "grad_norm": 0.4775254428386688, "learning_rate": 9.987103063787425e-06, "loss": 0.4264, "step": 2773 }, { "epoch": 0.12730026157587995, "grad_norm": 0.4880734384059906, "learning_rate": 9.987085458625378e-06, "loss": 0.4213, "step": 2774 }, { "epoch": 0.12734615208113442, "grad_norm": 0.5105839967727661, "learning_rate": 9.987067841470964e-06, "loss": 0.522, "step": 2775 }, { "epoch": 0.12739204258638887, "grad_norm": 0.46990150213241577, "learning_rate": 9.987050212324227e-06, "loss": 0.4288, "step": 2776 }, { "epoch": 0.12743793309164334, "grad_norm": 0.4751972556114197, "learning_rate": 9.987032571185206e-06, "loss": 0.4182, "step": 2777 }, { "epoch": 0.1274838235968978, "grad_norm": 0.4855301082134247, "learning_rate": 9.987014918053947e-06, "loss": 0.4451, "step": 2778 }, { "epoch": 0.12752971410215228, "grad_norm": 0.4894592761993408, "learning_rate": 9.986997252930492e-06, "loss": 0.4717, "step": 2779 }, { "epoch": 0.12757560460740672, "grad_norm": 0.45551395416259766, "learning_rate": 9.986979575814882e-06, "loss": 0.3876, "step": 2780 }, { "epoch": 0.1276214951126612, "grad_norm": 0.5171364545822144, "learning_rate": 9.98696188670716e-06, "loss": 0.4956, "step": 2781 }, { "epoch": 0.12766738561791566, "grad_norm": 0.48302924633026123, "learning_rate": 9.986944185607368e-06, "loss": 0.3774, "step": 2782 }, { "epoch": 0.12771327612317013, "grad_norm": 0.5062499046325684, "learning_rate": 9.98692647251555e-06, "loss": 0.4683, "step": 2783 }, { "epoch": 0.12775916662842457, "grad_norm": 0.4605990946292877, "learning_rate": 9.986908747431747e-06, "loss": 0.3978, "step": 2784 }, { "epoch": 0.12780505713367904, "grad_norm": 0.5306394100189209, "learning_rate": 9.986891010356004e-06, "loss": 0.4384, "step": 2785 }, { "epoch": 0.1278509476389335, "grad_norm": 0.47338321805000305, "learning_rate": 9.986873261288358e-06, "loss": 0.416, "step": 2786 }, { "epoch": 0.12789683814418798, "grad_norm": 0.512321949005127, "learning_rate": 9.98685550022886e-06, "loss": 0.5109, "step": 2787 }, { "epoch": 0.12794272864944242, "grad_norm": 0.5197054743766785, "learning_rate": 9.986837727177547e-06, "loss": 0.4975, "step": 2788 }, { "epoch": 0.1279886191546969, "grad_norm": 0.45874637365341187, "learning_rate": 9.986819942134462e-06, "loss": 0.446, "step": 2789 }, { "epoch": 0.12803450965995136, "grad_norm": 0.49551525712013245, "learning_rate": 9.98680214509965e-06, "loss": 0.4219, "step": 2790 }, { "epoch": 0.12808040016520583, "grad_norm": 0.48614075779914856, "learning_rate": 9.986784336073153e-06, "loss": 0.3977, "step": 2791 }, { "epoch": 0.12812629067046027, "grad_norm": 0.5329895615577698, "learning_rate": 9.986766515055014e-06, "loss": 0.5341, "step": 2792 }, { "epoch": 0.12817218117571474, "grad_norm": 0.4470343291759491, "learning_rate": 9.986748682045275e-06, "loss": 0.3473, "step": 2793 }, { "epoch": 0.12821807168096921, "grad_norm": 0.4502967894077301, "learning_rate": 9.986730837043977e-06, "loss": 0.4066, "step": 2794 }, { "epoch": 0.12826396218622366, "grad_norm": 0.5154269933700562, "learning_rate": 9.986712980051167e-06, "loss": 0.4548, "step": 2795 }, { "epoch": 0.12830985269147813, "grad_norm": 0.5148901343345642, "learning_rate": 9.986695111066887e-06, "loss": 0.4738, "step": 2796 }, { "epoch": 0.1283557431967326, "grad_norm": 0.4601181745529175, "learning_rate": 9.986677230091178e-06, "loss": 0.3941, "step": 2797 }, { "epoch": 0.12840163370198707, "grad_norm": 0.48027870059013367, "learning_rate": 9.986659337124085e-06, "loss": 0.4309, "step": 2798 }, { "epoch": 0.1284475242072415, "grad_norm": 0.45475712418556213, "learning_rate": 9.98664143216565e-06, "loss": 0.3625, "step": 2799 }, { "epoch": 0.12849341471249598, "grad_norm": 0.42265060544013977, "learning_rate": 9.986623515215915e-06, "loss": 0.354, "step": 2800 }, { "epoch": 0.12853930521775045, "grad_norm": 0.4429236352443695, "learning_rate": 9.986605586274924e-06, "loss": 0.3569, "step": 2801 }, { "epoch": 0.12858519572300492, "grad_norm": 0.443429172039032, "learning_rate": 9.986587645342722e-06, "loss": 0.3551, "step": 2802 }, { "epoch": 0.12863108622825936, "grad_norm": 0.5227746367454529, "learning_rate": 9.98656969241935e-06, "loss": 0.488, "step": 2803 }, { "epoch": 0.12867697673351383, "grad_norm": 0.48553329706192017, "learning_rate": 9.986551727504851e-06, "loss": 0.3797, "step": 2804 }, { "epoch": 0.1287228672387683, "grad_norm": 0.48094120621681213, "learning_rate": 9.98653375059927e-06, "loss": 0.4062, "step": 2805 }, { "epoch": 0.12876875774402277, "grad_norm": 0.5202357769012451, "learning_rate": 9.98651576170265e-06, "loss": 0.4871, "step": 2806 }, { "epoch": 0.1288146482492772, "grad_norm": 0.4971047639846802, "learning_rate": 9.98649776081503e-06, "loss": 0.4294, "step": 2807 }, { "epoch": 0.12886053875453168, "grad_norm": 0.4567926228046417, "learning_rate": 9.98647974793646e-06, "loss": 0.4046, "step": 2808 }, { "epoch": 0.12890642925978615, "grad_norm": 0.45385095477104187, "learning_rate": 9.986461723066978e-06, "loss": 0.3881, "step": 2809 }, { "epoch": 0.12895231976504062, "grad_norm": 0.48508352041244507, "learning_rate": 9.986443686206631e-06, "loss": 0.387, "step": 2810 }, { "epoch": 0.12899821027029507, "grad_norm": 0.4662885069847107, "learning_rate": 9.986425637355459e-06, "loss": 0.3979, "step": 2811 }, { "epoch": 0.12904410077554954, "grad_norm": 0.4589211344718933, "learning_rate": 9.986407576513508e-06, "loss": 0.3599, "step": 2812 }, { "epoch": 0.129089991280804, "grad_norm": 0.49436479806900024, "learning_rate": 9.98638950368082e-06, "loss": 0.4318, "step": 2813 }, { "epoch": 0.12913588178605848, "grad_norm": 0.5392730236053467, "learning_rate": 9.986371418857439e-06, "loss": 0.4702, "step": 2814 }, { "epoch": 0.12918177229131292, "grad_norm": 0.44370704889297485, "learning_rate": 9.98635332204341e-06, "loss": 0.3399, "step": 2815 }, { "epoch": 0.1292276627965674, "grad_norm": 0.4396328330039978, "learning_rate": 9.986335213238773e-06, "loss": 0.3396, "step": 2816 }, { "epoch": 0.12927355330182186, "grad_norm": 0.47519561648368835, "learning_rate": 9.986317092443574e-06, "loss": 0.3803, "step": 2817 }, { "epoch": 0.12931944380707633, "grad_norm": 0.4754610061645508, "learning_rate": 9.986298959657857e-06, "loss": 0.3875, "step": 2818 }, { "epoch": 0.12936533431233077, "grad_norm": 0.45905134081840515, "learning_rate": 9.986280814881664e-06, "loss": 0.3814, "step": 2819 }, { "epoch": 0.12941122481758524, "grad_norm": 0.5036689639091492, "learning_rate": 9.98626265811504e-06, "loss": 0.3898, "step": 2820 }, { "epoch": 0.1294571153228397, "grad_norm": 0.4956029951572418, "learning_rate": 9.986244489358027e-06, "loss": 0.4421, "step": 2821 }, { "epoch": 0.12950300582809418, "grad_norm": 0.4815804064273834, "learning_rate": 9.98622630861067e-06, "loss": 0.3907, "step": 2822 }, { "epoch": 0.12954889633334862, "grad_norm": 0.44697144627571106, "learning_rate": 9.986208115873012e-06, "loss": 0.3483, "step": 2823 }, { "epoch": 0.1295947868386031, "grad_norm": 0.4544214904308319, "learning_rate": 9.986189911145099e-06, "loss": 0.3447, "step": 2824 }, { "epoch": 0.12964067734385756, "grad_norm": 0.46760818362236023, "learning_rate": 9.986171694426972e-06, "loss": 0.4243, "step": 2825 }, { "epoch": 0.129686567849112, "grad_norm": 0.46960702538490295, "learning_rate": 9.986153465718675e-06, "loss": 0.3831, "step": 2826 }, { "epoch": 0.12973245835436648, "grad_norm": 0.426664263010025, "learning_rate": 9.986135225020252e-06, "loss": 0.3425, "step": 2827 }, { "epoch": 0.12977834885962095, "grad_norm": 0.43156060576438904, "learning_rate": 9.986116972331749e-06, "loss": 0.3025, "step": 2828 }, { "epoch": 0.12982423936487542, "grad_norm": 0.47947800159454346, "learning_rate": 9.986098707653209e-06, "loss": 0.3699, "step": 2829 }, { "epoch": 0.12987012987012986, "grad_norm": 0.4837958514690399, "learning_rate": 9.986080430984674e-06, "loss": 0.4231, "step": 2830 }, { "epoch": 0.12991602037538433, "grad_norm": 0.479728639125824, "learning_rate": 9.986062142326189e-06, "loss": 0.4314, "step": 2831 }, { "epoch": 0.1299619108806388, "grad_norm": 0.4612409472465515, "learning_rate": 9.9860438416778e-06, "loss": 0.4022, "step": 2832 }, { "epoch": 0.13000780138589327, "grad_norm": 0.4356297254562378, "learning_rate": 9.986025529039547e-06, "loss": 0.3633, "step": 2833 }, { "epoch": 0.1300536918911477, "grad_norm": 0.4393591284751892, "learning_rate": 9.986007204411476e-06, "loss": 0.3702, "step": 2834 }, { "epoch": 0.13009958239640218, "grad_norm": 0.4903585612773895, "learning_rate": 9.985988867793633e-06, "loss": 0.4499, "step": 2835 }, { "epoch": 0.13014547290165665, "grad_norm": 0.4818597435951233, "learning_rate": 9.98597051918606e-06, "loss": 0.4053, "step": 2836 }, { "epoch": 0.13019136340691112, "grad_norm": 0.47199276089668274, "learning_rate": 9.9859521585888e-06, "loss": 0.3754, "step": 2837 }, { "epoch": 0.13023725391216556, "grad_norm": 0.47593963146209717, "learning_rate": 9.985933786001902e-06, "loss": 0.3988, "step": 2838 }, { "epoch": 0.13028314441742003, "grad_norm": 0.4384521245956421, "learning_rate": 9.985915401425403e-06, "loss": 0.3253, "step": 2839 }, { "epoch": 0.1303290349226745, "grad_norm": 0.4463512599468231, "learning_rate": 9.985897004859354e-06, "loss": 0.3491, "step": 2840 }, { "epoch": 0.13037492542792897, "grad_norm": 0.47416383028030396, "learning_rate": 9.985878596303795e-06, "loss": 0.4332, "step": 2841 }, { "epoch": 0.13042081593318341, "grad_norm": 0.44360268115997314, "learning_rate": 9.985860175758772e-06, "loss": 0.3497, "step": 2842 }, { "epoch": 0.13046670643843788, "grad_norm": 0.4880979359149933, "learning_rate": 9.985841743224329e-06, "loss": 0.4226, "step": 2843 }, { "epoch": 0.13051259694369235, "grad_norm": 0.4397323429584503, "learning_rate": 9.985823298700509e-06, "loss": 0.3382, "step": 2844 }, { "epoch": 0.13055848744894683, "grad_norm": 0.47623541951179504, "learning_rate": 9.98580484218736e-06, "loss": 0.4189, "step": 2845 }, { "epoch": 0.13060437795420127, "grad_norm": 0.4655338525772095, "learning_rate": 9.98578637368492e-06, "loss": 0.4045, "step": 2846 }, { "epoch": 0.13065026845945574, "grad_norm": 0.4542866051197052, "learning_rate": 9.98576789319324e-06, "loss": 0.3466, "step": 2847 }, { "epoch": 0.1306961589647102, "grad_norm": 0.4580006003379822, "learning_rate": 9.985749400712362e-06, "loss": 0.3737, "step": 2848 }, { "epoch": 0.13074204946996468, "grad_norm": 0.4561164975166321, "learning_rate": 9.98573089624233e-06, "loss": 0.4018, "step": 2849 }, { "epoch": 0.13078793997521912, "grad_norm": 0.47935986518859863, "learning_rate": 9.985712379783188e-06, "loss": 0.4262, "step": 2850 }, { "epoch": 0.1308338304804736, "grad_norm": 0.4901215136051178, "learning_rate": 9.985693851334981e-06, "loss": 0.3723, "step": 2851 }, { "epoch": 0.13087972098572806, "grad_norm": 0.4891490340232849, "learning_rate": 9.985675310897755e-06, "loss": 0.4574, "step": 2852 }, { "epoch": 0.1309256114909825, "grad_norm": 0.49904853105545044, "learning_rate": 9.985656758471551e-06, "loss": 0.4673, "step": 2853 }, { "epoch": 0.13097150199623697, "grad_norm": 0.4225284457206726, "learning_rate": 9.98563819405642e-06, "loss": 0.342, "step": 2854 }, { "epoch": 0.13101739250149144, "grad_norm": 0.4977639317512512, "learning_rate": 9.9856196176524e-06, "loss": 0.486, "step": 2855 }, { "epoch": 0.1310632830067459, "grad_norm": 0.4584922194480896, "learning_rate": 9.985601029259538e-06, "loss": 0.375, "step": 2856 }, { "epoch": 0.13110917351200035, "grad_norm": 0.42476415634155273, "learning_rate": 9.985582428877881e-06, "loss": 0.3345, "step": 2857 }, { "epoch": 0.13115506401725482, "grad_norm": 0.4525851905345917, "learning_rate": 9.985563816507469e-06, "loss": 0.3431, "step": 2858 }, { "epoch": 0.1312009545225093, "grad_norm": 0.4731519818305969, "learning_rate": 9.985545192148351e-06, "loss": 0.4038, "step": 2859 }, { "epoch": 0.13124684502776376, "grad_norm": 0.6694402694702148, "learning_rate": 9.985526555800571e-06, "loss": 0.4627, "step": 2860 }, { "epoch": 0.1312927355330182, "grad_norm": 0.5048166513442993, "learning_rate": 9.985507907464173e-06, "loss": 0.4525, "step": 2861 }, { "epoch": 0.13133862603827268, "grad_norm": 0.4961734712123871, "learning_rate": 9.9854892471392e-06, "loss": 0.4833, "step": 2862 }, { "epoch": 0.13138451654352715, "grad_norm": 0.4771215617656708, "learning_rate": 9.9854705748257e-06, "loss": 0.4171, "step": 2863 }, { "epoch": 0.13143040704878162, "grad_norm": 0.4323614537715912, "learning_rate": 9.985451890523719e-06, "loss": 0.3657, "step": 2864 }, { "epoch": 0.13147629755403606, "grad_norm": 0.48595213890075684, "learning_rate": 9.985433194233296e-06, "loss": 0.4805, "step": 2865 }, { "epoch": 0.13152218805929053, "grad_norm": 0.5306482315063477, "learning_rate": 9.985414485954482e-06, "loss": 0.561, "step": 2866 }, { "epoch": 0.131568078564545, "grad_norm": 0.4797099828720093, "learning_rate": 9.985395765687319e-06, "loss": 0.3897, "step": 2867 }, { "epoch": 0.13161396906979947, "grad_norm": 0.45678552985191345, "learning_rate": 9.985377033431851e-06, "loss": 0.36, "step": 2868 }, { "epoch": 0.1316598595750539, "grad_norm": 0.504903256893158, "learning_rate": 9.985358289188127e-06, "loss": 0.4316, "step": 2869 }, { "epoch": 0.13170575008030838, "grad_norm": 0.49582263827323914, "learning_rate": 9.985339532956189e-06, "loss": 0.4183, "step": 2870 }, { "epoch": 0.13175164058556285, "grad_norm": 0.518846869468689, "learning_rate": 9.985320764736082e-06, "loss": 0.4101, "step": 2871 }, { "epoch": 0.13179753109081732, "grad_norm": 0.4616560935974121, "learning_rate": 9.985301984527853e-06, "loss": 0.3848, "step": 2872 }, { "epoch": 0.13184342159607176, "grad_norm": 0.47033193707466125, "learning_rate": 9.985283192331546e-06, "loss": 0.3859, "step": 2873 }, { "epoch": 0.13188931210132623, "grad_norm": 0.46414950489997864, "learning_rate": 9.985264388147207e-06, "loss": 0.3802, "step": 2874 }, { "epoch": 0.1319352026065807, "grad_norm": 0.49761998653411865, "learning_rate": 9.98524557197488e-06, "loss": 0.4782, "step": 2875 }, { "epoch": 0.13198109311183517, "grad_norm": 0.4831656813621521, "learning_rate": 9.98522674381461e-06, "loss": 0.4049, "step": 2876 }, { "epoch": 0.13202698361708962, "grad_norm": 0.4910261631011963, "learning_rate": 9.985207903666442e-06, "loss": 0.4116, "step": 2877 }, { "epoch": 0.13207287412234409, "grad_norm": 0.529514729976654, "learning_rate": 9.985189051530425e-06, "loss": 0.4655, "step": 2878 }, { "epoch": 0.13211876462759856, "grad_norm": 0.5253849625587463, "learning_rate": 9.9851701874066e-06, "loss": 0.4962, "step": 2879 }, { "epoch": 0.13216465513285303, "grad_norm": 0.4822869300842285, "learning_rate": 9.985151311295016e-06, "loss": 0.4606, "step": 2880 }, { "epoch": 0.13221054563810747, "grad_norm": 0.5626682043075562, "learning_rate": 9.985132423195715e-06, "loss": 0.4697, "step": 2881 }, { "epoch": 0.13225643614336194, "grad_norm": 0.4947010576725006, "learning_rate": 9.985113523108743e-06, "loss": 0.4634, "step": 2882 }, { "epoch": 0.1323023266486164, "grad_norm": 0.4874163866043091, "learning_rate": 9.985094611034148e-06, "loss": 0.4035, "step": 2883 }, { "epoch": 0.13234821715387085, "grad_norm": 0.5082656741142273, "learning_rate": 9.985075686971974e-06, "loss": 0.4759, "step": 2884 }, { "epoch": 0.13239410765912532, "grad_norm": 0.4486599564552307, "learning_rate": 9.985056750922263e-06, "loss": 0.3524, "step": 2885 }, { "epoch": 0.1324399981643798, "grad_norm": 0.47667229175567627, "learning_rate": 9.985037802885068e-06, "loss": 0.4013, "step": 2886 }, { "epoch": 0.13248588866963426, "grad_norm": 0.48025259375572205, "learning_rate": 9.985018842860428e-06, "loss": 0.3661, "step": 2887 }, { "epoch": 0.1325317791748887, "grad_norm": 0.42884561419487, "learning_rate": 9.984999870848392e-06, "loss": 0.3575, "step": 2888 }, { "epoch": 0.13257766968014317, "grad_norm": 0.49767404794692993, "learning_rate": 9.984980886849005e-06, "loss": 0.4659, "step": 2889 }, { "epoch": 0.13262356018539764, "grad_norm": 0.502864420413971, "learning_rate": 9.984961890862311e-06, "loss": 0.4605, "step": 2890 }, { "epoch": 0.1326694506906521, "grad_norm": 0.4371437728404999, "learning_rate": 9.984942882888358e-06, "loss": 0.3454, "step": 2891 }, { "epoch": 0.13271534119590656, "grad_norm": 0.5332352519035339, "learning_rate": 9.98492386292719e-06, "loss": 0.5113, "step": 2892 }, { "epoch": 0.13276123170116103, "grad_norm": 0.4774024486541748, "learning_rate": 9.984904830978855e-06, "loss": 0.4557, "step": 2893 }, { "epoch": 0.1328071222064155, "grad_norm": 0.4903777837753296, "learning_rate": 9.984885787043394e-06, "loss": 0.4565, "step": 2894 }, { "epoch": 0.13285301271166997, "grad_norm": 0.46722671389579773, "learning_rate": 9.984866731120858e-06, "loss": 0.392, "step": 2895 }, { "epoch": 0.1328989032169244, "grad_norm": 0.5084421634674072, "learning_rate": 9.98484766321129e-06, "loss": 0.4594, "step": 2896 }, { "epoch": 0.13294479372217888, "grad_norm": 0.42572417855262756, "learning_rate": 9.984828583314738e-06, "loss": 0.3296, "step": 2897 }, { "epoch": 0.13299068422743335, "grad_norm": 0.5137713551521301, "learning_rate": 9.984809491431246e-06, "loss": 0.4231, "step": 2898 }, { "epoch": 0.13303657473268782, "grad_norm": 0.540494978427887, "learning_rate": 9.984790387560859e-06, "loss": 0.4796, "step": 2899 }, { "epoch": 0.13308246523794226, "grad_norm": 0.5203617811203003, "learning_rate": 9.984771271703626e-06, "loss": 0.465, "step": 2900 }, { "epoch": 0.13312835574319673, "grad_norm": 0.45151373744010925, "learning_rate": 9.984752143859591e-06, "loss": 0.4146, "step": 2901 }, { "epoch": 0.1331742462484512, "grad_norm": 0.49115505814552307, "learning_rate": 9.9847330040288e-06, "loss": 0.4722, "step": 2902 }, { "epoch": 0.13322013675370567, "grad_norm": 0.49026769399642944, "learning_rate": 9.9847138522113e-06, "loss": 0.4013, "step": 2903 }, { "epoch": 0.1332660272589601, "grad_norm": 0.48324286937713623, "learning_rate": 9.984694688407135e-06, "loss": 0.3783, "step": 2904 }, { "epoch": 0.13331191776421458, "grad_norm": 0.4451691210269928, "learning_rate": 9.984675512616353e-06, "loss": 0.3456, "step": 2905 }, { "epoch": 0.13335780826946905, "grad_norm": 0.5046260356903076, "learning_rate": 9.984656324839001e-06, "loss": 0.4426, "step": 2906 }, { "epoch": 0.13340369877472352, "grad_norm": 0.47953924536705017, "learning_rate": 9.984637125075123e-06, "loss": 0.4526, "step": 2907 }, { "epoch": 0.13344958927997796, "grad_norm": 0.4949834942817688, "learning_rate": 9.984617913324765e-06, "loss": 0.4242, "step": 2908 }, { "epoch": 0.13349547978523243, "grad_norm": 0.4905979633331299, "learning_rate": 9.984598689587975e-06, "loss": 0.376, "step": 2909 }, { "epoch": 0.1335413702904869, "grad_norm": 0.47098323702812195, "learning_rate": 9.984579453864799e-06, "loss": 0.4382, "step": 2910 }, { "epoch": 0.13358726079574137, "grad_norm": 0.49535703659057617, "learning_rate": 9.98456020615528e-06, "loss": 0.4273, "step": 2911 }, { "epoch": 0.13363315130099582, "grad_norm": 0.43479371070861816, "learning_rate": 9.984540946459469e-06, "loss": 0.3242, "step": 2912 }, { "epoch": 0.1336790418062503, "grad_norm": 0.4440033733844757, "learning_rate": 9.98452167477741e-06, "loss": 0.3853, "step": 2913 }, { "epoch": 0.13372493231150476, "grad_norm": 0.4634888172149658, "learning_rate": 9.984502391109148e-06, "loss": 0.3898, "step": 2914 }, { "epoch": 0.1337708228167592, "grad_norm": 0.46266263723373413, "learning_rate": 9.984483095454734e-06, "loss": 0.4323, "step": 2915 }, { "epoch": 0.13381671332201367, "grad_norm": 0.5111375451087952, "learning_rate": 9.984463787814209e-06, "loss": 0.5356, "step": 2916 }, { "epoch": 0.13386260382726814, "grad_norm": 0.536014199256897, "learning_rate": 9.984444468187622e-06, "loss": 0.4703, "step": 2917 }, { "epoch": 0.1339084943325226, "grad_norm": 0.4837159514427185, "learning_rate": 9.984425136575019e-06, "loss": 0.4142, "step": 2918 }, { "epoch": 0.13395438483777705, "grad_norm": 0.575469434261322, "learning_rate": 9.984405792976447e-06, "loss": 0.5177, "step": 2919 }, { "epoch": 0.13400027534303152, "grad_norm": 0.5213784575462341, "learning_rate": 9.984386437391951e-06, "loss": 0.5453, "step": 2920 }, { "epoch": 0.134046165848286, "grad_norm": 0.46826115250587463, "learning_rate": 9.984367069821582e-06, "loss": 0.3794, "step": 2921 }, { "epoch": 0.13409205635354046, "grad_norm": 0.5002729296684265, "learning_rate": 9.98434769026538e-06, "loss": 0.4654, "step": 2922 }, { "epoch": 0.1341379468587949, "grad_norm": 0.46221449971199036, "learning_rate": 9.984328298723396e-06, "loss": 0.3405, "step": 2923 }, { "epoch": 0.13418383736404937, "grad_norm": 0.46960917115211487, "learning_rate": 9.984308895195677e-06, "loss": 0.3443, "step": 2924 }, { "epoch": 0.13422972786930384, "grad_norm": 0.5330900549888611, "learning_rate": 9.984289479682265e-06, "loss": 0.5086, "step": 2925 }, { "epoch": 0.13427561837455831, "grad_norm": 0.5278019309043884, "learning_rate": 9.984270052183213e-06, "loss": 0.4222, "step": 2926 }, { "epoch": 0.13432150887981276, "grad_norm": 0.4523743987083435, "learning_rate": 9.984250612698564e-06, "loss": 0.387, "step": 2927 }, { "epoch": 0.13436739938506723, "grad_norm": 0.5056136250495911, "learning_rate": 9.984231161228364e-06, "loss": 0.4466, "step": 2928 }, { "epoch": 0.1344132898903217, "grad_norm": 0.5223401188850403, "learning_rate": 9.984211697772662e-06, "loss": 0.469, "step": 2929 }, { "epoch": 0.13445918039557617, "grad_norm": 0.4763677716255188, "learning_rate": 9.984192222331503e-06, "loss": 0.3786, "step": 2930 }, { "epoch": 0.1345050709008306, "grad_norm": 0.4712555408477783, "learning_rate": 9.984172734904937e-06, "loss": 0.4073, "step": 2931 }, { "epoch": 0.13455096140608508, "grad_norm": 0.505085289478302, "learning_rate": 9.984153235493006e-06, "loss": 0.471, "step": 2932 }, { "epoch": 0.13459685191133955, "grad_norm": 0.4289167523384094, "learning_rate": 9.984133724095761e-06, "loss": 0.2825, "step": 2933 }, { "epoch": 0.13464274241659402, "grad_norm": 0.540083110332489, "learning_rate": 9.984114200713247e-06, "loss": 0.5823, "step": 2934 }, { "epoch": 0.13468863292184846, "grad_norm": 0.45814406871795654, "learning_rate": 9.984094665345512e-06, "loss": 0.4116, "step": 2935 }, { "epoch": 0.13473452342710293, "grad_norm": 0.4792840778827667, "learning_rate": 9.984075117992602e-06, "loss": 0.378, "step": 2936 }, { "epoch": 0.1347804139323574, "grad_norm": 0.458863765001297, "learning_rate": 9.984055558654565e-06, "loss": 0.4024, "step": 2937 }, { "epoch": 0.13482630443761187, "grad_norm": 0.5246399641036987, "learning_rate": 9.984035987331446e-06, "loss": 0.4475, "step": 2938 }, { "epoch": 0.1348721949428663, "grad_norm": 0.49317172169685364, "learning_rate": 9.984016404023293e-06, "loss": 0.4132, "step": 2939 }, { "epoch": 0.13491808544812078, "grad_norm": 0.4826701581478119, "learning_rate": 9.983996808730157e-06, "loss": 0.3769, "step": 2940 }, { "epoch": 0.13496397595337525, "grad_norm": 0.543339192867279, "learning_rate": 9.983977201452077e-06, "loss": 0.4822, "step": 2941 }, { "epoch": 0.1350098664586297, "grad_norm": 0.46195462346076965, "learning_rate": 9.983957582189108e-06, "loss": 0.331, "step": 2942 }, { "epoch": 0.13505575696388417, "grad_norm": 0.47714972496032715, "learning_rate": 9.983937950941293e-06, "loss": 0.4408, "step": 2943 }, { "epoch": 0.13510164746913864, "grad_norm": 0.44046348333358765, "learning_rate": 9.98391830770868e-06, "loss": 0.3491, "step": 2944 }, { "epoch": 0.1351475379743931, "grad_norm": 0.4799294173717499, "learning_rate": 9.983898652491318e-06, "loss": 0.4402, "step": 2945 }, { "epoch": 0.13519342847964755, "grad_norm": 0.48560038208961487, "learning_rate": 9.98387898528925e-06, "loss": 0.439, "step": 2946 }, { "epoch": 0.13523931898490202, "grad_norm": 0.4312838613986969, "learning_rate": 9.983859306102527e-06, "loss": 0.3, "step": 2947 }, { "epoch": 0.1352852094901565, "grad_norm": 0.46086788177490234, "learning_rate": 9.983839614931197e-06, "loss": 0.3927, "step": 2948 }, { "epoch": 0.13533109999541096, "grad_norm": 0.44949251413345337, "learning_rate": 9.983819911775305e-06, "loss": 0.4055, "step": 2949 }, { "epoch": 0.1353769905006654, "grad_norm": 0.5050535202026367, "learning_rate": 9.983800196634898e-06, "loss": 0.4866, "step": 2950 }, { "epoch": 0.13542288100591987, "grad_norm": 0.4712943434715271, "learning_rate": 9.983780469510025e-06, "loss": 0.4055, "step": 2951 }, { "epoch": 0.13546877151117434, "grad_norm": 0.44342565536499023, "learning_rate": 9.983760730400733e-06, "loss": 0.3747, "step": 2952 }, { "epoch": 0.1355146620164288, "grad_norm": 0.5054129362106323, "learning_rate": 9.98374097930707e-06, "loss": 0.4137, "step": 2953 }, { "epoch": 0.13556055252168325, "grad_norm": 0.5286996364593506, "learning_rate": 9.983721216229081e-06, "loss": 0.5068, "step": 2954 }, { "epoch": 0.13560644302693772, "grad_norm": 0.4568662643432617, "learning_rate": 9.983701441166817e-06, "loss": 0.3382, "step": 2955 }, { "epoch": 0.1356523335321922, "grad_norm": 0.5419427752494812, "learning_rate": 9.983681654120323e-06, "loss": 0.5546, "step": 2956 }, { "epoch": 0.13569822403744666, "grad_norm": 0.5187088847160339, "learning_rate": 9.983661855089647e-06, "loss": 0.4198, "step": 2957 }, { "epoch": 0.1357441145427011, "grad_norm": 0.47891896963119507, "learning_rate": 9.98364204407484e-06, "loss": 0.3829, "step": 2958 }, { "epoch": 0.13579000504795558, "grad_norm": 0.5072096586227417, "learning_rate": 9.983622221075944e-06, "loss": 0.4828, "step": 2959 }, { "epoch": 0.13583589555321005, "grad_norm": 0.43741774559020996, "learning_rate": 9.98360238609301e-06, "loss": 0.3563, "step": 2960 }, { "epoch": 0.13588178605846452, "grad_norm": 0.47707393765449524, "learning_rate": 9.983582539126086e-06, "loss": 0.4248, "step": 2961 }, { "epoch": 0.13592767656371896, "grad_norm": 0.4735635221004486, "learning_rate": 9.983562680175219e-06, "loss": 0.3808, "step": 2962 }, { "epoch": 0.13597356706897343, "grad_norm": 0.5682708024978638, "learning_rate": 9.983542809240455e-06, "loss": 0.4385, "step": 2963 }, { "epoch": 0.1360194575742279, "grad_norm": 0.49021559953689575, "learning_rate": 9.983522926321845e-06, "loss": 0.4929, "step": 2964 }, { "epoch": 0.13606534807948237, "grad_norm": 0.4581962823867798, "learning_rate": 9.983503031419434e-06, "loss": 0.3794, "step": 2965 }, { "epoch": 0.1361112385847368, "grad_norm": 0.45535171031951904, "learning_rate": 9.983483124533271e-06, "loss": 0.4255, "step": 2966 }, { "epoch": 0.13615712908999128, "grad_norm": 0.4496273994445801, "learning_rate": 9.983463205663403e-06, "loss": 0.3651, "step": 2967 }, { "epoch": 0.13620301959524575, "grad_norm": 0.4613458514213562, "learning_rate": 9.983443274809881e-06, "loss": 0.3727, "step": 2968 }, { "epoch": 0.13624891010050022, "grad_norm": 0.4700455665588379, "learning_rate": 9.98342333197275e-06, "loss": 0.4458, "step": 2969 }, { "epoch": 0.13629480060575466, "grad_norm": 0.44079285860061646, "learning_rate": 9.98340337715206e-06, "loss": 0.376, "step": 2970 }, { "epoch": 0.13634069111100913, "grad_norm": 0.4573451280593872, "learning_rate": 9.983383410347856e-06, "loss": 0.4004, "step": 2971 }, { "epoch": 0.1363865816162636, "grad_norm": 0.4634217619895935, "learning_rate": 9.983363431560189e-06, "loss": 0.3977, "step": 2972 }, { "epoch": 0.13643247212151804, "grad_norm": 0.5109870433807373, "learning_rate": 9.983343440789104e-06, "loss": 0.3594, "step": 2973 }, { "epoch": 0.13647836262677251, "grad_norm": 0.4920806884765625, "learning_rate": 9.983323438034652e-06, "loss": 0.4794, "step": 2974 }, { "epoch": 0.13652425313202698, "grad_norm": 0.4754081070423126, "learning_rate": 9.98330342329688e-06, "loss": 0.3923, "step": 2975 }, { "epoch": 0.13657014363728145, "grad_norm": 0.49609583616256714, "learning_rate": 9.983283396575835e-06, "loss": 0.4968, "step": 2976 }, { "epoch": 0.1366160341425359, "grad_norm": 0.4833639860153198, "learning_rate": 9.983263357871566e-06, "loss": 0.4471, "step": 2977 }, { "epoch": 0.13666192464779037, "grad_norm": 0.47659772634506226, "learning_rate": 9.983243307184122e-06, "loss": 0.454, "step": 2978 }, { "epoch": 0.13670781515304484, "grad_norm": 0.48343032598495483, "learning_rate": 9.98322324451355e-06, "loss": 0.3838, "step": 2979 }, { "epoch": 0.1367537056582993, "grad_norm": 0.45296165347099304, "learning_rate": 9.9832031698599e-06, "loss": 0.4094, "step": 2980 }, { "epoch": 0.13679959616355375, "grad_norm": 0.49433180689811707, "learning_rate": 9.98318308322322e-06, "loss": 0.423, "step": 2981 }, { "epoch": 0.13684548666880822, "grad_norm": 0.5197073221206665, "learning_rate": 9.983162984603557e-06, "loss": 0.5168, "step": 2982 }, { "epoch": 0.1368913771740627, "grad_norm": 0.487143337726593, "learning_rate": 9.983142874000958e-06, "loss": 0.4387, "step": 2983 }, { "epoch": 0.13693726767931716, "grad_norm": 0.4529087245464325, "learning_rate": 9.983122751415475e-06, "loss": 0.3518, "step": 2984 }, { "epoch": 0.1369831581845716, "grad_norm": 0.47358787059783936, "learning_rate": 9.983102616847153e-06, "loss": 0.3994, "step": 2985 }, { "epoch": 0.13702904868982607, "grad_norm": 0.42946839332580566, "learning_rate": 9.983082470296042e-06, "loss": 0.3157, "step": 2986 }, { "epoch": 0.13707493919508054, "grad_norm": 0.4564993977546692, "learning_rate": 9.983062311762191e-06, "loss": 0.3439, "step": 2987 }, { "epoch": 0.137120829700335, "grad_norm": 0.48037704825401306, "learning_rate": 9.983042141245648e-06, "loss": 0.3779, "step": 2988 }, { "epoch": 0.13716672020558945, "grad_norm": 0.4633772373199463, "learning_rate": 9.98302195874646e-06, "loss": 0.3894, "step": 2989 }, { "epoch": 0.13721261071084392, "grad_norm": 0.49246591329574585, "learning_rate": 9.98300176426468e-06, "loss": 0.5057, "step": 2990 }, { "epoch": 0.1372585012160984, "grad_norm": 0.5284117460250854, "learning_rate": 9.98298155780035e-06, "loss": 0.4869, "step": 2991 }, { "epoch": 0.13730439172135286, "grad_norm": 0.4986521005630493, "learning_rate": 9.982961339353522e-06, "loss": 0.3958, "step": 2992 }, { "epoch": 0.1373502822266073, "grad_norm": 0.493929922580719, "learning_rate": 9.982941108924247e-06, "loss": 0.4244, "step": 2993 }, { "epoch": 0.13739617273186178, "grad_norm": 0.4537135064601898, "learning_rate": 9.98292086651257e-06, "loss": 0.4015, "step": 2994 }, { "epoch": 0.13744206323711625, "grad_norm": 0.4772287607192993, "learning_rate": 9.982900612118543e-06, "loss": 0.4243, "step": 2995 }, { "epoch": 0.13748795374237072, "grad_norm": 0.44992128014564514, "learning_rate": 9.982880345742209e-06, "loss": 0.4048, "step": 2996 }, { "epoch": 0.13753384424762516, "grad_norm": 0.47454380989074707, "learning_rate": 9.982860067383624e-06, "loss": 0.3977, "step": 2997 }, { "epoch": 0.13757973475287963, "grad_norm": 0.4821033775806427, "learning_rate": 9.98283977704283e-06, "loss": 0.4167, "step": 2998 }, { "epoch": 0.1376256252581341, "grad_norm": 0.5374534726142883, "learning_rate": 9.982819474719882e-06, "loss": 0.5086, "step": 2999 }, { "epoch": 0.13767151576338854, "grad_norm": 0.452519029378891, "learning_rate": 9.982799160414824e-06, "loss": 0.3634, "step": 3000 }, { "epoch": 0.137717406268643, "grad_norm": 0.44751930236816406, "learning_rate": 9.982778834127706e-06, "loss": 0.4112, "step": 3001 }, { "epoch": 0.13776329677389748, "grad_norm": 0.5348258018493652, "learning_rate": 9.982758495858578e-06, "loss": 0.4842, "step": 3002 }, { "epoch": 0.13780918727915195, "grad_norm": 0.494220495223999, "learning_rate": 9.982738145607489e-06, "loss": 0.4673, "step": 3003 }, { "epoch": 0.1378550777844064, "grad_norm": 0.45482999086380005, "learning_rate": 9.982717783374488e-06, "loss": 0.3896, "step": 3004 }, { "epoch": 0.13790096828966086, "grad_norm": 0.4997367858886719, "learning_rate": 9.982697409159623e-06, "loss": 0.4042, "step": 3005 }, { "epoch": 0.13794685879491533, "grad_norm": 0.4939901828765869, "learning_rate": 9.982677022962942e-06, "loss": 0.4215, "step": 3006 }, { "epoch": 0.1379927493001698, "grad_norm": 0.5064058899879456, "learning_rate": 9.982656624784497e-06, "loss": 0.5187, "step": 3007 }, { "epoch": 0.13803863980542425, "grad_norm": 0.4413110315799713, "learning_rate": 9.982636214624334e-06, "loss": 0.359, "step": 3008 }, { "epoch": 0.13808453031067872, "grad_norm": 0.46107569336891174, "learning_rate": 9.982615792482503e-06, "loss": 0.3349, "step": 3009 }, { "epoch": 0.13813042081593319, "grad_norm": 0.5945149660110474, "learning_rate": 9.982595358359055e-06, "loss": 0.4809, "step": 3010 }, { "epoch": 0.13817631132118766, "grad_norm": 0.46925055980682373, "learning_rate": 9.982574912254039e-06, "loss": 0.4304, "step": 3011 }, { "epoch": 0.1382222018264421, "grad_norm": 0.4706903398036957, "learning_rate": 9.9825544541675e-06, "loss": 0.4235, "step": 3012 }, { "epoch": 0.13826809233169657, "grad_norm": 0.4697916805744171, "learning_rate": 9.982533984099492e-06, "loss": 0.3537, "step": 3013 }, { "epoch": 0.13831398283695104, "grad_norm": 0.5078604221343994, "learning_rate": 9.982513502050062e-06, "loss": 0.4434, "step": 3014 }, { "epoch": 0.1383598733422055, "grad_norm": 0.43013402819633484, "learning_rate": 9.982493008019258e-06, "loss": 0.3062, "step": 3015 }, { "epoch": 0.13840576384745995, "grad_norm": 0.48810437321662903, "learning_rate": 9.982472502007132e-06, "loss": 0.382, "step": 3016 }, { "epoch": 0.13845165435271442, "grad_norm": 0.493046373128891, "learning_rate": 9.982451984013732e-06, "loss": 0.4408, "step": 3017 }, { "epoch": 0.1384975448579689, "grad_norm": 0.4840654730796814, "learning_rate": 9.982431454039107e-06, "loss": 0.4071, "step": 3018 }, { "epoch": 0.13854343536322336, "grad_norm": 0.465982049703598, "learning_rate": 9.98241091208331e-06, "loss": 0.4104, "step": 3019 }, { "epoch": 0.1385893258684778, "grad_norm": 0.44012361764907837, "learning_rate": 9.982390358146383e-06, "loss": 0.3719, "step": 3020 }, { "epoch": 0.13863521637373227, "grad_norm": 0.4710675776004791, "learning_rate": 9.982369792228382e-06, "loss": 0.4165, "step": 3021 }, { "epoch": 0.13868110687898674, "grad_norm": 0.603533923625946, "learning_rate": 9.982349214329353e-06, "loss": 0.5535, "step": 3022 }, { "epoch": 0.1387269973842412, "grad_norm": 0.4514589011669159, "learning_rate": 9.982328624449347e-06, "loss": 0.3429, "step": 3023 }, { "epoch": 0.13877288788949566, "grad_norm": 0.48825907707214355, "learning_rate": 9.982308022588413e-06, "loss": 0.4228, "step": 3024 }, { "epoch": 0.13881877839475013, "grad_norm": 0.47944125533103943, "learning_rate": 9.982287408746601e-06, "loss": 0.4445, "step": 3025 }, { "epoch": 0.1388646689000046, "grad_norm": 0.4505082964897156, "learning_rate": 9.98226678292396e-06, "loss": 0.3558, "step": 3026 }, { "epoch": 0.13891055940525907, "grad_norm": 0.5942498445510864, "learning_rate": 9.98224614512054e-06, "loss": 0.4874, "step": 3027 }, { "epoch": 0.1389564499105135, "grad_norm": 0.496677041053772, "learning_rate": 9.982225495336391e-06, "loss": 0.4097, "step": 3028 }, { "epoch": 0.13900234041576798, "grad_norm": 0.455191969871521, "learning_rate": 9.982204833571563e-06, "loss": 0.3526, "step": 3029 }, { "epoch": 0.13904823092102245, "grad_norm": 0.4483187794685364, "learning_rate": 9.982184159826103e-06, "loss": 0.3606, "step": 3030 }, { "epoch": 0.1390941214262769, "grad_norm": 0.5057973861694336, "learning_rate": 9.982163474100061e-06, "loss": 0.4309, "step": 3031 }, { "epoch": 0.13914001193153136, "grad_norm": 0.46668848395347595, "learning_rate": 9.982142776393492e-06, "loss": 0.3847, "step": 3032 }, { "epoch": 0.13918590243678583, "grad_norm": 0.5688084363937378, "learning_rate": 9.982122066706439e-06, "loss": 0.5048, "step": 3033 }, { "epoch": 0.1392317929420403, "grad_norm": 0.46805140376091003, "learning_rate": 9.982101345038957e-06, "loss": 0.3865, "step": 3034 }, { "epoch": 0.13927768344729474, "grad_norm": 0.470278263092041, "learning_rate": 9.982080611391091e-06, "loss": 0.4083, "step": 3035 }, { "epoch": 0.1393235739525492, "grad_norm": 0.47664496302604675, "learning_rate": 9.982059865762897e-06, "loss": 0.4615, "step": 3036 }, { "epoch": 0.13936946445780368, "grad_norm": 0.4638785421848297, "learning_rate": 9.98203910815442e-06, "loss": 0.3479, "step": 3037 }, { "epoch": 0.13941535496305815, "grad_norm": 0.5027391910552979, "learning_rate": 9.98201833856571e-06, "loss": 0.46, "step": 3038 }, { "epoch": 0.1394612454683126, "grad_norm": 0.4439915120601654, "learning_rate": 9.981997556996818e-06, "loss": 0.3558, "step": 3039 }, { "epoch": 0.13950713597356706, "grad_norm": 0.48414838314056396, "learning_rate": 9.981976763447797e-06, "loss": 0.4048, "step": 3040 }, { "epoch": 0.13955302647882153, "grad_norm": 0.4496571719646454, "learning_rate": 9.981955957918692e-06, "loss": 0.3261, "step": 3041 }, { "epoch": 0.139598916984076, "grad_norm": 0.45858386158943176, "learning_rate": 9.981935140409556e-06, "loss": 0.3721, "step": 3042 }, { "epoch": 0.13964480748933045, "grad_norm": 0.5063408613204956, "learning_rate": 9.981914310920438e-06, "loss": 0.3553, "step": 3043 }, { "epoch": 0.13969069799458492, "grad_norm": 0.5631232857704163, "learning_rate": 9.98189346945139e-06, "loss": 0.4224, "step": 3044 }, { "epoch": 0.1397365884998394, "grad_norm": 0.4210375249385834, "learning_rate": 9.98187261600246e-06, "loss": 0.299, "step": 3045 }, { "epoch": 0.13978247900509386, "grad_norm": 0.46979600191116333, "learning_rate": 9.981851750573697e-06, "loss": 0.4476, "step": 3046 }, { "epoch": 0.1398283695103483, "grad_norm": 0.48486843705177307, "learning_rate": 9.981830873165154e-06, "loss": 0.3981, "step": 3047 }, { "epoch": 0.13987426001560277, "grad_norm": 0.45954447984695435, "learning_rate": 9.98180998377688e-06, "loss": 0.3512, "step": 3048 }, { "epoch": 0.13992015052085724, "grad_norm": 0.4965614378452301, "learning_rate": 9.981789082408925e-06, "loss": 0.4775, "step": 3049 }, { "epoch": 0.1399660410261117, "grad_norm": 0.5440787672996521, "learning_rate": 9.98176816906134e-06, "loss": 0.5307, "step": 3050 }, { "epoch": 0.14001193153136615, "grad_norm": 0.48101547360420227, "learning_rate": 9.981747243734174e-06, "loss": 0.3847, "step": 3051 }, { "epoch": 0.14005782203662062, "grad_norm": 0.4872550070285797, "learning_rate": 9.981726306427478e-06, "loss": 0.4246, "step": 3052 }, { "epoch": 0.1401037125418751, "grad_norm": 0.49027836322784424, "learning_rate": 9.981705357141303e-06, "loss": 0.404, "step": 3053 }, { "epoch": 0.14014960304712956, "grad_norm": 0.4657130837440491, "learning_rate": 9.981684395875699e-06, "loss": 0.4031, "step": 3054 }, { "epoch": 0.140195493552384, "grad_norm": 0.4884653091430664, "learning_rate": 9.981663422630718e-06, "loss": 0.4748, "step": 3055 }, { "epoch": 0.14024138405763847, "grad_norm": 0.512407660484314, "learning_rate": 9.981642437406407e-06, "loss": 0.5276, "step": 3056 }, { "epoch": 0.14028727456289294, "grad_norm": 0.4834372103214264, "learning_rate": 9.981621440202818e-06, "loss": 0.4412, "step": 3057 }, { "epoch": 0.14033316506814741, "grad_norm": 0.4676523506641388, "learning_rate": 9.981600431020003e-06, "loss": 0.4183, "step": 3058 }, { "epoch": 0.14037905557340186, "grad_norm": 0.4383641481399536, "learning_rate": 9.981579409858008e-06, "loss": 0.3281, "step": 3059 }, { "epoch": 0.14042494607865633, "grad_norm": 0.5031691193580627, "learning_rate": 9.98155837671689e-06, "loss": 0.4985, "step": 3060 }, { "epoch": 0.1404708365839108, "grad_norm": 0.4581845700740814, "learning_rate": 9.981537331596696e-06, "loss": 0.3907, "step": 3061 }, { "epoch": 0.14051672708916524, "grad_norm": 0.5085744857788086, "learning_rate": 9.981516274497476e-06, "loss": 0.4928, "step": 3062 }, { "epoch": 0.1405626175944197, "grad_norm": 0.48630714416503906, "learning_rate": 9.981495205419282e-06, "loss": 0.4323, "step": 3063 }, { "epoch": 0.14060850809967418, "grad_norm": 0.4920274019241333, "learning_rate": 9.981474124362163e-06, "loss": 0.4801, "step": 3064 }, { "epoch": 0.14065439860492865, "grad_norm": 0.4722157120704651, "learning_rate": 9.98145303132617e-06, "loss": 0.3693, "step": 3065 }, { "epoch": 0.1407002891101831, "grad_norm": 0.5346464514732361, "learning_rate": 9.981431926311359e-06, "loss": 0.5228, "step": 3066 }, { "epoch": 0.14074617961543756, "grad_norm": 0.46221181750297546, "learning_rate": 9.981410809317773e-06, "loss": 0.3349, "step": 3067 }, { "epoch": 0.14079207012069203, "grad_norm": 0.4616551995277405, "learning_rate": 9.981389680345468e-06, "loss": 0.3898, "step": 3068 }, { "epoch": 0.1408379606259465, "grad_norm": 0.4333873987197876, "learning_rate": 9.981368539394492e-06, "loss": 0.3396, "step": 3069 }, { "epoch": 0.14088385113120094, "grad_norm": 0.45387235283851624, "learning_rate": 9.981347386464897e-06, "loss": 0.3525, "step": 3070 }, { "epoch": 0.1409297416364554, "grad_norm": 0.4792275130748749, "learning_rate": 9.981326221556734e-06, "loss": 0.4041, "step": 3071 }, { "epoch": 0.14097563214170988, "grad_norm": 0.4632616639137268, "learning_rate": 9.981305044670054e-06, "loss": 0.3335, "step": 3072 }, { "epoch": 0.14102152264696435, "grad_norm": 0.5189899206161499, "learning_rate": 9.981283855804906e-06, "loss": 0.4768, "step": 3073 }, { "epoch": 0.1410674131522188, "grad_norm": 0.47905048727989197, "learning_rate": 9.981262654961343e-06, "loss": 0.4456, "step": 3074 }, { "epoch": 0.14111330365747327, "grad_norm": 0.4708541929721832, "learning_rate": 9.981241442139415e-06, "loss": 0.4004, "step": 3075 }, { "epoch": 0.14115919416272774, "grad_norm": 0.4787214398384094, "learning_rate": 9.981220217339176e-06, "loss": 0.426, "step": 3076 }, { "epoch": 0.1412050846679822, "grad_norm": 0.4904378652572632, "learning_rate": 9.981198980560671e-06, "loss": 0.375, "step": 3077 }, { "epoch": 0.14125097517323665, "grad_norm": 0.5384431481361389, "learning_rate": 9.981177731803956e-06, "loss": 0.4761, "step": 3078 }, { "epoch": 0.14129686567849112, "grad_norm": 0.47541213035583496, "learning_rate": 9.981156471069083e-06, "loss": 0.4076, "step": 3079 }, { "epoch": 0.1413427561837456, "grad_norm": 0.6860296726226807, "learning_rate": 9.981135198356098e-06, "loss": 0.466, "step": 3080 }, { "epoch": 0.14138864668900006, "grad_norm": 0.4394347369670868, "learning_rate": 9.981113913665056e-06, "loss": 0.297, "step": 3081 }, { "epoch": 0.1414345371942545, "grad_norm": 0.5016234517097473, "learning_rate": 9.981092616996007e-06, "loss": 0.4169, "step": 3082 }, { "epoch": 0.14148042769950897, "grad_norm": 0.4984942078590393, "learning_rate": 9.981071308349001e-06, "loss": 0.4473, "step": 3083 }, { "epoch": 0.14152631820476344, "grad_norm": 0.5313014984130859, "learning_rate": 9.981049987724092e-06, "loss": 0.524, "step": 3084 }, { "epoch": 0.1415722087100179, "grad_norm": 0.4959547221660614, "learning_rate": 9.981028655121329e-06, "loss": 0.3705, "step": 3085 }, { "epoch": 0.14161809921527235, "grad_norm": 0.49455925822257996, "learning_rate": 9.981007310540765e-06, "loss": 0.3202, "step": 3086 }, { "epoch": 0.14166398972052682, "grad_norm": 0.4558359682559967, "learning_rate": 9.98098595398245e-06, "loss": 0.3861, "step": 3087 }, { "epoch": 0.1417098802257813, "grad_norm": 0.5051929950714111, "learning_rate": 9.980964585446435e-06, "loss": 0.4286, "step": 3088 }, { "epoch": 0.14175577073103574, "grad_norm": 0.4991109371185303, "learning_rate": 9.980943204932773e-06, "loss": 0.4307, "step": 3089 }, { "epoch": 0.1418016612362902, "grad_norm": 0.5207083821296692, "learning_rate": 9.980921812441515e-06, "loss": 0.4935, "step": 3090 }, { "epoch": 0.14184755174154468, "grad_norm": 0.4758206307888031, "learning_rate": 9.980900407972714e-06, "loss": 0.4257, "step": 3091 }, { "epoch": 0.14189344224679915, "grad_norm": 0.47634357213974, "learning_rate": 9.980878991526417e-06, "loss": 0.4223, "step": 3092 }, { "epoch": 0.1419393327520536, "grad_norm": 0.6730012893676758, "learning_rate": 9.980857563102677e-06, "loss": 0.5096, "step": 3093 }, { "epoch": 0.14198522325730806, "grad_norm": 0.45978879928588867, "learning_rate": 9.98083612270155e-06, "loss": 0.3773, "step": 3094 }, { "epoch": 0.14203111376256253, "grad_norm": 0.5018622875213623, "learning_rate": 9.98081467032308e-06, "loss": 0.4277, "step": 3095 }, { "epoch": 0.142077004267817, "grad_norm": 0.4870125651359558, "learning_rate": 9.980793205967325e-06, "loss": 0.3581, "step": 3096 }, { "epoch": 0.14212289477307144, "grad_norm": 0.4462220370769501, "learning_rate": 9.980771729634335e-06, "loss": 0.3673, "step": 3097 }, { "epoch": 0.1421687852783259, "grad_norm": 0.4655051529407501, "learning_rate": 9.98075024132416e-06, "loss": 0.3897, "step": 3098 }, { "epoch": 0.14221467578358038, "grad_norm": 0.542564868927002, "learning_rate": 9.980728741036853e-06, "loss": 0.4938, "step": 3099 }, { "epoch": 0.14226056628883485, "grad_norm": 0.4483783543109894, "learning_rate": 9.980707228772464e-06, "loss": 0.3459, "step": 3100 }, { "epoch": 0.1423064567940893, "grad_norm": 0.4641178250312805, "learning_rate": 9.980685704531048e-06, "loss": 0.3285, "step": 3101 }, { "epoch": 0.14235234729934376, "grad_norm": 0.5129979848861694, "learning_rate": 9.980664168312654e-06, "loss": 0.4006, "step": 3102 }, { "epoch": 0.14239823780459823, "grad_norm": 0.4599338471889496, "learning_rate": 9.980642620117334e-06, "loss": 0.3767, "step": 3103 }, { "epoch": 0.1424441283098527, "grad_norm": 0.49075594544410706, "learning_rate": 9.980621059945141e-06, "loss": 0.457, "step": 3104 }, { "epoch": 0.14249001881510714, "grad_norm": 0.48812124133110046, "learning_rate": 9.980599487796126e-06, "loss": 0.4538, "step": 3105 }, { "epoch": 0.14253590932036161, "grad_norm": 0.551496684551239, "learning_rate": 9.980577903670342e-06, "loss": 0.5404, "step": 3106 }, { "epoch": 0.14258179982561608, "grad_norm": 0.46660685539245605, "learning_rate": 9.98055630756784e-06, "loss": 0.3679, "step": 3107 }, { "epoch": 0.14262769033087055, "grad_norm": 0.46349766850471497, "learning_rate": 9.980534699488671e-06, "loss": 0.425, "step": 3108 }, { "epoch": 0.142673580836125, "grad_norm": 0.49127018451690674, "learning_rate": 9.980513079432889e-06, "loss": 0.4616, "step": 3109 }, { "epoch": 0.14271947134137947, "grad_norm": 0.5036889314651489, "learning_rate": 9.980491447400544e-06, "loss": 0.4524, "step": 3110 }, { "epoch": 0.14276536184663394, "grad_norm": 0.49507656693458557, "learning_rate": 9.98046980339169e-06, "loss": 0.4538, "step": 3111 }, { "epoch": 0.1428112523518884, "grad_norm": 0.4613012969493866, "learning_rate": 9.980448147406376e-06, "loss": 0.4542, "step": 3112 }, { "epoch": 0.14285714285714285, "grad_norm": 0.4675821363925934, "learning_rate": 9.980426479444658e-06, "loss": 0.4006, "step": 3113 }, { "epoch": 0.14290303336239732, "grad_norm": 0.43902119994163513, "learning_rate": 9.980404799506585e-06, "loss": 0.3266, "step": 3114 }, { "epoch": 0.1429489238676518, "grad_norm": 0.46223410964012146, "learning_rate": 9.98038310759221e-06, "loss": 0.3844, "step": 3115 }, { "epoch": 0.14299481437290626, "grad_norm": 0.546207070350647, "learning_rate": 9.980361403701588e-06, "loss": 0.4387, "step": 3116 }, { "epoch": 0.1430407048781607, "grad_norm": 0.4951692819595337, "learning_rate": 9.980339687834767e-06, "loss": 0.4257, "step": 3117 }, { "epoch": 0.14308659538341517, "grad_norm": 0.4896540343761444, "learning_rate": 9.9803179599918e-06, "loss": 0.4065, "step": 3118 }, { "epoch": 0.14313248588866964, "grad_norm": 0.45201075077056885, "learning_rate": 9.980296220172742e-06, "loss": 0.3398, "step": 3119 }, { "epoch": 0.14317837639392408, "grad_norm": 0.4665740728378296, "learning_rate": 9.980274468377641e-06, "loss": 0.3576, "step": 3120 }, { "epoch": 0.14322426689917855, "grad_norm": 0.48130103945732117, "learning_rate": 9.980252704606555e-06, "loss": 0.4094, "step": 3121 }, { "epoch": 0.14327015740443302, "grad_norm": 0.44395318627357483, "learning_rate": 9.980230928859532e-06, "loss": 0.3734, "step": 3122 }, { "epoch": 0.1433160479096875, "grad_norm": 0.4439608156681061, "learning_rate": 9.980209141136624e-06, "loss": 0.3335, "step": 3123 }, { "epoch": 0.14336193841494194, "grad_norm": 0.48598894476890564, "learning_rate": 9.980187341437886e-06, "loss": 0.4086, "step": 3124 }, { "epoch": 0.1434078289201964, "grad_norm": 0.4441927373409271, "learning_rate": 9.980165529763368e-06, "loss": 0.3999, "step": 3125 }, { "epoch": 0.14345371942545088, "grad_norm": 0.44016677141189575, "learning_rate": 9.980143706113125e-06, "loss": 0.3391, "step": 3126 }, { "epoch": 0.14349960993070535, "grad_norm": 0.44359922409057617, "learning_rate": 9.980121870487207e-06, "loss": 0.3427, "step": 3127 }, { "epoch": 0.1435455004359598, "grad_norm": 0.4892541170120239, "learning_rate": 9.980100022885668e-06, "loss": 0.4538, "step": 3128 }, { "epoch": 0.14359139094121426, "grad_norm": 0.5039497017860413, "learning_rate": 9.980078163308561e-06, "loss": 0.4661, "step": 3129 }, { "epoch": 0.14363728144646873, "grad_norm": 0.4830937385559082, "learning_rate": 9.980056291755937e-06, "loss": 0.4725, "step": 3130 }, { "epoch": 0.1436831719517232, "grad_norm": 0.43063437938690186, "learning_rate": 9.98003440822785e-06, "loss": 0.3218, "step": 3131 }, { "epoch": 0.14372906245697764, "grad_norm": 0.4961840808391571, "learning_rate": 9.980012512724353e-06, "loss": 0.4388, "step": 3132 }, { "epoch": 0.1437749529622321, "grad_norm": 0.43553978204727173, "learning_rate": 9.979990605245495e-06, "loss": 0.3254, "step": 3133 }, { "epoch": 0.14382084346748658, "grad_norm": 0.4516310691833496, "learning_rate": 9.979968685791332e-06, "loss": 0.3654, "step": 3134 }, { "epoch": 0.14386673397274105, "grad_norm": 0.4878760874271393, "learning_rate": 9.979946754361918e-06, "loss": 0.4295, "step": 3135 }, { "epoch": 0.1439126244779955, "grad_norm": 0.4790392816066742, "learning_rate": 9.979924810957302e-06, "loss": 0.4528, "step": 3136 }, { "epoch": 0.14395851498324996, "grad_norm": 0.5120466947555542, "learning_rate": 9.979902855577539e-06, "loss": 0.4706, "step": 3137 }, { "epoch": 0.14400440548850443, "grad_norm": 0.4738479256629944, "learning_rate": 9.979880888222682e-06, "loss": 0.3859, "step": 3138 }, { "epoch": 0.1440502959937589, "grad_norm": 0.4464550018310547, "learning_rate": 9.979858908892782e-06, "loss": 0.3758, "step": 3139 }, { "epoch": 0.14409618649901335, "grad_norm": 0.5037957429885864, "learning_rate": 9.979836917587892e-06, "loss": 0.4588, "step": 3140 }, { "epoch": 0.14414207700426782, "grad_norm": 0.4527125954627991, "learning_rate": 9.979814914308069e-06, "loss": 0.3471, "step": 3141 }, { "epoch": 0.14418796750952229, "grad_norm": 0.44850632548332214, "learning_rate": 9.979792899053361e-06, "loss": 0.3834, "step": 3142 }, { "epoch": 0.14423385801477676, "grad_norm": 0.4473056495189667, "learning_rate": 9.979770871823823e-06, "loss": 0.3564, "step": 3143 }, { "epoch": 0.1442797485200312, "grad_norm": 0.4869324564933777, "learning_rate": 9.979748832619508e-06, "loss": 0.4307, "step": 3144 }, { "epoch": 0.14432563902528567, "grad_norm": 0.4606243371963501, "learning_rate": 9.979726781440469e-06, "loss": 0.4164, "step": 3145 }, { "epoch": 0.14437152953054014, "grad_norm": 0.503044605255127, "learning_rate": 9.979704718286756e-06, "loss": 0.4867, "step": 3146 }, { "epoch": 0.14441742003579458, "grad_norm": 0.5367587804794312, "learning_rate": 9.979682643158428e-06, "loss": 0.5154, "step": 3147 }, { "epoch": 0.14446331054104905, "grad_norm": 0.48371759057044983, "learning_rate": 9.979660556055533e-06, "loss": 0.4645, "step": 3148 }, { "epoch": 0.14450920104630352, "grad_norm": 0.4573518633842468, "learning_rate": 9.979638456978126e-06, "loss": 0.4268, "step": 3149 }, { "epoch": 0.144555091551558, "grad_norm": 0.514528751373291, "learning_rate": 9.979616345926262e-06, "loss": 0.4475, "step": 3150 }, { "epoch": 0.14460098205681243, "grad_norm": 0.5610572695732117, "learning_rate": 9.979594222899992e-06, "loss": 0.4988, "step": 3151 }, { "epoch": 0.1446468725620669, "grad_norm": 0.4925415515899658, "learning_rate": 9.979572087899369e-06, "loss": 0.3891, "step": 3152 }, { "epoch": 0.14469276306732137, "grad_norm": 0.4756164848804474, "learning_rate": 9.979549940924446e-06, "loss": 0.369, "step": 3153 }, { "epoch": 0.14473865357257584, "grad_norm": 0.47581610083580017, "learning_rate": 9.979527781975278e-06, "loss": 0.4067, "step": 3154 }, { "epoch": 0.14478454407783029, "grad_norm": 0.49081236124038696, "learning_rate": 9.979505611051916e-06, "loss": 0.3976, "step": 3155 }, { "epoch": 0.14483043458308476, "grad_norm": 0.5139479041099548, "learning_rate": 9.979483428154414e-06, "loss": 0.5148, "step": 3156 }, { "epoch": 0.14487632508833923, "grad_norm": 0.47468358278274536, "learning_rate": 9.979461233282828e-06, "loss": 0.3882, "step": 3157 }, { "epoch": 0.1449222155935937, "grad_norm": 0.4881764352321625, "learning_rate": 9.97943902643721e-06, "loss": 0.4197, "step": 3158 }, { "epoch": 0.14496810609884814, "grad_norm": 0.5144550204277039, "learning_rate": 9.979416807617611e-06, "loss": 0.4181, "step": 3159 }, { "epoch": 0.1450139966041026, "grad_norm": 0.42927655577659607, "learning_rate": 9.979394576824085e-06, "loss": 0.3798, "step": 3160 }, { "epoch": 0.14505988710935708, "grad_norm": 0.4579162895679474, "learning_rate": 9.979372334056687e-06, "loss": 0.3593, "step": 3161 }, { "epoch": 0.14510577761461155, "grad_norm": 0.4813615083694458, "learning_rate": 9.979350079315472e-06, "loss": 0.3868, "step": 3162 }, { "epoch": 0.145151668119866, "grad_norm": 0.5369012355804443, "learning_rate": 9.97932781260049e-06, "loss": 0.4931, "step": 3163 }, { "epoch": 0.14519755862512046, "grad_norm": 0.47141051292419434, "learning_rate": 9.979305533911797e-06, "loss": 0.4274, "step": 3164 }, { "epoch": 0.14524344913037493, "grad_norm": 0.48149245977401733, "learning_rate": 9.979283243249446e-06, "loss": 0.3994, "step": 3165 }, { "epoch": 0.1452893396356294, "grad_norm": 0.5012102723121643, "learning_rate": 9.97926094061349e-06, "loss": 0.4275, "step": 3166 }, { "epoch": 0.14533523014088384, "grad_norm": 0.5880632400512695, "learning_rate": 9.979238626003982e-06, "loss": 0.4164, "step": 3167 }, { "epoch": 0.1453811206461383, "grad_norm": 0.4732213616371155, "learning_rate": 9.979216299420978e-06, "loss": 0.3799, "step": 3168 }, { "epoch": 0.14542701115139278, "grad_norm": 0.4795583486557007, "learning_rate": 9.979193960864528e-06, "loss": 0.4202, "step": 3169 }, { "epoch": 0.14547290165664725, "grad_norm": 0.49628451466560364, "learning_rate": 9.979171610334691e-06, "loss": 0.4166, "step": 3170 }, { "epoch": 0.1455187921619017, "grad_norm": 0.468717098236084, "learning_rate": 9.979149247831515e-06, "loss": 0.3935, "step": 3171 }, { "epoch": 0.14556468266715616, "grad_norm": 0.5007694959640503, "learning_rate": 9.979126873355059e-06, "loss": 0.463, "step": 3172 }, { "epoch": 0.14561057317241063, "grad_norm": 0.49003463983535767, "learning_rate": 9.979104486905372e-06, "loss": 0.4481, "step": 3173 }, { "epoch": 0.1456564636776651, "grad_norm": 0.4339561462402344, "learning_rate": 9.979082088482512e-06, "loss": 0.3073, "step": 3174 }, { "epoch": 0.14570235418291955, "grad_norm": 0.5013194680213928, "learning_rate": 9.979059678086529e-06, "loss": 0.4631, "step": 3175 }, { "epoch": 0.14574824468817402, "grad_norm": 0.4873732030391693, "learning_rate": 9.979037255717482e-06, "loss": 0.4555, "step": 3176 }, { "epoch": 0.1457941351934285, "grad_norm": 0.4566386342048645, "learning_rate": 9.979014821375419e-06, "loss": 0.3723, "step": 3177 }, { "epoch": 0.14584002569868293, "grad_norm": 0.4844808876514435, "learning_rate": 9.978992375060397e-06, "loss": 0.4172, "step": 3178 }, { "epoch": 0.1458859162039374, "grad_norm": 0.49305760860443115, "learning_rate": 9.978969916772472e-06, "loss": 0.4111, "step": 3179 }, { "epoch": 0.14593180670919187, "grad_norm": 0.42815062403678894, "learning_rate": 9.978947446511692e-06, "loss": 0.3426, "step": 3180 }, { "epoch": 0.14597769721444634, "grad_norm": 0.5468980669975281, "learning_rate": 9.978924964278119e-06, "loss": 0.4926, "step": 3181 }, { "epoch": 0.14602358771970078, "grad_norm": 0.44816407561302185, "learning_rate": 9.9789024700718e-06, "loss": 0.3472, "step": 3182 }, { "epoch": 0.14606947822495525, "grad_norm": 0.49575862288475037, "learning_rate": 9.978879963892794e-06, "loss": 0.4554, "step": 3183 }, { "epoch": 0.14611536873020972, "grad_norm": 0.4600825011730194, "learning_rate": 9.978857445741152e-06, "loss": 0.3694, "step": 3184 }, { "epoch": 0.1461612592354642, "grad_norm": 0.5010223984718323, "learning_rate": 9.978834915616929e-06, "loss": 0.4739, "step": 3185 }, { "epoch": 0.14620714974071863, "grad_norm": 0.5065903067588806, "learning_rate": 9.978812373520179e-06, "loss": 0.4736, "step": 3186 }, { "epoch": 0.1462530402459731, "grad_norm": 0.47557079792022705, "learning_rate": 9.978789819450957e-06, "loss": 0.4275, "step": 3187 }, { "epoch": 0.14629893075122757, "grad_norm": 0.5065678954124451, "learning_rate": 9.978767253409318e-06, "loss": 0.4784, "step": 3188 }, { "epoch": 0.14634482125648204, "grad_norm": 0.5704526305198669, "learning_rate": 9.978744675395314e-06, "loss": 0.4233, "step": 3189 }, { "epoch": 0.1463907117617365, "grad_norm": 0.5078095197677612, "learning_rate": 9.978722085409e-06, "loss": 0.3907, "step": 3190 }, { "epoch": 0.14643660226699096, "grad_norm": 0.48235201835632324, "learning_rate": 9.978699483450432e-06, "loss": 0.3985, "step": 3191 }, { "epoch": 0.14648249277224543, "grad_norm": 0.49654367566108704, "learning_rate": 9.978676869519662e-06, "loss": 0.4956, "step": 3192 }, { "epoch": 0.1465283832774999, "grad_norm": 0.48536545038223267, "learning_rate": 9.978654243616746e-06, "loss": 0.4643, "step": 3193 }, { "epoch": 0.14657427378275434, "grad_norm": 0.5111007690429688, "learning_rate": 9.978631605741737e-06, "loss": 0.5157, "step": 3194 }, { "epoch": 0.1466201642880088, "grad_norm": 0.445444792509079, "learning_rate": 9.97860895589469e-06, "loss": 0.314, "step": 3195 }, { "epoch": 0.14666605479326328, "grad_norm": 0.47209522128105164, "learning_rate": 9.978586294075661e-06, "loss": 0.4301, "step": 3196 }, { "epoch": 0.14671194529851775, "grad_norm": 0.521162211894989, "learning_rate": 9.978563620284704e-06, "loss": 0.4361, "step": 3197 }, { "epoch": 0.1467578358037722, "grad_norm": 0.4355694651603699, "learning_rate": 9.978540934521871e-06, "loss": 0.3996, "step": 3198 }, { "epoch": 0.14680372630902666, "grad_norm": 0.47809621691703796, "learning_rate": 9.978518236787219e-06, "loss": 0.4249, "step": 3199 }, { "epoch": 0.14684961681428113, "grad_norm": 0.4769158959388733, "learning_rate": 9.978495527080803e-06, "loss": 0.4532, "step": 3200 }, { "epoch": 0.1468955073195356, "grad_norm": 0.47216641902923584, "learning_rate": 9.978472805402675e-06, "loss": 0.4076, "step": 3201 }, { "epoch": 0.14694139782479004, "grad_norm": 0.4564381241798401, "learning_rate": 9.97845007175289e-06, "loss": 0.41, "step": 3202 }, { "epoch": 0.1469872883300445, "grad_norm": 0.5419045090675354, "learning_rate": 9.978427326131506e-06, "loss": 0.4873, "step": 3203 }, { "epoch": 0.14703317883529898, "grad_norm": 0.5343883037567139, "learning_rate": 9.978404568538575e-06, "loss": 0.424, "step": 3204 }, { "epoch": 0.14707906934055345, "grad_norm": 0.4494619369506836, "learning_rate": 9.978381798974153e-06, "loss": 0.3195, "step": 3205 }, { "epoch": 0.1471249598458079, "grad_norm": 0.5076942443847656, "learning_rate": 9.978359017438293e-06, "loss": 0.4641, "step": 3206 }, { "epoch": 0.14717085035106237, "grad_norm": 0.4370076060295105, "learning_rate": 9.978336223931051e-06, "loss": 0.3307, "step": 3207 }, { "epoch": 0.14721674085631684, "grad_norm": 0.5028615593910217, "learning_rate": 9.978313418452482e-06, "loss": 0.4516, "step": 3208 }, { "epoch": 0.14726263136157128, "grad_norm": 0.4506158232688904, "learning_rate": 9.978290601002638e-06, "loss": 0.3574, "step": 3209 }, { "epoch": 0.14730852186682575, "grad_norm": 0.44715920090675354, "learning_rate": 9.978267771581579e-06, "loss": 0.335, "step": 3210 }, { "epoch": 0.14735441237208022, "grad_norm": 0.4608115553855896, "learning_rate": 9.978244930189356e-06, "loss": 0.339, "step": 3211 }, { "epoch": 0.1474003028773347, "grad_norm": 0.5308574438095093, "learning_rate": 9.978222076826026e-06, "loss": 0.4133, "step": 3212 }, { "epoch": 0.14744619338258913, "grad_norm": 0.4709831476211548, "learning_rate": 9.978199211491643e-06, "loss": 0.4215, "step": 3213 }, { "epoch": 0.1474920838878436, "grad_norm": 0.4851463735103607, "learning_rate": 9.978176334186261e-06, "loss": 0.4392, "step": 3214 }, { "epoch": 0.14753797439309807, "grad_norm": 0.43764227628707886, "learning_rate": 9.978153444909938e-06, "loss": 0.3467, "step": 3215 }, { "epoch": 0.14758386489835254, "grad_norm": 0.4525946378707886, "learning_rate": 9.978130543662726e-06, "loss": 0.4284, "step": 3216 }, { "epoch": 0.14762975540360698, "grad_norm": 0.4712614417076111, "learning_rate": 9.97810763044468e-06, "loss": 0.4149, "step": 3217 }, { "epoch": 0.14767564590886145, "grad_norm": 0.46015509963035583, "learning_rate": 9.978084705255857e-06, "loss": 0.3853, "step": 3218 }, { "epoch": 0.14772153641411592, "grad_norm": 0.48580053448677063, "learning_rate": 9.978061768096312e-06, "loss": 0.3521, "step": 3219 }, { "epoch": 0.1477674269193704, "grad_norm": 0.49456503987312317, "learning_rate": 9.9780388189661e-06, "loss": 0.43, "step": 3220 }, { "epoch": 0.14781331742462483, "grad_norm": 0.485686719417572, "learning_rate": 9.978015857865275e-06, "loss": 0.4268, "step": 3221 }, { "epoch": 0.1478592079298793, "grad_norm": 0.4638362228870392, "learning_rate": 9.977992884793893e-06, "loss": 0.4398, "step": 3222 }, { "epoch": 0.14790509843513377, "grad_norm": 0.46629539132118225, "learning_rate": 9.977969899752009e-06, "loss": 0.4198, "step": 3223 }, { "epoch": 0.14795098894038824, "grad_norm": 0.5020266771316528, "learning_rate": 9.977946902739679e-06, "loss": 0.4318, "step": 3224 }, { "epoch": 0.1479968794456427, "grad_norm": 0.48321160674095154, "learning_rate": 9.977923893756958e-06, "loss": 0.4115, "step": 3225 }, { "epoch": 0.14804276995089716, "grad_norm": 0.5496830940246582, "learning_rate": 9.9779008728039e-06, "loss": 0.4595, "step": 3226 }, { "epoch": 0.14808866045615163, "grad_norm": 0.5111088156700134, "learning_rate": 9.977877839880562e-06, "loss": 0.4378, "step": 3227 }, { "epoch": 0.1481345509614061, "grad_norm": 0.5269816517829895, "learning_rate": 9.977854794986998e-06, "loss": 0.5112, "step": 3228 }, { "epoch": 0.14818044146666054, "grad_norm": 0.5188043713569641, "learning_rate": 9.977831738123266e-06, "loss": 0.4758, "step": 3229 }, { "epoch": 0.148226331971915, "grad_norm": 0.510813295841217, "learning_rate": 9.977808669289419e-06, "loss": 0.4725, "step": 3230 }, { "epoch": 0.14827222247716948, "grad_norm": 0.521381676197052, "learning_rate": 9.977785588485514e-06, "loss": 0.5073, "step": 3231 }, { "epoch": 0.14831811298242395, "grad_norm": 0.49391964077949524, "learning_rate": 9.977762495711605e-06, "loss": 0.4603, "step": 3232 }, { "epoch": 0.1483640034876784, "grad_norm": 0.47634458541870117, "learning_rate": 9.977739390967746e-06, "loss": 0.4183, "step": 3233 }, { "epoch": 0.14840989399293286, "grad_norm": 0.4440281093120575, "learning_rate": 9.977716274253998e-06, "loss": 0.3587, "step": 3234 }, { "epoch": 0.14845578449818733, "grad_norm": 0.43542689085006714, "learning_rate": 9.977693145570413e-06, "loss": 0.3481, "step": 3235 }, { "epoch": 0.14850167500344177, "grad_norm": 0.5680211186408997, "learning_rate": 9.977670004917046e-06, "loss": 0.3766, "step": 3236 }, { "epoch": 0.14854756550869624, "grad_norm": 0.6538944244384766, "learning_rate": 9.977646852293954e-06, "loss": 0.4768, "step": 3237 }, { "epoch": 0.14859345601395071, "grad_norm": 0.4630160331726074, "learning_rate": 9.977623687701192e-06, "loss": 0.4119, "step": 3238 }, { "epoch": 0.14863934651920518, "grad_norm": 0.45330503582954407, "learning_rate": 9.977600511138816e-06, "loss": 0.3413, "step": 3239 }, { "epoch": 0.14868523702445963, "grad_norm": 0.4710831642150879, "learning_rate": 9.977577322606882e-06, "loss": 0.3961, "step": 3240 }, { "epoch": 0.1487311275297141, "grad_norm": 0.48167937994003296, "learning_rate": 9.977554122105446e-06, "loss": 0.4179, "step": 3241 }, { "epoch": 0.14877701803496857, "grad_norm": 0.5230624079704285, "learning_rate": 9.977530909634561e-06, "loss": 0.4514, "step": 3242 }, { "epoch": 0.14882290854022304, "grad_norm": 0.47600480914115906, "learning_rate": 9.977507685194287e-06, "loss": 0.3741, "step": 3243 }, { "epoch": 0.14886879904547748, "grad_norm": 0.46319711208343506, "learning_rate": 9.97748444878468e-06, "loss": 0.3849, "step": 3244 }, { "epoch": 0.14891468955073195, "grad_norm": 0.46935534477233887, "learning_rate": 9.97746120040579e-06, "loss": 0.407, "step": 3245 }, { "epoch": 0.14896058005598642, "grad_norm": 0.47236064076423645, "learning_rate": 9.97743794005768e-06, "loss": 0.4032, "step": 3246 }, { "epoch": 0.1490064705612409, "grad_norm": 0.49963274598121643, "learning_rate": 9.9774146677404e-06, "loss": 0.4263, "step": 3247 }, { "epoch": 0.14905236106649533, "grad_norm": 0.515075147151947, "learning_rate": 9.97739138345401e-06, "loss": 0.4636, "step": 3248 }, { "epoch": 0.1490982515717498, "grad_norm": 0.4450444281101227, "learning_rate": 9.977368087198565e-06, "loss": 0.4166, "step": 3249 }, { "epoch": 0.14914414207700427, "grad_norm": 0.4645858108997345, "learning_rate": 9.977344778974118e-06, "loss": 0.397, "step": 3250 }, { "epoch": 0.14919003258225874, "grad_norm": 0.4821338653564453, "learning_rate": 9.977321458780731e-06, "loss": 0.3795, "step": 3251 }, { "epoch": 0.14923592308751318, "grad_norm": 0.47550302743911743, "learning_rate": 9.977298126618456e-06, "loss": 0.4161, "step": 3252 }, { "epoch": 0.14928181359276765, "grad_norm": 0.5013076663017273, "learning_rate": 9.977274782487348e-06, "loss": 0.4643, "step": 3253 }, { "epoch": 0.14932770409802212, "grad_norm": 0.42847785353660583, "learning_rate": 9.977251426387466e-06, "loss": 0.3158, "step": 3254 }, { "epoch": 0.1493735946032766, "grad_norm": 0.49158450961112976, "learning_rate": 9.977228058318867e-06, "loss": 0.4821, "step": 3255 }, { "epoch": 0.14941948510853104, "grad_norm": 0.5497332215309143, "learning_rate": 9.977204678281603e-06, "loss": 0.4187, "step": 3256 }, { "epoch": 0.1494653756137855, "grad_norm": 0.49648115038871765, "learning_rate": 9.977181286275735e-06, "loss": 0.3362, "step": 3257 }, { "epoch": 0.14951126611903998, "grad_norm": 0.5207563042640686, "learning_rate": 9.977157882301314e-06, "loss": 0.5135, "step": 3258 }, { "epoch": 0.14955715662429445, "grad_norm": 0.49305441975593567, "learning_rate": 9.9771344663584e-06, "loss": 0.3643, "step": 3259 }, { "epoch": 0.1496030471295489, "grad_norm": 0.45614635944366455, "learning_rate": 9.977111038447049e-06, "loss": 0.4039, "step": 3260 }, { "epoch": 0.14964893763480336, "grad_norm": 0.4625093340873718, "learning_rate": 9.977087598567317e-06, "loss": 0.3974, "step": 3261 }, { "epoch": 0.14969482814005783, "grad_norm": 0.43722766637802124, "learning_rate": 9.97706414671926e-06, "loss": 0.3446, "step": 3262 }, { "epoch": 0.1497407186453123, "grad_norm": 0.5063542127609253, "learning_rate": 9.977040682902935e-06, "loss": 0.4738, "step": 3263 }, { "epoch": 0.14978660915056674, "grad_norm": 0.47105082869529724, "learning_rate": 9.977017207118398e-06, "loss": 0.3969, "step": 3264 }, { "epoch": 0.1498324996558212, "grad_norm": 0.4708927571773529, "learning_rate": 9.976993719365704e-06, "loss": 0.3844, "step": 3265 }, { "epoch": 0.14987839016107568, "grad_norm": 0.48262327909469604, "learning_rate": 9.976970219644912e-06, "loss": 0.4316, "step": 3266 }, { "epoch": 0.14992428066633012, "grad_norm": 0.453774631023407, "learning_rate": 9.976946707956077e-06, "loss": 0.3611, "step": 3267 }, { "epoch": 0.1499701711715846, "grad_norm": 0.5171927809715271, "learning_rate": 9.976923184299257e-06, "loss": 0.4728, "step": 3268 }, { "epoch": 0.15001606167683906, "grad_norm": 0.41328203678131104, "learning_rate": 9.976899648674506e-06, "loss": 0.3226, "step": 3269 }, { "epoch": 0.15006195218209353, "grad_norm": 0.4921911954879761, "learning_rate": 9.976876101081883e-06, "loss": 0.4472, "step": 3270 }, { "epoch": 0.15010784268734798, "grad_norm": 0.47238001227378845, "learning_rate": 9.976852541521443e-06, "loss": 0.3968, "step": 3271 }, { "epoch": 0.15015373319260245, "grad_norm": 0.4779175817966461, "learning_rate": 9.976828969993243e-06, "loss": 0.441, "step": 3272 }, { "epoch": 0.15019962369785692, "grad_norm": 0.5207765698432922, "learning_rate": 9.976805386497342e-06, "loss": 0.4787, "step": 3273 }, { "epoch": 0.15024551420311139, "grad_norm": 0.4468748867511749, "learning_rate": 9.976781791033792e-06, "loss": 0.3815, "step": 3274 }, { "epoch": 0.15029140470836583, "grad_norm": 0.4599221646785736, "learning_rate": 9.976758183602656e-06, "loss": 0.4145, "step": 3275 }, { "epoch": 0.1503372952136203, "grad_norm": 0.5256581902503967, "learning_rate": 9.976734564203984e-06, "loss": 0.4799, "step": 3276 }, { "epoch": 0.15038318571887477, "grad_norm": 0.49408283829689026, "learning_rate": 9.976710932837838e-06, "loss": 0.4687, "step": 3277 }, { "epoch": 0.15042907622412924, "grad_norm": 0.5246826410293579, "learning_rate": 9.976687289504272e-06, "loss": 0.5185, "step": 3278 }, { "epoch": 0.15047496672938368, "grad_norm": 0.47444865107536316, "learning_rate": 9.976663634203344e-06, "loss": 0.3545, "step": 3279 }, { "epoch": 0.15052085723463815, "grad_norm": 0.4859449863433838, "learning_rate": 9.976639966935111e-06, "loss": 0.4716, "step": 3280 }, { "epoch": 0.15056674773989262, "grad_norm": 0.4734502136707306, "learning_rate": 9.976616287699629e-06, "loss": 0.382, "step": 3281 }, { "epoch": 0.1506126382451471, "grad_norm": 0.4478846490383148, "learning_rate": 9.976592596496956e-06, "loss": 0.3778, "step": 3282 }, { "epoch": 0.15065852875040153, "grad_norm": 0.5079098343849182, "learning_rate": 9.976568893327148e-06, "loss": 0.3747, "step": 3283 }, { "epoch": 0.150704419255656, "grad_norm": 0.46207308769226074, "learning_rate": 9.976545178190262e-06, "loss": 0.4172, "step": 3284 }, { "epoch": 0.15075030976091047, "grad_norm": 0.49411800503730774, "learning_rate": 9.976521451086357e-06, "loss": 0.4661, "step": 3285 }, { "epoch": 0.15079620026616494, "grad_norm": 0.4663957953453064, "learning_rate": 9.976497712015487e-06, "loss": 0.429, "step": 3286 }, { "epoch": 0.15084209077141938, "grad_norm": 0.5326253175735474, "learning_rate": 9.976473960977711e-06, "loss": 0.5189, "step": 3287 }, { "epoch": 0.15088798127667385, "grad_norm": 0.43387115001678467, "learning_rate": 9.976450197973087e-06, "loss": 0.3104, "step": 3288 }, { "epoch": 0.15093387178192832, "grad_norm": 0.4897501468658447, "learning_rate": 9.97642642300167e-06, "loss": 0.4416, "step": 3289 }, { "epoch": 0.1509797622871828, "grad_norm": 0.46450138092041016, "learning_rate": 9.976402636063519e-06, "loss": 0.3624, "step": 3290 }, { "epoch": 0.15102565279243724, "grad_norm": 0.4670571982860565, "learning_rate": 9.97637883715869e-06, "loss": 0.3467, "step": 3291 }, { "epoch": 0.1510715432976917, "grad_norm": 0.5098255276679993, "learning_rate": 9.976355026287237e-06, "loss": 0.4609, "step": 3292 }, { "epoch": 0.15111743380294618, "grad_norm": 0.4938158392906189, "learning_rate": 9.976331203449224e-06, "loss": 0.437, "step": 3293 }, { "epoch": 0.15116332430820062, "grad_norm": 0.5252211093902588, "learning_rate": 9.976307368644704e-06, "loss": 0.5031, "step": 3294 }, { "epoch": 0.1512092148134551, "grad_norm": 0.4526505470275879, "learning_rate": 9.976283521873738e-06, "loss": 0.3591, "step": 3295 }, { "epoch": 0.15125510531870956, "grad_norm": 0.4161567687988281, "learning_rate": 9.976259663136378e-06, "loss": 0.3301, "step": 3296 }, { "epoch": 0.15130099582396403, "grad_norm": 0.4712582528591156, "learning_rate": 9.976235792432685e-06, "loss": 0.42, "step": 3297 }, { "epoch": 0.15134688632921847, "grad_norm": 0.42962464690208435, "learning_rate": 9.976211909762714e-06, "loss": 0.3086, "step": 3298 }, { "epoch": 0.15139277683447294, "grad_norm": 0.43894869089126587, "learning_rate": 9.976188015126526e-06, "loss": 0.3597, "step": 3299 }, { "epoch": 0.1514386673397274, "grad_norm": 0.4999287724494934, "learning_rate": 9.976164108524175e-06, "loss": 0.4506, "step": 3300 }, { "epoch": 0.15148455784498188, "grad_norm": 0.48164981603622437, "learning_rate": 9.97614018995572e-06, "loss": 0.4152, "step": 3301 }, { "epoch": 0.15153044835023632, "grad_norm": 0.4818864166736603, "learning_rate": 9.976116259421218e-06, "loss": 0.398, "step": 3302 }, { "epoch": 0.1515763388554908, "grad_norm": 0.4519684314727783, "learning_rate": 9.976092316920729e-06, "loss": 0.3498, "step": 3303 }, { "epoch": 0.15162222936074526, "grad_norm": 0.48560863733291626, "learning_rate": 9.976068362454306e-06, "loss": 0.4594, "step": 3304 }, { "epoch": 0.15166811986599973, "grad_norm": 0.476107656955719, "learning_rate": 9.97604439602201e-06, "loss": 0.4122, "step": 3305 }, { "epoch": 0.15171401037125418, "grad_norm": 0.46209830045700073, "learning_rate": 9.976020417623898e-06, "loss": 0.3688, "step": 3306 }, { "epoch": 0.15175990087650865, "grad_norm": 0.4434915781021118, "learning_rate": 9.975996427260027e-06, "loss": 0.3113, "step": 3307 }, { "epoch": 0.15180579138176312, "grad_norm": 0.4564756453037262, "learning_rate": 9.975972424930455e-06, "loss": 0.3615, "step": 3308 }, { "epoch": 0.1518516818870176, "grad_norm": 0.46090400218963623, "learning_rate": 9.97594841063524e-06, "loss": 0.3829, "step": 3309 }, { "epoch": 0.15189757239227203, "grad_norm": 0.419474720954895, "learning_rate": 9.97592438437444e-06, "loss": 0.3067, "step": 3310 }, { "epoch": 0.1519434628975265, "grad_norm": 0.5025521516799927, "learning_rate": 9.97590034614811e-06, "loss": 0.5174, "step": 3311 }, { "epoch": 0.15198935340278097, "grad_norm": 0.4800359010696411, "learning_rate": 9.975876295956313e-06, "loss": 0.4037, "step": 3312 }, { "epoch": 0.15203524390803544, "grad_norm": 0.4942193031311035, "learning_rate": 9.975852233799104e-06, "loss": 0.4936, "step": 3313 }, { "epoch": 0.15208113441328988, "grad_norm": 0.4991069436073303, "learning_rate": 9.97582815967654e-06, "loss": 0.4341, "step": 3314 }, { "epoch": 0.15212702491854435, "grad_norm": 0.468336284160614, "learning_rate": 9.975804073588677e-06, "loss": 0.4085, "step": 3315 }, { "epoch": 0.15217291542379882, "grad_norm": 0.6517894864082336, "learning_rate": 9.975779975535578e-06, "loss": 0.4395, "step": 3316 }, { "epoch": 0.1522188059290533, "grad_norm": 0.47369495034217834, "learning_rate": 9.975755865517299e-06, "loss": 0.4105, "step": 3317 }, { "epoch": 0.15226469643430773, "grad_norm": 0.4783988893032074, "learning_rate": 9.975731743533898e-06, "loss": 0.4524, "step": 3318 }, { "epoch": 0.1523105869395622, "grad_norm": 0.5697835087776184, "learning_rate": 9.97570760958543e-06, "loss": 0.4298, "step": 3319 }, { "epoch": 0.15235647744481667, "grad_norm": 0.46827632188796997, "learning_rate": 9.97568346367196e-06, "loss": 0.3887, "step": 3320 }, { "epoch": 0.15240236795007114, "grad_norm": 0.4631456434726715, "learning_rate": 9.975659305793537e-06, "loss": 0.3773, "step": 3321 }, { "epoch": 0.15244825845532559, "grad_norm": 0.5249735116958618, "learning_rate": 9.975635135950225e-06, "loss": 0.5099, "step": 3322 }, { "epoch": 0.15249414896058006, "grad_norm": 0.4569174647331238, "learning_rate": 9.975610954142082e-06, "loss": 0.3858, "step": 3323 }, { "epoch": 0.15254003946583453, "grad_norm": 0.46604296565055847, "learning_rate": 9.975586760369164e-06, "loss": 0.429, "step": 3324 }, { "epoch": 0.15258592997108897, "grad_norm": 0.4511224925518036, "learning_rate": 9.97556255463153e-06, "loss": 0.3165, "step": 3325 }, { "epoch": 0.15263182047634344, "grad_norm": 0.47166356444358826, "learning_rate": 9.97553833692924e-06, "loss": 0.4136, "step": 3326 }, { "epoch": 0.1526777109815979, "grad_norm": 0.48687249422073364, "learning_rate": 9.97551410726235e-06, "loss": 0.4023, "step": 3327 }, { "epoch": 0.15272360148685238, "grad_norm": 0.5155811905860901, "learning_rate": 9.975489865630919e-06, "loss": 0.5007, "step": 3328 }, { "epoch": 0.15276949199210682, "grad_norm": 0.46205323934555054, "learning_rate": 9.975465612035003e-06, "loss": 0.4261, "step": 3329 }, { "epoch": 0.1528153824973613, "grad_norm": 0.44857993721961975, "learning_rate": 9.975441346474665e-06, "loss": 0.345, "step": 3330 }, { "epoch": 0.15286127300261576, "grad_norm": 0.4689047336578369, "learning_rate": 9.97541706894996e-06, "loss": 0.3974, "step": 3331 }, { "epoch": 0.15290716350787023, "grad_norm": 0.5168011784553528, "learning_rate": 9.975392779460947e-06, "loss": 0.5219, "step": 3332 }, { "epoch": 0.15295305401312467, "grad_norm": 0.5148208141326904, "learning_rate": 9.975368478007686e-06, "loss": 0.5021, "step": 3333 }, { "epoch": 0.15299894451837914, "grad_norm": 0.46601489186286926, "learning_rate": 9.975344164590233e-06, "loss": 0.3969, "step": 3334 }, { "epoch": 0.1530448350236336, "grad_norm": 0.5498623251914978, "learning_rate": 9.975319839208646e-06, "loss": 0.4889, "step": 3335 }, { "epoch": 0.15309072552888808, "grad_norm": 0.47389283776283264, "learning_rate": 9.975295501862987e-06, "loss": 0.3962, "step": 3336 }, { "epoch": 0.15313661603414253, "grad_norm": 0.42044904828071594, "learning_rate": 9.975271152553312e-06, "loss": 0.2977, "step": 3337 }, { "epoch": 0.153182506539397, "grad_norm": 0.4859681725502014, "learning_rate": 9.97524679127968e-06, "loss": 0.4673, "step": 3338 }, { "epoch": 0.15322839704465147, "grad_norm": 0.50887131690979, "learning_rate": 9.97522241804215e-06, "loss": 0.4669, "step": 3339 }, { "epoch": 0.15327428754990594, "grad_norm": 0.48404571413993835, "learning_rate": 9.97519803284078e-06, "loss": 0.4461, "step": 3340 }, { "epoch": 0.15332017805516038, "grad_norm": 0.4372429847717285, "learning_rate": 9.97517363567563e-06, "loss": 0.3436, "step": 3341 }, { "epoch": 0.15336606856041485, "grad_norm": 0.48688292503356934, "learning_rate": 9.975149226546756e-06, "loss": 0.4416, "step": 3342 }, { "epoch": 0.15341195906566932, "grad_norm": 0.4545394480228424, "learning_rate": 9.975124805454219e-06, "loss": 0.3806, "step": 3343 }, { "epoch": 0.1534578495709238, "grad_norm": 0.437548965215683, "learning_rate": 9.975100372398078e-06, "loss": 0.3484, "step": 3344 }, { "epoch": 0.15350374007617823, "grad_norm": 0.43340831995010376, "learning_rate": 9.97507592737839e-06, "loss": 0.2852, "step": 3345 }, { "epoch": 0.1535496305814327, "grad_norm": 0.49539417028427124, "learning_rate": 9.975051470395214e-06, "loss": 0.4105, "step": 3346 }, { "epoch": 0.15359552108668717, "grad_norm": 0.5326932072639465, "learning_rate": 9.97502700144861e-06, "loss": 0.5791, "step": 3347 }, { "epoch": 0.15364141159194164, "grad_norm": 0.5001088380813599, "learning_rate": 9.975002520538637e-06, "loss": 0.5075, "step": 3348 }, { "epoch": 0.15368730209719608, "grad_norm": 0.5102899074554443, "learning_rate": 9.974978027665351e-06, "loss": 0.4281, "step": 3349 }, { "epoch": 0.15373319260245055, "grad_norm": 0.4571594297885895, "learning_rate": 9.974953522828815e-06, "loss": 0.4102, "step": 3350 }, { "epoch": 0.15377908310770502, "grad_norm": 0.4282696545124054, "learning_rate": 9.974929006029087e-06, "loss": 0.3064, "step": 3351 }, { "epoch": 0.1538249736129595, "grad_norm": 0.5148342847824097, "learning_rate": 9.974904477266222e-06, "loss": 0.4558, "step": 3352 }, { "epoch": 0.15387086411821393, "grad_norm": 0.468266099691391, "learning_rate": 9.974879936540283e-06, "loss": 0.362, "step": 3353 }, { "epoch": 0.1539167546234684, "grad_norm": 0.4335395395755768, "learning_rate": 9.974855383851327e-06, "loss": 0.3431, "step": 3354 }, { "epoch": 0.15396264512872287, "grad_norm": 0.4697313606739044, "learning_rate": 9.974830819199414e-06, "loss": 0.4196, "step": 3355 }, { "epoch": 0.15400853563397732, "grad_norm": 0.4735826551914215, "learning_rate": 9.974806242584605e-06, "loss": 0.3677, "step": 3356 }, { "epoch": 0.1540544261392318, "grad_norm": 0.5247685313224792, "learning_rate": 9.974781654006955e-06, "loss": 0.5134, "step": 3357 }, { "epoch": 0.15410031664448626, "grad_norm": 0.5189568400382996, "learning_rate": 9.974757053466526e-06, "loss": 0.5135, "step": 3358 }, { "epoch": 0.15414620714974073, "grad_norm": 0.46164682507514954, "learning_rate": 9.974732440963377e-06, "loss": 0.377, "step": 3359 }, { "epoch": 0.15419209765499517, "grad_norm": 0.44032758474349976, "learning_rate": 9.974707816497566e-06, "loss": 0.3886, "step": 3360 }, { "epoch": 0.15423798816024964, "grad_norm": 0.49407023191452026, "learning_rate": 9.974683180069152e-06, "loss": 0.4566, "step": 3361 }, { "epoch": 0.1542838786655041, "grad_norm": 0.5214409828186035, "learning_rate": 9.974658531678196e-06, "loss": 0.529, "step": 3362 }, { "epoch": 0.15432976917075858, "grad_norm": 0.41360002756118774, "learning_rate": 9.974633871324754e-06, "loss": 0.31, "step": 3363 }, { "epoch": 0.15437565967601302, "grad_norm": 0.506309986114502, "learning_rate": 9.97460919900889e-06, "loss": 0.4304, "step": 3364 }, { "epoch": 0.1544215501812675, "grad_norm": 0.467813640832901, "learning_rate": 9.97458451473066e-06, "loss": 0.3867, "step": 3365 }, { "epoch": 0.15446744068652196, "grad_norm": 0.46149975061416626, "learning_rate": 9.974559818490126e-06, "loss": 0.346, "step": 3366 }, { "epoch": 0.15451333119177643, "grad_norm": 0.4611506760120392, "learning_rate": 9.974535110287344e-06, "loss": 0.3927, "step": 3367 }, { "epoch": 0.15455922169703087, "grad_norm": 0.45857664942741394, "learning_rate": 9.974510390122376e-06, "loss": 0.3892, "step": 3368 }, { "epoch": 0.15460511220228534, "grad_norm": 0.4210169017314911, "learning_rate": 9.97448565799528e-06, "loss": 0.3153, "step": 3369 }, { "epoch": 0.15465100270753981, "grad_norm": 0.4962354004383087, "learning_rate": 9.974460913906115e-06, "loss": 0.4937, "step": 3370 }, { "epoch": 0.15469689321279428, "grad_norm": 0.4946744441986084, "learning_rate": 9.974436157854943e-06, "loss": 0.4492, "step": 3371 }, { "epoch": 0.15474278371804873, "grad_norm": 0.5004401206970215, "learning_rate": 9.974411389841822e-06, "loss": 0.5147, "step": 3372 }, { "epoch": 0.1547886742233032, "grad_norm": 0.44354549050331116, "learning_rate": 9.974386609866811e-06, "loss": 0.3313, "step": 3373 }, { "epoch": 0.15483456472855767, "grad_norm": 0.4852031469345093, "learning_rate": 9.97436181792997e-06, "loss": 0.4147, "step": 3374 }, { "epoch": 0.15488045523381214, "grad_norm": 0.45381179451942444, "learning_rate": 9.974337014031358e-06, "loss": 0.3414, "step": 3375 }, { "epoch": 0.15492634573906658, "grad_norm": 0.49262282252311707, "learning_rate": 9.974312198171037e-06, "loss": 0.4475, "step": 3376 }, { "epoch": 0.15497223624432105, "grad_norm": 0.5245398283004761, "learning_rate": 9.974287370349066e-06, "loss": 0.5369, "step": 3377 }, { "epoch": 0.15501812674957552, "grad_norm": 0.5146275162696838, "learning_rate": 9.9742625305655e-06, "loss": 0.4949, "step": 3378 }, { "epoch": 0.15506401725483, "grad_norm": 0.47069868445396423, "learning_rate": 9.974237678820405e-06, "loss": 0.3461, "step": 3379 }, { "epoch": 0.15510990776008443, "grad_norm": 0.4811304211616516, "learning_rate": 9.974212815113838e-06, "loss": 0.3857, "step": 3380 }, { "epoch": 0.1551557982653389, "grad_norm": 0.4665345847606659, "learning_rate": 9.97418793944586e-06, "loss": 0.335, "step": 3381 }, { "epoch": 0.15520168877059337, "grad_norm": 0.48886385560035706, "learning_rate": 9.97416305181653e-06, "loss": 0.4225, "step": 3382 }, { "epoch": 0.1552475792758478, "grad_norm": 0.4920494556427002, "learning_rate": 9.974138152225906e-06, "loss": 0.3834, "step": 3383 }, { "epoch": 0.15529346978110228, "grad_norm": 0.5018653869628906, "learning_rate": 9.97411324067405e-06, "loss": 0.4707, "step": 3384 }, { "epoch": 0.15533936028635675, "grad_norm": 0.46046924591064453, "learning_rate": 9.974088317161022e-06, "loss": 0.3816, "step": 3385 }, { "epoch": 0.15538525079161122, "grad_norm": 0.471876323223114, "learning_rate": 9.974063381686882e-06, "loss": 0.393, "step": 3386 }, { "epoch": 0.15543114129686567, "grad_norm": 0.5284046530723572, "learning_rate": 9.974038434251688e-06, "loss": 0.4763, "step": 3387 }, { "epoch": 0.15547703180212014, "grad_norm": 0.45570287108421326, "learning_rate": 9.974013474855502e-06, "loss": 0.4046, "step": 3388 }, { "epoch": 0.1555229223073746, "grad_norm": 0.5144028067588806, "learning_rate": 9.973988503498385e-06, "loss": 0.3695, "step": 3389 }, { "epoch": 0.15556881281262908, "grad_norm": 0.5176663398742676, "learning_rate": 9.973963520180394e-06, "loss": 0.4461, "step": 3390 }, { "epoch": 0.15561470331788352, "grad_norm": 0.4735722839832306, "learning_rate": 9.97393852490159e-06, "loss": 0.38, "step": 3391 }, { "epoch": 0.155660593823138, "grad_norm": 0.5099039673805237, "learning_rate": 9.973913517662034e-06, "loss": 0.486, "step": 3392 }, { "epoch": 0.15570648432839246, "grad_norm": 0.4798545241355896, "learning_rate": 9.973888498461786e-06, "loss": 0.3874, "step": 3393 }, { "epoch": 0.15575237483364693, "grad_norm": 0.4520500600337982, "learning_rate": 9.973863467300908e-06, "loss": 0.3966, "step": 3394 }, { "epoch": 0.15579826533890137, "grad_norm": 0.526831328868866, "learning_rate": 9.973838424179456e-06, "loss": 0.4817, "step": 3395 }, { "epoch": 0.15584415584415584, "grad_norm": 0.45720869302749634, "learning_rate": 9.973813369097493e-06, "loss": 0.4067, "step": 3396 }, { "epoch": 0.1558900463494103, "grad_norm": 0.45186010003089905, "learning_rate": 9.973788302055078e-06, "loss": 0.3877, "step": 3397 }, { "epoch": 0.15593593685466478, "grad_norm": 0.4824221730232239, "learning_rate": 9.973763223052272e-06, "loss": 0.4087, "step": 3398 }, { "epoch": 0.15598182735991922, "grad_norm": 0.47514066100120544, "learning_rate": 9.973738132089135e-06, "loss": 0.4592, "step": 3399 }, { "epoch": 0.1560277178651737, "grad_norm": 0.5052584409713745, "learning_rate": 9.973713029165728e-06, "loss": 0.3943, "step": 3400 }, { "epoch": 0.15607360837042816, "grad_norm": 0.4857885241508484, "learning_rate": 9.97368791428211e-06, "loss": 0.4136, "step": 3401 }, { "epoch": 0.15611949887568263, "grad_norm": 0.544730007648468, "learning_rate": 9.973662787438345e-06, "loss": 0.4757, "step": 3402 }, { "epoch": 0.15616538938093708, "grad_norm": 0.49583789706230164, "learning_rate": 9.973637648634489e-06, "loss": 0.4761, "step": 3403 }, { "epoch": 0.15621127988619155, "grad_norm": 0.4893946349620819, "learning_rate": 9.973612497870605e-06, "loss": 0.4811, "step": 3404 }, { "epoch": 0.15625717039144602, "grad_norm": 0.4920247793197632, "learning_rate": 9.973587335146751e-06, "loss": 0.4511, "step": 3405 }, { "epoch": 0.15630306089670049, "grad_norm": 0.511658787727356, "learning_rate": 9.973562160462989e-06, "loss": 0.4338, "step": 3406 }, { "epoch": 0.15634895140195493, "grad_norm": 0.5208668112754822, "learning_rate": 9.973536973819382e-06, "loss": 0.4519, "step": 3407 }, { "epoch": 0.1563948419072094, "grad_norm": 0.4765201807022095, "learning_rate": 9.973511775215987e-06, "loss": 0.4218, "step": 3408 }, { "epoch": 0.15644073241246387, "grad_norm": 0.45064905285835266, "learning_rate": 9.973486564652866e-06, "loss": 0.3699, "step": 3409 }, { "epoch": 0.15648662291771834, "grad_norm": 0.4541752338409424, "learning_rate": 9.973461342130079e-06, "loss": 0.3394, "step": 3410 }, { "epoch": 0.15653251342297278, "grad_norm": 0.47415247559547424, "learning_rate": 9.973436107647688e-06, "loss": 0.4073, "step": 3411 }, { "epoch": 0.15657840392822725, "grad_norm": 0.4652484953403473, "learning_rate": 9.97341086120575e-06, "loss": 0.4058, "step": 3412 }, { "epoch": 0.15662429443348172, "grad_norm": 0.47529393434524536, "learning_rate": 9.973385602804333e-06, "loss": 0.434, "step": 3413 }, { "epoch": 0.15667018493873616, "grad_norm": 0.4971560835838318, "learning_rate": 9.97336033244349e-06, "loss": 0.4805, "step": 3414 }, { "epoch": 0.15671607544399063, "grad_norm": 0.5403168201446533, "learning_rate": 9.973335050123285e-06, "loss": 0.4962, "step": 3415 }, { "epoch": 0.1567619659492451, "grad_norm": 0.4820096492767334, "learning_rate": 9.97330975584378e-06, "loss": 0.4088, "step": 3416 }, { "epoch": 0.15680785645449957, "grad_norm": 0.51247638463974, "learning_rate": 9.973284449605037e-06, "loss": 0.5114, "step": 3417 }, { "epoch": 0.15685374695975401, "grad_norm": 0.4764186441898346, "learning_rate": 9.973259131407112e-06, "loss": 0.3556, "step": 3418 }, { "epoch": 0.15689963746500848, "grad_norm": 0.44877660274505615, "learning_rate": 9.97323380125007e-06, "loss": 0.364, "step": 3419 }, { "epoch": 0.15694552797026295, "grad_norm": 0.4653012156486511, "learning_rate": 9.973208459133967e-06, "loss": 0.404, "step": 3420 }, { "epoch": 0.15699141847551742, "grad_norm": 0.5108131170272827, "learning_rate": 9.973183105058868e-06, "loss": 0.4285, "step": 3421 }, { "epoch": 0.15703730898077187, "grad_norm": 0.4447953999042511, "learning_rate": 9.973157739024834e-06, "loss": 0.3819, "step": 3422 }, { "epoch": 0.15708319948602634, "grad_norm": 0.4533008635044098, "learning_rate": 9.973132361031926e-06, "loss": 0.3846, "step": 3423 }, { "epoch": 0.1571290899912808, "grad_norm": 0.5040102005004883, "learning_rate": 9.973106971080205e-06, "loss": 0.3853, "step": 3424 }, { "epoch": 0.15717498049653528, "grad_norm": 0.5119069814682007, "learning_rate": 9.973081569169729e-06, "loss": 0.5325, "step": 3425 }, { "epoch": 0.15722087100178972, "grad_norm": 0.451943039894104, "learning_rate": 9.973056155300563e-06, "loss": 0.3256, "step": 3426 }, { "epoch": 0.1572667615070442, "grad_norm": 0.5147818922996521, "learning_rate": 9.973030729472766e-06, "loss": 0.4459, "step": 3427 }, { "epoch": 0.15731265201229866, "grad_norm": 0.4829035699367523, "learning_rate": 9.9730052916864e-06, "loss": 0.508, "step": 3428 }, { "epoch": 0.15735854251755313, "grad_norm": 0.49244794249534607, "learning_rate": 9.972979841941525e-06, "loss": 0.4083, "step": 3429 }, { "epoch": 0.15740443302280757, "grad_norm": 0.4974241852760315, "learning_rate": 9.972954380238203e-06, "loss": 0.4255, "step": 3430 }, { "epoch": 0.15745032352806204, "grad_norm": 0.4844846725463867, "learning_rate": 9.972928906576496e-06, "loss": 0.4384, "step": 3431 }, { "epoch": 0.1574962140333165, "grad_norm": 0.4852088391780853, "learning_rate": 9.972903420956463e-06, "loss": 0.4507, "step": 3432 }, { "epoch": 0.15754210453857098, "grad_norm": 0.47295406460762024, "learning_rate": 9.972877923378168e-06, "loss": 0.412, "step": 3433 }, { "epoch": 0.15758799504382542, "grad_norm": 0.47541558742523193, "learning_rate": 9.972852413841671e-06, "loss": 0.4161, "step": 3434 }, { "epoch": 0.1576338855490799, "grad_norm": 0.47602322697639465, "learning_rate": 9.972826892347035e-06, "loss": 0.4245, "step": 3435 }, { "epoch": 0.15767977605433436, "grad_norm": 0.48176711797714233, "learning_rate": 9.972801358894317e-06, "loss": 0.343, "step": 3436 }, { "epoch": 0.15772566655958883, "grad_norm": 0.5384151935577393, "learning_rate": 9.972775813483582e-06, "loss": 0.5104, "step": 3437 }, { "epoch": 0.15777155706484328, "grad_norm": 0.5327281951904297, "learning_rate": 9.972750256114891e-06, "loss": 0.5175, "step": 3438 }, { "epoch": 0.15781744757009775, "grad_norm": 0.5229492783546448, "learning_rate": 9.972724686788304e-06, "loss": 0.4321, "step": 3439 }, { "epoch": 0.15786333807535222, "grad_norm": 0.5037031173706055, "learning_rate": 9.972699105503885e-06, "loss": 0.446, "step": 3440 }, { "epoch": 0.15790922858060666, "grad_norm": 0.5082417130470276, "learning_rate": 9.972673512261694e-06, "loss": 0.4699, "step": 3441 }, { "epoch": 0.15795511908586113, "grad_norm": 0.4872379004955292, "learning_rate": 9.972647907061791e-06, "loss": 0.4487, "step": 3442 }, { "epoch": 0.1580010095911156, "grad_norm": 0.44630125164985657, "learning_rate": 9.972622289904241e-06, "loss": 0.3654, "step": 3443 }, { "epoch": 0.15804690009637007, "grad_norm": 0.5068696737289429, "learning_rate": 9.972596660789104e-06, "loss": 0.5295, "step": 3444 }, { "epoch": 0.1580927906016245, "grad_norm": 0.4432818591594696, "learning_rate": 9.97257101971644e-06, "loss": 0.3641, "step": 3445 }, { "epoch": 0.15813868110687898, "grad_norm": 0.4711827039718628, "learning_rate": 9.972545366686314e-06, "loss": 0.4242, "step": 3446 }, { "epoch": 0.15818457161213345, "grad_norm": 0.5113137364387512, "learning_rate": 9.972519701698784e-06, "loss": 0.4778, "step": 3447 }, { "epoch": 0.15823046211738792, "grad_norm": 0.42560282349586487, "learning_rate": 9.972494024753915e-06, "loss": 0.311, "step": 3448 }, { "epoch": 0.15827635262264236, "grad_norm": 0.46892601251602173, "learning_rate": 9.972468335851766e-06, "loss": 0.4211, "step": 3449 }, { "epoch": 0.15832224312789683, "grad_norm": 0.5339772701263428, "learning_rate": 9.972442634992399e-06, "loss": 0.5147, "step": 3450 }, { "epoch": 0.1583681336331513, "grad_norm": 0.4870809018611908, "learning_rate": 9.972416922175878e-06, "loss": 0.4442, "step": 3451 }, { "epoch": 0.15841402413840577, "grad_norm": 0.4548492133617401, "learning_rate": 9.972391197402265e-06, "loss": 0.3703, "step": 3452 }, { "epoch": 0.15845991464366022, "grad_norm": 0.4818779230117798, "learning_rate": 9.972365460671618e-06, "loss": 0.4731, "step": 3453 }, { "epoch": 0.15850580514891469, "grad_norm": 0.5016392469406128, "learning_rate": 9.972339711984002e-06, "loss": 0.4718, "step": 3454 }, { "epoch": 0.15855169565416916, "grad_norm": 0.4378732740879059, "learning_rate": 9.972313951339481e-06, "loss": 0.3528, "step": 3455 }, { "epoch": 0.15859758615942363, "grad_norm": 0.45574894547462463, "learning_rate": 9.972288178738112e-06, "loss": 0.3417, "step": 3456 }, { "epoch": 0.15864347666467807, "grad_norm": 0.4504535496234894, "learning_rate": 9.972262394179959e-06, "loss": 0.407, "step": 3457 }, { "epoch": 0.15868936716993254, "grad_norm": 0.5264883041381836, "learning_rate": 9.972236597665085e-06, "loss": 0.3318, "step": 3458 }, { "epoch": 0.158735257675187, "grad_norm": 0.45955637097358704, "learning_rate": 9.97221078919355e-06, "loss": 0.3648, "step": 3459 }, { "epoch": 0.15878114818044148, "grad_norm": 0.44068920612335205, "learning_rate": 9.97218496876542e-06, "loss": 0.3676, "step": 3460 }, { "epoch": 0.15882703868569592, "grad_norm": 0.4735561013221741, "learning_rate": 9.972159136380751e-06, "loss": 0.378, "step": 3461 }, { "epoch": 0.1588729291909504, "grad_norm": 0.5666905045509338, "learning_rate": 9.97213329203961e-06, "loss": 0.4883, "step": 3462 }, { "epoch": 0.15891881969620486, "grad_norm": 0.4964545667171478, "learning_rate": 9.972107435742057e-06, "loss": 0.3688, "step": 3463 }, { "epoch": 0.15896471020145933, "grad_norm": 0.49066397547721863, "learning_rate": 9.972081567488156e-06, "loss": 0.4053, "step": 3464 }, { "epoch": 0.15901060070671377, "grad_norm": 0.4651356637477875, "learning_rate": 9.972055687277969e-06, "loss": 0.3821, "step": 3465 }, { "epoch": 0.15905649121196824, "grad_norm": 0.45859792828559875, "learning_rate": 9.972029795111557e-06, "loss": 0.3823, "step": 3466 }, { "epoch": 0.1591023817172227, "grad_norm": 0.5831094980239868, "learning_rate": 9.972003890988982e-06, "loss": 0.481, "step": 3467 }, { "epoch": 0.15914827222247718, "grad_norm": 0.46102064847946167, "learning_rate": 9.971977974910305e-06, "loss": 0.3656, "step": 3468 }, { "epoch": 0.15919416272773163, "grad_norm": 0.4691809117794037, "learning_rate": 9.971952046875592e-06, "loss": 0.4027, "step": 3469 }, { "epoch": 0.1592400532329861, "grad_norm": 0.4473104774951935, "learning_rate": 9.971926106884904e-06, "loss": 0.3324, "step": 3470 }, { "epoch": 0.15928594373824057, "grad_norm": 0.45411378145217896, "learning_rate": 9.971900154938301e-06, "loss": 0.3414, "step": 3471 }, { "epoch": 0.159331834243495, "grad_norm": 0.6522583961486816, "learning_rate": 9.97187419103585e-06, "loss": 0.6009, "step": 3472 }, { "epoch": 0.15937772474874948, "grad_norm": 0.48917824029922485, "learning_rate": 9.971848215177608e-06, "loss": 0.4148, "step": 3473 }, { "epoch": 0.15942361525400395, "grad_norm": 0.46714523434638977, "learning_rate": 9.971822227363642e-06, "loss": 0.3926, "step": 3474 }, { "epoch": 0.15946950575925842, "grad_norm": 0.4685218632221222, "learning_rate": 9.971796227594011e-06, "loss": 0.4305, "step": 3475 }, { "epoch": 0.15951539626451286, "grad_norm": 0.5073850154876709, "learning_rate": 9.97177021586878e-06, "loss": 0.4215, "step": 3476 }, { "epoch": 0.15956128676976733, "grad_norm": 0.4946824014186859, "learning_rate": 9.971744192188012e-06, "loss": 0.3863, "step": 3477 }, { "epoch": 0.1596071772750218, "grad_norm": 0.5422838926315308, "learning_rate": 9.971718156551767e-06, "loss": 0.4859, "step": 3478 }, { "epoch": 0.15965306778027627, "grad_norm": 0.47773876786231995, "learning_rate": 9.971692108960109e-06, "loss": 0.3946, "step": 3479 }, { "epoch": 0.1596989582855307, "grad_norm": 0.5057024955749512, "learning_rate": 9.971666049413102e-06, "loss": 0.4902, "step": 3480 }, { "epoch": 0.15974484879078518, "grad_norm": 0.4997882843017578, "learning_rate": 9.971639977910806e-06, "loss": 0.4698, "step": 3481 }, { "epoch": 0.15979073929603965, "grad_norm": 0.4545903205871582, "learning_rate": 9.971613894453284e-06, "loss": 0.3522, "step": 3482 }, { "epoch": 0.15983662980129412, "grad_norm": 0.4788646996021271, "learning_rate": 9.971587799040602e-06, "loss": 0.3968, "step": 3483 }, { "epoch": 0.15988252030654856, "grad_norm": 0.4190256595611572, "learning_rate": 9.971561691672818e-06, "loss": 0.3483, "step": 3484 }, { "epoch": 0.15992841081180303, "grad_norm": 0.6223978400230408, "learning_rate": 9.97153557235e-06, "loss": 0.4374, "step": 3485 }, { "epoch": 0.1599743013170575, "grad_norm": 0.4866560697555542, "learning_rate": 9.971509441072206e-06, "loss": 0.4325, "step": 3486 }, { "epoch": 0.16002019182231197, "grad_norm": 0.4690292179584503, "learning_rate": 9.971483297839501e-06, "loss": 0.4232, "step": 3487 }, { "epoch": 0.16006608232756642, "grad_norm": 0.5000173449516296, "learning_rate": 9.971457142651948e-06, "loss": 0.5088, "step": 3488 }, { "epoch": 0.1601119728328209, "grad_norm": 0.45145830512046814, "learning_rate": 9.97143097550961e-06, "loss": 0.3997, "step": 3489 }, { "epoch": 0.16015786333807536, "grad_norm": 0.48280757665634155, "learning_rate": 9.971404796412549e-06, "loss": 0.4424, "step": 3490 }, { "epoch": 0.16020375384332983, "grad_norm": 0.463607519865036, "learning_rate": 9.97137860536083e-06, "loss": 0.3572, "step": 3491 }, { "epoch": 0.16024964434858427, "grad_norm": 0.4859597980976105, "learning_rate": 9.97135240235451e-06, "loss": 0.4294, "step": 3492 }, { "epoch": 0.16029553485383874, "grad_norm": 0.4947422444820404, "learning_rate": 9.971326187393661e-06, "loss": 0.4047, "step": 3493 }, { "epoch": 0.1603414253590932, "grad_norm": 0.508838415145874, "learning_rate": 9.97129996047834e-06, "loss": 0.4525, "step": 3494 }, { "epoch": 0.16038731586434768, "grad_norm": 0.4381849467754364, "learning_rate": 9.971273721608611e-06, "loss": 0.3566, "step": 3495 }, { "epoch": 0.16043320636960212, "grad_norm": 0.4655773639678955, "learning_rate": 9.971247470784538e-06, "loss": 0.3967, "step": 3496 }, { "epoch": 0.1604790968748566, "grad_norm": 0.4902365803718567, "learning_rate": 9.971221208006185e-06, "loss": 0.4305, "step": 3497 }, { "epoch": 0.16052498738011106, "grad_norm": 0.4625941812992096, "learning_rate": 9.971194933273614e-06, "loss": 0.348, "step": 3498 }, { "epoch": 0.16057087788536553, "grad_norm": 0.4561326503753662, "learning_rate": 9.971168646586886e-06, "loss": 0.3368, "step": 3499 }, { "epoch": 0.16061676839061997, "grad_norm": 0.45755860209465027, "learning_rate": 9.971142347946068e-06, "loss": 0.3352, "step": 3500 }, { "epoch": 0.16066265889587444, "grad_norm": 0.5377780199050903, "learning_rate": 9.971116037351221e-06, "loss": 0.525, "step": 3501 }, { "epoch": 0.16070854940112891, "grad_norm": 0.5142645835876465, "learning_rate": 9.971089714802409e-06, "loss": 0.4804, "step": 3502 }, { "epoch": 0.16075443990638336, "grad_norm": 0.4570762813091278, "learning_rate": 9.971063380299695e-06, "loss": 0.3607, "step": 3503 }, { "epoch": 0.16080033041163783, "grad_norm": 0.5035489201545715, "learning_rate": 9.971037033843143e-06, "loss": 0.4369, "step": 3504 }, { "epoch": 0.1608462209168923, "grad_norm": 0.4712914824485779, "learning_rate": 9.971010675432816e-06, "loss": 0.3548, "step": 3505 }, { "epoch": 0.16089211142214677, "grad_norm": 0.48235970735549927, "learning_rate": 9.970984305068777e-06, "loss": 0.4269, "step": 3506 }, { "epoch": 0.1609380019274012, "grad_norm": 0.48529648780822754, "learning_rate": 9.970957922751088e-06, "loss": 0.4613, "step": 3507 }, { "epoch": 0.16098389243265568, "grad_norm": 0.45862317085266113, "learning_rate": 9.970931528479815e-06, "loss": 0.354, "step": 3508 }, { "epoch": 0.16102978293791015, "grad_norm": 0.4915834665298462, "learning_rate": 9.970905122255022e-06, "loss": 0.4101, "step": 3509 }, { "epoch": 0.16107567344316462, "grad_norm": 0.4911457300186157, "learning_rate": 9.97087870407677e-06, "loss": 0.457, "step": 3510 }, { "epoch": 0.16112156394841906, "grad_norm": 0.4485325515270233, "learning_rate": 9.970852273945124e-06, "loss": 0.3288, "step": 3511 }, { "epoch": 0.16116745445367353, "grad_norm": 0.4835895001888275, "learning_rate": 9.970825831860147e-06, "loss": 0.4256, "step": 3512 }, { "epoch": 0.161213344958928, "grad_norm": 0.4708102345466614, "learning_rate": 9.970799377821902e-06, "loss": 0.4208, "step": 3513 }, { "epoch": 0.16125923546418247, "grad_norm": 0.45050904154777527, "learning_rate": 9.970772911830452e-06, "loss": 0.3896, "step": 3514 }, { "epoch": 0.1613051259694369, "grad_norm": 0.4893935024738312, "learning_rate": 9.970746433885865e-06, "loss": 0.4551, "step": 3515 }, { "epoch": 0.16135101647469138, "grad_norm": 0.4802362024784088, "learning_rate": 9.9707199439882e-06, "loss": 0.4072, "step": 3516 }, { "epoch": 0.16139690697994585, "grad_norm": 0.4859634041786194, "learning_rate": 9.970693442137524e-06, "loss": 0.472, "step": 3517 }, { "epoch": 0.16144279748520032, "grad_norm": 0.4686821401119232, "learning_rate": 9.970666928333898e-06, "loss": 0.4034, "step": 3518 }, { "epoch": 0.16148868799045477, "grad_norm": 0.46538659930229187, "learning_rate": 9.970640402577386e-06, "loss": 0.4106, "step": 3519 }, { "epoch": 0.16153457849570924, "grad_norm": 0.5193923115730286, "learning_rate": 9.970613864868052e-06, "loss": 0.4924, "step": 3520 }, { "epoch": 0.1615804690009637, "grad_norm": 0.4929127097129822, "learning_rate": 9.970587315205963e-06, "loss": 0.4591, "step": 3521 }, { "epoch": 0.16162635950621818, "grad_norm": 0.46408167481422424, "learning_rate": 9.970560753591179e-06, "loss": 0.4327, "step": 3522 }, { "epoch": 0.16167225001147262, "grad_norm": 0.4852508008480072, "learning_rate": 9.970534180023765e-06, "loss": 0.3999, "step": 3523 }, { "epoch": 0.1617181405167271, "grad_norm": 0.5348367094993591, "learning_rate": 9.970507594503784e-06, "loss": 0.4923, "step": 3524 }, { "epoch": 0.16176403102198156, "grad_norm": 0.6300472021102905, "learning_rate": 9.970480997031302e-06, "loss": 0.4225, "step": 3525 }, { "epoch": 0.16180992152723603, "grad_norm": 0.4214268624782562, "learning_rate": 9.970454387606383e-06, "loss": 0.3489, "step": 3526 }, { "epoch": 0.16185581203249047, "grad_norm": 0.5366626977920532, "learning_rate": 9.97042776622909e-06, "loss": 0.4199, "step": 3527 }, { "epoch": 0.16190170253774494, "grad_norm": 0.5000843405723572, "learning_rate": 9.970401132899486e-06, "loss": 0.417, "step": 3528 }, { "epoch": 0.1619475930429994, "grad_norm": 0.49688413739204407, "learning_rate": 9.970374487617634e-06, "loss": 0.4798, "step": 3529 }, { "epoch": 0.16199348354825385, "grad_norm": 0.44794321060180664, "learning_rate": 9.970347830383602e-06, "loss": 0.344, "step": 3530 }, { "epoch": 0.16203937405350832, "grad_norm": 0.4808467626571655, "learning_rate": 9.970321161197452e-06, "loss": 0.4158, "step": 3531 }, { "epoch": 0.1620852645587628, "grad_norm": 0.46551400423049927, "learning_rate": 9.970294480059249e-06, "loss": 0.411, "step": 3532 }, { "epoch": 0.16213115506401726, "grad_norm": 0.4455750286579132, "learning_rate": 9.970267786969056e-06, "loss": 0.3465, "step": 3533 }, { "epoch": 0.1621770455692717, "grad_norm": 0.4404858350753784, "learning_rate": 9.970241081926936e-06, "loss": 0.3182, "step": 3534 }, { "epoch": 0.16222293607452618, "grad_norm": 0.49429038166999817, "learning_rate": 9.970214364932958e-06, "loss": 0.405, "step": 3535 }, { "epoch": 0.16226882657978065, "grad_norm": 0.4389179050922394, "learning_rate": 9.970187635987181e-06, "loss": 0.3637, "step": 3536 }, { "epoch": 0.16231471708503512, "grad_norm": 0.49280327558517456, "learning_rate": 9.97016089508967e-06, "loss": 0.4417, "step": 3537 }, { "epoch": 0.16236060759028956, "grad_norm": 0.4841049909591675, "learning_rate": 9.970134142240492e-06, "loss": 0.4105, "step": 3538 }, { "epoch": 0.16240649809554403, "grad_norm": 0.4690589904785156, "learning_rate": 9.970107377439713e-06, "loss": 0.4488, "step": 3539 }, { "epoch": 0.1624523886007985, "grad_norm": 0.48826152086257935, "learning_rate": 9.97008060068739e-06, "loss": 0.4522, "step": 3540 }, { "epoch": 0.16249827910605297, "grad_norm": 0.4803142249584198, "learning_rate": 9.970053811983594e-06, "loss": 0.409, "step": 3541 }, { "epoch": 0.1625441696113074, "grad_norm": 0.5035238862037659, "learning_rate": 9.970027011328386e-06, "loss": 0.4819, "step": 3542 }, { "epoch": 0.16259006011656188, "grad_norm": 0.44776809215545654, "learning_rate": 9.970000198721835e-06, "loss": 0.3522, "step": 3543 }, { "epoch": 0.16263595062181635, "grad_norm": 0.5222958326339722, "learning_rate": 9.969973374163998e-06, "loss": 0.4357, "step": 3544 }, { "epoch": 0.16268184112707082, "grad_norm": 0.5011484026908875, "learning_rate": 9.969946537654944e-06, "loss": 0.4468, "step": 3545 }, { "epoch": 0.16272773163232526, "grad_norm": 0.5149145722389221, "learning_rate": 9.969919689194739e-06, "loss": 0.4858, "step": 3546 }, { "epoch": 0.16277362213757973, "grad_norm": 0.4738142192363739, "learning_rate": 9.969892828783445e-06, "loss": 0.4285, "step": 3547 }, { "epoch": 0.1628195126428342, "grad_norm": 0.49899822473526, "learning_rate": 9.969865956421127e-06, "loss": 0.4527, "step": 3548 }, { "epoch": 0.16286540314808867, "grad_norm": 0.4843798279762268, "learning_rate": 9.96983907210785e-06, "loss": 0.4493, "step": 3549 }, { "epoch": 0.16291129365334311, "grad_norm": 0.5192020535469055, "learning_rate": 9.96981217584368e-06, "loss": 0.4716, "step": 3550 }, { "epoch": 0.16295718415859758, "grad_norm": 0.46311327815055847, "learning_rate": 9.969785267628677e-06, "loss": 0.391, "step": 3551 }, { "epoch": 0.16300307466385205, "grad_norm": 0.5039772987365723, "learning_rate": 9.969758347462912e-06, "loss": 0.472, "step": 3552 }, { "epoch": 0.16304896516910652, "grad_norm": 0.4776366651058197, "learning_rate": 9.969731415346445e-06, "loss": 0.3838, "step": 3553 }, { "epoch": 0.16309485567436097, "grad_norm": 0.4233502149581909, "learning_rate": 9.969704471279342e-06, "loss": 0.3524, "step": 3554 }, { "epoch": 0.16314074617961544, "grad_norm": 0.48174601793289185, "learning_rate": 9.96967751526167e-06, "loss": 0.3853, "step": 3555 }, { "epoch": 0.1631866366848699, "grad_norm": 0.48675301671028137, "learning_rate": 9.969650547293492e-06, "loss": 0.4591, "step": 3556 }, { "epoch": 0.16323252719012438, "grad_norm": 0.4992932975292206, "learning_rate": 9.96962356737487e-06, "loss": 0.412, "step": 3557 }, { "epoch": 0.16327841769537882, "grad_norm": 0.48553457856178284, "learning_rate": 9.969596575505876e-06, "loss": 0.4594, "step": 3558 }, { "epoch": 0.1633243082006333, "grad_norm": 0.519641637802124, "learning_rate": 9.969569571686569e-06, "loss": 0.5074, "step": 3559 }, { "epoch": 0.16337019870588776, "grad_norm": 0.4730679988861084, "learning_rate": 9.969542555917013e-06, "loss": 0.4667, "step": 3560 }, { "epoch": 0.1634160892111422, "grad_norm": 0.49281245470046997, "learning_rate": 9.969515528197279e-06, "loss": 0.4372, "step": 3561 }, { "epoch": 0.16346197971639667, "grad_norm": 0.46238622069358826, "learning_rate": 9.969488488527429e-06, "loss": 0.395, "step": 3562 }, { "epoch": 0.16350787022165114, "grad_norm": 0.4411563277244568, "learning_rate": 9.969461436907524e-06, "loss": 0.3493, "step": 3563 }, { "epoch": 0.1635537607269056, "grad_norm": 0.4830007553100586, "learning_rate": 9.969434373337635e-06, "loss": 0.3951, "step": 3564 }, { "epoch": 0.16359965123216005, "grad_norm": 0.4694591462612152, "learning_rate": 9.969407297817823e-06, "loss": 0.3695, "step": 3565 }, { "epoch": 0.16364554173741452, "grad_norm": 0.45552733540534973, "learning_rate": 9.969380210348157e-06, "loss": 0.3376, "step": 3566 }, { "epoch": 0.163691432242669, "grad_norm": 0.46346917748451233, "learning_rate": 9.9693531109287e-06, "loss": 0.3801, "step": 3567 }, { "epoch": 0.16373732274792346, "grad_norm": 0.443578839302063, "learning_rate": 9.969325999559516e-06, "loss": 0.3775, "step": 3568 }, { "epoch": 0.1637832132531779, "grad_norm": 0.43470194935798645, "learning_rate": 9.969298876240672e-06, "loss": 0.3593, "step": 3569 }, { "epoch": 0.16382910375843238, "grad_norm": 0.4560805857181549, "learning_rate": 9.969271740972232e-06, "loss": 0.3197, "step": 3570 }, { "epoch": 0.16387499426368685, "grad_norm": 0.47402840852737427, "learning_rate": 9.969244593754261e-06, "loss": 0.4427, "step": 3571 }, { "epoch": 0.16392088476894132, "grad_norm": 0.4482603073120117, "learning_rate": 9.969217434586826e-06, "loss": 0.3807, "step": 3572 }, { "epoch": 0.16396677527419576, "grad_norm": 0.4365561306476593, "learning_rate": 9.969190263469992e-06, "loss": 0.3573, "step": 3573 }, { "epoch": 0.16401266577945023, "grad_norm": 0.45426681637763977, "learning_rate": 9.969163080403823e-06, "loss": 0.3594, "step": 3574 }, { "epoch": 0.1640585562847047, "grad_norm": 0.44736793637275696, "learning_rate": 9.969135885388386e-06, "loss": 0.3356, "step": 3575 }, { "epoch": 0.16410444678995917, "grad_norm": 0.4486168622970581, "learning_rate": 9.969108678423744e-06, "loss": 0.3529, "step": 3576 }, { "epoch": 0.1641503372952136, "grad_norm": 0.5961507558822632, "learning_rate": 9.969081459509965e-06, "loss": 0.4484, "step": 3577 }, { "epoch": 0.16419622780046808, "grad_norm": 0.47634968161582947, "learning_rate": 9.969054228647113e-06, "loss": 0.427, "step": 3578 }, { "epoch": 0.16424211830572255, "grad_norm": 0.4267920255661011, "learning_rate": 9.969026985835256e-06, "loss": 0.3241, "step": 3579 }, { "epoch": 0.16428800881097702, "grad_norm": 0.4745945930480957, "learning_rate": 9.968999731074453e-06, "loss": 0.4324, "step": 3580 }, { "epoch": 0.16433389931623146, "grad_norm": 0.5914706587791443, "learning_rate": 9.968972464364777e-06, "loss": 0.5544, "step": 3581 }, { "epoch": 0.16437978982148593, "grad_norm": 0.5118852853775024, "learning_rate": 9.96894518570629e-06, "loss": 0.4806, "step": 3582 }, { "epoch": 0.1644256803267404, "grad_norm": 0.47302529215812683, "learning_rate": 9.968917895099057e-06, "loss": 0.326, "step": 3583 }, { "epoch": 0.16447157083199487, "grad_norm": 0.4752989411354065, "learning_rate": 9.968890592543145e-06, "loss": 0.4096, "step": 3584 }, { "epoch": 0.16451746133724932, "grad_norm": 0.49046263098716736, "learning_rate": 9.96886327803862e-06, "loss": 0.4083, "step": 3585 }, { "epoch": 0.16456335184250379, "grad_norm": 0.5011879205703735, "learning_rate": 9.968835951585549e-06, "loss": 0.4752, "step": 3586 }, { "epoch": 0.16460924234775826, "grad_norm": 0.46065208315849304, "learning_rate": 9.968808613183993e-06, "loss": 0.3789, "step": 3587 }, { "epoch": 0.1646551328530127, "grad_norm": 0.46373555064201355, "learning_rate": 9.96878126283402e-06, "loss": 0.391, "step": 3588 }, { "epoch": 0.16470102335826717, "grad_norm": 0.49008578062057495, "learning_rate": 9.968753900535699e-06, "loss": 0.4234, "step": 3589 }, { "epoch": 0.16474691386352164, "grad_norm": 0.49280592799186707, "learning_rate": 9.968726526289091e-06, "loss": 0.4676, "step": 3590 }, { "epoch": 0.1647928043687761, "grad_norm": 0.4429382085800171, "learning_rate": 9.968699140094264e-06, "loss": 0.3359, "step": 3591 }, { "epoch": 0.16483869487403055, "grad_norm": 0.4706316590309143, "learning_rate": 9.968671741951284e-06, "loss": 0.4487, "step": 3592 }, { "epoch": 0.16488458537928502, "grad_norm": 0.4552987515926361, "learning_rate": 9.968644331860216e-06, "loss": 0.3573, "step": 3593 }, { "epoch": 0.1649304758845395, "grad_norm": 0.5117678642272949, "learning_rate": 9.968616909821128e-06, "loss": 0.4636, "step": 3594 }, { "epoch": 0.16497636638979396, "grad_norm": 0.48572269082069397, "learning_rate": 9.968589475834083e-06, "loss": 0.4353, "step": 3595 }, { "epoch": 0.1650222568950484, "grad_norm": 0.41882574558258057, "learning_rate": 9.968562029899149e-06, "loss": 0.3105, "step": 3596 }, { "epoch": 0.16506814740030287, "grad_norm": 0.49018704891204834, "learning_rate": 9.968534572016393e-06, "loss": 0.4289, "step": 3597 }, { "epoch": 0.16511403790555734, "grad_norm": 0.4399867355823517, "learning_rate": 9.968507102185879e-06, "loss": 0.3976, "step": 3598 }, { "epoch": 0.1651599284108118, "grad_norm": 0.4994634985923767, "learning_rate": 9.968479620407672e-06, "loss": 0.4735, "step": 3599 }, { "epoch": 0.16520581891606625, "grad_norm": 0.4570196866989136, "learning_rate": 9.96845212668184e-06, "loss": 0.341, "step": 3600 }, { "epoch": 0.16525170942132072, "grad_norm": 0.49720463156700134, "learning_rate": 9.968424621008448e-06, "loss": 0.4065, "step": 3601 }, { "epoch": 0.1652975999265752, "grad_norm": 0.4903962314128876, "learning_rate": 9.968397103387564e-06, "loss": 0.4717, "step": 3602 }, { "epoch": 0.16534349043182966, "grad_norm": 0.4884265959262848, "learning_rate": 9.968369573819253e-06, "loss": 0.4325, "step": 3603 }, { "epoch": 0.1653893809370841, "grad_norm": 0.4561275541782379, "learning_rate": 9.96834203230358e-06, "loss": 0.4037, "step": 3604 }, { "epoch": 0.16543527144233858, "grad_norm": 0.4439562261104584, "learning_rate": 9.968314478840614e-06, "loss": 0.3302, "step": 3605 }, { "epoch": 0.16548116194759305, "grad_norm": 0.4430003762245178, "learning_rate": 9.968286913430419e-06, "loss": 0.3572, "step": 3606 }, { "epoch": 0.16552705245284752, "grad_norm": 0.54210364818573, "learning_rate": 9.968259336073063e-06, "loss": 0.4807, "step": 3607 }, { "epoch": 0.16557294295810196, "grad_norm": 0.42343929409980774, "learning_rate": 9.96823174676861e-06, "loss": 0.3588, "step": 3608 }, { "epoch": 0.16561883346335643, "grad_norm": 0.4443022906780243, "learning_rate": 9.968204145517128e-06, "loss": 0.3409, "step": 3609 }, { "epoch": 0.1656647239686109, "grad_norm": 0.47555527091026306, "learning_rate": 9.968176532318682e-06, "loss": 0.4034, "step": 3610 }, { "epoch": 0.16571061447386537, "grad_norm": 0.47132250666618347, "learning_rate": 9.968148907173344e-06, "loss": 0.3995, "step": 3611 }, { "epoch": 0.1657565049791198, "grad_norm": 0.551042377948761, "learning_rate": 9.968121270081171e-06, "loss": 0.4833, "step": 3612 }, { "epoch": 0.16580239548437428, "grad_norm": 0.4281960427761078, "learning_rate": 9.968093621042236e-06, "loss": 0.3566, "step": 3613 }, { "epoch": 0.16584828598962875, "grad_norm": 0.48665499687194824, "learning_rate": 9.968065960056603e-06, "loss": 0.401, "step": 3614 }, { "epoch": 0.16589417649488322, "grad_norm": 0.5198683142662048, "learning_rate": 9.96803828712434e-06, "loss": 0.4607, "step": 3615 }, { "epoch": 0.16594006700013766, "grad_norm": 0.48868244886398315, "learning_rate": 9.968010602245514e-06, "loss": 0.4556, "step": 3616 }, { "epoch": 0.16598595750539213, "grad_norm": 0.4600757658481598, "learning_rate": 9.967982905420188e-06, "loss": 0.3886, "step": 3617 }, { "epoch": 0.1660318480106466, "grad_norm": 0.4531581997871399, "learning_rate": 9.967955196648433e-06, "loss": 0.3932, "step": 3618 }, { "epoch": 0.16607773851590105, "grad_norm": 0.511566698551178, "learning_rate": 9.967927475930314e-06, "loss": 0.4625, "step": 3619 }, { "epoch": 0.16612362902115552, "grad_norm": 0.5202604532241821, "learning_rate": 9.967899743265896e-06, "loss": 0.5543, "step": 3620 }, { "epoch": 0.16616951952641, "grad_norm": 0.46145981550216675, "learning_rate": 9.967871998655247e-06, "loss": 0.3985, "step": 3621 }, { "epoch": 0.16621541003166446, "grad_norm": 0.4441681206226349, "learning_rate": 9.967844242098435e-06, "loss": 0.4123, "step": 3622 }, { "epoch": 0.1662613005369189, "grad_norm": 0.5212213397026062, "learning_rate": 9.967816473595525e-06, "loss": 0.4368, "step": 3623 }, { "epoch": 0.16630719104217337, "grad_norm": 0.4769091308116913, "learning_rate": 9.967788693146583e-06, "loss": 0.4398, "step": 3624 }, { "epoch": 0.16635308154742784, "grad_norm": 0.5438241362571716, "learning_rate": 9.967760900751679e-06, "loss": 0.5716, "step": 3625 }, { "epoch": 0.1663989720526823, "grad_norm": 0.5117270350456238, "learning_rate": 9.967733096410877e-06, "loss": 0.5079, "step": 3626 }, { "epoch": 0.16644486255793675, "grad_norm": 0.4526447653770447, "learning_rate": 9.967705280124244e-06, "loss": 0.3317, "step": 3627 }, { "epoch": 0.16649075306319122, "grad_norm": 0.48207688331604004, "learning_rate": 9.967677451891849e-06, "loss": 0.4355, "step": 3628 }, { "epoch": 0.1665366435684457, "grad_norm": 0.5575090646743774, "learning_rate": 9.967649611713757e-06, "loss": 0.4618, "step": 3629 }, { "epoch": 0.16658253407370016, "grad_norm": 0.48127418756484985, "learning_rate": 9.967621759590036e-06, "loss": 0.4136, "step": 3630 }, { "epoch": 0.1666284245789546, "grad_norm": 0.44972941279411316, "learning_rate": 9.96759389552075e-06, "loss": 0.351, "step": 3631 }, { "epoch": 0.16667431508420907, "grad_norm": 0.4519535005092621, "learning_rate": 9.967566019505972e-06, "loss": 0.3936, "step": 3632 }, { "epoch": 0.16672020558946354, "grad_norm": 0.48320138454437256, "learning_rate": 9.967538131545765e-06, "loss": 0.422, "step": 3633 }, { "epoch": 0.166766096094718, "grad_norm": 0.4901284873485565, "learning_rate": 9.967510231640194e-06, "loss": 0.4414, "step": 3634 }, { "epoch": 0.16681198659997246, "grad_norm": 0.6204987168312073, "learning_rate": 9.96748231978933e-06, "loss": 0.4445, "step": 3635 }, { "epoch": 0.16685787710522693, "grad_norm": 0.42596858739852905, "learning_rate": 9.967454395993239e-06, "loss": 0.3173, "step": 3636 }, { "epoch": 0.1669037676104814, "grad_norm": 0.4553443193435669, "learning_rate": 9.96742646025199e-06, "loss": 0.3544, "step": 3637 }, { "epoch": 0.16694965811573587, "grad_norm": 0.4198419749736786, "learning_rate": 9.967398512565645e-06, "loss": 0.345, "step": 3638 }, { "epoch": 0.1669955486209903, "grad_norm": 0.4592035114765167, "learning_rate": 9.967370552934275e-06, "loss": 0.3388, "step": 3639 }, { "epoch": 0.16704143912624478, "grad_norm": 0.4720841646194458, "learning_rate": 9.967342581357948e-06, "loss": 0.3526, "step": 3640 }, { "epoch": 0.16708732963149925, "grad_norm": 0.449739009141922, "learning_rate": 9.967314597836729e-06, "loss": 0.3367, "step": 3641 }, { "epoch": 0.16713322013675372, "grad_norm": 0.45591211318969727, "learning_rate": 9.967286602370684e-06, "loss": 0.346, "step": 3642 }, { "epoch": 0.16717911064200816, "grad_norm": 0.477683961391449, "learning_rate": 9.967258594959885e-06, "loss": 0.3681, "step": 3643 }, { "epoch": 0.16722500114726263, "grad_norm": 0.4451040029525757, "learning_rate": 9.967230575604397e-06, "loss": 0.3441, "step": 3644 }, { "epoch": 0.1672708916525171, "grad_norm": 0.5009549856185913, "learning_rate": 9.967202544304286e-06, "loss": 0.4363, "step": 3645 }, { "epoch": 0.16731678215777157, "grad_norm": 0.49261751770973206, "learning_rate": 9.967174501059622e-06, "loss": 0.4321, "step": 3646 }, { "epoch": 0.167362672663026, "grad_norm": 0.48177361488342285, "learning_rate": 9.96714644587047e-06, "loss": 0.423, "step": 3647 }, { "epoch": 0.16740856316828048, "grad_norm": 0.48607900738716125, "learning_rate": 9.967118378736899e-06, "loss": 0.459, "step": 3648 }, { "epoch": 0.16745445367353495, "grad_norm": 0.49827563762664795, "learning_rate": 9.967090299658975e-06, "loss": 0.4954, "step": 3649 }, { "epoch": 0.1675003441787894, "grad_norm": 0.5257965922355652, "learning_rate": 9.967062208636767e-06, "loss": 0.4866, "step": 3650 }, { "epoch": 0.16754623468404387, "grad_norm": 0.4925329089164734, "learning_rate": 9.967034105670341e-06, "loss": 0.3914, "step": 3651 }, { "epoch": 0.16759212518929834, "grad_norm": 0.4688475430011749, "learning_rate": 9.967005990759768e-06, "loss": 0.3363, "step": 3652 }, { "epoch": 0.1676380156945528, "grad_norm": 0.4857305884361267, "learning_rate": 9.966977863905112e-06, "loss": 0.4413, "step": 3653 }, { "epoch": 0.16768390619980725, "grad_norm": 0.4764745533466339, "learning_rate": 9.96694972510644e-06, "loss": 0.4698, "step": 3654 }, { "epoch": 0.16772979670506172, "grad_norm": 0.5319045782089233, "learning_rate": 9.966921574363824e-06, "loss": 0.4475, "step": 3655 }, { "epoch": 0.1677756872103162, "grad_norm": 0.48479700088500977, "learning_rate": 9.966893411677328e-06, "loss": 0.4159, "step": 3656 }, { "epoch": 0.16782157771557066, "grad_norm": 0.46323296427726746, "learning_rate": 9.966865237047022e-06, "loss": 0.3678, "step": 3657 }, { "epoch": 0.1678674682208251, "grad_norm": 0.5238285660743713, "learning_rate": 9.966837050472973e-06, "loss": 0.4789, "step": 3658 }, { "epoch": 0.16791335872607957, "grad_norm": 0.5209804177284241, "learning_rate": 9.966808851955248e-06, "loss": 0.4723, "step": 3659 }, { "epoch": 0.16795924923133404, "grad_norm": 0.4425673484802246, "learning_rate": 9.966780641493914e-06, "loss": 0.3276, "step": 3660 }, { "epoch": 0.1680051397365885, "grad_norm": 0.4626409411430359, "learning_rate": 9.96675241908904e-06, "loss": 0.3701, "step": 3661 }, { "epoch": 0.16805103024184295, "grad_norm": 0.4914664924144745, "learning_rate": 9.966724184740694e-06, "loss": 0.4388, "step": 3662 }, { "epoch": 0.16809692074709742, "grad_norm": 0.4645136892795563, "learning_rate": 9.966695938448946e-06, "loss": 0.3894, "step": 3663 }, { "epoch": 0.1681428112523519, "grad_norm": 0.48512527346611023, "learning_rate": 9.966667680213861e-06, "loss": 0.4511, "step": 3664 }, { "epoch": 0.16818870175760636, "grad_norm": 0.48779502511024475, "learning_rate": 9.966639410035508e-06, "loss": 0.4428, "step": 3665 }, { "epoch": 0.1682345922628608, "grad_norm": 0.45460015535354614, "learning_rate": 9.966611127913953e-06, "loss": 0.3389, "step": 3666 }, { "epoch": 0.16828048276811527, "grad_norm": 0.4733645021915436, "learning_rate": 9.966582833849267e-06, "loss": 0.3401, "step": 3667 }, { "epoch": 0.16832637327336974, "grad_norm": 0.5052480101585388, "learning_rate": 9.966554527841517e-06, "loss": 0.3982, "step": 3668 }, { "epoch": 0.16837226377862421, "grad_norm": 0.4637988805770874, "learning_rate": 9.96652620989077e-06, "loss": 0.4158, "step": 3669 }, { "epoch": 0.16841815428387866, "grad_norm": 0.48965662717819214, "learning_rate": 9.966497879997097e-06, "loss": 0.4883, "step": 3670 }, { "epoch": 0.16846404478913313, "grad_norm": 0.5050069689750671, "learning_rate": 9.96646953816056e-06, "loss": 0.4985, "step": 3671 }, { "epoch": 0.1685099352943876, "grad_norm": 0.4886408746242523, "learning_rate": 9.966441184381235e-06, "loss": 0.5171, "step": 3672 }, { "epoch": 0.16855582579964207, "grad_norm": 0.5022122263908386, "learning_rate": 9.966412818659186e-06, "loss": 0.4599, "step": 3673 }, { "epoch": 0.1686017163048965, "grad_norm": 0.4836370646953583, "learning_rate": 9.96638444099448e-06, "loss": 0.4248, "step": 3674 }, { "epoch": 0.16864760681015098, "grad_norm": 0.4565540850162506, "learning_rate": 9.966356051387186e-06, "loss": 0.4002, "step": 3675 }, { "epoch": 0.16869349731540545, "grad_norm": 0.5088211297988892, "learning_rate": 9.966327649837376e-06, "loss": 0.4823, "step": 3676 }, { "epoch": 0.1687393878206599, "grad_norm": 0.4447498023509979, "learning_rate": 9.966299236345113e-06, "loss": 0.3399, "step": 3677 }, { "epoch": 0.16878527832591436, "grad_norm": 0.46449801325798035, "learning_rate": 9.96627081091047e-06, "loss": 0.3688, "step": 3678 }, { "epoch": 0.16883116883116883, "grad_norm": 0.5423493981361389, "learning_rate": 9.966242373533513e-06, "loss": 0.5368, "step": 3679 }, { "epoch": 0.1688770593364233, "grad_norm": 0.4940808415412903, "learning_rate": 9.966213924214308e-06, "loss": 0.4598, "step": 3680 }, { "epoch": 0.16892294984167774, "grad_norm": 0.4560638964176178, "learning_rate": 9.966185462952929e-06, "loss": 0.3924, "step": 3681 }, { "epoch": 0.16896884034693221, "grad_norm": 0.4676917493343353, "learning_rate": 9.966156989749438e-06, "loss": 0.3839, "step": 3682 }, { "epoch": 0.16901473085218668, "grad_norm": 0.4943281412124634, "learning_rate": 9.96612850460391e-06, "loss": 0.4358, "step": 3683 }, { "epoch": 0.16906062135744115, "grad_norm": 0.48242637515068054, "learning_rate": 9.966100007516409e-06, "loss": 0.3759, "step": 3684 }, { "epoch": 0.1691065118626956, "grad_norm": 0.5070915222167969, "learning_rate": 9.966071498487004e-06, "loss": 0.4628, "step": 3685 }, { "epoch": 0.16915240236795007, "grad_norm": 0.49251246452331543, "learning_rate": 9.966042977515765e-06, "loss": 0.45, "step": 3686 }, { "epoch": 0.16919829287320454, "grad_norm": 0.5031691193580627, "learning_rate": 9.96601444460276e-06, "loss": 0.4651, "step": 3687 }, { "epoch": 0.169244183378459, "grad_norm": 0.43061497807502747, "learning_rate": 9.965985899748058e-06, "loss": 0.3171, "step": 3688 }, { "epoch": 0.16929007388371345, "grad_norm": 0.4535612165927887, "learning_rate": 9.965957342951726e-06, "loss": 0.3392, "step": 3689 }, { "epoch": 0.16933596438896792, "grad_norm": 0.46733710169792175, "learning_rate": 9.965928774213835e-06, "loss": 0.3838, "step": 3690 }, { "epoch": 0.1693818548942224, "grad_norm": 0.4973802864551544, "learning_rate": 9.965900193534452e-06, "loss": 0.3731, "step": 3691 }, { "epoch": 0.16942774539947686, "grad_norm": 0.501643180847168, "learning_rate": 9.965871600913646e-06, "loss": 0.4404, "step": 3692 }, { "epoch": 0.1694736359047313, "grad_norm": 0.5125111937522888, "learning_rate": 9.965842996351487e-06, "loss": 0.4333, "step": 3693 }, { "epoch": 0.16951952640998577, "grad_norm": 0.475371778011322, "learning_rate": 9.965814379848043e-06, "loss": 0.4084, "step": 3694 }, { "epoch": 0.16956541691524024, "grad_norm": 0.4575386345386505, "learning_rate": 9.965785751403383e-06, "loss": 0.3676, "step": 3695 }, { "epoch": 0.1696113074204947, "grad_norm": 0.45974910259246826, "learning_rate": 9.965757111017574e-06, "loss": 0.3816, "step": 3696 }, { "epoch": 0.16965719792574915, "grad_norm": 0.4485854506492615, "learning_rate": 9.965728458690687e-06, "loss": 0.3579, "step": 3697 }, { "epoch": 0.16970308843100362, "grad_norm": 0.5038155317306519, "learning_rate": 9.96569979442279e-06, "loss": 0.4234, "step": 3698 }, { "epoch": 0.1697489789362581, "grad_norm": 0.45121437311172485, "learning_rate": 9.965671118213954e-06, "loss": 0.4015, "step": 3699 }, { "epoch": 0.16979486944151256, "grad_norm": 0.4932691156864166, "learning_rate": 9.965642430064245e-06, "loss": 0.4586, "step": 3700 }, { "epoch": 0.169840759946767, "grad_norm": 0.4838521480560303, "learning_rate": 9.965613729973731e-06, "loss": 0.4324, "step": 3701 }, { "epoch": 0.16988665045202148, "grad_norm": 0.4346429705619812, "learning_rate": 9.965585017942484e-06, "loss": 0.3272, "step": 3702 }, { "epoch": 0.16993254095727595, "grad_norm": 0.5014439225196838, "learning_rate": 9.965556293970575e-06, "loss": 0.4157, "step": 3703 }, { "epoch": 0.16997843146253042, "grad_norm": 0.5021127462387085, "learning_rate": 9.965527558058067e-06, "loss": 0.4221, "step": 3704 }, { "epoch": 0.17002432196778486, "grad_norm": 0.4837076961994171, "learning_rate": 9.965498810205035e-06, "loss": 0.4338, "step": 3705 }, { "epoch": 0.17007021247303933, "grad_norm": 0.4779275953769684, "learning_rate": 9.965470050411545e-06, "loss": 0.4311, "step": 3706 }, { "epoch": 0.1701161029782938, "grad_norm": 0.5239678025245667, "learning_rate": 9.965441278677665e-06, "loss": 0.4465, "step": 3707 }, { "epoch": 0.17016199348354824, "grad_norm": 0.4581717848777771, "learning_rate": 9.965412495003468e-06, "loss": 0.4253, "step": 3708 }, { "epoch": 0.1702078839888027, "grad_norm": 0.4749666750431061, "learning_rate": 9.965383699389019e-06, "loss": 0.4275, "step": 3709 }, { "epoch": 0.17025377449405718, "grad_norm": 0.47646042704582214, "learning_rate": 9.96535489183439e-06, "loss": 0.3938, "step": 3710 }, { "epoch": 0.17029966499931165, "grad_norm": 0.5016754269599915, "learning_rate": 9.965326072339652e-06, "loss": 0.4693, "step": 3711 }, { "epoch": 0.1703455555045661, "grad_norm": 0.5509760975837708, "learning_rate": 9.96529724090487e-06, "loss": 0.5276, "step": 3712 }, { "epoch": 0.17039144600982056, "grad_norm": 0.5025253891944885, "learning_rate": 9.965268397530114e-06, "loss": 0.4199, "step": 3713 }, { "epoch": 0.17043733651507503, "grad_norm": 0.4659944772720337, "learning_rate": 9.965239542215457e-06, "loss": 0.3768, "step": 3714 }, { "epoch": 0.1704832270203295, "grad_norm": 0.4489624500274658, "learning_rate": 9.965210674960965e-06, "loss": 0.3799, "step": 3715 }, { "epoch": 0.17052911752558395, "grad_norm": 0.47398844361305237, "learning_rate": 9.965181795766707e-06, "loss": 0.4186, "step": 3716 }, { "epoch": 0.17057500803083842, "grad_norm": 0.48437100648880005, "learning_rate": 9.965152904632756e-06, "loss": 0.3324, "step": 3717 }, { "epoch": 0.17062089853609289, "grad_norm": 0.49503275752067566, "learning_rate": 9.965124001559177e-06, "loss": 0.4116, "step": 3718 }, { "epoch": 0.17066678904134736, "grad_norm": 0.5525345802307129, "learning_rate": 9.965095086546044e-06, "loss": 0.4694, "step": 3719 }, { "epoch": 0.1707126795466018, "grad_norm": 0.46774229407310486, "learning_rate": 9.965066159593424e-06, "loss": 0.4353, "step": 3720 }, { "epoch": 0.17075857005185627, "grad_norm": 0.4906098544597626, "learning_rate": 9.965037220701385e-06, "loss": 0.4723, "step": 3721 }, { "epoch": 0.17080446055711074, "grad_norm": 0.4435950815677643, "learning_rate": 9.96500826987e-06, "loss": 0.3678, "step": 3722 }, { "epoch": 0.1708503510623652, "grad_norm": 0.4562053382396698, "learning_rate": 9.964979307099337e-06, "loss": 0.3785, "step": 3723 }, { "epoch": 0.17089624156761965, "grad_norm": 0.5205396413803101, "learning_rate": 9.964950332389466e-06, "loss": 0.4253, "step": 3724 }, { "epoch": 0.17094213207287412, "grad_norm": 0.4992291033267975, "learning_rate": 9.964921345740457e-06, "loss": 0.5003, "step": 3725 }, { "epoch": 0.1709880225781286, "grad_norm": 0.49866196513175964, "learning_rate": 9.964892347152377e-06, "loss": 0.4234, "step": 3726 }, { "epoch": 0.17103391308338306, "grad_norm": 0.463359534740448, "learning_rate": 9.964863336625298e-06, "loss": 0.3827, "step": 3727 }, { "epoch": 0.1710798035886375, "grad_norm": 0.441690057516098, "learning_rate": 9.964834314159293e-06, "loss": 0.3189, "step": 3728 }, { "epoch": 0.17112569409389197, "grad_norm": 0.5331432223320007, "learning_rate": 9.964805279754425e-06, "loss": 0.4308, "step": 3729 }, { "epoch": 0.17117158459914644, "grad_norm": 0.5610324144363403, "learning_rate": 9.964776233410768e-06, "loss": 0.4735, "step": 3730 }, { "epoch": 0.1712174751044009, "grad_norm": 0.4304240643978119, "learning_rate": 9.964747175128393e-06, "loss": 0.3354, "step": 3731 }, { "epoch": 0.17126336560965535, "grad_norm": 0.4620472192764282, "learning_rate": 9.964718104907366e-06, "loss": 0.365, "step": 3732 }, { "epoch": 0.17130925611490982, "grad_norm": 0.44709792733192444, "learning_rate": 9.96468902274776e-06, "loss": 0.329, "step": 3733 }, { "epoch": 0.1713551466201643, "grad_norm": 0.4359210431575775, "learning_rate": 9.964659928649643e-06, "loss": 0.3637, "step": 3734 }, { "epoch": 0.17140103712541874, "grad_norm": 0.48796746134757996, "learning_rate": 9.964630822613085e-06, "loss": 0.4465, "step": 3735 }, { "epoch": 0.1714469276306732, "grad_norm": 0.48671266436576843, "learning_rate": 9.964601704638158e-06, "loss": 0.4265, "step": 3736 }, { "epoch": 0.17149281813592768, "grad_norm": 0.4906500279903412, "learning_rate": 9.96457257472493e-06, "loss": 0.4478, "step": 3737 }, { "epoch": 0.17153870864118215, "grad_norm": 0.46206560730934143, "learning_rate": 9.964543432873472e-06, "loss": 0.3835, "step": 3738 }, { "epoch": 0.1715845991464366, "grad_norm": 0.4673829674720764, "learning_rate": 9.964514279083855e-06, "loss": 0.4125, "step": 3739 }, { "epoch": 0.17163048965169106, "grad_norm": 0.4572446942329407, "learning_rate": 9.964485113356147e-06, "loss": 0.3757, "step": 3740 }, { "epoch": 0.17167638015694553, "grad_norm": 0.4328352212905884, "learning_rate": 9.96445593569042e-06, "loss": 0.3632, "step": 3741 }, { "epoch": 0.1717222706622, "grad_norm": 0.519863486289978, "learning_rate": 9.964426746086742e-06, "loss": 0.3733, "step": 3742 }, { "epoch": 0.17176816116745444, "grad_norm": 0.47234734892845154, "learning_rate": 9.964397544545185e-06, "loss": 0.3922, "step": 3743 }, { "epoch": 0.1718140516727089, "grad_norm": 0.4189852476119995, "learning_rate": 9.964368331065819e-06, "loss": 0.3346, "step": 3744 }, { "epoch": 0.17185994217796338, "grad_norm": 0.481424480676651, "learning_rate": 9.964339105648714e-06, "loss": 0.4253, "step": 3745 }, { "epoch": 0.17190583268321785, "grad_norm": 0.43244752287864685, "learning_rate": 9.96430986829394e-06, "loss": 0.3178, "step": 3746 }, { "epoch": 0.1719517231884723, "grad_norm": 0.50309157371521, "learning_rate": 9.964280619001567e-06, "loss": 0.4308, "step": 3747 }, { "epoch": 0.17199761369372676, "grad_norm": 0.4675474464893341, "learning_rate": 9.964251357771667e-06, "loss": 0.3889, "step": 3748 }, { "epoch": 0.17204350419898123, "grad_norm": 0.5053369998931885, "learning_rate": 9.96422208460431e-06, "loss": 0.4544, "step": 3749 }, { "epoch": 0.1720893947042357, "grad_norm": 0.4964584708213806, "learning_rate": 9.964192799499564e-06, "loss": 0.4483, "step": 3750 }, { "epoch": 0.17213528520949015, "grad_norm": 0.491644024848938, "learning_rate": 9.9641635024575e-06, "loss": 0.4092, "step": 3751 }, { "epoch": 0.17218117571474462, "grad_norm": 0.4788534939289093, "learning_rate": 9.964134193478191e-06, "loss": 0.375, "step": 3752 }, { "epoch": 0.1722270662199991, "grad_norm": 0.4660608172416687, "learning_rate": 9.964104872561706e-06, "loss": 0.3623, "step": 3753 }, { "epoch": 0.17227295672525356, "grad_norm": 0.4548393785953522, "learning_rate": 9.964075539708116e-06, "loss": 0.3486, "step": 3754 }, { "epoch": 0.172318847230508, "grad_norm": 0.5075151920318604, "learning_rate": 9.964046194917491e-06, "loss": 0.4984, "step": 3755 }, { "epoch": 0.17236473773576247, "grad_norm": 0.4920263886451721, "learning_rate": 9.9640168381899e-06, "loss": 0.4409, "step": 3756 }, { "epoch": 0.17241062824101694, "grad_norm": 0.4510353207588196, "learning_rate": 9.963987469525415e-06, "loss": 0.3959, "step": 3757 }, { "epoch": 0.1724565187462714, "grad_norm": 0.49358996748924255, "learning_rate": 9.963958088924109e-06, "loss": 0.4045, "step": 3758 }, { "epoch": 0.17250240925152585, "grad_norm": 0.4904539883136749, "learning_rate": 9.96392869638605e-06, "loss": 0.4491, "step": 3759 }, { "epoch": 0.17254829975678032, "grad_norm": 0.45569685101509094, "learning_rate": 9.963899291911308e-06, "loss": 0.4318, "step": 3760 }, { "epoch": 0.1725941902620348, "grad_norm": 0.4705761671066284, "learning_rate": 9.963869875499956e-06, "loss": 0.3506, "step": 3761 }, { "epoch": 0.17264008076728926, "grad_norm": 0.4552665650844574, "learning_rate": 9.963840447152063e-06, "loss": 0.4337, "step": 3762 }, { "epoch": 0.1726859712725437, "grad_norm": 0.4712662696838379, "learning_rate": 9.9638110068677e-06, "loss": 0.3753, "step": 3763 }, { "epoch": 0.17273186177779817, "grad_norm": 0.44561973214149475, "learning_rate": 9.96378155464694e-06, "loss": 0.3517, "step": 3764 }, { "epoch": 0.17277775228305264, "grad_norm": 0.50220787525177, "learning_rate": 9.96375209048985e-06, "loss": 0.4737, "step": 3765 }, { "epoch": 0.17282364278830709, "grad_norm": 0.459413081407547, "learning_rate": 9.963722614396503e-06, "loss": 0.4116, "step": 3766 }, { "epoch": 0.17286953329356156, "grad_norm": 0.4793335497379303, "learning_rate": 9.963693126366972e-06, "loss": 0.3843, "step": 3767 }, { "epoch": 0.17291542379881603, "grad_norm": 0.4663759171962738, "learning_rate": 9.963663626401323e-06, "loss": 0.4197, "step": 3768 }, { "epoch": 0.1729613143040705, "grad_norm": 0.4310791492462158, "learning_rate": 9.963634114499629e-06, "loss": 0.358, "step": 3769 }, { "epoch": 0.17300720480932494, "grad_norm": 0.4253799617290497, "learning_rate": 9.963604590661964e-06, "loss": 0.3484, "step": 3770 }, { "epoch": 0.1730530953145794, "grad_norm": 0.5067339539527893, "learning_rate": 9.963575054888395e-06, "loss": 0.4284, "step": 3771 }, { "epoch": 0.17309898581983388, "grad_norm": 0.5011279582977295, "learning_rate": 9.963545507178995e-06, "loss": 0.41, "step": 3772 }, { "epoch": 0.17314487632508835, "grad_norm": 0.45513442158699036, "learning_rate": 9.963515947533835e-06, "loss": 0.4058, "step": 3773 }, { "epoch": 0.1731907668303428, "grad_norm": 0.5227753520011902, "learning_rate": 9.963486375952984e-06, "loss": 0.4831, "step": 3774 }, { "epoch": 0.17323665733559726, "grad_norm": 0.4769015610218048, "learning_rate": 9.963456792436517e-06, "loss": 0.3781, "step": 3775 }, { "epoch": 0.17328254784085173, "grad_norm": 0.4975082278251648, "learning_rate": 9.9634271969845e-06, "loss": 0.364, "step": 3776 }, { "epoch": 0.1733284383461062, "grad_norm": 0.4561043381690979, "learning_rate": 9.96339758959701e-06, "loss": 0.392, "step": 3777 }, { "epoch": 0.17337432885136064, "grad_norm": 0.5077850222587585, "learning_rate": 9.963367970274115e-06, "loss": 0.4662, "step": 3778 }, { "epoch": 0.1734202193566151, "grad_norm": 0.5776070952415466, "learning_rate": 9.963338339015886e-06, "loss": 0.492, "step": 3779 }, { "epoch": 0.17346610986186958, "grad_norm": 0.49255937337875366, "learning_rate": 9.963308695822394e-06, "loss": 0.448, "step": 3780 }, { "epoch": 0.17351200036712405, "grad_norm": 0.4622495472431183, "learning_rate": 9.963279040693712e-06, "loss": 0.4243, "step": 3781 }, { "epoch": 0.1735578908723785, "grad_norm": 0.45348840951919556, "learning_rate": 9.96324937362991e-06, "loss": 0.3295, "step": 3782 }, { "epoch": 0.17360378137763297, "grad_norm": 0.46781226992607117, "learning_rate": 9.96321969463106e-06, "loss": 0.4001, "step": 3783 }, { "epoch": 0.17364967188288744, "grad_norm": 0.5318712592124939, "learning_rate": 9.963190003697232e-06, "loss": 0.5383, "step": 3784 }, { "epoch": 0.1736955623881419, "grad_norm": 0.520931601524353, "learning_rate": 9.963160300828498e-06, "loss": 0.4828, "step": 3785 }, { "epoch": 0.17374145289339635, "grad_norm": 0.4373350143432617, "learning_rate": 9.963130586024932e-06, "loss": 0.3647, "step": 3786 }, { "epoch": 0.17378734339865082, "grad_norm": 0.468069851398468, "learning_rate": 9.963100859286603e-06, "loss": 0.4065, "step": 3787 }, { "epoch": 0.1738332339039053, "grad_norm": 0.44646987318992615, "learning_rate": 9.963071120613582e-06, "loss": 0.3698, "step": 3788 }, { "epoch": 0.17387912440915976, "grad_norm": 0.4330134689807892, "learning_rate": 9.963041370005941e-06, "loss": 0.3641, "step": 3789 }, { "epoch": 0.1739250149144142, "grad_norm": 0.4948446452617645, "learning_rate": 9.963011607463754e-06, "loss": 0.4567, "step": 3790 }, { "epoch": 0.17397090541966867, "grad_norm": 0.47151556611061096, "learning_rate": 9.962981832987088e-06, "loss": 0.4281, "step": 3791 }, { "epoch": 0.17401679592492314, "grad_norm": 0.6274124383926392, "learning_rate": 9.962952046576017e-06, "loss": 0.4597, "step": 3792 }, { "epoch": 0.1740626864301776, "grad_norm": 0.46481117606163025, "learning_rate": 9.962922248230616e-06, "loss": 0.3707, "step": 3793 }, { "epoch": 0.17410857693543205, "grad_norm": 0.4411012828350067, "learning_rate": 9.96289243795095e-06, "loss": 0.3539, "step": 3794 }, { "epoch": 0.17415446744068652, "grad_norm": 0.49497660994529724, "learning_rate": 9.962862615737094e-06, "loss": 0.4638, "step": 3795 }, { "epoch": 0.174200357945941, "grad_norm": 0.4659208655357361, "learning_rate": 9.962832781589123e-06, "loss": 0.4386, "step": 3796 }, { "epoch": 0.17424624845119543, "grad_norm": 0.4618721008300781, "learning_rate": 9.962802935507102e-06, "loss": 0.3876, "step": 3797 }, { "epoch": 0.1742921389564499, "grad_norm": 0.460696280002594, "learning_rate": 9.962773077491107e-06, "loss": 0.3844, "step": 3798 }, { "epoch": 0.17433802946170437, "grad_norm": 0.48609527945518494, "learning_rate": 9.96274320754121e-06, "loss": 0.4433, "step": 3799 }, { "epoch": 0.17438391996695884, "grad_norm": 0.5098398923873901, "learning_rate": 9.96271332565748e-06, "loss": 0.4815, "step": 3800 }, { "epoch": 0.1744298104722133, "grad_norm": 0.40706196427345276, "learning_rate": 9.962683431839994e-06, "loss": 0.2788, "step": 3801 }, { "epoch": 0.17447570097746776, "grad_norm": 0.5089811682701111, "learning_rate": 9.96265352608882e-06, "loss": 0.4467, "step": 3802 }, { "epoch": 0.17452159148272223, "grad_norm": 0.4700194299221039, "learning_rate": 9.962623608404028e-06, "loss": 0.4038, "step": 3803 }, { "epoch": 0.1745674819879767, "grad_norm": 0.5290846824645996, "learning_rate": 9.962593678785693e-06, "loss": 0.5268, "step": 3804 }, { "epoch": 0.17461337249323114, "grad_norm": 0.4735446870326996, "learning_rate": 9.962563737233888e-06, "loss": 0.3945, "step": 3805 }, { "epoch": 0.1746592629984856, "grad_norm": 0.49044692516326904, "learning_rate": 9.962533783748682e-06, "loss": 0.4818, "step": 3806 }, { "epoch": 0.17470515350374008, "grad_norm": 0.43118026852607727, "learning_rate": 9.96250381833015e-06, "loss": 0.3217, "step": 3807 }, { "epoch": 0.17475104400899455, "grad_norm": 0.49126771092414856, "learning_rate": 9.962473840978362e-06, "loss": 0.4709, "step": 3808 }, { "epoch": 0.174796934514249, "grad_norm": 0.46345171332359314, "learning_rate": 9.96244385169339e-06, "loss": 0.4144, "step": 3809 }, { "epoch": 0.17484282501950346, "grad_norm": 0.4336242377758026, "learning_rate": 9.962413850475306e-06, "loss": 0.3282, "step": 3810 }, { "epoch": 0.17488871552475793, "grad_norm": 0.48221275210380554, "learning_rate": 9.962383837324184e-06, "loss": 0.4674, "step": 3811 }, { "epoch": 0.1749346060300124, "grad_norm": 0.4423587918281555, "learning_rate": 9.962353812240096e-06, "loss": 0.3312, "step": 3812 }, { "epoch": 0.17498049653526684, "grad_norm": 0.47093838453292847, "learning_rate": 9.962323775223111e-06, "loss": 0.3988, "step": 3813 }, { "epoch": 0.17502638704052131, "grad_norm": 0.4595012366771698, "learning_rate": 9.962293726273305e-06, "loss": 0.4156, "step": 3814 }, { "epoch": 0.17507227754577578, "grad_norm": 0.4803919792175293, "learning_rate": 9.962263665390748e-06, "loss": 0.4951, "step": 3815 }, { "epoch": 0.17511816805103025, "grad_norm": 0.4672667980194092, "learning_rate": 9.962233592575515e-06, "loss": 0.4091, "step": 3816 }, { "epoch": 0.1751640585562847, "grad_norm": 0.4833678901195526, "learning_rate": 9.962203507827674e-06, "loss": 0.3736, "step": 3817 }, { "epoch": 0.17520994906153917, "grad_norm": 0.4316823482513428, "learning_rate": 9.9621734111473e-06, "loss": 0.3018, "step": 3818 }, { "epoch": 0.17525583956679364, "grad_norm": 0.47848308086395264, "learning_rate": 9.962143302534466e-06, "loss": 0.4222, "step": 3819 }, { "epoch": 0.1753017300720481, "grad_norm": 0.5713353157043457, "learning_rate": 9.962113181989243e-06, "loss": 0.4904, "step": 3820 }, { "epoch": 0.17534762057730255, "grad_norm": 0.5381240248680115, "learning_rate": 9.962083049511704e-06, "loss": 0.4708, "step": 3821 }, { "epoch": 0.17539351108255702, "grad_norm": 0.47154226899147034, "learning_rate": 9.96205290510192e-06, "loss": 0.4344, "step": 3822 }, { "epoch": 0.1754394015878115, "grad_norm": 0.4654196500778198, "learning_rate": 9.962022748759967e-06, "loss": 0.4286, "step": 3823 }, { "epoch": 0.17548529209306593, "grad_norm": 0.4943636357784271, "learning_rate": 9.961992580485913e-06, "loss": 0.4273, "step": 3824 }, { "epoch": 0.1755311825983204, "grad_norm": 0.4802487790584564, "learning_rate": 9.961962400279834e-06, "loss": 0.4116, "step": 3825 }, { "epoch": 0.17557707310357487, "grad_norm": 0.4972319006919861, "learning_rate": 9.961932208141802e-06, "loss": 0.4426, "step": 3826 }, { "epoch": 0.17562296360882934, "grad_norm": 0.4343493580818176, "learning_rate": 9.96190200407189e-06, "loss": 0.3951, "step": 3827 }, { "epoch": 0.17566885411408378, "grad_norm": 0.4805845618247986, "learning_rate": 9.961871788070168e-06, "loss": 0.404, "step": 3828 }, { "epoch": 0.17571474461933825, "grad_norm": 0.45114660263061523, "learning_rate": 9.961841560136712e-06, "loss": 0.3356, "step": 3829 }, { "epoch": 0.17576063512459272, "grad_norm": 0.49260181188583374, "learning_rate": 9.96181132027159e-06, "loss": 0.4555, "step": 3830 }, { "epoch": 0.1758065256298472, "grad_norm": 0.48737210035324097, "learning_rate": 9.96178106847488e-06, "loss": 0.3995, "step": 3831 }, { "epoch": 0.17585241613510164, "grad_norm": 0.4794608950614929, "learning_rate": 9.961750804746654e-06, "loss": 0.3866, "step": 3832 }, { "epoch": 0.1758983066403561, "grad_norm": 0.4778745472431183, "learning_rate": 9.96172052908698e-06, "loss": 0.4369, "step": 3833 }, { "epoch": 0.17594419714561058, "grad_norm": 0.5258398652076721, "learning_rate": 9.961690241495936e-06, "loss": 0.4323, "step": 3834 }, { "epoch": 0.17599008765086505, "grad_norm": 0.45457005500793457, "learning_rate": 9.961659941973592e-06, "loss": 0.3441, "step": 3835 }, { "epoch": 0.1760359781561195, "grad_norm": 0.46597379446029663, "learning_rate": 9.961629630520022e-06, "loss": 0.355, "step": 3836 }, { "epoch": 0.17608186866137396, "grad_norm": 0.5124558806419373, "learning_rate": 9.961599307135299e-06, "loss": 0.4844, "step": 3837 }, { "epoch": 0.17612775916662843, "grad_norm": 0.46185001730918884, "learning_rate": 9.961568971819495e-06, "loss": 0.3821, "step": 3838 }, { "epoch": 0.1761736496718829, "grad_norm": 0.44716793298721313, "learning_rate": 9.961538624572685e-06, "loss": 0.3264, "step": 3839 }, { "epoch": 0.17621954017713734, "grad_norm": 0.49822792410850525, "learning_rate": 9.96150826539494e-06, "loss": 0.4027, "step": 3840 }, { "epoch": 0.1762654306823918, "grad_norm": 0.4766508638858795, "learning_rate": 9.961477894286332e-06, "loss": 0.4056, "step": 3841 }, { "epoch": 0.17631132118764628, "grad_norm": 0.4933946132659912, "learning_rate": 9.961447511246936e-06, "loss": 0.4253, "step": 3842 }, { "epoch": 0.17635721169290075, "grad_norm": 0.4556927978992462, "learning_rate": 9.961417116276826e-06, "loss": 0.3572, "step": 3843 }, { "epoch": 0.1764031021981552, "grad_norm": 0.5002210140228271, "learning_rate": 9.961386709376072e-06, "loss": 0.4649, "step": 3844 }, { "epoch": 0.17644899270340966, "grad_norm": 0.4519858658313751, "learning_rate": 9.961356290544751e-06, "loss": 0.3925, "step": 3845 }, { "epoch": 0.17649488320866413, "grad_norm": 0.4758327603340149, "learning_rate": 9.961325859782933e-06, "loss": 0.3982, "step": 3846 }, { "epoch": 0.1765407737139186, "grad_norm": 0.4676792323589325, "learning_rate": 9.96129541709069e-06, "loss": 0.3477, "step": 3847 }, { "epoch": 0.17658666421917305, "grad_norm": 0.4864857494831085, "learning_rate": 9.9612649624681e-06, "loss": 0.3598, "step": 3848 }, { "epoch": 0.17663255472442752, "grad_norm": 0.49274805188179016, "learning_rate": 9.961234495915233e-06, "loss": 0.463, "step": 3849 }, { "epoch": 0.17667844522968199, "grad_norm": 0.4697285294532776, "learning_rate": 9.961204017432163e-06, "loss": 0.4385, "step": 3850 }, { "epoch": 0.17672433573493646, "grad_norm": 0.46031564474105835, "learning_rate": 9.961173527018963e-06, "loss": 0.3431, "step": 3851 }, { "epoch": 0.1767702262401909, "grad_norm": 0.4652436375617981, "learning_rate": 9.961143024675706e-06, "loss": 0.4305, "step": 3852 }, { "epoch": 0.17681611674544537, "grad_norm": 0.4586910307407379, "learning_rate": 9.961112510402467e-06, "loss": 0.4002, "step": 3853 }, { "epoch": 0.17686200725069984, "grad_norm": 0.46300554275512695, "learning_rate": 9.961081984199316e-06, "loss": 0.4463, "step": 3854 }, { "epoch": 0.17690789775595428, "grad_norm": 0.4883333444595337, "learning_rate": 9.96105144606633e-06, "loss": 0.4351, "step": 3855 }, { "epoch": 0.17695378826120875, "grad_norm": 0.4559125006198883, "learning_rate": 9.96102089600358e-06, "loss": 0.3931, "step": 3856 }, { "epoch": 0.17699967876646322, "grad_norm": 0.5298438668251038, "learning_rate": 9.960990334011142e-06, "loss": 0.4868, "step": 3857 }, { "epoch": 0.1770455692717177, "grad_norm": 0.43969202041625977, "learning_rate": 9.960959760089089e-06, "loss": 0.386, "step": 3858 }, { "epoch": 0.17709145977697213, "grad_norm": 0.4842301905155182, "learning_rate": 9.96092917423749e-06, "loss": 0.4328, "step": 3859 }, { "epoch": 0.1771373502822266, "grad_norm": 0.4557982087135315, "learning_rate": 9.960898576456425e-06, "loss": 0.3619, "step": 3860 }, { "epoch": 0.17718324078748107, "grad_norm": 0.48352086544036865, "learning_rate": 9.960867966745965e-06, "loss": 0.4453, "step": 3861 }, { "epoch": 0.17722913129273554, "grad_norm": 0.5317992568016052, "learning_rate": 9.96083734510618e-06, "loss": 0.4751, "step": 3862 }, { "epoch": 0.17727502179798998, "grad_norm": 0.4392800033092499, "learning_rate": 9.960806711537148e-06, "loss": 0.3603, "step": 3863 }, { "epoch": 0.17732091230324445, "grad_norm": 0.45197445154190063, "learning_rate": 9.960776066038943e-06, "loss": 0.388, "step": 3864 }, { "epoch": 0.17736680280849892, "grad_norm": 0.5064261555671692, "learning_rate": 9.960745408611635e-06, "loss": 0.4131, "step": 3865 }, { "epoch": 0.1774126933137534, "grad_norm": 0.4442690312862396, "learning_rate": 9.960714739255302e-06, "loss": 0.3958, "step": 3866 }, { "epoch": 0.17745858381900784, "grad_norm": 0.4621271789073944, "learning_rate": 9.960684057970016e-06, "loss": 0.4308, "step": 3867 }, { "epoch": 0.1775044743242623, "grad_norm": 0.48386305570602417, "learning_rate": 9.960653364755849e-06, "loss": 0.4659, "step": 3868 }, { "epoch": 0.17755036482951678, "grad_norm": 0.4527401328086853, "learning_rate": 9.960622659612877e-06, "loss": 0.3876, "step": 3869 }, { "epoch": 0.17759625533477125, "grad_norm": 0.4745809733867645, "learning_rate": 9.960591942541173e-06, "loss": 0.458, "step": 3870 }, { "epoch": 0.1776421458400257, "grad_norm": 0.4915264844894409, "learning_rate": 9.960561213540812e-06, "loss": 0.5, "step": 3871 }, { "epoch": 0.17768803634528016, "grad_norm": 0.4707813560962677, "learning_rate": 9.960530472611865e-06, "loss": 0.4334, "step": 3872 }, { "epoch": 0.17773392685053463, "grad_norm": 0.4973183274269104, "learning_rate": 9.960499719754408e-06, "loss": 0.4591, "step": 3873 }, { "epoch": 0.1777798173557891, "grad_norm": 0.4460873603820801, "learning_rate": 9.960468954968516e-06, "loss": 0.3125, "step": 3874 }, { "epoch": 0.17782570786104354, "grad_norm": 0.51996910572052, "learning_rate": 9.960438178254261e-06, "loss": 0.5235, "step": 3875 }, { "epoch": 0.177871598366298, "grad_norm": 0.45582249760627747, "learning_rate": 9.960407389611718e-06, "loss": 0.3608, "step": 3876 }, { "epoch": 0.17791748887155248, "grad_norm": 0.470163494348526, "learning_rate": 9.960376589040962e-06, "loss": 0.4331, "step": 3877 }, { "epoch": 0.17796337937680695, "grad_norm": 0.49415069818496704, "learning_rate": 9.960345776542062e-06, "loss": 0.4714, "step": 3878 }, { "epoch": 0.1780092698820614, "grad_norm": 0.5034629106521606, "learning_rate": 9.9603149521151e-06, "loss": 0.514, "step": 3879 }, { "epoch": 0.17805516038731586, "grad_norm": 0.4771920144557953, "learning_rate": 9.960284115760144e-06, "loss": 0.3596, "step": 3880 }, { "epoch": 0.17810105089257033, "grad_norm": 0.47345420718193054, "learning_rate": 9.96025326747727e-06, "loss": 0.4061, "step": 3881 }, { "epoch": 0.17814694139782478, "grad_norm": 0.44490885734558105, "learning_rate": 9.960222407266553e-06, "loss": 0.3568, "step": 3882 }, { "epoch": 0.17819283190307925, "grad_norm": 0.5291013121604919, "learning_rate": 9.960191535128067e-06, "loss": 0.4777, "step": 3883 }, { "epoch": 0.17823872240833372, "grad_norm": 0.4483031630516052, "learning_rate": 9.960160651061886e-06, "loss": 0.3457, "step": 3884 }, { "epoch": 0.1782846129135882, "grad_norm": 0.48007747530937195, "learning_rate": 9.960129755068084e-06, "loss": 0.439, "step": 3885 }, { "epoch": 0.17833050341884263, "grad_norm": 0.44911623001098633, "learning_rate": 9.960098847146734e-06, "loss": 0.3728, "step": 3886 }, { "epoch": 0.1783763939240971, "grad_norm": 0.46962645649909973, "learning_rate": 9.960067927297912e-06, "loss": 0.3574, "step": 3887 }, { "epoch": 0.17842228442935157, "grad_norm": 0.44819939136505127, "learning_rate": 9.960036995521694e-06, "loss": 0.3183, "step": 3888 }, { "epoch": 0.17846817493460604, "grad_norm": 0.471474826335907, "learning_rate": 9.960006051818151e-06, "loss": 0.4022, "step": 3889 }, { "epoch": 0.17851406543986048, "grad_norm": 0.4894413948059082, "learning_rate": 9.959975096187358e-06, "loss": 0.4282, "step": 3890 }, { "epoch": 0.17855995594511495, "grad_norm": 0.47600311040878296, "learning_rate": 9.959944128629393e-06, "loss": 0.4284, "step": 3891 }, { "epoch": 0.17860584645036942, "grad_norm": 0.4736107587814331, "learning_rate": 9.959913149144325e-06, "loss": 0.4115, "step": 3892 }, { "epoch": 0.1786517369556239, "grad_norm": 0.48581165075302124, "learning_rate": 9.959882157732233e-06, "loss": 0.4666, "step": 3893 }, { "epoch": 0.17869762746087833, "grad_norm": 0.6128531098365784, "learning_rate": 9.95985115439319e-06, "loss": 0.4254, "step": 3894 }, { "epoch": 0.1787435179661328, "grad_norm": 0.4596128463745117, "learning_rate": 9.95982013912727e-06, "loss": 0.353, "step": 3895 }, { "epoch": 0.17878940847138727, "grad_norm": 0.4830702543258667, "learning_rate": 9.959789111934546e-06, "loss": 0.3903, "step": 3896 }, { "epoch": 0.17883529897664174, "grad_norm": 0.4682193696498871, "learning_rate": 9.959758072815097e-06, "loss": 0.3927, "step": 3897 }, { "epoch": 0.17888118948189619, "grad_norm": 0.6033740043640137, "learning_rate": 9.959727021768993e-06, "loss": 0.4599, "step": 3898 }, { "epoch": 0.17892707998715066, "grad_norm": 0.4633723795413971, "learning_rate": 9.959695958796312e-06, "loss": 0.3477, "step": 3899 }, { "epoch": 0.17897297049240513, "grad_norm": 0.4755842089653015, "learning_rate": 9.95966488389713e-06, "loss": 0.4074, "step": 3900 }, { "epoch": 0.1790188609976596, "grad_norm": 0.4634113311767578, "learning_rate": 9.959633797071516e-06, "loss": 0.362, "step": 3901 }, { "epoch": 0.17906475150291404, "grad_norm": 0.9781712293624878, "learning_rate": 9.959602698319548e-06, "loss": 0.4318, "step": 3902 }, { "epoch": 0.1791106420081685, "grad_norm": 0.4694291651248932, "learning_rate": 9.959571587641302e-06, "loss": 0.4251, "step": 3903 }, { "epoch": 0.17915653251342298, "grad_norm": 0.48279836773872375, "learning_rate": 9.959540465036853e-06, "loss": 0.4014, "step": 3904 }, { "epoch": 0.17920242301867745, "grad_norm": 0.47537532448768616, "learning_rate": 9.959509330506272e-06, "loss": 0.4127, "step": 3905 }, { "epoch": 0.1792483135239319, "grad_norm": 0.46119365096092224, "learning_rate": 9.959478184049636e-06, "loss": 0.4297, "step": 3906 }, { "epoch": 0.17929420402918636, "grad_norm": 0.44106653332710266, "learning_rate": 9.959447025667021e-06, "loss": 0.3989, "step": 3907 }, { "epoch": 0.17934009453444083, "grad_norm": 0.4463611841201782, "learning_rate": 9.959415855358503e-06, "loss": 0.3282, "step": 3908 }, { "epoch": 0.1793859850396953, "grad_norm": 0.4646354615688324, "learning_rate": 9.959384673124154e-06, "loss": 0.3682, "step": 3909 }, { "epoch": 0.17943187554494974, "grad_norm": 0.45753049850463867, "learning_rate": 9.95935347896405e-06, "loss": 0.3966, "step": 3910 }, { "epoch": 0.1794777660502042, "grad_norm": 0.4642256796360016, "learning_rate": 9.959322272878264e-06, "loss": 0.3782, "step": 3911 }, { "epoch": 0.17952365655545868, "grad_norm": 0.46932828426361084, "learning_rate": 9.959291054866876e-06, "loss": 0.3421, "step": 3912 }, { "epoch": 0.17956954706071312, "grad_norm": 0.4701588451862335, "learning_rate": 9.959259824929957e-06, "loss": 0.4411, "step": 3913 }, { "epoch": 0.1796154375659676, "grad_norm": 0.4810388386249542, "learning_rate": 9.959228583067583e-06, "loss": 0.4355, "step": 3914 }, { "epoch": 0.17966132807122207, "grad_norm": 0.4653016924858093, "learning_rate": 9.959197329279831e-06, "loss": 0.3771, "step": 3915 }, { "epoch": 0.17970721857647654, "grad_norm": 0.5119547843933105, "learning_rate": 9.959166063566771e-06, "loss": 0.5317, "step": 3916 }, { "epoch": 0.17975310908173098, "grad_norm": 0.44539543986320496, "learning_rate": 9.959134785928486e-06, "loss": 0.3539, "step": 3917 }, { "epoch": 0.17979899958698545, "grad_norm": 0.4559662938117981, "learning_rate": 9.959103496365045e-06, "loss": 0.3529, "step": 3918 }, { "epoch": 0.17984489009223992, "grad_norm": 0.5066208243370056, "learning_rate": 9.959072194876526e-06, "loss": 0.4485, "step": 3919 }, { "epoch": 0.1798907805974944, "grad_norm": 0.4939250349998474, "learning_rate": 9.959040881463002e-06, "loss": 0.4229, "step": 3920 }, { "epoch": 0.17993667110274883, "grad_norm": 0.42819133400917053, "learning_rate": 9.959009556124552e-06, "loss": 0.3316, "step": 3921 }, { "epoch": 0.1799825616080033, "grad_norm": 0.439520925283432, "learning_rate": 9.958978218861249e-06, "loss": 0.3549, "step": 3922 }, { "epoch": 0.18002845211325777, "grad_norm": 0.5061075091362, "learning_rate": 9.958946869673165e-06, "loss": 0.4384, "step": 3923 }, { "epoch": 0.18007434261851224, "grad_norm": 0.4657272696495056, "learning_rate": 9.958915508560382e-06, "loss": 0.3991, "step": 3924 }, { "epoch": 0.18012023312376668, "grad_norm": 0.5086844563484192, "learning_rate": 9.958884135522971e-06, "loss": 0.4468, "step": 3925 }, { "epoch": 0.18016612362902115, "grad_norm": 0.4692036211490631, "learning_rate": 9.95885275056101e-06, "loss": 0.3922, "step": 3926 }, { "epoch": 0.18021201413427562, "grad_norm": 0.509840190410614, "learning_rate": 9.958821353674573e-06, "loss": 0.5127, "step": 3927 }, { "epoch": 0.1802579046395301, "grad_norm": 0.4572141766548157, "learning_rate": 9.958789944863735e-06, "loss": 0.3947, "step": 3928 }, { "epoch": 0.18030379514478453, "grad_norm": 0.47842952609062195, "learning_rate": 9.958758524128572e-06, "loss": 0.3683, "step": 3929 }, { "epoch": 0.180349685650039, "grad_norm": 0.4818190932273865, "learning_rate": 9.95872709146916e-06, "loss": 0.4286, "step": 3930 }, { "epoch": 0.18039557615529347, "grad_norm": 0.4555456340312958, "learning_rate": 9.958695646885575e-06, "loss": 0.3942, "step": 3931 }, { "epoch": 0.18044146666054794, "grad_norm": 0.4639066159725189, "learning_rate": 9.958664190377892e-06, "loss": 0.4262, "step": 3932 }, { "epoch": 0.1804873571658024, "grad_norm": 0.5069988965988159, "learning_rate": 9.958632721946185e-06, "loss": 0.517, "step": 3933 }, { "epoch": 0.18053324767105686, "grad_norm": 0.4582611918449402, "learning_rate": 9.958601241590533e-06, "loss": 0.3656, "step": 3934 }, { "epoch": 0.18057913817631133, "grad_norm": 0.4548102021217346, "learning_rate": 9.958569749311009e-06, "loss": 0.4117, "step": 3935 }, { "epoch": 0.1806250286815658, "grad_norm": 0.4673822224140167, "learning_rate": 9.958538245107691e-06, "loss": 0.4217, "step": 3936 }, { "epoch": 0.18067091918682024, "grad_norm": 0.5158348679542542, "learning_rate": 9.958506728980652e-06, "loss": 0.4531, "step": 3937 }, { "epoch": 0.1807168096920747, "grad_norm": 0.4394562542438507, "learning_rate": 9.95847520092997e-06, "loss": 0.3664, "step": 3938 }, { "epoch": 0.18076270019732918, "grad_norm": 0.5189195275306702, "learning_rate": 9.95844366095572e-06, "loss": 0.4463, "step": 3939 }, { "epoch": 0.18080859070258365, "grad_norm": 0.479986310005188, "learning_rate": 9.958412109057978e-06, "loss": 0.5212, "step": 3940 }, { "epoch": 0.1808544812078381, "grad_norm": 0.4612562954425812, "learning_rate": 9.958380545236819e-06, "loss": 0.3943, "step": 3941 }, { "epoch": 0.18090037171309256, "grad_norm": 0.4503750503063202, "learning_rate": 9.958348969492321e-06, "loss": 0.3546, "step": 3942 }, { "epoch": 0.18094626221834703, "grad_norm": 0.4488202929496765, "learning_rate": 9.958317381824559e-06, "loss": 0.3579, "step": 3943 }, { "epoch": 0.18099215272360147, "grad_norm": 0.5286926627159119, "learning_rate": 9.958285782233607e-06, "loss": 0.406, "step": 3944 }, { "epoch": 0.18103804322885594, "grad_norm": 0.48888880014419556, "learning_rate": 9.958254170719543e-06, "loss": 0.4059, "step": 3945 }, { "epoch": 0.1810839337341104, "grad_norm": 0.5178829431533813, "learning_rate": 9.958222547282442e-06, "loss": 0.4931, "step": 3946 }, { "epoch": 0.18112982423936488, "grad_norm": 0.480390727519989, "learning_rate": 9.958190911922381e-06, "loss": 0.4159, "step": 3947 }, { "epoch": 0.18117571474461933, "grad_norm": 0.44573745131492615, "learning_rate": 9.958159264639437e-06, "loss": 0.3759, "step": 3948 }, { "epoch": 0.1812216052498738, "grad_norm": 0.502204179763794, "learning_rate": 9.958127605433685e-06, "loss": 0.4147, "step": 3949 }, { "epoch": 0.18126749575512827, "grad_norm": 0.4709887206554413, "learning_rate": 9.958095934305198e-06, "loss": 0.4046, "step": 3950 }, { "epoch": 0.18131338626038274, "grad_norm": 0.43431344628334045, "learning_rate": 9.958064251254059e-06, "loss": 0.3325, "step": 3951 }, { "epoch": 0.18135927676563718, "grad_norm": 0.5602325201034546, "learning_rate": 9.958032556280338e-06, "loss": 0.5714, "step": 3952 }, { "epoch": 0.18140516727089165, "grad_norm": 0.49465030431747437, "learning_rate": 9.958000849384113e-06, "loss": 0.3943, "step": 3953 }, { "epoch": 0.18145105777614612, "grad_norm": 0.5021671056747437, "learning_rate": 9.957969130565462e-06, "loss": 0.5217, "step": 3954 }, { "epoch": 0.1814969482814006, "grad_norm": 0.4833013713359833, "learning_rate": 9.957937399824458e-06, "loss": 0.3885, "step": 3955 }, { "epoch": 0.18154283878665503, "grad_norm": 0.4508078396320343, "learning_rate": 9.957905657161182e-06, "loss": 0.4219, "step": 3956 }, { "epoch": 0.1815887292919095, "grad_norm": 0.47512227296829224, "learning_rate": 9.957873902575706e-06, "loss": 0.4352, "step": 3957 }, { "epoch": 0.18163461979716397, "grad_norm": 0.4449382722377777, "learning_rate": 9.95784213606811e-06, "loss": 0.3642, "step": 3958 }, { "epoch": 0.18168051030241844, "grad_norm": 0.4501861333847046, "learning_rate": 9.957810357638466e-06, "loss": 0.3856, "step": 3959 }, { "epoch": 0.18172640080767288, "grad_norm": 0.5231090784072876, "learning_rate": 9.957778567286853e-06, "loss": 0.4758, "step": 3960 }, { "epoch": 0.18177229131292735, "grad_norm": 0.4330017864704132, "learning_rate": 9.957746765013348e-06, "loss": 0.3635, "step": 3961 }, { "epoch": 0.18181818181818182, "grad_norm": 0.45830225944519043, "learning_rate": 9.957714950818027e-06, "loss": 0.4342, "step": 3962 }, { "epoch": 0.1818640723234363, "grad_norm": 0.44776225090026855, "learning_rate": 9.957683124700966e-06, "loss": 0.3411, "step": 3963 }, { "epoch": 0.18190996282869074, "grad_norm": 0.4855400025844574, "learning_rate": 9.957651286662242e-06, "loss": 0.4379, "step": 3964 }, { "epoch": 0.1819558533339452, "grad_norm": 0.459383487701416, "learning_rate": 9.95761943670193e-06, "loss": 0.3399, "step": 3965 }, { "epoch": 0.18200174383919968, "grad_norm": 0.45101475715637207, "learning_rate": 9.95758757482011e-06, "loss": 0.3845, "step": 3966 }, { "epoch": 0.18204763434445415, "grad_norm": 0.46671363711357117, "learning_rate": 9.957555701016855e-06, "loss": 0.4073, "step": 3967 }, { "epoch": 0.1820935248497086, "grad_norm": 0.4768611490726471, "learning_rate": 9.957523815292243e-06, "loss": 0.4268, "step": 3968 }, { "epoch": 0.18213941535496306, "grad_norm": 0.4484013319015503, "learning_rate": 9.957491917646353e-06, "loss": 0.3557, "step": 3969 }, { "epoch": 0.18218530586021753, "grad_norm": 0.5023492574691772, "learning_rate": 9.957460008079257e-06, "loss": 0.4171, "step": 3970 }, { "epoch": 0.18223119636547197, "grad_norm": 0.513933002948761, "learning_rate": 9.957428086591037e-06, "loss": 0.5274, "step": 3971 }, { "epoch": 0.18227708687072644, "grad_norm": 0.46191951632499695, "learning_rate": 9.957396153181764e-06, "loss": 0.4412, "step": 3972 }, { "epoch": 0.1823229773759809, "grad_norm": 0.46174636483192444, "learning_rate": 9.957364207851521e-06, "loss": 0.3979, "step": 3973 }, { "epoch": 0.18236886788123538, "grad_norm": 0.48360326886177063, "learning_rate": 9.95733225060038e-06, "loss": 0.4211, "step": 3974 }, { "epoch": 0.18241475838648982, "grad_norm": 0.4586115777492523, "learning_rate": 9.95730028142842e-06, "loss": 0.3738, "step": 3975 }, { "epoch": 0.1824606488917443, "grad_norm": 0.47462958097457886, "learning_rate": 9.957268300335716e-06, "loss": 0.4475, "step": 3976 }, { "epoch": 0.18250653939699876, "grad_norm": 0.4541427195072174, "learning_rate": 9.957236307322346e-06, "loss": 0.3834, "step": 3977 }, { "epoch": 0.18255242990225323, "grad_norm": 0.4755142033100128, "learning_rate": 9.95720430238839e-06, "loss": 0.463, "step": 3978 }, { "epoch": 0.18259832040750767, "grad_norm": 0.4314226806163788, "learning_rate": 9.95717228553392e-06, "loss": 0.3392, "step": 3979 }, { "epoch": 0.18264421091276214, "grad_norm": 0.4666334092617035, "learning_rate": 9.957140256759016e-06, "loss": 0.4551, "step": 3980 }, { "epoch": 0.18269010141801661, "grad_norm": 0.4947948455810547, "learning_rate": 9.957108216063755e-06, "loss": 0.4132, "step": 3981 }, { "epoch": 0.18273599192327108, "grad_norm": 0.48704156279563904, "learning_rate": 9.957076163448212e-06, "loss": 0.4595, "step": 3982 }, { "epoch": 0.18278188242852553, "grad_norm": 0.48995763063430786, "learning_rate": 9.957044098912466e-06, "loss": 0.4638, "step": 3983 }, { "epoch": 0.18282777293378, "grad_norm": 0.47832152247428894, "learning_rate": 9.957012022456591e-06, "loss": 0.4046, "step": 3984 }, { "epoch": 0.18287366343903447, "grad_norm": 0.5112481713294983, "learning_rate": 9.95697993408067e-06, "loss": 0.3786, "step": 3985 }, { "epoch": 0.18291955394428894, "grad_norm": 0.487212598323822, "learning_rate": 9.956947833784776e-06, "loss": 0.4781, "step": 3986 }, { "epoch": 0.18296544444954338, "grad_norm": 0.4842546284198761, "learning_rate": 9.956915721568984e-06, "loss": 0.4206, "step": 3987 }, { "epoch": 0.18301133495479785, "grad_norm": 0.4674060344696045, "learning_rate": 9.956883597433378e-06, "loss": 0.4173, "step": 3988 }, { "epoch": 0.18305722546005232, "grad_norm": 0.5002169609069824, "learning_rate": 9.95685146137803e-06, "loss": 0.4517, "step": 3989 }, { "epoch": 0.1831031159653068, "grad_norm": 0.49130091071128845, "learning_rate": 9.956819313403018e-06, "loss": 0.4897, "step": 3990 }, { "epoch": 0.18314900647056123, "grad_norm": 0.46109530329704285, "learning_rate": 9.95678715350842e-06, "loss": 0.3323, "step": 3991 }, { "epoch": 0.1831948969758157, "grad_norm": 0.48416006565093994, "learning_rate": 9.956754981694315e-06, "loss": 0.4193, "step": 3992 }, { "epoch": 0.18324078748107017, "grad_norm": 0.5197775959968567, "learning_rate": 9.956722797960775e-06, "loss": 0.4927, "step": 3993 }, { "epoch": 0.18328667798632464, "grad_norm": 0.4878934919834137, "learning_rate": 9.956690602307885e-06, "loss": 0.4042, "step": 3994 }, { "epoch": 0.18333256849157908, "grad_norm": 0.5160571932792664, "learning_rate": 9.956658394735716e-06, "loss": 0.4567, "step": 3995 }, { "epoch": 0.18337845899683355, "grad_norm": 0.4523058235645294, "learning_rate": 9.95662617524435e-06, "loss": 0.353, "step": 3996 }, { "epoch": 0.18342434950208802, "grad_norm": 0.4639807343482971, "learning_rate": 9.956593943833861e-06, "loss": 0.3531, "step": 3997 }, { "epoch": 0.1834702400073425, "grad_norm": 0.4653351902961731, "learning_rate": 9.956561700504329e-06, "loss": 0.3933, "step": 3998 }, { "epoch": 0.18351613051259694, "grad_norm": 0.4264218807220459, "learning_rate": 9.95652944525583e-06, "loss": 0.3293, "step": 3999 }, { "epoch": 0.1835620210178514, "grad_norm": 0.6798258423805237, "learning_rate": 9.95649717808844e-06, "loss": 0.4556, "step": 4000 }, { "epoch": 0.18360791152310588, "grad_norm": 0.4980906844139099, "learning_rate": 9.95646489900224e-06, "loss": 0.4259, "step": 4001 }, { "epoch": 0.18365380202836032, "grad_norm": 0.48687276244163513, "learning_rate": 9.956432607997308e-06, "loss": 0.4209, "step": 4002 }, { "epoch": 0.1836996925336148, "grad_norm": 0.5508676767349243, "learning_rate": 9.956400305073717e-06, "loss": 0.533, "step": 4003 }, { "epoch": 0.18374558303886926, "grad_norm": 0.4933052361011505, "learning_rate": 9.95636799023155e-06, "loss": 0.3412, "step": 4004 }, { "epoch": 0.18379147354412373, "grad_norm": 0.5059148073196411, "learning_rate": 9.956335663470881e-06, "loss": 0.4447, "step": 4005 }, { "epoch": 0.18383736404937817, "grad_norm": 0.5061053037643433, "learning_rate": 9.95630332479179e-06, "loss": 0.4818, "step": 4006 }, { "epoch": 0.18388325455463264, "grad_norm": 0.4864663779735565, "learning_rate": 9.956270974194352e-06, "loss": 0.3813, "step": 4007 }, { "epoch": 0.1839291450598871, "grad_norm": 0.4997391700744629, "learning_rate": 9.956238611678648e-06, "loss": 0.4266, "step": 4008 }, { "epoch": 0.18397503556514158, "grad_norm": 0.47904402017593384, "learning_rate": 9.956206237244753e-06, "loss": 0.3989, "step": 4009 }, { "epoch": 0.18402092607039602, "grad_norm": 0.5069399476051331, "learning_rate": 9.956173850892747e-06, "loss": 0.4303, "step": 4010 }, { "epoch": 0.1840668165756505, "grad_norm": 0.4637645184993744, "learning_rate": 9.956141452622708e-06, "loss": 0.4011, "step": 4011 }, { "epoch": 0.18411270708090496, "grad_norm": 0.46542516350746155, "learning_rate": 9.956109042434712e-06, "loss": 0.3793, "step": 4012 }, { "epoch": 0.18415859758615943, "grad_norm": 0.48267537355422974, "learning_rate": 9.956076620328837e-06, "loss": 0.4578, "step": 4013 }, { "epoch": 0.18420448809141388, "grad_norm": 0.4549073278903961, "learning_rate": 9.956044186305164e-06, "loss": 0.3814, "step": 4014 }, { "epoch": 0.18425037859666835, "grad_norm": 0.5556150674819946, "learning_rate": 9.956011740363767e-06, "loss": 0.5544, "step": 4015 }, { "epoch": 0.18429626910192282, "grad_norm": 0.514133870601654, "learning_rate": 9.955979282504728e-06, "loss": 0.4205, "step": 4016 }, { "epoch": 0.18434215960717729, "grad_norm": 0.42726820707321167, "learning_rate": 9.955946812728121e-06, "loss": 0.3124, "step": 4017 }, { "epoch": 0.18438805011243173, "grad_norm": 0.4487590193748474, "learning_rate": 9.955914331034027e-06, "loss": 0.3693, "step": 4018 }, { "epoch": 0.1844339406176862, "grad_norm": 0.460810124874115, "learning_rate": 9.955881837422524e-06, "loss": 0.4237, "step": 4019 }, { "epoch": 0.18447983112294067, "grad_norm": 0.5685785412788391, "learning_rate": 9.955849331893688e-06, "loss": 0.5322, "step": 4020 }, { "epoch": 0.18452572162819514, "grad_norm": 0.4468558430671692, "learning_rate": 9.955816814447599e-06, "loss": 0.3774, "step": 4021 }, { "epoch": 0.18457161213344958, "grad_norm": 0.5469991564750671, "learning_rate": 9.955784285084334e-06, "loss": 0.5207, "step": 4022 }, { "epoch": 0.18461750263870405, "grad_norm": 0.48401325941085815, "learning_rate": 9.955751743803971e-06, "loss": 0.3991, "step": 4023 }, { "epoch": 0.18466339314395852, "grad_norm": 0.49764013290405273, "learning_rate": 9.955719190606591e-06, "loss": 0.3764, "step": 4024 }, { "epoch": 0.184709283649213, "grad_norm": 0.4192434549331665, "learning_rate": 9.955686625492271e-06, "loss": 0.2729, "step": 4025 }, { "epoch": 0.18475517415446743, "grad_norm": 0.5076939463615417, "learning_rate": 9.955654048461087e-06, "loss": 0.4315, "step": 4026 }, { "epoch": 0.1848010646597219, "grad_norm": 0.4300815463066101, "learning_rate": 9.95562145951312e-06, "loss": 0.3584, "step": 4027 }, { "epoch": 0.18484695516497637, "grad_norm": 0.48613524436950684, "learning_rate": 9.955588858648446e-06, "loss": 0.4223, "step": 4028 }, { "epoch": 0.18489284567023082, "grad_norm": 0.44557058811187744, "learning_rate": 9.955556245867147e-06, "loss": 0.2963, "step": 4029 }, { "epoch": 0.18493873617548529, "grad_norm": 0.4597506821155548, "learning_rate": 9.955523621169299e-06, "loss": 0.3922, "step": 4030 }, { "epoch": 0.18498462668073976, "grad_norm": 0.5106930136680603, "learning_rate": 9.955490984554978e-06, "loss": 0.3743, "step": 4031 }, { "epoch": 0.18503051718599423, "grad_norm": 0.49481138586997986, "learning_rate": 9.955458336024269e-06, "loss": 0.4458, "step": 4032 }, { "epoch": 0.18507640769124867, "grad_norm": 0.4394301474094391, "learning_rate": 9.955425675577244e-06, "loss": 0.3203, "step": 4033 }, { "epoch": 0.18512229819650314, "grad_norm": 0.4792270064353943, "learning_rate": 9.955393003213984e-06, "loss": 0.3465, "step": 4034 }, { "epoch": 0.1851681887017576, "grad_norm": 0.50316321849823, "learning_rate": 9.955360318934571e-06, "loss": 0.4486, "step": 4035 }, { "epoch": 0.18521407920701208, "grad_norm": 0.4733767807483673, "learning_rate": 9.955327622739077e-06, "loss": 0.3962, "step": 4036 }, { "epoch": 0.18525996971226652, "grad_norm": 0.5152449011802673, "learning_rate": 9.955294914627584e-06, "loss": 0.3869, "step": 4037 }, { "epoch": 0.185305860217521, "grad_norm": 0.5111094117164612, "learning_rate": 9.955262194600172e-06, "loss": 0.4123, "step": 4038 }, { "epoch": 0.18535175072277546, "grad_norm": 0.5918365716934204, "learning_rate": 9.95522946265692e-06, "loss": 0.4543, "step": 4039 }, { "epoch": 0.18539764122802993, "grad_norm": 0.5309789776802063, "learning_rate": 9.955196718797902e-06, "loss": 0.513, "step": 4040 }, { "epoch": 0.18544353173328437, "grad_norm": 0.4646918475627899, "learning_rate": 9.955163963023203e-06, "loss": 0.3561, "step": 4041 }, { "epoch": 0.18548942223853884, "grad_norm": 0.44384002685546875, "learning_rate": 9.955131195332894e-06, "loss": 0.3161, "step": 4042 }, { "epoch": 0.1855353127437933, "grad_norm": 0.491471529006958, "learning_rate": 9.955098415727062e-06, "loss": 0.4716, "step": 4043 }, { "epoch": 0.18558120324904778, "grad_norm": 0.48043712973594666, "learning_rate": 9.955065624205782e-06, "loss": 0.429, "step": 4044 }, { "epoch": 0.18562709375430222, "grad_norm": 0.46590757369995117, "learning_rate": 9.955032820769132e-06, "loss": 0.3984, "step": 4045 }, { "epoch": 0.1856729842595567, "grad_norm": 0.45795169472694397, "learning_rate": 9.955000005417191e-06, "loss": 0.3536, "step": 4046 }, { "epoch": 0.18571887476481116, "grad_norm": 0.4461454153060913, "learning_rate": 9.954967178150041e-06, "loss": 0.3526, "step": 4047 }, { "epoch": 0.18576476527006563, "grad_norm": 0.4559987187385559, "learning_rate": 9.954934338967757e-06, "loss": 0.3503, "step": 4048 }, { "epoch": 0.18581065577532008, "grad_norm": 0.43757522106170654, "learning_rate": 9.954901487870421e-06, "loss": 0.3774, "step": 4049 }, { "epoch": 0.18585654628057455, "grad_norm": 0.44442030787467957, "learning_rate": 9.954868624858109e-06, "loss": 0.4096, "step": 4050 }, { "epoch": 0.18590243678582902, "grad_norm": 0.5150233507156372, "learning_rate": 9.954835749930905e-06, "loss": 0.4486, "step": 4051 }, { "epoch": 0.1859483272910835, "grad_norm": 0.4833717942237854, "learning_rate": 9.95480286308888e-06, "loss": 0.384, "step": 4052 }, { "epoch": 0.18599421779633793, "grad_norm": 0.49683383107185364, "learning_rate": 9.954769964332123e-06, "loss": 0.3969, "step": 4053 }, { "epoch": 0.1860401083015924, "grad_norm": 0.44091925024986267, "learning_rate": 9.954737053660705e-06, "loss": 0.3131, "step": 4054 }, { "epoch": 0.18608599880684687, "grad_norm": 0.6123804450035095, "learning_rate": 9.954704131074709e-06, "loss": 0.4667, "step": 4055 }, { "epoch": 0.18613188931210134, "grad_norm": 0.45483189821243286, "learning_rate": 9.954671196574213e-06, "loss": 0.3564, "step": 4056 }, { "epoch": 0.18617777981735578, "grad_norm": 0.5285971164703369, "learning_rate": 9.954638250159297e-06, "loss": 0.5051, "step": 4057 }, { "epoch": 0.18622367032261025, "grad_norm": 0.4830685257911682, "learning_rate": 9.95460529183004e-06, "loss": 0.3996, "step": 4058 }, { "epoch": 0.18626956082786472, "grad_norm": 0.4549676179885864, "learning_rate": 9.95457232158652e-06, "loss": 0.3661, "step": 4059 }, { "epoch": 0.18631545133311916, "grad_norm": 0.4777267575263977, "learning_rate": 9.954539339428817e-06, "loss": 0.3395, "step": 4060 }, { "epoch": 0.18636134183837363, "grad_norm": 0.44971832633018494, "learning_rate": 9.95450634535701e-06, "loss": 0.3558, "step": 4061 }, { "epoch": 0.1864072323436281, "grad_norm": 0.45441970229148865, "learning_rate": 9.954473339371181e-06, "loss": 0.335, "step": 4062 }, { "epoch": 0.18645312284888257, "grad_norm": 0.5144253969192505, "learning_rate": 9.954440321471408e-06, "loss": 0.4626, "step": 4063 }, { "epoch": 0.18649901335413702, "grad_norm": 0.48288124799728394, "learning_rate": 9.954407291657767e-06, "loss": 0.4476, "step": 4064 }, { "epoch": 0.1865449038593915, "grad_norm": 0.4835912585258484, "learning_rate": 9.95437424993034e-06, "loss": 0.3409, "step": 4065 }, { "epoch": 0.18659079436464596, "grad_norm": 0.4531983733177185, "learning_rate": 9.954341196289209e-06, "loss": 0.3627, "step": 4066 }, { "epoch": 0.18663668486990043, "grad_norm": 0.4809619188308716, "learning_rate": 9.95430813073445e-06, "loss": 0.3838, "step": 4067 }, { "epoch": 0.18668257537515487, "grad_norm": 0.47172215580940247, "learning_rate": 9.954275053266143e-06, "loss": 0.4215, "step": 4068 }, { "epoch": 0.18672846588040934, "grad_norm": 0.46933355927467346, "learning_rate": 9.954241963884369e-06, "loss": 0.3847, "step": 4069 }, { "epoch": 0.1867743563856638, "grad_norm": 0.44868549704551697, "learning_rate": 9.954208862589206e-06, "loss": 0.3685, "step": 4070 }, { "epoch": 0.18682024689091828, "grad_norm": 0.484976202249527, "learning_rate": 9.954175749380735e-06, "loss": 0.3832, "step": 4071 }, { "epoch": 0.18686613739617272, "grad_norm": 0.45270830392837524, "learning_rate": 9.954142624259035e-06, "loss": 0.324, "step": 4072 }, { "epoch": 0.1869120279014272, "grad_norm": 0.45380428433418274, "learning_rate": 9.954109487224185e-06, "loss": 0.3614, "step": 4073 }, { "epoch": 0.18695791840668166, "grad_norm": 0.5037969350814819, "learning_rate": 9.954076338276265e-06, "loss": 0.5168, "step": 4074 }, { "epoch": 0.18700380891193613, "grad_norm": 0.4648370146751404, "learning_rate": 9.954043177415355e-06, "loss": 0.3109, "step": 4075 }, { "epoch": 0.18704969941719057, "grad_norm": 0.4700336158275604, "learning_rate": 9.954010004641534e-06, "loss": 0.3497, "step": 4076 }, { "epoch": 0.18709558992244504, "grad_norm": 0.4684875011444092, "learning_rate": 9.953976819954885e-06, "loss": 0.3808, "step": 4077 }, { "epoch": 0.1871414804276995, "grad_norm": 0.4918420612812042, "learning_rate": 9.953943623355482e-06, "loss": 0.3775, "step": 4078 }, { "epoch": 0.18718737093295398, "grad_norm": 0.5494481921195984, "learning_rate": 9.95391041484341e-06, "loss": 0.4873, "step": 4079 }, { "epoch": 0.18723326143820843, "grad_norm": 0.479916512966156, "learning_rate": 9.953877194418747e-06, "loss": 0.3805, "step": 4080 }, { "epoch": 0.1872791519434629, "grad_norm": 0.4575951099395752, "learning_rate": 9.953843962081574e-06, "loss": 0.4386, "step": 4081 }, { "epoch": 0.18732504244871737, "grad_norm": 0.5033926963806152, "learning_rate": 9.953810717831966e-06, "loss": 0.4555, "step": 4082 }, { "epoch": 0.18737093295397184, "grad_norm": 0.46825897693634033, "learning_rate": 9.95377746167001e-06, "loss": 0.3692, "step": 4083 }, { "epoch": 0.18741682345922628, "grad_norm": 0.46704164147377014, "learning_rate": 9.953744193595782e-06, "loss": 0.3737, "step": 4084 }, { "epoch": 0.18746271396448075, "grad_norm": 0.4961448609828949, "learning_rate": 9.953710913609362e-06, "loss": 0.5158, "step": 4085 }, { "epoch": 0.18750860446973522, "grad_norm": 0.46118593215942383, "learning_rate": 9.953677621710831e-06, "loss": 0.3884, "step": 4086 }, { "epoch": 0.1875544949749897, "grad_norm": 0.47987639904022217, "learning_rate": 9.953644317900269e-06, "loss": 0.4246, "step": 4087 }, { "epoch": 0.18760038548024413, "grad_norm": 0.4968966245651245, "learning_rate": 9.953611002177755e-06, "loss": 0.3639, "step": 4088 }, { "epoch": 0.1876462759854986, "grad_norm": 0.4718018174171448, "learning_rate": 9.95357767454337e-06, "loss": 0.3913, "step": 4089 }, { "epoch": 0.18769216649075307, "grad_norm": 0.4618558883666992, "learning_rate": 9.953544334997196e-06, "loss": 0.3679, "step": 4090 }, { "epoch": 0.1877380569960075, "grad_norm": 0.5178093314170837, "learning_rate": 9.95351098353931e-06, "loss": 0.4624, "step": 4091 }, { "epoch": 0.18778394750126198, "grad_norm": 0.44858354330062866, "learning_rate": 9.953477620169793e-06, "loss": 0.3908, "step": 4092 }, { "epoch": 0.18782983800651645, "grad_norm": 0.4788037836551666, "learning_rate": 9.953444244888725e-06, "loss": 0.4495, "step": 4093 }, { "epoch": 0.18787572851177092, "grad_norm": 0.4847019612789154, "learning_rate": 9.953410857696188e-06, "loss": 0.4005, "step": 4094 }, { "epoch": 0.18792161901702537, "grad_norm": 0.530505359172821, "learning_rate": 9.953377458592261e-06, "loss": 0.5364, "step": 4095 }, { "epoch": 0.18796750952227984, "grad_norm": 0.4931606948375702, "learning_rate": 9.953344047577025e-06, "loss": 0.4736, "step": 4096 }, { "epoch": 0.1880134000275343, "grad_norm": 0.4111191928386688, "learning_rate": 9.95331062465056e-06, "loss": 0.2957, "step": 4097 }, { "epoch": 0.18805929053278878, "grad_norm": 0.43773943185806274, "learning_rate": 9.953277189812945e-06, "loss": 0.3478, "step": 4098 }, { "epoch": 0.18810518103804322, "grad_norm": 0.4870125353336334, "learning_rate": 9.953243743064263e-06, "loss": 0.3839, "step": 4099 }, { "epoch": 0.1881510715432977, "grad_norm": 0.4274459779262543, "learning_rate": 9.953210284404592e-06, "loss": 0.3513, "step": 4100 }, { "epoch": 0.18819696204855216, "grad_norm": 0.49045252799987793, "learning_rate": 9.953176813834015e-06, "loss": 0.4276, "step": 4101 }, { "epoch": 0.18824285255380663, "grad_norm": 0.5296708941459656, "learning_rate": 9.95314333135261e-06, "loss": 0.4135, "step": 4102 }, { "epoch": 0.18828874305906107, "grad_norm": 0.5185618996620178, "learning_rate": 9.95310983696046e-06, "loss": 0.4544, "step": 4103 }, { "epoch": 0.18833463356431554, "grad_norm": 0.4736163318157196, "learning_rate": 9.953076330657641e-06, "loss": 0.3828, "step": 4104 }, { "epoch": 0.18838052406957, "grad_norm": 0.4864441454410553, "learning_rate": 9.95304281244424e-06, "loss": 0.4292, "step": 4105 }, { "epoch": 0.18842641457482448, "grad_norm": 0.4908764958381653, "learning_rate": 9.953009282320332e-06, "loss": 0.4628, "step": 4106 }, { "epoch": 0.18847230508007892, "grad_norm": 0.44502270221710205, "learning_rate": 9.952975740286e-06, "loss": 0.4036, "step": 4107 }, { "epoch": 0.1885181955853334, "grad_norm": 0.5089215040206909, "learning_rate": 9.952942186341325e-06, "loss": 0.5021, "step": 4108 }, { "epoch": 0.18856408609058786, "grad_norm": 0.4454263150691986, "learning_rate": 9.952908620486387e-06, "loss": 0.3538, "step": 4109 }, { "epoch": 0.18860997659584233, "grad_norm": 0.46301963925361633, "learning_rate": 9.952875042721268e-06, "loss": 0.3976, "step": 4110 }, { "epoch": 0.18865586710109677, "grad_norm": 0.4358971118927002, "learning_rate": 9.952841453046047e-06, "loss": 0.3036, "step": 4111 }, { "epoch": 0.18870175760635124, "grad_norm": 0.47368836402893066, "learning_rate": 9.952807851460807e-06, "loss": 0.4217, "step": 4112 }, { "epoch": 0.18874764811160571, "grad_norm": 0.550803542137146, "learning_rate": 9.952774237965624e-06, "loss": 0.5123, "step": 4113 }, { "epoch": 0.18879353861686018, "grad_norm": 0.4942846894264221, "learning_rate": 9.952740612560585e-06, "loss": 0.4708, "step": 4114 }, { "epoch": 0.18883942912211463, "grad_norm": 0.47420063614845276, "learning_rate": 9.952706975245767e-06, "loss": 0.3872, "step": 4115 }, { "epoch": 0.1888853196273691, "grad_norm": 0.5260906219482422, "learning_rate": 9.952673326021252e-06, "loss": 0.4132, "step": 4116 }, { "epoch": 0.18893121013262357, "grad_norm": 0.6215383410453796, "learning_rate": 9.95263966488712e-06, "loss": 0.4234, "step": 4117 }, { "epoch": 0.188977100637878, "grad_norm": 0.5035076141357422, "learning_rate": 9.952605991843452e-06, "loss": 0.4147, "step": 4118 }, { "epoch": 0.18902299114313248, "grad_norm": 0.4822065532207489, "learning_rate": 9.952572306890332e-06, "loss": 0.3967, "step": 4119 }, { "epoch": 0.18906888164838695, "grad_norm": 0.5079449415206909, "learning_rate": 9.952538610027837e-06, "loss": 0.4548, "step": 4120 }, { "epoch": 0.18911477215364142, "grad_norm": 0.42507266998291016, "learning_rate": 9.95250490125605e-06, "loss": 0.3327, "step": 4121 }, { "epoch": 0.18916066265889586, "grad_norm": 0.49618425965309143, "learning_rate": 9.952471180575052e-06, "loss": 0.4404, "step": 4122 }, { "epoch": 0.18920655316415033, "grad_norm": 0.5208756923675537, "learning_rate": 9.952437447984924e-06, "loss": 0.4517, "step": 4123 }, { "epoch": 0.1892524436694048, "grad_norm": 0.5111962556838989, "learning_rate": 9.952403703485746e-06, "loss": 0.4436, "step": 4124 }, { "epoch": 0.18929833417465927, "grad_norm": 0.43555372953414917, "learning_rate": 9.952369947077603e-06, "loss": 0.3401, "step": 4125 }, { "epoch": 0.18934422467991371, "grad_norm": 0.493084579706192, "learning_rate": 9.95233617876057e-06, "loss": 0.4378, "step": 4126 }, { "epoch": 0.18939011518516818, "grad_norm": 0.490662544965744, "learning_rate": 9.952302398534733e-06, "loss": 0.4177, "step": 4127 }, { "epoch": 0.18943600569042265, "grad_norm": 0.4406987130641937, "learning_rate": 9.95226860640017e-06, "loss": 0.364, "step": 4128 }, { "epoch": 0.18948189619567712, "grad_norm": 0.47129300236701965, "learning_rate": 9.952234802356966e-06, "loss": 0.3863, "step": 4129 }, { "epoch": 0.18952778670093157, "grad_norm": 0.5045755505561829, "learning_rate": 9.952200986405201e-06, "loss": 0.3848, "step": 4130 }, { "epoch": 0.18957367720618604, "grad_norm": 0.48028242588043213, "learning_rate": 9.952167158544955e-06, "loss": 0.4149, "step": 4131 }, { "epoch": 0.1896195677114405, "grad_norm": 0.5005995631217957, "learning_rate": 9.952133318776307e-06, "loss": 0.4154, "step": 4132 }, { "epoch": 0.18966545821669498, "grad_norm": 0.45134297013282776, "learning_rate": 9.952099467099344e-06, "loss": 0.3423, "step": 4133 }, { "epoch": 0.18971134872194942, "grad_norm": 0.483934223651886, "learning_rate": 9.952065603514145e-06, "loss": 0.5167, "step": 4134 }, { "epoch": 0.1897572392272039, "grad_norm": 0.45987600088119507, "learning_rate": 9.95203172802079e-06, "loss": 0.3426, "step": 4135 }, { "epoch": 0.18980312973245836, "grad_norm": 0.4913298785686493, "learning_rate": 9.951997840619362e-06, "loss": 0.4623, "step": 4136 }, { "epoch": 0.18984902023771283, "grad_norm": 0.46920305490493774, "learning_rate": 9.951963941309941e-06, "loss": 0.3779, "step": 4137 }, { "epoch": 0.18989491074296727, "grad_norm": 0.4733751714229584, "learning_rate": 9.951930030092611e-06, "loss": 0.3656, "step": 4138 }, { "epoch": 0.18994080124822174, "grad_norm": 0.49268674850463867, "learning_rate": 9.951896106967452e-06, "loss": 0.3912, "step": 4139 }, { "epoch": 0.1899866917534762, "grad_norm": 0.4943540096282959, "learning_rate": 9.951862171934545e-06, "loss": 0.4087, "step": 4140 }, { "epoch": 0.19003258225873068, "grad_norm": 0.4985579252243042, "learning_rate": 9.951828224993974e-06, "loss": 0.4165, "step": 4141 }, { "epoch": 0.19007847276398512, "grad_norm": 0.5130932331085205, "learning_rate": 9.951794266145818e-06, "loss": 0.4843, "step": 4142 }, { "epoch": 0.1901243632692396, "grad_norm": 0.5205416679382324, "learning_rate": 9.951760295390158e-06, "loss": 0.4239, "step": 4143 }, { "epoch": 0.19017025377449406, "grad_norm": 0.48263564705848694, "learning_rate": 9.95172631272708e-06, "loss": 0.3825, "step": 4144 }, { "epoch": 0.19021614427974853, "grad_norm": 0.47723209857940674, "learning_rate": 9.951692318156662e-06, "loss": 0.4286, "step": 4145 }, { "epoch": 0.19026203478500298, "grad_norm": 0.4813656210899353, "learning_rate": 9.951658311678987e-06, "loss": 0.3568, "step": 4146 }, { "epoch": 0.19030792529025745, "grad_norm": 0.5103087425231934, "learning_rate": 9.951624293294135e-06, "loss": 0.4905, "step": 4147 }, { "epoch": 0.19035381579551192, "grad_norm": 0.5263637900352478, "learning_rate": 9.951590263002191e-06, "loss": 0.4505, "step": 4148 }, { "epoch": 0.19039970630076636, "grad_norm": 0.46938657760620117, "learning_rate": 9.951556220803233e-06, "loss": 0.437, "step": 4149 }, { "epoch": 0.19044559680602083, "grad_norm": 0.5023797750473022, "learning_rate": 9.951522166697347e-06, "loss": 0.4919, "step": 4150 }, { "epoch": 0.1904914873112753, "grad_norm": 0.5201046466827393, "learning_rate": 9.951488100684613e-06, "loss": 0.4952, "step": 4151 }, { "epoch": 0.19053737781652977, "grad_norm": 0.4829038381576538, "learning_rate": 9.951454022765112e-06, "loss": 0.3966, "step": 4152 }, { "epoch": 0.1905832683217842, "grad_norm": 0.4435252547264099, "learning_rate": 9.951419932938928e-06, "loss": 0.3429, "step": 4153 }, { "epoch": 0.19062915882703868, "grad_norm": 0.4686864912509918, "learning_rate": 9.951385831206142e-06, "loss": 0.4335, "step": 4154 }, { "epoch": 0.19067504933229315, "grad_norm": 0.4584447145462036, "learning_rate": 9.951351717566835e-06, "loss": 0.3807, "step": 4155 }, { "epoch": 0.19072093983754762, "grad_norm": 0.46828722953796387, "learning_rate": 9.951317592021089e-06, "loss": 0.3829, "step": 4156 }, { "epoch": 0.19076683034280206, "grad_norm": 0.5308449268341064, "learning_rate": 9.951283454568987e-06, "loss": 0.4734, "step": 4157 }, { "epoch": 0.19081272084805653, "grad_norm": 0.4835406243801117, "learning_rate": 9.951249305210612e-06, "loss": 0.3929, "step": 4158 }, { "epoch": 0.190858611353311, "grad_norm": 0.4220728874206543, "learning_rate": 9.951215143946044e-06, "loss": 0.3082, "step": 4159 }, { "epoch": 0.19090450185856547, "grad_norm": 0.47217854857444763, "learning_rate": 9.951180970775366e-06, "loss": 0.356, "step": 4160 }, { "epoch": 0.19095039236381992, "grad_norm": 0.5041155815124512, "learning_rate": 9.951146785698663e-06, "loss": 0.4472, "step": 4161 }, { "epoch": 0.19099628286907439, "grad_norm": 0.49931737780570984, "learning_rate": 9.951112588716013e-06, "loss": 0.4941, "step": 4162 }, { "epoch": 0.19104217337432886, "grad_norm": 0.4517405331134796, "learning_rate": 9.951078379827499e-06, "loss": 0.3641, "step": 4163 }, { "epoch": 0.19108806387958333, "grad_norm": 0.5002760887145996, "learning_rate": 9.951044159033204e-06, "loss": 0.3661, "step": 4164 }, { "epoch": 0.19113395438483777, "grad_norm": 0.493163526058197, "learning_rate": 9.951009926333212e-06, "loss": 0.4541, "step": 4165 }, { "epoch": 0.19117984489009224, "grad_norm": 0.4570077955722809, "learning_rate": 9.950975681727602e-06, "loss": 0.3872, "step": 4166 }, { "epoch": 0.1912257353953467, "grad_norm": 0.4414304494857788, "learning_rate": 9.950941425216459e-06, "loss": 0.3718, "step": 4167 }, { "epoch": 0.19127162590060118, "grad_norm": 0.4730222523212433, "learning_rate": 9.950907156799865e-06, "loss": 0.3382, "step": 4168 }, { "epoch": 0.19131751640585562, "grad_norm": 0.4675235450267792, "learning_rate": 9.950872876477901e-06, "loss": 0.4547, "step": 4169 }, { "epoch": 0.1913634069111101, "grad_norm": 0.4699147939682007, "learning_rate": 9.95083858425065e-06, "loss": 0.3982, "step": 4170 }, { "epoch": 0.19140929741636456, "grad_norm": 0.4556610584259033, "learning_rate": 9.950804280118196e-06, "loss": 0.3722, "step": 4171 }, { "epoch": 0.19145518792161903, "grad_norm": 0.46198368072509766, "learning_rate": 9.950769964080618e-06, "loss": 0.4223, "step": 4172 }, { "epoch": 0.19150107842687347, "grad_norm": 0.5001542568206787, "learning_rate": 9.950735636138003e-06, "loss": 0.4736, "step": 4173 }, { "epoch": 0.19154696893212794, "grad_norm": 0.5015148520469666, "learning_rate": 9.95070129629043e-06, "loss": 0.3649, "step": 4174 }, { "epoch": 0.1915928594373824, "grad_norm": 0.5021859407424927, "learning_rate": 9.950666944537983e-06, "loss": 0.5189, "step": 4175 }, { "epoch": 0.19163874994263685, "grad_norm": 0.4707670509815216, "learning_rate": 9.950632580880745e-06, "loss": 0.4479, "step": 4176 }, { "epoch": 0.19168464044789132, "grad_norm": 0.5313079953193665, "learning_rate": 9.950598205318798e-06, "loss": 0.4702, "step": 4177 }, { "epoch": 0.1917305309531458, "grad_norm": 0.5079951286315918, "learning_rate": 9.950563817852225e-06, "loss": 0.5436, "step": 4178 }, { "epoch": 0.19177642145840026, "grad_norm": 0.49826809763908386, "learning_rate": 9.950529418481107e-06, "loss": 0.505, "step": 4179 }, { "epoch": 0.1918223119636547, "grad_norm": 0.4891725480556488, "learning_rate": 9.95049500720553e-06, "loss": 0.4788, "step": 4180 }, { "epoch": 0.19186820246890918, "grad_norm": 0.5809705257415771, "learning_rate": 9.950460584025574e-06, "loss": 0.4472, "step": 4181 }, { "epoch": 0.19191409297416365, "grad_norm": 0.48403993248939514, "learning_rate": 9.950426148941323e-06, "loss": 0.3733, "step": 4182 }, { "epoch": 0.19195998347941812, "grad_norm": 0.6793543100357056, "learning_rate": 9.95039170195286e-06, "loss": 0.4737, "step": 4183 }, { "epoch": 0.19200587398467256, "grad_norm": 0.4881032705307007, "learning_rate": 9.950357243060266e-06, "loss": 0.4271, "step": 4184 }, { "epoch": 0.19205176448992703, "grad_norm": 0.4615977704524994, "learning_rate": 9.950322772263627e-06, "loss": 0.417, "step": 4185 }, { "epoch": 0.1920976549951815, "grad_norm": 0.47727513313293457, "learning_rate": 9.950288289563023e-06, "loss": 0.4553, "step": 4186 }, { "epoch": 0.19214354550043597, "grad_norm": 0.5171818137168884, "learning_rate": 9.950253794958537e-06, "loss": 0.4944, "step": 4187 }, { "epoch": 0.1921894360056904, "grad_norm": 0.4399084746837616, "learning_rate": 9.950219288450255e-06, "loss": 0.2992, "step": 4188 }, { "epoch": 0.19223532651094488, "grad_norm": 0.47041231393814087, "learning_rate": 9.950184770038257e-06, "loss": 0.4007, "step": 4189 }, { "epoch": 0.19228121701619935, "grad_norm": 0.44988352060317993, "learning_rate": 9.950150239722626e-06, "loss": 0.3841, "step": 4190 }, { "epoch": 0.19232710752145382, "grad_norm": 0.5435940027236938, "learning_rate": 9.950115697503449e-06, "loss": 0.5663, "step": 4191 }, { "epoch": 0.19237299802670826, "grad_norm": 0.555151104927063, "learning_rate": 9.950081143380804e-06, "loss": 0.4795, "step": 4192 }, { "epoch": 0.19241888853196273, "grad_norm": 0.4963894486427307, "learning_rate": 9.950046577354774e-06, "loss": 0.4438, "step": 4193 }, { "epoch": 0.1924647790372172, "grad_norm": 0.46585705876350403, "learning_rate": 9.950011999425448e-06, "loss": 0.3749, "step": 4194 }, { "epoch": 0.19251066954247167, "grad_norm": 0.43702268600463867, "learning_rate": 9.949977409592904e-06, "loss": 0.3037, "step": 4195 }, { "epoch": 0.19255656004772612, "grad_norm": 0.5150640606880188, "learning_rate": 9.949942807857225e-06, "loss": 0.4286, "step": 4196 }, { "epoch": 0.1926024505529806, "grad_norm": 0.512639582157135, "learning_rate": 9.949908194218498e-06, "loss": 0.4699, "step": 4197 }, { "epoch": 0.19264834105823506, "grad_norm": 0.49953705072402954, "learning_rate": 9.949873568676801e-06, "loss": 0.4439, "step": 4198 }, { "epoch": 0.19269423156348953, "grad_norm": 0.49101316928863525, "learning_rate": 9.949838931232225e-06, "loss": 0.4182, "step": 4199 }, { "epoch": 0.19274012206874397, "grad_norm": 0.5629458427429199, "learning_rate": 9.949804281884845e-06, "loss": 0.5942, "step": 4200 }, { "epoch": 0.19278601257399844, "grad_norm": 0.46506381034851074, "learning_rate": 9.94976962063475e-06, "loss": 0.3641, "step": 4201 }, { "epoch": 0.1928319030792529, "grad_norm": 0.46575990319252014, "learning_rate": 9.949734947482019e-06, "loss": 0.3955, "step": 4202 }, { "epoch": 0.19287779358450738, "grad_norm": 0.4566187858581543, "learning_rate": 9.94970026242674e-06, "loss": 0.3614, "step": 4203 }, { "epoch": 0.19292368408976182, "grad_norm": 0.5021764039993286, "learning_rate": 9.949665565468994e-06, "loss": 0.4061, "step": 4204 }, { "epoch": 0.1929695745950163, "grad_norm": 0.5028994083404541, "learning_rate": 9.949630856608862e-06, "loss": 0.4135, "step": 4205 }, { "epoch": 0.19301546510027076, "grad_norm": 0.48667213320732117, "learning_rate": 9.94959613584643e-06, "loss": 0.4098, "step": 4206 }, { "epoch": 0.1930613556055252, "grad_norm": 0.45400986075401306, "learning_rate": 9.949561403181785e-06, "loss": 0.3744, "step": 4207 }, { "epoch": 0.19310724611077967, "grad_norm": 0.45983439683914185, "learning_rate": 9.949526658615004e-06, "loss": 0.4092, "step": 4208 }, { "epoch": 0.19315313661603414, "grad_norm": 0.49159324169158936, "learning_rate": 9.949491902146175e-06, "loss": 0.4313, "step": 4209 }, { "epoch": 0.1931990271212886, "grad_norm": 0.46645256876945496, "learning_rate": 9.949457133775379e-06, "loss": 0.4934, "step": 4210 }, { "epoch": 0.19324491762654306, "grad_norm": 0.4616362154483795, "learning_rate": 9.9494223535027e-06, "loss": 0.4111, "step": 4211 }, { "epoch": 0.19329080813179753, "grad_norm": 0.45995163917541504, "learning_rate": 9.949387561328224e-06, "loss": 0.4165, "step": 4212 }, { "epoch": 0.193336698637052, "grad_norm": 0.607218861579895, "learning_rate": 9.949352757252033e-06, "loss": 0.3802, "step": 4213 }, { "epoch": 0.19338258914230647, "grad_norm": 0.5580732226371765, "learning_rate": 9.949317941274209e-06, "loss": 0.5255, "step": 4214 }, { "epoch": 0.1934284796475609, "grad_norm": 0.4484957754611969, "learning_rate": 9.949283113394838e-06, "loss": 0.3946, "step": 4215 }, { "epoch": 0.19347437015281538, "grad_norm": 0.44185206294059753, "learning_rate": 9.949248273614005e-06, "loss": 0.3651, "step": 4216 }, { "epoch": 0.19352026065806985, "grad_norm": 0.4761306047439575, "learning_rate": 9.949213421931789e-06, "loss": 0.4186, "step": 4217 }, { "epoch": 0.19356615116332432, "grad_norm": 0.47021451592445374, "learning_rate": 9.949178558348279e-06, "loss": 0.3737, "step": 4218 }, { "epoch": 0.19361204166857876, "grad_norm": 0.48406535387039185, "learning_rate": 9.949143682863556e-06, "loss": 0.4257, "step": 4219 }, { "epoch": 0.19365793217383323, "grad_norm": 0.4199129343032837, "learning_rate": 9.949108795477704e-06, "loss": 0.287, "step": 4220 }, { "epoch": 0.1937038226790877, "grad_norm": 0.46197354793548584, "learning_rate": 9.949073896190808e-06, "loss": 0.3748, "step": 4221 }, { "epoch": 0.19374971318434217, "grad_norm": 0.47738486528396606, "learning_rate": 9.94903898500295e-06, "loss": 0.4203, "step": 4222 }, { "epoch": 0.1937956036895966, "grad_norm": 0.49528124928474426, "learning_rate": 9.949004061914216e-06, "loss": 0.4721, "step": 4223 }, { "epoch": 0.19384149419485108, "grad_norm": 0.4897059500217438, "learning_rate": 9.94896912692469e-06, "loss": 0.4441, "step": 4224 }, { "epoch": 0.19388738470010555, "grad_norm": 0.4783026874065399, "learning_rate": 9.948934180034453e-06, "loss": 0.4509, "step": 4225 }, { "epoch": 0.19393327520536002, "grad_norm": 0.47514715790748596, "learning_rate": 9.948899221243592e-06, "loss": 0.4035, "step": 4226 }, { "epoch": 0.19397916571061447, "grad_norm": 0.4776126444339752, "learning_rate": 9.948864250552192e-06, "loss": 0.3831, "step": 4227 }, { "epoch": 0.19402505621586894, "grad_norm": 0.49811041355133057, "learning_rate": 9.948829267960335e-06, "loss": 0.3906, "step": 4228 }, { "epoch": 0.1940709467211234, "grad_norm": 0.47314468026161194, "learning_rate": 9.948794273468105e-06, "loss": 0.416, "step": 4229 }, { "epoch": 0.19411683722637788, "grad_norm": 0.4896462857723236, "learning_rate": 9.948759267075585e-06, "loss": 0.4495, "step": 4230 }, { "epoch": 0.19416272773163232, "grad_norm": 0.45454174280166626, "learning_rate": 9.948724248782863e-06, "loss": 0.4122, "step": 4231 }, { "epoch": 0.1942086182368868, "grad_norm": 0.44305551052093506, "learning_rate": 9.94868921859002e-06, "loss": 0.3236, "step": 4232 }, { "epoch": 0.19425450874214126, "grad_norm": 0.4922850430011749, "learning_rate": 9.948654176497142e-06, "loss": 0.4557, "step": 4233 }, { "epoch": 0.19430039924739573, "grad_norm": 0.47722384333610535, "learning_rate": 9.948619122504312e-06, "loss": 0.4404, "step": 4234 }, { "epoch": 0.19434628975265017, "grad_norm": 0.4752442538738251, "learning_rate": 9.948584056611616e-06, "loss": 0.4523, "step": 4235 }, { "epoch": 0.19439218025790464, "grad_norm": 0.42401692271232605, "learning_rate": 9.948548978819135e-06, "loss": 0.3058, "step": 4236 }, { "epoch": 0.1944380707631591, "grad_norm": 0.4936096668243408, "learning_rate": 9.948513889126956e-06, "loss": 0.4493, "step": 4237 }, { "epoch": 0.19448396126841355, "grad_norm": 0.5123659372329712, "learning_rate": 9.948478787535166e-06, "loss": 0.473, "step": 4238 }, { "epoch": 0.19452985177366802, "grad_norm": 0.4875311851501465, "learning_rate": 9.948443674043841e-06, "loss": 0.4571, "step": 4239 }, { "epoch": 0.1945757422789225, "grad_norm": 0.430096834897995, "learning_rate": 9.948408548653074e-06, "loss": 0.3582, "step": 4240 }, { "epoch": 0.19462163278417696, "grad_norm": 0.5173108577728271, "learning_rate": 9.948373411362946e-06, "loss": 0.5267, "step": 4241 }, { "epoch": 0.1946675232894314, "grad_norm": 0.48662590980529785, "learning_rate": 9.948338262173543e-06, "loss": 0.467, "step": 4242 }, { "epoch": 0.19471341379468587, "grad_norm": 0.4564754068851471, "learning_rate": 9.948303101084947e-06, "loss": 0.3897, "step": 4243 }, { "epoch": 0.19475930429994034, "grad_norm": 0.4806521236896515, "learning_rate": 9.948267928097243e-06, "loss": 0.4413, "step": 4244 }, { "epoch": 0.19480519480519481, "grad_norm": 0.45602428913116455, "learning_rate": 9.948232743210516e-06, "loss": 0.385, "step": 4245 }, { "epoch": 0.19485108531044926, "grad_norm": 0.5353230834007263, "learning_rate": 9.948197546424851e-06, "loss": 0.4468, "step": 4246 }, { "epoch": 0.19489697581570373, "grad_norm": 0.4799993634223938, "learning_rate": 9.948162337740334e-06, "loss": 0.3978, "step": 4247 }, { "epoch": 0.1949428663209582, "grad_norm": 0.44750842452049255, "learning_rate": 9.948127117157048e-06, "loss": 0.39, "step": 4248 }, { "epoch": 0.19498875682621267, "grad_norm": 0.43253499269485474, "learning_rate": 9.948091884675077e-06, "loss": 0.3331, "step": 4249 }, { "epoch": 0.1950346473314671, "grad_norm": 0.47233980894088745, "learning_rate": 9.948056640294507e-06, "loss": 0.4055, "step": 4250 }, { "epoch": 0.19508053783672158, "grad_norm": 0.4375777542591095, "learning_rate": 9.948021384015424e-06, "loss": 0.3446, "step": 4251 }, { "epoch": 0.19512642834197605, "grad_norm": 0.5303660035133362, "learning_rate": 9.94798611583791e-06, "loss": 0.4573, "step": 4252 }, { "epoch": 0.19517231884723052, "grad_norm": 0.46682602167129517, "learning_rate": 9.947950835762051e-06, "loss": 0.4336, "step": 4253 }, { "epoch": 0.19521820935248496, "grad_norm": 0.48022106289863586, "learning_rate": 9.947915543787931e-06, "loss": 0.4261, "step": 4254 }, { "epoch": 0.19526409985773943, "grad_norm": 0.47179487347602844, "learning_rate": 9.947880239915637e-06, "loss": 0.474, "step": 4255 }, { "epoch": 0.1953099903629939, "grad_norm": 0.4498111307621002, "learning_rate": 9.947844924145253e-06, "loss": 0.367, "step": 4256 }, { "epoch": 0.19535588086824837, "grad_norm": 0.44921138882637024, "learning_rate": 9.947809596476862e-06, "loss": 0.3785, "step": 4257 }, { "epoch": 0.19540177137350281, "grad_norm": 0.47249871492385864, "learning_rate": 9.947774256910552e-06, "loss": 0.4707, "step": 4258 }, { "epoch": 0.19544766187875728, "grad_norm": 0.7527539134025574, "learning_rate": 9.947738905446405e-06, "loss": 0.4634, "step": 4259 }, { "epoch": 0.19549355238401175, "grad_norm": 0.4666089713573456, "learning_rate": 9.947703542084508e-06, "loss": 0.3485, "step": 4260 }, { "epoch": 0.19553944288926622, "grad_norm": 0.5009175539016724, "learning_rate": 9.947668166824947e-06, "loss": 0.4518, "step": 4261 }, { "epoch": 0.19558533339452067, "grad_norm": 0.5116076469421387, "learning_rate": 9.947632779667802e-06, "loss": 0.491, "step": 4262 }, { "epoch": 0.19563122389977514, "grad_norm": 0.4593135118484497, "learning_rate": 9.947597380613165e-06, "loss": 0.3643, "step": 4263 }, { "epoch": 0.1956771144050296, "grad_norm": 0.47461822628974915, "learning_rate": 9.947561969661117e-06, "loss": 0.4328, "step": 4264 }, { "epoch": 0.19572300491028405, "grad_norm": 0.47299692034721375, "learning_rate": 9.947526546811743e-06, "loss": 0.4174, "step": 4265 }, { "epoch": 0.19576889541553852, "grad_norm": 0.45413699746131897, "learning_rate": 9.94749111206513e-06, "loss": 0.3684, "step": 4266 }, { "epoch": 0.195814785920793, "grad_norm": 0.5074641704559326, "learning_rate": 9.947455665421361e-06, "loss": 0.3983, "step": 4267 }, { "epoch": 0.19586067642604746, "grad_norm": 0.535548746585846, "learning_rate": 9.947420206880525e-06, "loss": 0.5404, "step": 4268 }, { "epoch": 0.1959065669313019, "grad_norm": 0.44411972165107727, "learning_rate": 9.947384736442705e-06, "loss": 0.3346, "step": 4269 }, { "epoch": 0.19595245743655637, "grad_norm": 0.5077770948410034, "learning_rate": 9.947349254107983e-06, "loss": 0.476, "step": 4270 }, { "epoch": 0.19599834794181084, "grad_norm": 0.48464199900627136, "learning_rate": 9.94731375987645e-06, "loss": 0.4549, "step": 4271 }, { "epoch": 0.1960442384470653, "grad_norm": 0.42790719866752625, "learning_rate": 9.947278253748188e-06, "loss": 0.3358, "step": 4272 }, { "epoch": 0.19609012895231975, "grad_norm": 0.48634856939315796, "learning_rate": 9.947242735723284e-06, "loss": 0.3404, "step": 4273 }, { "epoch": 0.19613601945757422, "grad_norm": 0.5001307725906372, "learning_rate": 9.947207205801821e-06, "loss": 0.4408, "step": 4274 }, { "epoch": 0.1961819099628287, "grad_norm": 0.5179278254508972, "learning_rate": 9.947171663983889e-06, "loss": 0.4575, "step": 4275 }, { "epoch": 0.19622780046808316, "grad_norm": 0.4798543453216553, "learning_rate": 9.947136110269566e-06, "loss": 0.4852, "step": 4276 }, { "epoch": 0.1962736909733376, "grad_norm": 0.4629528224468231, "learning_rate": 9.947100544658944e-06, "loss": 0.4064, "step": 4277 }, { "epoch": 0.19631958147859208, "grad_norm": 0.5005196928977966, "learning_rate": 9.947064967152107e-06, "loss": 0.4438, "step": 4278 }, { "epoch": 0.19636547198384655, "grad_norm": 0.5028572678565979, "learning_rate": 9.947029377749141e-06, "loss": 0.4699, "step": 4279 }, { "epoch": 0.19641136248910102, "grad_norm": 0.5163561105728149, "learning_rate": 9.94699377645013e-06, "loss": 0.457, "step": 4280 }, { "epoch": 0.19645725299435546, "grad_norm": 0.45968979597091675, "learning_rate": 9.94695816325516e-06, "loss": 0.4245, "step": 4281 }, { "epoch": 0.19650314349960993, "grad_norm": 0.42907556891441345, "learning_rate": 9.946922538164316e-06, "loss": 0.3038, "step": 4282 }, { "epoch": 0.1965490340048644, "grad_norm": 0.46068835258483887, "learning_rate": 9.946886901177687e-06, "loss": 0.4054, "step": 4283 }, { "epoch": 0.19659492451011887, "grad_norm": 0.4411108195781708, "learning_rate": 9.946851252295356e-06, "loss": 0.358, "step": 4284 }, { "epoch": 0.1966408150153733, "grad_norm": 0.44611024856567383, "learning_rate": 9.946815591517407e-06, "loss": 0.373, "step": 4285 }, { "epoch": 0.19668670552062778, "grad_norm": 0.40401241183280945, "learning_rate": 9.946779918843928e-06, "loss": 0.2614, "step": 4286 }, { "epoch": 0.19673259602588225, "grad_norm": 0.4756178855895996, "learning_rate": 9.946744234275005e-06, "loss": 0.3595, "step": 4287 }, { "epoch": 0.19677848653113672, "grad_norm": 0.485768586397171, "learning_rate": 9.946708537810723e-06, "loss": 0.4089, "step": 4288 }, { "epoch": 0.19682437703639116, "grad_norm": 0.5151448845863342, "learning_rate": 9.94667282945117e-06, "loss": 0.4467, "step": 4289 }, { "epoch": 0.19687026754164563, "grad_norm": 0.48313891887664795, "learning_rate": 9.946637109196428e-06, "loss": 0.419, "step": 4290 }, { "epoch": 0.1969161580469001, "grad_norm": 0.5669131278991699, "learning_rate": 9.946601377046586e-06, "loss": 0.5904, "step": 4291 }, { "epoch": 0.19696204855215457, "grad_norm": 0.49674496054649353, "learning_rate": 9.94656563300173e-06, "loss": 0.4869, "step": 4292 }, { "epoch": 0.19700793905740902, "grad_norm": 0.4247874319553375, "learning_rate": 9.946529877061942e-06, "loss": 0.2985, "step": 4293 }, { "epoch": 0.19705382956266349, "grad_norm": 0.46898841857910156, "learning_rate": 9.946494109227313e-06, "loss": 0.3638, "step": 4294 }, { "epoch": 0.19709972006791796, "grad_norm": 0.49758079648017883, "learning_rate": 9.946458329497926e-06, "loss": 0.4661, "step": 4295 }, { "epoch": 0.1971456105731724, "grad_norm": 0.483528196811676, "learning_rate": 9.946422537873867e-06, "loss": 0.426, "step": 4296 }, { "epoch": 0.19719150107842687, "grad_norm": 0.42439407110214233, "learning_rate": 9.946386734355224e-06, "loss": 0.3243, "step": 4297 }, { "epoch": 0.19723739158368134, "grad_norm": 0.48264139890670776, "learning_rate": 9.946350918942082e-06, "loss": 0.4228, "step": 4298 }, { "epoch": 0.1972832820889358, "grad_norm": 0.5076059699058533, "learning_rate": 9.946315091634527e-06, "loss": 0.4116, "step": 4299 }, { "epoch": 0.19732917259419025, "grad_norm": 0.47341057658195496, "learning_rate": 9.946279252432646e-06, "loss": 0.3503, "step": 4300 }, { "epoch": 0.19737506309944472, "grad_norm": 0.48038241267204285, "learning_rate": 9.946243401336524e-06, "loss": 0.4433, "step": 4301 }, { "epoch": 0.1974209536046992, "grad_norm": 0.5301975011825562, "learning_rate": 9.946207538346247e-06, "loss": 0.4859, "step": 4302 }, { "epoch": 0.19746684410995366, "grad_norm": 0.4284140467643738, "learning_rate": 9.946171663461904e-06, "loss": 0.3626, "step": 4303 }, { "epoch": 0.1975127346152081, "grad_norm": 0.4876924753189087, "learning_rate": 9.946135776683576e-06, "loss": 0.4888, "step": 4304 }, { "epoch": 0.19755862512046257, "grad_norm": 0.5320894122123718, "learning_rate": 9.946099878011355e-06, "loss": 0.4871, "step": 4305 }, { "epoch": 0.19760451562571704, "grad_norm": 0.47058752179145813, "learning_rate": 9.946063967445325e-06, "loss": 0.4272, "step": 4306 }, { "epoch": 0.1976504061309715, "grad_norm": 0.42561522126197815, "learning_rate": 9.94602804498557e-06, "loss": 0.3609, "step": 4307 }, { "epoch": 0.19769629663622595, "grad_norm": 0.46116769313812256, "learning_rate": 9.94599211063218e-06, "loss": 0.411, "step": 4308 }, { "epoch": 0.19774218714148042, "grad_norm": 0.44296127557754517, "learning_rate": 9.94595616438524e-06, "loss": 0.395, "step": 4309 }, { "epoch": 0.1977880776467349, "grad_norm": 0.5113347768783569, "learning_rate": 9.945920206244837e-06, "loss": 0.4722, "step": 4310 }, { "epoch": 0.19783396815198936, "grad_norm": 0.50044184923172, "learning_rate": 9.945884236211055e-06, "loss": 0.4721, "step": 4311 }, { "epoch": 0.1978798586572438, "grad_norm": 0.4492280185222626, "learning_rate": 9.945848254283984e-06, "loss": 0.3816, "step": 4312 }, { "epoch": 0.19792574916249828, "grad_norm": 0.4583589732646942, "learning_rate": 9.945812260463707e-06, "loss": 0.3461, "step": 4313 }, { "epoch": 0.19797163966775275, "grad_norm": 0.4791368842124939, "learning_rate": 9.945776254750314e-06, "loss": 0.4349, "step": 4314 }, { "epoch": 0.19801753017300722, "grad_norm": 0.477237731218338, "learning_rate": 9.94574023714389e-06, "loss": 0.4792, "step": 4315 }, { "epoch": 0.19806342067826166, "grad_norm": 0.47030895948410034, "learning_rate": 9.945704207644523e-06, "loss": 0.3728, "step": 4316 }, { "epoch": 0.19810931118351613, "grad_norm": 0.4306229054927826, "learning_rate": 9.945668166252296e-06, "loss": 0.3525, "step": 4317 }, { "epoch": 0.1981552016887706, "grad_norm": 0.5677311420440674, "learning_rate": 9.945632112967298e-06, "loss": 0.4834, "step": 4318 }, { "epoch": 0.19820109219402507, "grad_norm": 0.4629094898700714, "learning_rate": 9.945596047789617e-06, "loss": 0.3952, "step": 4319 }, { "epoch": 0.1982469826992795, "grad_norm": 0.5262762308120728, "learning_rate": 9.945559970719339e-06, "loss": 0.5055, "step": 4320 }, { "epoch": 0.19829287320453398, "grad_norm": 0.4672391712665558, "learning_rate": 9.945523881756547e-06, "loss": 0.3768, "step": 4321 }, { "epoch": 0.19833876370978845, "grad_norm": 0.47864776849746704, "learning_rate": 9.945487780901334e-06, "loss": 0.4429, "step": 4322 }, { "epoch": 0.1983846542150429, "grad_norm": 0.4701748788356781, "learning_rate": 9.945451668153783e-06, "loss": 0.424, "step": 4323 }, { "epoch": 0.19843054472029736, "grad_norm": 0.4563615620136261, "learning_rate": 9.94541554351398e-06, "loss": 0.3921, "step": 4324 }, { "epoch": 0.19847643522555183, "grad_norm": 0.48338043689727783, "learning_rate": 9.945379406982015e-06, "loss": 0.4768, "step": 4325 }, { "epoch": 0.1985223257308063, "grad_norm": 0.4890962839126587, "learning_rate": 9.945343258557973e-06, "loss": 0.4172, "step": 4326 }, { "epoch": 0.19856821623606075, "grad_norm": 0.5320715308189392, "learning_rate": 9.945307098241941e-06, "loss": 0.3265, "step": 4327 }, { "epoch": 0.19861410674131522, "grad_norm": 0.4402206540107727, "learning_rate": 9.945270926034007e-06, "loss": 0.3586, "step": 4328 }, { "epoch": 0.1986599972465697, "grad_norm": 0.5281245708465576, "learning_rate": 9.945234741934255e-06, "loss": 0.4942, "step": 4329 }, { "epoch": 0.19870588775182416, "grad_norm": 0.5047677755355835, "learning_rate": 9.945198545942778e-06, "loss": 0.489, "step": 4330 }, { "epoch": 0.1987517782570786, "grad_norm": 0.4735115170478821, "learning_rate": 9.945162338059655e-06, "loss": 0.3846, "step": 4331 }, { "epoch": 0.19879766876233307, "grad_norm": 0.49209386110305786, "learning_rate": 9.94512611828498e-06, "loss": 0.4111, "step": 4332 }, { "epoch": 0.19884355926758754, "grad_norm": 0.5027639865875244, "learning_rate": 9.945089886618838e-06, "loss": 0.4653, "step": 4333 }, { "epoch": 0.198889449772842, "grad_norm": 0.4580379128456116, "learning_rate": 9.945053643061314e-06, "loss": 0.3582, "step": 4334 }, { "epoch": 0.19893534027809645, "grad_norm": 0.5238893628120422, "learning_rate": 9.945017387612497e-06, "loss": 0.4862, "step": 4335 }, { "epoch": 0.19898123078335092, "grad_norm": 0.4721764922142029, "learning_rate": 9.944981120272472e-06, "loss": 0.3906, "step": 4336 }, { "epoch": 0.1990271212886054, "grad_norm": 0.5390194058418274, "learning_rate": 9.94494484104133e-06, "loss": 0.395, "step": 4337 }, { "epoch": 0.19907301179385986, "grad_norm": 0.4833170771598816, "learning_rate": 9.944908549919157e-06, "loss": 0.4092, "step": 4338 }, { "epoch": 0.1991189022991143, "grad_norm": 0.4220992624759674, "learning_rate": 9.944872246906039e-06, "loss": 0.3035, "step": 4339 }, { "epoch": 0.19916479280436877, "grad_norm": 0.46783629059791565, "learning_rate": 9.944835932002064e-06, "loss": 0.3739, "step": 4340 }, { "epoch": 0.19921068330962324, "grad_norm": 0.5067040324211121, "learning_rate": 9.944799605207318e-06, "loss": 0.4772, "step": 4341 }, { "epoch": 0.1992565738148777, "grad_norm": 0.4913345277309418, "learning_rate": 9.94476326652189e-06, "loss": 0.4322, "step": 4342 }, { "epoch": 0.19930246432013216, "grad_norm": 0.4785483777523041, "learning_rate": 9.944726915945868e-06, "loss": 0.4452, "step": 4343 }, { "epoch": 0.19934835482538663, "grad_norm": 0.5592127442359924, "learning_rate": 9.94469055347934e-06, "loss": 0.4301, "step": 4344 }, { "epoch": 0.1993942453306411, "grad_norm": 0.47358712553977966, "learning_rate": 9.944654179122388e-06, "loss": 0.4043, "step": 4345 }, { "epoch": 0.19944013583589557, "grad_norm": 0.4482409358024597, "learning_rate": 9.944617792875105e-06, "loss": 0.3777, "step": 4346 }, { "epoch": 0.19948602634115, "grad_norm": 0.45446881651878357, "learning_rate": 9.944581394737576e-06, "loss": 0.3884, "step": 4347 }, { "epoch": 0.19953191684640448, "grad_norm": 0.4457714259624481, "learning_rate": 9.944544984709892e-06, "loss": 0.3887, "step": 4348 }, { "epoch": 0.19957780735165895, "grad_norm": 0.5872363448143005, "learning_rate": 9.944508562792134e-06, "loss": 0.4737, "step": 4349 }, { "epoch": 0.19962369785691342, "grad_norm": 0.478573203086853, "learning_rate": 9.944472128984396e-06, "loss": 0.4174, "step": 4350 }, { "epoch": 0.19966958836216786, "grad_norm": 0.47778937220573425, "learning_rate": 9.944435683286762e-06, "loss": 0.3973, "step": 4351 }, { "epoch": 0.19971547886742233, "grad_norm": 0.47848138213157654, "learning_rate": 9.944399225699321e-06, "loss": 0.4402, "step": 4352 }, { "epoch": 0.1997613693726768, "grad_norm": 0.5309686660766602, "learning_rate": 9.94436275622216e-06, "loss": 0.4762, "step": 4353 }, { "epoch": 0.19980725987793124, "grad_norm": 0.4534291923046112, "learning_rate": 9.94432627485537e-06, "loss": 0.3333, "step": 4354 }, { "epoch": 0.1998531503831857, "grad_norm": 0.46538591384887695, "learning_rate": 9.944289781599033e-06, "loss": 0.3901, "step": 4355 }, { "epoch": 0.19989904088844018, "grad_norm": 0.8436093926429749, "learning_rate": 9.944253276453238e-06, "loss": 0.4983, "step": 4356 }, { "epoch": 0.19994493139369465, "grad_norm": 0.46009090542793274, "learning_rate": 9.944216759418078e-06, "loss": 0.3762, "step": 4357 }, { "epoch": 0.1999908218989491, "grad_norm": 0.4920811057090759, "learning_rate": 9.944180230493634e-06, "loss": 0.4398, "step": 4358 }, { "epoch": 0.20003671240420356, "grad_norm": 0.4614555537700653, "learning_rate": 9.944143689679999e-06, "loss": 0.4109, "step": 4359 }, { "epoch": 0.20008260290945803, "grad_norm": 0.45403623580932617, "learning_rate": 9.944107136977256e-06, "loss": 0.3989, "step": 4360 }, { "epoch": 0.2001284934147125, "grad_norm": 0.4971005916595459, "learning_rate": 9.944070572385499e-06, "loss": 0.4265, "step": 4361 }, { "epoch": 0.20017438391996695, "grad_norm": 0.47762706875801086, "learning_rate": 9.944033995904812e-06, "loss": 0.3909, "step": 4362 }, { "epoch": 0.20022027442522142, "grad_norm": 0.4471714496612549, "learning_rate": 9.943997407535283e-06, "loss": 0.3805, "step": 4363 }, { "epoch": 0.2002661649304759, "grad_norm": 0.4476510286331177, "learning_rate": 9.943960807277e-06, "loss": 0.4116, "step": 4364 }, { "epoch": 0.20031205543573036, "grad_norm": 0.4717048406600952, "learning_rate": 9.943924195130052e-06, "loss": 0.4657, "step": 4365 }, { "epoch": 0.2003579459409848, "grad_norm": 0.4262549579143524, "learning_rate": 9.943887571094529e-06, "loss": 0.353, "step": 4366 }, { "epoch": 0.20040383644623927, "grad_norm": 0.4871671795845032, "learning_rate": 9.943850935170513e-06, "loss": 0.4918, "step": 4367 }, { "epoch": 0.20044972695149374, "grad_norm": 0.4724729061126709, "learning_rate": 9.943814287358099e-06, "loss": 0.4267, "step": 4368 }, { "epoch": 0.2004956174567482, "grad_norm": 0.45711904764175415, "learning_rate": 9.94377762765737e-06, "loss": 0.3473, "step": 4369 }, { "epoch": 0.20054150796200265, "grad_norm": 0.44956108927726746, "learning_rate": 9.943740956068416e-06, "loss": 0.313, "step": 4370 }, { "epoch": 0.20058739846725712, "grad_norm": 0.49146658182144165, "learning_rate": 9.943704272591325e-06, "loss": 0.3787, "step": 4371 }, { "epoch": 0.2006332889725116, "grad_norm": 0.4804185628890991, "learning_rate": 9.943667577226188e-06, "loss": 0.4134, "step": 4372 }, { "epoch": 0.20067917947776606, "grad_norm": 0.501727283000946, "learning_rate": 9.94363086997309e-06, "loss": 0.4848, "step": 4373 }, { "epoch": 0.2007250699830205, "grad_norm": 0.4757954180240631, "learning_rate": 9.943594150832117e-06, "loss": 0.3426, "step": 4374 }, { "epoch": 0.20077096048827497, "grad_norm": 0.5328327417373657, "learning_rate": 9.943557419803362e-06, "loss": 0.5253, "step": 4375 }, { "epoch": 0.20081685099352944, "grad_norm": 0.4832775890827179, "learning_rate": 9.943520676886914e-06, "loss": 0.4047, "step": 4376 }, { "epoch": 0.20086274149878391, "grad_norm": 0.4592137932777405, "learning_rate": 9.943483922082857e-06, "loss": 0.3798, "step": 4377 }, { "epoch": 0.20090863200403836, "grad_norm": 0.49707669019699097, "learning_rate": 9.943447155391281e-06, "loss": 0.452, "step": 4378 }, { "epoch": 0.20095452250929283, "grad_norm": 0.45820289850234985, "learning_rate": 9.943410376812274e-06, "loss": 0.3892, "step": 4379 }, { "epoch": 0.2010004130145473, "grad_norm": 0.47397100925445557, "learning_rate": 9.943373586345928e-06, "loss": 0.4722, "step": 4380 }, { "epoch": 0.20104630351980177, "grad_norm": 0.46828311681747437, "learning_rate": 9.943336783992327e-06, "loss": 0.4242, "step": 4381 }, { "epoch": 0.2010921940250562, "grad_norm": 0.45665767788887024, "learning_rate": 9.94329996975156e-06, "loss": 0.3851, "step": 4382 }, { "epoch": 0.20113808453031068, "grad_norm": 0.4675580561161041, "learning_rate": 9.943263143623719e-06, "loss": 0.3954, "step": 4383 }, { "epoch": 0.20118397503556515, "grad_norm": 0.45406094193458557, "learning_rate": 9.943226305608889e-06, "loss": 0.4383, "step": 4384 }, { "epoch": 0.2012298655408196, "grad_norm": 0.44739025831222534, "learning_rate": 9.943189455707161e-06, "loss": 0.3752, "step": 4385 }, { "epoch": 0.20127575604607406, "grad_norm": 0.5119508504867554, "learning_rate": 9.943152593918622e-06, "loss": 0.4103, "step": 4386 }, { "epoch": 0.20132164655132853, "grad_norm": 0.49356532096862793, "learning_rate": 9.943115720243361e-06, "loss": 0.3819, "step": 4387 }, { "epoch": 0.201367537056583, "grad_norm": 0.4203157424926758, "learning_rate": 9.943078834681465e-06, "loss": 0.3681, "step": 4388 }, { "epoch": 0.20141342756183744, "grad_norm": 0.5196059942245483, "learning_rate": 9.943041937233027e-06, "loss": 0.5161, "step": 4389 }, { "epoch": 0.2014593180670919, "grad_norm": 0.4554559290409088, "learning_rate": 9.943005027898131e-06, "loss": 0.3392, "step": 4390 }, { "epoch": 0.20150520857234638, "grad_norm": 0.4803728759288788, "learning_rate": 9.942968106676871e-06, "loss": 0.3597, "step": 4391 }, { "epoch": 0.20155109907760085, "grad_norm": 0.5130305886268616, "learning_rate": 9.94293117356933e-06, "loss": 0.4558, "step": 4392 }, { "epoch": 0.2015969895828553, "grad_norm": 0.4634862542152405, "learning_rate": 9.942894228575601e-06, "loss": 0.405, "step": 4393 }, { "epoch": 0.20164288008810977, "grad_norm": 0.4780745804309845, "learning_rate": 9.942857271695772e-06, "loss": 0.4258, "step": 4394 }, { "epoch": 0.20168877059336424, "grad_norm": 0.4796124994754791, "learning_rate": 9.94282030292993e-06, "loss": 0.4351, "step": 4395 }, { "epoch": 0.2017346610986187, "grad_norm": 0.4890759289264679, "learning_rate": 9.942783322278165e-06, "loss": 0.5176, "step": 4396 }, { "epoch": 0.20178055160387315, "grad_norm": 0.4720701277256012, "learning_rate": 9.942746329740566e-06, "loss": 0.3985, "step": 4397 }, { "epoch": 0.20182644210912762, "grad_norm": 0.47165292501449585, "learning_rate": 9.942709325317224e-06, "loss": 0.383, "step": 4398 }, { "epoch": 0.2018723326143821, "grad_norm": 0.43780720233917236, "learning_rate": 9.942672309008223e-06, "loss": 0.3484, "step": 4399 }, { "epoch": 0.20191822311963656, "grad_norm": 0.4558258652687073, "learning_rate": 9.942635280813658e-06, "loss": 0.3921, "step": 4400 }, { "epoch": 0.201964113624891, "grad_norm": 0.475436270236969, "learning_rate": 9.942598240733613e-06, "loss": 0.4596, "step": 4401 }, { "epoch": 0.20201000413014547, "grad_norm": 0.4847857654094696, "learning_rate": 9.94256118876818e-06, "loss": 0.4741, "step": 4402 }, { "epoch": 0.20205589463539994, "grad_norm": 0.467704176902771, "learning_rate": 9.942524124917447e-06, "loss": 0.3736, "step": 4403 }, { "epoch": 0.2021017851406544, "grad_norm": 0.47344571352005005, "learning_rate": 9.942487049181504e-06, "loss": 0.4159, "step": 4404 }, { "epoch": 0.20214767564590885, "grad_norm": 0.44193920493125916, "learning_rate": 9.942449961560439e-06, "loss": 0.3511, "step": 4405 }, { "epoch": 0.20219356615116332, "grad_norm": 0.5351225137710571, "learning_rate": 9.94241286205434e-06, "loss": 0.3977, "step": 4406 }, { "epoch": 0.2022394566564178, "grad_norm": 0.45293131470680237, "learning_rate": 9.942375750663302e-06, "loss": 0.3726, "step": 4407 }, { "epoch": 0.20228534716167226, "grad_norm": 0.4660302698612213, "learning_rate": 9.942338627387408e-06, "loss": 0.3984, "step": 4408 }, { "epoch": 0.2023312376669267, "grad_norm": 0.4848549962043762, "learning_rate": 9.942301492226749e-06, "loss": 0.4585, "step": 4409 }, { "epoch": 0.20237712817218118, "grad_norm": 0.49276554584503174, "learning_rate": 9.942264345181414e-06, "loss": 0.4164, "step": 4410 }, { "epoch": 0.20242301867743565, "grad_norm": 0.4952569305896759, "learning_rate": 9.942227186251495e-06, "loss": 0.4355, "step": 4411 }, { "epoch": 0.2024689091826901, "grad_norm": 0.608250617980957, "learning_rate": 9.94219001543708e-06, "loss": 0.5878, "step": 4412 }, { "epoch": 0.20251479968794456, "grad_norm": 0.4380188584327698, "learning_rate": 9.942152832738255e-06, "loss": 0.3362, "step": 4413 }, { "epoch": 0.20256069019319903, "grad_norm": 0.4906007647514343, "learning_rate": 9.942115638155114e-06, "loss": 0.445, "step": 4414 }, { "epoch": 0.2026065806984535, "grad_norm": 0.45186811685562134, "learning_rate": 9.942078431687744e-06, "loss": 0.3808, "step": 4415 }, { "epoch": 0.20265247120370794, "grad_norm": 0.4381430745124817, "learning_rate": 9.942041213336235e-06, "loss": 0.3271, "step": 4416 }, { "epoch": 0.2026983617089624, "grad_norm": 0.48071515560150146, "learning_rate": 9.942003983100678e-06, "loss": 0.4215, "step": 4417 }, { "epoch": 0.20274425221421688, "grad_norm": 0.4494588375091553, "learning_rate": 9.941966740981159e-06, "loss": 0.327, "step": 4418 }, { "epoch": 0.20279014271947135, "grad_norm": 0.4773856997489929, "learning_rate": 9.941929486977773e-06, "loss": 0.4035, "step": 4419 }, { "epoch": 0.2028360332247258, "grad_norm": 0.45517271757125854, "learning_rate": 9.941892221090604e-06, "loss": 0.3514, "step": 4420 }, { "epoch": 0.20288192372998026, "grad_norm": 0.4729015827178955, "learning_rate": 9.941854943319744e-06, "loss": 0.383, "step": 4421 }, { "epoch": 0.20292781423523473, "grad_norm": 0.5019860863685608, "learning_rate": 9.941817653665282e-06, "loss": 0.4457, "step": 4422 }, { "epoch": 0.2029737047404892, "grad_norm": 0.4792742431163788, "learning_rate": 9.94178035212731e-06, "loss": 0.4153, "step": 4423 }, { "epoch": 0.20301959524574364, "grad_norm": 0.44474557042121887, "learning_rate": 9.941743038705914e-06, "loss": 0.376, "step": 4424 }, { "epoch": 0.20306548575099811, "grad_norm": 0.4339393973350525, "learning_rate": 9.941705713401186e-06, "loss": 0.3541, "step": 4425 }, { "epoch": 0.20311137625625258, "grad_norm": 0.49014732241630554, "learning_rate": 9.941668376213216e-06, "loss": 0.4878, "step": 4426 }, { "epoch": 0.20315726676150705, "grad_norm": 0.4729020893573761, "learning_rate": 9.941631027142093e-06, "loss": 0.3974, "step": 4427 }, { "epoch": 0.2032031572667615, "grad_norm": 0.4551442861557007, "learning_rate": 9.941593666187907e-06, "loss": 0.3624, "step": 4428 }, { "epoch": 0.20324904777201597, "grad_norm": 0.44830113649368286, "learning_rate": 9.94155629335075e-06, "loss": 0.4464, "step": 4429 }, { "epoch": 0.20329493827727044, "grad_norm": 0.47621333599090576, "learning_rate": 9.941518908630707e-06, "loss": 0.4121, "step": 4430 }, { "epoch": 0.2033408287825249, "grad_norm": 0.4960765540599823, "learning_rate": 9.94148151202787e-06, "loss": 0.4427, "step": 4431 }, { "epoch": 0.20338671928777935, "grad_norm": 0.45448818802833557, "learning_rate": 9.941444103542332e-06, "loss": 0.3995, "step": 4432 }, { "epoch": 0.20343260979303382, "grad_norm": 0.4503958225250244, "learning_rate": 9.941406683174179e-06, "loss": 0.37, "step": 4433 }, { "epoch": 0.2034785002982883, "grad_norm": 0.5781404376029968, "learning_rate": 9.941369250923503e-06, "loss": 0.4018, "step": 4434 }, { "epoch": 0.20352439080354276, "grad_norm": 0.4687477648258209, "learning_rate": 9.941331806790392e-06, "loss": 0.3866, "step": 4435 }, { "epoch": 0.2035702813087972, "grad_norm": 0.42383551597595215, "learning_rate": 9.94129435077494e-06, "loss": 0.316, "step": 4436 }, { "epoch": 0.20361617181405167, "grad_norm": 0.46412205696105957, "learning_rate": 9.941256882877234e-06, "loss": 0.3539, "step": 4437 }, { "epoch": 0.20366206231930614, "grad_norm": 0.47226232290267944, "learning_rate": 9.941219403097365e-06, "loss": 0.3929, "step": 4438 }, { "epoch": 0.2037079528245606, "grad_norm": 0.4539547562599182, "learning_rate": 9.94118191143542e-06, "loss": 0.3962, "step": 4439 }, { "epoch": 0.20375384332981505, "grad_norm": 0.4528510570526123, "learning_rate": 9.941144407891495e-06, "loss": 0.3672, "step": 4440 }, { "epoch": 0.20379973383506952, "grad_norm": 0.464222252368927, "learning_rate": 9.941106892465675e-06, "loss": 0.3846, "step": 4441 }, { "epoch": 0.203845624340324, "grad_norm": 0.45024967193603516, "learning_rate": 9.941069365158054e-06, "loss": 0.3263, "step": 4442 }, { "epoch": 0.20389151484557844, "grad_norm": 0.466435045003891, "learning_rate": 9.941031825968722e-06, "loss": 0.4019, "step": 4443 }, { "epoch": 0.2039374053508329, "grad_norm": 0.49162209033966064, "learning_rate": 9.940994274897766e-06, "loss": 0.5259, "step": 4444 }, { "epoch": 0.20398329585608738, "grad_norm": 0.47819215059280396, "learning_rate": 9.940956711945278e-06, "loss": 0.4076, "step": 4445 }, { "epoch": 0.20402918636134185, "grad_norm": 0.4855642020702362, "learning_rate": 9.940919137111348e-06, "loss": 0.4123, "step": 4446 }, { "epoch": 0.2040750768665963, "grad_norm": 0.49447107315063477, "learning_rate": 9.940881550396068e-06, "loss": 0.4585, "step": 4447 }, { "epoch": 0.20412096737185076, "grad_norm": 0.4881538450717926, "learning_rate": 9.940843951799526e-06, "loss": 0.4688, "step": 4448 }, { "epoch": 0.20416685787710523, "grad_norm": 0.40869176387786865, "learning_rate": 9.940806341321816e-06, "loss": 0.3085, "step": 4449 }, { "epoch": 0.2042127483823597, "grad_norm": 0.4979475140571594, "learning_rate": 9.940768718963023e-06, "loss": 0.4415, "step": 4450 }, { "epoch": 0.20425863888761414, "grad_norm": 0.47777259349823, "learning_rate": 9.940731084723243e-06, "loss": 0.4227, "step": 4451 }, { "epoch": 0.2043045293928686, "grad_norm": 0.4737626612186432, "learning_rate": 9.940693438602563e-06, "loss": 0.4377, "step": 4452 }, { "epoch": 0.20435041989812308, "grad_norm": 0.504940390586853, "learning_rate": 9.940655780601075e-06, "loss": 0.4374, "step": 4453 }, { "epoch": 0.20439631040337755, "grad_norm": 0.4666500985622406, "learning_rate": 9.94061811071887e-06, "loss": 0.4497, "step": 4454 }, { "epoch": 0.204442200908632, "grad_norm": 0.5264491438865662, "learning_rate": 9.940580428956038e-06, "loss": 0.4791, "step": 4455 }, { "epoch": 0.20448809141388646, "grad_norm": 0.525458037853241, "learning_rate": 9.940542735312667e-06, "loss": 0.486, "step": 4456 }, { "epoch": 0.20453398191914093, "grad_norm": 0.4548773169517517, "learning_rate": 9.94050502978885e-06, "loss": 0.3781, "step": 4457 }, { "epoch": 0.2045798724243954, "grad_norm": 0.48906221985816956, "learning_rate": 9.940467312384679e-06, "loss": 0.4616, "step": 4458 }, { "epoch": 0.20462576292964985, "grad_norm": 0.5151055455207825, "learning_rate": 9.940429583100244e-06, "loss": 0.4066, "step": 4459 }, { "epoch": 0.20467165343490432, "grad_norm": 0.4916255474090576, "learning_rate": 9.940391841935634e-06, "loss": 0.3986, "step": 4460 }, { "epoch": 0.20471754394015879, "grad_norm": 0.5003378391265869, "learning_rate": 9.940354088890942e-06, "loss": 0.4381, "step": 4461 }, { "epoch": 0.20476343444541326, "grad_norm": 0.49715420603752136, "learning_rate": 9.940316323966257e-06, "loss": 0.3912, "step": 4462 }, { "epoch": 0.2048093249506677, "grad_norm": 0.4934089183807373, "learning_rate": 9.94027854716167e-06, "loss": 0.4619, "step": 4463 }, { "epoch": 0.20485521545592217, "grad_norm": 0.4589710831642151, "learning_rate": 9.940240758477273e-06, "loss": 0.424, "step": 4464 }, { "epoch": 0.20490110596117664, "grad_norm": 0.43391865491867065, "learning_rate": 9.940202957913155e-06, "loss": 0.33, "step": 4465 }, { "epoch": 0.2049469964664311, "grad_norm": 0.45424923300743103, "learning_rate": 9.940165145469409e-06, "loss": 0.378, "step": 4466 }, { "epoch": 0.20499288697168555, "grad_norm": 0.4214986562728882, "learning_rate": 9.940127321146125e-06, "loss": 0.3123, "step": 4467 }, { "epoch": 0.20503877747694002, "grad_norm": 0.47325822710990906, "learning_rate": 9.940089484943394e-06, "loss": 0.4446, "step": 4468 }, { "epoch": 0.2050846679821945, "grad_norm": 0.4586750268936157, "learning_rate": 9.940051636861306e-06, "loss": 0.4123, "step": 4469 }, { "epoch": 0.20513055848744893, "grad_norm": 0.46702393889427185, "learning_rate": 9.940013776899954e-06, "loss": 0.4035, "step": 4470 }, { "epoch": 0.2051764489927034, "grad_norm": 0.4892576336860657, "learning_rate": 9.939975905059427e-06, "loss": 0.4395, "step": 4471 }, { "epoch": 0.20522233949795787, "grad_norm": 0.4993913769721985, "learning_rate": 9.939938021339818e-06, "loss": 0.4919, "step": 4472 }, { "epoch": 0.20526823000321234, "grad_norm": 0.46500056982040405, "learning_rate": 9.939900125741217e-06, "loss": 0.3916, "step": 4473 }, { "epoch": 0.20531412050846679, "grad_norm": 0.540874183177948, "learning_rate": 9.939862218263715e-06, "loss": 0.5311, "step": 4474 }, { "epoch": 0.20536001101372126, "grad_norm": 0.4383721947669983, "learning_rate": 9.939824298907403e-06, "loss": 0.3548, "step": 4475 }, { "epoch": 0.20540590151897573, "grad_norm": 0.4833926558494568, "learning_rate": 9.939786367672373e-06, "loss": 0.4557, "step": 4476 }, { "epoch": 0.2054517920242302, "grad_norm": 0.47873058915138245, "learning_rate": 9.939748424558716e-06, "loss": 0.3945, "step": 4477 }, { "epoch": 0.20549768252948464, "grad_norm": 0.4713362157344818, "learning_rate": 9.939710469566524e-06, "loss": 0.4332, "step": 4478 }, { "epoch": 0.2055435730347391, "grad_norm": 0.4947201609611511, "learning_rate": 9.939672502695884e-06, "loss": 0.4073, "step": 4479 }, { "epoch": 0.20558946353999358, "grad_norm": 0.4919685125350952, "learning_rate": 9.939634523946895e-06, "loss": 0.4273, "step": 4480 }, { "epoch": 0.20563535404524805, "grad_norm": 0.4664944112300873, "learning_rate": 9.939596533319643e-06, "loss": 0.3558, "step": 4481 }, { "epoch": 0.2056812445505025, "grad_norm": 0.5513607263565063, "learning_rate": 9.939558530814217e-06, "loss": 0.4335, "step": 4482 }, { "epoch": 0.20572713505575696, "grad_norm": 0.49874311685562134, "learning_rate": 9.939520516430714e-06, "loss": 0.4148, "step": 4483 }, { "epoch": 0.20577302556101143, "grad_norm": 0.4702422618865967, "learning_rate": 9.939482490169223e-06, "loss": 0.4007, "step": 4484 }, { "epoch": 0.2058189160662659, "grad_norm": 0.48694729804992676, "learning_rate": 9.939444452029836e-06, "loss": 0.4521, "step": 4485 }, { "epoch": 0.20586480657152034, "grad_norm": 0.48092415928840637, "learning_rate": 9.939406402012643e-06, "loss": 0.4273, "step": 4486 }, { "epoch": 0.2059106970767748, "grad_norm": 0.49558618664741516, "learning_rate": 9.93936834011774e-06, "loss": 0.4962, "step": 4487 }, { "epoch": 0.20595658758202928, "grad_norm": 0.4936516284942627, "learning_rate": 9.939330266345211e-06, "loss": 0.5067, "step": 4488 }, { "epoch": 0.20600247808728375, "grad_norm": 0.46066442131996155, "learning_rate": 9.939292180695153e-06, "loss": 0.4153, "step": 4489 }, { "epoch": 0.2060483685925382, "grad_norm": 0.45661619305610657, "learning_rate": 9.939254083167655e-06, "loss": 0.3974, "step": 4490 }, { "epoch": 0.20609425909779266, "grad_norm": 0.44079911708831787, "learning_rate": 9.93921597376281e-06, "loss": 0.3302, "step": 4491 }, { "epoch": 0.20614014960304713, "grad_norm": 0.46443063020706177, "learning_rate": 9.939177852480713e-06, "loss": 0.3847, "step": 4492 }, { "epoch": 0.2061860401083016, "grad_norm": 0.46124452352523804, "learning_rate": 9.939139719321449e-06, "loss": 0.3719, "step": 4493 }, { "epoch": 0.20623193061355605, "grad_norm": 0.43286919593811035, "learning_rate": 9.939101574285113e-06, "loss": 0.3668, "step": 4494 }, { "epoch": 0.20627782111881052, "grad_norm": 0.46169283986091614, "learning_rate": 9.939063417371797e-06, "loss": 0.3729, "step": 4495 }, { "epoch": 0.206323711624065, "grad_norm": 0.45073485374450684, "learning_rate": 9.93902524858159e-06, "loss": 0.3499, "step": 4496 }, { "epoch": 0.20636960212931946, "grad_norm": 0.5126428008079529, "learning_rate": 9.938987067914589e-06, "loss": 0.4592, "step": 4497 }, { "epoch": 0.2064154926345739, "grad_norm": 0.49929559230804443, "learning_rate": 9.938948875370882e-06, "loss": 0.5132, "step": 4498 }, { "epoch": 0.20646138313982837, "grad_norm": 0.4436069130897522, "learning_rate": 9.938910670950562e-06, "loss": 0.4044, "step": 4499 }, { "epoch": 0.20650727364508284, "grad_norm": 0.4961375892162323, "learning_rate": 9.93887245465372e-06, "loss": 0.4579, "step": 4500 }, { "epoch": 0.20655316415033728, "grad_norm": 0.5659322738647461, "learning_rate": 9.938834226480449e-06, "loss": 0.4344, "step": 4501 }, { "epoch": 0.20659905465559175, "grad_norm": 0.5714291930198669, "learning_rate": 9.93879598643084e-06, "loss": 0.435, "step": 4502 }, { "epoch": 0.20664494516084622, "grad_norm": 0.5349580645561218, "learning_rate": 9.938757734504987e-06, "loss": 0.5182, "step": 4503 }, { "epoch": 0.2066908356661007, "grad_norm": 0.4456104338169098, "learning_rate": 9.938719470702977e-06, "loss": 0.3345, "step": 4504 }, { "epoch": 0.20673672617135513, "grad_norm": 0.48803824186325073, "learning_rate": 9.938681195024908e-06, "loss": 0.4485, "step": 4505 }, { "epoch": 0.2067826166766096, "grad_norm": 0.4779717028141022, "learning_rate": 9.93864290747087e-06, "loss": 0.4255, "step": 4506 }, { "epoch": 0.20682850718186407, "grad_norm": 0.5309612154960632, "learning_rate": 9.938604608040952e-06, "loss": 0.4291, "step": 4507 }, { "epoch": 0.20687439768711854, "grad_norm": 0.4602609872817993, "learning_rate": 9.93856629673525e-06, "loss": 0.4199, "step": 4508 }, { "epoch": 0.206920288192373, "grad_norm": 0.4964911639690399, "learning_rate": 9.938527973553856e-06, "loss": 0.4335, "step": 4509 }, { "epoch": 0.20696617869762746, "grad_norm": 0.47558078169822693, "learning_rate": 9.938489638496859e-06, "loss": 0.3687, "step": 4510 }, { "epoch": 0.20701206920288193, "grad_norm": 0.5527694225311279, "learning_rate": 9.938451291564352e-06, "loss": 0.4992, "step": 4511 }, { "epoch": 0.2070579597081364, "grad_norm": 0.4722549617290497, "learning_rate": 9.938412932756431e-06, "loss": 0.3937, "step": 4512 }, { "epoch": 0.20710385021339084, "grad_norm": 0.4794664680957794, "learning_rate": 9.938374562073185e-06, "loss": 0.3461, "step": 4513 }, { "epoch": 0.2071497407186453, "grad_norm": 0.5057462453842163, "learning_rate": 9.938336179514704e-06, "loss": 0.4597, "step": 4514 }, { "epoch": 0.20719563122389978, "grad_norm": 0.483297735452652, "learning_rate": 9.938297785081086e-06, "loss": 0.4136, "step": 4515 }, { "epoch": 0.20724152172915425, "grad_norm": 0.43480783700942993, "learning_rate": 9.93825937877242e-06, "loss": 0.3352, "step": 4516 }, { "epoch": 0.2072874122344087, "grad_norm": 0.48694080114364624, "learning_rate": 9.938220960588799e-06, "loss": 0.4332, "step": 4517 }, { "epoch": 0.20733330273966316, "grad_norm": 0.4349701404571533, "learning_rate": 9.938182530530313e-06, "loss": 0.3012, "step": 4518 }, { "epoch": 0.20737919324491763, "grad_norm": 0.462764710187912, "learning_rate": 9.938144088597059e-06, "loss": 0.3018, "step": 4519 }, { "epoch": 0.2074250837501721, "grad_norm": 0.5145973563194275, "learning_rate": 9.938105634789126e-06, "loss": 0.5471, "step": 4520 }, { "epoch": 0.20747097425542654, "grad_norm": 0.4801902174949646, "learning_rate": 9.938067169106606e-06, "loss": 0.3622, "step": 4521 }, { "epoch": 0.207516864760681, "grad_norm": 0.41820165514945984, "learning_rate": 9.938028691549594e-06, "loss": 0.2819, "step": 4522 }, { "epoch": 0.20756275526593548, "grad_norm": 0.5365082025527954, "learning_rate": 9.937990202118183e-06, "loss": 0.412, "step": 4523 }, { "epoch": 0.20760864577118995, "grad_norm": 0.5280576944351196, "learning_rate": 9.937951700812463e-06, "loss": 0.5134, "step": 4524 }, { "epoch": 0.2076545362764444, "grad_norm": 0.4733649790287018, "learning_rate": 9.937913187632528e-06, "loss": 0.4668, "step": 4525 }, { "epoch": 0.20770042678169887, "grad_norm": 0.48566192388534546, "learning_rate": 9.93787466257847e-06, "loss": 0.4427, "step": 4526 }, { "epoch": 0.20774631728695334, "grad_norm": 0.46069398522377014, "learning_rate": 9.937836125650382e-06, "loss": 0.4217, "step": 4527 }, { "epoch": 0.2077922077922078, "grad_norm": 0.4433155357837677, "learning_rate": 9.937797576848356e-06, "loss": 0.3643, "step": 4528 }, { "epoch": 0.20783809829746225, "grad_norm": 0.5018631815910339, "learning_rate": 9.937759016172486e-06, "loss": 0.4516, "step": 4529 }, { "epoch": 0.20788398880271672, "grad_norm": 0.4642772972583771, "learning_rate": 9.937720443622862e-06, "loss": 0.3228, "step": 4530 }, { "epoch": 0.2079298793079712, "grad_norm": 0.43649399280548096, "learning_rate": 9.93768185919958e-06, "loss": 0.3555, "step": 4531 }, { "epoch": 0.20797576981322563, "grad_norm": 0.9431378245353699, "learning_rate": 9.937643262902732e-06, "loss": 0.4603, "step": 4532 }, { "epoch": 0.2080216603184801, "grad_norm": 0.47615882754325867, "learning_rate": 9.93760465473241e-06, "loss": 0.3854, "step": 4533 }, { "epoch": 0.20806755082373457, "grad_norm": 0.48449355363845825, "learning_rate": 9.937566034688708e-06, "loss": 0.446, "step": 4534 }, { "epoch": 0.20811344132898904, "grad_norm": 0.4414539039134979, "learning_rate": 9.937527402771717e-06, "loss": 0.3785, "step": 4535 }, { "epoch": 0.20815933183424348, "grad_norm": 0.48071879148483276, "learning_rate": 9.937488758981531e-06, "loss": 0.3833, "step": 4536 }, { "epoch": 0.20820522233949795, "grad_norm": 0.4578094482421875, "learning_rate": 9.937450103318242e-06, "loss": 0.3959, "step": 4537 }, { "epoch": 0.20825111284475242, "grad_norm": 0.4642413556575775, "learning_rate": 9.937411435781945e-06, "loss": 0.4447, "step": 4538 }, { "epoch": 0.2082970033500069, "grad_norm": 0.4551283121109009, "learning_rate": 9.937372756372732e-06, "loss": 0.3615, "step": 4539 }, { "epoch": 0.20834289385526134, "grad_norm": 0.4219685196876526, "learning_rate": 9.937334065090694e-06, "loss": 0.356, "step": 4540 }, { "epoch": 0.2083887843605158, "grad_norm": 0.47293487191200256, "learning_rate": 9.937295361935928e-06, "loss": 0.4564, "step": 4541 }, { "epoch": 0.20843467486577028, "grad_norm": 0.4208858907222748, "learning_rate": 9.937256646908524e-06, "loss": 0.3348, "step": 4542 }, { "epoch": 0.20848056537102475, "grad_norm": 0.4567595422267914, "learning_rate": 9.937217920008576e-06, "loss": 0.3791, "step": 4543 }, { "epoch": 0.2085264558762792, "grad_norm": 0.3732040524482727, "learning_rate": 9.937179181236177e-06, "loss": 0.2618, "step": 4544 }, { "epoch": 0.20857234638153366, "grad_norm": 0.4999716281890869, "learning_rate": 9.937140430591421e-06, "loss": 0.4753, "step": 4545 }, { "epoch": 0.20861823688678813, "grad_norm": 0.45168647170066833, "learning_rate": 9.937101668074399e-06, "loss": 0.4372, "step": 4546 }, { "epoch": 0.2086641273920426, "grad_norm": 0.476558119058609, "learning_rate": 9.937062893685207e-06, "loss": 0.4153, "step": 4547 }, { "epoch": 0.20871001789729704, "grad_norm": 0.4637129008769989, "learning_rate": 9.937024107423936e-06, "loss": 0.4591, "step": 4548 }, { "epoch": 0.2087559084025515, "grad_norm": 0.45690834522247314, "learning_rate": 9.936985309290681e-06, "loss": 0.4383, "step": 4549 }, { "epoch": 0.20880179890780598, "grad_norm": 0.46199309825897217, "learning_rate": 9.936946499285535e-06, "loss": 0.4008, "step": 4550 }, { "epoch": 0.20884768941306045, "grad_norm": 0.45231765508651733, "learning_rate": 9.93690767740859e-06, "loss": 0.377, "step": 4551 }, { "epoch": 0.2088935799183149, "grad_norm": 0.5034369230270386, "learning_rate": 9.936868843659939e-06, "loss": 0.4892, "step": 4552 }, { "epoch": 0.20893947042356936, "grad_norm": 0.46708884835243225, "learning_rate": 9.93682999803968e-06, "loss": 0.3877, "step": 4553 }, { "epoch": 0.20898536092882383, "grad_norm": 0.46299293637275696, "learning_rate": 9.936791140547899e-06, "loss": 0.3464, "step": 4554 }, { "epoch": 0.2090312514340783, "grad_norm": 0.4468526244163513, "learning_rate": 9.936752271184696e-06, "loss": 0.3677, "step": 4555 }, { "epoch": 0.20907714193933274, "grad_norm": 0.44090819358825684, "learning_rate": 9.93671338995016e-06, "loss": 0.3098, "step": 4556 }, { "epoch": 0.20912303244458721, "grad_norm": 0.4699522852897644, "learning_rate": 9.936674496844387e-06, "loss": 0.376, "step": 4557 }, { "epoch": 0.20916892294984168, "grad_norm": 0.53037428855896, "learning_rate": 9.936635591867472e-06, "loss": 0.4554, "step": 4558 }, { "epoch": 0.20921481345509613, "grad_norm": 0.4777945578098297, "learning_rate": 9.936596675019504e-06, "loss": 0.4197, "step": 4559 }, { "epoch": 0.2092607039603506, "grad_norm": 0.45397379994392395, "learning_rate": 9.936557746300579e-06, "loss": 0.4053, "step": 4560 }, { "epoch": 0.20930659446560507, "grad_norm": 0.492824524641037, "learning_rate": 9.936518805710792e-06, "loss": 0.4595, "step": 4561 }, { "epoch": 0.20935248497085954, "grad_norm": 0.47252020239830017, "learning_rate": 9.936479853250235e-06, "loss": 0.4659, "step": 4562 }, { "epoch": 0.20939837547611398, "grad_norm": 0.47343355417251587, "learning_rate": 9.936440888919e-06, "loss": 0.4794, "step": 4563 }, { "epoch": 0.20944426598136845, "grad_norm": 0.4707490801811218, "learning_rate": 9.936401912717186e-06, "loss": 0.3712, "step": 4564 }, { "epoch": 0.20949015648662292, "grad_norm": 0.45677340030670166, "learning_rate": 9.936362924644882e-06, "loss": 0.3616, "step": 4565 }, { "epoch": 0.2095360469918774, "grad_norm": 0.48813164234161377, "learning_rate": 9.936323924702182e-06, "loss": 0.4205, "step": 4566 }, { "epoch": 0.20958193749713183, "grad_norm": 0.45545223355293274, "learning_rate": 9.93628491288918e-06, "loss": 0.3896, "step": 4567 }, { "epoch": 0.2096278280023863, "grad_norm": 0.44278645515441895, "learning_rate": 9.936245889205974e-06, "loss": 0.3779, "step": 4568 }, { "epoch": 0.20967371850764077, "grad_norm": 0.4800693392753601, "learning_rate": 9.936206853652652e-06, "loss": 0.4588, "step": 4569 }, { "epoch": 0.20971960901289524, "grad_norm": 0.43922656774520874, "learning_rate": 9.936167806229313e-06, "loss": 0.3885, "step": 4570 }, { "epoch": 0.20976549951814968, "grad_norm": 0.4473532438278198, "learning_rate": 9.936128746936046e-06, "loss": 0.3586, "step": 4571 }, { "epoch": 0.20981139002340415, "grad_norm": 0.5110666155815125, "learning_rate": 9.93608967577295e-06, "loss": 0.4732, "step": 4572 }, { "epoch": 0.20985728052865862, "grad_norm": 0.4744645357131958, "learning_rate": 9.936050592740112e-06, "loss": 0.4136, "step": 4573 }, { "epoch": 0.2099031710339131, "grad_norm": 0.44420140981674194, "learning_rate": 9.936011497837632e-06, "loss": 0.321, "step": 4574 }, { "epoch": 0.20994906153916754, "grad_norm": 0.4751008450984955, "learning_rate": 9.935972391065602e-06, "loss": 0.406, "step": 4575 }, { "epoch": 0.209994952044422, "grad_norm": 0.4755615293979645, "learning_rate": 9.935933272424118e-06, "loss": 0.4121, "step": 4576 }, { "epoch": 0.21004084254967648, "grad_norm": 0.5103150606155396, "learning_rate": 9.935894141913272e-06, "loss": 0.4388, "step": 4577 }, { "epoch": 0.21008673305493095, "grad_norm": 0.46369847655296326, "learning_rate": 9.935854999533157e-06, "loss": 0.4117, "step": 4578 }, { "epoch": 0.2101326235601854, "grad_norm": 0.4254971444606781, "learning_rate": 9.93581584528387e-06, "loss": 0.3513, "step": 4579 }, { "epoch": 0.21017851406543986, "grad_norm": 0.4889409840106964, "learning_rate": 9.935776679165501e-06, "loss": 0.4657, "step": 4580 }, { "epoch": 0.21022440457069433, "grad_norm": 0.5066528916358948, "learning_rate": 9.935737501178148e-06, "loss": 0.4835, "step": 4581 }, { "epoch": 0.2102702950759488, "grad_norm": 0.4572798013687134, "learning_rate": 9.935698311321906e-06, "loss": 0.3487, "step": 4582 }, { "epoch": 0.21031618558120324, "grad_norm": 0.4852089583873749, "learning_rate": 9.935659109596865e-06, "loss": 0.4318, "step": 4583 }, { "epoch": 0.2103620760864577, "grad_norm": 0.45804309844970703, "learning_rate": 9.935619896003124e-06, "loss": 0.418, "step": 4584 }, { "epoch": 0.21040796659171218, "grad_norm": 0.45562052726745605, "learning_rate": 9.935580670540772e-06, "loss": 0.4115, "step": 4585 }, { "epoch": 0.21045385709696665, "grad_norm": 0.4580683410167694, "learning_rate": 9.935541433209907e-06, "loss": 0.3851, "step": 4586 }, { "epoch": 0.2104997476022211, "grad_norm": 0.4790094494819641, "learning_rate": 9.935502184010624e-06, "loss": 0.4472, "step": 4587 }, { "epoch": 0.21054563810747556, "grad_norm": 0.46903327107429504, "learning_rate": 9.935462922943016e-06, "loss": 0.4229, "step": 4588 }, { "epoch": 0.21059152861273003, "grad_norm": 0.4421257972717285, "learning_rate": 9.935423650007176e-06, "loss": 0.3426, "step": 4589 }, { "epoch": 0.21063741911798448, "grad_norm": 0.4814123511314392, "learning_rate": 9.9353843652032e-06, "loss": 0.4254, "step": 4590 }, { "epoch": 0.21068330962323895, "grad_norm": 0.4628497064113617, "learning_rate": 9.935345068531182e-06, "loss": 0.3915, "step": 4591 }, { "epoch": 0.21072920012849342, "grad_norm": 0.5278543829917908, "learning_rate": 9.935305759991218e-06, "loss": 0.4866, "step": 4592 }, { "epoch": 0.21077509063374789, "grad_norm": 0.5155585408210754, "learning_rate": 9.9352664395834e-06, "loss": 0.516, "step": 4593 }, { "epoch": 0.21082098113900233, "grad_norm": 0.4477238655090332, "learning_rate": 9.935227107307824e-06, "loss": 0.4218, "step": 4594 }, { "epoch": 0.2108668716442568, "grad_norm": 0.4337116479873657, "learning_rate": 9.935187763164585e-06, "loss": 0.3589, "step": 4595 }, { "epoch": 0.21091276214951127, "grad_norm": 0.44227463006973267, "learning_rate": 9.935148407153776e-06, "loss": 0.3419, "step": 4596 }, { "epoch": 0.21095865265476574, "grad_norm": 0.49109065532684326, "learning_rate": 9.935109039275495e-06, "loss": 0.4522, "step": 4597 }, { "epoch": 0.21100454316002018, "grad_norm": 0.43904322385787964, "learning_rate": 9.935069659529832e-06, "loss": 0.4285, "step": 4598 }, { "epoch": 0.21105043366527465, "grad_norm": 0.49477484822273254, "learning_rate": 9.935030267916885e-06, "loss": 0.4747, "step": 4599 }, { "epoch": 0.21109632417052912, "grad_norm": 0.5089412927627563, "learning_rate": 9.934990864436748e-06, "loss": 0.5179, "step": 4600 }, { "epoch": 0.2111422146757836, "grad_norm": 0.44941091537475586, "learning_rate": 9.934951449089515e-06, "loss": 0.3687, "step": 4601 }, { "epoch": 0.21118810518103803, "grad_norm": 0.4486393630504608, "learning_rate": 9.93491202187528e-06, "loss": 0.3638, "step": 4602 }, { "epoch": 0.2112339956862925, "grad_norm": 0.45564234256744385, "learning_rate": 9.934872582794142e-06, "loss": 0.3335, "step": 4603 }, { "epoch": 0.21127988619154697, "grad_norm": 0.4690791964530945, "learning_rate": 9.934833131846191e-06, "loss": 0.3744, "step": 4604 }, { "epoch": 0.21132577669680144, "grad_norm": 0.5168153643608093, "learning_rate": 9.934793669031524e-06, "loss": 0.4792, "step": 4605 }, { "epoch": 0.21137166720205589, "grad_norm": 0.41905689239501953, "learning_rate": 9.934754194350235e-06, "loss": 0.3344, "step": 4606 }, { "epoch": 0.21141755770731036, "grad_norm": 0.5103574991226196, "learning_rate": 9.934714707802421e-06, "loss": 0.4452, "step": 4607 }, { "epoch": 0.21146344821256483, "grad_norm": 0.43421080708503723, "learning_rate": 9.934675209388174e-06, "loss": 0.2992, "step": 4608 }, { "epoch": 0.2115093387178193, "grad_norm": 0.4528566598892212, "learning_rate": 9.934635699107593e-06, "loss": 0.3869, "step": 4609 }, { "epoch": 0.21155522922307374, "grad_norm": 0.4664683938026428, "learning_rate": 9.934596176960769e-06, "loss": 0.3571, "step": 4610 }, { "epoch": 0.2116011197283282, "grad_norm": 0.4770258367061615, "learning_rate": 9.934556642947798e-06, "loss": 0.4233, "step": 4611 }, { "epoch": 0.21164701023358268, "grad_norm": 0.465563029050827, "learning_rate": 9.934517097068777e-06, "loss": 0.415, "step": 4612 }, { "epoch": 0.21169290073883715, "grad_norm": 0.4937037527561188, "learning_rate": 9.934477539323799e-06, "loss": 0.4737, "step": 4613 }, { "epoch": 0.2117387912440916, "grad_norm": 0.47488823533058167, "learning_rate": 9.93443796971296e-06, "loss": 0.4374, "step": 4614 }, { "epoch": 0.21178468174934606, "grad_norm": 0.512753427028656, "learning_rate": 9.934398388236354e-06, "loss": 0.4745, "step": 4615 }, { "epoch": 0.21183057225460053, "grad_norm": 0.4905538260936737, "learning_rate": 9.93435879489408e-06, "loss": 0.4449, "step": 4616 }, { "epoch": 0.21187646275985497, "grad_norm": 0.4729050397872925, "learning_rate": 9.934319189686228e-06, "loss": 0.3873, "step": 4617 }, { "epoch": 0.21192235326510944, "grad_norm": 0.4916042983531952, "learning_rate": 9.934279572612895e-06, "loss": 0.4925, "step": 4618 }, { "epoch": 0.2119682437703639, "grad_norm": 0.47090786695480347, "learning_rate": 9.93423994367418e-06, "loss": 0.3606, "step": 4619 }, { "epoch": 0.21201413427561838, "grad_norm": 0.45816871523857117, "learning_rate": 9.934200302870173e-06, "loss": 0.3507, "step": 4620 }, { "epoch": 0.21206002478087282, "grad_norm": 0.4796963632106781, "learning_rate": 9.934160650200972e-06, "loss": 0.4552, "step": 4621 }, { "epoch": 0.2121059152861273, "grad_norm": 0.42053475975990295, "learning_rate": 9.934120985666672e-06, "loss": 0.3078, "step": 4622 }, { "epoch": 0.21215180579138176, "grad_norm": 0.4671541452407837, "learning_rate": 9.934081309267369e-06, "loss": 0.3931, "step": 4623 }, { "epoch": 0.21219769629663623, "grad_norm": 0.48593783378601074, "learning_rate": 9.934041621003156e-06, "loss": 0.3703, "step": 4624 }, { "epoch": 0.21224358680189068, "grad_norm": 0.5138500332832336, "learning_rate": 9.934001920874132e-06, "loss": 0.5261, "step": 4625 }, { "epoch": 0.21228947730714515, "grad_norm": 0.5050431489944458, "learning_rate": 9.93396220888039e-06, "loss": 0.4324, "step": 4626 }, { "epoch": 0.21233536781239962, "grad_norm": 0.4766155481338501, "learning_rate": 9.933922485022025e-06, "loss": 0.4238, "step": 4627 }, { "epoch": 0.2123812583176541, "grad_norm": 0.467413991689682, "learning_rate": 9.933882749299134e-06, "loss": 0.3753, "step": 4628 }, { "epoch": 0.21242714882290853, "grad_norm": 0.4811027944087982, "learning_rate": 9.933843001711812e-06, "loss": 0.4998, "step": 4629 }, { "epoch": 0.212473039328163, "grad_norm": 0.44246906042099, "learning_rate": 9.933803242260155e-06, "loss": 0.3488, "step": 4630 }, { "epoch": 0.21251892983341747, "grad_norm": 0.4997427463531494, "learning_rate": 9.933763470944259e-06, "loss": 0.4519, "step": 4631 }, { "epoch": 0.21256482033867194, "grad_norm": 0.46644890308380127, "learning_rate": 9.933723687764218e-06, "loss": 0.3926, "step": 4632 }, { "epoch": 0.21261071084392638, "grad_norm": 0.48944222927093506, "learning_rate": 9.93368389272013e-06, "loss": 0.3765, "step": 4633 }, { "epoch": 0.21265660134918085, "grad_norm": 0.4626849591732025, "learning_rate": 9.93364408581209e-06, "loss": 0.3711, "step": 4634 }, { "epoch": 0.21270249185443532, "grad_norm": 0.6008056998252869, "learning_rate": 9.933604267040191e-06, "loss": 0.5543, "step": 4635 }, { "epoch": 0.2127483823596898, "grad_norm": 0.4939439296722412, "learning_rate": 9.933564436404531e-06, "loss": 0.5149, "step": 4636 }, { "epoch": 0.21279427286494423, "grad_norm": 0.4714864194393158, "learning_rate": 9.933524593905206e-06, "loss": 0.3824, "step": 4637 }, { "epoch": 0.2128401633701987, "grad_norm": 0.4945215880870819, "learning_rate": 9.933484739542312e-06, "loss": 0.4661, "step": 4638 }, { "epoch": 0.21288605387545317, "grad_norm": 0.47502630949020386, "learning_rate": 9.933444873315943e-06, "loss": 0.3845, "step": 4639 }, { "epoch": 0.21293194438070764, "grad_norm": 0.4932652413845062, "learning_rate": 9.933404995226198e-06, "loss": 0.3947, "step": 4640 }, { "epoch": 0.2129778348859621, "grad_norm": 0.5257059335708618, "learning_rate": 9.93336510527317e-06, "loss": 0.5189, "step": 4641 }, { "epoch": 0.21302372539121656, "grad_norm": 0.4927190840244293, "learning_rate": 9.933325203456957e-06, "loss": 0.4563, "step": 4642 }, { "epoch": 0.21306961589647103, "grad_norm": 0.4890120327472687, "learning_rate": 9.933285289777651e-06, "loss": 0.3684, "step": 4643 }, { "epoch": 0.2131155064017255, "grad_norm": 0.47681906819343567, "learning_rate": 9.933245364235353e-06, "loss": 0.4273, "step": 4644 }, { "epoch": 0.21316139690697994, "grad_norm": 0.4440349042415619, "learning_rate": 9.933205426830157e-06, "loss": 0.3612, "step": 4645 }, { "epoch": 0.2132072874122344, "grad_norm": 0.46425560116767883, "learning_rate": 9.933165477562159e-06, "loss": 0.3829, "step": 4646 }, { "epoch": 0.21325317791748888, "grad_norm": 0.42245426774024963, "learning_rate": 9.933125516431454e-06, "loss": 0.3467, "step": 4647 }, { "epoch": 0.21329906842274332, "grad_norm": 0.46113356947898865, "learning_rate": 9.933085543438138e-06, "loss": 0.3626, "step": 4648 }, { "epoch": 0.2133449589279978, "grad_norm": 0.4818299412727356, "learning_rate": 9.93304555858231e-06, "loss": 0.4237, "step": 4649 }, { "epoch": 0.21339084943325226, "grad_norm": 0.47459596395492554, "learning_rate": 9.933005561864064e-06, "loss": 0.4305, "step": 4650 }, { "epoch": 0.21343673993850673, "grad_norm": 0.49322208762168884, "learning_rate": 9.932965553283497e-06, "loss": 0.4904, "step": 4651 }, { "epoch": 0.21348263044376117, "grad_norm": 0.4633965492248535, "learning_rate": 9.932925532840705e-06, "loss": 0.433, "step": 4652 }, { "epoch": 0.21352852094901564, "grad_norm": 0.5234594345092773, "learning_rate": 9.932885500535782e-06, "loss": 0.4421, "step": 4653 }, { "epoch": 0.2135744114542701, "grad_norm": 0.4683118760585785, "learning_rate": 9.93284545636883e-06, "loss": 0.3587, "step": 4654 }, { "epoch": 0.21362030195952458, "grad_norm": 0.4599636495113373, "learning_rate": 9.932805400339937e-06, "loss": 0.3466, "step": 4655 }, { "epoch": 0.21366619246477903, "grad_norm": 0.44249051809310913, "learning_rate": 9.932765332449205e-06, "loss": 0.3813, "step": 4656 }, { "epoch": 0.2137120829700335, "grad_norm": 0.4925571382045746, "learning_rate": 9.93272525269673e-06, "loss": 0.4616, "step": 4657 }, { "epoch": 0.21375797347528797, "grad_norm": 0.49963822960853577, "learning_rate": 9.93268516108261e-06, "loss": 0.4401, "step": 4658 }, { "epoch": 0.21380386398054244, "grad_norm": 0.49291783571243286, "learning_rate": 9.932645057606936e-06, "loss": 0.4371, "step": 4659 }, { "epoch": 0.21384975448579688, "grad_norm": 0.4303255081176758, "learning_rate": 9.932604942269808e-06, "loss": 0.344, "step": 4660 }, { "epoch": 0.21389564499105135, "grad_norm": 0.5120735168457031, "learning_rate": 9.932564815071321e-06, "loss": 0.4922, "step": 4661 }, { "epoch": 0.21394153549630582, "grad_norm": 0.42673927545547485, "learning_rate": 9.932524676011575e-06, "loss": 0.3694, "step": 4662 }, { "epoch": 0.2139874260015603, "grad_norm": 0.4549776315689087, "learning_rate": 9.932484525090662e-06, "loss": 0.3945, "step": 4663 }, { "epoch": 0.21403331650681473, "grad_norm": 0.45083972811698914, "learning_rate": 9.932444362308679e-06, "loss": 0.3774, "step": 4664 }, { "epoch": 0.2140792070120692, "grad_norm": 0.46399959921836853, "learning_rate": 9.932404187665727e-06, "loss": 0.4022, "step": 4665 }, { "epoch": 0.21412509751732367, "grad_norm": 0.4504632353782654, "learning_rate": 9.932364001161899e-06, "loss": 0.3848, "step": 4666 }, { "epoch": 0.21417098802257814, "grad_norm": 0.48397886753082275, "learning_rate": 9.93232380279729e-06, "loss": 0.4029, "step": 4667 }, { "epoch": 0.21421687852783258, "grad_norm": 0.4804795980453491, "learning_rate": 9.932283592572e-06, "loss": 0.3998, "step": 4668 }, { "epoch": 0.21426276903308705, "grad_norm": 0.45309773087501526, "learning_rate": 9.932243370486127e-06, "loss": 0.4238, "step": 4669 }, { "epoch": 0.21430865953834152, "grad_norm": 0.5145482420921326, "learning_rate": 9.932203136539762e-06, "loss": 0.4925, "step": 4670 }, { "epoch": 0.214354550043596, "grad_norm": 0.48443666100502014, "learning_rate": 9.932162890733007e-06, "loss": 0.3905, "step": 4671 }, { "epoch": 0.21440044054885044, "grad_norm": 0.4662611484527588, "learning_rate": 9.932122633065956e-06, "loss": 0.3916, "step": 4672 }, { "epoch": 0.2144463310541049, "grad_norm": 0.4925994575023651, "learning_rate": 9.93208236353871e-06, "loss": 0.44, "step": 4673 }, { "epoch": 0.21449222155935938, "grad_norm": 0.5234211087226868, "learning_rate": 9.932042082151358e-06, "loss": 0.5185, "step": 4674 }, { "epoch": 0.21453811206461385, "grad_norm": 0.4390690326690674, "learning_rate": 9.932001788904004e-06, "loss": 0.3463, "step": 4675 }, { "epoch": 0.2145840025698683, "grad_norm": 0.47881796956062317, "learning_rate": 9.931961483796741e-06, "loss": 0.4358, "step": 4676 }, { "epoch": 0.21462989307512276, "grad_norm": 0.5072889924049377, "learning_rate": 9.931921166829668e-06, "loss": 0.4383, "step": 4677 }, { "epoch": 0.21467578358037723, "grad_norm": 0.48179998993873596, "learning_rate": 9.93188083800288e-06, "loss": 0.3891, "step": 4678 }, { "epoch": 0.21472167408563167, "grad_norm": 0.47286829352378845, "learning_rate": 9.931840497316475e-06, "loss": 0.4204, "step": 4679 }, { "epoch": 0.21476756459088614, "grad_norm": 0.44967126846313477, "learning_rate": 9.931800144770552e-06, "loss": 0.3522, "step": 4680 }, { "epoch": 0.2148134550961406, "grad_norm": 0.502183198928833, "learning_rate": 9.931759780365206e-06, "loss": 0.4726, "step": 4681 }, { "epoch": 0.21485934560139508, "grad_norm": 0.4539750814437866, "learning_rate": 9.931719404100532e-06, "loss": 0.4267, "step": 4682 }, { "epoch": 0.21490523610664952, "grad_norm": 0.47905218601226807, "learning_rate": 9.931679015976631e-06, "loss": 0.4516, "step": 4683 }, { "epoch": 0.214951126611904, "grad_norm": 0.47632932662963867, "learning_rate": 9.9316386159936e-06, "loss": 0.3734, "step": 4684 }, { "epoch": 0.21499701711715846, "grad_norm": 0.41714102029800415, "learning_rate": 9.931598204151533e-06, "loss": 0.3123, "step": 4685 }, { "epoch": 0.21504290762241293, "grad_norm": 0.4931911528110504, "learning_rate": 9.931557780450529e-06, "loss": 0.4126, "step": 4686 }, { "epoch": 0.21508879812766737, "grad_norm": 0.626720666885376, "learning_rate": 9.931517344890684e-06, "loss": 0.4795, "step": 4687 }, { "epoch": 0.21513468863292184, "grad_norm": 0.44990983605384827, "learning_rate": 9.931476897472096e-06, "loss": 0.367, "step": 4688 }, { "epoch": 0.21518057913817631, "grad_norm": 0.5085331797599792, "learning_rate": 9.931436438194863e-06, "loss": 0.4798, "step": 4689 }, { "epoch": 0.21522646964343078, "grad_norm": 0.4893498420715332, "learning_rate": 9.931395967059084e-06, "loss": 0.4724, "step": 4690 }, { "epoch": 0.21527236014868523, "grad_norm": 0.47430935502052307, "learning_rate": 9.93135548406485e-06, "loss": 0.4613, "step": 4691 }, { "epoch": 0.2153182506539397, "grad_norm": 0.44587236642837524, "learning_rate": 9.931314989212267e-06, "loss": 0.3774, "step": 4692 }, { "epoch": 0.21536414115919417, "grad_norm": 0.44930770993232727, "learning_rate": 9.931274482501424e-06, "loss": 0.3579, "step": 4693 }, { "epoch": 0.21541003166444864, "grad_norm": 0.4720441997051239, "learning_rate": 9.931233963932423e-06, "loss": 0.3846, "step": 4694 }, { "epoch": 0.21545592216970308, "grad_norm": 0.48503828048706055, "learning_rate": 9.931193433505362e-06, "loss": 0.4111, "step": 4695 }, { "epoch": 0.21550181267495755, "grad_norm": 0.5085614919662476, "learning_rate": 9.931152891220336e-06, "loss": 0.5087, "step": 4696 }, { "epoch": 0.21554770318021202, "grad_norm": 0.4879940152168274, "learning_rate": 9.931112337077444e-06, "loss": 0.4084, "step": 4697 }, { "epoch": 0.2155935936854665, "grad_norm": 0.48777949810028076, "learning_rate": 9.931071771076783e-06, "loss": 0.396, "step": 4698 }, { "epoch": 0.21563948419072093, "grad_norm": 0.4808366894721985, "learning_rate": 9.931031193218453e-06, "loss": 0.4375, "step": 4699 }, { "epoch": 0.2156853746959754, "grad_norm": 0.4755386412143707, "learning_rate": 9.930990603502547e-06, "loss": 0.4587, "step": 4700 }, { "epoch": 0.21573126520122987, "grad_norm": 0.4220426678657532, "learning_rate": 9.930950001929163e-06, "loss": 0.3619, "step": 4701 }, { "epoch": 0.21577715570648434, "grad_norm": 0.41686415672302246, "learning_rate": 9.930909388498403e-06, "loss": 0.3155, "step": 4702 }, { "epoch": 0.21582304621173878, "grad_norm": 0.46974438428878784, "learning_rate": 9.930868763210361e-06, "loss": 0.3552, "step": 4703 }, { "epoch": 0.21586893671699325, "grad_norm": 0.42856499552726746, "learning_rate": 9.930828126065138e-06, "loss": 0.3378, "step": 4704 }, { "epoch": 0.21591482722224772, "grad_norm": 0.44362497329711914, "learning_rate": 9.930787477062828e-06, "loss": 0.3648, "step": 4705 }, { "epoch": 0.21596071772750217, "grad_norm": 0.4723816215991974, "learning_rate": 9.930746816203529e-06, "loss": 0.3962, "step": 4706 }, { "epoch": 0.21600660823275664, "grad_norm": 0.4279741942882538, "learning_rate": 9.930706143487343e-06, "loss": 0.315, "step": 4707 }, { "epoch": 0.2160524987380111, "grad_norm": 0.504045307636261, "learning_rate": 9.930665458914362e-06, "loss": 0.4924, "step": 4708 }, { "epoch": 0.21609838924326558, "grad_norm": 0.45611247420310974, "learning_rate": 9.930624762484688e-06, "loss": 0.4316, "step": 4709 }, { "epoch": 0.21614427974852002, "grad_norm": 0.49936166405677795, "learning_rate": 9.930584054198419e-06, "loss": 0.4028, "step": 4710 }, { "epoch": 0.2161901702537745, "grad_norm": 0.46898287534713745, "learning_rate": 9.93054333405565e-06, "loss": 0.4093, "step": 4711 }, { "epoch": 0.21623606075902896, "grad_norm": 0.47403737902641296, "learning_rate": 9.93050260205648e-06, "loss": 0.3809, "step": 4712 }, { "epoch": 0.21628195126428343, "grad_norm": 0.5165886282920837, "learning_rate": 9.930461858201008e-06, "loss": 0.4791, "step": 4713 }, { "epoch": 0.21632784176953787, "grad_norm": 0.47071903944015503, "learning_rate": 9.930421102489331e-06, "loss": 0.4067, "step": 4714 }, { "epoch": 0.21637373227479234, "grad_norm": 0.4974483847618103, "learning_rate": 9.930380334921548e-06, "loss": 0.4711, "step": 4715 }, { "epoch": 0.2164196227800468, "grad_norm": 0.48442625999450684, "learning_rate": 9.930339555497756e-06, "loss": 0.4497, "step": 4716 }, { "epoch": 0.21646551328530128, "grad_norm": 0.4645082950592041, "learning_rate": 9.930298764218053e-06, "loss": 0.3787, "step": 4717 }, { "epoch": 0.21651140379055572, "grad_norm": 0.5022499561309814, "learning_rate": 9.930257961082538e-06, "loss": 0.4822, "step": 4718 }, { "epoch": 0.2165572942958102, "grad_norm": 0.4625333249568939, "learning_rate": 9.93021714609131e-06, "loss": 0.4244, "step": 4719 }, { "epoch": 0.21660318480106466, "grad_norm": 0.45841649174690247, "learning_rate": 9.930176319244464e-06, "loss": 0.3745, "step": 4720 }, { "epoch": 0.21664907530631913, "grad_norm": 0.4419494867324829, "learning_rate": 9.9301354805421e-06, "loss": 0.3729, "step": 4721 }, { "epoch": 0.21669496581157358, "grad_norm": 0.4620323181152344, "learning_rate": 9.930094629984317e-06, "loss": 0.3547, "step": 4722 }, { "epoch": 0.21674085631682805, "grad_norm": 0.49037355184555054, "learning_rate": 9.930053767571212e-06, "loss": 0.3814, "step": 4723 }, { "epoch": 0.21678674682208252, "grad_norm": 0.44606703519821167, "learning_rate": 9.930012893302884e-06, "loss": 0.36, "step": 4724 }, { "epoch": 0.21683263732733699, "grad_norm": 0.4860285222530365, "learning_rate": 9.929972007179429e-06, "loss": 0.448, "step": 4725 }, { "epoch": 0.21687852783259143, "grad_norm": 0.4884079694747925, "learning_rate": 9.92993110920095e-06, "loss": 0.4032, "step": 4726 }, { "epoch": 0.2169244183378459, "grad_norm": 0.4397047460079193, "learning_rate": 9.92989019936754e-06, "loss": 0.3398, "step": 4727 }, { "epoch": 0.21697030884310037, "grad_norm": 0.4717492461204529, "learning_rate": 9.929849277679303e-06, "loss": 0.4437, "step": 4728 }, { "epoch": 0.21701619934835484, "grad_norm": 0.45874541997909546, "learning_rate": 9.929808344136332e-06, "loss": 0.4141, "step": 4729 }, { "epoch": 0.21706208985360928, "grad_norm": 0.44129058718681335, "learning_rate": 9.929767398738728e-06, "loss": 0.3837, "step": 4730 }, { "epoch": 0.21710798035886375, "grad_norm": 0.5274844169616699, "learning_rate": 9.92972644148659e-06, "loss": 0.5665, "step": 4731 }, { "epoch": 0.21715387086411822, "grad_norm": 0.47592389583587646, "learning_rate": 9.929685472380015e-06, "loss": 0.4163, "step": 4732 }, { "epoch": 0.2171997613693727, "grad_norm": 0.4594455361366272, "learning_rate": 9.929644491419103e-06, "loss": 0.3405, "step": 4733 }, { "epoch": 0.21724565187462713, "grad_norm": 0.4659575819969177, "learning_rate": 9.929603498603953e-06, "loss": 0.3718, "step": 4734 }, { "epoch": 0.2172915423798816, "grad_norm": 0.47570160031318665, "learning_rate": 9.929562493934661e-06, "loss": 0.3864, "step": 4735 }, { "epoch": 0.21733743288513607, "grad_norm": 0.423115611076355, "learning_rate": 9.929521477411326e-06, "loss": 0.328, "step": 4736 }, { "epoch": 0.21738332339039051, "grad_norm": 0.46982258558273315, "learning_rate": 9.92948044903405e-06, "loss": 0.4263, "step": 4737 }, { "epoch": 0.21742921389564498, "grad_norm": 0.5065194368362427, "learning_rate": 9.929439408802927e-06, "loss": 0.4478, "step": 4738 }, { "epoch": 0.21747510440089945, "grad_norm": 0.5010607838630676, "learning_rate": 9.929398356718061e-06, "loss": 0.5078, "step": 4739 }, { "epoch": 0.21752099490615392, "grad_norm": 0.45388680696487427, "learning_rate": 9.929357292779545e-06, "loss": 0.408, "step": 4740 }, { "epoch": 0.21756688541140837, "grad_norm": 0.4765114188194275, "learning_rate": 9.929316216987482e-06, "loss": 0.3863, "step": 4741 }, { "epoch": 0.21761277591666284, "grad_norm": 0.46761444211006165, "learning_rate": 9.929275129341968e-06, "loss": 0.355, "step": 4742 }, { "epoch": 0.2176586664219173, "grad_norm": 0.49700087308883667, "learning_rate": 9.929234029843103e-06, "loss": 0.3955, "step": 4743 }, { "epoch": 0.21770455692717178, "grad_norm": 0.43094325065612793, "learning_rate": 9.929192918490986e-06, "loss": 0.3257, "step": 4744 }, { "epoch": 0.21775044743242622, "grad_norm": 0.48548972606658936, "learning_rate": 9.929151795285718e-06, "loss": 0.4836, "step": 4745 }, { "epoch": 0.2177963379376807, "grad_norm": 0.45151907205581665, "learning_rate": 9.929110660227392e-06, "loss": 0.3847, "step": 4746 }, { "epoch": 0.21784222844293516, "grad_norm": 0.4795612096786499, "learning_rate": 9.929069513316115e-06, "loss": 0.4237, "step": 4747 }, { "epoch": 0.21788811894818963, "grad_norm": 0.465311199426651, "learning_rate": 9.929028354551977e-06, "loss": 0.4508, "step": 4748 }, { "epoch": 0.21793400945344407, "grad_norm": 0.44016215205192566, "learning_rate": 9.928987183935084e-06, "loss": 0.3319, "step": 4749 }, { "epoch": 0.21797989995869854, "grad_norm": 0.49470531940460205, "learning_rate": 9.928946001465531e-06, "loss": 0.4994, "step": 4750 }, { "epoch": 0.218025790463953, "grad_norm": 0.5167783498764038, "learning_rate": 9.92890480714342e-06, "loss": 0.4838, "step": 4751 }, { "epoch": 0.21807168096920748, "grad_norm": 0.5040650963783264, "learning_rate": 9.928863600968847e-06, "loss": 0.4089, "step": 4752 }, { "epoch": 0.21811757147446192, "grad_norm": 0.43722257018089294, "learning_rate": 9.928822382941914e-06, "loss": 0.3165, "step": 4753 }, { "epoch": 0.2181634619797164, "grad_norm": 0.45732051134109497, "learning_rate": 9.928781153062718e-06, "loss": 0.3594, "step": 4754 }, { "epoch": 0.21820935248497086, "grad_norm": 0.4525343179702759, "learning_rate": 9.92873991133136e-06, "loss": 0.3525, "step": 4755 }, { "epoch": 0.21825524299022533, "grad_norm": 0.4378473460674286, "learning_rate": 9.928698657747936e-06, "loss": 0.3292, "step": 4756 }, { "epoch": 0.21830113349547978, "grad_norm": 0.5118082761764526, "learning_rate": 9.928657392312548e-06, "loss": 0.4048, "step": 4757 }, { "epoch": 0.21834702400073425, "grad_norm": 0.4829464852809906, "learning_rate": 9.928616115025296e-06, "loss": 0.3909, "step": 4758 }, { "epoch": 0.21839291450598872, "grad_norm": 0.5080723762512207, "learning_rate": 9.928574825886277e-06, "loss": 0.4956, "step": 4759 }, { "epoch": 0.2184388050112432, "grad_norm": 0.473085880279541, "learning_rate": 9.928533524895589e-06, "loss": 0.4321, "step": 4760 }, { "epoch": 0.21848469551649763, "grad_norm": 0.450904905796051, "learning_rate": 9.928492212053336e-06, "loss": 0.3879, "step": 4761 }, { "epoch": 0.2185305860217521, "grad_norm": 0.4535611569881439, "learning_rate": 9.928450887359613e-06, "loss": 0.3482, "step": 4762 }, { "epoch": 0.21857647652700657, "grad_norm": 0.5056604146957397, "learning_rate": 9.928409550814524e-06, "loss": 0.4549, "step": 4763 }, { "epoch": 0.21862236703226104, "grad_norm": 0.5012725591659546, "learning_rate": 9.92836820241816e-06, "loss": 0.4525, "step": 4764 }, { "epoch": 0.21866825753751548, "grad_norm": 0.5008786916732788, "learning_rate": 9.928326842170631e-06, "loss": 0.5025, "step": 4765 }, { "epoch": 0.21871414804276995, "grad_norm": 0.528501033782959, "learning_rate": 9.92828547007203e-06, "loss": 0.4971, "step": 4766 }, { "epoch": 0.21876003854802442, "grad_norm": 0.4721131920814514, "learning_rate": 9.928244086122457e-06, "loss": 0.4287, "step": 4767 }, { "epoch": 0.21880592905327886, "grad_norm": 0.4517940282821655, "learning_rate": 9.928202690322013e-06, "loss": 0.357, "step": 4768 }, { "epoch": 0.21885181955853333, "grad_norm": 0.4652464985847473, "learning_rate": 9.928161282670796e-06, "loss": 0.3721, "step": 4769 }, { "epoch": 0.2188977100637878, "grad_norm": 0.47898006439208984, "learning_rate": 9.928119863168907e-06, "loss": 0.4176, "step": 4770 }, { "epoch": 0.21894360056904227, "grad_norm": 0.4960164725780487, "learning_rate": 9.928078431816445e-06, "loss": 0.4252, "step": 4771 }, { "epoch": 0.21898949107429672, "grad_norm": 0.5123987793922424, "learning_rate": 9.928036988613512e-06, "loss": 0.4848, "step": 4772 }, { "epoch": 0.21903538157955119, "grad_norm": 0.5303327441215515, "learning_rate": 9.927995533560202e-06, "loss": 0.5233, "step": 4773 }, { "epoch": 0.21908127208480566, "grad_norm": 0.4559354782104492, "learning_rate": 9.92795406665662e-06, "loss": 0.3632, "step": 4774 }, { "epoch": 0.21912716259006013, "grad_norm": 0.4832862317562103, "learning_rate": 9.927912587902863e-06, "loss": 0.4975, "step": 4775 }, { "epoch": 0.21917305309531457, "grad_norm": 0.8387763500213623, "learning_rate": 9.927871097299033e-06, "loss": 0.4251, "step": 4776 }, { "epoch": 0.21921894360056904, "grad_norm": 0.45203593373298645, "learning_rate": 9.927829594845226e-06, "loss": 0.3637, "step": 4777 }, { "epoch": 0.2192648341058235, "grad_norm": 0.4715596139431, "learning_rate": 9.927788080541546e-06, "loss": 0.4197, "step": 4778 }, { "epoch": 0.21931072461107798, "grad_norm": 0.44730404019355774, "learning_rate": 9.927746554388091e-06, "loss": 0.3367, "step": 4779 }, { "epoch": 0.21935661511633242, "grad_norm": 0.4531867504119873, "learning_rate": 9.927705016384958e-06, "loss": 0.3688, "step": 4780 }, { "epoch": 0.2194025056215869, "grad_norm": 0.43707215785980225, "learning_rate": 9.927663466532253e-06, "loss": 0.3331, "step": 4781 }, { "epoch": 0.21944839612684136, "grad_norm": 0.4890100359916687, "learning_rate": 9.92762190483007e-06, "loss": 0.4518, "step": 4782 }, { "epoch": 0.21949428663209583, "grad_norm": 0.4831465482711792, "learning_rate": 9.927580331278512e-06, "loss": 0.4518, "step": 4783 }, { "epoch": 0.21954017713735027, "grad_norm": 0.46450120210647583, "learning_rate": 9.92753874587768e-06, "loss": 0.4191, "step": 4784 }, { "epoch": 0.21958606764260474, "grad_norm": 0.4956110119819641, "learning_rate": 9.927497148627671e-06, "loss": 0.4453, "step": 4785 }, { "epoch": 0.2196319581478592, "grad_norm": 0.45824500918388367, "learning_rate": 9.927455539528588e-06, "loss": 0.3729, "step": 4786 }, { "epoch": 0.21967784865311368, "grad_norm": 0.4803895354270935, "learning_rate": 9.92741391858053e-06, "loss": 0.4795, "step": 4787 }, { "epoch": 0.21972373915836813, "grad_norm": 0.4855837821960449, "learning_rate": 9.927372285783595e-06, "loss": 0.51, "step": 4788 }, { "epoch": 0.2197696296636226, "grad_norm": 0.41429024934768677, "learning_rate": 9.927330641137884e-06, "loss": 0.3029, "step": 4789 }, { "epoch": 0.21981552016887707, "grad_norm": 0.47005584836006165, "learning_rate": 9.9272889846435e-06, "loss": 0.4218, "step": 4790 }, { "epoch": 0.21986141067413154, "grad_norm": 0.4601205885410309, "learning_rate": 9.92724731630054e-06, "loss": 0.393, "step": 4791 }, { "epoch": 0.21990730117938598, "grad_norm": 0.4288758933544159, "learning_rate": 9.927205636109105e-06, "loss": 0.3036, "step": 4792 }, { "epoch": 0.21995319168464045, "grad_norm": 0.4515363574028015, "learning_rate": 9.927163944069295e-06, "loss": 0.3551, "step": 4793 }, { "epoch": 0.21999908218989492, "grad_norm": 0.4707154333591461, "learning_rate": 9.927122240181212e-06, "loss": 0.3927, "step": 4794 }, { "epoch": 0.22004497269514936, "grad_norm": 0.44049072265625, "learning_rate": 9.927080524444956e-06, "loss": 0.4036, "step": 4795 }, { "epoch": 0.22009086320040383, "grad_norm": 0.5246038436889648, "learning_rate": 9.927038796860623e-06, "loss": 0.4199, "step": 4796 }, { "epoch": 0.2201367537056583, "grad_norm": 0.45726901292800903, "learning_rate": 9.92699705742832e-06, "loss": 0.3726, "step": 4797 }, { "epoch": 0.22018264421091277, "grad_norm": 0.46941837668418884, "learning_rate": 9.926955306148145e-06, "loss": 0.3908, "step": 4798 }, { "epoch": 0.2202285347161672, "grad_norm": 0.46446794271469116, "learning_rate": 9.926913543020194e-06, "loss": 0.4257, "step": 4799 }, { "epoch": 0.22027442522142168, "grad_norm": 0.4293504059314728, "learning_rate": 9.926871768044572e-06, "loss": 0.3154, "step": 4800 }, { "epoch": 0.22032031572667615, "grad_norm": 0.4713653028011322, "learning_rate": 9.92682998122138e-06, "loss": 0.4542, "step": 4801 }, { "epoch": 0.22036620623193062, "grad_norm": 0.4862630069255829, "learning_rate": 9.926788182550715e-06, "loss": 0.4627, "step": 4802 }, { "epoch": 0.22041209673718506, "grad_norm": 0.44805315136909485, "learning_rate": 9.92674637203268e-06, "loss": 0.3545, "step": 4803 }, { "epoch": 0.22045798724243953, "grad_norm": 0.4568629860877991, "learning_rate": 9.926704549667374e-06, "loss": 0.4017, "step": 4804 }, { "epoch": 0.220503877747694, "grad_norm": 0.5366540551185608, "learning_rate": 9.9266627154549e-06, "loss": 0.495, "step": 4805 }, { "epoch": 0.22054976825294847, "grad_norm": 0.4854573607444763, "learning_rate": 9.926620869395356e-06, "loss": 0.4367, "step": 4806 }, { "epoch": 0.22059565875820292, "grad_norm": 0.49729904532432556, "learning_rate": 9.926579011488843e-06, "loss": 0.5154, "step": 4807 }, { "epoch": 0.2206415492634574, "grad_norm": 0.482592910528183, "learning_rate": 9.926537141735463e-06, "loss": 0.3894, "step": 4808 }, { "epoch": 0.22068743976871186, "grad_norm": 1.0421732664108276, "learning_rate": 9.926495260135318e-06, "loss": 0.4382, "step": 4809 }, { "epoch": 0.22073333027396633, "grad_norm": 0.4814826250076294, "learning_rate": 9.926453366688505e-06, "loss": 0.421, "step": 4810 }, { "epoch": 0.22077922077922077, "grad_norm": 0.5726569294929504, "learning_rate": 9.926411461395127e-06, "loss": 0.5295, "step": 4811 }, { "epoch": 0.22082511128447524, "grad_norm": 0.5161497592926025, "learning_rate": 9.926369544255283e-06, "loss": 0.5035, "step": 4812 }, { "epoch": 0.2208710017897297, "grad_norm": 0.46062567830085754, "learning_rate": 9.926327615269077e-06, "loss": 0.3891, "step": 4813 }, { "epoch": 0.22091689229498418, "grad_norm": 0.5229039192199707, "learning_rate": 9.926285674436607e-06, "loss": 0.4578, "step": 4814 }, { "epoch": 0.22096278280023862, "grad_norm": 0.5289216637611389, "learning_rate": 9.926243721757975e-06, "loss": 0.5183, "step": 4815 }, { "epoch": 0.2210086733054931, "grad_norm": 0.4917519986629486, "learning_rate": 9.92620175723328e-06, "loss": 0.3812, "step": 4816 }, { "epoch": 0.22105456381074756, "grad_norm": 0.4435589909553528, "learning_rate": 9.926159780862626e-06, "loss": 0.3763, "step": 4817 }, { "epoch": 0.22110045431600203, "grad_norm": 0.4811856746673584, "learning_rate": 9.926117792646113e-06, "loss": 0.4062, "step": 4818 }, { "epoch": 0.22114634482125647, "grad_norm": 1.2258802652359009, "learning_rate": 9.92607579258384e-06, "loss": 0.5963, "step": 4819 }, { "epoch": 0.22119223532651094, "grad_norm": 0.4955314099788666, "learning_rate": 9.92603378067591e-06, "loss": 0.4613, "step": 4820 }, { "epoch": 0.22123812583176541, "grad_norm": 0.4535846710205078, "learning_rate": 9.925991756922424e-06, "loss": 0.3626, "step": 4821 }, { "epoch": 0.22128401633701988, "grad_norm": 0.499987930059433, "learning_rate": 9.925949721323483e-06, "loss": 0.3775, "step": 4822 }, { "epoch": 0.22132990684227433, "grad_norm": 0.520010232925415, "learning_rate": 9.925907673879184e-06, "loss": 0.4285, "step": 4823 }, { "epoch": 0.2213757973475288, "grad_norm": 0.5386976599693298, "learning_rate": 9.925865614589636e-06, "loss": 0.5437, "step": 4824 }, { "epoch": 0.22142168785278327, "grad_norm": 0.5095916986465454, "learning_rate": 9.925823543454933e-06, "loss": 0.5072, "step": 4825 }, { "epoch": 0.2214675783580377, "grad_norm": 0.4769110381603241, "learning_rate": 9.925781460475181e-06, "loss": 0.411, "step": 4826 }, { "epoch": 0.22151346886329218, "grad_norm": 0.45860767364501953, "learning_rate": 9.925739365650478e-06, "loss": 0.4112, "step": 4827 }, { "epoch": 0.22155935936854665, "grad_norm": 0.4927826523780823, "learning_rate": 9.925697258980927e-06, "loss": 0.4174, "step": 4828 }, { "epoch": 0.22160524987380112, "grad_norm": 0.4682226777076721, "learning_rate": 9.925655140466629e-06, "loss": 0.3407, "step": 4829 }, { "epoch": 0.22165114037905556, "grad_norm": 0.49554941058158875, "learning_rate": 9.925613010107684e-06, "loss": 0.4167, "step": 4830 }, { "epoch": 0.22169703088431003, "grad_norm": 0.48000916838645935, "learning_rate": 9.925570867904195e-06, "loss": 0.3714, "step": 4831 }, { "epoch": 0.2217429213895645, "grad_norm": 0.5282401442527771, "learning_rate": 9.925528713856263e-06, "loss": 0.4281, "step": 4832 }, { "epoch": 0.22178881189481897, "grad_norm": 0.4732506573200226, "learning_rate": 9.925486547963988e-06, "loss": 0.4233, "step": 4833 }, { "epoch": 0.2218347024000734, "grad_norm": 0.4619845747947693, "learning_rate": 9.925444370227473e-06, "loss": 0.4208, "step": 4834 }, { "epoch": 0.22188059290532788, "grad_norm": 0.4698489308357239, "learning_rate": 9.925402180646819e-06, "loss": 0.3937, "step": 4835 }, { "epoch": 0.22192648341058235, "grad_norm": 0.43977537751197815, "learning_rate": 9.925359979222126e-06, "loss": 0.3232, "step": 4836 }, { "epoch": 0.22197237391583682, "grad_norm": 0.5040274262428284, "learning_rate": 9.925317765953498e-06, "loss": 0.4913, "step": 4837 }, { "epoch": 0.22201826442109127, "grad_norm": 0.44031843543052673, "learning_rate": 9.925275540841035e-06, "loss": 0.3401, "step": 4838 }, { "epoch": 0.22206415492634574, "grad_norm": 0.502856969833374, "learning_rate": 9.925233303884838e-06, "loss": 0.4747, "step": 4839 }, { "epoch": 0.2221100454316002, "grad_norm": 0.47573596239089966, "learning_rate": 9.92519105508501e-06, "loss": 0.3765, "step": 4840 }, { "epoch": 0.22215593593685468, "grad_norm": 0.5229244828224182, "learning_rate": 9.925148794441653e-06, "loss": 0.568, "step": 4841 }, { "epoch": 0.22220182644210912, "grad_norm": 0.7730299234390259, "learning_rate": 9.925106521954866e-06, "loss": 0.4092, "step": 4842 }, { "epoch": 0.2222477169473636, "grad_norm": 0.488257497549057, "learning_rate": 9.925064237624754e-06, "loss": 0.4716, "step": 4843 }, { "epoch": 0.22229360745261806, "grad_norm": 0.5342119932174683, "learning_rate": 9.925021941451413e-06, "loss": 0.4693, "step": 4844 }, { "epoch": 0.22233949795787253, "grad_norm": 0.4660462439060211, "learning_rate": 9.924979633434953e-06, "loss": 0.3616, "step": 4845 }, { "epoch": 0.22238538846312697, "grad_norm": 0.5627459287643433, "learning_rate": 9.92493731357547e-06, "loss": 0.4949, "step": 4846 }, { "epoch": 0.22243127896838144, "grad_norm": 0.5174509286880493, "learning_rate": 9.924894981873065e-06, "loss": 0.3901, "step": 4847 }, { "epoch": 0.2224771694736359, "grad_norm": 0.508909285068512, "learning_rate": 9.924852638327845e-06, "loss": 0.4304, "step": 4848 }, { "epoch": 0.22252305997889038, "grad_norm": 0.4349357485771179, "learning_rate": 9.924810282939907e-06, "loss": 0.3564, "step": 4849 }, { "epoch": 0.22256895048414482, "grad_norm": 0.5553564429283142, "learning_rate": 9.924767915709355e-06, "loss": 0.4513, "step": 4850 }, { "epoch": 0.2226148409893993, "grad_norm": 0.4676671624183655, "learning_rate": 9.924725536636289e-06, "loss": 0.3673, "step": 4851 }, { "epoch": 0.22266073149465376, "grad_norm": 0.45374375581741333, "learning_rate": 9.924683145720814e-06, "loss": 0.3632, "step": 4852 }, { "epoch": 0.2227066219999082, "grad_norm": 0.4324459433555603, "learning_rate": 9.92464074296303e-06, "loss": 0.3566, "step": 4853 }, { "epoch": 0.22275251250516268, "grad_norm": 0.46625950932502747, "learning_rate": 9.92459832836304e-06, "loss": 0.3997, "step": 4854 }, { "epoch": 0.22279840301041715, "grad_norm": 0.48104342818260193, "learning_rate": 9.924555901920945e-06, "loss": 0.4583, "step": 4855 }, { "epoch": 0.22284429351567162, "grad_norm": 0.48989519476890564, "learning_rate": 9.924513463636847e-06, "loss": 0.4017, "step": 4856 }, { "epoch": 0.22289018402092606, "grad_norm": 0.48691120743751526, "learning_rate": 9.924471013510848e-06, "loss": 0.335, "step": 4857 }, { "epoch": 0.22293607452618053, "grad_norm": 0.47887545824050903, "learning_rate": 9.92442855154305e-06, "loss": 0.433, "step": 4858 }, { "epoch": 0.222981965031435, "grad_norm": 0.44519808888435364, "learning_rate": 9.924386077733556e-06, "loss": 0.3675, "step": 4859 }, { "epoch": 0.22302785553668947, "grad_norm": 0.45259690284729004, "learning_rate": 9.924343592082468e-06, "loss": 0.3812, "step": 4860 }, { "epoch": 0.2230737460419439, "grad_norm": 0.4651898443698883, "learning_rate": 9.924301094589887e-06, "loss": 0.3783, "step": 4861 }, { "epoch": 0.22311963654719838, "grad_norm": 0.46688616275787354, "learning_rate": 9.924258585255917e-06, "loss": 0.3688, "step": 4862 }, { "epoch": 0.22316552705245285, "grad_norm": 0.48918551206588745, "learning_rate": 9.92421606408066e-06, "loss": 0.4354, "step": 4863 }, { "epoch": 0.22321141755770732, "grad_norm": 0.4666862189769745, "learning_rate": 9.924173531064217e-06, "loss": 0.4221, "step": 4864 }, { "epoch": 0.22325730806296176, "grad_norm": 0.47541213035583496, "learning_rate": 9.924130986206689e-06, "loss": 0.4155, "step": 4865 }, { "epoch": 0.22330319856821623, "grad_norm": 0.4850527346134186, "learning_rate": 9.92408842950818e-06, "loss": 0.4539, "step": 4866 }, { "epoch": 0.2233490890734707, "grad_norm": 0.4666447043418884, "learning_rate": 9.924045860968796e-06, "loss": 0.3746, "step": 4867 }, { "epoch": 0.22339497957872517, "grad_norm": 0.4348229765892029, "learning_rate": 9.924003280588632e-06, "loss": 0.3793, "step": 4868 }, { "epoch": 0.22344087008397961, "grad_norm": 0.4767991900444031, "learning_rate": 9.923960688367795e-06, "loss": 0.4597, "step": 4869 }, { "epoch": 0.22348676058923408, "grad_norm": 0.46462228894233704, "learning_rate": 9.923918084306388e-06, "loss": 0.3848, "step": 4870 }, { "epoch": 0.22353265109448855, "grad_norm": 0.4375239312648773, "learning_rate": 9.923875468404512e-06, "loss": 0.3367, "step": 4871 }, { "epoch": 0.22357854159974302, "grad_norm": 0.5722892880439758, "learning_rate": 9.923832840662269e-06, "loss": 0.5179, "step": 4872 }, { "epoch": 0.22362443210499747, "grad_norm": 0.48885923624038696, "learning_rate": 9.923790201079761e-06, "loss": 0.488, "step": 4873 }, { "epoch": 0.22367032261025194, "grad_norm": 0.5007164478302002, "learning_rate": 9.923747549657093e-06, "loss": 0.411, "step": 4874 }, { "epoch": 0.2237162131155064, "grad_norm": 0.4516223669052124, "learning_rate": 9.923704886394366e-06, "loss": 0.3612, "step": 4875 }, { "epoch": 0.22376210362076088, "grad_norm": 0.4912409782409668, "learning_rate": 9.923662211291682e-06, "loss": 0.4297, "step": 4876 }, { "epoch": 0.22380799412601532, "grad_norm": 0.4462112784385681, "learning_rate": 9.923619524349144e-06, "loss": 0.3832, "step": 4877 }, { "epoch": 0.2238538846312698, "grad_norm": 0.4445367753505707, "learning_rate": 9.923576825566856e-06, "loss": 0.3615, "step": 4878 }, { "epoch": 0.22389977513652426, "grad_norm": 0.4949045777320862, "learning_rate": 9.923534114944918e-06, "loss": 0.3948, "step": 4879 }, { "epoch": 0.22394566564177873, "grad_norm": 0.48120051622390747, "learning_rate": 9.923491392483436e-06, "loss": 0.3816, "step": 4880 }, { "epoch": 0.22399155614703317, "grad_norm": 0.44552674889564514, "learning_rate": 9.92344865818251e-06, "loss": 0.37, "step": 4881 }, { "epoch": 0.22403744665228764, "grad_norm": 0.4595223069190979, "learning_rate": 9.923405912042246e-06, "loss": 0.3798, "step": 4882 }, { "epoch": 0.2240833371575421, "grad_norm": 0.4784606993198395, "learning_rate": 9.923363154062741e-06, "loss": 0.4145, "step": 4883 }, { "epoch": 0.22412922766279655, "grad_norm": 0.4554690718650818, "learning_rate": 9.923320384244104e-06, "loss": 0.3452, "step": 4884 }, { "epoch": 0.22417511816805102, "grad_norm": 0.48759523034095764, "learning_rate": 9.923277602586436e-06, "loss": 0.5028, "step": 4885 }, { "epoch": 0.2242210086733055, "grad_norm": 0.4825500547885895, "learning_rate": 9.923234809089839e-06, "loss": 0.3734, "step": 4886 }, { "epoch": 0.22426689917855996, "grad_norm": 0.5292503237724304, "learning_rate": 9.923192003754414e-06, "loss": 0.5135, "step": 4887 }, { "epoch": 0.2243127896838144, "grad_norm": 0.4699976444244385, "learning_rate": 9.923149186580268e-06, "loss": 0.3411, "step": 4888 }, { "epoch": 0.22435868018906888, "grad_norm": 0.5008219480514526, "learning_rate": 9.9231063575675e-06, "loss": 0.437, "step": 4889 }, { "epoch": 0.22440457069432335, "grad_norm": 0.5333161354064941, "learning_rate": 9.923063516716217e-06, "loss": 0.4769, "step": 4890 }, { "epoch": 0.22445046119957782, "grad_norm": 0.47741106152534485, "learning_rate": 9.92302066402652e-06, "loss": 0.3862, "step": 4891 }, { "epoch": 0.22449635170483226, "grad_norm": 0.48897621035575867, "learning_rate": 9.92297779949851e-06, "loss": 0.4839, "step": 4892 }, { "epoch": 0.22454224221008673, "grad_norm": 0.48338815569877625, "learning_rate": 9.922934923132294e-06, "loss": 0.4277, "step": 4893 }, { "epoch": 0.2245881327153412, "grad_norm": 0.4981870949268341, "learning_rate": 9.922892034927974e-06, "loss": 0.4592, "step": 4894 }, { "epoch": 0.22463402322059567, "grad_norm": 0.5111919641494751, "learning_rate": 9.92284913488565e-06, "loss": 0.5358, "step": 4895 }, { "epoch": 0.2246799137258501, "grad_norm": 0.5054614543914795, "learning_rate": 9.922806223005427e-06, "loss": 0.4364, "step": 4896 }, { "epoch": 0.22472580423110458, "grad_norm": 0.5211411118507385, "learning_rate": 9.922763299287413e-06, "loss": 0.4487, "step": 4897 }, { "epoch": 0.22477169473635905, "grad_norm": 0.4872068464756012, "learning_rate": 9.922720363731704e-06, "loss": 0.4725, "step": 4898 }, { "epoch": 0.22481758524161352, "grad_norm": 0.45341187715530396, "learning_rate": 9.922677416338406e-06, "loss": 0.3843, "step": 4899 }, { "epoch": 0.22486347574686796, "grad_norm": 0.4295167624950409, "learning_rate": 9.922634457107622e-06, "loss": 0.3519, "step": 4900 }, { "epoch": 0.22490936625212243, "grad_norm": 0.4651036560535431, "learning_rate": 9.922591486039456e-06, "loss": 0.3863, "step": 4901 }, { "epoch": 0.2249552567573769, "grad_norm": 0.48001551628112793, "learning_rate": 9.922548503134013e-06, "loss": 0.4453, "step": 4902 }, { "epoch": 0.22500114726263137, "grad_norm": 0.4280970096588135, "learning_rate": 9.922505508391393e-06, "loss": 0.3189, "step": 4903 }, { "epoch": 0.22504703776788582, "grad_norm": 0.44235193729400635, "learning_rate": 9.922462501811701e-06, "loss": 0.3418, "step": 4904 }, { "epoch": 0.22509292827314029, "grad_norm": 0.4464187026023865, "learning_rate": 9.92241948339504e-06, "loss": 0.3817, "step": 4905 }, { "epoch": 0.22513881877839476, "grad_norm": 0.4962049424648285, "learning_rate": 9.922376453141515e-06, "loss": 0.4429, "step": 4906 }, { "epoch": 0.22518470928364923, "grad_norm": 0.4880332350730896, "learning_rate": 9.922333411051225e-06, "loss": 0.4166, "step": 4907 }, { "epoch": 0.22523059978890367, "grad_norm": 0.4541180431842804, "learning_rate": 9.92229035712428e-06, "loss": 0.4276, "step": 4908 }, { "epoch": 0.22527649029415814, "grad_norm": 0.4275414049625397, "learning_rate": 9.922247291360779e-06, "loss": 0.3274, "step": 4909 }, { "epoch": 0.2253223807994126, "grad_norm": 0.4537696838378906, "learning_rate": 9.922204213760827e-06, "loss": 0.3627, "step": 4910 }, { "epoch": 0.22536827130466708, "grad_norm": 0.4656986892223358, "learning_rate": 9.922161124324528e-06, "loss": 0.408, "step": 4911 }, { "epoch": 0.22541416180992152, "grad_norm": 0.49436402320861816, "learning_rate": 9.922118023051983e-06, "loss": 0.4223, "step": 4912 }, { "epoch": 0.225460052315176, "grad_norm": 0.43175196647644043, "learning_rate": 9.9220749099433e-06, "loss": 0.3672, "step": 4913 }, { "epoch": 0.22550594282043046, "grad_norm": 0.45036956667900085, "learning_rate": 9.922031784998577e-06, "loss": 0.3082, "step": 4914 }, { "epoch": 0.2255518333256849, "grad_norm": 0.4689321517944336, "learning_rate": 9.921988648217924e-06, "loss": 0.3918, "step": 4915 }, { "epoch": 0.22559772383093937, "grad_norm": 0.4360283613204956, "learning_rate": 9.921945499601442e-06, "loss": 0.3363, "step": 4916 }, { "epoch": 0.22564361433619384, "grad_norm": 0.4614073634147644, "learning_rate": 9.921902339149232e-06, "loss": 0.4051, "step": 4917 }, { "epoch": 0.2256895048414483, "grad_norm": 0.44617804884910583, "learning_rate": 9.921859166861401e-06, "loss": 0.4, "step": 4918 }, { "epoch": 0.22573539534670276, "grad_norm": 0.5219184756278992, "learning_rate": 9.921815982738054e-06, "loss": 0.5138, "step": 4919 }, { "epoch": 0.22578128585195723, "grad_norm": 0.44831669330596924, "learning_rate": 9.921772786779291e-06, "loss": 0.3405, "step": 4920 }, { "epoch": 0.2258271763572117, "grad_norm": 0.4779570698738098, "learning_rate": 9.921729578985218e-06, "loss": 0.4236, "step": 4921 }, { "epoch": 0.22587306686246617, "grad_norm": 0.42781367897987366, "learning_rate": 9.92168635935594e-06, "loss": 0.336, "step": 4922 }, { "epoch": 0.2259189573677206, "grad_norm": 0.43575751781463623, "learning_rate": 9.921643127891558e-06, "loss": 0.3422, "step": 4923 }, { "epoch": 0.22596484787297508, "grad_norm": 0.47726768255233765, "learning_rate": 9.92159988459218e-06, "loss": 0.4122, "step": 4924 }, { "epoch": 0.22601073837822955, "grad_norm": 0.5359604954719543, "learning_rate": 9.921556629457904e-06, "loss": 0.573, "step": 4925 }, { "epoch": 0.22605662888348402, "grad_norm": 0.4933781027793884, "learning_rate": 9.92151336248884e-06, "loss": 0.4153, "step": 4926 }, { "epoch": 0.22610251938873846, "grad_norm": 0.43301907181739807, "learning_rate": 9.92147008368509e-06, "loss": 0.3239, "step": 4927 }, { "epoch": 0.22614840989399293, "grad_norm": 0.47758033871650696, "learning_rate": 9.921426793046756e-06, "loss": 0.3941, "step": 4928 }, { "epoch": 0.2261943003992474, "grad_norm": 0.5660764575004578, "learning_rate": 9.921383490573944e-06, "loss": 0.5265, "step": 4929 }, { "epoch": 0.22624019090450187, "grad_norm": 0.46754592657089233, "learning_rate": 9.921340176266759e-06, "loss": 0.4155, "step": 4930 }, { "epoch": 0.2262860814097563, "grad_norm": 0.45041424036026, "learning_rate": 9.921296850125304e-06, "loss": 0.4033, "step": 4931 }, { "epoch": 0.22633197191501078, "grad_norm": 0.4429803192615509, "learning_rate": 9.921253512149682e-06, "loss": 0.3396, "step": 4932 }, { "epoch": 0.22637786242026525, "grad_norm": 0.42263364791870117, "learning_rate": 9.92121016234e-06, "loss": 0.3202, "step": 4933 }, { "epoch": 0.22642375292551972, "grad_norm": 0.49394991993904114, "learning_rate": 9.92116680069636e-06, "loss": 0.4353, "step": 4934 }, { "epoch": 0.22646964343077416, "grad_norm": 0.48084592819213867, "learning_rate": 9.921123427218867e-06, "loss": 0.4636, "step": 4935 }, { "epoch": 0.22651553393602863, "grad_norm": 0.5092942714691162, "learning_rate": 9.921080041907625e-06, "loss": 0.4916, "step": 4936 }, { "epoch": 0.2265614244412831, "grad_norm": 0.6865168213844299, "learning_rate": 9.92103664476274e-06, "loss": 0.5119, "step": 4937 }, { "epoch": 0.22660731494653757, "grad_norm": 0.47005903720855713, "learning_rate": 9.920993235784314e-06, "loss": 0.4308, "step": 4938 }, { "epoch": 0.22665320545179202, "grad_norm": 0.545192301273346, "learning_rate": 9.920949814972453e-06, "loss": 0.4275, "step": 4939 }, { "epoch": 0.2266990959570465, "grad_norm": 0.5689237713813782, "learning_rate": 9.92090638232726e-06, "loss": 0.4499, "step": 4940 }, { "epoch": 0.22674498646230096, "grad_norm": 0.46823644638061523, "learning_rate": 9.920862937848841e-06, "loss": 0.3879, "step": 4941 }, { "epoch": 0.2267908769675554, "grad_norm": 0.450050950050354, "learning_rate": 9.920819481537299e-06, "loss": 0.3849, "step": 4942 }, { "epoch": 0.22683676747280987, "grad_norm": 0.46422719955444336, "learning_rate": 9.920776013392739e-06, "loss": 0.4221, "step": 4943 }, { "epoch": 0.22688265797806434, "grad_norm": 0.48257312178611755, "learning_rate": 9.920732533415265e-06, "loss": 0.4016, "step": 4944 }, { "epoch": 0.2269285484833188, "grad_norm": 0.4819818437099457, "learning_rate": 9.920689041604986e-06, "loss": 0.4961, "step": 4945 }, { "epoch": 0.22697443898857325, "grad_norm": 0.46663084626197815, "learning_rate": 9.920645537962e-06, "loss": 0.3989, "step": 4946 }, { "epoch": 0.22702032949382772, "grad_norm": 0.4643586575984955, "learning_rate": 9.920602022486414e-06, "loss": 0.3691, "step": 4947 }, { "epoch": 0.2270662199990822, "grad_norm": 0.4569918215274811, "learning_rate": 9.920558495178334e-06, "loss": 0.3682, "step": 4948 }, { "epoch": 0.22711211050433666, "grad_norm": 0.44335222244262695, "learning_rate": 9.920514956037864e-06, "loss": 0.353, "step": 4949 }, { "epoch": 0.2271580010095911, "grad_norm": 0.5647660493850708, "learning_rate": 9.920471405065108e-06, "loss": 0.5335, "step": 4950 }, { "epoch": 0.22720389151484557, "grad_norm": 0.4861886203289032, "learning_rate": 9.92042784226017e-06, "loss": 0.4031, "step": 4951 }, { "epoch": 0.22724978202010004, "grad_norm": 0.43190282583236694, "learning_rate": 9.920384267623158e-06, "loss": 0.337, "step": 4952 }, { "epoch": 0.22729567252535451, "grad_norm": 0.5032286047935486, "learning_rate": 9.920340681154175e-06, "loss": 0.4178, "step": 4953 }, { "epoch": 0.22734156303060896, "grad_norm": 0.5101658701896667, "learning_rate": 9.920297082853325e-06, "loss": 0.4177, "step": 4954 }, { "epoch": 0.22738745353586343, "grad_norm": 0.5094603896141052, "learning_rate": 9.920253472720713e-06, "loss": 0.3953, "step": 4955 }, { "epoch": 0.2274333440411179, "grad_norm": 0.512938916683197, "learning_rate": 9.920209850756443e-06, "loss": 0.4765, "step": 4956 }, { "epoch": 0.22747923454637237, "grad_norm": 0.4783475995063782, "learning_rate": 9.920166216960622e-06, "loss": 0.3806, "step": 4957 }, { "epoch": 0.2275251250516268, "grad_norm": 0.5001879930496216, "learning_rate": 9.920122571333356e-06, "loss": 0.4572, "step": 4958 }, { "epoch": 0.22757101555688128, "grad_norm": 0.46075478196144104, "learning_rate": 9.920078913874746e-06, "loss": 0.3807, "step": 4959 }, { "epoch": 0.22761690606213575, "grad_norm": 0.4371754229068756, "learning_rate": 9.9200352445849e-06, "loss": 0.3531, "step": 4960 }, { "epoch": 0.22766279656739022, "grad_norm": 0.49161723256111145, "learning_rate": 9.919991563463922e-06, "loss": 0.3624, "step": 4961 }, { "epoch": 0.22770868707264466, "grad_norm": 0.5151509642601013, "learning_rate": 9.919947870511916e-06, "loss": 0.5005, "step": 4962 }, { "epoch": 0.22775457757789913, "grad_norm": 0.43065324425697327, "learning_rate": 9.919904165728992e-06, "loss": 0.3367, "step": 4963 }, { "epoch": 0.2278004680831536, "grad_norm": 0.47946444153785706, "learning_rate": 9.919860449115246e-06, "loss": 0.4089, "step": 4964 }, { "epoch": 0.22784635858840807, "grad_norm": 0.48312655091285706, "learning_rate": 9.91981672067079e-06, "loss": 0.3934, "step": 4965 }, { "epoch": 0.2278922490936625, "grad_norm": 0.4617455303668976, "learning_rate": 9.91977298039573e-06, "loss": 0.3329, "step": 4966 }, { "epoch": 0.22793813959891698, "grad_norm": 0.5390293002128601, "learning_rate": 9.919729228290165e-06, "loss": 0.5246, "step": 4967 }, { "epoch": 0.22798403010417145, "grad_norm": 0.46788522601127625, "learning_rate": 9.919685464354207e-06, "loss": 0.4028, "step": 4968 }, { "epoch": 0.22802992060942592, "grad_norm": 0.451246052980423, "learning_rate": 9.919641688587956e-06, "loss": 0.3774, "step": 4969 }, { "epoch": 0.22807581111468037, "grad_norm": 0.46292322874069214, "learning_rate": 9.919597900991522e-06, "loss": 0.4005, "step": 4970 }, { "epoch": 0.22812170161993484, "grad_norm": 0.46303513646125793, "learning_rate": 9.919554101565005e-06, "loss": 0.3767, "step": 4971 }, { "epoch": 0.2281675921251893, "grad_norm": 0.5459726452827454, "learning_rate": 9.919510290308514e-06, "loss": 0.5472, "step": 4972 }, { "epoch": 0.22821348263044375, "grad_norm": 0.4538825452327728, "learning_rate": 9.919466467222155e-06, "loss": 0.3638, "step": 4973 }, { "epoch": 0.22825937313569822, "grad_norm": 0.4757990539073944, "learning_rate": 9.919422632306028e-06, "loss": 0.42, "step": 4974 }, { "epoch": 0.2283052636409527, "grad_norm": 0.4459306597709656, "learning_rate": 9.919378785560246e-06, "loss": 0.3834, "step": 4975 }, { "epoch": 0.22835115414620716, "grad_norm": 0.45102763175964355, "learning_rate": 9.919334926984909e-06, "loss": 0.3641, "step": 4976 }, { "epoch": 0.2283970446514616, "grad_norm": 0.4604094326496124, "learning_rate": 9.919291056580124e-06, "loss": 0.3718, "step": 4977 }, { "epoch": 0.22844293515671607, "grad_norm": 0.4630272388458252, "learning_rate": 9.919247174345996e-06, "loss": 0.4185, "step": 4978 }, { "epoch": 0.22848882566197054, "grad_norm": 0.4595909118652344, "learning_rate": 9.919203280282633e-06, "loss": 0.3717, "step": 4979 }, { "epoch": 0.228534716167225, "grad_norm": 0.4760896861553192, "learning_rate": 9.919159374390138e-06, "loss": 0.4476, "step": 4980 }, { "epoch": 0.22858060667247945, "grad_norm": 0.48160412907600403, "learning_rate": 9.919115456668615e-06, "loss": 0.4438, "step": 4981 }, { "epoch": 0.22862649717773392, "grad_norm": 0.5180866718292236, "learning_rate": 9.919071527118173e-06, "loss": 0.34, "step": 4982 }, { "epoch": 0.2286723876829884, "grad_norm": 0.4597753882408142, "learning_rate": 9.91902758573892e-06, "loss": 0.4243, "step": 4983 }, { "epoch": 0.22871827818824286, "grad_norm": 0.4963390529155731, "learning_rate": 9.918983632530953e-06, "loss": 0.4177, "step": 4984 }, { "epoch": 0.2287641686934973, "grad_norm": 0.4787067770957947, "learning_rate": 9.918939667494386e-06, "loss": 0.4149, "step": 4985 }, { "epoch": 0.22881005919875178, "grad_norm": 0.44735705852508545, "learning_rate": 9.91889569062932e-06, "loss": 0.3726, "step": 4986 }, { "epoch": 0.22885594970400625, "grad_norm": 0.4647885262966156, "learning_rate": 9.918851701935863e-06, "loss": 0.4184, "step": 4987 }, { "epoch": 0.22890184020926072, "grad_norm": 0.4930536150932312, "learning_rate": 9.918807701414121e-06, "loss": 0.4056, "step": 4988 }, { "epoch": 0.22894773071451516, "grad_norm": 0.48790839314460754, "learning_rate": 9.918763689064197e-06, "loss": 0.4461, "step": 4989 }, { "epoch": 0.22899362121976963, "grad_norm": 0.4574865698814392, "learning_rate": 9.9187196648862e-06, "loss": 0.4118, "step": 4990 }, { "epoch": 0.2290395117250241, "grad_norm": 0.4391997158527374, "learning_rate": 9.918675628880234e-06, "loss": 0.3646, "step": 4991 }, { "epoch": 0.22908540223027857, "grad_norm": 0.5032287836074829, "learning_rate": 9.918631581046406e-06, "loss": 0.3845, "step": 4992 }, { "epoch": 0.229131292735533, "grad_norm": 0.5309714674949646, "learning_rate": 9.918587521384822e-06, "loss": 0.4641, "step": 4993 }, { "epoch": 0.22917718324078748, "grad_norm": 0.48825475573539734, "learning_rate": 9.918543449895586e-06, "loss": 0.4098, "step": 4994 }, { "epoch": 0.22922307374604195, "grad_norm": 0.48650339245796204, "learning_rate": 9.918499366578806e-06, "loss": 0.432, "step": 4995 }, { "epoch": 0.22926896425129642, "grad_norm": 0.49815139174461365, "learning_rate": 9.918455271434586e-06, "loss": 0.4667, "step": 4996 }, { "epoch": 0.22931485475655086, "grad_norm": 0.4991191327571869, "learning_rate": 9.918411164463036e-06, "loss": 0.3274, "step": 4997 }, { "epoch": 0.22936074526180533, "grad_norm": 0.47008630633354187, "learning_rate": 9.918367045664257e-06, "loss": 0.3997, "step": 4998 }, { "epoch": 0.2294066357670598, "grad_norm": 0.5059807896614075, "learning_rate": 9.91832291503836e-06, "loss": 0.4651, "step": 4999 }, { "epoch": 0.22945252627231424, "grad_norm": 0.48933085799217224, "learning_rate": 9.918278772585446e-06, "loss": 0.4763, "step": 5000 }, { "epoch": 0.22949841677756871, "grad_norm": 0.5065524578094482, "learning_rate": 9.918234618305626e-06, "loss": 0.4039, "step": 5001 }, { "epoch": 0.22954430728282318, "grad_norm": 0.47976741194725037, "learning_rate": 9.918190452199002e-06, "loss": 0.4145, "step": 5002 }, { "epoch": 0.22959019778807765, "grad_norm": 0.5124884843826294, "learning_rate": 9.918146274265684e-06, "loss": 0.5291, "step": 5003 }, { "epoch": 0.2296360882933321, "grad_norm": 0.504642128944397, "learning_rate": 9.918102084505776e-06, "loss": 0.4208, "step": 5004 }, { "epoch": 0.22968197879858657, "grad_norm": 0.49537789821624756, "learning_rate": 9.918057882919383e-06, "loss": 0.4488, "step": 5005 }, { "epoch": 0.22972786930384104, "grad_norm": 0.48464325070381165, "learning_rate": 9.918013669506615e-06, "loss": 0.4553, "step": 5006 }, { "epoch": 0.2297737598090955, "grad_norm": 0.502467691898346, "learning_rate": 9.917969444267575e-06, "loss": 0.44, "step": 5007 }, { "epoch": 0.22981965031434995, "grad_norm": 0.5058987140655518, "learning_rate": 9.917925207202372e-06, "loss": 0.4285, "step": 5008 }, { "epoch": 0.22986554081960442, "grad_norm": 0.4840467870235443, "learning_rate": 9.91788095831111e-06, "loss": 0.3878, "step": 5009 }, { "epoch": 0.2299114313248589, "grad_norm": 0.4543435275554657, "learning_rate": 9.917836697593896e-06, "loss": 0.4097, "step": 5010 }, { "epoch": 0.22995732183011336, "grad_norm": 0.43923765420913696, "learning_rate": 9.917792425050838e-06, "loss": 0.3778, "step": 5011 }, { "epoch": 0.2300032123353678, "grad_norm": 0.5077973008155823, "learning_rate": 9.917748140682042e-06, "loss": 0.4911, "step": 5012 }, { "epoch": 0.23004910284062227, "grad_norm": 0.4553045630455017, "learning_rate": 9.917703844487611e-06, "loss": 0.3817, "step": 5013 }, { "epoch": 0.23009499334587674, "grad_norm": 0.4960514008998871, "learning_rate": 9.917659536467656e-06, "loss": 0.4597, "step": 5014 }, { "epoch": 0.2301408838511312, "grad_norm": 0.4994480311870575, "learning_rate": 9.917615216622284e-06, "loss": 0.5244, "step": 5015 }, { "epoch": 0.23018677435638565, "grad_norm": 0.4767090678215027, "learning_rate": 9.917570884951596e-06, "loss": 0.4246, "step": 5016 }, { "epoch": 0.23023266486164012, "grad_norm": 0.44151371717453003, "learning_rate": 9.917526541455704e-06, "loss": 0.3646, "step": 5017 }, { "epoch": 0.2302785553668946, "grad_norm": 0.4284513592720032, "learning_rate": 9.917482186134711e-06, "loss": 0.3836, "step": 5018 }, { "epoch": 0.23032444587214906, "grad_norm": 0.48593762516975403, "learning_rate": 9.917437818988727e-06, "loss": 0.4625, "step": 5019 }, { "epoch": 0.2303703363774035, "grad_norm": 0.4842730760574341, "learning_rate": 9.917393440017856e-06, "loss": 0.4597, "step": 5020 }, { "epoch": 0.23041622688265798, "grad_norm": 0.6369385123252869, "learning_rate": 9.917349049222207e-06, "loss": 0.4711, "step": 5021 }, { "epoch": 0.23046211738791245, "grad_norm": 0.4542660713195801, "learning_rate": 9.917304646601884e-06, "loss": 0.3852, "step": 5022 }, { "epoch": 0.23050800789316692, "grad_norm": 0.4675392508506775, "learning_rate": 9.917260232156995e-06, "loss": 0.3547, "step": 5023 }, { "epoch": 0.23055389839842136, "grad_norm": 0.4550405442714691, "learning_rate": 9.91721580588765e-06, "loss": 0.355, "step": 5024 }, { "epoch": 0.23059978890367583, "grad_norm": 0.47967174649238586, "learning_rate": 9.91717136779395e-06, "loss": 0.435, "step": 5025 }, { "epoch": 0.2306456794089303, "grad_norm": 0.454787939786911, "learning_rate": 9.917126917876006e-06, "loss": 0.4212, "step": 5026 }, { "epoch": 0.23069156991418477, "grad_norm": 0.4577714502811432, "learning_rate": 9.917082456133923e-06, "loss": 0.4459, "step": 5027 }, { "epoch": 0.2307374604194392, "grad_norm": 0.4787876307964325, "learning_rate": 9.91703798256781e-06, "loss": 0.4197, "step": 5028 }, { "epoch": 0.23078335092469368, "grad_norm": 0.4666035771369934, "learning_rate": 9.91699349717777e-06, "loss": 0.3405, "step": 5029 }, { "epoch": 0.23082924142994815, "grad_norm": 0.48483747243881226, "learning_rate": 9.916948999963915e-06, "loss": 0.4135, "step": 5030 }, { "epoch": 0.2308751319352026, "grad_norm": 0.49244368076324463, "learning_rate": 9.916904490926348e-06, "loss": 0.4789, "step": 5031 }, { "epoch": 0.23092102244045706, "grad_norm": 0.47838544845581055, "learning_rate": 9.916859970065178e-06, "loss": 0.39, "step": 5032 }, { "epoch": 0.23096691294571153, "grad_norm": 0.4705907702445984, "learning_rate": 9.916815437380511e-06, "loss": 0.3959, "step": 5033 }, { "epoch": 0.231012803450966, "grad_norm": 0.5116842985153198, "learning_rate": 9.916770892872455e-06, "loss": 0.4381, "step": 5034 }, { "epoch": 0.23105869395622045, "grad_norm": 0.4437456727027893, "learning_rate": 9.916726336541117e-06, "loss": 0.4053, "step": 5035 }, { "epoch": 0.23110458446147492, "grad_norm": 0.4988843500614166, "learning_rate": 9.916681768386605e-06, "loss": 0.4513, "step": 5036 }, { "epoch": 0.23115047496672939, "grad_norm": 0.4737136662006378, "learning_rate": 9.916637188409024e-06, "loss": 0.4184, "step": 5037 }, { "epoch": 0.23119636547198386, "grad_norm": 0.46205082535743713, "learning_rate": 9.91659259660848e-06, "loss": 0.395, "step": 5038 }, { "epoch": 0.2312422559772383, "grad_norm": 0.4780011475086212, "learning_rate": 9.916547992985087e-06, "loss": 0.4331, "step": 5039 }, { "epoch": 0.23128814648249277, "grad_norm": 0.42814281582832336, "learning_rate": 9.916503377538945e-06, "loss": 0.3221, "step": 5040 }, { "epoch": 0.23133403698774724, "grad_norm": 0.4663562774658203, "learning_rate": 9.916458750270164e-06, "loss": 0.3745, "step": 5041 }, { "epoch": 0.2313799274930017, "grad_norm": 0.448881059885025, "learning_rate": 9.916414111178853e-06, "loss": 0.3841, "step": 5042 }, { "epoch": 0.23142581799825615, "grad_norm": 0.45631641149520874, "learning_rate": 9.916369460265116e-06, "loss": 0.399, "step": 5043 }, { "epoch": 0.23147170850351062, "grad_norm": 0.4652758836746216, "learning_rate": 9.916324797529063e-06, "loss": 0.3973, "step": 5044 }, { "epoch": 0.2315175990087651, "grad_norm": 0.42635032534599304, "learning_rate": 9.9162801229708e-06, "loss": 0.3452, "step": 5045 }, { "epoch": 0.23156348951401956, "grad_norm": 0.5374182462692261, "learning_rate": 9.916235436590436e-06, "loss": 0.5509, "step": 5046 }, { "epoch": 0.231609380019274, "grad_norm": 0.4726683497428894, "learning_rate": 9.916190738388076e-06, "loss": 0.3692, "step": 5047 }, { "epoch": 0.23165527052452847, "grad_norm": 0.5173768401145935, "learning_rate": 9.91614602836383e-06, "loss": 0.4784, "step": 5048 }, { "epoch": 0.23170116102978294, "grad_norm": 0.4761207103729248, "learning_rate": 9.916101306517804e-06, "loss": 0.4431, "step": 5049 }, { "epoch": 0.2317470515350374, "grad_norm": 0.46522706747055054, "learning_rate": 9.916056572850106e-06, "loss": 0.4243, "step": 5050 }, { "epoch": 0.23179294204029185, "grad_norm": 0.5308298468589783, "learning_rate": 9.916011827360842e-06, "loss": 0.4571, "step": 5051 }, { "epoch": 0.23183883254554633, "grad_norm": 0.45010480284690857, "learning_rate": 9.915967070050124e-06, "loss": 0.3421, "step": 5052 }, { "epoch": 0.2318847230508008, "grad_norm": 0.5172383189201355, "learning_rate": 9.915922300918054e-06, "loss": 0.4745, "step": 5053 }, { "epoch": 0.23193061355605527, "grad_norm": 0.4733390808105469, "learning_rate": 9.915877519964744e-06, "loss": 0.3964, "step": 5054 }, { "epoch": 0.2319765040613097, "grad_norm": 0.45765385031700134, "learning_rate": 9.9158327271903e-06, "loss": 0.3607, "step": 5055 }, { "epoch": 0.23202239456656418, "grad_norm": 0.5071452260017395, "learning_rate": 9.915787922594829e-06, "loss": 0.4789, "step": 5056 }, { "epoch": 0.23206828507181865, "grad_norm": 0.45090365409851074, "learning_rate": 9.91574310617844e-06, "loss": 0.3889, "step": 5057 }, { "epoch": 0.23211417557707312, "grad_norm": 0.457913339138031, "learning_rate": 9.91569827794124e-06, "loss": 0.4029, "step": 5058 }, { "epoch": 0.23216006608232756, "grad_norm": 0.458809494972229, "learning_rate": 9.915653437883337e-06, "loss": 0.3796, "step": 5059 }, { "epoch": 0.23220595658758203, "grad_norm": 0.5112406015396118, "learning_rate": 9.915608586004839e-06, "loss": 0.5472, "step": 5060 }, { "epoch": 0.2322518470928365, "grad_norm": 0.527134120464325, "learning_rate": 9.915563722305854e-06, "loss": 0.511, "step": 5061 }, { "epoch": 0.23229773759809094, "grad_norm": 0.424339234828949, "learning_rate": 9.915518846786489e-06, "loss": 0.3261, "step": 5062 }, { "epoch": 0.2323436281033454, "grad_norm": 0.5623648166656494, "learning_rate": 9.915473959446853e-06, "loss": 0.3969, "step": 5063 }, { "epoch": 0.23238951860859988, "grad_norm": 0.4731033444404602, "learning_rate": 9.915429060287054e-06, "loss": 0.381, "step": 5064 }, { "epoch": 0.23243540911385435, "grad_norm": 0.4561477303504944, "learning_rate": 9.915384149307198e-06, "loss": 0.3852, "step": 5065 }, { "epoch": 0.2324812996191088, "grad_norm": 0.4622589647769928, "learning_rate": 9.915339226507396e-06, "loss": 0.3907, "step": 5066 }, { "epoch": 0.23252719012436326, "grad_norm": 0.4760129451751709, "learning_rate": 9.915294291887755e-06, "loss": 0.385, "step": 5067 }, { "epoch": 0.23257308062961773, "grad_norm": 0.5139806270599365, "learning_rate": 9.915249345448381e-06, "loss": 0.4594, "step": 5068 }, { "epoch": 0.2326189711348722, "grad_norm": 0.4360876977443695, "learning_rate": 9.915204387189384e-06, "loss": 0.3597, "step": 5069 }, { "epoch": 0.23266486164012665, "grad_norm": 0.44654330611228943, "learning_rate": 9.915159417110874e-06, "loss": 0.3742, "step": 5070 }, { "epoch": 0.23271075214538112, "grad_norm": 0.48947635293006897, "learning_rate": 9.915114435212954e-06, "loss": 0.4181, "step": 5071 }, { "epoch": 0.2327566426506356, "grad_norm": 0.5250278115272522, "learning_rate": 9.915069441495735e-06, "loss": 0.4752, "step": 5072 }, { "epoch": 0.23280253315589006, "grad_norm": 0.4782872200012207, "learning_rate": 9.915024435959326e-06, "loss": 0.424, "step": 5073 }, { "epoch": 0.2328484236611445, "grad_norm": 0.4785747826099396, "learning_rate": 9.914979418603833e-06, "loss": 0.4427, "step": 5074 }, { "epoch": 0.23289431416639897, "grad_norm": 0.6122748255729675, "learning_rate": 9.91493438942937e-06, "loss": 0.5118, "step": 5075 }, { "epoch": 0.23294020467165344, "grad_norm": 0.5223903656005859, "learning_rate": 9.914889348436036e-06, "loss": 0.4699, "step": 5076 }, { "epoch": 0.2329860951769079, "grad_norm": 0.4806162118911743, "learning_rate": 9.914844295623947e-06, "loss": 0.466, "step": 5077 }, { "epoch": 0.23303198568216235, "grad_norm": 0.45232102274894714, "learning_rate": 9.914799230993209e-06, "loss": 0.3862, "step": 5078 }, { "epoch": 0.23307787618741682, "grad_norm": 0.47773313522338867, "learning_rate": 9.914754154543929e-06, "loss": 0.3989, "step": 5079 }, { "epoch": 0.2331237666926713, "grad_norm": 0.4781469404697418, "learning_rate": 9.914709066276216e-06, "loss": 0.4233, "step": 5080 }, { "epoch": 0.23316965719792576, "grad_norm": 0.4485608637332916, "learning_rate": 9.91466396619018e-06, "loss": 0.3653, "step": 5081 }, { "epoch": 0.2332155477031802, "grad_norm": 0.4454772174358368, "learning_rate": 9.914618854285927e-06, "loss": 0.3705, "step": 5082 }, { "epoch": 0.23326143820843467, "grad_norm": 0.464932918548584, "learning_rate": 9.914573730563568e-06, "loss": 0.3975, "step": 5083 }, { "epoch": 0.23330732871368914, "grad_norm": 0.48395225405693054, "learning_rate": 9.91452859502321e-06, "loss": 0.4764, "step": 5084 }, { "epoch": 0.2333532192189436, "grad_norm": 0.5508474707603455, "learning_rate": 9.914483447664961e-06, "loss": 0.5212, "step": 5085 }, { "epoch": 0.23339910972419806, "grad_norm": 0.4481509327888489, "learning_rate": 9.91443828848893e-06, "loss": 0.3757, "step": 5086 }, { "epoch": 0.23344500022945253, "grad_norm": 0.46165764331817627, "learning_rate": 9.914393117495227e-06, "loss": 0.3676, "step": 5087 }, { "epoch": 0.233490890734707, "grad_norm": 0.4828527271747589, "learning_rate": 9.91434793468396e-06, "loss": 0.4699, "step": 5088 }, { "epoch": 0.23353678123996144, "grad_norm": 0.45526665449142456, "learning_rate": 9.914302740055237e-06, "loss": 0.3505, "step": 5089 }, { "epoch": 0.2335826717452159, "grad_norm": 0.4978918135166168, "learning_rate": 9.914257533609166e-06, "loss": 0.399, "step": 5090 }, { "epoch": 0.23362856225047038, "grad_norm": 0.4557889997959137, "learning_rate": 9.914212315345857e-06, "loss": 0.4146, "step": 5091 }, { "epoch": 0.23367445275572485, "grad_norm": 0.46067050099372864, "learning_rate": 9.914167085265418e-06, "loss": 0.3605, "step": 5092 }, { "epoch": 0.2337203432609793, "grad_norm": 0.42339664697647095, "learning_rate": 9.914121843367959e-06, "loss": 0.3508, "step": 5093 }, { "epoch": 0.23376623376623376, "grad_norm": 0.47236618399620056, "learning_rate": 9.914076589653587e-06, "loss": 0.408, "step": 5094 }, { "epoch": 0.23381212427148823, "grad_norm": 0.46094775199890137, "learning_rate": 9.914031324122412e-06, "loss": 0.3546, "step": 5095 }, { "epoch": 0.2338580147767427, "grad_norm": 0.4532855153083801, "learning_rate": 9.913986046774544e-06, "loss": 0.3817, "step": 5096 }, { "epoch": 0.23390390528199714, "grad_norm": 0.5269132256507874, "learning_rate": 9.91394075761009e-06, "loss": 0.4936, "step": 5097 }, { "epoch": 0.2339497957872516, "grad_norm": 0.6242097616195679, "learning_rate": 9.913895456629156e-06, "loss": 0.4235, "step": 5098 }, { "epoch": 0.23399568629250608, "grad_norm": 0.43184566497802734, "learning_rate": 9.913850143831858e-06, "loss": 0.322, "step": 5099 }, { "epoch": 0.23404157679776055, "grad_norm": 0.4632769823074341, "learning_rate": 9.9138048192183e-06, "loss": 0.3936, "step": 5100 }, { "epoch": 0.234087467303015, "grad_norm": 0.5015445947647095, "learning_rate": 9.913759482788593e-06, "loss": 0.4405, "step": 5101 }, { "epoch": 0.23413335780826947, "grad_norm": 0.49522823095321655, "learning_rate": 9.913714134542845e-06, "loss": 0.4501, "step": 5102 }, { "epoch": 0.23417924831352394, "grad_norm": 0.5072798132896423, "learning_rate": 9.913668774481164e-06, "loss": 0.4807, "step": 5103 }, { "epoch": 0.2342251388187784, "grad_norm": 0.5007641315460205, "learning_rate": 9.913623402603663e-06, "loss": 0.4056, "step": 5104 }, { "epoch": 0.23427102932403285, "grad_norm": 0.4772675931453705, "learning_rate": 9.913578018910445e-06, "loss": 0.3997, "step": 5105 }, { "epoch": 0.23431691982928732, "grad_norm": 0.4053618609905243, "learning_rate": 9.913532623401624e-06, "loss": 0.2955, "step": 5106 }, { "epoch": 0.2343628103345418, "grad_norm": 0.483697772026062, "learning_rate": 9.91348721607731e-06, "loss": 0.3812, "step": 5107 }, { "epoch": 0.23440870083979626, "grad_norm": 0.5483150482177734, "learning_rate": 9.913441796937609e-06, "loss": 0.4784, "step": 5108 }, { "epoch": 0.2344545913450507, "grad_norm": 0.44203898310661316, "learning_rate": 9.913396365982631e-06, "loss": 0.3345, "step": 5109 }, { "epoch": 0.23450048185030517, "grad_norm": 0.4321436882019043, "learning_rate": 9.913350923212484e-06, "loss": 0.3443, "step": 5110 }, { "epoch": 0.23454637235555964, "grad_norm": 0.525423526763916, "learning_rate": 9.913305468627279e-06, "loss": 0.482, "step": 5111 }, { "epoch": 0.2345922628608141, "grad_norm": 0.43955475091934204, "learning_rate": 9.913260002227125e-06, "loss": 0.3484, "step": 5112 }, { "epoch": 0.23463815336606855, "grad_norm": 0.49478378891944885, "learning_rate": 9.913214524012134e-06, "loss": 0.4614, "step": 5113 }, { "epoch": 0.23468404387132302, "grad_norm": 0.5197994709014893, "learning_rate": 9.91316903398241e-06, "loss": 0.4403, "step": 5114 }, { "epoch": 0.2347299343765775, "grad_norm": 0.45656052231788635, "learning_rate": 9.913123532138066e-06, "loss": 0.3948, "step": 5115 }, { "epoch": 0.23477582488183196, "grad_norm": 0.5082331299781799, "learning_rate": 9.91307801847921e-06, "loss": 0.4409, "step": 5116 }, { "epoch": 0.2348217153870864, "grad_norm": 0.49327245354652405, "learning_rate": 9.913032493005953e-06, "loss": 0.3817, "step": 5117 }, { "epoch": 0.23486760589234087, "grad_norm": 0.4587899446487427, "learning_rate": 9.912986955718403e-06, "loss": 0.3432, "step": 5118 }, { "epoch": 0.23491349639759534, "grad_norm": 0.4807136356830597, "learning_rate": 9.912941406616669e-06, "loss": 0.4259, "step": 5119 }, { "epoch": 0.2349593869028498, "grad_norm": 0.5040963888168335, "learning_rate": 9.912895845700863e-06, "loss": 0.4801, "step": 5120 }, { "epoch": 0.23500527740810426, "grad_norm": 0.44389253854751587, "learning_rate": 9.912850272971093e-06, "loss": 0.3594, "step": 5121 }, { "epoch": 0.23505116791335873, "grad_norm": 0.4951320290565491, "learning_rate": 9.912804688427468e-06, "loss": 0.4914, "step": 5122 }, { "epoch": 0.2350970584186132, "grad_norm": 0.5539964437484741, "learning_rate": 9.912759092070098e-06, "loss": 0.5691, "step": 5123 }, { "epoch": 0.23514294892386764, "grad_norm": 0.44868677854537964, "learning_rate": 9.912713483899092e-06, "loss": 0.4059, "step": 5124 }, { "epoch": 0.2351888394291221, "grad_norm": 0.4907565414905548, "learning_rate": 9.912667863914562e-06, "loss": 0.4315, "step": 5125 }, { "epoch": 0.23523472993437658, "grad_norm": 0.47038042545318604, "learning_rate": 9.912622232116617e-06, "loss": 0.3852, "step": 5126 }, { "epoch": 0.23528062043963105, "grad_norm": 0.5518418550491333, "learning_rate": 9.912576588505364e-06, "loss": 0.2998, "step": 5127 }, { "epoch": 0.2353265109448855, "grad_norm": 0.4782343804836273, "learning_rate": 9.912530933080916e-06, "loss": 0.3463, "step": 5128 }, { "epoch": 0.23537240145013996, "grad_norm": 0.43094146251678467, "learning_rate": 9.912485265843382e-06, "loss": 0.3668, "step": 5129 }, { "epoch": 0.23541829195539443, "grad_norm": 0.45232853293418884, "learning_rate": 9.91243958679287e-06, "loss": 0.4059, "step": 5130 }, { "epoch": 0.2354641824606489, "grad_norm": 0.4581269919872284, "learning_rate": 9.912393895929492e-06, "loss": 0.4202, "step": 5131 }, { "epoch": 0.23551007296590334, "grad_norm": 0.44661420583724976, "learning_rate": 9.912348193253357e-06, "loss": 0.3487, "step": 5132 }, { "epoch": 0.23555596347115781, "grad_norm": 0.4853942096233368, "learning_rate": 9.912302478764574e-06, "loss": 0.4126, "step": 5133 }, { "epoch": 0.23560185397641228, "grad_norm": 0.4832753539085388, "learning_rate": 9.912256752463256e-06, "loss": 0.4062, "step": 5134 }, { "epoch": 0.23564774448166675, "grad_norm": 0.5102874040603638, "learning_rate": 9.91221101434951e-06, "loss": 0.5426, "step": 5135 }, { "epoch": 0.2356936349869212, "grad_norm": 0.5339062809944153, "learning_rate": 9.912165264423446e-06, "loss": 0.4436, "step": 5136 }, { "epoch": 0.23573952549217567, "grad_norm": 0.4484342634677887, "learning_rate": 9.912119502685175e-06, "loss": 0.3705, "step": 5137 }, { "epoch": 0.23578541599743014, "grad_norm": 0.4828857183456421, "learning_rate": 9.912073729134808e-06, "loss": 0.4456, "step": 5138 }, { "epoch": 0.2358313065026846, "grad_norm": 0.4640159606933594, "learning_rate": 9.912027943772453e-06, "loss": 0.3925, "step": 5139 }, { "epoch": 0.23587719700793905, "grad_norm": 0.46920710802078247, "learning_rate": 9.911982146598222e-06, "loss": 0.4533, "step": 5140 }, { "epoch": 0.23592308751319352, "grad_norm": 0.47482946515083313, "learning_rate": 9.911936337612223e-06, "loss": 0.4311, "step": 5141 }, { "epoch": 0.235968978018448, "grad_norm": 0.4582137167453766, "learning_rate": 9.911890516814568e-06, "loss": 0.4379, "step": 5142 }, { "epoch": 0.23601486852370246, "grad_norm": 0.47792595624923706, "learning_rate": 9.911844684205367e-06, "loss": 0.4349, "step": 5143 }, { "epoch": 0.2360607590289569, "grad_norm": 0.49188026785850525, "learning_rate": 9.911798839784729e-06, "loss": 0.4055, "step": 5144 }, { "epoch": 0.23610664953421137, "grad_norm": 0.45212605595588684, "learning_rate": 9.911752983552765e-06, "loss": 0.34, "step": 5145 }, { "epoch": 0.23615254003946584, "grad_norm": 0.4533120393753052, "learning_rate": 9.911707115509586e-06, "loss": 0.3739, "step": 5146 }, { "epoch": 0.23619843054472028, "grad_norm": 0.48456940054893494, "learning_rate": 9.911661235655302e-06, "loss": 0.4001, "step": 5147 }, { "epoch": 0.23624432104997475, "grad_norm": 0.46132054924964905, "learning_rate": 9.91161534399002e-06, "loss": 0.3723, "step": 5148 }, { "epoch": 0.23629021155522922, "grad_norm": 0.5226472616195679, "learning_rate": 9.911569440513855e-06, "loss": 0.4996, "step": 5149 }, { "epoch": 0.2363361020604837, "grad_norm": 0.5238263607025146, "learning_rate": 9.911523525226915e-06, "loss": 0.4161, "step": 5150 }, { "epoch": 0.23638199256573814, "grad_norm": 0.43530258536338806, "learning_rate": 9.911477598129312e-06, "loss": 0.3656, "step": 5151 }, { "epoch": 0.2364278830709926, "grad_norm": 0.4978402256965637, "learning_rate": 9.911431659221156e-06, "loss": 0.4373, "step": 5152 }, { "epoch": 0.23647377357624708, "grad_norm": 0.5485527515411377, "learning_rate": 9.911385708502557e-06, "loss": 0.4785, "step": 5153 }, { "epoch": 0.23651966408150155, "grad_norm": 0.4689266085624695, "learning_rate": 9.911339745973625e-06, "loss": 0.3464, "step": 5154 }, { "epoch": 0.236565554586756, "grad_norm": 0.46060195565223694, "learning_rate": 9.91129377163447e-06, "loss": 0.3971, "step": 5155 }, { "epoch": 0.23661144509201046, "grad_norm": 0.48276910185813904, "learning_rate": 9.911247785485205e-06, "loss": 0.4089, "step": 5156 }, { "epoch": 0.23665733559726493, "grad_norm": 0.43599194288253784, "learning_rate": 9.911201787525939e-06, "loss": 0.3701, "step": 5157 }, { "epoch": 0.2367032261025194, "grad_norm": 0.49651390314102173, "learning_rate": 9.911155777756782e-06, "loss": 0.4417, "step": 5158 }, { "epoch": 0.23674911660777384, "grad_norm": 0.44457516074180603, "learning_rate": 9.911109756177844e-06, "loss": 0.3784, "step": 5159 }, { "epoch": 0.2367950071130283, "grad_norm": 0.4829283356666565, "learning_rate": 9.91106372278924e-06, "loss": 0.4389, "step": 5160 }, { "epoch": 0.23684089761828278, "grad_norm": 0.4659006893634796, "learning_rate": 9.911017677591074e-06, "loss": 0.4041, "step": 5161 }, { "epoch": 0.23688678812353725, "grad_norm": 0.4855808913707733, "learning_rate": 9.910971620583466e-06, "loss": 0.4141, "step": 5162 }, { "epoch": 0.2369326786287917, "grad_norm": 0.4902080297470093, "learning_rate": 9.910925551766515e-06, "loss": 0.4841, "step": 5163 }, { "epoch": 0.23697856913404616, "grad_norm": 0.4838698208332062, "learning_rate": 9.910879471140342e-06, "loss": 0.4061, "step": 5164 }, { "epoch": 0.23702445963930063, "grad_norm": 0.4559089243412018, "learning_rate": 9.910833378705053e-06, "loss": 0.3783, "step": 5165 }, { "epoch": 0.2370703501445551, "grad_norm": 0.4579033851623535, "learning_rate": 9.910787274460758e-06, "loss": 0.3984, "step": 5166 }, { "epoch": 0.23711624064980955, "grad_norm": 0.4385758936405182, "learning_rate": 9.91074115840757e-06, "loss": 0.3563, "step": 5167 }, { "epoch": 0.23716213115506402, "grad_norm": 0.4908759593963623, "learning_rate": 9.910695030545602e-06, "loss": 0.4099, "step": 5168 }, { "epoch": 0.23720802166031849, "grad_norm": 0.4790913462638855, "learning_rate": 9.91064889087496e-06, "loss": 0.4249, "step": 5169 }, { "epoch": 0.23725391216557296, "grad_norm": 0.4737749397754669, "learning_rate": 9.910602739395758e-06, "loss": 0.3913, "step": 5170 }, { "epoch": 0.2372998026708274, "grad_norm": 0.450844407081604, "learning_rate": 9.910556576108106e-06, "loss": 0.4062, "step": 5171 }, { "epoch": 0.23734569317608187, "grad_norm": 0.43245989084243774, "learning_rate": 9.910510401012114e-06, "loss": 0.3329, "step": 5172 }, { "epoch": 0.23739158368133634, "grad_norm": 0.4363330602645874, "learning_rate": 9.910464214107897e-06, "loss": 0.3513, "step": 5173 }, { "epoch": 0.2374374741865908, "grad_norm": 0.42545145750045776, "learning_rate": 9.910418015395563e-06, "loss": 0.3447, "step": 5174 }, { "epoch": 0.23748336469184525, "grad_norm": 0.4734691083431244, "learning_rate": 9.910371804875221e-06, "loss": 0.3764, "step": 5175 }, { "epoch": 0.23752925519709972, "grad_norm": 0.5080201625823975, "learning_rate": 9.910325582546986e-06, "loss": 0.454, "step": 5176 }, { "epoch": 0.2375751457023542, "grad_norm": 0.5100452899932861, "learning_rate": 9.910279348410967e-06, "loss": 0.5139, "step": 5177 }, { "epoch": 0.23762103620760863, "grad_norm": 0.500399112701416, "learning_rate": 9.910233102467277e-06, "loss": 0.5094, "step": 5178 }, { "epoch": 0.2376669267128631, "grad_norm": 0.4432986378669739, "learning_rate": 9.910186844716025e-06, "loss": 0.3335, "step": 5179 }, { "epoch": 0.23771281721811757, "grad_norm": 0.5063264966011047, "learning_rate": 9.910140575157324e-06, "loss": 0.4478, "step": 5180 }, { "epoch": 0.23775870772337204, "grad_norm": 0.4564862847328186, "learning_rate": 9.910094293791284e-06, "loss": 0.3658, "step": 5181 }, { "epoch": 0.23780459822862648, "grad_norm": 0.5089392066001892, "learning_rate": 9.910048000618019e-06, "loss": 0.4701, "step": 5182 }, { "epoch": 0.23785048873388095, "grad_norm": 0.518581211566925, "learning_rate": 9.910001695637635e-06, "loss": 0.4543, "step": 5183 }, { "epoch": 0.23789637923913542, "grad_norm": 0.5212941765785217, "learning_rate": 9.90995537885025e-06, "loss": 0.5024, "step": 5184 }, { "epoch": 0.2379422697443899, "grad_norm": 0.46136966347694397, "learning_rate": 9.909909050255968e-06, "loss": 0.3411, "step": 5185 }, { "epoch": 0.23798816024964434, "grad_norm": 0.48377469182014465, "learning_rate": 9.909862709854907e-06, "loss": 0.4455, "step": 5186 }, { "epoch": 0.2380340507548988, "grad_norm": 0.5080629587173462, "learning_rate": 9.909816357647175e-06, "loss": 0.4416, "step": 5187 }, { "epoch": 0.23807994126015328, "grad_norm": 0.45300838351249695, "learning_rate": 9.909769993632885e-06, "loss": 0.406, "step": 5188 }, { "epoch": 0.23812583176540775, "grad_norm": 0.46028393507003784, "learning_rate": 9.909723617812148e-06, "loss": 0.3942, "step": 5189 }, { "epoch": 0.2381717222706622, "grad_norm": 0.44495072960853577, "learning_rate": 9.909677230185072e-06, "loss": 0.354, "step": 5190 }, { "epoch": 0.23821761277591666, "grad_norm": 0.5082409977912903, "learning_rate": 9.909630830751776e-06, "loss": 0.4953, "step": 5191 }, { "epoch": 0.23826350328117113, "grad_norm": 0.4716769754886627, "learning_rate": 9.909584419512366e-06, "loss": 0.399, "step": 5192 }, { "epoch": 0.2383093937864256, "grad_norm": 0.4795176684856415, "learning_rate": 9.909537996466954e-06, "loss": 0.3919, "step": 5193 }, { "epoch": 0.23835528429168004, "grad_norm": 0.4324151575565338, "learning_rate": 9.909491561615654e-06, "loss": 0.3471, "step": 5194 }, { "epoch": 0.2384011747969345, "grad_norm": 0.47427061200141907, "learning_rate": 9.909445114958577e-06, "loss": 0.435, "step": 5195 }, { "epoch": 0.23844706530218898, "grad_norm": 0.43681710958480835, "learning_rate": 9.90939865649583e-06, "loss": 0.3474, "step": 5196 }, { "epoch": 0.23849295580744345, "grad_norm": 0.4783264994621277, "learning_rate": 9.909352186227533e-06, "loss": 0.3484, "step": 5197 }, { "epoch": 0.2385388463126979, "grad_norm": 0.42034077644348145, "learning_rate": 9.90930570415379e-06, "loss": 0.3434, "step": 5198 }, { "epoch": 0.23858473681795236, "grad_norm": 0.444210022687912, "learning_rate": 9.909259210274718e-06, "loss": 0.362, "step": 5199 }, { "epoch": 0.23863062732320683, "grad_norm": 0.4768187403678894, "learning_rate": 9.909212704590428e-06, "loss": 0.3682, "step": 5200 }, { "epoch": 0.2386765178284613, "grad_norm": 0.45087242126464844, "learning_rate": 9.909166187101028e-06, "loss": 0.3906, "step": 5201 }, { "epoch": 0.23872240833371575, "grad_norm": 0.443838894367218, "learning_rate": 9.909119657806636e-06, "loss": 0.3508, "step": 5202 }, { "epoch": 0.23876829883897022, "grad_norm": 0.4832899272441864, "learning_rate": 9.909073116707358e-06, "loss": 0.4142, "step": 5203 }, { "epoch": 0.2388141893442247, "grad_norm": 0.45838668942451477, "learning_rate": 9.90902656380331e-06, "loss": 0.334, "step": 5204 }, { "epoch": 0.23886007984947916, "grad_norm": 0.4528806805610657, "learning_rate": 9.908979999094601e-06, "loss": 0.397, "step": 5205 }, { "epoch": 0.2389059703547336, "grad_norm": 0.5295097827911377, "learning_rate": 9.908933422581345e-06, "loss": 0.4663, "step": 5206 }, { "epoch": 0.23895186085998807, "grad_norm": 0.47119978070259094, "learning_rate": 9.908886834263654e-06, "loss": 0.3863, "step": 5207 }, { "epoch": 0.23899775136524254, "grad_norm": 0.5063113570213318, "learning_rate": 9.908840234141638e-06, "loss": 0.4808, "step": 5208 }, { "epoch": 0.23904364187049698, "grad_norm": 0.4429151117801666, "learning_rate": 9.908793622215414e-06, "loss": 0.3421, "step": 5209 }, { "epoch": 0.23908953237575145, "grad_norm": 0.4345877170562744, "learning_rate": 9.908746998485087e-06, "loss": 0.3644, "step": 5210 }, { "epoch": 0.23913542288100592, "grad_norm": 0.4474395215511322, "learning_rate": 9.908700362950774e-06, "loss": 0.3679, "step": 5211 }, { "epoch": 0.2391813133862604, "grad_norm": 0.45323947072029114, "learning_rate": 9.908653715612586e-06, "loss": 0.3864, "step": 5212 }, { "epoch": 0.23922720389151483, "grad_norm": 0.5114389061927795, "learning_rate": 9.908607056470636e-06, "loss": 0.4715, "step": 5213 }, { "epoch": 0.2392730943967693, "grad_norm": 0.5005764961242676, "learning_rate": 9.908560385525033e-06, "loss": 0.4753, "step": 5214 }, { "epoch": 0.23931898490202377, "grad_norm": 0.5477319955825806, "learning_rate": 9.908513702775893e-06, "loss": 0.4803, "step": 5215 }, { "epoch": 0.23936487540727824, "grad_norm": 0.4974982440471649, "learning_rate": 9.908467008223326e-06, "loss": 0.4279, "step": 5216 }, { "epoch": 0.23941076591253269, "grad_norm": 0.4765852987766266, "learning_rate": 9.908420301867444e-06, "loss": 0.4022, "step": 5217 }, { "epoch": 0.23945665641778716, "grad_norm": 0.47904282808303833, "learning_rate": 9.908373583708364e-06, "loss": 0.4516, "step": 5218 }, { "epoch": 0.23950254692304163, "grad_norm": 0.4584992825984955, "learning_rate": 9.90832685374619e-06, "loss": 0.3646, "step": 5219 }, { "epoch": 0.2395484374282961, "grad_norm": 0.498783677816391, "learning_rate": 9.908280111981044e-06, "loss": 0.4333, "step": 5220 }, { "epoch": 0.23959432793355054, "grad_norm": 0.4474789500236511, "learning_rate": 9.908233358413028e-06, "loss": 0.3875, "step": 5221 }, { "epoch": 0.239640218438805, "grad_norm": 0.4636237919330597, "learning_rate": 9.908186593042264e-06, "loss": 0.3933, "step": 5222 }, { "epoch": 0.23968610894405948, "grad_norm": 0.5215755105018616, "learning_rate": 9.90813981586886e-06, "loss": 0.4842, "step": 5223 }, { "epoch": 0.23973199944931395, "grad_norm": 0.44562146067619324, "learning_rate": 9.908093026892927e-06, "loss": 0.3968, "step": 5224 }, { "epoch": 0.2397778899545684, "grad_norm": 0.5041016340255737, "learning_rate": 9.90804622611458e-06, "loss": 0.4594, "step": 5225 }, { "epoch": 0.23982378045982286, "grad_norm": 0.4785310924053192, "learning_rate": 9.90799941353393e-06, "loss": 0.4668, "step": 5226 }, { "epoch": 0.23986967096507733, "grad_norm": 0.46815717220306396, "learning_rate": 9.907952589151093e-06, "loss": 0.4368, "step": 5227 }, { "epoch": 0.2399155614703318, "grad_norm": 0.5085462331771851, "learning_rate": 9.907905752966177e-06, "loss": 0.4655, "step": 5228 }, { "epoch": 0.23996145197558624, "grad_norm": 0.5794103741645813, "learning_rate": 9.907858904979297e-06, "loss": 0.4431, "step": 5229 }, { "epoch": 0.2400073424808407, "grad_norm": 0.4709904193878174, "learning_rate": 9.907812045190563e-06, "loss": 0.4685, "step": 5230 }, { "epoch": 0.24005323298609518, "grad_norm": 0.4938027560710907, "learning_rate": 9.907765173600093e-06, "loss": 0.4597, "step": 5231 }, { "epoch": 0.24009912349134965, "grad_norm": 0.44817906618118286, "learning_rate": 9.907718290207997e-06, "loss": 0.3518, "step": 5232 }, { "epoch": 0.2401450139966041, "grad_norm": 0.4672658443450928, "learning_rate": 9.907671395014385e-06, "loss": 0.4, "step": 5233 }, { "epoch": 0.24019090450185857, "grad_norm": 0.48939913511276245, "learning_rate": 9.907624488019375e-06, "loss": 0.4524, "step": 5234 }, { "epoch": 0.24023679500711304, "grad_norm": 0.43702808022499084, "learning_rate": 9.907577569223075e-06, "loss": 0.3652, "step": 5235 }, { "epoch": 0.24028268551236748, "grad_norm": 0.47192588448524475, "learning_rate": 9.9075306386256e-06, "loss": 0.4474, "step": 5236 }, { "epoch": 0.24032857601762195, "grad_norm": 0.4448605179786682, "learning_rate": 9.907483696227062e-06, "loss": 0.3433, "step": 5237 }, { "epoch": 0.24037446652287642, "grad_norm": 0.45337679982185364, "learning_rate": 9.907436742027577e-06, "loss": 0.4256, "step": 5238 }, { "epoch": 0.2404203570281309, "grad_norm": 0.47401705384254456, "learning_rate": 9.907389776027254e-06, "loss": 0.4088, "step": 5239 }, { "epoch": 0.24046624753338533, "grad_norm": 0.4362351894378662, "learning_rate": 9.907342798226207e-06, "loss": 0.3789, "step": 5240 }, { "epoch": 0.2405121380386398, "grad_norm": 0.4289877712726593, "learning_rate": 9.90729580862455e-06, "loss": 0.3608, "step": 5241 }, { "epoch": 0.24055802854389427, "grad_norm": 0.470234215259552, "learning_rate": 9.907248807222396e-06, "loss": 0.4006, "step": 5242 }, { "epoch": 0.24060391904914874, "grad_norm": 0.46503838896751404, "learning_rate": 9.907201794019855e-06, "loss": 0.4155, "step": 5243 }, { "epoch": 0.24064980955440318, "grad_norm": 0.5531212687492371, "learning_rate": 9.907154769017045e-06, "loss": 0.4964, "step": 5244 }, { "epoch": 0.24069570005965765, "grad_norm": 0.46351608633995056, "learning_rate": 9.907107732214075e-06, "loss": 0.3674, "step": 5245 }, { "epoch": 0.24074159056491212, "grad_norm": 0.470418781042099, "learning_rate": 9.90706068361106e-06, "loss": 0.3869, "step": 5246 }, { "epoch": 0.2407874810701666, "grad_norm": 0.4828798174858093, "learning_rate": 9.907013623208112e-06, "loss": 0.4118, "step": 5247 }, { "epoch": 0.24083337157542103, "grad_norm": 0.5748075246810913, "learning_rate": 9.906966551005348e-06, "loss": 0.3887, "step": 5248 }, { "epoch": 0.2408792620806755, "grad_norm": 0.446044385433197, "learning_rate": 9.906919467002876e-06, "loss": 0.3558, "step": 5249 }, { "epoch": 0.24092515258592997, "grad_norm": 0.5182888507843018, "learning_rate": 9.90687237120081e-06, "loss": 0.4417, "step": 5250 }, { "epoch": 0.24097104309118444, "grad_norm": 0.4966528117656708, "learning_rate": 9.906825263599267e-06, "loss": 0.4883, "step": 5251 }, { "epoch": 0.2410169335964389, "grad_norm": 0.43832606077194214, "learning_rate": 9.906778144198357e-06, "loss": 0.3195, "step": 5252 }, { "epoch": 0.24106282410169336, "grad_norm": 0.5073176026344299, "learning_rate": 9.906731012998194e-06, "loss": 0.5308, "step": 5253 }, { "epoch": 0.24110871460694783, "grad_norm": 0.5341662764549255, "learning_rate": 9.906683869998892e-06, "loss": 0.4406, "step": 5254 }, { "epoch": 0.2411546051122023, "grad_norm": 0.42646026611328125, "learning_rate": 9.906636715200563e-06, "loss": 0.3218, "step": 5255 }, { "epoch": 0.24120049561745674, "grad_norm": 0.46547940373420715, "learning_rate": 9.906589548603322e-06, "loss": 0.4193, "step": 5256 }, { "epoch": 0.2412463861227112, "grad_norm": 0.46058008074760437, "learning_rate": 9.906542370207282e-06, "loss": 0.416, "step": 5257 }, { "epoch": 0.24129227662796568, "grad_norm": 0.562469482421875, "learning_rate": 9.906495180012554e-06, "loss": 0.5718, "step": 5258 }, { "epoch": 0.24133816713322015, "grad_norm": 0.4656757414340973, "learning_rate": 9.906447978019256e-06, "loss": 0.3832, "step": 5259 }, { "epoch": 0.2413840576384746, "grad_norm": 0.4760354161262512, "learning_rate": 9.906400764227496e-06, "loss": 0.3877, "step": 5260 }, { "epoch": 0.24142994814372906, "grad_norm": 0.4418363869190216, "learning_rate": 9.906353538637394e-06, "loss": 0.3989, "step": 5261 }, { "epoch": 0.24147583864898353, "grad_norm": 0.504486083984375, "learning_rate": 9.90630630124906e-06, "loss": 0.4498, "step": 5262 }, { "epoch": 0.241521729154238, "grad_norm": 0.43661609292030334, "learning_rate": 9.906259052062607e-06, "loss": 0.3419, "step": 5263 }, { "epoch": 0.24156761965949244, "grad_norm": 0.49924150109291077, "learning_rate": 9.90621179107815e-06, "loss": 0.4639, "step": 5264 }, { "epoch": 0.24161351016474691, "grad_norm": 0.4788973033428192, "learning_rate": 9.906164518295801e-06, "loss": 0.4391, "step": 5265 }, { "epoch": 0.24165940067000138, "grad_norm": 0.5613639950752258, "learning_rate": 9.906117233715675e-06, "loss": 0.5097, "step": 5266 }, { "epoch": 0.24170529117525583, "grad_norm": 0.4763454496860504, "learning_rate": 9.906069937337886e-06, "loss": 0.3964, "step": 5267 }, { "epoch": 0.2417511816805103, "grad_norm": 0.4812481105327606, "learning_rate": 9.906022629162548e-06, "loss": 0.4186, "step": 5268 }, { "epoch": 0.24179707218576477, "grad_norm": 0.45494502782821655, "learning_rate": 9.905975309189773e-06, "loss": 0.3889, "step": 5269 }, { "epoch": 0.24184296269101924, "grad_norm": 0.4880490303039551, "learning_rate": 9.905927977419676e-06, "loss": 0.3954, "step": 5270 }, { "epoch": 0.24188885319627368, "grad_norm": 0.4997728765010834, "learning_rate": 9.905880633852369e-06, "loss": 0.4384, "step": 5271 }, { "epoch": 0.24193474370152815, "grad_norm": 0.46307483315467834, "learning_rate": 9.905833278487968e-06, "loss": 0.3614, "step": 5272 }, { "epoch": 0.24198063420678262, "grad_norm": 0.41685691475868225, "learning_rate": 9.905785911326588e-06, "loss": 0.3399, "step": 5273 }, { "epoch": 0.2420265247120371, "grad_norm": 0.45186159014701843, "learning_rate": 9.90573853236834e-06, "loss": 0.4201, "step": 5274 }, { "epoch": 0.24207241521729153, "grad_norm": 0.4620940387248993, "learning_rate": 9.905691141613339e-06, "loss": 0.3656, "step": 5275 }, { "epoch": 0.242118305722546, "grad_norm": 0.4886024594306946, "learning_rate": 9.9056437390617e-06, "loss": 0.4279, "step": 5276 }, { "epoch": 0.24216419622780047, "grad_norm": 0.521808922290802, "learning_rate": 9.905596324713536e-06, "loss": 0.5023, "step": 5277 }, { "epoch": 0.24221008673305494, "grad_norm": 0.46379438042640686, "learning_rate": 9.90554889856896e-06, "loss": 0.3682, "step": 5278 }, { "epoch": 0.24225597723830938, "grad_norm": 0.4650763273239136, "learning_rate": 9.905501460628089e-06, "loss": 0.3743, "step": 5279 }, { "epoch": 0.24230186774356385, "grad_norm": 0.4888210892677307, "learning_rate": 9.905454010891034e-06, "loss": 0.4759, "step": 5280 }, { "epoch": 0.24234775824881832, "grad_norm": 0.46028730273246765, "learning_rate": 9.905406549357909e-06, "loss": 0.3395, "step": 5281 }, { "epoch": 0.2423936487540728, "grad_norm": 0.49124062061309814, "learning_rate": 9.905359076028832e-06, "loss": 0.418, "step": 5282 }, { "epoch": 0.24243953925932724, "grad_norm": 0.45967069268226624, "learning_rate": 9.905311590903913e-06, "loss": 0.4075, "step": 5283 }, { "epoch": 0.2424854297645817, "grad_norm": 0.48415204882621765, "learning_rate": 9.905264093983267e-06, "loss": 0.485, "step": 5284 }, { "epoch": 0.24253132026983618, "grad_norm": 0.4569302499294281, "learning_rate": 9.90521658526701e-06, "loss": 0.3483, "step": 5285 }, { "epoch": 0.24257721077509065, "grad_norm": 0.47105672955513, "learning_rate": 9.905169064755255e-06, "loss": 0.4113, "step": 5286 }, { "epoch": 0.2426231012803451, "grad_norm": 0.45132601261138916, "learning_rate": 9.905121532448116e-06, "loss": 0.354, "step": 5287 }, { "epoch": 0.24266899178559956, "grad_norm": 0.44951263070106506, "learning_rate": 9.905073988345711e-06, "loss": 0.3862, "step": 5288 }, { "epoch": 0.24271488229085403, "grad_norm": 0.46571624279022217, "learning_rate": 9.905026432448148e-06, "loss": 0.4024, "step": 5289 }, { "epoch": 0.2427607727961085, "grad_norm": 0.46681633591651917, "learning_rate": 9.904978864755544e-06, "loss": 0.4392, "step": 5290 }, { "epoch": 0.24280666330136294, "grad_norm": 0.48999905586242676, "learning_rate": 9.904931285268015e-06, "loss": 0.3859, "step": 5291 }, { "epoch": 0.2428525538066174, "grad_norm": 0.5016316771507263, "learning_rate": 9.904883693985675e-06, "loss": 0.4552, "step": 5292 }, { "epoch": 0.24289844431187188, "grad_norm": 0.4712497889995575, "learning_rate": 9.904836090908638e-06, "loss": 0.4361, "step": 5293 }, { "epoch": 0.24294433481712632, "grad_norm": 0.4656883478164673, "learning_rate": 9.904788476037017e-06, "loss": 0.3875, "step": 5294 }, { "epoch": 0.2429902253223808, "grad_norm": 0.4602237045764923, "learning_rate": 9.904740849370929e-06, "loss": 0.4351, "step": 5295 }, { "epoch": 0.24303611582763526, "grad_norm": 0.4638361930847168, "learning_rate": 9.904693210910485e-06, "loss": 0.4131, "step": 5296 }, { "epoch": 0.24308200633288973, "grad_norm": 0.4269849359989166, "learning_rate": 9.904645560655804e-06, "loss": 0.3709, "step": 5297 }, { "epoch": 0.24312789683814418, "grad_norm": 0.46226897835731506, "learning_rate": 9.904597898606996e-06, "loss": 0.3598, "step": 5298 }, { "epoch": 0.24317378734339865, "grad_norm": 0.49389010667800903, "learning_rate": 9.90455022476418e-06, "loss": 0.417, "step": 5299 }, { "epoch": 0.24321967784865312, "grad_norm": 0.44024893641471863, "learning_rate": 9.904502539127469e-06, "loss": 0.3864, "step": 5300 }, { "epoch": 0.24326556835390759, "grad_norm": 0.44661688804626465, "learning_rate": 9.904454841696976e-06, "loss": 0.3553, "step": 5301 }, { "epoch": 0.24331145885916203, "grad_norm": 0.4814205467700958, "learning_rate": 9.904407132472818e-06, "loss": 0.4291, "step": 5302 }, { "epoch": 0.2433573493644165, "grad_norm": 0.42537546157836914, "learning_rate": 9.904359411455108e-06, "loss": 0.3558, "step": 5303 }, { "epoch": 0.24340323986967097, "grad_norm": 0.48626184463500977, "learning_rate": 9.90431167864396e-06, "loss": 0.3951, "step": 5304 }, { "epoch": 0.24344913037492544, "grad_norm": 0.4893483817577362, "learning_rate": 9.904263934039494e-06, "loss": 0.4642, "step": 5305 }, { "epoch": 0.24349502088017988, "grad_norm": 0.45457032322883606, "learning_rate": 9.90421617764182e-06, "loss": 0.3421, "step": 5306 }, { "epoch": 0.24354091138543435, "grad_norm": 0.48694470524787903, "learning_rate": 9.904168409451051e-06, "loss": 0.4423, "step": 5307 }, { "epoch": 0.24358680189068882, "grad_norm": 0.48867183923721313, "learning_rate": 9.904120629467307e-06, "loss": 0.4286, "step": 5308 }, { "epoch": 0.2436326923959433, "grad_norm": 0.42153680324554443, "learning_rate": 9.9040728376907e-06, "loss": 0.3241, "step": 5309 }, { "epoch": 0.24367858290119773, "grad_norm": 0.45671263337135315, "learning_rate": 9.904025034121345e-06, "loss": 0.4572, "step": 5310 }, { "epoch": 0.2437244734064522, "grad_norm": 0.49713119864463806, "learning_rate": 9.90397721875936e-06, "loss": 0.4098, "step": 5311 }, { "epoch": 0.24377036391170667, "grad_norm": 0.4736146330833435, "learning_rate": 9.903929391604856e-06, "loss": 0.3934, "step": 5312 }, { "epoch": 0.24381625441696114, "grad_norm": 0.47610053420066833, "learning_rate": 9.903881552657949e-06, "loss": 0.438, "step": 5313 }, { "epoch": 0.24386214492221558, "grad_norm": 0.47151419520378113, "learning_rate": 9.903833701918755e-06, "loss": 0.3813, "step": 5314 }, { "epoch": 0.24390803542747005, "grad_norm": 0.5233833193778992, "learning_rate": 9.903785839387388e-06, "loss": 0.4563, "step": 5315 }, { "epoch": 0.24395392593272452, "grad_norm": 0.4568115770816803, "learning_rate": 9.903737965063966e-06, "loss": 0.426, "step": 5316 }, { "epoch": 0.243999816437979, "grad_norm": 0.4647948145866394, "learning_rate": 9.9036900789486e-06, "loss": 0.3909, "step": 5317 }, { "epoch": 0.24404570694323344, "grad_norm": 0.4955919086933136, "learning_rate": 9.903642181041406e-06, "loss": 0.5079, "step": 5318 }, { "epoch": 0.2440915974484879, "grad_norm": 0.49967601895332336, "learning_rate": 9.903594271342503e-06, "loss": 0.4424, "step": 5319 }, { "epoch": 0.24413748795374238, "grad_norm": 0.5169775485992432, "learning_rate": 9.903546349852002e-06, "loss": 0.4342, "step": 5320 }, { "epoch": 0.24418337845899685, "grad_norm": 0.4782702922821045, "learning_rate": 9.903498416570018e-06, "loss": 0.4022, "step": 5321 }, { "epoch": 0.2442292689642513, "grad_norm": 0.45459210872650146, "learning_rate": 9.903450471496671e-06, "loss": 0.4304, "step": 5322 }, { "epoch": 0.24427515946950576, "grad_norm": 0.478246808052063, "learning_rate": 9.903402514632072e-06, "loss": 0.4448, "step": 5323 }, { "epoch": 0.24432104997476023, "grad_norm": 0.44923147559165955, "learning_rate": 9.903354545976336e-06, "loss": 0.3719, "step": 5324 }, { "epoch": 0.24436694048001467, "grad_norm": 0.48403874039649963, "learning_rate": 9.90330656552958e-06, "loss": 0.438, "step": 5325 }, { "epoch": 0.24441283098526914, "grad_norm": 0.49797770380973816, "learning_rate": 9.90325857329192e-06, "loss": 0.5376, "step": 5326 }, { "epoch": 0.2444587214905236, "grad_norm": 0.4303591251373291, "learning_rate": 9.903210569263472e-06, "loss": 0.3172, "step": 5327 }, { "epoch": 0.24450461199577808, "grad_norm": 0.5149800777435303, "learning_rate": 9.90316255344435e-06, "loss": 0.5387, "step": 5328 }, { "epoch": 0.24455050250103252, "grad_norm": 0.46345871686935425, "learning_rate": 9.903114525834667e-06, "loss": 0.3917, "step": 5329 }, { "epoch": 0.244596393006287, "grad_norm": 0.4298897385597229, "learning_rate": 9.903066486434542e-06, "loss": 0.3343, "step": 5330 }, { "epoch": 0.24464228351154146, "grad_norm": 0.4684180021286011, "learning_rate": 9.903018435244091e-06, "loss": 0.4486, "step": 5331 }, { "epoch": 0.24468817401679593, "grad_norm": 0.5151140093803406, "learning_rate": 9.902970372263428e-06, "loss": 0.5135, "step": 5332 }, { "epoch": 0.24473406452205038, "grad_norm": 0.4247035086154938, "learning_rate": 9.902922297492667e-06, "loss": 0.3458, "step": 5333 }, { "epoch": 0.24477995502730485, "grad_norm": 0.44648730754852295, "learning_rate": 9.902874210931928e-06, "loss": 0.3905, "step": 5334 }, { "epoch": 0.24482584553255932, "grad_norm": 0.4940642714500427, "learning_rate": 9.902826112581321e-06, "loss": 0.4026, "step": 5335 }, { "epoch": 0.2448717360378138, "grad_norm": 0.5639809370040894, "learning_rate": 9.902778002440965e-06, "loss": 0.4083, "step": 5336 }, { "epoch": 0.24491762654306823, "grad_norm": 0.43036025762557983, "learning_rate": 9.902729880510978e-06, "loss": 0.3583, "step": 5337 }, { "epoch": 0.2449635170483227, "grad_norm": 0.460487425327301, "learning_rate": 9.902681746791469e-06, "loss": 0.3922, "step": 5338 }, { "epoch": 0.24500940755357717, "grad_norm": 0.48581740260124207, "learning_rate": 9.902633601282561e-06, "loss": 0.4569, "step": 5339 }, { "epoch": 0.24505529805883164, "grad_norm": 0.493158757686615, "learning_rate": 9.902585443984365e-06, "loss": 0.4104, "step": 5340 }, { "epoch": 0.24510118856408608, "grad_norm": 0.44886279106140137, "learning_rate": 9.902537274896998e-06, "loss": 0.4001, "step": 5341 }, { "epoch": 0.24514707906934055, "grad_norm": 0.4597589671611786, "learning_rate": 9.902489094020576e-06, "loss": 0.4364, "step": 5342 }, { "epoch": 0.24519296957459502, "grad_norm": 0.45080989599227905, "learning_rate": 9.902440901355216e-06, "loss": 0.417, "step": 5343 }, { "epoch": 0.2452388600798495, "grad_norm": 0.4921380877494812, "learning_rate": 9.902392696901031e-06, "loss": 0.4396, "step": 5344 }, { "epoch": 0.24528475058510393, "grad_norm": 0.4710673689842224, "learning_rate": 9.902344480658142e-06, "loss": 0.4282, "step": 5345 }, { "epoch": 0.2453306410903584, "grad_norm": 0.4434351325035095, "learning_rate": 9.902296252626659e-06, "loss": 0.3867, "step": 5346 }, { "epoch": 0.24537653159561287, "grad_norm": 0.5592572093009949, "learning_rate": 9.902248012806702e-06, "loss": 0.6233, "step": 5347 }, { "epoch": 0.24542242210086734, "grad_norm": 0.4374180734157562, "learning_rate": 9.902199761198386e-06, "loss": 0.3276, "step": 5348 }, { "epoch": 0.24546831260612179, "grad_norm": 0.4640013575553894, "learning_rate": 9.902151497801826e-06, "loss": 0.3984, "step": 5349 }, { "epoch": 0.24551420311137626, "grad_norm": 0.6440848708152771, "learning_rate": 9.90210322261714e-06, "loss": 0.5029, "step": 5350 }, { "epoch": 0.24556009361663073, "grad_norm": 0.43278902769088745, "learning_rate": 9.90205493564444e-06, "loss": 0.3086, "step": 5351 }, { "epoch": 0.2456059841218852, "grad_norm": 0.5207533240318298, "learning_rate": 9.902006636883847e-06, "loss": 0.5387, "step": 5352 }, { "epoch": 0.24565187462713964, "grad_norm": 0.48598361015319824, "learning_rate": 9.901958326335475e-06, "loss": 0.4106, "step": 5353 }, { "epoch": 0.2456977651323941, "grad_norm": 0.5017309188842773, "learning_rate": 9.901910003999442e-06, "loss": 0.4588, "step": 5354 }, { "epoch": 0.24574365563764858, "grad_norm": 0.4472849667072296, "learning_rate": 9.901861669875861e-06, "loss": 0.3667, "step": 5355 }, { "epoch": 0.24578954614290302, "grad_norm": 0.5211498737335205, "learning_rate": 9.901813323964848e-06, "loss": 0.5066, "step": 5356 }, { "epoch": 0.2458354366481575, "grad_norm": 0.4777042865753174, "learning_rate": 9.901764966266523e-06, "loss": 0.4603, "step": 5357 }, { "epoch": 0.24588132715341196, "grad_norm": 0.4984428584575653, "learning_rate": 9.901716596781001e-06, "loss": 0.4901, "step": 5358 }, { "epoch": 0.24592721765866643, "grad_norm": 0.45887160301208496, "learning_rate": 9.901668215508397e-06, "loss": 0.3685, "step": 5359 }, { "epoch": 0.24597310816392087, "grad_norm": 0.46002840995788574, "learning_rate": 9.901619822448826e-06, "loss": 0.4, "step": 5360 }, { "epoch": 0.24601899866917534, "grad_norm": 0.5057162642478943, "learning_rate": 9.90157141760241e-06, "loss": 0.4418, "step": 5361 }, { "epoch": 0.2460648891744298, "grad_norm": 0.739828884601593, "learning_rate": 9.90152300096926e-06, "loss": 0.4511, "step": 5362 }, { "epoch": 0.24611077967968428, "grad_norm": 0.43112438917160034, "learning_rate": 9.901474572549492e-06, "loss": 0.3474, "step": 5363 }, { "epoch": 0.24615667018493873, "grad_norm": 0.490246444940567, "learning_rate": 9.901426132343227e-06, "loss": 0.3951, "step": 5364 }, { "epoch": 0.2462025606901932, "grad_norm": 0.4832803010940552, "learning_rate": 9.901377680350579e-06, "loss": 0.4264, "step": 5365 }, { "epoch": 0.24624845119544767, "grad_norm": 0.46472403407096863, "learning_rate": 9.901329216571663e-06, "loss": 0.4087, "step": 5366 }, { "epoch": 0.24629434170070214, "grad_norm": 0.4616561830043793, "learning_rate": 9.901280741006597e-06, "loss": 0.392, "step": 5367 }, { "epoch": 0.24634023220595658, "grad_norm": 0.48619821667671204, "learning_rate": 9.9012322536555e-06, "loss": 0.4162, "step": 5368 }, { "epoch": 0.24638612271121105, "grad_norm": 0.46516546607017517, "learning_rate": 9.901183754518487e-06, "loss": 0.3789, "step": 5369 }, { "epoch": 0.24643201321646552, "grad_norm": 0.4606388509273529, "learning_rate": 9.90113524359567e-06, "loss": 0.3692, "step": 5370 }, { "epoch": 0.24647790372172, "grad_norm": 0.49570757150650024, "learning_rate": 9.901086720887172e-06, "loss": 0.4847, "step": 5371 }, { "epoch": 0.24652379422697443, "grad_norm": 0.4897167682647705, "learning_rate": 9.901038186393108e-06, "loss": 0.3975, "step": 5372 }, { "epoch": 0.2465696847322289, "grad_norm": 0.5077126622200012, "learning_rate": 9.900989640113592e-06, "loss": 0.524, "step": 5373 }, { "epoch": 0.24661557523748337, "grad_norm": 0.45957979559898376, "learning_rate": 9.900941082048743e-06, "loss": 0.3744, "step": 5374 }, { "epoch": 0.24666146574273784, "grad_norm": 0.41891229152679443, "learning_rate": 9.900892512198679e-06, "loss": 0.3375, "step": 5375 }, { "epoch": 0.24670735624799228, "grad_norm": 0.46823734045028687, "learning_rate": 9.900843930563516e-06, "loss": 0.4704, "step": 5376 }, { "epoch": 0.24675324675324675, "grad_norm": 0.46074435114860535, "learning_rate": 9.900795337143367e-06, "loss": 0.3884, "step": 5377 }, { "epoch": 0.24679913725850122, "grad_norm": 0.4602743685245514, "learning_rate": 9.900746731938353e-06, "loss": 0.359, "step": 5378 }, { "epoch": 0.2468450277637557, "grad_norm": 0.43587926030158997, "learning_rate": 9.900698114948591e-06, "loss": 0.3424, "step": 5379 }, { "epoch": 0.24689091826901013, "grad_norm": 0.4460495114326477, "learning_rate": 9.900649486174196e-06, "loss": 0.3973, "step": 5380 }, { "epoch": 0.2469368087742646, "grad_norm": 0.42193976044654846, "learning_rate": 9.900600845615287e-06, "loss": 0.2964, "step": 5381 }, { "epoch": 0.24698269927951907, "grad_norm": 0.5385944247245789, "learning_rate": 9.900552193271977e-06, "loss": 0.5448, "step": 5382 }, { "epoch": 0.24702858978477352, "grad_norm": 0.43305185437202454, "learning_rate": 9.900503529144387e-06, "loss": 0.3894, "step": 5383 }, { "epoch": 0.247074480290028, "grad_norm": 0.44950735569000244, "learning_rate": 9.900454853232632e-06, "loss": 0.3361, "step": 5384 }, { "epoch": 0.24712037079528246, "grad_norm": 0.4558485746383667, "learning_rate": 9.90040616553683e-06, "loss": 0.4135, "step": 5385 }, { "epoch": 0.24716626130053693, "grad_norm": 0.4376548230648041, "learning_rate": 9.9003574660571e-06, "loss": 0.3542, "step": 5386 }, { "epoch": 0.24721215180579137, "grad_norm": 0.45413148403167725, "learning_rate": 9.900308754793556e-06, "loss": 0.3734, "step": 5387 }, { "epoch": 0.24725804231104584, "grad_norm": 0.4224342703819275, "learning_rate": 9.900260031746313e-06, "loss": 0.3484, "step": 5388 }, { "epoch": 0.2473039328163003, "grad_norm": 0.46938297152519226, "learning_rate": 9.900211296915494e-06, "loss": 0.3974, "step": 5389 }, { "epoch": 0.24734982332155478, "grad_norm": 0.5534500479698181, "learning_rate": 9.900162550301212e-06, "loss": 0.5464, "step": 5390 }, { "epoch": 0.24739571382680922, "grad_norm": 0.4902797341346741, "learning_rate": 9.900113791903587e-06, "loss": 0.4971, "step": 5391 }, { "epoch": 0.2474416043320637, "grad_norm": 0.4419653117656708, "learning_rate": 9.900065021722734e-06, "loss": 0.3469, "step": 5392 }, { "epoch": 0.24748749483731816, "grad_norm": 0.4650265574455261, "learning_rate": 9.900016239758771e-06, "loss": 0.3608, "step": 5393 }, { "epoch": 0.24753338534257263, "grad_norm": 0.5291844606399536, "learning_rate": 9.899967446011815e-06, "loss": 0.4684, "step": 5394 }, { "epoch": 0.24757927584782707, "grad_norm": 0.42976513504981995, "learning_rate": 9.899918640481985e-06, "loss": 0.3599, "step": 5395 }, { "epoch": 0.24762516635308154, "grad_norm": 0.46596038341522217, "learning_rate": 9.899869823169397e-06, "loss": 0.4153, "step": 5396 }, { "epoch": 0.24767105685833601, "grad_norm": 0.48037806153297424, "learning_rate": 9.899820994074168e-06, "loss": 0.4672, "step": 5397 }, { "epoch": 0.24771694736359048, "grad_norm": 0.4876294732093811, "learning_rate": 9.899772153196415e-06, "loss": 0.4749, "step": 5398 }, { "epoch": 0.24776283786884493, "grad_norm": 0.4756849408149719, "learning_rate": 9.899723300536257e-06, "loss": 0.4704, "step": 5399 }, { "epoch": 0.2478087283740994, "grad_norm": 0.5220165848731995, "learning_rate": 9.899674436093812e-06, "loss": 0.538, "step": 5400 }, { "epoch": 0.24785461887935387, "grad_norm": 0.4860324263572693, "learning_rate": 9.899625559869194e-06, "loss": 0.4719, "step": 5401 }, { "epoch": 0.24790050938460834, "grad_norm": 0.45706814527511597, "learning_rate": 9.899576671862526e-06, "loss": 0.3734, "step": 5402 }, { "epoch": 0.24794639988986278, "grad_norm": 0.4765598773956299, "learning_rate": 9.89952777207392e-06, "loss": 0.4416, "step": 5403 }, { "epoch": 0.24799229039511725, "grad_norm": 0.46519675850868225, "learning_rate": 9.899478860503497e-06, "loss": 0.3998, "step": 5404 }, { "epoch": 0.24803818090037172, "grad_norm": 0.4826446771621704, "learning_rate": 9.899429937151374e-06, "loss": 0.4689, "step": 5405 }, { "epoch": 0.2480840714056262, "grad_norm": 0.46294093132019043, "learning_rate": 9.89938100201767e-06, "loss": 0.4155, "step": 5406 }, { "epoch": 0.24812996191088063, "grad_norm": 0.44682589173316956, "learning_rate": 9.899332055102498e-06, "loss": 0.313, "step": 5407 }, { "epoch": 0.2481758524161351, "grad_norm": 0.4746490716934204, "learning_rate": 9.899283096405979e-06, "loss": 0.4808, "step": 5408 }, { "epoch": 0.24822174292138957, "grad_norm": 0.4911626875400543, "learning_rate": 9.899234125928232e-06, "loss": 0.4485, "step": 5409 }, { "epoch": 0.24826763342664404, "grad_norm": 0.48363322019577026, "learning_rate": 9.899185143669372e-06, "loss": 0.4176, "step": 5410 }, { "epoch": 0.24831352393189848, "grad_norm": 0.49399086833000183, "learning_rate": 9.899136149629519e-06, "loss": 0.3879, "step": 5411 }, { "epoch": 0.24835941443715295, "grad_norm": 0.5003026723861694, "learning_rate": 9.899087143808787e-06, "loss": 0.4122, "step": 5412 }, { "epoch": 0.24840530494240742, "grad_norm": 0.43921470642089844, "learning_rate": 9.8990381262073e-06, "loss": 0.3523, "step": 5413 }, { "epoch": 0.24845119544766187, "grad_norm": 0.5052942037582397, "learning_rate": 9.898989096825171e-06, "loss": 0.4764, "step": 5414 }, { "epoch": 0.24849708595291634, "grad_norm": 0.4983556866645813, "learning_rate": 9.898940055662518e-06, "loss": 0.5089, "step": 5415 }, { "epoch": 0.2485429764581708, "grad_norm": 0.5024328231811523, "learning_rate": 9.898891002719462e-06, "loss": 0.5261, "step": 5416 }, { "epoch": 0.24858886696342528, "grad_norm": 0.4489428699016571, "learning_rate": 9.89884193799612e-06, "loss": 0.376, "step": 5417 }, { "epoch": 0.24863475746867972, "grad_norm": 0.4745553433895111, "learning_rate": 9.898792861492608e-06, "loss": 0.436, "step": 5418 }, { "epoch": 0.2486806479739342, "grad_norm": 0.43664437532424927, "learning_rate": 9.898743773209045e-06, "loss": 0.3235, "step": 5419 }, { "epoch": 0.24872653847918866, "grad_norm": 0.4805891811847687, "learning_rate": 9.898694673145551e-06, "loss": 0.4153, "step": 5420 }, { "epoch": 0.24877242898444313, "grad_norm": 0.4661649465560913, "learning_rate": 9.89864556130224e-06, "loss": 0.386, "step": 5421 }, { "epoch": 0.24881831948969757, "grad_norm": 0.46313953399658203, "learning_rate": 9.898596437679233e-06, "loss": 0.433, "step": 5422 }, { "epoch": 0.24886420999495204, "grad_norm": 0.46011000871658325, "learning_rate": 9.898547302276649e-06, "loss": 0.4261, "step": 5423 }, { "epoch": 0.2489101005002065, "grad_norm": 0.4639430344104767, "learning_rate": 9.898498155094605e-06, "loss": 0.3763, "step": 5424 }, { "epoch": 0.24895599100546098, "grad_norm": 0.4886471629142761, "learning_rate": 9.898448996133217e-06, "loss": 0.4665, "step": 5425 }, { "epoch": 0.24900188151071542, "grad_norm": 0.4836229085922241, "learning_rate": 9.898399825392606e-06, "loss": 0.401, "step": 5426 }, { "epoch": 0.2490477720159699, "grad_norm": 0.4913848042488098, "learning_rate": 9.89835064287289e-06, "loss": 0.449, "step": 5427 }, { "epoch": 0.24909366252122436, "grad_norm": 0.45920923352241516, "learning_rate": 9.898301448574186e-06, "loss": 0.368, "step": 5428 }, { "epoch": 0.24913955302647883, "grad_norm": 0.38684335350990295, "learning_rate": 9.898252242496614e-06, "loss": 0.2848, "step": 5429 }, { "epoch": 0.24918544353173327, "grad_norm": 0.44047191739082336, "learning_rate": 9.898203024640291e-06, "loss": 0.3122, "step": 5430 }, { "epoch": 0.24923133403698775, "grad_norm": 0.5357457995414734, "learning_rate": 9.898153795005333e-06, "loss": 0.5125, "step": 5431 }, { "epoch": 0.24927722454224222, "grad_norm": 0.483256459236145, "learning_rate": 9.898104553591866e-06, "loss": 0.4326, "step": 5432 }, { "epoch": 0.24932311504749669, "grad_norm": 0.4657694399356842, "learning_rate": 9.898055300399999e-06, "loss": 0.4537, "step": 5433 }, { "epoch": 0.24936900555275113, "grad_norm": 0.4349009096622467, "learning_rate": 9.898006035429857e-06, "loss": 0.3362, "step": 5434 }, { "epoch": 0.2494148960580056, "grad_norm": 0.4730532467365265, "learning_rate": 9.897956758681555e-06, "loss": 0.4016, "step": 5435 }, { "epoch": 0.24946078656326007, "grad_norm": 0.45093175768852234, "learning_rate": 9.897907470155215e-06, "loss": 0.3748, "step": 5436 }, { "epoch": 0.24950667706851454, "grad_norm": 0.4878894090652466, "learning_rate": 9.897858169850952e-06, "loss": 0.4483, "step": 5437 }, { "epoch": 0.24955256757376898, "grad_norm": 0.441736102104187, "learning_rate": 9.897808857768885e-06, "loss": 0.3487, "step": 5438 }, { "epoch": 0.24959845807902345, "grad_norm": 0.4571661949157715, "learning_rate": 9.897759533909134e-06, "loss": 0.3553, "step": 5439 }, { "epoch": 0.24964434858427792, "grad_norm": 0.47505679726600647, "learning_rate": 9.897710198271818e-06, "loss": 0.4569, "step": 5440 }, { "epoch": 0.24969023908953236, "grad_norm": 0.4656061828136444, "learning_rate": 9.897660850857054e-06, "loss": 0.4285, "step": 5441 }, { "epoch": 0.24973612959478683, "grad_norm": 0.4578312337398529, "learning_rate": 9.897611491664962e-06, "loss": 0.3531, "step": 5442 }, { "epoch": 0.2497820201000413, "grad_norm": 0.44951313734054565, "learning_rate": 9.89756212069566e-06, "loss": 0.3796, "step": 5443 }, { "epoch": 0.24982791060529577, "grad_norm": 0.5055868625640869, "learning_rate": 9.897512737949267e-06, "loss": 0.4526, "step": 5444 }, { "epoch": 0.24987380111055021, "grad_norm": 0.48434147238731384, "learning_rate": 9.8974633434259e-06, "loss": 0.4565, "step": 5445 }, { "epoch": 0.24991969161580468, "grad_norm": 0.49553877115249634, "learning_rate": 9.897413937125682e-06, "loss": 0.3681, "step": 5446 }, { "epoch": 0.24996558212105915, "grad_norm": 0.5233808159828186, "learning_rate": 9.897364519048726e-06, "loss": 0.3203, "step": 5447 }, { "epoch": 0.2500114726263136, "grad_norm": 0.47207024693489075, "learning_rate": 9.897315089195156e-06, "loss": 0.4163, "step": 5448 }, { "epoch": 0.25005736313156807, "grad_norm": 0.467523992061615, "learning_rate": 9.89726564756509e-06, "loss": 0.4103, "step": 5449 }, { "epoch": 0.25010325363682256, "grad_norm": 0.5478648543357849, "learning_rate": 9.897216194158643e-06, "loss": 0.4499, "step": 5450 }, { "epoch": 0.250149144142077, "grad_norm": 0.4833225607872009, "learning_rate": 9.897166728975937e-06, "loss": 0.4268, "step": 5451 }, { "epoch": 0.25019503464733145, "grad_norm": 0.4996795952320099, "learning_rate": 9.897117252017091e-06, "loss": 0.4351, "step": 5452 }, { "epoch": 0.25024092515258595, "grad_norm": 0.4672998785972595, "learning_rate": 9.897067763282226e-06, "loss": 0.3898, "step": 5453 }, { "epoch": 0.2502868156578404, "grad_norm": 0.4596433639526367, "learning_rate": 9.897018262771456e-06, "loss": 0.3618, "step": 5454 }, { "epoch": 0.25033270616309483, "grad_norm": 0.44598808884620667, "learning_rate": 9.896968750484902e-06, "loss": 0.3583, "step": 5455 }, { "epoch": 0.25037859666834933, "grad_norm": 0.48249292373657227, "learning_rate": 9.896919226422685e-06, "loss": 0.4144, "step": 5456 }, { "epoch": 0.25042448717360377, "grad_norm": 0.4608473479747772, "learning_rate": 9.896869690584924e-06, "loss": 0.415, "step": 5457 }, { "epoch": 0.25047037767885827, "grad_norm": 0.5003462433815002, "learning_rate": 9.896820142971734e-06, "loss": 0.4949, "step": 5458 }, { "epoch": 0.2505162681841127, "grad_norm": 0.4707450568675995, "learning_rate": 9.89677058358324e-06, "loss": 0.4355, "step": 5459 }, { "epoch": 0.25056215868936715, "grad_norm": 0.47938671708106995, "learning_rate": 9.896721012419557e-06, "loss": 0.4784, "step": 5460 }, { "epoch": 0.25060804919462165, "grad_norm": 0.49174219369888306, "learning_rate": 9.896671429480806e-06, "loss": 0.5469, "step": 5461 }, { "epoch": 0.2506539396998761, "grad_norm": 0.4595566391944885, "learning_rate": 9.896621834767104e-06, "loss": 0.3622, "step": 5462 }, { "epoch": 0.25069983020513054, "grad_norm": 0.5035466551780701, "learning_rate": 9.896572228278574e-06, "loss": 0.4716, "step": 5463 }, { "epoch": 0.25074572071038503, "grad_norm": 0.4592345952987671, "learning_rate": 9.896522610015332e-06, "loss": 0.3313, "step": 5464 }, { "epoch": 0.2507916112156395, "grad_norm": 0.4709053635597229, "learning_rate": 9.896472979977498e-06, "loss": 0.3501, "step": 5465 }, { "epoch": 0.250837501720894, "grad_norm": 0.5008472204208374, "learning_rate": 9.896423338165192e-06, "loss": 0.4512, "step": 5466 }, { "epoch": 0.2508833922261484, "grad_norm": 0.5236533284187317, "learning_rate": 9.896373684578533e-06, "loss": 0.4884, "step": 5467 }, { "epoch": 0.25092928273140286, "grad_norm": 0.514059841632843, "learning_rate": 9.896324019217642e-06, "loss": 0.5125, "step": 5468 }, { "epoch": 0.25097517323665736, "grad_norm": 0.449461430311203, "learning_rate": 9.896274342082637e-06, "loss": 0.3511, "step": 5469 }, { "epoch": 0.2510210637419118, "grad_norm": 0.5872984528541565, "learning_rate": 9.896224653173636e-06, "loss": 0.5086, "step": 5470 }, { "epoch": 0.25106695424716624, "grad_norm": 0.510627269744873, "learning_rate": 9.89617495249076e-06, "loss": 0.4619, "step": 5471 }, { "epoch": 0.25111284475242074, "grad_norm": 0.5474761128425598, "learning_rate": 9.89612524003413e-06, "loss": 0.5013, "step": 5472 }, { "epoch": 0.2511587352576752, "grad_norm": 0.6970511674880981, "learning_rate": 9.896075515803864e-06, "loss": 0.524, "step": 5473 }, { "epoch": 0.2512046257629296, "grad_norm": 0.4522456228733063, "learning_rate": 9.89602577980008e-06, "loss": 0.3851, "step": 5474 }, { "epoch": 0.2512505162681841, "grad_norm": 0.4794396162033081, "learning_rate": 9.8959760320229e-06, "loss": 0.3983, "step": 5475 }, { "epoch": 0.25129640677343856, "grad_norm": 0.4617340564727783, "learning_rate": 9.895926272472444e-06, "loss": 0.4012, "step": 5476 }, { "epoch": 0.25134229727869306, "grad_norm": 0.49735814332962036, "learning_rate": 9.89587650114883e-06, "loss": 0.4215, "step": 5477 }, { "epoch": 0.2513881877839475, "grad_norm": 0.49297571182250977, "learning_rate": 9.895826718052178e-06, "loss": 0.4557, "step": 5478 }, { "epoch": 0.25143407828920195, "grad_norm": 0.5293284058570862, "learning_rate": 9.895776923182607e-06, "loss": 0.5061, "step": 5479 }, { "epoch": 0.25147996879445644, "grad_norm": 0.4811141788959503, "learning_rate": 9.895727116540239e-06, "loss": 0.3875, "step": 5480 }, { "epoch": 0.2515258592997109, "grad_norm": 0.48295044898986816, "learning_rate": 9.895677298125192e-06, "loss": 0.3987, "step": 5481 }, { "epoch": 0.25157174980496533, "grad_norm": 0.516815185546875, "learning_rate": 9.895627467937585e-06, "loss": 0.4116, "step": 5482 }, { "epoch": 0.2516176403102198, "grad_norm": 0.44515928626060486, "learning_rate": 9.89557762597754e-06, "loss": 0.3499, "step": 5483 }, { "epoch": 0.25166353081547427, "grad_norm": 0.43094825744628906, "learning_rate": 9.895527772245177e-06, "loss": 0.3346, "step": 5484 }, { "epoch": 0.25170942132072877, "grad_norm": 0.4834476411342621, "learning_rate": 9.895477906740614e-06, "loss": 0.4147, "step": 5485 }, { "epoch": 0.2517553118259832, "grad_norm": 0.45588424801826477, "learning_rate": 9.89542802946397e-06, "loss": 0.3709, "step": 5486 }, { "epoch": 0.25180120233123765, "grad_norm": 0.47425857186317444, "learning_rate": 9.895378140415369e-06, "loss": 0.4195, "step": 5487 }, { "epoch": 0.25184709283649215, "grad_norm": 0.4501340985298157, "learning_rate": 9.895328239594928e-06, "loss": 0.3498, "step": 5488 }, { "epoch": 0.2518929833417466, "grad_norm": 0.4549058675765991, "learning_rate": 9.895278327002766e-06, "loss": 0.4422, "step": 5489 }, { "epoch": 0.25193887384700103, "grad_norm": 0.457581102848053, "learning_rate": 9.895228402639005e-06, "loss": 0.3769, "step": 5490 }, { "epoch": 0.25198476435225553, "grad_norm": 0.43253082036972046, "learning_rate": 9.895178466503768e-06, "loss": 0.3287, "step": 5491 }, { "epoch": 0.25203065485751, "grad_norm": 0.45106038451194763, "learning_rate": 9.895128518597168e-06, "loss": 0.3674, "step": 5492 }, { "epoch": 0.25207654536276447, "grad_norm": 0.4881967008113861, "learning_rate": 9.895078558919331e-06, "loss": 0.459, "step": 5493 }, { "epoch": 0.2521224358680189, "grad_norm": 0.48361441493034363, "learning_rate": 9.895028587470374e-06, "loss": 0.4003, "step": 5494 }, { "epoch": 0.25216832637327335, "grad_norm": 0.5004562735557556, "learning_rate": 9.894978604250419e-06, "loss": 0.4629, "step": 5495 }, { "epoch": 0.25221421687852785, "grad_norm": 0.4870319962501526, "learning_rate": 9.894928609259585e-06, "loss": 0.4475, "step": 5496 }, { "epoch": 0.2522601073837823, "grad_norm": 0.4632299244403839, "learning_rate": 9.89487860249799e-06, "loss": 0.3918, "step": 5497 }, { "epoch": 0.25230599788903674, "grad_norm": 0.46857255697250366, "learning_rate": 9.89482858396576e-06, "loss": 0.3941, "step": 5498 }, { "epoch": 0.25235188839429123, "grad_norm": 0.7058764100074768, "learning_rate": 9.894778553663012e-06, "loss": 0.4038, "step": 5499 }, { "epoch": 0.2523977788995457, "grad_norm": 0.4546481966972351, "learning_rate": 9.894728511589864e-06, "loss": 0.4424, "step": 5500 }, { "epoch": 0.2524436694048001, "grad_norm": 0.47095194458961487, "learning_rate": 9.89467845774644e-06, "loss": 0.3491, "step": 5501 }, { "epoch": 0.2524895599100546, "grad_norm": 0.5268166065216064, "learning_rate": 9.89462839213286e-06, "loss": 0.4758, "step": 5502 }, { "epoch": 0.25253545041530906, "grad_norm": 0.4952910542488098, "learning_rate": 9.894578314749244e-06, "loss": 0.4379, "step": 5503 }, { "epoch": 0.25258134092056356, "grad_norm": 0.46238887310028076, "learning_rate": 9.894528225595709e-06, "loss": 0.356, "step": 5504 }, { "epoch": 0.252627231425818, "grad_norm": 0.46178463101387024, "learning_rate": 9.894478124672382e-06, "loss": 0.3836, "step": 5505 }, { "epoch": 0.25267312193107244, "grad_norm": 0.49608203768730164, "learning_rate": 9.894428011979378e-06, "loss": 0.4863, "step": 5506 }, { "epoch": 0.25271901243632694, "grad_norm": 0.4876364469528198, "learning_rate": 9.89437788751682e-06, "loss": 0.3791, "step": 5507 }, { "epoch": 0.2527649029415814, "grad_norm": 0.4905228614807129, "learning_rate": 9.894327751284827e-06, "loss": 0.4258, "step": 5508 }, { "epoch": 0.2528107934468358, "grad_norm": 0.46615681052207947, "learning_rate": 9.89427760328352e-06, "loss": 0.4357, "step": 5509 }, { "epoch": 0.2528566839520903, "grad_norm": 0.5354674458503723, "learning_rate": 9.89422744351302e-06, "loss": 0.4851, "step": 5510 }, { "epoch": 0.25290257445734476, "grad_norm": 0.4339059889316559, "learning_rate": 9.89417727197345e-06, "loss": 0.3217, "step": 5511 }, { "epoch": 0.25294846496259926, "grad_norm": 0.4238944947719574, "learning_rate": 9.894127088664927e-06, "loss": 0.3389, "step": 5512 }, { "epoch": 0.2529943554678537, "grad_norm": 0.5029617547988892, "learning_rate": 9.894076893587572e-06, "loss": 0.4502, "step": 5513 }, { "epoch": 0.25304024597310815, "grad_norm": 0.4667750298976898, "learning_rate": 9.894026686741508e-06, "loss": 0.3406, "step": 5514 }, { "epoch": 0.25308613647836264, "grad_norm": 0.4833560883998871, "learning_rate": 9.893976468126853e-06, "loss": 0.4507, "step": 5515 }, { "epoch": 0.2531320269836171, "grad_norm": 0.4052206575870514, "learning_rate": 9.893926237743732e-06, "loss": 0.3219, "step": 5516 }, { "epoch": 0.25317791748887153, "grad_norm": 0.4374043345451355, "learning_rate": 9.893875995592259e-06, "loss": 0.3163, "step": 5517 }, { "epoch": 0.253223807994126, "grad_norm": 0.5027537941932678, "learning_rate": 9.893825741672562e-06, "loss": 0.4739, "step": 5518 }, { "epoch": 0.25326969849938047, "grad_norm": 0.5046042799949646, "learning_rate": 9.893775475984757e-06, "loss": 0.4676, "step": 5519 }, { "epoch": 0.25331558900463497, "grad_norm": 0.5012478828430176, "learning_rate": 9.893725198528968e-06, "loss": 0.3771, "step": 5520 }, { "epoch": 0.2533614795098894, "grad_norm": 0.45714667439460754, "learning_rate": 9.893674909305312e-06, "loss": 0.4008, "step": 5521 }, { "epoch": 0.25340737001514385, "grad_norm": 0.44811463356018066, "learning_rate": 9.893624608313915e-06, "loss": 0.37, "step": 5522 }, { "epoch": 0.25345326052039835, "grad_norm": 0.4998549520969391, "learning_rate": 9.893574295554893e-06, "loss": 0.414, "step": 5523 }, { "epoch": 0.2534991510256528, "grad_norm": 0.5043789744377136, "learning_rate": 9.893523971028371e-06, "loss": 0.4726, "step": 5524 }, { "epoch": 0.25354504153090723, "grad_norm": 0.4747314751148224, "learning_rate": 9.893473634734467e-06, "loss": 0.4017, "step": 5525 }, { "epoch": 0.25359093203616173, "grad_norm": 0.4761166274547577, "learning_rate": 9.893423286673306e-06, "loss": 0.407, "step": 5526 }, { "epoch": 0.2536368225414162, "grad_norm": 0.44375160336494446, "learning_rate": 9.893372926845002e-06, "loss": 0.3302, "step": 5527 }, { "epoch": 0.25368271304667067, "grad_norm": 0.43359482288360596, "learning_rate": 9.893322555249683e-06, "loss": 0.3575, "step": 5528 }, { "epoch": 0.2537286035519251, "grad_norm": 0.45954516530036926, "learning_rate": 9.893272171887467e-06, "loss": 0.3575, "step": 5529 }, { "epoch": 0.25377449405717956, "grad_norm": 0.4840056300163269, "learning_rate": 9.893221776758477e-06, "loss": 0.4109, "step": 5530 }, { "epoch": 0.25382038456243405, "grad_norm": 0.49138200283050537, "learning_rate": 9.893171369862833e-06, "loss": 0.46, "step": 5531 }, { "epoch": 0.2538662750676885, "grad_norm": 0.474150687456131, "learning_rate": 9.893120951200654e-06, "loss": 0.3854, "step": 5532 }, { "epoch": 0.25391216557294294, "grad_norm": 0.4818645119667053, "learning_rate": 9.893070520772066e-06, "loss": 0.496, "step": 5533 }, { "epoch": 0.25395805607819744, "grad_norm": 0.44986072182655334, "learning_rate": 9.893020078577186e-06, "loss": 0.3641, "step": 5534 }, { "epoch": 0.2540039465834519, "grad_norm": 0.4712718427181244, "learning_rate": 9.892969624616138e-06, "loss": 0.4439, "step": 5535 }, { "epoch": 0.2540498370887063, "grad_norm": 0.4735841751098633, "learning_rate": 9.89291915888904e-06, "loss": 0.4061, "step": 5536 }, { "epoch": 0.2540957275939608, "grad_norm": 0.46155858039855957, "learning_rate": 9.89286868139602e-06, "loss": 0.3522, "step": 5537 }, { "epoch": 0.25414161809921526, "grad_norm": 0.5060663819313049, "learning_rate": 9.892818192137192e-06, "loss": 0.485, "step": 5538 }, { "epoch": 0.25418750860446976, "grad_norm": 0.5239194631576538, "learning_rate": 9.89276769111268e-06, "loss": 0.5012, "step": 5539 }, { "epoch": 0.2542333991097242, "grad_norm": 0.48788994550704956, "learning_rate": 9.892717178322606e-06, "loss": 0.4553, "step": 5540 }, { "epoch": 0.25427928961497864, "grad_norm": 1.0392351150512695, "learning_rate": 9.892666653767093e-06, "loss": 0.3119, "step": 5541 }, { "epoch": 0.25432518012023314, "grad_norm": 0.4582270681858063, "learning_rate": 9.892616117446261e-06, "loss": 0.4056, "step": 5542 }, { "epoch": 0.2543710706254876, "grad_norm": 0.4743838608264923, "learning_rate": 9.892565569360231e-06, "loss": 0.4256, "step": 5543 }, { "epoch": 0.254416961130742, "grad_norm": 0.60857754945755, "learning_rate": 9.892515009509122e-06, "loss": 0.4651, "step": 5544 }, { "epoch": 0.2544628516359965, "grad_norm": 0.4752808213233948, "learning_rate": 9.892464437893063e-06, "loss": 0.3833, "step": 5545 }, { "epoch": 0.25450874214125097, "grad_norm": 0.4716651737689972, "learning_rate": 9.892413854512168e-06, "loss": 0.3562, "step": 5546 }, { "epoch": 0.25455463264650546, "grad_norm": 0.44940945506095886, "learning_rate": 9.892363259366565e-06, "loss": 0.3903, "step": 5547 }, { "epoch": 0.2546005231517599, "grad_norm": 0.44334253668785095, "learning_rate": 9.892312652456369e-06, "loss": 0.3756, "step": 5548 }, { "epoch": 0.25464641365701435, "grad_norm": 0.4882247745990753, "learning_rate": 9.892262033781706e-06, "loss": 0.4503, "step": 5549 }, { "epoch": 0.25469230416226885, "grad_norm": 0.47684043645858765, "learning_rate": 9.892211403342697e-06, "loss": 0.386, "step": 5550 }, { "epoch": 0.2547381946675233, "grad_norm": 0.5005866885185242, "learning_rate": 9.892160761139465e-06, "loss": 0.4564, "step": 5551 }, { "epoch": 0.25478408517277773, "grad_norm": 0.4876570999622345, "learning_rate": 9.89211010717213e-06, "loss": 0.4046, "step": 5552 }, { "epoch": 0.25482997567803223, "grad_norm": 0.5085650682449341, "learning_rate": 9.892059441440813e-06, "loss": 0.3926, "step": 5553 }, { "epoch": 0.25487586618328667, "grad_norm": 0.4553479254245758, "learning_rate": 9.892008763945637e-06, "loss": 0.3673, "step": 5554 }, { "epoch": 0.25492175668854117, "grad_norm": 0.4442640244960785, "learning_rate": 9.891958074686725e-06, "loss": 0.3453, "step": 5555 }, { "epoch": 0.2549676471937956, "grad_norm": 0.46716830134391785, "learning_rate": 9.891907373664197e-06, "loss": 0.3529, "step": 5556 }, { "epoch": 0.25501353769905005, "grad_norm": 0.49357402324676514, "learning_rate": 9.891856660878176e-06, "loss": 0.3627, "step": 5557 }, { "epoch": 0.25505942820430455, "grad_norm": 0.4599742591381073, "learning_rate": 9.891805936328783e-06, "loss": 0.4236, "step": 5558 }, { "epoch": 0.255105318709559, "grad_norm": 0.47392216324806213, "learning_rate": 9.89175520001614e-06, "loss": 0.4066, "step": 5559 }, { "epoch": 0.25515120921481343, "grad_norm": 0.41979286074638367, "learning_rate": 9.891704451940372e-06, "loss": 0.3507, "step": 5560 }, { "epoch": 0.25519709972006793, "grad_norm": 0.4431149363517761, "learning_rate": 9.891653692101597e-06, "loss": 0.3008, "step": 5561 }, { "epoch": 0.2552429902253224, "grad_norm": 0.45472508668899536, "learning_rate": 9.891602920499939e-06, "loss": 0.3621, "step": 5562 }, { "epoch": 0.2552888807305768, "grad_norm": 0.5023146271705627, "learning_rate": 9.89155213713552e-06, "loss": 0.3844, "step": 5563 }, { "epoch": 0.2553347712358313, "grad_norm": 0.4400866627693176, "learning_rate": 9.891501342008463e-06, "loss": 0.3393, "step": 5564 }, { "epoch": 0.25538066174108576, "grad_norm": 0.49070653319358826, "learning_rate": 9.891450535118887e-06, "loss": 0.4588, "step": 5565 }, { "epoch": 0.25542655224634025, "grad_norm": 0.4746112823486328, "learning_rate": 9.891399716466919e-06, "loss": 0.3997, "step": 5566 }, { "epoch": 0.2554724427515947, "grad_norm": 0.46234557032585144, "learning_rate": 9.891348886052675e-06, "loss": 0.365, "step": 5567 }, { "epoch": 0.25551833325684914, "grad_norm": 0.46192625164985657, "learning_rate": 9.891298043876282e-06, "loss": 0.366, "step": 5568 }, { "epoch": 0.25556422376210364, "grad_norm": 0.5020145773887634, "learning_rate": 9.891247189937863e-06, "loss": 0.5286, "step": 5569 }, { "epoch": 0.2556101142673581, "grad_norm": 0.5359102487564087, "learning_rate": 9.891196324237536e-06, "loss": 0.4536, "step": 5570 }, { "epoch": 0.2556560047726125, "grad_norm": 0.4720180332660675, "learning_rate": 9.891145446775425e-06, "loss": 0.3753, "step": 5571 }, { "epoch": 0.255701895277867, "grad_norm": 0.45280662178993225, "learning_rate": 9.891094557551654e-06, "loss": 0.3654, "step": 5572 }, { "epoch": 0.25574778578312146, "grad_norm": 0.45483464002609253, "learning_rate": 9.891043656566346e-06, "loss": 0.3865, "step": 5573 }, { "epoch": 0.25579367628837596, "grad_norm": 0.46961984038352966, "learning_rate": 9.890992743819618e-06, "loss": 0.4235, "step": 5574 }, { "epoch": 0.2558395667936304, "grad_norm": 0.4911828637123108, "learning_rate": 9.8909418193116e-06, "loss": 0.4326, "step": 5575 }, { "epoch": 0.25588545729888484, "grad_norm": 0.48026853799819946, "learning_rate": 9.890890883042408e-06, "loss": 0.4484, "step": 5576 }, { "epoch": 0.25593134780413934, "grad_norm": 0.49577564001083374, "learning_rate": 9.890839935012166e-06, "loss": 0.5035, "step": 5577 }, { "epoch": 0.2559772383093938, "grad_norm": 0.4562341272830963, "learning_rate": 9.890788975221e-06, "loss": 0.3576, "step": 5578 }, { "epoch": 0.2560231288146482, "grad_norm": 0.557658314704895, "learning_rate": 9.890738003669029e-06, "loss": 0.5264, "step": 5579 }, { "epoch": 0.2560690193199027, "grad_norm": 0.4654678702354431, "learning_rate": 9.890687020356378e-06, "loss": 0.3988, "step": 5580 }, { "epoch": 0.25611490982515717, "grad_norm": 0.5333075523376465, "learning_rate": 9.890636025283166e-06, "loss": 0.4855, "step": 5581 }, { "epoch": 0.25616080033041166, "grad_norm": 0.45178472995758057, "learning_rate": 9.890585018449518e-06, "loss": 0.3296, "step": 5582 }, { "epoch": 0.2562066908356661, "grad_norm": 0.4532388746738434, "learning_rate": 9.890533999855558e-06, "loss": 0.3558, "step": 5583 }, { "epoch": 0.25625258134092055, "grad_norm": 0.4403430223464966, "learning_rate": 9.890482969501408e-06, "loss": 0.3978, "step": 5584 }, { "epoch": 0.25629847184617505, "grad_norm": 0.46092748641967773, "learning_rate": 9.890431927387188e-06, "loss": 0.398, "step": 5585 }, { "epoch": 0.2563443623514295, "grad_norm": 0.693153440952301, "learning_rate": 9.890380873513023e-06, "loss": 0.5726, "step": 5586 }, { "epoch": 0.25639025285668393, "grad_norm": 0.48726266622543335, "learning_rate": 9.890329807879037e-06, "loss": 0.408, "step": 5587 }, { "epoch": 0.25643614336193843, "grad_norm": 0.49542340636253357, "learning_rate": 9.89027873048535e-06, "loss": 0.4368, "step": 5588 }, { "epoch": 0.25648203386719287, "grad_norm": 0.5103986263275146, "learning_rate": 9.890227641332086e-06, "loss": 0.5059, "step": 5589 }, { "epoch": 0.2565279243724473, "grad_norm": 0.4401119351387024, "learning_rate": 9.890176540419367e-06, "loss": 0.3144, "step": 5590 }, { "epoch": 0.2565738148777018, "grad_norm": 0.47371500730514526, "learning_rate": 9.890125427747319e-06, "loss": 0.4317, "step": 5591 }, { "epoch": 0.25661970538295625, "grad_norm": 0.5025570392608643, "learning_rate": 9.890074303316062e-06, "loss": 0.4695, "step": 5592 }, { "epoch": 0.25666559588821075, "grad_norm": 0.5215364694595337, "learning_rate": 9.89002316712572e-06, "loss": 0.4513, "step": 5593 }, { "epoch": 0.2567114863934652, "grad_norm": 0.4691109359264374, "learning_rate": 9.889972019176415e-06, "loss": 0.3736, "step": 5594 }, { "epoch": 0.25675737689871964, "grad_norm": 0.4602471590042114, "learning_rate": 9.88992085946827e-06, "loss": 0.4056, "step": 5595 }, { "epoch": 0.25680326740397413, "grad_norm": 0.47685331106185913, "learning_rate": 9.889869688001407e-06, "loss": 0.4077, "step": 5596 }, { "epoch": 0.2568491579092286, "grad_norm": 0.4990473687648773, "learning_rate": 9.889818504775955e-06, "loss": 0.4436, "step": 5597 }, { "epoch": 0.256895048414483, "grad_norm": 0.496969610452652, "learning_rate": 9.889767309792029e-06, "loss": 0.4826, "step": 5598 }, { "epoch": 0.2569409389197375, "grad_norm": 0.45757797360420227, "learning_rate": 9.889716103049757e-06, "loss": 0.409, "step": 5599 }, { "epoch": 0.25698682942499196, "grad_norm": 0.45495250821113586, "learning_rate": 9.889664884549262e-06, "loss": 0.4045, "step": 5600 }, { "epoch": 0.25703271993024646, "grad_norm": 0.5110076665878296, "learning_rate": 9.889613654290666e-06, "loss": 0.4376, "step": 5601 }, { "epoch": 0.2570786104355009, "grad_norm": 0.47988972067832947, "learning_rate": 9.889562412274092e-06, "loss": 0.4244, "step": 5602 }, { "epoch": 0.25712450094075534, "grad_norm": 0.48515453934669495, "learning_rate": 9.889511158499664e-06, "loss": 0.4985, "step": 5603 }, { "epoch": 0.25717039144600984, "grad_norm": 0.4759584665298462, "learning_rate": 9.889459892967504e-06, "loss": 0.3717, "step": 5604 }, { "epoch": 0.2572162819512643, "grad_norm": 0.4437233805656433, "learning_rate": 9.889408615677737e-06, "loss": 0.3377, "step": 5605 }, { "epoch": 0.2572621724565187, "grad_norm": 0.45227745175361633, "learning_rate": 9.889357326630485e-06, "loss": 0.3572, "step": 5606 }, { "epoch": 0.2573080629617732, "grad_norm": 0.42537635564804077, "learning_rate": 9.88930602582587e-06, "loss": 0.2736, "step": 5607 }, { "epoch": 0.25735395346702766, "grad_norm": 0.4980013370513916, "learning_rate": 9.88925471326402e-06, "loss": 0.4433, "step": 5608 }, { "epoch": 0.25739984397228216, "grad_norm": 0.5872184634208679, "learning_rate": 9.889203388945054e-06, "loss": 0.4934, "step": 5609 }, { "epoch": 0.2574457344775366, "grad_norm": 0.561522364616394, "learning_rate": 9.889152052869098e-06, "loss": 0.5086, "step": 5610 }, { "epoch": 0.25749162498279105, "grad_norm": 0.5021909475326538, "learning_rate": 9.889100705036274e-06, "loss": 0.4543, "step": 5611 }, { "epoch": 0.25753751548804554, "grad_norm": 0.4873514473438263, "learning_rate": 9.889049345446706e-06, "loss": 0.3693, "step": 5612 }, { "epoch": 0.2575834059933, "grad_norm": 0.5251417756080627, "learning_rate": 9.888997974100517e-06, "loss": 0.4893, "step": 5613 }, { "epoch": 0.2576292964985544, "grad_norm": 0.48143884539604187, "learning_rate": 9.88894659099783e-06, "loss": 0.3885, "step": 5614 }, { "epoch": 0.2576751870038089, "grad_norm": 0.49281200766563416, "learning_rate": 9.88889519613877e-06, "loss": 0.4234, "step": 5615 }, { "epoch": 0.25772107750906337, "grad_norm": 0.4757404625415802, "learning_rate": 9.888843789523461e-06, "loss": 0.4158, "step": 5616 }, { "epoch": 0.2577669680143178, "grad_norm": 0.44728314876556396, "learning_rate": 9.888792371152023e-06, "loss": 0.3709, "step": 5617 }, { "epoch": 0.2578128585195723, "grad_norm": 0.4950680136680603, "learning_rate": 9.888740941024585e-06, "loss": 0.4795, "step": 5618 }, { "epoch": 0.25785874902482675, "grad_norm": 0.4776691198348999, "learning_rate": 9.888689499141268e-06, "loss": 0.4224, "step": 5619 }, { "epoch": 0.25790463953008125, "grad_norm": 0.4736878573894501, "learning_rate": 9.888638045502194e-06, "loss": 0.3926, "step": 5620 }, { "epoch": 0.2579505300353357, "grad_norm": 0.42839500308036804, "learning_rate": 9.888586580107489e-06, "loss": 0.2935, "step": 5621 }, { "epoch": 0.25799642054059013, "grad_norm": 0.4897996485233307, "learning_rate": 9.888535102957278e-06, "loss": 0.3817, "step": 5622 }, { "epoch": 0.25804231104584463, "grad_norm": 0.4382639527320862, "learning_rate": 9.88848361405168e-06, "loss": 0.3819, "step": 5623 }, { "epoch": 0.2580882015510991, "grad_norm": 0.48136210441589355, "learning_rate": 9.888432113390823e-06, "loss": 0.3766, "step": 5624 }, { "epoch": 0.2581340920563535, "grad_norm": 0.4868128299713135, "learning_rate": 9.88838060097483e-06, "loss": 0.3944, "step": 5625 }, { "epoch": 0.258179982561608, "grad_norm": 0.5675439238548279, "learning_rate": 9.888329076803824e-06, "loss": 0.4392, "step": 5626 }, { "epoch": 0.25822587306686245, "grad_norm": 0.46386784315109253, "learning_rate": 9.88827754087793e-06, "loss": 0.4326, "step": 5627 }, { "epoch": 0.25827176357211695, "grad_norm": 0.47001662850379944, "learning_rate": 9.88822599319727e-06, "loss": 0.4033, "step": 5628 }, { "epoch": 0.2583176540773714, "grad_norm": 0.4964858889579773, "learning_rate": 9.888174433761971e-06, "loss": 0.4086, "step": 5629 }, { "epoch": 0.25836354458262584, "grad_norm": 0.48577457666397095, "learning_rate": 9.888122862572154e-06, "loss": 0.4221, "step": 5630 }, { "epoch": 0.25840943508788033, "grad_norm": 0.4313344955444336, "learning_rate": 9.888071279627944e-06, "loss": 0.3411, "step": 5631 }, { "epoch": 0.2584553255931348, "grad_norm": 0.5158870816230774, "learning_rate": 9.888019684929467e-06, "loss": 0.4409, "step": 5632 }, { "epoch": 0.2585012160983892, "grad_norm": 0.5176045298576355, "learning_rate": 9.887968078476844e-06, "loss": 0.4836, "step": 5633 }, { "epoch": 0.2585471066036437, "grad_norm": 0.45540517568588257, "learning_rate": 9.8879164602702e-06, "loss": 0.379, "step": 5634 }, { "epoch": 0.25859299710889816, "grad_norm": 0.470023512840271, "learning_rate": 9.88786483030966e-06, "loss": 0.4302, "step": 5635 }, { "epoch": 0.25863888761415266, "grad_norm": 0.4943717122077942, "learning_rate": 9.887813188595348e-06, "loss": 0.4839, "step": 5636 }, { "epoch": 0.2586847781194071, "grad_norm": 0.4424592852592468, "learning_rate": 9.887761535127388e-06, "loss": 0.3031, "step": 5637 }, { "epoch": 0.25873066862466154, "grad_norm": 0.489914208650589, "learning_rate": 9.887709869905903e-06, "loss": 0.419, "step": 5638 }, { "epoch": 0.25877655912991604, "grad_norm": 0.4437110126018524, "learning_rate": 9.88765819293102e-06, "loss": 0.324, "step": 5639 }, { "epoch": 0.2588224496351705, "grad_norm": 0.47663143277168274, "learning_rate": 9.887606504202862e-06, "loss": 0.4116, "step": 5640 }, { "epoch": 0.2588683401404249, "grad_norm": 0.4884633719921112, "learning_rate": 9.887554803721551e-06, "loss": 0.4195, "step": 5641 }, { "epoch": 0.2589142306456794, "grad_norm": 0.44610312581062317, "learning_rate": 9.887503091487215e-06, "loss": 0.3988, "step": 5642 }, { "epoch": 0.25896012115093386, "grad_norm": 0.4350149631500244, "learning_rate": 9.887451367499977e-06, "loss": 0.3877, "step": 5643 }, { "epoch": 0.25900601165618836, "grad_norm": 0.4546768367290497, "learning_rate": 9.88739963175996e-06, "loss": 0.3519, "step": 5644 }, { "epoch": 0.2590519021614428, "grad_norm": 0.46952152252197266, "learning_rate": 9.887347884267287e-06, "loss": 0.3849, "step": 5645 }, { "epoch": 0.25909779266669725, "grad_norm": 0.50528484582901, "learning_rate": 9.887296125022089e-06, "loss": 0.468, "step": 5646 }, { "epoch": 0.25914368317195174, "grad_norm": 0.4472591280937195, "learning_rate": 9.887244354024484e-06, "loss": 0.3703, "step": 5647 }, { "epoch": 0.2591895736772062, "grad_norm": 0.477812260389328, "learning_rate": 9.887192571274597e-06, "loss": 0.4241, "step": 5648 }, { "epoch": 0.25923546418246063, "grad_norm": 0.4468366801738739, "learning_rate": 9.887140776772557e-06, "loss": 0.3831, "step": 5649 }, { "epoch": 0.2592813546877151, "grad_norm": 0.47129249572753906, "learning_rate": 9.887088970518485e-06, "loss": 0.4195, "step": 5650 }, { "epoch": 0.25932724519296957, "grad_norm": 0.5563701391220093, "learning_rate": 9.887037152512507e-06, "loss": 0.4239, "step": 5651 }, { "epoch": 0.259373135698224, "grad_norm": 0.47246047854423523, "learning_rate": 9.886985322754748e-06, "loss": 0.4148, "step": 5652 }, { "epoch": 0.2594190262034785, "grad_norm": 0.45770326256752014, "learning_rate": 9.88693348124533e-06, "loss": 0.3352, "step": 5653 }, { "epoch": 0.25946491670873295, "grad_norm": 0.4647235572338104, "learning_rate": 9.886881627984379e-06, "loss": 0.4179, "step": 5654 }, { "epoch": 0.25951080721398745, "grad_norm": 0.4490934908390045, "learning_rate": 9.886829762972021e-06, "loss": 0.3971, "step": 5655 }, { "epoch": 0.2595566977192419, "grad_norm": 0.4440559148788452, "learning_rate": 9.886777886208379e-06, "loss": 0.3211, "step": 5656 }, { "epoch": 0.25960258822449633, "grad_norm": 0.44536978006362915, "learning_rate": 9.886725997693581e-06, "loss": 0.3703, "step": 5657 }, { "epoch": 0.25964847872975083, "grad_norm": 0.5159117579460144, "learning_rate": 9.886674097427746e-06, "loss": 0.4096, "step": 5658 }, { "epoch": 0.2596943692350053, "grad_norm": 0.4857231378555298, "learning_rate": 9.886622185411004e-06, "loss": 0.396, "step": 5659 }, { "epoch": 0.2597402597402597, "grad_norm": 0.4726279079914093, "learning_rate": 9.886570261643477e-06, "loss": 0.3939, "step": 5660 }, { "epoch": 0.2597861502455142, "grad_norm": 0.4712117314338684, "learning_rate": 9.886518326125291e-06, "loss": 0.4464, "step": 5661 }, { "epoch": 0.25983204075076866, "grad_norm": 0.47619110345840454, "learning_rate": 9.886466378856571e-06, "loss": 0.4226, "step": 5662 }, { "epoch": 0.25987793125602315, "grad_norm": 0.47308769822120667, "learning_rate": 9.886414419837441e-06, "loss": 0.4048, "step": 5663 }, { "epoch": 0.2599238217612776, "grad_norm": 0.4963769316673279, "learning_rate": 9.886362449068027e-06, "loss": 0.4494, "step": 5664 }, { "epoch": 0.25996971226653204, "grad_norm": 0.45801129937171936, "learning_rate": 9.886310466548454e-06, "loss": 0.3756, "step": 5665 }, { "epoch": 0.26001560277178654, "grad_norm": 0.46514126658439636, "learning_rate": 9.886258472278845e-06, "loss": 0.3812, "step": 5666 }, { "epoch": 0.260061493277041, "grad_norm": 0.5091173052787781, "learning_rate": 9.886206466259327e-06, "loss": 0.428, "step": 5667 }, { "epoch": 0.2601073837822954, "grad_norm": 0.49231746792793274, "learning_rate": 9.886154448490025e-06, "loss": 0.4484, "step": 5668 }, { "epoch": 0.2601532742875499, "grad_norm": 0.4699980318546295, "learning_rate": 9.886102418971063e-06, "loss": 0.3719, "step": 5669 }, { "epoch": 0.26019916479280436, "grad_norm": 0.4968722462654114, "learning_rate": 9.886050377702567e-06, "loss": 0.4844, "step": 5670 }, { "epoch": 0.26024505529805886, "grad_norm": 0.4719417989253998, "learning_rate": 9.885998324684664e-06, "loss": 0.4785, "step": 5671 }, { "epoch": 0.2602909458033133, "grad_norm": 0.47552016377449036, "learning_rate": 9.885946259917475e-06, "loss": 0.4222, "step": 5672 }, { "epoch": 0.26033683630856774, "grad_norm": 0.49827972054481506, "learning_rate": 9.885894183401127e-06, "loss": 0.4506, "step": 5673 }, { "epoch": 0.26038272681382224, "grad_norm": 0.4808642268180847, "learning_rate": 9.885842095135747e-06, "loss": 0.4455, "step": 5674 }, { "epoch": 0.2604286173190767, "grad_norm": 0.43016520142555237, "learning_rate": 9.885789995121458e-06, "loss": 0.3076, "step": 5675 }, { "epoch": 0.2604745078243311, "grad_norm": 0.44965511560440063, "learning_rate": 9.885737883358386e-06, "loss": 0.3776, "step": 5676 }, { "epoch": 0.2605203983295856, "grad_norm": 0.455857515335083, "learning_rate": 9.885685759846657e-06, "loss": 0.4175, "step": 5677 }, { "epoch": 0.26056628883484007, "grad_norm": 0.5128337144851685, "learning_rate": 9.885633624586394e-06, "loss": 0.471, "step": 5678 }, { "epoch": 0.2606121793400945, "grad_norm": 0.5024634599685669, "learning_rate": 9.885581477577725e-06, "loss": 0.4149, "step": 5679 }, { "epoch": 0.260658069845349, "grad_norm": 0.4706532657146454, "learning_rate": 9.885529318820775e-06, "loss": 0.3852, "step": 5680 }, { "epoch": 0.26070396035060345, "grad_norm": 0.46730953454971313, "learning_rate": 9.885477148315668e-06, "loss": 0.3759, "step": 5681 }, { "epoch": 0.26074985085585795, "grad_norm": 0.49785250425338745, "learning_rate": 9.885424966062531e-06, "loss": 0.4406, "step": 5682 }, { "epoch": 0.2607957413611124, "grad_norm": 0.5237879753112793, "learning_rate": 9.885372772061489e-06, "loss": 0.471, "step": 5683 }, { "epoch": 0.26084163186636683, "grad_norm": 0.48327532410621643, "learning_rate": 9.885320566312667e-06, "loss": 0.4935, "step": 5684 }, { "epoch": 0.2608875223716213, "grad_norm": 0.46478649973869324, "learning_rate": 9.88526834881619e-06, "loss": 0.3727, "step": 5685 }, { "epoch": 0.26093341287687577, "grad_norm": 0.44101977348327637, "learning_rate": 9.885216119572187e-06, "loss": 0.344, "step": 5686 }, { "epoch": 0.2609793033821302, "grad_norm": 0.4429907202720642, "learning_rate": 9.88516387858078e-06, "loss": 0.359, "step": 5687 }, { "epoch": 0.2610251938873847, "grad_norm": 0.4707849621772766, "learning_rate": 9.885111625842094e-06, "loss": 0.3665, "step": 5688 }, { "epoch": 0.26107108439263915, "grad_norm": 0.47951963543891907, "learning_rate": 9.885059361356257e-06, "loss": 0.3379, "step": 5689 }, { "epoch": 0.26111697489789365, "grad_norm": 0.4259636700153351, "learning_rate": 9.885007085123394e-06, "loss": 0.3238, "step": 5690 }, { "epoch": 0.2611628654031481, "grad_norm": 0.46246758103370667, "learning_rate": 9.884954797143634e-06, "loss": 0.422, "step": 5691 }, { "epoch": 0.26120875590840253, "grad_norm": 0.5283334255218506, "learning_rate": 9.884902497417094e-06, "loss": 0.5073, "step": 5692 }, { "epoch": 0.26125464641365703, "grad_norm": 0.4169554114341736, "learning_rate": 9.884850185943909e-06, "loss": 0.3214, "step": 5693 }, { "epoch": 0.2613005369189115, "grad_norm": 0.4948008060455322, "learning_rate": 9.8847978627242e-06, "loss": 0.4556, "step": 5694 }, { "epoch": 0.2613464274241659, "grad_norm": 0.4492760896682739, "learning_rate": 9.884745527758095e-06, "loss": 0.376, "step": 5695 }, { "epoch": 0.2613923179294204, "grad_norm": 0.4468342363834381, "learning_rate": 9.884693181045716e-06, "loss": 0.3981, "step": 5696 }, { "epoch": 0.26143820843467486, "grad_norm": 0.4745164215564728, "learning_rate": 9.884640822587193e-06, "loss": 0.4631, "step": 5697 }, { "epoch": 0.26148409893992935, "grad_norm": 0.4509073495864868, "learning_rate": 9.88458845238265e-06, "loss": 0.3812, "step": 5698 }, { "epoch": 0.2615299894451838, "grad_norm": 0.4773392677307129, "learning_rate": 9.884536070432214e-06, "loss": 0.4344, "step": 5699 }, { "epoch": 0.26157587995043824, "grad_norm": 0.43625348806381226, "learning_rate": 9.884483676736011e-06, "loss": 0.31, "step": 5700 }, { "epoch": 0.26162177045569274, "grad_norm": 0.4980311393737793, "learning_rate": 9.884431271294166e-06, "loss": 0.4344, "step": 5701 }, { "epoch": 0.2616676609609472, "grad_norm": 0.4493265748023987, "learning_rate": 9.884378854106804e-06, "loss": 0.3803, "step": 5702 }, { "epoch": 0.2617135514662016, "grad_norm": 0.4348578155040741, "learning_rate": 9.884326425174053e-06, "loss": 0.3679, "step": 5703 }, { "epoch": 0.2617594419714561, "grad_norm": 0.4575294852256775, "learning_rate": 9.88427398449604e-06, "loss": 0.3482, "step": 5704 }, { "epoch": 0.26180533247671056, "grad_norm": 0.49547243118286133, "learning_rate": 9.884221532072887e-06, "loss": 0.457, "step": 5705 }, { "epoch": 0.261851222981965, "grad_norm": 0.4817352890968323, "learning_rate": 9.884169067904725e-06, "loss": 0.4634, "step": 5706 }, { "epoch": 0.2618971134872195, "grad_norm": 0.44964274764060974, "learning_rate": 9.884116591991677e-06, "loss": 0.3597, "step": 5707 }, { "epoch": 0.26194300399247394, "grad_norm": 0.44966939091682434, "learning_rate": 9.88406410433387e-06, "loss": 0.3964, "step": 5708 }, { "epoch": 0.26198889449772844, "grad_norm": 0.4539758563041687, "learning_rate": 9.88401160493143e-06, "loss": 0.3721, "step": 5709 }, { "epoch": 0.2620347850029829, "grad_norm": 0.4753742218017578, "learning_rate": 9.883959093784482e-06, "loss": 0.4485, "step": 5710 }, { "epoch": 0.2620806755082373, "grad_norm": 0.4653816223144531, "learning_rate": 9.883906570893158e-06, "loss": 0.4017, "step": 5711 }, { "epoch": 0.2621265660134918, "grad_norm": 0.513113796710968, "learning_rate": 9.883854036257577e-06, "loss": 0.5253, "step": 5712 }, { "epoch": 0.26217245651874627, "grad_norm": 0.45812666416168213, "learning_rate": 9.883801489877869e-06, "loss": 0.3516, "step": 5713 }, { "epoch": 0.2622183470240007, "grad_norm": 0.47847726941108704, "learning_rate": 9.88374893175416e-06, "loss": 0.4634, "step": 5714 }, { "epoch": 0.2622642375292552, "grad_norm": 0.507748007774353, "learning_rate": 9.883696361886577e-06, "loss": 0.5075, "step": 5715 }, { "epoch": 0.26231012803450965, "grad_norm": 0.44268330931663513, "learning_rate": 9.883643780275246e-06, "loss": 0.3411, "step": 5716 }, { "epoch": 0.26235601853976415, "grad_norm": 0.46061259508132935, "learning_rate": 9.88359118692029e-06, "loss": 0.382, "step": 5717 }, { "epoch": 0.2624019090450186, "grad_norm": 0.45478954911231995, "learning_rate": 9.883538581821841e-06, "loss": 0.441, "step": 5718 }, { "epoch": 0.26244779955027303, "grad_norm": 0.4721737504005432, "learning_rate": 9.883485964980022e-06, "loss": 0.3561, "step": 5719 }, { "epoch": 0.26249369005552753, "grad_norm": 0.46608126163482666, "learning_rate": 9.88343333639496e-06, "loss": 0.3668, "step": 5720 }, { "epoch": 0.26253958056078197, "grad_norm": 0.4553476572036743, "learning_rate": 9.883380696066783e-06, "loss": 0.3346, "step": 5721 }, { "epoch": 0.2625854710660364, "grad_norm": 0.4680670499801636, "learning_rate": 9.883328043995617e-06, "loss": 0.3955, "step": 5722 }, { "epoch": 0.2626313615712909, "grad_norm": 0.4598741829395294, "learning_rate": 9.883275380181588e-06, "loss": 0.3939, "step": 5723 }, { "epoch": 0.26267725207654535, "grad_norm": 0.4326989948749542, "learning_rate": 9.883222704624822e-06, "loss": 0.3301, "step": 5724 }, { "epoch": 0.26272314258179985, "grad_norm": 0.49923720955848694, "learning_rate": 9.883170017325447e-06, "loss": 0.4002, "step": 5725 }, { "epoch": 0.2627690330870543, "grad_norm": 0.47960779070854187, "learning_rate": 9.88311731828359e-06, "loss": 0.412, "step": 5726 }, { "epoch": 0.26281492359230874, "grad_norm": 0.45729711651802063, "learning_rate": 9.883064607499377e-06, "loss": 0.3864, "step": 5727 }, { "epoch": 0.26286081409756323, "grad_norm": 0.4778943657875061, "learning_rate": 9.883011884972934e-06, "loss": 0.3802, "step": 5728 }, { "epoch": 0.2629067046028177, "grad_norm": 0.5121933817863464, "learning_rate": 9.88295915070439e-06, "loss": 0.493, "step": 5729 }, { "epoch": 0.2629525951080721, "grad_norm": 0.4572793245315552, "learning_rate": 9.882906404693868e-06, "loss": 0.3748, "step": 5730 }, { "epoch": 0.2629984856133266, "grad_norm": 0.48729655146598816, "learning_rate": 9.8828536469415e-06, "loss": 0.387, "step": 5731 }, { "epoch": 0.26304437611858106, "grad_norm": 0.4959196150302887, "learning_rate": 9.882800877447406e-06, "loss": 0.459, "step": 5732 }, { "epoch": 0.26309026662383556, "grad_norm": 0.46004343032836914, "learning_rate": 9.88274809621172e-06, "loss": 0.3487, "step": 5733 }, { "epoch": 0.26313615712909, "grad_norm": 0.44322940707206726, "learning_rate": 9.882695303234567e-06, "loss": 0.3927, "step": 5734 }, { "epoch": 0.26318204763434444, "grad_norm": 0.4589797258377075, "learning_rate": 9.88264249851607e-06, "loss": 0.3698, "step": 5735 }, { "epoch": 0.26322793813959894, "grad_norm": 0.49748119711875916, "learning_rate": 9.882589682056361e-06, "loss": 0.4289, "step": 5736 }, { "epoch": 0.2632738286448534, "grad_norm": 0.4449711740016937, "learning_rate": 9.882536853855563e-06, "loss": 0.3617, "step": 5737 }, { "epoch": 0.2633197191501078, "grad_norm": 0.4532252252101898, "learning_rate": 9.882484013913804e-06, "loss": 0.3536, "step": 5738 }, { "epoch": 0.2633656096553623, "grad_norm": 0.4398576021194458, "learning_rate": 9.882431162231214e-06, "loss": 0.3379, "step": 5739 }, { "epoch": 0.26341150016061676, "grad_norm": 0.4621528387069702, "learning_rate": 9.882378298807918e-06, "loss": 0.3954, "step": 5740 }, { "epoch": 0.2634573906658712, "grad_norm": 0.4594041705131531, "learning_rate": 9.882325423644042e-06, "loss": 0.3696, "step": 5741 }, { "epoch": 0.2635032811711257, "grad_norm": 0.5007659792900085, "learning_rate": 9.882272536739715e-06, "loss": 0.4275, "step": 5742 }, { "epoch": 0.26354917167638015, "grad_norm": 0.48232123255729675, "learning_rate": 9.882219638095063e-06, "loss": 0.3846, "step": 5743 }, { "epoch": 0.26359506218163464, "grad_norm": 0.538147509098053, "learning_rate": 9.882166727710214e-06, "loss": 0.4239, "step": 5744 }, { "epoch": 0.2636409526868891, "grad_norm": 0.511863648891449, "learning_rate": 9.882113805585296e-06, "loss": 0.4344, "step": 5745 }, { "epoch": 0.2636868431921435, "grad_norm": 0.45896872878074646, "learning_rate": 9.882060871720434e-06, "loss": 0.3709, "step": 5746 }, { "epoch": 0.263732733697398, "grad_norm": 0.5214148163795471, "learning_rate": 9.882007926115755e-06, "loss": 0.4948, "step": 5747 }, { "epoch": 0.26377862420265247, "grad_norm": 0.46154317259788513, "learning_rate": 9.88195496877139e-06, "loss": 0.3642, "step": 5748 }, { "epoch": 0.2638245147079069, "grad_norm": 0.44234949350357056, "learning_rate": 9.881901999687463e-06, "loss": 0.3731, "step": 5749 }, { "epoch": 0.2638704052131614, "grad_norm": 0.5097475647926331, "learning_rate": 9.881849018864103e-06, "loss": 0.4874, "step": 5750 }, { "epoch": 0.26391629571841585, "grad_norm": 0.47292381525039673, "learning_rate": 9.881796026301435e-06, "loss": 0.4313, "step": 5751 }, { "epoch": 0.26396218622367035, "grad_norm": 0.43647173047065735, "learning_rate": 9.881743021999592e-06, "loss": 0.321, "step": 5752 }, { "epoch": 0.2640080767289248, "grad_norm": 0.41725602746009827, "learning_rate": 9.881690005958695e-06, "loss": 0.3319, "step": 5753 }, { "epoch": 0.26405396723417923, "grad_norm": 0.4627762734889984, "learning_rate": 9.881636978178874e-06, "loss": 0.3703, "step": 5754 }, { "epoch": 0.26409985773943373, "grad_norm": 0.423604816198349, "learning_rate": 9.881583938660257e-06, "loss": 0.3589, "step": 5755 }, { "epoch": 0.26414574824468817, "grad_norm": 0.45560187101364136, "learning_rate": 9.881530887402972e-06, "loss": 0.3751, "step": 5756 }, { "epoch": 0.2641916387499426, "grad_norm": 0.44976869225502014, "learning_rate": 9.881477824407146e-06, "loss": 0.4118, "step": 5757 }, { "epoch": 0.2642375292551971, "grad_norm": 0.4750780165195465, "learning_rate": 9.881424749672907e-06, "loss": 0.451, "step": 5758 }, { "epoch": 0.26428341976045155, "grad_norm": 0.42854002118110657, "learning_rate": 9.881371663200381e-06, "loss": 0.3403, "step": 5759 }, { "epoch": 0.26432931026570605, "grad_norm": 0.45952582359313965, "learning_rate": 9.881318564989698e-06, "loss": 0.4611, "step": 5760 }, { "epoch": 0.2643752007709605, "grad_norm": 0.48808106780052185, "learning_rate": 9.881265455040982e-06, "loss": 0.4437, "step": 5761 }, { "epoch": 0.26442109127621494, "grad_norm": 0.514225423336029, "learning_rate": 9.881212333354366e-06, "loss": 0.5328, "step": 5762 }, { "epoch": 0.26446698178146943, "grad_norm": 0.479399710893631, "learning_rate": 9.881159199929973e-06, "loss": 0.4818, "step": 5763 }, { "epoch": 0.2645128722867239, "grad_norm": 0.478593647480011, "learning_rate": 9.881106054767933e-06, "loss": 0.4653, "step": 5764 }, { "epoch": 0.2645587627919783, "grad_norm": 0.5184587240219116, "learning_rate": 9.881052897868375e-06, "loss": 0.567, "step": 5765 }, { "epoch": 0.2646046532972328, "grad_norm": 0.4631170630455017, "learning_rate": 9.880999729231423e-06, "loss": 0.3712, "step": 5766 }, { "epoch": 0.26465054380248726, "grad_norm": 0.4283306300640106, "learning_rate": 9.880946548857207e-06, "loss": 0.3075, "step": 5767 }, { "epoch": 0.2646964343077417, "grad_norm": 0.4697890281677246, "learning_rate": 9.880893356745857e-06, "loss": 0.4248, "step": 5768 }, { "epoch": 0.2647423248129962, "grad_norm": 0.49365106225013733, "learning_rate": 9.880840152897497e-06, "loss": 0.4529, "step": 5769 }, { "epoch": 0.26478821531825064, "grad_norm": 0.4644263982772827, "learning_rate": 9.880786937312259e-06, "loss": 0.4098, "step": 5770 }, { "epoch": 0.26483410582350514, "grad_norm": 0.4478089213371277, "learning_rate": 9.880733709990267e-06, "loss": 0.4018, "step": 5771 }, { "epoch": 0.2648799963287596, "grad_norm": 0.5120177865028381, "learning_rate": 9.880680470931652e-06, "loss": 0.4748, "step": 5772 }, { "epoch": 0.264925886834014, "grad_norm": 0.4669431746006012, "learning_rate": 9.88062722013654e-06, "loss": 0.3958, "step": 5773 }, { "epoch": 0.2649717773392685, "grad_norm": 0.45079270005226135, "learning_rate": 9.880573957605061e-06, "loss": 0.3406, "step": 5774 }, { "epoch": 0.26501766784452296, "grad_norm": 0.4865074157714844, "learning_rate": 9.880520683337341e-06, "loss": 0.489, "step": 5775 }, { "epoch": 0.2650635583497774, "grad_norm": 0.4581310451030731, "learning_rate": 9.880467397333509e-06, "loss": 0.4049, "step": 5776 }, { "epoch": 0.2651094488550319, "grad_norm": 0.44578155875205994, "learning_rate": 9.880414099593693e-06, "loss": 0.3336, "step": 5777 }, { "epoch": 0.26515533936028635, "grad_norm": 0.44198092818260193, "learning_rate": 9.880360790118024e-06, "loss": 0.3614, "step": 5778 }, { "epoch": 0.26520122986554084, "grad_norm": 0.46466711163520813, "learning_rate": 9.880307468906625e-06, "loss": 0.3915, "step": 5779 }, { "epoch": 0.2652471203707953, "grad_norm": 0.5257967710494995, "learning_rate": 9.880254135959628e-06, "loss": 0.5765, "step": 5780 }, { "epoch": 0.26529301087604973, "grad_norm": 0.4531961679458618, "learning_rate": 9.880200791277158e-06, "loss": 0.4019, "step": 5781 }, { "epoch": 0.2653389013813042, "grad_norm": 0.47527703642845154, "learning_rate": 9.880147434859348e-06, "loss": 0.4204, "step": 5782 }, { "epoch": 0.26538479188655867, "grad_norm": 0.4635518789291382, "learning_rate": 9.880094066706322e-06, "loss": 0.3913, "step": 5783 }, { "epoch": 0.2654306823918131, "grad_norm": 0.4819350242614746, "learning_rate": 9.880040686818212e-06, "loss": 0.318, "step": 5784 }, { "epoch": 0.2654765728970676, "grad_norm": 0.4777471721172333, "learning_rate": 9.87998729519514e-06, "loss": 0.4002, "step": 5785 }, { "epoch": 0.26552246340232205, "grad_norm": 0.4680997133255005, "learning_rate": 9.879933891837243e-06, "loss": 0.4685, "step": 5786 }, { "epoch": 0.26556835390757655, "grad_norm": 0.484408438205719, "learning_rate": 9.879880476744643e-06, "loss": 0.4164, "step": 5787 }, { "epoch": 0.265614244412831, "grad_norm": 0.48880401253700256, "learning_rate": 9.879827049917471e-06, "loss": 0.4097, "step": 5788 }, { "epoch": 0.26566013491808543, "grad_norm": 0.4620544910430908, "learning_rate": 9.879773611355856e-06, "loss": 0.3871, "step": 5789 }, { "epoch": 0.26570602542333993, "grad_norm": 0.5591461658477783, "learning_rate": 9.879720161059924e-06, "loss": 0.4778, "step": 5790 }, { "epoch": 0.2657519159285944, "grad_norm": 0.4735066890716553, "learning_rate": 9.879666699029806e-06, "loss": 0.3572, "step": 5791 }, { "epoch": 0.2657978064338488, "grad_norm": 0.4814257323741913, "learning_rate": 9.87961322526563e-06, "loss": 0.4181, "step": 5792 }, { "epoch": 0.2658436969391033, "grad_norm": 0.424625039100647, "learning_rate": 9.879559739767524e-06, "loss": 0.2849, "step": 5793 }, { "epoch": 0.26588958744435776, "grad_norm": 0.485614538192749, "learning_rate": 9.879506242535617e-06, "loss": 0.4594, "step": 5794 }, { "epoch": 0.2659354779496122, "grad_norm": 0.4749804735183716, "learning_rate": 9.879452733570037e-06, "loss": 0.4785, "step": 5795 }, { "epoch": 0.2659813684548667, "grad_norm": 0.5107864141464233, "learning_rate": 9.879399212870914e-06, "loss": 0.4985, "step": 5796 }, { "epoch": 0.26602725896012114, "grad_norm": 0.4326646625995636, "learning_rate": 9.879345680438375e-06, "loss": 0.3177, "step": 5797 }, { "epoch": 0.26607314946537564, "grad_norm": 0.44445276260375977, "learning_rate": 9.87929213627255e-06, "loss": 0.343, "step": 5798 }, { "epoch": 0.2661190399706301, "grad_norm": 0.7286478877067566, "learning_rate": 9.879238580373566e-06, "loss": 0.5213, "step": 5799 }, { "epoch": 0.2661649304758845, "grad_norm": 0.4173383414745331, "learning_rate": 9.879185012741555e-06, "loss": 0.3196, "step": 5800 }, { "epoch": 0.266210820981139, "grad_norm": 0.5137373208999634, "learning_rate": 9.879131433376643e-06, "loss": 0.4986, "step": 5801 }, { "epoch": 0.26625671148639346, "grad_norm": 0.46538951992988586, "learning_rate": 9.879077842278962e-06, "loss": 0.413, "step": 5802 }, { "epoch": 0.2663026019916479, "grad_norm": 0.45981860160827637, "learning_rate": 9.879024239448635e-06, "loss": 0.4034, "step": 5803 }, { "epoch": 0.2663484924969024, "grad_norm": 0.5179405212402344, "learning_rate": 9.878970624885798e-06, "loss": 0.4053, "step": 5804 }, { "epoch": 0.26639438300215684, "grad_norm": 0.4923546314239502, "learning_rate": 9.878916998590574e-06, "loss": 0.4159, "step": 5805 }, { "epoch": 0.26644027350741134, "grad_norm": 0.46567609906196594, "learning_rate": 9.878863360563096e-06, "loss": 0.4509, "step": 5806 }, { "epoch": 0.2664861640126658, "grad_norm": 0.5032748579978943, "learning_rate": 9.87880971080349e-06, "loss": 0.4966, "step": 5807 }, { "epoch": 0.2665320545179202, "grad_norm": 0.5178773403167725, "learning_rate": 9.878756049311887e-06, "loss": 0.4739, "step": 5808 }, { "epoch": 0.2665779450231747, "grad_norm": 0.49333029985427856, "learning_rate": 9.878702376088417e-06, "loss": 0.4365, "step": 5809 }, { "epoch": 0.26662383552842917, "grad_norm": 0.4771004617214203, "learning_rate": 9.878648691133206e-06, "loss": 0.4197, "step": 5810 }, { "epoch": 0.2666697260336836, "grad_norm": 0.49757635593414307, "learning_rate": 9.878594994446385e-06, "loss": 0.4898, "step": 5811 }, { "epoch": 0.2667156165389381, "grad_norm": 0.46155011653900146, "learning_rate": 9.878541286028082e-06, "loss": 0.3847, "step": 5812 }, { "epoch": 0.26676150704419255, "grad_norm": 0.4896308481693268, "learning_rate": 9.878487565878427e-06, "loss": 0.431, "step": 5813 }, { "epoch": 0.26680739754944705, "grad_norm": 0.45312026143074036, "learning_rate": 9.87843383399755e-06, "loss": 0.3733, "step": 5814 }, { "epoch": 0.2668532880547015, "grad_norm": 0.4678769111633301, "learning_rate": 9.878380090385578e-06, "loss": 0.399, "step": 5815 }, { "epoch": 0.26689917855995593, "grad_norm": 0.4658665060997009, "learning_rate": 9.878326335042643e-06, "loss": 0.4442, "step": 5816 }, { "epoch": 0.2669450690652104, "grad_norm": 0.504565417766571, "learning_rate": 9.878272567968871e-06, "loss": 0.396, "step": 5817 }, { "epoch": 0.26699095957046487, "grad_norm": 0.46499332785606384, "learning_rate": 9.878218789164396e-06, "loss": 0.4007, "step": 5818 }, { "epoch": 0.2670368500757193, "grad_norm": 0.47522398829460144, "learning_rate": 9.878164998629342e-06, "loss": 0.4289, "step": 5819 }, { "epoch": 0.2670827405809738, "grad_norm": 0.44240060448646545, "learning_rate": 9.878111196363843e-06, "loss": 0.3217, "step": 5820 }, { "epoch": 0.26712863108622825, "grad_norm": 0.4531952738761902, "learning_rate": 9.878057382368025e-06, "loss": 0.3433, "step": 5821 }, { "epoch": 0.26717452159148275, "grad_norm": 0.478973925113678, "learning_rate": 9.878003556642017e-06, "loss": 0.4489, "step": 5822 }, { "epoch": 0.2672204120967372, "grad_norm": 0.49470970034599304, "learning_rate": 9.877949719185951e-06, "loss": 0.4187, "step": 5823 }, { "epoch": 0.26726630260199163, "grad_norm": 0.4251794219017029, "learning_rate": 9.877895869999956e-06, "loss": 0.3585, "step": 5824 }, { "epoch": 0.26731219310724613, "grad_norm": 0.4552548825740814, "learning_rate": 9.877842009084163e-06, "loss": 0.4083, "step": 5825 }, { "epoch": 0.2673580836125006, "grad_norm": 0.46127310395240784, "learning_rate": 9.877788136438696e-06, "loss": 0.4271, "step": 5826 }, { "epoch": 0.267403974117755, "grad_norm": 0.4486490488052368, "learning_rate": 9.877734252063688e-06, "loss": 0.3841, "step": 5827 }, { "epoch": 0.2674498646230095, "grad_norm": 0.4565608501434326, "learning_rate": 9.877680355959271e-06, "loss": 0.3767, "step": 5828 }, { "epoch": 0.26749575512826396, "grad_norm": 0.49532851576805115, "learning_rate": 9.87762644812557e-06, "loss": 0.4108, "step": 5829 }, { "epoch": 0.2675416456335184, "grad_norm": 0.4849661886692047, "learning_rate": 9.877572528562718e-06, "loss": 0.4619, "step": 5830 }, { "epoch": 0.2675875361387729, "grad_norm": 0.4641399383544922, "learning_rate": 9.877518597270843e-06, "loss": 0.3853, "step": 5831 }, { "epoch": 0.26763342664402734, "grad_norm": 0.5094941258430481, "learning_rate": 9.877464654250074e-06, "loss": 0.483, "step": 5832 }, { "epoch": 0.26767931714928184, "grad_norm": 0.4141210913658142, "learning_rate": 9.877410699500545e-06, "loss": 0.3292, "step": 5833 }, { "epoch": 0.2677252076545363, "grad_norm": 0.4716409742832184, "learning_rate": 9.877356733022381e-06, "loss": 0.3975, "step": 5834 }, { "epoch": 0.2677710981597907, "grad_norm": 0.44029295444488525, "learning_rate": 9.877302754815713e-06, "loss": 0.3804, "step": 5835 }, { "epoch": 0.2678169886650452, "grad_norm": 0.4616566300392151, "learning_rate": 9.87724876488067e-06, "loss": 0.3806, "step": 5836 }, { "epoch": 0.26786287917029966, "grad_norm": 0.5055999755859375, "learning_rate": 9.877194763217387e-06, "loss": 0.4449, "step": 5837 }, { "epoch": 0.2679087696755541, "grad_norm": 0.45195257663726807, "learning_rate": 9.877140749825986e-06, "loss": 0.411, "step": 5838 }, { "epoch": 0.2679546601808086, "grad_norm": 0.45655742287635803, "learning_rate": 9.877086724706601e-06, "loss": 0.4326, "step": 5839 }, { "epoch": 0.26800055068606304, "grad_norm": 0.4880322515964508, "learning_rate": 9.877032687859364e-06, "loss": 0.487, "step": 5840 }, { "epoch": 0.26804644119131754, "grad_norm": 0.45049306750297546, "learning_rate": 9.876978639284402e-06, "loss": 0.374, "step": 5841 }, { "epoch": 0.268092331696572, "grad_norm": 0.45101916790008545, "learning_rate": 9.876924578981845e-06, "loss": 0.3572, "step": 5842 }, { "epoch": 0.2681382222018264, "grad_norm": 0.4441739022731781, "learning_rate": 9.876870506951823e-06, "loss": 0.4055, "step": 5843 }, { "epoch": 0.2681841127070809, "grad_norm": 0.5254185795783997, "learning_rate": 9.876816423194468e-06, "loss": 0.4843, "step": 5844 }, { "epoch": 0.26823000321233537, "grad_norm": 0.5126960277557373, "learning_rate": 9.876762327709908e-06, "loss": 0.448, "step": 5845 }, { "epoch": 0.2682758937175898, "grad_norm": 0.426925390958786, "learning_rate": 9.876708220498272e-06, "loss": 0.3594, "step": 5846 }, { "epoch": 0.2683217842228443, "grad_norm": 0.6193928718566895, "learning_rate": 9.876654101559695e-06, "loss": 0.5764, "step": 5847 }, { "epoch": 0.26836767472809875, "grad_norm": 0.4437464773654938, "learning_rate": 9.876599970894303e-06, "loss": 0.3566, "step": 5848 }, { "epoch": 0.26841356523335325, "grad_norm": 0.4854471683502197, "learning_rate": 9.876545828502227e-06, "loss": 0.3888, "step": 5849 }, { "epoch": 0.2684594557386077, "grad_norm": 0.4578111469745636, "learning_rate": 9.876491674383595e-06, "loss": 0.3887, "step": 5850 }, { "epoch": 0.26850534624386213, "grad_norm": 0.4687778353691101, "learning_rate": 9.876437508538543e-06, "loss": 0.4203, "step": 5851 }, { "epoch": 0.26855123674911663, "grad_norm": 0.4990726709365845, "learning_rate": 9.876383330967197e-06, "loss": 0.388, "step": 5852 }, { "epoch": 0.26859712725437107, "grad_norm": 0.5319591164588928, "learning_rate": 9.876329141669687e-06, "loss": 0.484, "step": 5853 }, { "epoch": 0.2686430177596255, "grad_norm": 0.4489472508430481, "learning_rate": 9.876274940646144e-06, "loss": 0.4378, "step": 5854 }, { "epoch": 0.26868890826488, "grad_norm": 0.4428797662258148, "learning_rate": 9.876220727896701e-06, "loss": 0.3298, "step": 5855 }, { "epoch": 0.26873479877013445, "grad_norm": 0.444782018661499, "learning_rate": 9.876166503421485e-06, "loss": 0.3861, "step": 5856 }, { "epoch": 0.2687806892753889, "grad_norm": 0.48130282759666443, "learning_rate": 9.876112267220627e-06, "loss": 0.419, "step": 5857 }, { "epoch": 0.2688265797806434, "grad_norm": 0.4787299931049347, "learning_rate": 9.876058019294257e-06, "loss": 0.4544, "step": 5858 }, { "epoch": 0.26887247028589784, "grad_norm": 0.4317842423915863, "learning_rate": 9.87600375964251e-06, "loss": 0.3802, "step": 5859 }, { "epoch": 0.26891836079115233, "grad_norm": 0.48446449637413025, "learning_rate": 9.875949488265509e-06, "loss": 0.3837, "step": 5860 }, { "epoch": 0.2689642512964068, "grad_norm": 0.46203431487083435, "learning_rate": 9.87589520516339e-06, "loss": 0.4113, "step": 5861 }, { "epoch": 0.2690101418016612, "grad_norm": 0.4458620548248291, "learning_rate": 9.875840910336282e-06, "loss": 0.3601, "step": 5862 }, { "epoch": 0.2690560323069157, "grad_norm": 0.49450910091400146, "learning_rate": 9.875786603784317e-06, "loss": 0.3779, "step": 5863 }, { "epoch": 0.26910192281217016, "grad_norm": 0.4460464119911194, "learning_rate": 9.875732285507622e-06, "loss": 0.3369, "step": 5864 }, { "epoch": 0.2691478133174246, "grad_norm": 0.468208372592926, "learning_rate": 9.875677955506329e-06, "loss": 0.3797, "step": 5865 }, { "epoch": 0.2691937038226791, "grad_norm": 0.4886190891265869, "learning_rate": 9.875623613780572e-06, "loss": 0.3437, "step": 5866 }, { "epoch": 0.26923959432793354, "grad_norm": 0.4577125012874603, "learning_rate": 9.875569260330476e-06, "loss": 0.4853, "step": 5867 }, { "epoch": 0.26928548483318804, "grad_norm": 0.4669910669326782, "learning_rate": 9.875514895156176e-06, "loss": 0.429, "step": 5868 }, { "epoch": 0.2693313753384425, "grad_norm": 0.4648025333881378, "learning_rate": 9.875460518257803e-06, "loss": 0.3617, "step": 5869 }, { "epoch": 0.2693772658436969, "grad_norm": 0.44554293155670166, "learning_rate": 9.875406129635484e-06, "loss": 0.369, "step": 5870 }, { "epoch": 0.2694231563489514, "grad_norm": 0.4423474073410034, "learning_rate": 9.875351729289353e-06, "loss": 0.3745, "step": 5871 }, { "epoch": 0.26946904685420586, "grad_norm": 0.5084224939346313, "learning_rate": 9.87529731721954e-06, "loss": 0.4447, "step": 5872 }, { "epoch": 0.2695149373594603, "grad_norm": 0.45994704961776733, "learning_rate": 9.875242893426175e-06, "loss": 0.3462, "step": 5873 }, { "epoch": 0.2695608278647148, "grad_norm": 0.47925201058387756, "learning_rate": 9.87518845790939e-06, "loss": 0.4285, "step": 5874 }, { "epoch": 0.26960671836996924, "grad_norm": 0.6300848126411438, "learning_rate": 9.875134010669314e-06, "loss": 0.3816, "step": 5875 }, { "epoch": 0.26965260887522374, "grad_norm": 0.4699513018131256, "learning_rate": 9.875079551706081e-06, "loss": 0.4287, "step": 5876 }, { "epoch": 0.2696984993804782, "grad_norm": 0.4392528533935547, "learning_rate": 9.87502508101982e-06, "loss": 0.3791, "step": 5877 }, { "epoch": 0.2697443898857326, "grad_norm": 0.46475958824157715, "learning_rate": 9.874970598610661e-06, "loss": 0.3803, "step": 5878 }, { "epoch": 0.2697902803909871, "grad_norm": 0.47555962204933167, "learning_rate": 9.874916104478737e-06, "loss": 0.443, "step": 5879 }, { "epoch": 0.26983617089624157, "grad_norm": 0.4785141944885254, "learning_rate": 9.874861598624179e-06, "loss": 0.3838, "step": 5880 }, { "epoch": 0.269882061401496, "grad_norm": 0.48035871982574463, "learning_rate": 9.874807081047118e-06, "loss": 0.4224, "step": 5881 }, { "epoch": 0.2699279519067505, "grad_norm": 0.4470847547054291, "learning_rate": 9.874752551747685e-06, "loss": 0.402, "step": 5882 }, { "epoch": 0.26997384241200495, "grad_norm": 0.45945751667022705, "learning_rate": 9.874698010726007e-06, "loss": 0.4107, "step": 5883 }, { "epoch": 0.2700197329172594, "grad_norm": 0.4976825714111328, "learning_rate": 9.874643457982222e-06, "loss": 0.432, "step": 5884 }, { "epoch": 0.2700656234225139, "grad_norm": 0.4576255679130554, "learning_rate": 9.874588893516458e-06, "loss": 0.4294, "step": 5885 }, { "epoch": 0.27011151392776833, "grad_norm": 0.45117494463920593, "learning_rate": 9.874534317328844e-06, "loss": 0.3383, "step": 5886 }, { "epoch": 0.27015740443302283, "grad_norm": 0.4742228388786316, "learning_rate": 9.874479729419515e-06, "loss": 0.4182, "step": 5887 }, { "epoch": 0.27020329493827727, "grad_norm": 0.4628211557865143, "learning_rate": 9.8744251297886e-06, "loss": 0.3856, "step": 5888 }, { "epoch": 0.2702491854435317, "grad_norm": 0.4697141647338867, "learning_rate": 9.874370518436232e-06, "loss": 0.3867, "step": 5889 }, { "epoch": 0.2702950759487862, "grad_norm": 0.5377175807952881, "learning_rate": 9.874315895362539e-06, "loss": 0.5391, "step": 5890 }, { "epoch": 0.27034096645404065, "grad_norm": 0.47244906425476074, "learning_rate": 9.874261260567656e-06, "loss": 0.4338, "step": 5891 }, { "epoch": 0.2703868569592951, "grad_norm": 0.4866069555282593, "learning_rate": 9.874206614051713e-06, "loss": 0.406, "step": 5892 }, { "epoch": 0.2704327474645496, "grad_norm": 0.44471365213394165, "learning_rate": 9.874151955814843e-06, "loss": 0.4138, "step": 5893 }, { "epoch": 0.27047863796980404, "grad_norm": 0.45400270819664, "learning_rate": 9.874097285857173e-06, "loss": 0.3691, "step": 5894 }, { "epoch": 0.27052452847505853, "grad_norm": 0.5032569169998169, "learning_rate": 9.874042604178839e-06, "loss": 0.536, "step": 5895 }, { "epoch": 0.270570418980313, "grad_norm": 0.47621259093284607, "learning_rate": 9.873987910779971e-06, "loss": 0.3677, "step": 5896 }, { "epoch": 0.2706163094855674, "grad_norm": 0.4404245913028717, "learning_rate": 9.873933205660701e-06, "loss": 0.3465, "step": 5897 }, { "epoch": 0.2706621999908219, "grad_norm": 0.4451431334018707, "learning_rate": 9.873878488821157e-06, "loss": 0.4091, "step": 5898 }, { "epoch": 0.27070809049607636, "grad_norm": 0.524995744228363, "learning_rate": 9.873823760261476e-06, "loss": 0.43, "step": 5899 }, { "epoch": 0.2707539810013308, "grad_norm": 0.49389970302581787, "learning_rate": 9.873769019981787e-06, "loss": 0.4485, "step": 5900 }, { "epoch": 0.2707998715065853, "grad_norm": 0.899696946144104, "learning_rate": 9.873714267982222e-06, "loss": 0.4961, "step": 5901 }, { "epoch": 0.27084576201183974, "grad_norm": 0.5391474366188049, "learning_rate": 9.873659504262911e-06, "loss": 0.4806, "step": 5902 }, { "epoch": 0.27089165251709424, "grad_norm": 0.4806353449821472, "learning_rate": 9.873604728823988e-06, "loss": 0.4258, "step": 5903 }, { "epoch": 0.2709375430223487, "grad_norm": 0.47695058584213257, "learning_rate": 9.873549941665583e-06, "loss": 0.3707, "step": 5904 }, { "epoch": 0.2709834335276031, "grad_norm": 0.5217123031616211, "learning_rate": 9.87349514278783e-06, "loss": 0.4838, "step": 5905 }, { "epoch": 0.2710293240328576, "grad_norm": 0.4487816393375397, "learning_rate": 9.873440332190857e-06, "loss": 0.3887, "step": 5906 }, { "epoch": 0.27107521453811206, "grad_norm": 0.47574514150619507, "learning_rate": 9.8733855098748e-06, "loss": 0.3999, "step": 5907 }, { "epoch": 0.2711211050433665, "grad_norm": 0.48733723163604736, "learning_rate": 9.873330675839787e-06, "loss": 0.4528, "step": 5908 }, { "epoch": 0.271166995548621, "grad_norm": 0.4945688843727112, "learning_rate": 9.873275830085953e-06, "loss": 0.4224, "step": 5909 }, { "epoch": 0.27121288605387545, "grad_norm": 0.4674447476863861, "learning_rate": 9.87322097261343e-06, "loss": 0.3548, "step": 5910 }, { "epoch": 0.2712587765591299, "grad_norm": 0.445092111825943, "learning_rate": 9.873166103422348e-06, "loss": 0.3592, "step": 5911 }, { "epoch": 0.2713046670643844, "grad_norm": 0.4449789226055145, "learning_rate": 9.873111222512838e-06, "loss": 0.3496, "step": 5912 }, { "epoch": 0.27135055756963883, "grad_norm": 0.5025137066841125, "learning_rate": 9.873056329885033e-06, "loss": 0.3531, "step": 5913 }, { "epoch": 0.2713964480748933, "grad_norm": 0.4014642536640167, "learning_rate": 9.873001425539068e-06, "loss": 0.3382, "step": 5914 }, { "epoch": 0.27144233858014777, "grad_norm": 0.45683935284614563, "learning_rate": 9.872946509475072e-06, "loss": 0.3925, "step": 5915 }, { "epoch": 0.2714882290854022, "grad_norm": 0.507476270198822, "learning_rate": 9.872891581693175e-06, "loss": 0.4704, "step": 5916 }, { "epoch": 0.2715341195906567, "grad_norm": 0.45307326316833496, "learning_rate": 9.872836642193514e-06, "loss": 0.4062, "step": 5917 }, { "epoch": 0.27158001009591115, "grad_norm": 0.45223546028137207, "learning_rate": 9.87278169097622e-06, "loss": 0.3459, "step": 5918 }, { "epoch": 0.2716259006011656, "grad_norm": 0.49584969878196716, "learning_rate": 9.872726728041422e-06, "loss": 0.4939, "step": 5919 }, { "epoch": 0.2716717911064201, "grad_norm": 0.45140790939331055, "learning_rate": 9.872671753389254e-06, "loss": 0.358, "step": 5920 }, { "epoch": 0.27171768161167453, "grad_norm": 0.4547443389892578, "learning_rate": 9.87261676701985e-06, "loss": 0.4085, "step": 5921 }, { "epoch": 0.27176357211692903, "grad_norm": 0.4753084182739258, "learning_rate": 9.87256176893334e-06, "loss": 0.4668, "step": 5922 }, { "epoch": 0.2718094626221835, "grad_norm": 0.45439091324806213, "learning_rate": 9.872506759129856e-06, "loss": 0.3493, "step": 5923 }, { "epoch": 0.2718553531274379, "grad_norm": 0.45839372277259827, "learning_rate": 9.87245173760953e-06, "loss": 0.3633, "step": 5924 }, { "epoch": 0.2719012436326924, "grad_norm": 0.5432732105255127, "learning_rate": 9.872396704372497e-06, "loss": 0.474, "step": 5925 }, { "epoch": 0.27194713413794686, "grad_norm": 0.474185585975647, "learning_rate": 9.872341659418887e-06, "loss": 0.3808, "step": 5926 }, { "epoch": 0.2719930246432013, "grad_norm": 0.47217246890068054, "learning_rate": 9.872286602748834e-06, "loss": 0.432, "step": 5927 }, { "epoch": 0.2720389151484558, "grad_norm": 0.47725555300712585, "learning_rate": 9.872231534362468e-06, "loss": 0.396, "step": 5928 }, { "epoch": 0.27208480565371024, "grad_norm": 0.504806637763977, "learning_rate": 9.872176454259924e-06, "loss": 0.4386, "step": 5929 }, { "epoch": 0.27213069615896474, "grad_norm": 0.4668581485748291, "learning_rate": 9.872121362441331e-06, "loss": 0.3863, "step": 5930 }, { "epoch": 0.2721765866642192, "grad_norm": 0.48398470878601074, "learning_rate": 9.872066258906827e-06, "loss": 0.3986, "step": 5931 }, { "epoch": 0.2722224771694736, "grad_norm": 0.482972115278244, "learning_rate": 9.87201114365654e-06, "loss": 0.3975, "step": 5932 }, { "epoch": 0.2722683676747281, "grad_norm": 0.45084044337272644, "learning_rate": 9.871956016690604e-06, "loss": 0.4015, "step": 5933 }, { "epoch": 0.27231425817998256, "grad_norm": 0.46074631810188293, "learning_rate": 9.87190087800915e-06, "loss": 0.4036, "step": 5934 }, { "epoch": 0.272360148685237, "grad_norm": 0.5191996097564697, "learning_rate": 9.871845727612316e-06, "loss": 0.4689, "step": 5935 }, { "epoch": 0.2724060391904915, "grad_norm": 0.5497806668281555, "learning_rate": 9.871790565500226e-06, "loss": 0.525, "step": 5936 }, { "epoch": 0.27245192969574594, "grad_norm": 0.4974724054336548, "learning_rate": 9.87173539167302e-06, "loss": 0.4359, "step": 5937 }, { "epoch": 0.27249782020100044, "grad_norm": 0.46319347620010376, "learning_rate": 9.871680206130827e-06, "loss": 0.4271, "step": 5938 }, { "epoch": 0.2725437107062549, "grad_norm": 0.4652736485004425, "learning_rate": 9.87162500887378e-06, "loss": 0.3636, "step": 5939 }, { "epoch": 0.2725896012115093, "grad_norm": 0.49670347571372986, "learning_rate": 9.871569799902014e-06, "loss": 0.4716, "step": 5940 }, { "epoch": 0.2726354917167638, "grad_norm": 0.4571923613548279, "learning_rate": 9.87151457921566e-06, "loss": 0.3567, "step": 5941 }, { "epoch": 0.27268138222201826, "grad_norm": 0.45072683691978455, "learning_rate": 9.871459346814848e-06, "loss": 0.4093, "step": 5942 }, { "epoch": 0.2727272727272727, "grad_norm": 0.4656715393066406, "learning_rate": 9.871404102699717e-06, "loss": 0.393, "step": 5943 }, { "epoch": 0.2727731632325272, "grad_norm": 0.46393948793411255, "learning_rate": 9.871348846870396e-06, "loss": 0.4274, "step": 5944 }, { "epoch": 0.27281905373778165, "grad_norm": 0.45287051796913147, "learning_rate": 9.871293579327018e-06, "loss": 0.3387, "step": 5945 }, { "epoch": 0.2728649442430361, "grad_norm": 0.484746515750885, "learning_rate": 9.871238300069716e-06, "loss": 0.4234, "step": 5946 }, { "epoch": 0.2729108347482906, "grad_norm": 0.4816627502441406, "learning_rate": 9.871183009098624e-06, "loss": 0.419, "step": 5947 }, { "epoch": 0.27295672525354503, "grad_norm": 0.509975790977478, "learning_rate": 9.871127706413874e-06, "loss": 0.4628, "step": 5948 }, { "epoch": 0.2730026157587995, "grad_norm": 0.49830472469329834, "learning_rate": 9.871072392015599e-06, "loss": 0.5383, "step": 5949 }, { "epoch": 0.27304850626405397, "grad_norm": 0.43803879618644714, "learning_rate": 9.871017065903934e-06, "loss": 0.3549, "step": 5950 }, { "epoch": 0.2730943967693084, "grad_norm": 0.46389350295066833, "learning_rate": 9.870961728079007e-06, "loss": 0.3929, "step": 5951 }, { "epoch": 0.2731402872745629, "grad_norm": 0.43711918592453003, "learning_rate": 9.870906378540956e-06, "loss": 0.3382, "step": 5952 }, { "epoch": 0.27318617777981735, "grad_norm": 0.456473171710968, "learning_rate": 9.870851017289913e-06, "loss": 0.4041, "step": 5953 }, { "epoch": 0.2732320682850718, "grad_norm": 0.4576350152492523, "learning_rate": 9.87079564432601e-06, "loss": 0.359, "step": 5954 }, { "epoch": 0.2732779587903263, "grad_norm": 0.5189813375473022, "learning_rate": 9.870740259649381e-06, "loss": 0.5216, "step": 5955 }, { "epoch": 0.27332384929558073, "grad_norm": 0.4453928470611572, "learning_rate": 9.87068486326016e-06, "loss": 0.341, "step": 5956 }, { "epoch": 0.27336973980083523, "grad_norm": 0.44720685482025146, "learning_rate": 9.870629455158478e-06, "loss": 0.3877, "step": 5957 }, { "epoch": 0.2734156303060897, "grad_norm": 0.4457598924636841, "learning_rate": 9.87057403534447e-06, "loss": 0.3428, "step": 5958 }, { "epoch": 0.2734615208113441, "grad_norm": 0.492818683385849, "learning_rate": 9.870518603818267e-06, "loss": 0.4015, "step": 5959 }, { "epoch": 0.2735074113165986, "grad_norm": 0.48952701687812805, "learning_rate": 9.870463160580005e-06, "loss": 0.4396, "step": 5960 }, { "epoch": 0.27355330182185306, "grad_norm": 0.4919719994068146, "learning_rate": 9.870407705629816e-06, "loss": 0.4305, "step": 5961 }, { "epoch": 0.2735991923271075, "grad_norm": 0.48953744769096375, "learning_rate": 9.870352238967834e-06, "loss": 0.4562, "step": 5962 }, { "epoch": 0.273645082832362, "grad_norm": 0.5031746625900269, "learning_rate": 9.87029676059419e-06, "loss": 0.434, "step": 5963 }, { "epoch": 0.27369097333761644, "grad_norm": 0.6283437013626099, "learning_rate": 9.870241270509023e-06, "loss": 0.5591, "step": 5964 }, { "epoch": 0.27373686384287094, "grad_norm": 0.4421950876712799, "learning_rate": 9.87018576871246e-06, "loss": 0.3357, "step": 5965 }, { "epoch": 0.2737827543481254, "grad_norm": 0.4651041626930237, "learning_rate": 9.87013025520464e-06, "loss": 0.3752, "step": 5966 }, { "epoch": 0.2738286448533798, "grad_norm": 0.5725246667861938, "learning_rate": 9.870074729985692e-06, "loss": 0.4919, "step": 5967 }, { "epoch": 0.2738745353586343, "grad_norm": 0.5128257870674133, "learning_rate": 9.870019193055752e-06, "loss": 0.4658, "step": 5968 }, { "epoch": 0.27392042586388876, "grad_norm": 0.4714244604110718, "learning_rate": 9.869963644414952e-06, "loss": 0.3826, "step": 5969 }, { "epoch": 0.2739663163691432, "grad_norm": 0.45479047298431396, "learning_rate": 9.869908084063426e-06, "loss": 0.3877, "step": 5970 }, { "epoch": 0.2740122068743977, "grad_norm": 0.6153403520584106, "learning_rate": 9.869852512001309e-06, "loss": 0.4738, "step": 5971 }, { "epoch": 0.27405809737965214, "grad_norm": 0.4815972149372101, "learning_rate": 9.869796928228733e-06, "loss": 0.3445, "step": 5972 }, { "epoch": 0.2741039878849066, "grad_norm": 0.4348076283931732, "learning_rate": 9.869741332745833e-06, "loss": 0.3542, "step": 5973 }, { "epoch": 0.2741498783901611, "grad_norm": 0.47442400455474854, "learning_rate": 9.869685725552743e-06, "loss": 0.3773, "step": 5974 }, { "epoch": 0.2741957688954155, "grad_norm": 0.48906534910202026, "learning_rate": 9.869630106649593e-06, "loss": 0.5121, "step": 5975 }, { "epoch": 0.27424165940067, "grad_norm": 0.4835379719734192, "learning_rate": 9.869574476036521e-06, "loss": 0.4297, "step": 5976 }, { "epoch": 0.27428754990592447, "grad_norm": 0.49576854705810547, "learning_rate": 9.869518833713661e-06, "loss": 0.4579, "step": 5977 }, { "epoch": 0.2743334404111789, "grad_norm": 0.45131343603134155, "learning_rate": 9.869463179681143e-06, "loss": 0.4173, "step": 5978 }, { "epoch": 0.2743793309164334, "grad_norm": 0.5188154578208923, "learning_rate": 9.869407513939102e-06, "loss": 0.4362, "step": 5979 }, { "epoch": 0.27442522142168785, "grad_norm": 0.4979189336299896, "learning_rate": 9.869351836487675e-06, "loss": 0.4805, "step": 5980 }, { "epoch": 0.2744711119269423, "grad_norm": 0.4617033898830414, "learning_rate": 9.869296147326991e-06, "loss": 0.4108, "step": 5981 }, { "epoch": 0.2745170024321968, "grad_norm": 0.4562774896621704, "learning_rate": 9.86924044645719e-06, "loss": 0.3971, "step": 5982 }, { "epoch": 0.27456289293745123, "grad_norm": 0.4326223134994507, "learning_rate": 9.8691847338784e-06, "loss": 0.3449, "step": 5983 }, { "epoch": 0.27460878344270573, "grad_norm": 0.45280131697654724, "learning_rate": 9.869129009590759e-06, "loss": 0.4136, "step": 5984 }, { "epoch": 0.27465467394796017, "grad_norm": 0.4537236988544464, "learning_rate": 9.869073273594398e-06, "loss": 0.3549, "step": 5985 }, { "epoch": 0.2747005644532146, "grad_norm": 0.44010278582572937, "learning_rate": 9.869017525889455e-06, "loss": 0.4083, "step": 5986 }, { "epoch": 0.2747464549584691, "grad_norm": 0.44472166895866394, "learning_rate": 9.86896176647606e-06, "loss": 0.3514, "step": 5987 }, { "epoch": 0.27479234546372355, "grad_norm": 0.4771850109100342, "learning_rate": 9.868905995354347e-06, "loss": 0.3602, "step": 5988 }, { "epoch": 0.274838235968978, "grad_norm": 0.4713582694530487, "learning_rate": 9.868850212524454e-06, "loss": 0.3913, "step": 5989 }, { "epoch": 0.2748841264742325, "grad_norm": 0.48855167627334595, "learning_rate": 9.868794417986512e-06, "loss": 0.4371, "step": 5990 }, { "epoch": 0.27493001697948694, "grad_norm": 0.4928397834300995, "learning_rate": 9.868738611740655e-06, "loss": 0.4089, "step": 5991 }, { "epoch": 0.27497590748474143, "grad_norm": 0.48676592111587524, "learning_rate": 9.86868279378702e-06, "loss": 0.4136, "step": 5992 }, { "epoch": 0.2750217979899959, "grad_norm": 0.4864250123500824, "learning_rate": 9.868626964125738e-06, "loss": 0.3802, "step": 5993 }, { "epoch": 0.2750676884952503, "grad_norm": 0.44911256432533264, "learning_rate": 9.868571122756948e-06, "loss": 0.4073, "step": 5994 }, { "epoch": 0.2751135790005048, "grad_norm": 0.5066159963607788, "learning_rate": 9.868515269680777e-06, "loss": 0.4672, "step": 5995 }, { "epoch": 0.27515946950575926, "grad_norm": 0.4743802547454834, "learning_rate": 9.868459404897366e-06, "loss": 0.4571, "step": 5996 }, { "epoch": 0.2752053600110137, "grad_norm": 0.44951435923576355, "learning_rate": 9.868403528406846e-06, "loss": 0.3973, "step": 5997 }, { "epoch": 0.2752512505162682, "grad_norm": 0.4462641775608063, "learning_rate": 9.86834764020935e-06, "loss": 0.3708, "step": 5998 }, { "epoch": 0.27529714102152264, "grad_norm": 0.452903687953949, "learning_rate": 9.868291740305016e-06, "loss": 0.3918, "step": 5999 }, { "epoch": 0.2753430315267771, "grad_norm": 0.48993316292762756, "learning_rate": 9.868235828693976e-06, "loss": 0.4715, "step": 6000 }, { "epoch": 0.2753889220320316, "grad_norm": 0.46686631441116333, "learning_rate": 9.868179905376367e-06, "loss": 0.3712, "step": 6001 }, { "epoch": 0.275434812537286, "grad_norm": 0.45403245091438293, "learning_rate": 9.86812397035232e-06, "loss": 0.4324, "step": 6002 }, { "epoch": 0.2754807030425405, "grad_norm": 0.43723514676094055, "learning_rate": 9.868068023621974e-06, "loss": 0.358, "step": 6003 }, { "epoch": 0.27552659354779496, "grad_norm": 0.4659489691257477, "learning_rate": 9.868012065185459e-06, "loss": 0.3852, "step": 6004 }, { "epoch": 0.2755724840530494, "grad_norm": 0.46562132239341736, "learning_rate": 9.867956095042911e-06, "loss": 0.3726, "step": 6005 }, { "epoch": 0.2756183745583039, "grad_norm": 0.4423086941242218, "learning_rate": 9.867900113194467e-06, "loss": 0.3031, "step": 6006 }, { "epoch": 0.27566426506355834, "grad_norm": 0.46174925565719604, "learning_rate": 9.867844119640257e-06, "loss": 0.3695, "step": 6007 }, { "epoch": 0.2757101555688128, "grad_norm": 0.43979412317276, "learning_rate": 9.867788114380418e-06, "loss": 0.3412, "step": 6008 }, { "epoch": 0.2757560460740673, "grad_norm": 0.44853121042251587, "learning_rate": 9.867732097415087e-06, "loss": 0.3702, "step": 6009 }, { "epoch": 0.2758019365793217, "grad_norm": 0.47380557656288147, "learning_rate": 9.867676068744396e-06, "loss": 0.3724, "step": 6010 }, { "epoch": 0.2758478270845762, "grad_norm": 0.45100897550582886, "learning_rate": 9.86762002836848e-06, "loss": 0.3805, "step": 6011 }, { "epoch": 0.27589371758983067, "grad_norm": 0.47433117032051086, "learning_rate": 9.867563976287474e-06, "loss": 0.4351, "step": 6012 }, { "epoch": 0.2759396080950851, "grad_norm": 0.4508553147315979, "learning_rate": 9.867507912501512e-06, "loss": 0.3911, "step": 6013 }, { "epoch": 0.2759854986003396, "grad_norm": 0.455611914396286, "learning_rate": 9.867451837010732e-06, "loss": 0.41, "step": 6014 }, { "epoch": 0.27603138910559405, "grad_norm": 0.48005589842796326, "learning_rate": 9.867395749815265e-06, "loss": 0.3878, "step": 6015 }, { "epoch": 0.2760772796108485, "grad_norm": 0.4138370156288147, "learning_rate": 9.867339650915248e-06, "loss": 0.3232, "step": 6016 }, { "epoch": 0.276123170116103, "grad_norm": 0.48386910557746887, "learning_rate": 9.867283540310814e-06, "loss": 0.3999, "step": 6017 }, { "epoch": 0.27616906062135743, "grad_norm": 0.44743290543556213, "learning_rate": 9.867227418002101e-06, "loss": 0.3728, "step": 6018 }, { "epoch": 0.27621495112661193, "grad_norm": 0.4604841470718384, "learning_rate": 9.86717128398924e-06, "loss": 0.3521, "step": 6019 }, { "epoch": 0.27626084163186637, "grad_norm": 0.4918363690376282, "learning_rate": 9.867115138272372e-06, "loss": 0.4684, "step": 6020 }, { "epoch": 0.2763067321371208, "grad_norm": 0.4465678036212921, "learning_rate": 9.867058980851625e-06, "loss": 0.3749, "step": 6021 }, { "epoch": 0.2763526226423753, "grad_norm": 0.514955997467041, "learning_rate": 9.867002811727138e-06, "loss": 0.4157, "step": 6022 }, { "epoch": 0.27639851314762975, "grad_norm": 0.45760083198547363, "learning_rate": 9.866946630899046e-06, "loss": 0.4192, "step": 6023 }, { "epoch": 0.2764444036528842, "grad_norm": 0.46829721331596375, "learning_rate": 9.866890438367482e-06, "loss": 0.429, "step": 6024 }, { "epoch": 0.2764902941581387, "grad_norm": 0.4232097566127777, "learning_rate": 9.866834234132583e-06, "loss": 0.3617, "step": 6025 }, { "epoch": 0.27653618466339314, "grad_norm": 0.4737151861190796, "learning_rate": 9.866778018194484e-06, "loss": 0.4451, "step": 6026 }, { "epoch": 0.27658207516864763, "grad_norm": 0.4926268458366394, "learning_rate": 9.86672179055332e-06, "loss": 0.5006, "step": 6027 }, { "epoch": 0.2766279656739021, "grad_norm": 0.4679906368255615, "learning_rate": 9.866665551209227e-06, "loss": 0.4562, "step": 6028 }, { "epoch": 0.2766738561791565, "grad_norm": 0.4732656180858612, "learning_rate": 9.866609300162338e-06, "loss": 0.4293, "step": 6029 }, { "epoch": 0.276719746684411, "grad_norm": 0.458100289106369, "learning_rate": 9.866553037412789e-06, "loss": 0.4312, "step": 6030 }, { "epoch": 0.27676563718966546, "grad_norm": 0.4775184988975525, "learning_rate": 9.866496762960718e-06, "loss": 0.4759, "step": 6031 }, { "epoch": 0.2768115276949199, "grad_norm": 0.427941232919693, "learning_rate": 9.866440476806256e-06, "loss": 0.3221, "step": 6032 }, { "epoch": 0.2768574182001744, "grad_norm": 0.44261130690574646, "learning_rate": 9.866384178949543e-06, "loss": 0.2905, "step": 6033 }, { "epoch": 0.27690330870542884, "grad_norm": 0.5109956860542297, "learning_rate": 9.866327869390708e-06, "loss": 0.381, "step": 6034 }, { "epoch": 0.2769491992106833, "grad_norm": 0.47376441955566406, "learning_rate": 9.866271548129894e-06, "loss": 0.3962, "step": 6035 }, { "epoch": 0.2769950897159378, "grad_norm": 0.46055057644844055, "learning_rate": 9.866215215167233e-06, "loss": 0.4346, "step": 6036 }, { "epoch": 0.2770409802211922, "grad_norm": 0.46654534339904785, "learning_rate": 9.866158870502857e-06, "loss": 0.4225, "step": 6037 }, { "epoch": 0.2770868707264467, "grad_norm": 0.46061649918556213, "learning_rate": 9.866102514136906e-06, "loss": 0.343, "step": 6038 }, { "epoch": 0.27713276123170116, "grad_norm": 0.4611658751964569, "learning_rate": 9.866046146069515e-06, "loss": 0.3944, "step": 6039 }, { "epoch": 0.2771786517369556, "grad_norm": 0.4746161103248596, "learning_rate": 9.865989766300819e-06, "loss": 0.3886, "step": 6040 }, { "epoch": 0.2772245422422101, "grad_norm": 0.49253445863723755, "learning_rate": 9.865933374830951e-06, "loss": 0.4248, "step": 6041 }, { "epoch": 0.27727043274746455, "grad_norm": 0.42558035254478455, "learning_rate": 9.865876971660052e-06, "loss": 0.2707, "step": 6042 }, { "epoch": 0.277316323252719, "grad_norm": 0.47400254011154175, "learning_rate": 9.865820556788252e-06, "loss": 0.4119, "step": 6043 }, { "epoch": 0.2773622137579735, "grad_norm": 0.5436603426933289, "learning_rate": 9.86576413021569e-06, "loss": 0.5675, "step": 6044 }, { "epoch": 0.27740810426322793, "grad_norm": 0.4616427719593048, "learning_rate": 9.865707691942504e-06, "loss": 0.3613, "step": 6045 }, { "epoch": 0.2774539947684824, "grad_norm": 0.4473024606704712, "learning_rate": 9.865651241968823e-06, "loss": 0.3783, "step": 6046 }, { "epoch": 0.27749988527373687, "grad_norm": 0.5100050568580627, "learning_rate": 9.865594780294789e-06, "loss": 0.4904, "step": 6047 }, { "epoch": 0.2775457757789913, "grad_norm": 0.45534130930900574, "learning_rate": 9.865538306920533e-06, "loss": 0.3858, "step": 6048 }, { "epoch": 0.2775916662842458, "grad_norm": 0.49166369438171387, "learning_rate": 9.865481821846194e-06, "loss": 0.431, "step": 6049 }, { "epoch": 0.27763755678950025, "grad_norm": 0.43643680214881897, "learning_rate": 9.865425325071906e-06, "loss": 0.3599, "step": 6050 }, { "epoch": 0.2776834472947547, "grad_norm": 0.4726827144622803, "learning_rate": 9.865368816597806e-06, "loss": 0.4008, "step": 6051 }, { "epoch": 0.2777293378000092, "grad_norm": 0.4844609797000885, "learning_rate": 9.86531229642403e-06, "loss": 0.4984, "step": 6052 }, { "epoch": 0.27777522830526363, "grad_norm": 0.4752030074596405, "learning_rate": 9.865255764550714e-06, "loss": 0.407, "step": 6053 }, { "epoch": 0.27782111881051813, "grad_norm": 0.4702875316143036, "learning_rate": 9.865199220977993e-06, "loss": 0.3394, "step": 6054 }, { "epoch": 0.2778670093157726, "grad_norm": 0.4608556032180786, "learning_rate": 9.865142665706002e-06, "loss": 0.3854, "step": 6055 }, { "epoch": 0.277912899821027, "grad_norm": 0.4405447244644165, "learning_rate": 9.86508609873488e-06, "loss": 0.3974, "step": 6056 }, { "epoch": 0.2779587903262815, "grad_norm": 0.4800127446651459, "learning_rate": 9.865029520064762e-06, "loss": 0.4155, "step": 6057 }, { "epoch": 0.27800468083153596, "grad_norm": 0.4713591933250427, "learning_rate": 9.864972929695782e-06, "loss": 0.4152, "step": 6058 }, { "epoch": 0.2780505713367904, "grad_norm": 0.46247512102127075, "learning_rate": 9.864916327628078e-06, "loss": 0.4498, "step": 6059 }, { "epoch": 0.2780964618420449, "grad_norm": 0.47576457262039185, "learning_rate": 9.864859713861786e-06, "loss": 0.4022, "step": 6060 }, { "epoch": 0.27814235234729934, "grad_norm": 0.4680711627006531, "learning_rate": 9.86480308839704e-06, "loss": 0.4515, "step": 6061 }, { "epoch": 0.2781882428525538, "grad_norm": 0.47682899236679077, "learning_rate": 9.86474645123398e-06, "loss": 0.4855, "step": 6062 }, { "epoch": 0.2782341333578083, "grad_norm": 0.4476422071456909, "learning_rate": 9.864689802372738e-06, "loss": 0.3862, "step": 6063 }, { "epoch": 0.2782800238630627, "grad_norm": 0.4934159517288208, "learning_rate": 9.864633141813455e-06, "loss": 0.4421, "step": 6064 }, { "epoch": 0.2783259143683172, "grad_norm": 0.45127177238464355, "learning_rate": 9.864576469556263e-06, "loss": 0.414, "step": 6065 }, { "epoch": 0.27837180487357166, "grad_norm": 0.4764055013656616, "learning_rate": 9.8645197856013e-06, "loss": 0.4681, "step": 6066 }, { "epoch": 0.2784176953788261, "grad_norm": 0.4488907754421234, "learning_rate": 9.864463089948701e-06, "loss": 0.3914, "step": 6067 }, { "epoch": 0.2784635858840806, "grad_norm": 0.486619770526886, "learning_rate": 9.864406382598605e-06, "loss": 0.449, "step": 6068 }, { "epoch": 0.27850947638933504, "grad_norm": 0.46068549156188965, "learning_rate": 9.864349663551147e-06, "loss": 0.3418, "step": 6069 }, { "epoch": 0.2785553668945895, "grad_norm": 0.5222238302230835, "learning_rate": 9.864292932806464e-06, "loss": 0.5161, "step": 6070 }, { "epoch": 0.278601257399844, "grad_norm": 0.43491387367248535, "learning_rate": 9.864236190364691e-06, "loss": 0.4342, "step": 6071 }, { "epoch": 0.2786471479050984, "grad_norm": 0.44954827427864075, "learning_rate": 9.864179436225965e-06, "loss": 0.366, "step": 6072 }, { "epoch": 0.2786930384103529, "grad_norm": 0.5231358408927917, "learning_rate": 9.864122670390424e-06, "loss": 0.4041, "step": 6073 }, { "epoch": 0.27873892891560736, "grad_norm": 0.46889233589172363, "learning_rate": 9.8640658928582e-06, "loss": 0.4475, "step": 6074 }, { "epoch": 0.2787848194208618, "grad_norm": 0.469134122133255, "learning_rate": 9.864009103629435e-06, "loss": 0.3953, "step": 6075 }, { "epoch": 0.2788307099261163, "grad_norm": 0.4443010687828064, "learning_rate": 9.863952302704262e-06, "loss": 0.3408, "step": 6076 }, { "epoch": 0.27887660043137075, "grad_norm": 0.46828216314315796, "learning_rate": 9.86389549008282e-06, "loss": 0.4016, "step": 6077 }, { "epoch": 0.2789224909366252, "grad_norm": 0.440003365278244, "learning_rate": 9.863838665765245e-06, "loss": 0.366, "step": 6078 }, { "epoch": 0.2789683814418797, "grad_norm": 0.5009279847145081, "learning_rate": 9.863781829751671e-06, "loss": 0.3238, "step": 6079 }, { "epoch": 0.27901427194713413, "grad_norm": 0.4994819164276123, "learning_rate": 9.863724982042238e-06, "loss": 0.3638, "step": 6080 }, { "epoch": 0.2790601624523886, "grad_norm": 0.6921394467353821, "learning_rate": 9.863668122637082e-06, "loss": 0.4238, "step": 6081 }, { "epoch": 0.27910605295764307, "grad_norm": 0.4951264560222626, "learning_rate": 9.863611251536339e-06, "loss": 0.4093, "step": 6082 }, { "epoch": 0.2791519434628975, "grad_norm": 0.43739986419677734, "learning_rate": 9.863554368740145e-06, "loss": 0.3596, "step": 6083 }, { "epoch": 0.279197833968152, "grad_norm": 0.511479377746582, "learning_rate": 9.863497474248638e-06, "loss": 0.4941, "step": 6084 }, { "epoch": 0.27924372447340645, "grad_norm": 0.43493038415908813, "learning_rate": 9.863440568061954e-06, "loss": 0.3226, "step": 6085 }, { "epoch": 0.2792896149786609, "grad_norm": 0.5051162242889404, "learning_rate": 9.863383650180232e-06, "loss": 0.4715, "step": 6086 }, { "epoch": 0.2793355054839154, "grad_norm": 0.47087568044662476, "learning_rate": 9.863326720603605e-06, "loss": 0.3864, "step": 6087 }, { "epoch": 0.27938139598916983, "grad_norm": 0.4549236595630646, "learning_rate": 9.863269779332214e-06, "loss": 0.3953, "step": 6088 }, { "epoch": 0.2794272864944243, "grad_norm": 0.42424309253692627, "learning_rate": 9.863212826366192e-06, "loss": 0.3251, "step": 6089 }, { "epoch": 0.2794731769996788, "grad_norm": 0.508939266204834, "learning_rate": 9.86315586170568e-06, "loss": 0.4594, "step": 6090 }, { "epoch": 0.2795190675049332, "grad_norm": 0.415714293718338, "learning_rate": 9.863098885350812e-06, "loss": 0.3422, "step": 6091 }, { "epoch": 0.2795649580101877, "grad_norm": 0.4551631510257721, "learning_rate": 9.863041897301728e-06, "loss": 0.3628, "step": 6092 }, { "epoch": 0.27961084851544216, "grad_norm": 0.5171768665313721, "learning_rate": 9.86298489755856e-06, "loss": 0.4421, "step": 6093 }, { "epoch": 0.2796567390206966, "grad_norm": 0.4643041789531708, "learning_rate": 9.862927886121452e-06, "loss": 0.4118, "step": 6094 }, { "epoch": 0.2797026295259511, "grad_norm": 0.5115693211555481, "learning_rate": 9.862870862990534e-06, "loss": 0.5214, "step": 6095 }, { "epoch": 0.27974852003120554, "grad_norm": 0.42760568857192993, "learning_rate": 9.862813828165948e-06, "loss": 0.3509, "step": 6096 }, { "epoch": 0.27979441053646, "grad_norm": 0.41440504789352417, "learning_rate": 9.862756781647829e-06, "loss": 0.318, "step": 6097 }, { "epoch": 0.2798403010417145, "grad_norm": 0.43415525555610657, "learning_rate": 9.862699723436314e-06, "loss": 0.3542, "step": 6098 }, { "epoch": 0.2798861915469689, "grad_norm": 0.4744921624660492, "learning_rate": 9.86264265353154e-06, "loss": 0.3865, "step": 6099 }, { "epoch": 0.2799320820522234, "grad_norm": 0.471827894449234, "learning_rate": 9.862585571933648e-06, "loss": 0.3922, "step": 6100 }, { "epoch": 0.27997797255747786, "grad_norm": 0.5131245851516724, "learning_rate": 9.86252847864277e-06, "loss": 0.4893, "step": 6101 }, { "epoch": 0.2800238630627323, "grad_norm": 0.42059680819511414, "learning_rate": 9.862471373659045e-06, "loss": 0.3381, "step": 6102 }, { "epoch": 0.2800697535679868, "grad_norm": 0.5674087405204773, "learning_rate": 9.862414256982614e-06, "loss": 0.3941, "step": 6103 }, { "epoch": 0.28011564407324124, "grad_norm": 0.48303478956222534, "learning_rate": 9.86235712861361e-06, "loss": 0.4399, "step": 6104 }, { "epoch": 0.2801615345784957, "grad_norm": 0.477804571390152, "learning_rate": 9.86229998855217e-06, "loss": 0.436, "step": 6105 }, { "epoch": 0.2802074250837502, "grad_norm": 0.5001541376113892, "learning_rate": 9.862242836798436e-06, "loss": 0.4081, "step": 6106 }, { "epoch": 0.2802533155890046, "grad_norm": 0.4856053292751312, "learning_rate": 9.86218567335254e-06, "loss": 0.426, "step": 6107 }, { "epoch": 0.2802992060942591, "grad_norm": 0.5164276361465454, "learning_rate": 9.862128498214625e-06, "loss": 0.3316, "step": 6108 }, { "epoch": 0.28034509659951357, "grad_norm": 0.49896273016929626, "learning_rate": 9.862071311384821e-06, "loss": 0.4582, "step": 6109 }, { "epoch": 0.280390987104768, "grad_norm": 0.4719243347644806, "learning_rate": 9.862014112863274e-06, "loss": 0.4441, "step": 6110 }, { "epoch": 0.2804368776100225, "grad_norm": 0.4493636190891266, "learning_rate": 9.861956902650116e-06, "loss": 0.3212, "step": 6111 }, { "epoch": 0.28048276811527695, "grad_norm": 0.46457400918006897, "learning_rate": 9.861899680745487e-06, "loss": 0.4405, "step": 6112 }, { "epoch": 0.2805286586205314, "grad_norm": 0.4559255838394165, "learning_rate": 9.861842447149524e-06, "loss": 0.3922, "step": 6113 }, { "epoch": 0.2805745491257859, "grad_norm": 0.5552449822425842, "learning_rate": 9.861785201862363e-06, "loss": 0.3847, "step": 6114 }, { "epoch": 0.28062043963104033, "grad_norm": 0.5308847427368164, "learning_rate": 9.861727944884143e-06, "loss": 0.4579, "step": 6115 }, { "epoch": 0.28066633013629483, "grad_norm": 0.42697668075561523, "learning_rate": 9.861670676215003e-06, "loss": 0.3741, "step": 6116 }, { "epoch": 0.28071222064154927, "grad_norm": 0.47981858253479004, "learning_rate": 9.861613395855079e-06, "loss": 0.4689, "step": 6117 }, { "epoch": 0.2807581111468037, "grad_norm": 0.4542839825153351, "learning_rate": 9.861556103804508e-06, "loss": 0.3981, "step": 6118 }, { "epoch": 0.2808040016520582, "grad_norm": 0.5141986608505249, "learning_rate": 9.861498800063429e-06, "loss": 0.513, "step": 6119 }, { "epoch": 0.28084989215731265, "grad_norm": 0.45149800181388855, "learning_rate": 9.86144148463198e-06, "loss": 0.3897, "step": 6120 }, { "epoch": 0.2808957826625671, "grad_norm": 0.4892216920852661, "learning_rate": 9.8613841575103e-06, "loss": 0.454, "step": 6121 }, { "epoch": 0.2809416731678216, "grad_norm": 0.4294017553329468, "learning_rate": 9.861326818698524e-06, "loss": 0.3142, "step": 6122 }, { "epoch": 0.28098756367307604, "grad_norm": 0.4647076725959778, "learning_rate": 9.861269468196791e-06, "loss": 0.4483, "step": 6123 }, { "epoch": 0.2810334541783305, "grad_norm": 0.5382829904556274, "learning_rate": 9.86121210600524e-06, "loss": 0.474, "step": 6124 }, { "epoch": 0.281079344683585, "grad_norm": 0.46035826206207275, "learning_rate": 9.861154732124008e-06, "loss": 0.4208, "step": 6125 }, { "epoch": 0.2811252351888394, "grad_norm": 0.47033190727233887, "learning_rate": 9.861097346553232e-06, "loss": 0.3954, "step": 6126 }, { "epoch": 0.2811711256940939, "grad_norm": 0.44361451268196106, "learning_rate": 9.861039949293052e-06, "loss": 0.3444, "step": 6127 }, { "epoch": 0.28121701619934836, "grad_norm": 0.4546622633934021, "learning_rate": 9.860982540343606e-06, "loss": 0.3728, "step": 6128 }, { "epoch": 0.2812629067046028, "grad_norm": 0.4699499309062958, "learning_rate": 9.86092511970503e-06, "loss": 0.4348, "step": 6129 }, { "epoch": 0.2813087972098573, "grad_norm": 0.4492476284503937, "learning_rate": 9.860867687377464e-06, "loss": 0.398, "step": 6130 }, { "epoch": 0.28135468771511174, "grad_norm": 0.48728132247924805, "learning_rate": 9.860810243361046e-06, "loss": 0.4207, "step": 6131 }, { "epoch": 0.2814005782203662, "grad_norm": 0.47781768441200256, "learning_rate": 9.860752787655911e-06, "loss": 0.4069, "step": 6132 }, { "epoch": 0.2814464687256207, "grad_norm": 0.4726647436618805, "learning_rate": 9.860695320262202e-06, "loss": 0.4453, "step": 6133 }, { "epoch": 0.2814923592308751, "grad_norm": 0.5571734309196472, "learning_rate": 9.860637841180054e-06, "loss": 0.5334, "step": 6134 }, { "epoch": 0.2815382497361296, "grad_norm": 0.7431037425994873, "learning_rate": 9.860580350409606e-06, "loss": 0.3719, "step": 6135 }, { "epoch": 0.28158414024138406, "grad_norm": 0.46557629108428955, "learning_rate": 9.860522847950995e-06, "loss": 0.4142, "step": 6136 }, { "epoch": 0.2816300307466385, "grad_norm": 0.43953147530555725, "learning_rate": 9.860465333804364e-06, "loss": 0.3497, "step": 6137 }, { "epoch": 0.281675921251893, "grad_norm": 0.4810963273048401, "learning_rate": 9.860407807969845e-06, "loss": 0.4325, "step": 6138 }, { "epoch": 0.28172181175714744, "grad_norm": 0.44177404046058655, "learning_rate": 9.860350270447579e-06, "loss": 0.3463, "step": 6139 }, { "epoch": 0.2817677022624019, "grad_norm": 0.4617029130458832, "learning_rate": 9.860292721237705e-06, "loss": 0.402, "step": 6140 }, { "epoch": 0.2818135927676564, "grad_norm": 0.40834078192710876, "learning_rate": 9.86023516034036e-06, "loss": 0.3023, "step": 6141 }, { "epoch": 0.2818594832729108, "grad_norm": 0.5194640755653381, "learning_rate": 9.860177587755686e-06, "loss": 0.4701, "step": 6142 }, { "epoch": 0.2819053737781653, "grad_norm": 0.4561716318130493, "learning_rate": 9.860120003483818e-06, "loss": 0.3613, "step": 6143 }, { "epoch": 0.28195126428341977, "grad_norm": 0.46242067217826843, "learning_rate": 9.860062407524895e-06, "loss": 0.3776, "step": 6144 }, { "epoch": 0.2819971547886742, "grad_norm": 0.481309711933136, "learning_rate": 9.860004799879056e-06, "loss": 0.4168, "step": 6145 }, { "epoch": 0.2820430452939287, "grad_norm": 0.488971471786499, "learning_rate": 9.859947180546439e-06, "loss": 0.3861, "step": 6146 }, { "epoch": 0.28208893579918315, "grad_norm": 0.4441189467906952, "learning_rate": 9.859889549527183e-06, "loss": 0.3681, "step": 6147 }, { "epoch": 0.2821348263044376, "grad_norm": 0.49475425481796265, "learning_rate": 9.859831906821427e-06, "loss": 0.4366, "step": 6148 }, { "epoch": 0.2821807168096921, "grad_norm": 0.4932027757167816, "learning_rate": 9.859774252429309e-06, "loss": 0.5011, "step": 6149 }, { "epoch": 0.28222660731494653, "grad_norm": 0.43192771077156067, "learning_rate": 9.859716586350965e-06, "loss": 0.3367, "step": 6150 }, { "epoch": 0.282272497820201, "grad_norm": 0.5084330439567566, "learning_rate": 9.85965890858654e-06, "loss": 0.4483, "step": 6151 }, { "epoch": 0.28231838832545547, "grad_norm": 0.472550630569458, "learning_rate": 9.859601219136167e-06, "loss": 0.4775, "step": 6152 }, { "epoch": 0.2823642788307099, "grad_norm": 0.465170681476593, "learning_rate": 9.859543517999987e-06, "loss": 0.4077, "step": 6153 }, { "epoch": 0.2824101693359644, "grad_norm": 0.45738843083381653, "learning_rate": 9.859485805178139e-06, "loss": 0.4112, "step": 6154 }, { "epoch": 0.28245605984121885, "grad_norm": 0.4472235143184662, "learning_rate": 9.859428080670761e-06, "loss": 0.3617, "step": 6155 }, { "epoch": 0.2825019503464733, "grad_norm": 0.42663973569869995, "learning_rate": 9.859370344477993e-06, "loss": 0.3578, "step": 6156 }, { "epoch": 0.2825478408517278, "grad_norm": 0.44951340556144714, "learning_rate": 9.859312596599972e-06, "loss": 0.3795, "step": 6157 }, { "epoch": 0.28259373135698224, "grad_norm": 0.4800887405872345, "learning_rate": 9.859254837036838e-06, "loss": 0.4393, "step": 6158 }, { "epoch": 0.2826396218622367, "grad_norm": 0.4766150116920471, "learning_rate": 9.859197065788731e-06, "loss": 0.434, "step": 6159 }, { "epoch": 0.2826855123674912, "grad_norm": 0.44755032658576965, "learning_rate": 9.859139282855787e-06, "loss": 0.3704, "step": 6160 }, { "epoch": 0.2827314028727456, "grad_norm": 0.48971590399742126, "learning_rate": 9.859081488238147e-06, "loss": 0.4697, "step": 6161 }, { "epoch": 0.2827772933780001, "grad_norm": 0.4429587423801422, "learning_rate": 9.859023681935951e-06, "loss": 0.39, "step": 6162 }, { "epoch": 0.28282318388325456, "grad_norm": 0.46537846326828003, "learning_rate": 9.858965863949335e-06, "loss": 0.4017, "step": 6163 }, { "epoch": 0.282869074388509, "grad_norm": 0.4563058912754059, "learning_rate": 9.85890803427844e-06, "loss": 0.3872, "step": 6164 }, { "epoch": 0.2829149648937635, "grad_norm": 0.47252127528190613, "learning_rate": 9.858850192923405e-06, "loss": 0.4008, "step": 6165 }, { "epoch": 0.28296085539901794, "grad_norm": 0.4733385741710663, "learning_rate": 9.858792339884368e-06, "loss": 0.4248, "step": 6166 }, { "epoch": 0.2830067459042724, "grad_norm": 0.48564577102661133, "learning_rate": 9.85873447516147e-06, "loss": 0.4165, "step": 6167 }, { "epoch": 0.2830526364095269, "grad_norm": 0.4254152774810791, "learning_rate": 9.858676598754848e-06, "loss": 0.3245, "step": 6168 }, { "epoch": 0.2830985269147813, "grad_norm": 0.4674239456653595, "learning_rate": 9.858618710664644e-06, "loss": 0.4076, "step": 6169 }, { "epoch": 0.2831444174200358, "grad_norm": 0.4380985498428345, "learning_rate": 9.858560810890993e-06, "loss": 0.3581, "step": 6170 }, { "epoch": 0.28319030792529026, "grad_norm": 0.45615044236183167, "learning_rate": 9.858502899434038e-06, "loss": 0.3923, "step": 6171 }, { "epoch": 0.2832361984305447, "grad_norm": 0.5834977030754089, "learning_rate": 9.858444976293918e-06, "loss": 0.2977, "step": 6172 }, { "epoch": 0.2832820889357992, "grad_norm": 0.4499538540840149, "learning_rate": 9.85838704147077e-06, "loss": 0.32, "step": 6173 }, { "epoch": 0.28332797944105365, "grad_norm": 0.4485560357570648, "learning_rate": 9.858329094964734e-06, "loss": 0.3897, "step": 6174 }, { "epoch": 0.2833738699463081, "grad_norm": 0.491111159324646, "learning_rate": 9.85827113677595e-06, "loss": 0.4323, "step": 6175 }, { "epoch": 0.2834197604515626, "grad_norm": 0.44551801681518555, "learning_rate": 9.858213166904558e-06, "loss": 0.3833, "step": 6176 }, { "epoch": 0.28346565095681703, "grad_norm": 0.44683682918548584, "learning_rate": 9.858155185350696e-06, "loss": 0.3739, "step": 6177 }, { "epoch": 0.28351154146207147, "grad_norm": 0.46934232115745544, "learning_rate": 9.858097192114504e-06, "loss": 0.3867, "step": 6178 }, { "epoch": 0.28355743196732597, "grad_norm": 0.4553755223751068, "learning_rate": 9.858039187196122e-06, "loss": 0.3992, "step": 6179 }, { "epoch": 0.2836033224725804, "grad_norm": 0.45895448327064514, "learning_rate": 9.857981170595689e-06, "loss": 0.431, "step": 6180 }, { "epoch": 0.2836492129778349, "grad_norm": 0.4690587520599365, "learning_rate": 9.857923142313343e-06, "loss": 0.4161, "step": 6181 }, { "epoch": 0.28369510348308935, "grad_norm": 0.493902325630188, "learning_rate": 9.857865102349227e-06, "loss": 0.464, "step": 6182 }, { "epoch": 0.2837409939883438, "grad_norm": 0.47692567110061646, "learning_rate": 9.857807050703478e-06, "loss": 0.378, "step": 6183 }, { "epoch": 0.2837868844935983, "grad_norm": 0.47036850452423096, "learning_rate": 9.857748987376237e-06, "loss": 0.3987, "step": 6184 }, { "epoch": 0.28383277499885273, "grad_norm": 0.5170320868492126, "learning_rate": 9.85769091236764e-06, "loss": 0.4314, "step": 6185 }, { "epoch": 0.2838786655041072, "grad_norm": 0.5118705630302429, "learning_rate": 9.857632825677832e-06, "loss": 0.4361, "step": 6186 }, { "epoch": 0.2839245560093617, "grad_norm": 0.46199196577072144, "learning_rate": 9.85757472730695e-06, "loss": 0.3972, "step": 6187 }, { "epoch": 0.2839704465146161, "grad_norm": 0.43804553151130676, "learning_rate": 9.857516617255134e-06, "loss": 0.3553, "step": 6188 }, { "epoch": 0.2840163370198706, "grad_norm": 0.48519113659858704, "learning_rate": 9.857458495522523e-06, "loss": 0.454, "step": 6189 }, { "epoch": 0.28406222752512506, "grad_norm": 0.4630095958709717, "learning_rate": 9.857400362109258e-06, "loss": 0.4249, "step": 6190 }, { "epoch": 0.2841081180303795, "grad_norm": 0.45949694514274597, "learning_rate": 9.857342217015477e-06, "loss": 0.3727, "step": 6191 }, { "epoch": 0.284154008535634, "grad_norm": 0.438872367143631, "learning_rate": 9.857284060241321e-06, "loss": 0.3853, "step": 6192 }, { "epoch": 0.28419989904088844, "grad_norm": 0.4650721251964569, "learning_rate": 9.857225891786932e-06, "loss": 0.3775, "step": 6193 }, { "epoch": 0.2842457895461429, "grad_norm": 0.4481979310512543, "learning_rate": 9.857167711652445e-06, "loss": 0.3502, "step": 6194 }, { "epoch": 0.2842916800513974, "grad_norm": 0.4544903635978699, "learning_rate": 9.857109519838005e-06, "loss": 0.4039, "step": 6195 }, { "epoch": 0.2843375705566518, "grad_norm": 0.531489372253418, "learning_rate": 9.857051316343748e-06, "loss": 0.4356, "step": 6196 }, { "epoch": 0.2843834610619063, "grad_norm": 0.482708215713501, "learning_rate": 9.856993101169817e-06, "loss": 0.4393, "step": 6197 }, { "epoch": 0.28442935156716076, "grad_norm": 0.44754308462142944, "learning_rate": 9.856934874316348e-06, "loss": 0.3202, "step": 6198 }, { "epoch": 0.2844752420724152, "grad_norm": 0.5014321804046631, "learning_rate": 9.856876635783484e-06, "loss": 0.435, "step": 6199 }, { "epoch": 0.2845211325776697, "grad_norm": 0.43194690346717834, "learning_rate": 9.856818385571366e-06, "loss": 0.3564, "step": 6200 }, { "epoch": 0.28456702308292414, "grad_norm": 0.5064954161643982, "learning_rate": 9.856760123680133e-06, "loss": 0.4168, "step": 6201 }, { "epoch": 0.2846129135881786, "grad_norm": 0.4354548752307892, "learning_rate": 9.856701850109923e-06, "loss": 0.3646, "step": 6202 }, { "epoch": 0.2846588040934331, "grad_norm": 0.5071757435798645, "learning_rate": 9.85664356486088e-06, "loss": 0.4773, "step": 6203 }, { "epoch": 0.2847046945986875, "grad_norm": 0.5185554623603821, "learning_rate": 9.85658526793314e-06, "loss": 0.4839, "step": 6204 }, { "epoch": 0.28475058510394197, "grad_norm": 0.4655497372150421, "learning_rate": 9.856526959326845e-06, "loss": 0.4514, "step": 6205 }, { "epoch": 0.28479647560919646, "grad_norm": 0.4587070941925049, "learning_rate": 9.856468639042137e-06, "loss": 0.3853, "step": 6206 }, { "epoch": 0.2848423661144509, "grad_norm": 0.46329328417778015, "learning_rate": 9.856410307079154e-06, "loss": 0.4049, "step": 6207 }, { "epoch": 0.2848882566197054, "grad_norm": 0.4677334129810333, "learning_rate": 9.856351963438038e-06, "loss": 0.416, "step": 6208 }, { "epoch": 0.28493414712495985, "grad_norm": 0.4836731553077698, "learning_rate": 9.856293608118926e-06, "loss": 0.4301, "step": 6209 }, { "epoch": 0.2849800376302143, "grad_norm": 0.48330870270729065, "learning_rate": 9.856235241121962e-06, "loss": 0.4022, "step": 6210 }, { "epoch": 0.2850259281354688, "grad_norm": 0.46648284792900085, "learning_rate": 9.856176862447285e-06, "loss": 0.4227, "step": 6211 }, { "epoch": 0.28507181864072323, "grad_norm": 0.47135964035987854, "learning_rate": 9.856118472095036e-06, "loss": 0.4559, "step": 6212 }, { "epoch": 0.28511770914597767, "grad_norm": 0.43633779883384705, "learning_rate": 9.856060070065352e-06, "loss": 0.4009, "step": 6213 }, { "epoch": 0.28516359965123217, "grad_norm": 0.47200459241867065, "learning_rate": 9.856001656358378e-06, "loss": 0.4159, "step": 6214 }, { "epoch": 0.2852094901564866, "grad_norm": 0.466999888420105, "learning_rate": 9.855943230974252e-06, "loss": 0.3784, "step": 6215 }, { "epoch": 0.2852553806617411, "grad_norm": 0.4383961260318756, "learning_rate": 9.855884793913117e-06, "loss": 0.3757, "step": 6216 }, { "epoch": 0.28530127116699555, "grad_norm": 0.4501575827598572, "learning_rate": 9.85582634517511e-06, "loss": 0.3763, "step": 6217 }, { "epoch": 0.28534716167225, "grad_norm": 0.48830845952033997, "learning_rate": 9.855767884760374e-06, "loss": 0.4774, "step": 6218 }, { "epoch": 0.2853930521775045, "grad_norm": 0.4896560609340668, "learning_rate": 9.855709412669047e-06, "loss": 0.4351, "step": 6219 }, { "epoch": 0.28543894268275893, "grad_norm": 0.4627843499183655, "learning_rate": 9.855650928901273e-06, "loss": 0.394, "step": 6220 }, { "epoch": 0.2854848331880134, "grad_norm": 0.4609028697013855, "learning_rate": 9.85559243345719e-06, "loss": 0.3892, "step": 6221 }, { "epoch": 0.2855307236932679, "grad_norm": 0.4463323950767517, "learning_rate": 9.85553392633694e-06, "loss": 0.3555, "step": 6222 }, { "epoch": 0.2855766141985223, "grad_norm": 0.46091026067733765, "learning_rate": 9.855475407540664e-06, "loss": 0.3662, "step": 6223 }, { "epoch": 0.2856225047037768, "grad_norm": 0.5015755295753479, "learning_rate": 9.855416877068503e-06, "loss": 0.5323, "step": 6224 }, { "epoch": 0.28566839520903126, "grad_norm": 0.47849828004837036, "learning_rate": 9.855358334920596e-06, "loss": 0.419, "step": 6225 }, { "epoch": 0.2857142857142857, "grad_norm": 0.472225159406662, "learning_rate": 9.855299781097083e-06, "loss": 0.428, "step": 6226 }, { "epoch": 0.2857601762195402, "grad_norm": 0.482185423374176, "learning_rate": 9.855241215598107e-06, "loss": 0.482, "step": 6227 }, { "epoch": 0.28580606672479464, "grad_norm": 0.530707836151123, "learning_rate": 9.855182638423809e-06, "loss": 0.5243, "step": 6228 }, { "epoch": 0.2858519572300491, "grad_norm": 0.4378359317779541, "learning_rate": 9.855124049574328e-06, "loss": 0.3606, "step": 6229 }, { "epoch": 0.2858978477353036, "grad_norm": 0.43571460247039795, "learning_rate": 9.855065449049808e-06, "loss": 0.3494, "step": 6230 }, { "epoch": 0.285943738240558, "grad_norm": 0.4354548454284668, "learning_rate": 9.855006836850386e-06, "loss": 0.3529, "step": 6231 }, { "epoch": 0.2859896287458125, "grad_norm": 0.4659758508205414, "learning_rate": 9.854948212976205e-06, "loss": 0.4743, "step": 6232 }, { "epoch": 0.28603551925106696, "grad_norm": 0.4602943956851959, "learning_rate": 9.854889577427406e-06, "loss": 0.387, "step": 6233 }, { "epoch": 0.2860814097563214, "grad_norm": 0.4563579261302948, "learning_rate": 9.854830930204132e-06, "loss": 0.4165, "step": 6234 }, { "epoch": 0.2861273002615759, "grad_norm": 0.4972166419029236, "learning_rate": 9.85477227130652e-06, "loss": 0.4951, "step": 6235 }, { "epoch": 0.28617319076683034, "grad_norm": 0.4556387960910797, "learning_rate": 9.85471360073471e-06, "loss": 0.3638, "step": 6236 }, { "epoch": 0.2862190812720848, "grad_norm": 0.477286696434021, "learning_rate": 9.854654918488852e-06, "loss": 0.4429, "step": 6237 }, { "epoch": 0.2862649717773393, "grad_norm": 0.45831719040870667, "learning_rate": 9.854596224569076e-06, "loss": 0.3959, "step": 6238 }, { "epoch": 0.2863108622825937, "grad_norm": 0.4485675096511841, "learning_rate": 9.854537518975531e-06, "loss": 0.3725, "step": 6239 }, { "epoch": 0.28635675278784817, "grad_norm": 0.5275249481201172, "learning_rate": 9.854478801708353e-06, "loss": 0.4309, "step": 6240 }, { "epoch": 0.28640264329310267, "grad_norm": 0.46288010478019714, "learning_rate": 9.854420072767689e-06, "loss": 0.3662, "step": 6241 }, { "epoch": 0.2864485337983571, "grad_norm": 0.4517335295677185, "learning_rate": 9.854361332153675e-06, "loss": 0.3587, "step": 6242 }, { "epoch": 0.2864944243036116, "grad_norm": 0.45703309774398804, "learning_rate": 9.854302579866454e-06, "loss": 0.3633, "step": 6243 }, { "epoch": 0.28654031480886605, "grad_norm": 0.5704601407051086, "learning_rate": 9.854243815906168e-06, "loss": 0.5165, "step": 6244 }, { "epoch": 0.2865862053141205, "grad_norm": 0.4818931519985199, "learning_rate": 9.854185040272956e-06, "loss": 0.3881, "step": 6245 }, { "epoch": 0.286632095819375, "grad_norm": 0.45618441700935364, "learning_rate": 9.854126252966963e-06, "loss": 0.4299, "step": 6246 }, { "epoch": 0.28667798632462943, "grad_norm": 0.46862849593162537, "learning_rate": 9.854067453988327e-06, "loss": 0.3777, "step": 6247 }, { "epoch": 0.2867238768298839, "grad_norm": 0.49320071935653687, "learning_rate": 9.854008643337191e-06, "loss": 0.4403, "step": 6248 }, { "epoch": 0.28676976733513837, "grad_norm": 0.5146374106407166, "learning_rate": 9.853949821013697e-06, "loss": 0.4296, "step": 6249 }, { "epoch": 0.2868156578403928, "grad_norm": 0.493019163608551, "learning_rate": 9.853890987017984e-06, "loss": 0.4529, "step": 6250 }, { "epoch": 0.2868615483456473, "grad_norm": 0.4376666247844696, "learning_rate": 9.853832141350196e-06, "loss": 0.3588, "step": 6251 }, { "epoch": 0.28690743885090175, "grad_norm": 0.5039815306663513, "learning_rate": 9.853773284010476e-06, "loss": 0.5084, "step": 6252 }, { "epoch": 0.2869533293561562, "grad_norm": 0.43810781836509705, "learning_rate": 9.85371441499896e-06, "loss": 0.381, "step": 6253 }, { "epoch": 0.2869992198614107, "grad_norm": 0.48453664779663086, "learning_rate": 9.853655534315793e-06, "loss": 0.4326, "step": 6254 }, { "epoch": 0.28704511036666513, "grad_norm": 0.47260886430740356, "learning_rate": 9.853596641961118e-06, "loss": 0.4076, "step": 6255 }, { "epoch": 0.2870910008719196, "grad_norm": 0.4749000370502472, "learning_rate": 9.853537737935072e-06, "loss": 0.3791, "step": 6256 }, { "epoch": 0.2871368913771741, "grad_norm": 0.48497769236564636, "learning_rate": 9.853478822237804e-06, "loss": 0.3888, "step": 6257 }, { "epoch": 0.2871827818824285, "grad_norm": 0.4510013461112976, "learning_rate": 9.853419894869446e-06, "loss": 0.3574, "step": 6258 }, { "epoch": 0.287228672387683, "grad_norm": 0.4828437268733978, "learning_rate": 9.853360955830149e-06, "loss": 0.4116, "step": 6259 }, { "epoch": 0.28727456289293746, "grad_norm": 0.483196884393692, "learning_rate": 9.85330200512005e-06, "loss": 0.4448, "step": 6260 }, { "epoch": 0.2873204533981919, "grad_norm": 0.4670977294445038, "learning_rate": 9.853243042739289e-06, "loss": 0.4238, "step": 6261 }, { "epoch": 0.2873663439034464, "grad_norm": 0.5728627443313599, "learning_rate": 9.853184068688013e-06, "loss": 0.5693, "step": 6262 }, { "epoch": 0.28741223440870084, "grad_norm": 0.4972585141658783, "learning_rate": 9.85312508296636e-06, "loss": 0.5224, "step": 6263 }, { "epoch": 0.2874581249139553, "grad_norm": 0.46810635924339294, "learning_rate": 9.853066085574472e-06, "loss": 0.4353, "step": 6264 }, { "epoch": 0.2875040154192098, "grad_norm": 0.4771280884742737, "learning_rate": 9.853007076512492e-06, "loss": 0.4605, "step": 6265 }, { "epoch": 0.2875499059244642, "grad_norm": 0.47825321555137634, "learning_rate": 9.852948055780562e-06, "loss": 0.413, "step": 6266 }, { "epoch": 0.28759579642971866, "grad_norm": 0.45670366287231445, "learning_rate": 9.852889023378824e-06, "loss": 0.359, "step": 6267 }, { "epoch": 0.28764168693497316, "grad_norm": 0.4528508484363556, "learning_rate": 9.85282997930742e-06, "loss": 0.3876, "step": 6268 }, { "epoch": 0.2876875774402276, "grad_norm": 0.4421367347240448, "learning_rate": 9.85277092356649e-06, "loss": 0.3386, "step": 6269 }, { "epoch": 0.2877334679454821, "grad_norm": 0.5153143405914307, "learning_rate": 9.852711856156178e-06, "loss": 0.5255, "step": 6270 }, { "epoch": 0.28777935845073654, "grad_norm": 0.473999559879303, "learning_rate": 9.852652777076627e-06, "loss": 0.4572, "step": 6271 }, { "epoch": 0.287825248955991, "grad_norm": 0.5037572383880615, "learning_rate": 9.852593686327975e-06, "loss": 0.4231, "step": 6272 }, { "epoch": 0.2878711394612455, "grad_norm": 0.4410327076911926, "learning_rate": 9.852534583910369e-06, "loss": 0.28, "step": 6273 }, { "epoch": 0.2879170299664999, "grad_norm": 0.44536107778549194, "learning_rate": 9.852475469823947e-06, "loss": 0.3573, "step": 6274 }, { "epoch": 0.28796292047175437, "grad_norm": 0.5043711066246033, "learning_rate": 9.852416344068855e-06, "loss": 0.4452, "step": 6275 }, { "epoch": 0.28800881097700887, "grad_norm": 0.4719536006450653, "learning_rate": 9.852357206645232e-06, "loss": 0.4123, "step": 6276 }, { "epoch": 0.2880547014822633, "grad_norm": 0.4324806034564972, "learning_rate": 9.852298057553222e-06, "loss": 0.3518, "step": 6277 }, { "epoch": 0.2881005919875178, "grad_norm": 0.4597470462322235, "learning_rate": 9.852238896792964e-06, "loss": 0.3938, "step": 6278 }, { "epoch": 0.28814648249277225, "grad_norm": 0.490261971950531, "learning_rate": 9.852179724364607e-06, "loss": 0.454, "step": 6279 }, { "epoch": 0.2881923729980267, "grad_norm": 0.43131354451179504, "learning_rate": 9.852120540268286e-06, "loss": 0.3061, "step": 6280 }, { "epoch": 0.2882382635032812, "grad_norm": 0.4604109525680542, "learning_rate": 9.852061344504148e-06, "loss": 0.4241, "step": 6281 }, { "epoch": 0.28828415400853563, "grad_norm": 0.4556732773780823, "learning_rate": 9.852002137072334e-06, "loss": 0.3855, "step": 6282 }, { "epoch": 0.2883300445137901, "grad_norm": 0.4630882143974304, "learning_rate": 9.851942917972986e-06, "loss": 0.3837, "step": 6283 }, { "epoch": 0.28837593501904457, "grad_norm": 0.45042598247528076, "learning_rate": 9.851883687206244e-06, "loss": 0.3589, "step": 6284 }, { "epoch": 0.288421825524299, "grad_norm": 0.47822022438049316, "learning_rate": 9.851824444772256e-06, "loss": 0.3997, "step": 6285 }, { "epoch": 0.2884677160295535, "grad_norm": 0.47928518056869507, "learning_rate": 9.85176519067116e-06, "loss": 0.4617, "step": 6286 }, { "epoch": 0.28851360653480795, "grad_norm": 0.4958309233188629, "learning_rate": 9.851705924903101e-06, "loss": 0.4961, "step": 6287 }, { "epoch": 0.2885594970400624, "grad_norm": 0.4729976952075958, "learning_rate": 9.85164664746822e-06, "loss": 0.4363, "step": 6288 }, { "epoch": 0.2886053875453169, "grad_norm": 0.4796024560928345, "learning_rate": 9.851587358366662e-06, "loss": 0.4429, "step": 6289 }, { "epoch": 0.28865127805057134, "grad_norm": 0.4620402753353119, "learning_rate": 9.851528057598564e-06, "loss": 0.3999, "step": 6290 }, { "epoch": 0.2886971685558258, "grad_norm": 0.4856235384941101, "learning_rate": 9.851468745164074e-06, "loss": 0.397, "step": 6291 }, { "epoch": 0.2887430590610803, "grad_norm": 0.4951448440551758, "learning_rate": 9.851409421063334e-06, "loss": 0.455, "step": 6292 }, { "epoch": 0.2887889495663347, "grad_norm": 0.5033533573150635, "learning_rate": 9.851350085296484e-06, "loss": 0.4282, "step": 6293 }, { "epoch": 0.28883484007158916, "grad_norm": 0.45773935317993164, "learning_rate": 9.851290737863668e-06, "loss": 0.4021, "step": 6294 }, { "epoch": 0.28888073057684366, "grad_norm": 0.4243152141571045, "learning_rate": 9.851231378765029e-06, "loss": 0.3325, "step": 6295 }, { "epoch": 0.2889266210820981, "grad_norm": 0.4444849193096161, "learning_rate": 9.85117200800071e-06, "loss": 0.3756, "step": 6296 }, { "epoch": 0.2889725115873526, "grad_norm": 0.48350393772125244, "learning_rate": 9.851112625570853e-06, "loss": 0.4251, "step": 6297 }, { "epoch": 0.28901840209260704, "grad_norm": 0.46487635374069214, "learning_rate": 9.851053231475603e-06, "loss": 0.3936, "step": 6298 }, { "epoch": 0.2890642925978615, "grad_norm": 0.47869253158569336, "learning_rate": 9.8509938257151e-06, "loss": 0.4332, "step": 6299 }, { "epoch": 0.289110183103116, "grad_norm": 0.45086222887039185, "learning_rate": 9.850934408289488e-06, "loss": 0.3187, "step": 6300 }, { "epoch": 0.2891560736083704, "grad_norm": 0.5071315169334412, "learning_rate": 9.850874979198908e-06, "loss": 0.4334, "step": 6301 }, { "epoch": 0.28920196411362487, "grad_norm": 0.4443237781524658, "learning_rate": 9.850815538443505e-06, "loss": 0.3971, "step": 6302 }, { "epoch": 0.28924785461887936, "grad_norm": 0.47912678122520447, "learning_rate": 9.850756086023423e-06, "loss": 0.4374, "step": 6303 }, { "epoch": 0.2892937451241338, "grad_norm": 0.4409722685813904, "learning_rate": 9.850696621938803e-06, "loss": 0.3711, "step": 6304 }, { "epoch": 0.2893396356293883, "grad_norm": 0.5260995626449585, "learning_rate": 9.850637146189789e-06, "loss": 0.4472, "step": 6305 }, { "epoch": 0.28938552613464275, "grad_norm": 0.45590466260910034, "learning_rate": 9.850577658776523e-06, "loss": 0.4021, "step": 6306 }, { "epoch": 0.2894314166398972, "grad_norm": 0.4203667640686035, "learning_rate": 9.850518159699149e-06, "loss": 0.3079, "step": 6307 }, { "epoch": 0.2894773071451517, "grad_norm": 0.4631643295288086, "learning_rate": 9.85045864895781e-06, "loss": 0.4046, "step": 6308 }, { "epoch": 0.28952319765040613, "grad_norm": 0.46179717779159546, "learning_rate": 9.850399126552649e-06, "loss": 0.3801, "step": 6309 }, { "epoch": 0.28956908815566057, "grad_norm": 0.4564787745475769, "learning_rate": 9.850339592483807e-06, "loss": 0.3699, "step": 6310 }, { "epoch": 0.28961497866091507, "grad_norm": 0.44304466247558594, "learning_rate": 9.850280046751431e-06, "loss": 0.3717, "step": 6311 }, { "epoch": 0.2896608691661695, "grad_norm": 0.45484358072280884, "learning_rate": 9.85022048935566e-06, "loss": 0.4025, "step": 6312 }, { "epoch": 0.289706759671424, "grad_norm": 0.5319861173629761, "learning_rate": 9.85016092029664e-06, "loss": 0.5365, "step": 6313 }, { "epoch": 0.28975265017667845, "grad_norm": 0.4413909614086151, "learning_rate": 9.850101339574516e-06, "loss": 0.3917, "step": 6314 }, { "epoch": 0.2897985406819329, "grad_norm": 0.43781182169914246, "learning_rate": 9.850041747189428e-06, "loss": 0.3902, "step": 6315 }, { "epoch": 0.2898444311871874, "grad_norm": 0.4508521854877472, "learning_rate": 9.84998214314152e-06, "loss": 0.4106, "step": 6316 }, { "epoch": 0.28989032169244183, "grad_norm": 0.4941001832485199, "learning_rate": 9.849922527430937e-06, "loss": 0.5126, "step": 6317 }, { "epoch": 0.2899362121976963, "grad_norm": 0.41639837622642517, "learning_rate": 9.849862900057818e-06, "loss": 0.3229, "step": 6318 }, { "epoch": 0.2899821027029508, "grad_norm": 0.49247151613235474, "learning_rate": 9.849803261022313e-06, "loss": 0.4942, "step": 6319 }, { "epoch": 0.2900279932082052, "grad_norm": 1.1304585933685303, "learning_rate": 9.849743610324558e-06, "loss": 0.4759, "step": 6320 }, { "epoch": 0.2900738837134597, "grad_norm": 0.4426749050617218, "learning_rate": 9.849683947964704e-06, "loss": 0.3618, "step": 6321 }, { "epoch": 0.29011977421871415, "grad_norm": 0.46332091093063354, "learning_rate": 9.849624273942888e-06, "loss": 0.3773, "step": 6322 }, { "epoch": 0.2901656647239686, "grad_norm": 0.4430350661277771, "learning_rate": 9.849564588259257e-06, "loss": 0.4225, "step": 6323 }, { "epoch": 0.2902115552292231, "grad_norm": 0.44035014510154724, "learning_rate": 9.849504890913955e-06, "loss": 0.4053, "step": 6324 }, { "epoch": 0.29025744573447754, "grad_norm": 0.50107741355896, "learning_rate": 9.849445181907123e-06, "loss": 0.5099, "step": 6325 }, { "epoch": 0.290303336239732, "grad_norm": 0.5155110955238342, "learning_rate": 9.849385461238905e-06, "loss": 0.4322, "step": 6326 }, { "epoch": 0.2903492267449865, "grad_norm": 0.46884647011756897, "learning_rate": 9.849325728909447e-06, "loss": 0.3444, "step": 6327 }, { "epoch": 0.2903951172502409, "grad_norm": 0.5182893872261047, "learning_rate": 9.84926598491889e-06, "loss": 0.5378, "step": 6328 }, { "epoch": 0.29044100775549536, "grad_norm": 0.48908722400665283, "learning_rate": 9.849206229267379e-06, "loss": 0.4469, "step": 6329 }, { "epoch": 0.29048689826074986, "grad_norm": 0.4591538906097412, "learning_rate": 9.849146461955059e-06, "loss": 0.4338, "step": 6330 }, { "epoch": 0.2905327887660043, "grad_norm": 0.38945627212524414, "learning_rate": 9.84908668298207e-06, "loss": 0.3045, "step": 6331 }, { "epoch": 0.2905786792712588, "grad_norm": 0.49531134963035583, "learning_rate": 9.849026892348559e-06, "loss": 0.5334, "step": 6332 }, { "epoch": 0.29062456977651324, "grad_norm": 0.4673563539981842, "learning_rate": 9.848967090054669e-06, "loss": 0.3642, "step": 6333 }, { "epoch": 0.2906704602817677, "grad_norm": 0.46669042110443115, "learning_rate": 9.848907276100544e-06, "loss": 0.3777, "step": 6334 }, { "epoch": 0.2907163507870222, "grad_norm": 0.5021378993988037, "learning_rate": 9.848847450486324e-06, "loss": 0.4991, "step": 6335 }, { "epoch": 0.2907622412922766, "grad_norm": 0.4477701187133789, "learning_rate": 9.848787613212161e-06, "loss": 0.3844, "step": 6336 }, { "epoch": 0.29080813179753107, "grad_norm": 0.7059426307678223, "learning_rate": 9.848727764278192e-06, "loss": 0.4781, "step": 6337 }, { "epoch": 0.29085402230278556, "grad_norm": 0.46379926800727844, "learning_rate": 9.848667903684563e-06, "loss": 0.3711, "step": 6338 }, { "epoch": 0.29089991280804, "grad_norm": 0.5228615999221802, "learning_rate": 9.848608031431417e-06, "loss": 0.4622, "step": 6339 }, { "epoch": 0.2909458033132945, "grad_norm": 0.5413428544998169, "learning_rate": 9.8485481475189e-06, "loss": 0.5449, "step": 6340 }, { "epoch": 0.29099169381854895, "grad_norm": 0.47650569677352905, "learning_rate": 9.848488251947154e-06, "loss": 0.4415, "step": 6341 }, { "epoch": 0.2910375843238034, "grad_norm": 0.491497665643692, "learning_rate": 9.848428344716325e-06, "loss": 0.4288, "step": 6342 }, { "epoch": 0.2910834748290579, "grad_norm": 0.4416244924068451, "learning_rate": 9.848368425826557e-06, "loss": 0.339, "step": 6343 }, { "epoch": 0.29112936533431233, "grad_norm": 0.496927946805954, "learning_rate": 9.848308495277991e-06, "loss": 0.4707, "step": 6344 }, { "epoch": 0.29117525583956677, "grad_norm": 0.47234657406806946, "learning_rate": 9.848248553070776e-06, "loss": 0.4215, "step": 6345 }, { "epoch": 0.29122114634482127, "grad_norm": 0.4572868049144745, "learning_rate": 9.848188599205051e-06, "loss": 0.367, "step": 6346 }, { "epoch": 0.2912670368500757, "grad_norm": 0.45265915989875793, "learning_rate": 9.848128633680962e-06, "loss": 0.4051, "step": 6347 }, { "epoch": 0.2913129273553302, "grad_norm": 0.4708366394042969, "learning_rate": 9.848068656498655e-06, "loss": 0.3957, "step": 6348 }, { "epoch": 0.29135881786058465, "grad_norm": 0.5093710422515869, "learning_rate": 9.848008667658274e-06, "loss": 0.4993, "step": 6349 }, { "epoch": 0.2914047083658391, "grad_norm": 0.4938952922821045, "learning_rate": 9.84794866715996e-06, "loss": 0.4665, "step": 6350 }, { "epoch": 0.2914505988710936, "grad_norm": 0.4694634675979614, "learning_rate": 9.847888655003861e-06, "loss": 0.4194, "step": 6351 }, { "epoch": 0.29149648937634803, "grad_norm": 0.4938065707683563, "learning_rate": 9.84782863119012e-06, "loss": 0.3888, "step": 6352 }, { "epoch": 0.2915423798816025, "grad_norm": 0.48033443093299866, "learning_rate": 9.84776859571888e-06, "loss": 0.385, "step": 6353 }, { "epoch": 0.291588270386857, "grad_norm": 0.4907785654067993, "learning_rate": 9.847708548590286e-06, "loss": 0.4851, "step": 6354 }, { "epoch": 0.2916341608921114, "grad_norm": 0.4656744599342346, "learning_rate": 9.847648489804484e-06, "loss": 0.4369, "step": 6355 }, { "epoch": 0.29168005139736586, "grad_norm": 0.4629974067211151, "learning_rate": 9.847588419361617e-06, "loss": 0.4166, "step": 6356 }, { "epoch": 0.29172594190262036, "grad_norm": 0.46845969557762146, "learning_rate": 9.847528337261831e-06, "loss": 0.4363, "step": 6357 }, { "epoch": 0.2917718324078748, "grad_norm": 0.42425650358200073, "learning_rate": 9.84746824350527e-06, "loss": 0.3541, "step": 6358 }, { "epoch": 0.2918177229131293, "grad_norm": 0.511037290096283, "learning_rate": 9.847408138092075e-06, "loss": 0.4267, "step": 6359 }, { "epoch": 0.29186361341838374, "grad_norm": 0.46509623527526855, "learning_rate": 9.847348021022395e-06, "loss": 0.3967, "step": 6360 }, { "epoch": 0.2919095039236382, "grad_norm": 0.4297862946987152, "learning_rate": 9.847287892296373e-06, "loss": 0.3803, "step": 6361 }, { "epoch": 0.2919553944288927, "grad_norm": 0.4749957323074341, "learning_rate": 9.847227751914153e-06, "loss": 0.4744, "step": 6362 }, { "epoch": 0.2920012849341471, "grad_norm": 0.4495031237602234, "learning_rate": 9.847167599875878e-06, "loss": 0.3885, "step": 6363 }, { "epoch": 0.29204717543940156, "grad_norm": 0.46682700514793396, "learning_rate": 9.847107436181698e-06, "loss": 0.4055, "step": 6364 }, { "epoch": 0.29209306594465606, "grad_norm": 0.49244606494903564, "learning_rate": 9.847047260831752e-06, "loss": 0.4439, "step": 6365 }, { "epoch": 0.2921389564499105, "grad_norm": 0.4242806136608124, "learning_rate": 9.846987073826189e-06, "loss": 0.3485, "step": 6366 }, { "epoch": 0.292184846955165, "grad_norm": 0.44537290930747986, "learning_rate": 9.846926875165151e-06, "loss": 0.3951, "step": 6367 }, { "epoch": 0.29223073746041944, "grad_norm": 0.5811315178871155, "learning_rate": 9.846866664848784e-06, "loss": 0.3869, "step": 6368 }, { "epoch": 0.2922766279656739, "grad_norm": 0.4557437002658844, "learning_rate": 9.846806442877231e-06, "loss": 0.3864, "step": 6369 }, { "epoch": 0.2923225184709284, "grad_norm": 0.4330872893333435, "learning_rate": 9.846746209250639e-06, "loss": 0.3092, "step": 6370 }, { "epoch": 0.2923684089761828, "grad_norm": 0.48558738827705383, "learning_rate": 9.846685963969153e-06, "loss": 0.4185, "step": 6371 }, { "epoch": 0.29241429948143727, "grad_norm": 0.46199607849121094, "learning_rate": 9.846625707032916e-06, "loss": 0.3748, "step": 6372 }, { "epoch": 0.29246018998669177, "grad_norm": 0.4925325810909271, "learning_rate": 9.846565438442074e-06, "loss": 0.4596, "step": 6373 }, { "epoch": 0.2925060804919462, "grad_norm": 0.44381558895111084, "learning_rate": 9.846505158196772e-06, "loss": 0.4229, "step": 6374 }, { "epoch": 0.2925519709972007, "grad_norm": 0.44838079810142517, "learning_rate": 9.846444866297155e-06, "loss": 0.3692, "step": 6375 }, { "epoch": 0.29259786150245515, "grad_norm": 0.42445579171180725, "learning_rate": 9.846384562743368e-06, "loss": 0.3145, "step": 6376 }, { "epoch": 0.2926437520077096, "grad_norm": 0.43952476978302, "learning_rate": 9.846324247535553e-06, "loss": 0.3756, "step": 6377 }, { "epoch": 0.2926896425129641, "grad_norm": 0.4262857139110565, "learning_rate": 9.84626392067386e-06, "loss": 0.2983, "step": 6378 }, { "epoch": 0.29273553301821853, "grad_norm": 0.41948673129081726, "learning_rate": 9.846203582158432e-06, "loss": 0.3118, "step": 6379 }, { "epoch": 0.292781423523473, "grad_norm": 0.4843999445438385, "learning_rate": 9.846143231989414e-06, "loss": 0.4254, "step": 6380 }, { "epoch": 0.29282731402872747, "grad_norm": 0.4298352003097534, "learning_rate": 9.846082870166948e-06, "loss": 0.3348, "step": 6381 }, { "epoch": 0.2928732045339819, "grad_norm": 0.48264268040657043, "learning_rate": 9.846022496691186e-06, "loss": 0.434, "step": 6382 }, { "epoch": 0.29291909503923635, "grad_norm": 0.4493555724620819, "learning_rate": 9.845962111562267e-06, "loss": 0.3571, "step": 6383 }, { "epoch": 0.29296498554449085, "grad_norm": 0.4615822434425354, "learning_rate": 9.84590171478034e-06, "loss": 0.4247, "step": 6384 }, { "epoch": 0.2930108760497453, "grad_norm": 0.4548717439174652, "learning_rate": 9.845841306345547e-06, "loss": 0.3291, "step": 6385 }, { "epoch": 0.2930567665549998, "grad_norm": 0.4489175081253052, "learning_rate": 9.845780886258037e-06, "loss": 0.3643, "step": 6386 }, { "epoch": 0.29310265706025423, "grad_norm": 0.49297118186950684, "learning_rate": 9.845720454517952e-06, "loss": 0.444, "step": 6387 }, { "epoch": 0.2931485475655087, "grad_norm": 0.43482524156570435, "learning_rate": 9.845660011125439e-06, "loss": 0.3757, "step": 6388 }, { "epoch": 0.2931944380707632, "grad_norm": 0.4555683434009552, "learning_rate": 9.845599556080643e-06, "loss": 0.3224, "step": 6389 }, { "epoch": 0.2932403285760176, "grad_norm": 0.4259699285030365, "learning_rate": 9.84553908938371e-06, "loss": 0.3416, "step": 6390 }, { "epoch": 0.29328621908127206, "grad_norm": 0.47314390540122986, "learning_rate": 9.845478611034785e-06, "loss": 0.3927, "step": 6391 }, { "epoch": 0.29333210958652656, "grad_norm": 0.4683067798614502, "learning_rate": 9.845418121034012e-06, "loss": 0.36, "step": 6392 }, { "epoch": 0.293378000091781, "grad_norm": 0.5327858328819275, "learning_rate": 9.845357619381538e-06, "loss": 0.5268, "step": 6393 }, { "epoch": 0.2934238905970355, "grad_norm": 0.4776647984981537, "learning_rate": 9.845297106077508e-06, "loss": 0.4274, "step": 6394 }, { "epoch": 0.29346978110228994, "grad_norm": 0.4570990204811096, "learning_rate": 9.845236581122068e-06, "loss": 0.3529, "step": 6395 }, { "epoch": 0.2935156716075444, "grad_norm": 0.46448954939842224, "learning_rate": 9.845176044515364e-06, "loss": 0.4181, "step": 6396 }, { "epoch": 0.2935615621127989, "grad_norm": 0.4757956266403198, "learning_rate": 9.84511549625754e-06, "loss": 0.4223, "step": 6397 }, { "epoch": 0.2936074526180533, "grad_norm": 0.48264947533607483, "learning_rate": 9.845054936348742e-06, "loss": 0.3668, "step": 6398 }, { "epoch": 0.29365334312330776, "grad_norm": 0.4112507402896881, "learning_rate": 9.844994364789116e-06, "loss": 0.3455, "step": 6399 }, { "epoch": 0.29369923362856226, "grad_norm": 0.462025910615921, "learning_rate": 9.84493378157881e-06, "loss": 0.4124, "step": 6400 }, { "epoch": 0.2937451241338167, "grad_norm": 0.5162631273269653, "learning_rate": 9.844873186717965e-06, "loss": 0.4927, "step": 6401 }, { "epoch": 0.2937910146390712, "grad_norm": 0.4539836049079895, "learning_rate": 9.844812580206729e-06, "loss": 0.3894, "step": 6402 }, { "epoch": 0.29383690514432564, "grad_norm": 0.4653432369232178, "learning_rate": 9.84475196204525e-06, "loss": 0.3955, "step": 6403 }, { "epoch": 0.2938827956495801, "grad_norm": 0.44719579815864563, "learning_rate": 9.844691332233669e-06, "loss": 0.3909, "step": 6404 }, { "epoch": 0.2939286861548346, "grad_norm": 0.45951777696609497, "learning_rate": 9.844630690772134e-06, "loss": 0.4455, "step": 6405 }, { "epoch": 0.293974576660089, "grad_norm": 0.4864802360534668, "learning_rate": 9.844570037660793e-06, "loss": 0.4325, "step": 6406 }, { "epoch": 0.29402046716534347, "grad_norm": 0.4538343548774719, "learning_rate": 9.84450937289979e-06, "loss": 0.3527, "step": 6407 }, { "epoch": 0.29406635767059797, "grad_norm": 0.4541611075401306, "learning_rate": 9.84444869648927e-06, "loss": 0.389, "step": 6408 }, { "epoch": 0.2941122481758524, "grad_norm": 0.4638700783252716, "learning_rate": 9.844388008429381e-06, "loss": 0.3964, "step": 6409 }, { "epoch": 0.2941581386811069, "grad_norm": 0.47902151942253113, "learning_rate": 9.844327308720267e-06, "loss": 0.4073, "step": 6410 }, { "epoch": 0.29420402918636135, "grad_norm": 0.47087058424949646, "learning_rate": 9.844266597362075e-06, "loss": 0.4433, "step": 6411 }, { "epoch": 0.2942499196916158, "grad_norm": 0.4772723913192749, "learning_rate": 9.84420587435495e-06, "loss": 0.4385, "step": 6412 }, { "epoch": 0.2942958101968703, "grad_norm": 0.4500417113304138, "learning_rate": 9.84414513969904e-06, "loss": 0.3983, "step": 6413 }, { "epoch": 0.29434170070212473, "grad_norm": 0.4624321460723877, "learning_rate": 9.844084393394489e-06, "loss": 0.3858, "step": 6414 }, { "epoch": 0.2943875912073792, "grad_norm": 0.479070246219635, "learning_rate": 9.844023635441444e-06, "loss": 0.3847, "step": 6415 }, { "epoch": 0.29443348171263367, "grad_norm": 0.46939918398857117, "learning_rate": 9.84396286584005e-06, "loss": 0.4602, "step": 6416 }, { "epoch": 0.2944793722178881, "grad_norm": 0.44934970140457153, "learning_rate": 9.843902084590456e-06, "loss": 0.3572, "step": 6417 }, { "epoch": 0.29452526272314256, "grad_norm": 0.45539265871047974, "learning_rate": 9.843841291692807e-06, "loss": 0.4148, "step": 6418 }, { "epoch": 0.29457115322839705, "grad_norm": 0.4768684208393097, "learning_rate": 9.843780487147245e-06, "loss": 0.4083, "step": 6419 }, { "epoch": 0.2946170437336515, "grad_norm": 0.45186012983322144, "learning_rate": 9.843719670953922e-06, "loss": 0.4194, "step": 6420 }, { "epoch": 0.294662934238906, "grad_norm": 0.4309633672237396, "learning_rate": 9.843658843112983e-06, "loss": 0.3339, "step": 6421 }, { "epoch": 0.29470882474416044, "grad_norm": 0.45534074306488037, "learning_rate": 9.84359800362457e-06, "loss": 0.3801, "step": 6422 }, { "epoch": 0.2947547152494149, "grad_norm": 0.5023715496063232, "learning_rate": 9.843537152488834e-06, "loss": 0.4925, "step": 6423 }, { "epoch": 0.2948006057546694, "grad_norm": 0.4333205223083496, "learning_rate": 9.84347628970592e-06, "loss": 0.3989, "step": 6424 }, { "epoch": 0.2948464962599238, "grad_norm": 0.49474775791168213, "learning_rate": 9.843415415275974e-06, "loss": 0.4363, "step": 6425 }, { "epoch": 0.29489238676517826, "grad_norm": 0.4935716688632965, "learning_rate": 9.843354529199144e-06, "loss": 0.4546, "step": 6426 }, { "epoch": 0.29493827727043276, "grad_norm": 0.45932480692863464, "learning_rate": 9.843293631475571e-06, "loss": 0.4189, "step": 6427 }, { "epoch": 0.2949841677756872, "grad_norm": 0.4477168321609497, "learning_rate": 9.84323272210541e-06, "loss": 0.3724, "step": 6428 }, { "epoch": 0.2950300582809417, "grad_norm": 0.4423210918903351, "learning_rate": 9.8431718010888e-06, "loss": 0.3546, "step": 6429 }, { "epoch": 0.29507594878619614, "grad_norm": 0.4845663607120514, "learning_rate": 9.84311086842589e-06, "loss": 0.4647, "step": 6430 }, { "epoch": 0.2951218392914506, "grad_norm": 0.4656390845775604, "learning_rate": 9.84304992411683e-06, "loss": 0.3645, "step": 6431 }, { "epoch": 0.2951677297967051, "grad_norm": 0.4559401273727417, "learning_rate": 9.842988968161762e-06, "loss": 0.3815, "step": 6432 }, { "epoch": 0.2952136203019595, "grad_norm": 0.507779061794281, "learning_rate": 9.842928000560833e-06, "loss": 0.4388, "step": 6433 }, { "epoch": 0.29525951080721397, "grad_norm": 0.4545881748199463, "learning_rate": 9.842867021314192e-06, "loss": 0.3696, "step": 6434 }, { "epoch": 0.29530540131246846, "grad_norm": 0.49496033787727356, "learning_rate": 9.842806030421983e-06, "loss": 0.44, "step": 6435 }, { "epoch": 0.2953512918177229, "grad_norm": 0.4435413181781769, "learning_rate": 9.842745027884353e-06, "loss": 0.3548, "step": 6436 }, { "epoch": 0.2953971823229774, "grad_norm": 0.45298701524734497, "learning_rate": 9.842684013701452e-06, "loss": 0.3755, "step": 6437 }, { "epoch": 0.29544307282823185, "grad_norm": 0.5006486773490906, "learning_rate": 9.842622987873423e-06, "loss": 0.5108, "step": 6438 }, { "epoch": 0.2954889633334863, "grad_norm": 0.41208890080451965, "learning_rate": 9.842561950400414e-06, "loss": 0.3358, "step": 6439 }, { "epoch": 0.2955348538387408, "grad_norm": 0.4643336832523346, "learning_rate": 9.842500901282573e-06, "loss": 0.4702, "step": 6440 }, { "epoch": 0.2955807443439952, "grad_norm": 0.5686439871788025, "learning_rate": 9.842439840520046e-06, "loss": 0.5074, "step": 6441 }, { "epoch": 0.29562663484924967, "grad_norm": 0.497657835483551, "learning_rate": 9.842378768112977e-06, "loss": 0.4649, "step": 6442 }, { "epoch": 0.29567252535450417, "grad_norm": 0.4624097943305969, "learning_rate": 9.842317684061518e-06, "loss": 0.3705, "step": 6443 }, { "epoch": 0.2957184158597586, "grad_norm": 0.5089067220687866, "learning_rate": 9.842256588365811e-06, "loss": 0.4939, "step": 6444 }, { "epoch": 0.29576430636501305, "grad_norm": 0.45058178901672363, "learning_rate": 9.842195481026006e-06, "loss": 0.3532, "step": 6445 }, { "epoch": 0.29581019687026755, "grad_norm": 0.5498747229576111, "learning_rate": 9.842134362042251e-06, "loss": 0.4785, "step": 6446 }, { "epoch": 0.295856087375522, "grad_norm": 0.46191641688346863, "learning_rate": 9.842073231414688e-06, "loss": 0.4063, "step": 6447 }, { "epoch": 0.2959019778807765, "grad_norm": 0.5215046405792236, "learning_rate": 9.84201208914347e-06, "loss": 0.4213, "step": 6448 }, { "epoch": 0.29594786838603093, "grad_norm": 0.44170433282852173, "learning_rate": 9.841950935228738e-06, "loss": 0.3179, "step": 6449 }, { "epoch": 0.2959937588912854, "grad_norm": 0.4883936047554016, "learning_rate": 9.841889769670644e-06, "loss": 0.4437, "step": 6450 }, { "epoch": 0.2960396493965399, "grad_norm": 0.5236921310424805, "learning_rate": 9.841828592469334e-06, "loss": 0.4653, "step": 6451 }, { "epoch": 0.2960855399017943, "grad_norm": 0.482817143201828, "learning_rate": 9.841767403624953e-06, "loss": 0.3997, "step": 6452 }, { "epoch": 0.29613143040704876, "grad_norm": 0.44491875171661377, "learning_rate": 9.84170620313765e-06, "loss": 0.361, "step": 6453 }, { "epoch": 0.29617732091230325, "grad_norm": 0.5162143111228943, "learning_rate": 9.84164499100757e-06, "loss": 0.4497, "step": 6454 }, { "epoch": 0.2962232114175577, "grad_norm": 0.4418305456638336, "learning_rate": 9.841583767234865e-06, "loss": 0.3561, "step": 6455 }, { "epoch": 0.2962691019228122, "grad_norm": 0.5333799719810486, "learning_rate": 9.841522531819677e-06, "loss": 0.4374, "step": 6456 }, { "epoch": 0.29631499242806664, "grad_norm": 0.5086747407913208, "learning_rate": 9.841461284762155e-06, "loss": 0.4472, "step": 6457 }, { "epoch": 0.2963608829333211, "grad_norm": 0.46974003314971924, "learning_rate": 9.841400026062449e-06, "loss": 0.4156, "step": 6458 }, { "epoch": 0.2964067734385756, "grad_norm": 0.5830226540565491, "learning_rate": 9.841338755720702e-06, "loss": 0.5177, "step": 6459 }, { "epoch": 0.29645266394383, "grad_norm": 0.45398828387260437, "learning_rate": 9.841277473737063e-06, "loss": 0.4129, "step": 6460 }, { "epoch": 0.29649855444908446, "grad_norm": 0.5102665424346924, "learning_rate": 9.841216180111682e-06, "loss": 0.5214, "step": 6461 }, { "epoch": 0.29654444495433896, "grad_norm": 0.4631696343421936, "learning_rate": 9.841154874844702e-06, "loss": 0.3102, "step": 6462 }, { "epoch": 0.2965903354595934, "grad_norm": 0.4801292419433594, "learning_rate": 9.841093557936273e-06, "loss": 0.3703, "step": 6463 }, { "epoch": 0.2966362259648479, "grad_norm": 0.45674824714660645, "learning_rate": 9.84103222938654e-06, "loss": 0.4139, "step": 6464 }, { "epoch": 0.29668211647010234, "grad_norm": 0.47155240178108215, "learning_rate": 9.840970889195656e-06, "loss": 0.419, "step": 6465 }, { "epoch": 0.2967280069753568, "grad_norm": 0.5416005253791809, "learning_rate": 9.840909537363762e-06, "loss": 0.5438, "step": 6466 }, { "epoch": 0.2967738974806113, "grad_norm": 0.5135039687156677, "learning_rate": 9.84084817389101e-06, "loss": 0.5158, "step": 6467 }, { "epoch": 0.2968197879858657, "grad_norm": 0.452438622713089, "learning_rate": 9.840786798777544e-06, "loss": 0.3799, "step": 6468 }, { "epoch": 0.29686567849112017, "grad_norm": 0.4335687756538391, "learning_rate": 9.840725412023514e-06, "loss": 0.3331, "step": 6469 }, { "epoch": 0.29691156899637466, "grad_norm": 0.4449588358402252, "learning_rate": 9.840664013629069e-06, "loss": 0.3797, "step": 6470 }, { "epoch": 0.2969574595016291, "grad_norm": 0.4790562391281128, "learning_rate": 9.840602603594354e-06, "loss": 0.399, "step": 6471 }, { "epoch": 0.29700335000688355, "grad_norm": 0.4763764441013336, "learning_rate": 9.840541181919518e-06, "loss": 0.3955, "step": 6472 }, { "epoch": 0.29704924051213805, "grad_norm": 0.4998711347579956, "learning_rate": 9.840479748604706e-06, "loss": 0.4513, "step": 6473 }, { "epoch": 0.2970951310173925, "grad_norm": 0.45951715111732483, "learning_rate": 9.84041830365007e-06, "loss": 0.413, "step": 6474 }, { "epoch": 0.297141021522647, "grad_norm": 0.4189939498901367, "learning_rate": 9.840356847055754e-06, "loss": 0.3119, "step": 6475 }, { "epoch": 0.29718691202790143, "grad_norm": 0.4655221998691559, "learning_rate": 9.840295378821909e-06, "loss": 0.3727, "step": 6476 }, { "epoch": 0.29723280253315587, "grad_norm": 0.439113587141037, "learning_rate": 9.84023389894868e-06, "loss": 0.3901, "step": 6477 }, { "epoch": 0.29727869303841037, "grad_norm": 0.49309042096138, "learning_rate": 9.840172407436217e-06, "loss": 0.4295, "step": 6478 }, { "epoch": 0.2973245835436648, "grad_norm": 0.5152106881141663, "learning_rate": 9.840110904284668e-06, "loss": 0.5105, "step": 6479 }, { "epoch": 0.29737047404891925, "grad_norm": 0.49067193269729614, "learning_rate": 9.840049389494177e-06, "loss": 0.3915, "step": 6480 }, { "epoch": 0.29741636455417375, "grad_norm": 0.5083408355712891, "learning_rate": 9.839987863064897e-06, "loss": 0.5256, "step": 6481 }, { "epoch": 0.2974622550594282, "grad_norm": 0.4649994969367981, "learning_rate": 9.839926324996974e-06, "loss": 0.4401, "step": 6482 }, { "epoch": 0.2975081455646827, "grad_norm": 0.49472588300704956, "learning_rate": 9.839864775290555e-06, "loss": 0.5231, "step": 6483 }, { "epoch": 0.29755403606993713, "grad_norm": 0.5070353746414185, "learning_rate": 9.839803213945788e-06, "loss": 0.5542, "step": 6484 }, { "epoch": 0.2975999265751916, "grad_norm": 0.48984256386756897, "learning_rate": 9.839741640962822e-06, "loss": 0.4659, "step": 6485 }, { "epoch": 0.2976458170804461, "grad_norm": 0.4643564522266388, "learning_rate": 9.839680056341805e-06, "loss": 0.4231, "step": 6486 }, { "epoch": 0.2976917075857005, "grad_norm": 0.4469192922115326, "learning_rate": 9.839618460082885e-06, "loss": 0.3666, "step": 6487 }, { "epoch": 0.29773759809095496, "grad_norm": 0.41152966022491455, "learning_rate": 9.839556852186209e-06, "loss": 0.3313, "step": 6488 }, { "epoch": 0.29778348859620946, "grad_norm": 0.4438681900501251, "learning_rate": 9.839495232651928e-06, "loss": 0.3721, "step": 6489 }, { "epoch": 0.2978293791014639, "grad_norm": 0.4528467655181885, "learning_rate": 9.839433601480188e-06, "loss": 0.4153, "step": 6490 }, { "epoch": 0.2978752696067184, "grad_norm": 0.45829904079437256, "learning_rate": 9.839371958671138e-06, "loss": 0.4018, "step": 6491 }, { "epoch": 0.29792116011197284, "grad_norm": 0.48633503913879395, "learning_rate": 9.839310304224925e-06, "loss": 0.4758, "step": 6492 }, { "epoch": 0.2979670506172273, "grad_norm": 0.47346365451812744, "learning_rate": 9.839248638141697e-06, "loss": 0.4716, "step": 6493 }, { "epoch": 0.2980129411224818, "grad_norm": 0.4712088108062744, "learning_rate": 9.839186960421605e-06, "loss": 0.4176, "step": 6494 }, { "epoch": 0.2980588316277362, "grad_norm": 0.46418920159339905, "learning_rate": 9.839125271064794e-06, "loss": 0.3809, "step": 6495 }, { "epoch": 0.29810472213299066, "grad_norm": 0.5189914703369141, "learning_rate": 9.839063570071416e-06, "loss": 0.513, "step": 6496 }, { "epoch": 0.29815061263824516, "grad_norm": 0.4563591480255127, "learning_rate": 9.839001857441616e-06, "loss": 0.3524, "step": 6497 }, { "epoch": 0.2981965031434996, "grad_norm": 0.4932597577571869, "learning_rate": 9.838940133175544e-06, "loss": 0.4692, "step": 6498 }, { "epoch": 0.2982423936487541, "grad_norm": 0.49317753314971924, "learning_rate": 9.838878397273348e-06, "loss": 0.4512, "step": 6499 }, { "epoch": 0.29828828415400854, "grad_norm": 0.4801498055458069, "learning_rate": 9.838816649735177e-06, "loss": 0.4833, "step": 6500 }, { "epoch": 0.298334174659263, "grad_norm": 0.4859009385108948, "learning_rate": 9.83875489056118e-06, "loss": 0.415, "step": 6501 }, { "epoch": 0.2983800651645175, "grad_norm": 0.4693339765071869, "learning_rate": 9.838693119751504e-06, "loss": 0.3941, "step": 6502 }, { "epoch": 0.2984259556697719, "grad_norm": 0.4729110300540924, "learning_rate": 9.838631337306296e-06, "loss": 0.4608, "step": 6503 }, { "epoch": 0.29847184617502637, "grad_norm": 0.4753352105617523, "learning_rate": 9.838569543225708e-06, "loss": 0.4244, "step": 6504 }, { "epoch": 0.29851773668028087, "grad_norm": 0.436207115650177, "learning_rate": 9.83850773750989e-06, "loss": 0.339, "step": 6505 }, { "epoch": 0.2985636271855353, "grad_norm": 0.4982065260410309, "learning_rate": 9.838445920158986e-06, "loss": 0.446, "step": 6506 }, { "epoch": 0.29860951769078975, "grad_norm": 0.4677481949329376, "learning_rate": 9.838384091173146e-06, "loss": 0.45, "step": 6507 }, { "epoch": 0.29865540819604425, "grad_norm": 0.46237245202064514, "learning_rate": 9.83832225055252e-06, "loss": 0.385, "step": 6508 }, { "epoch": 0.2987012987012987, "grad_norm": 0.43586406111717224, "learning_rate": 9.838260398297256e-06, "loss": 0.3582, "step": 6509 }, { "epoch": 0.2987471892065532, "grad_norm": 0.48615148663520813, "learning_rate": 9.838198534407503e-06, "loss": 0.4371, "step": 6510 }, { "epoch": 0.29879307971180763, "grad_norm": 0.45605766773223877, "learning_rate": 9.838136658883408e-06, "loss": 0.3935, "step": 6511 }, { "epoch": 0.29883897021706207, "grad_norm": 0.4805205762386322, "learning_rate": 9.838074771725124e-06, "loss": 0.4107, "step": 6512 }, { "epoch": 0.29888486072231657, "grad_norm": 0.46869051456451416, "learning_rate": 9.838012872932793e-06, "loss": 0.4279, "step": 6513 }, { "epoch": 0.298930751227571, "grad_norm": 0.4750407040119171, "learning_rate": 9.837950962506573e-06, "loss": 0.435, "step": 6514 }, { "epoch": 0.29897664173282545, "grad_norm": 0.4712446630001068, "learning_rate": 9.837889040446604e-06, "loss": 0.376, "step": 6515 }, { "epoch": 0.29902253223807995, "grad_norm": 0.48265543580055237, "learning_rate": 9.837827106753041e-06, "loss": 0.4212, "step": 6516 }, { "epoch": 0.2990684227433344, "grad_norm": 0.4499903619289398, "learning_rate": 9.83776516142603e-06, "loss": 0.4286, "step": 6517 }, { "epoch": 0.2991143132485889, "grad_norm": 0.46939411759376526, "learning_rate": 9.837703204465719e-06, "loss": 0.4197, "step": 6518 }, { "epoch": 0.29916020375384333, "grad_norm": 0.43522733449935913, "learning_rate": 9.837641235872261e-06, "loss": 0.3497, "step": 6519 }, { "epoch": 0.2992060942590978, "grad_norm": 0.46146097779273987, "learning_rate": 9.837579255645802e-06, "loss": 0.3878, "step": 6520 }, { "epoch": 0.2992519847643523, "grad_norm": 0.4577597975730896, "learning_rate": 9.837517263786492e-06, "loss": 0.3928, "step": 6521 }, { "epoch": 0.2992978752696067, "grad_norm": 0.45282700657844543, "learning_rate": 9.837455260294477e-06, "loss": 0.3904, "step": 6522 }, { "epoch": 0.29934376577486116, "grad_norm": 0.4805585443973541, "learning_rate": 9.837393245169911e-06, "loss": 0.4518, "step": 6523 }, { "epoch": 0.29938965628011566, "grad_norm": 0.46631982922554016, "learning_rate": 9.837331218412942e-06, "loss": 0.4321, "step": 6524 }, { "epoch": 0.2994355467853701, "grad_norm": 0.4699938893318176, "learning_rate": 9.837269180023717e-06, "loss": 0.3663, "step": 6525 }, { "epoch": 0.2994814372906246, "grad_norm": 0.4217190742492676, "learning_rate": 9.837207130002387e-06, "loss": 0.3209, "step": 6526 }, { "epoch": 0.29952732779587904, "grad_norm": 0.6820508241653442, "learning_rate": 9.837145068349099e-06, "loss": 0.56, "step": 6527 }, { "epoch": 0.2995732183011335, "grad_norm": 0.5178728699684143, "learning_rate": 9.837082995064005e-06, "loss": 0.4858, "step": 6528 }, { "epoch": 0.299619108806388, "grad_norm": 0.4568500518798828, "learning_rate": 9.837020910147251e-06, "loss": 0.3856, "step": 6529 }, { "epoch": 0.2996649993116424, "grad_norm": 0.4973050057888031, "learning_rate": 9.83695881359899e-06, "loss": 0.4751, "step": 6530 }, { "epoch": 0.29971088981689686, "grad_norm": 0.45793575048446655, "learning_rate": 9.83689670541937e-06, "loss": 0.4092, "step": 6531 }, { "epoch": 0.29975678032215136, "grad_norm": 0.4828096032142639, "learning_rate": 9.836834585608539e-06, "loss": 0.4164, "step": 6532 }, { "epoch": 0.2998026708274058, "grad_norm": 0.49118292331695557, "learning_rate": 9.83677245416665e-06, "loss": 0.5064, "step": 6533 }, { "epoch": 0.29984856133266025, "grad_norm": 0.4733990728855133, "learning_rate": 9.836710311093847e-06, "loss": 0.4062, "step": 6534 }, { "epoch": 0.29989445183791474, "grad_norm": 0.4406806528568268, "learning_rate": 9.836648156390283e-06, "loss": 0.3573, "step": 6535 }, { "epoch": 0.2999403423431692, "grad_norm": 0.43260520696640015, "learning_rate": 9.836585990056106e-06, "loss": 0.3823, "step": 6536 }, { "epoch": 0.2999862328484237, "grad_norm": 0.450859397649765, "learning_rate": 9.836523812091467e-06, "loss": 0.383, "step": 6537 }, { "epoch": 0.3000321233536781, "grad_norm": 0.4424304962158203, "learning_rate": 9.836461622496513e-06, "loss": 0.3363, "step": 6538 }, { "epoch": 0.30007801385893257, "grad_norm": 0.430082768201828, "learning_rate": 9.836399421271398e-06, "loss": 0.3244, "step": 6539 }, { "epoch": 0.30012390436418707, "grad_norm": 0.45263203978538513, "learning_rate": 9.836337208416267e-06, "loss": 0.3847, "step": 6540 }, { "epoch": 0.3001697948694415, "grad_norm": 0.43024125695228577, "learning_rate": 9.836274983931271e-06, "loss": 0.3556, "step": 6541 }, { "epoch": 0.30021568537469595, "grad_norm": 0.49892905354499817, "learning_rate": 9.836212747816561e-06, "loss": 0.4458, "step": 6542 }, { "epoch": 0.30026157587995045, "grad_norm": 0.46596211194992065, "learning_rate": 9.836150500072286e-06, "loss": 0.3829, "step": 6543 }, { "epoch": 0.3003074663852049, "grad_norm": 0.4795933961868286, "learning_rate": 9.836088240698596e-06, "loss": 0.425, "step": 6544 }, { "epoch": 0.3003533568904594, "grad_norm": 0.49768438935279846, "learning_rate": 9.836025969695638e-06, "loss": 0.4604, "step": 6545 }, { "epoch": 0.30039924739571383, "grad_norm": 0.4352327585220337, "learning_rate": 9.835963687063566e-06, "loss": 0.3119, "step": 6546 }, { "epoch": 0.3004451379009683, "grad_norm": 0.47003909945487976, "learning_rate": 9.835901392802527e-06, "loss": 0.3984, "step": 6547 }, { "epoch": 0.30049102840622277, "grad_norm": 0.46133220195770264, "learning_rate": 9.83583908691267e-06, "loss": 0.4355, "step": 6548 }, { "epoch": 0.3005369189114772, "grad_norm": 0.46906328201293945, "learning_rate": 9.835776769394147e-06, "loss": 0.4026, "step": 6549 }, { "epoch": 0.30058280941673166, "grad_norm": 0.4672048091888428, "learning_rate": 9.835714440247107e-06, "loss": 0.4234, "step": 6550 }, { "epoch": 0.30062869992198615, "grad_norm": 0.5263383984565735, "learning_rate": 9.8356520994717e-06, "loss": 0.5082, "step": 6551 }, { "epoch": 0.3006745904272406, "grad_norm": 0.47042831778526306, "learning_rate": 9.835589747068077e-06, "loss": 0.4367, "step": 6552 }, { "epoch": 0.3007204809324951, "grad_norm": 0.4628260135650635, "learning_rate": 9.835527383036384e-06, "loss": 0.4198, "step": 6553 }, { "epoch": 0.30076637143774954, "grad_norm": 0.46939021348953247, "learning_rate": 9.835465007376776e-06, "loss": 0.3787, "step": 6554 }, { "epoch": 0.300812261943004, "grad_norm": 0.49608689546585083, "learning_rate": 9.835402620089401e-06, "loss": 0.4822, "step": 6555 }, { "epoch": 0.3008581524482585, "grad_norm": 0.47997990250587463, "learning_rate": 9.835340221174409e-06, "loss": 0.4409, "step": 6556 }, { "epoch": 0.3009040429535129, "grad_norm": 0.4424269497394562, "learning_rate": 9.83527781063195e-06, "loss": 0.3396, "step": 6557 }, { "epoch": 0.30094993345876736, "grad_norm": 0.4835622012615204, "learning_rate": 9.835215388462172e-06, "loss": 0.4149, "step": 6558 }, { "epoch": 0.30099582396402186, "grad_norm": 0.44946855306625366, "learning_rate": 9.835152954665228e-06, "loss": 0.3803, "step": 6559 }, { "epoch": 0.3010417144692763, "grad_norm": 0.4765688180923462, "learning_rate": 9.835090509241267e-06, "loss": 0.4577, "step": 6560 }, { "epoch": 0.30108760497453074, "grad_norm": 0.4615131616592407, "learning_rate": 9.83502805219044e-06, "loss": 0.4384, "step": 6561 }, { "epoch": 0.30113349547978524, "grad_norm": 0.428415447473526, "learning_rate": 9.834965583512895e-06, "loss": 0.4073, "step": 6562 }, { "epoch": 0.3011793859850397, "grad_norm": 0.4402190148830414, "learning_rate": 9.834903103208786e-06, "loss": 0.376, "step": 6563 }, { "epoch": 0.3012252764902942, "grad_norm": 0.43763837218284607, "learning_rate": 9.834840611278259e-06, "loss": 0.3386, "step": 6564 }, { "epoch": 0.3012711669955486, "grad_norm": 0.49497467279434204, "learning_rate": 9.834778107721469e-06, "loss": 0.4624, "step": 6565 }, { "epoch": 0.30131705750080306, "grad_norm": 0.4978835880756378, "learning_rate": 9.83471559253856e-06, "loss": 0.4564, "step": 6566 }, { "epoch": 0.30136294800605756, "grad_norm": 0.43127644062042236, "learning_rate": 9.834653065729686e-06, "loss": 0.3442, "step": 6567 }, { "epoch": 0.301408838511312, "grad_norm": 0.4543454945087433, "learning_rate": 9.834590527295001e-06, "loss": 0.3691, "step": 6568 }, { "epoch": 0.30145472901656645, "grad_norm": 0.4683040678501129, "learning_rate": 9.834527977234648e-06, "loss": 0.458, "step": 6569 }, { "epoch": 0.30150061952182095, "grad_norm": 0.46243152022361755, "learning_rate": 9.834465415548782e-06, "loss": 0.342, "step": 6570 }, { "epoch": 0.3015465100270754, "grad_norm": 0.5035633444786072, "learning_rate": 9.834402842237554e-06, "loss": 0.3959, "step": 6571 }, { "epoch": 0.3015924005323299, "grad_norm": 0.48306867480278015, "learning_rate": 9.83434025730111e-06, "loss": 0.454, "step": 6572 }, { "epoch": 0.3016382910375843, "grad_norm": 0.5091403722763062, "learning_rate": 9.834277660739606e-06, "loss": 0.42, "step": 6573 }, { "epoch": 0.30168418154283877, "grad_norm": 0.5102655291557312, "learning_rate": 9.83421505255319e-06, "loss": 0.3819, "step": 6574 }, { "epoch": 0.30173007204809327, "grad_norm": 0.4413202106952667, "learning_rate": 9.83415243274201e-06, "loss": 0.3708, "step": 6575 }, { "epoch": 0.3017759625533477, "grad_norm": 0.5381954908370972, "learning_rate": 9.834089801306222e-06, "loss": 0.5789, "step": 6576 }, { "epoch": 0.30182185305860215, "grad_norm": 0.4343145489692688, "learning_rate": 9.834027158245973e-06, "loss": 0.3496, "step": 6577 }, { "epoch": 0.30186774356385665, "grad_norm": 0.4530371427536011, "learning_rate": 9.833964503561413e-06, "loss": 0.4166, "step": 6578 }, { "epoch": 0.3019136340691111, "grad_norm": 0.4598515033721924, "learning_rate": 9.833901837252695e-06, "loss": 0.3957, "step": 6579 }, { "epoch": 0.3019595245743656, "grad_norm": 0.44292208552360535, "learning_rate": 9.83383915931997e-06, "loss": 0.3571, "step": 6580 }, { "epoch": 0.30200541507962003, "grad_norm": 0.4871571958065033, "learning_rate": 9.833776469763385e-06, "loss": 0.3955, "step": 6581 }, { "epoch": 0.3020513055848745, "grad_norm": 0.44335129857063293, "learning_rate": 9.833713768583093e-06, "loss": 0.3983, "step": 6582 }, { "epoch": 0.30209719609012897, "grad_norm": 0.4381604492664337, "learning_rate": 9.833651055779247e-06, "loss": 0.3204, "step": 6583 }, { "epoch": 0.3021430865953834, "grad_norm": 0.46537187695503235, "learning_rate": 9.833588331351995e-06, "loss": 0.37, "step": 6584 }, { "epoch": 0.30218897710063786, "grad_norm": 0.4677359163761139, "learning_rate": 9.833525595301488e-06, "loss": 0.3702, "step": 6585 }, { "epoch": 0.30223486760589235, "grad_norm": 0.4967990219593048, "learning_rate": 9.833462847627877e-06, "loss": 0.4279, "step": 6586 }, { "epoch": 0.3022807581111468, "grad_norm": 0.4469231963157654, "learning_rate": 9.833400088331314e-06, "loss": 0.4083, "step": 6587 }, { "epoch": 0.30232664861640124, "grad_norm": 0.44363659620285034, "learning_rate": 9.83333731741195e-06, "loss": 0.3688, "step": 6588 }, { "epoch": 0.30237253912165574, "grad_norm": 0.4754067361354828, "learning_rate": 9.833274534869934e-06, "loss": 0.4374, "step": 6589 }, { "epoch": 0.3024184296269102, "grad_norm": 0.44701388478279114, "learning_rate": 9.83321174070542e-06, "loss": 0.3807, "step": 6590 }, { "epoch": 0.3024643201321647, "grad_norm": 0.4392775893211365, "learning_rate": 9.833148934918555e-06, "loss": 0.3418, "step": 6591 }, { "epoch": 0.3025102106374191, "grad_norm": 0.47023794054985046, "learning_rate": 9.833086117509493e-06, "loss": 0.3818, "step": 6592 }, { "epoch": 0.30255610114267356, "grad_norm": 0.46759796142578125, "learning_rate": 9.833023288478385e-06, "loss": 0.3714, "step": 6593 }, { "epoch": 0.30260199164792806, "grad_norm": 0.4507737159729004, "learning_rate": 9.832960447825382e-06, "loss": 0.4002, "step": 6594 }, { "epoch": 0.3026478821531825, "grad_norm": 0.47006985545158386, "learning_rate": 9.832897595550633e-06, "loss": 0.4088, "step": 6595 }, { "epoch": 0.30269377265843694, "grad_norm": 0.46786174178123474, "learning_rate": 9.83283473165429e-06, "loss": 0.4398, "step": 6596 }, { "epoch": 0.30273966316369144, "grad_norm": 0.4522979259490967, "learning_rate": 9.832771856136506e-06, "loss": 0.3766, "step": 6597 }, { "epoch": 0.3027855536689459, "grad_norm": 0.5230292677879333, "learning_rate": 9.832708968997432e-06, "loss": 0.4698, "step": 6598 }, { "epoch": 0.3028314441742004, "grad_norm": 0.44311752915382385, "learning_rate": 9.832646070237214e-06, "loss": 0.3761, "step": 6599 }, { "epoch": 0.3028773346794548, "grad_norm": 0.524550199508667, "learning_rate": 9.832583159856012e-06, "loss": 0.4986, "step": 6600 }, { "epoch": 0.30292322518470927, "grad_norm": 0.42996567487716675, "learning_rate": 9.832520237853971e-06, "loss": 0.3615, "step": 6601 }, { "epoch": 0.30296911568996376, "grad_norm": 0.8597694039344788, "learning_rate": 9.832457304231243e-06, "loss": 0.3857, "step": 6602 }, { "epoch": 0.3030150061952182, "grad_norm": 0.47751420736312866, "learning_rate": 9.832394358987982e-06, "loss": 0.4148, "step": 6603 }, { "epoch": 0.30306089670047265, "grad_norm": 0.47136661410331726, "learning_rate": 9.832331402124337e-06, "loss": 0.3816, "step": 6604 }, { "epoch": 0.30310678720572715, "grad_norm": 0.4670959413051605, "learning_rate": 9.83226843364046e-06, "loss": 0.3492, "step": 6605 }, { "epoch": 0.3031526777109816, "grad_norm": 0.42425355315208435, "learning_rate": 9.832205453536501e-06, "loss": 0.3333, "step": 6606 }, { "epoch": 0.3031985682162361, "grad_norm": 0.46095117926597595, "learning_rate": 9.832142461812615e-06, "loss": 0.3674, "step": 6607 }, { "epoch": 0.30324445872149053, "grad_norm": 0.4839651584625244, "learning_rate": 9.832079458468952e-06, "loss": 0.4645, "step": 6608 }, { "epoch": 0.30329034922674497, "grad_norm": 0.4896933138370514, "learning_rate": 9.83201644350566e-06, "loss": 0.4454, "step": 6609 }, { "epoch": 0.30333623973199947, "grad_norm": 0.44667983055114746, "learning_rate": 9.831953416922896e-06, "loss": 0.426, "step": 6610 }, { "epoch": 0.3033821302372539, "grad_norm": 0.49069464206695557, "learning_rate": 9.83189037872081e-06, "loss": 0.5117, "step": 6611 }, { "epoch": 0.30342802074250835, "grad_norm": 0.46525976061820984, "learning_rate": 9.83182732889955e-06, "loss": 0.402, "step": 6612 }, { "epoch": 0.30347391124776285, "grad_norm": 0.47777456045150757, "learning_rate": 9.831764267459272e-06, "loss": 0.4385, "step": 6613 }, { "epoch": 0.3035198017530173, "grad_norm": 0.4671141505241394, "learning_rate": 9.831701194400125e-06, "loss": 0.364, "step": 6614 }, { "epoch": 0.3035656922582718, "grad_norm": 0.4922887086868286, "learning_rate": 9.831638109722261e-06, "loss": 0.4119, "step": 6615 }, { "epoch": 0.30361158276352623, "grad_norm": 0.43810832500457764, "learning_rate": 9.831575013425834e-06, "loss": 0.3676, "step": 6616 }, { "epoch": 0.3036574732687807, "grad_norm": 0.48343196511268616, "learning_rate": 9.831511905510993e-06, "loss": 0.4167, "step": 6617 }, { "epoch": 0.3037033637740352, "grad_norm": 0.4384100139141083, "learning_rate": 9.831448785977891e-06, "loss": 0.3578, "step": 6618 }, { "epoch": 0.3037492542792896, "grad_norm": 0.4542798101902008, "learning_rate": 9.83138565482668e-06, "loss": 0.3735, "step": 6619 }, { "epoch": 0.30379514478454406, "grad_norm": 0.5416396260261536, "learning_rate": 9.831322512057509e-06, "loss": 0.5325, "step": 6620 }, { "epoch": 0.30384103528979856, "grad_norm": 0.45069217681884766, "learning_rate": 9.831259357670536e-06, "loss": 0.3483, "step": 6621 }, { "epoch": 0.303886925795053, "grad_norm": 0.4554518163204193, "learning_rate": 9.831196191665905e-06, "loss": 0.4149, "step": 6622 }, { "epoch": 0.30393281630030744, "grad_norm": 0.4834083616733551, "learning_rate": 9.831133014043774e-06, "loss": 0.3852, "step": 6623 }, { "epoch": 0.30397870680556194, "grad_norm": 0.5047228336334229, "learning_rate": 9.831069824804292e-06, "loss": 0.4666, "step": 6624 }, { "epoch": 0.3040245973108164, "grad_norm": 0.4621022641658783, "learning_rate": 9.831006623947613e-06, "loss": 0.3605, "step": 6625 }, { "epoch": 0.3040704878160709, "grad_norm": 0.46323683857917786, "learning_rate": 9.830943411473888e-06, "loss": 0.4444, "step": 6626 }, { "epoch": 0.3041163783213253, "grad_norm": 0.4874735176563263, "learning_rate": 9.830880187383268e-06, "loss": 0.5466, "step": 6627 }, { "epoch": 0.30416226882657976, "grad_norm": 0.4583863317966461, "learning_rate": 9.830816951675904e-06, "loss": 0.359, "step": 6628 }, { "epoch": 0.30420815933183426, "grad_norm": 0.45843398571014404, "learning_rate": 9.830753704351952e-06, "loss": 0.3759, "step": 6629 }, { "epoch": 0.3042540498370887, "grad_norm": 0.4736770689487457, "learning_rate": 9.830690445411562e-06, "loss": 0.3644, "step": 6630 }, { "epoch": 0.30429994034234314, "grad_norm": 0.46205586194992065, "learning_rate": 9.830627174854886e-06, "loss": 0.3539, "step": 6631 }, { "epoch": 0.30434583084759764, "grad_norm": 0.4209558367729187, "learning_rate": 9.830563892682076e-06, "loss": 0.3371, "step": 6632 }, { "epoch": 0.3043917213528521, "grad_norm": 0.5072474479675293, "learning_rate": 9.830500598893285e-06, "loss": 0.4838, "step": 6633 }, { "epoch": 0.3044376118581066, "grad_norm": 0.49206194281578064, "learning_rate": 9.830437293488662e-06, "loss": 0.4432, "step": 6634 }, { "epoch": 0.304483502363361, "grad_norm": 0.5380978584289551, "learning_rate": 9.830373976468365e-06, "loss": 0.4835, "step": 6635 }, { "epoch": 0.30452939286861547, "grad_norm": 0.4803902506828308, "learning_rate": 9.830310647832542e-06, "loss": 0.4243, "step": 6636 }, { "epoch": 0.30457528337386996, "grad_norm": 0.47719717025756836, "learning_rate": 9.830247307581346e-06, "loss": 0.397, "step": 6637 }, { "epoch": 0.3046211738791244, "grad_norm": 0.458240807056427, "learning_rate": 9.83018395571493e-06, "loss": 0.3738, "step": 6638 }, { "epoch": 0.30466706438437885, "grad_norm": 0.48805272579193115, "learning_rate": 9.830120592233445e-06, "loss": 0.3818, "step": 6639 }, { "epoch": 0.30471295488963335, "grad_norm": 0.4603116512298584, "learning_rate": 9.830057217137045e-06, "loss": 0.3588, "step": 6640 }, { "epoch": 0.3047588453948878, "grad_norm": 0.51239013671875, "learning_rate": 9.829993830425884e-06, "loss": 0.4586, "step": 6641 }, { "epoch": 0.3048047359001423, "grad_norm": 0.46400687098503113, "learning_rate": 9.82993043210011e-06, "loss": 0.3897, "step": 6642 }, { "epoch": 0.30485062640539673, "grad_norm": 0.5125425457954407, "learning_rate": 9.829867022159878e-06, "loss": 0.4143, "step": 6643 }, { "epoch": 0.30489651691065117, "grad_norm": 0.4582475423812866, "learning_rate": 9.82980360060534e-06, "loss": 0.4024, "step": 6644 }, { "epoch": 0.30494240741590567, "grad_norm": 0.4980606436729431, "learning_rate": 9.82974016743665e-06, "loss": 0.433, "step": 6645 }, { "epoch": 0.3049882979211601, "grad_norm": 0.478557825088501, "learning_rate": 9.829676722653958e-06, "loss": 0.456, "step": 6646 }, { "epoch": 0.30503418842641455, "grad_norm": 0.48435866832733154, "learning_rate": 9.829613266257418e-06, "loss": 0.4389, "step": 6647 }, { "epoch": 0.30508007893166905, "grad_norm": 0.46086686849594116, "learning_rate": 9.829549798247182e-06, "loss": 0.4359, "step": 6648 }, { "epoch": 0.3051259694369235, "grad_norm": 0.4501870572566986, "learning_rate": 9.829486318623403e-06, "loss": 0.3573, "step": 6649 }, { "epoch": 0.30517185994217794, "grad_norm": 0.4945095181465149, "learning_rate": 9.829422827386233e-06, "loss": 0.511, "step": 6650 }, { "epoch": 0.30521775044743243, "grad_norm": 0.5074198842048645, "learning_rate": 9.829359324535828e-06, "loss": 0.3607, "step": 6651 }, { "epoch": 0.3052636409526869, "grad_norm": 0.48221343755722046, "learning_rate": 9.829295810072335e-06, "loss": 0.413, "step": 6652 }, { "epoch": 0.3053095314579414, "grad_norm": 0.4983444809913635, "learning_rate": 9.82923228399591e-06, "loss": 0.4966, "step": 6653 }, { "epoch": 0.3053554219631958, "grad_norm": 0.49983564019203186, "learning_rate": 9.829168746306708e-06, "loss": 0.5255, "step": 6654 }, { "epoch": 0.30540131246845026, "grad_norm": 0.43073901534080505, "learning_rate": 9.829105197004879e-06, "loss": 0.3646, "step": 6655 }, { "epoch": 0.30544720297370476, "grad_norm": 0.4418764114379883, "learning_rate": 9.829041636090573e-06, "loss": 0.3333, "step": 6656 }, { "epoch": 0.3054930934789592, "grad_norm": 0.4820026159286499, "learning_rate": 9.828978063563948e-06, "loss": 0.3755, "step": 6657 }, { "epoch": 0.30553898398421364, "grad_norm": 0.5043435096740723, "learning_rate": 9.828914479425156e-06, "loss": 0.4748, "step": 6658 }, { "epoch": 0.30558487448946814, "grad_norm": 0.4486979842185974, "learning_rate": 9.828850883674346e-06, "loss": 0.4015, "step": 6659 }, { "epoch": 0.3056307649947226, "grad_norm": 0.5034217238426208, "learning_rate": 9.828787276311676e-06, "loss": 0.4817, "step": 6660 }, { "epoch": 0.3056766554999771, "grad_norm": 0.45763203501701355, "learning_rate": 9.828723657337295e-06, "loss": 0.3262, "step": 6661 }, { "epoch": 0.3057225460052315, "grad_norm": 0.5401304364204407, "learning_rate": 9.828660026751357e-06, "loss": 0.4488, "step": 6662 }, { "epoch": 0.30576843651048596, "grad_norm": 0.4717482924461365, "learning_rate": 9.828596384554019e-06, "loss": 0.4169, "step": 6663 }, { "epoch": 0.30581432701574046, "grad_norm": 0.4565146267414093, "learning_rate": 9.828532730745429e-06, "loss": 0.417, "step": 6664 }, { "epoch": 0.3058602175209949, "grad_norm": 0.4420337975025177, "learning_rate": 9.82846906532574e-06, "loss": 0.3572, "step": 6665 }, { "epoch": 0.30590610802624935, "grad_norm": 0.4823428988456726, "learning_rate": 9.828405388295108e-06, "loss": 0.4379, "step": 6666 }, { "epoch": 0.30595199853150384, "grad_norm": 0.4770444631576538, "learning_rate": 9.828341699653684e-06, "loss": 0.4272, "step": 6667 }, { "epoch": 0.3059978890367583, "grad_norm": 0.4345322251319885, "learning_rate": 9.828277999401624e-06, "loss": 0.3315, "step": 6668 }, { "epoch": 0.3060437795420128, "grad_norm": 0.47918233275413513, "learning_rate": 9.828214287539078e-06, "loss": 0.4397, "step": 6669 }, { "epoch": 0.3060896700472672, "grad_norm": 0.4709562063217163, "learning_rate": 9.8281505640662e-06, "loss": 0.4066, "step": 6670 }, { "epoch": 0.30613556055252167, "grad_norm": 0.4455477297306061, "learning_rate": 9.828086828983146e-06, "loss": 0.3195, "step": 6671 }, { "epoch": 0.30618145105777617, "grad_norm": 0.4749097526073456, "learning_rate": 9.828023082290065e-06, "loss": 0.4371, "step": 6672 }, { "epoch": 0.3062273415630306, "grad_norm": 0.4528983533382416, "learning_rate": 9.827959323987113e-06, "loss": 0.3659, "step": 6673 }, { "epoch": 0.30627323206828505, "grad_norm": 0.4834461808204651, "learning_rate": 9.827895554074443e-06, "loss": 0.4083, "step": 6674 }, { "epoch": 0.30631912257353955, "grad_norm": 0.44200068712234497, "learning_rate": 9.827831772552206e-06, "loss": 0.3884, "step": 6675 }, { "epoch": 0.306365013078794, "grad_norm": 0.48811590671539307, "learning_rate": 9.827767979420559e-06, "loss": 0.443, "step": 6676 }, { "epoch": 0.30641090358404843, "grad_norm": 0.4491792917251587, "learning_rate": 9.827704174679653e-06, "loss": 0.3705, "step": 6677 }, { "epoch": 0.30645679408930293, "grad_norm": 0.47117140889167786, "learning_rate": 9.827640358329641e-06, "loss": 0.4032, "step": 6678 }, { "epoch": 0.3065026845945574, "grad_norm": 0.4772627055644989, "learning_rate": 9.82757653037068e-06, "loss": 0.4043, "step": 6679 }, { "epoch": 0.30654857509981187, "grad_norm": 0.5154597163200378, "learning_rate": 9.82751269080292e-06, "loss": 0.495, "step": 6680 }, { "epoch": 0.3065944656050663, "grad_norm": 0.4699918329715729, "learning_rate": 9.827448839626516e-06, "loss": 0.3969, "step": 6681 }, { "epoch": 0.30664035611032076, "grad_norm": 0.5239112377166748, "learning_rate": 9.82738497684162e-06, "loss": 0.4475, "step": 6682 }, { "epoch": 0.30668624661557525, "grad_norm": 0.5227832198143005, "learning_rate": 9.827321102448387e-06, "loss": 0.5436, "step": 6683 }, { "epoch": 0.3067321371208297, "grad_norm": 0.4827341139316559, "learning_rate": 9.82725721644697e-06, "loss": 0.4618, "step": 6684 }, { "epoch": 0.30677802762608414, "grad_norm": 0.4586300551891327, "learning_rate": 9.827193318837525e-06, "loss": 0.3699, "step": 6685 }, { "epoch": 0.30682391813133864, "grad_norm": 0.4359295070171356, "learning_rate": 9.827129409620202e-06, "loss": 0.3373, "step": 6686 }, { "epoch": 0.3068698086365931, "grad_norm": 0.5202856659889221, "learning_rate": 9.827065488795155e-06, "loss": 0.4576, "step": 6687 }, { "epoch": 0.3069156991418476, "grad_norm": 0.4542224705219269, "learning_rate": 9.827001556362541e-06, "loss": 0.3763, "step": 6688 }, { "epoch": 0.306961589647102, "grad_norm": 0.5091878771781921, "learning_rate": 9.826937612322512e-06, "loss": 0.4471, "step": 6689 }, { "epoch": 0.30700748015235646, "grad_norm": 0.48083943128585815, "learning_rate": 9.82687365667522e-06, "loss": 0.4775, "step": 6690 }, { "epoch": 0.30705337065761096, "grad_norm": 0.4575759470462799, "learning_rate": 9.82680968942082e-06, "loss": 0.4145, "step": 6691 }, { "epoch": 0.3070992611628654, "grad_norm": 0.44299009442329407, "learning_rate": 9.826745710559468e-06, "loss": 0.4012, "step": 6692 }, { "epoch": 0.30714515166811984, "grad_norm": 0.5044476389884949, "learning_rate": 9.826681720091316e-06, "loss": 0.4123, "step": 6693 }, { "epoch": 0.30719104217337434, "grad_norm": 0.44683578610420227, "learning_rate": 9.826617718016516e-06, "loss": 0.3326, "step": 6694 }, { "epoch": 0.3072369326786288, "grad_norm": 0.4374343156814575, "learning_rate": 9.826553704335224e-06, "loss": 0.3638, "step": 6695 }, { "epoch": 0.3072828231838833, "grad_norm": 0.47219011187553406, "learning_rate": 9.826489679047594e-06, "loss": 0.3738, "step": 6696 }, { "epoch": 0.3073287136891377, "grad_norm": 0.4438578486442566, "learning_rate": 9.82642564215378e-06, "loss": 0.391, "step": 6697 }, { "epoch": 0.30737460419439216, "grad_norm": 0.49390602111816406, "learning_rate": 9.826361593653935e-06, "loss": 0.4326, "step": 6698 }, { "epoch": 0.30742049469964666, "grad_norm": 0.5040885806083679, "learning_rate": 9.826297533548213e-06, "loss": 0.462, "step": 6699 }, { "epoch": 0.3074663852049011, "grad_norm": 0.4489750266075134, "learning_rate": 9.826233461836769e-06, "loss": 0.3422, "step": 6700 }, { "epoch": 0.30751227571015555, "grad_norm": 0.47171011567115784, "learning_rate": 9.826169378519759e-06, "loss": 0.3885, "step": 6701 }, { "epoch": 0.30755816621541004, "grad_norm": 0.5421554446220398, "learning_rate": 9.826105283597331e-06, "loss": 0.3882, "step": 6702 }, { "epoch": 0.3076040567206645, "grad_norm": 0.4394133985042572, "learning_rate": 9.826041177069645e-06, "loss": 0.3487, "step": 6703 }, { "epoch": 0.307649947225919, "grad_norm": 0.45673462748527527, "learning_rate": 9.825977058936855e-06, "loss": 0.4245, "step": 6704 }, { "epoch": 0.3076958377311734, "grad_norm": 0.4650372862815857, "learning_rate": 9.82591292919911e-06, "loss": 0.3721, "step": 6705 }, { "epoch": 0.30774172823642787, "grad_norm": 0.44464802742004395, "learning_rate": 9.825848787856569e-06, "loss": 0.3382, "step": 6706 }, { "epoch": 0.30778761874168237, "grad_norm": 0.4994361102581024, "learning_rate": 9.825784634909385e-06, "loss": 0.4831, "step": 6707 }, { "epoch": 0.3078335092469368, "grad_norm": 0.4889013171195984, "learning_rate": 9.825720470357712e-06, "loss": 0.4627, "step": 6708 }, { "epoch": 0.30787939975219125, "grad_norm": 0.4376967251300812, "learning_rate": 9.825656294201705e-06, "loss": 0.3755, "step": 6709 }, { "epoch": 0.30792529025744575, "grad_norm": 0.43729689717292786, "learning_rate": 9.825592106441515e-06, "loss": 0.3339, "step": 6710 }, { "epoch": 0.3079711807627002, "grad_norm": 0.47534286975860596, "learning_rate": 9.825527907077302e-06, "loss": 0.3803, "step": 6711 }, { "epoch": 0.30801707126795463, "grad_norm": 0.5077545046806335, "learning_rate": 9.825463696109215e-06, "loss": 0.5131, "step": 6712 }, { "epoch": 0.30806296177320913, "grad_norm": 0.42200082540512085, "learning_rate": 9.825399473537411e-06, "loss": 0.3106, "step": 6713 }, { "epoch": 0.3081088522784636, "grad_norm": 0.4354619085788727, "learning_rate": 9.825335239362044e-06, "loss": 0.3895, "step": 6714 }, { "epoch": 0.30815474278371807, "grad_norm": 0.4706278145313263, "learning_rate": 9.825270993583271e-06, "loss": 0.4428, "step": 6715 }, { "epoch": 0.3082006332889725, "grad_norm": 0.44295641779899597, "learning_rate": 9.825206736201243e-06, "loss": 0.3724, "step": 6716 }, { "epoch": 0.30824652379422696, "grad_norm": 0.48312485218048096, "learning_rate": 9.825142467216115e-06, "loss": 0.4525, "step": 6717 }, { "epoch": 0.30829241429948145, "grad_norm": 0.4753396511077881, "learning_rate": 9.825078186628043e-06, "loss": 0.4604, "step": 6718 }, { "epoch": 0.3083383048047359, "grad_norm": 0.5158783197402954, "learning_rate": 9.82501389443718e-06, "loss": 0.5569, "step": 6719 }, { "epoch": 0.30838419530999034, "grad_norm": 0.5148168206214905, "learning_rate": 9.824949590643683e-06, "loss": 0.574, "step": 6720 }, { "epoch": 0.30843008581524484, "grad_norm": 0.44234567880630493, "learning_rate": 9.824885275247702e-06, "loss": 0.4117, "step": 6721 }, { "epoch": 0.3084759763204993, "grad_norm": 0.4514029026031494, "learning_rate": 9.824820948249398e-06, "loss": 0.354, "step": 6722 }, { "epoch": 0.3085218668257538, "grad_norm": 0.520056962966919, "learning_rate": 9.82475660964892e-06, "loss": 0.437, "step": 6723 }, { "epoch": 0.3085677573310082, "grad_norm": 0.4706914722919464, "learning_rate": 9.824692259446426e-06, "loss": 0.3694, "step": 6724 }, { "epoch": 0.30861364783626266, "grad_norm": 0.4695865511894226, "learning_rate": 9.82462789764207e-06, "loss": 0.4361, "step": 6725 }, { "epoch": 0.30865953834151716, "grad_norm": 0.4542319178581238, "learning_rate": 9.824563524236006e-06, "loss": 0.39, "step": 6726 }, { "epoch": 0.3087054288467716, "grad_norm": 0.4493260383605957, "learning_rate": 9.82449913922839e-06, "loss": 0.3702, "step": 6727 }, { "epoch": 0.30875131935202604, "grad_norm": 0.5305804014205933, "learning_rate": 9.824434742619377e-06, "loss": 0.454, "step": 6728 }, { "epoch": 0.30879720985728054, "grad_norm": 0.47414857149124146, "learning_rate": 9.824370334409119e-06, "loss": 0.4491, "step": 6729 }, { "epoch": 0.308843100362535, "grad_norm": 0.4635559916496277, "learning_rate": 9.824305914597774e-06, "loss": 0.4394, "step": 6730 }, { "epoch": 0.3088889908677895, "grad_norm": 0.5106761455535889, "learning_rate": 9.824241483185497e-06, "loss": 0.4769, "step": 6731 }, { "epoch": 0.3089348813730439, "grad_norm": 0.48257845640182495, "learning_rate": 9.82417704017244e-06, "loss": 0.3978, "step": 6732 }, { "epoch": 0.30898077187829837, "grad_norm": 0.45212891697883606, "learning_rate": 9.824112585558761e-06, "loss": 0.3702, "step": 6733 }, { "epoch": 0.30902666238355286, "grad_norm": 0.4260169267654419, "learning_rate": 9.824048119344614e-06, "loss": 0.2992, "step": 6734 }, { "epoch": 0.3090725528888073, "grad_norm": 0.43211981654167175, "learning_rate": 9.823983641530153e-06, "loss": 0.3891, "step": 6735 }, { "epoch": 0.30911844339406175, "grad_norm": 0.4793911278247833, "learning_rate": 9.823919152115533e-06, "loss": 0.427, "step": 6736 }, { "epoch": 0.30916433389931625, "grad_norm": 0.4465513229370117, "learning_rate": 9.823854651100911e-06, "loss": 0.3903, "step": 6737 }, { "epoch": 0.3092102244045707, "grad_norm": 0.4502408802509308, "learning_rate": 9.823790138486443e-06, "loss": 0.3753, "step": 6738 }, { "epoch": 0.30925611490982513, "grad_norm": 0.5921756625175476, "learning_rate": 9.82372561427228e-06, "loss": 0.5627, "step": 6739 }, { "epoch": 0.30930200541507963, "grad_norm": 0.4430526793003082, "learning_rate": 9.823661078458577e-06, "loss": 0.3759, "step": 6740 }, { "epoch": 0.30934789592033407, "grad_norm": 0.45313969254493713, "learning_rate": 9.823596531045495e-06, "loss": 0.3884, "step": 6741 }, { "epoch": 0.30939378642558857, "grad_norm": 0.5367785692214966, "learning_rate": 9.823531972033184e-06, "loss": 0.3975, "step": 6742 }, { "epoch": 0.309439676930843, "grad_norm": 0.4326730966567993, "learning_rate": 9.823467401421802e-06, "loss": 0.3607, "step": 6743 }, { "epoch": 0.30948556743609745, "grad_norm": 0.4941154420375824, "learning_rate": 9.823402819211503e-06, "loss": 0.4385, "step": 6744 }, { "epoch": 0.30953145794135195, "grad_norm": 0.46483370661735535, "learning_rate": 9.823338225402442e-06, "loss": 0.3745, "step": 6745 }, { "epoch": 0.3095773484466064, "grad_norm": 0.4239407181739807, "learning_rate": 9.823273619994776e-06, "loss": 0.3263, "step": 6746 }, { "epoch": 0.30962323895186084, "grad_norm": 0.47472575306892395, "learning_rate": 9.823209002988657e-06, "loss": 0.4236, "step": 6747 }, { "epoch": 0.30966912945711533, "grad_norm": 0.4964427351951599, "learning_rate": 9.823144374384244e-06, "loss": 0.4787, "step": 6748 }, { "epoch": 0.3097150199623698, "grad_norm": 0.4612743556499481, "learning_rate": 9.82307973418169e-06, "loss": 0.3665, "step": 6749 }, { "epoch": 0.3097609104676243, "grad_norm": 0.46500423550605774, "learning_rate": 9.823015082381154e-06, "loss": 0.4377, "step": 6750 }, { "epoch": 0.3098068009728787, "grad_norm": 0.43587544560432434, "learning_rate": 9.822950418982788e-06, "loss": 0.344, "step": 6751 }, { "epoch": 0.30985269147813316, "grad_norm": 0.46367624402046204, "learning_rate": 9.822885743986747e-06, "loss": 0.4613, "step": 6752 }, { "epoch": 0.30989858198338766, "grad_norm": 0.44159722328186035, "learning_rate": 9.822821057393188e-06, "loss": 0.3268, "step": 6753 }, { "epoch": 0.3099444724886421, "grad_norm": 0.47241494059562683, "learning_rate": 9.822756359202268e-06, "loss": 0.3823, "step": 6754 }, { "epoch": 0.30999036299389654, "grad_norm": 0.48571261763572693, "learning_rate": 9.82269164941414e-06, "loss": 0.3809, "step": 6755 }, { "epoch": 0.31003625349915104, "grad_norm": 0.46417245268821716, "learning_rate": 9.82262692802896e-06, "loss": 0.3901, "step": 6756 }, { "epoch": 0.3100821440044055, "grad_norm": 0.47183191776275635, "learning_rate": 9.822562195046886e-06, "loss": 0.4448, "step": 6757 }, { "epoch": 0.31012803450966, "grad_norm": 0.8337662816047668, "learning_rate": 9.82249745046807e-06, "loss": 0.4436, "step": 6758 }, { "epoch": 0.3101739250149144, "grad_norm": 0.4456123113632202, "learning_rate": 9.822432694292671e-06, "loss": 0.3869, "step": 6759 }, { "epoch": 0.31021981552016886, "grad_norm": 0.516566812992096, "learning_rate": 9.822367926520844e-06, "loss": 0.5755, "step": 6760 }, { "epoch": 0.31026570602542336, "grad_norm": 0.49161389470100403, "learning_rate": 9.822303147152743e-06, "loss": 0.4532, "step": 6761 }, { "epoch": 0.3103115965306778, "grad_norm": 0.46822506189346313, "learning_rate": 9.822238356188525e-06, "loss": 0.4379, "step": 6762 }, { "epoch": 0.31035748703593224, "grad_norm": 0.4389892518520355, "learning_rate": 9.822173553628348e-06, "loss": 0.3266, "step": 6763 }, { "epoch": 0.31040337754118674, "grad_norm": 0.4525942802429199, "learning_rate": 9.822108739472362e-06, "loss": 0.4083, "step": 6764 }, { "epoch": 0.3104492680464412, "grad_norm": 0.497713565826416, "learning_rate": 9.822043913720726e-06, "loss": 0.4315, "step": 6765 }, { "epoch": 0.3104951585516956, "grad_norm": 0.43228432536125183, "learning_rate": 9.821979076373601e-06, "loss": 0.3552, "step": 6766 }, { "epoch": 0.3105410490569501, "grad_norm": 0.469587117433548, "learning_rate": 9.821914227431135e-06, "loss": 0.4346, "step": 6767 }, { "epoch": 0.31058693956220457, "grad_norm": 0.4590999484062195, "learning_rate": 9.821849366893485e-06, "loss": 0.3984, "step": 6768 }, { "epoch": 0.31063283006745906, "grad_norm": 0.4557521641254425, "learning_rate": 9.821784494760814e-06, "loss": 0.3753, "step": 6769 }, { "epoch": 0.3106787205727135, "grad_norm": 0.48752713203430176, "learning_rate": 9.82171961103327e-06, "loss": 0.4944, "step": 6770 }, { "epoch": 0.31072461107796795, "grad_norm": 0.4644518196582794, "learning_rate": 9.821654715711013e-06, "loss": 0.4506, "step": 6771 }, { "epoch": 0.31077050158322245, "grad_norm": 0.48173952102661133, "learning_rate": 9.821589808794196e-06, "loss": 0.4288, "step": 6772 }, { "epoch": 0.3108163920884769, "grad_norm": 0.4833815097808838, "learning_rate": 9.82152489028298e-06, "loss": 0.4272, "step": 6773 }, { "epoch": 0.31086228259373133, "grad_norm": 0.4809361398220062, "learning_rate": 9.821459960177518e-06, "loss": 0.4431, "step": 6774 }, { "epoch": 0.31090817309898583, "grad_norm": 0.5219428539276123, "learning_rate": 9.821395018477965e-06, "loss": 0.4928, "step": 6775 }, { "epoch": 0.31095406360424027, "grad_norm": 0.472053587436676, "learning_rate": 9.82133006518448e-06, "loss": 0.4697, "step": 6776 }, { "epoch": 0.31099995410949477, "grad_norm": 0.5082793831825256, "learning_rate": 9.821265100297217e-06, "loss": 0.4758, "step": 6777 }, { "epoch": 0.3110458446147492, "grad_norm": 0.4976414442062378, "learning_rate": 9.821200123816334e-06, "loss": 0.4297, "step": 6778 }, { "epoch": 0.31109173512000365, "grad_norm": 0.4456143081188202, "learning_rate": 9.821135135741986e-06, "loss": 0.3396, "step": 6779 }, { "epoch": 0.31113762562525815, "grad_norm": 0.5019738674163818, "learning_rate": 9.82107013607433e-06, "loss": 0.4587, "step": 6780 }, { "epoch": 0.3111835161305126, "grad_norm": 0.42361247539520264, "learning_rate": 9.82100512481352e-06, "loss": 0.3399, "step": 6781 }, { "epoch": 0.31122940663576704, "grad_norm": 0.500214159488678, "learning_rate": 9.820940101959715e-06, "loss": 0.3793, "step": 6782 }, { "epoch": 0.31127529714102153, "grad_norm": 0.4825003743171692, "learning_rate": 9.820875067513072e-06, "loss": 0.4559, "step": 6783 }, { "epoch": 0.311321187646276, "grad_norm": 0.457776814699173, "learning_rate": 9.820810021473745e-06, "loss": 0.3326, "step": 6784 }, { "epoch": 0.3113670781515305, "grad_norm": 0.45118215680122375, "learning_rate": 9.820744963841892e-06, "loss": 0.3781, "step": 6785 }, { "epoch": 0.3114129686567849, "grad_norm": 0.4419827461242676, "learning_rate": 9.820679894617669e-06, "loss": 0.3899, "step": 6786 }, { "epoch": 0.31145885916203936, "grad_norm": 0.44874539971351624, "learning_rate": 9.82061481380123e-06, "loss": 0.3887, "step": 6787 }, { "epoch": 0.31150474966729386, "grad_norm": 0.4241480231285095, "learning_rate": 9.820549721392736e-06, "loss": 0.3616, "step": 6788 }, { "epoch": 0.3115506401725483, "grad_norm": 0.4799668788909912, "learning_rate": 9.82048461739234e-06, "loss": 0.3893, "step": 6789 }, { "epoch": 0.31159653067780274, "grad_norm": 0.5291328430175781, "learning_rate": 9.820419501800201e-06, "loss": 0.4716, "step": 6790 }, { "epoch": 0.31164242118305724, "grad_norm": 0.47475743293762207, "learning_rate": 9.820354374616475e-06, "loss": 0.4743, "step": 6791 }, { "epoch": 0.3116883116883117, "grad_norm": 0.449655145406723, "learning_rate": 9.820289235841316e-06, "loss": 0.4246, "step": 6792 }, { "epoch": 0.3117342021935662, "grad_norm": 0.4789987802505493, "learning_rate": 9.820224085474884e-06, "loss": 0.47, "step": 6793 }, { "epoch": 0.3117800926988206, "grad_norm": 0.43304553627967834, "learning_rate": 9.820158923517335e-06, "loss": 0.3307, "step": 6794 }, { "epoch": 0.31182598320407506, "grad_norm": 0.4332766532897949, "learning_rate": 9.820093749968825e-06, "loss": 0.3732, "step": 6795 }, { "epoch": 0.31187187370932956, "grad_norm": 0.43455588817596436, "learning_rate": 9.820028564829509e-06, "loss": 0.3597, "step": 6796 }, { "epoch": 0.311917764214584, "grad_norm": 0.45282676815986633, "learning_rate": 9.819963368099548e-06, "loss": 0.3876, "step": 6797 }, { "epoch": 0.31196365471983845, "grad_norm": 0.5092303156852722, "learning_rate": 9.819898159779093e-06, "loss": 0.4611, "step": 6798 }, { "epoch": 0.31200954522509294, "grad_norm": 0.47585415840148926, "learning_rate": 9.819832939868307e-06, "loss": 0.413, "step": 6799 }, { "epoch": 0.3120554357303474, "grad_norm": 0.48095840215682983, "learning_rate": 9.819767708367342e-06, "loss": 0.4087, "step": 6800 }, { "epoch": 0.31210132623560183, "grad_norm": 0.42886844277381897, "learning_rate": 9.819702465276359e-06, "loss": 0.3632, "step": 6801 }, { "epoch": 0.3121472167408563, "grad_norm": 0.43350598216056824, "learning_rate": 9.819637210595513e-06, "loss": 0.411, "step": 6802 }, { "epoch": 0.31219310724611077, "grad_norm": 0.4743501543998718, "learning_rate": 9.819571944324958e-06, "loss": 0.4221, "step": 6803 }, { "epoch": 0.31223899775136527, "grad_norm": 0.4626290798187256, "learning_rate": 9.819506666464855e-06, "loss": 0.4594, "step": 6804 }, { "epoch": 0.3122848882566197, "grad_norm": 0.4297126829624176, "learning_rate": 9.81944137701536e-06, "loss": 0.3511, "step": 6805 }, { "epoch": 0.31233077876187415, "grad_norm": 0.47251227498054504, "learning_rate": 9.819376075976628e-06, "loss": 0.3901, "step": 6806 }, { "epoch": 0.31237666926712865, "grad_norm": 0.46752774715423584, "learning_rate": 9.81931076334882e-06, "loss": 0.434, "step": 6807 }, { "epoch": 0.3124225597723831, "grad_norm": 0.4704759120941162, "learning_rate": 9.819245439132087e-06, "loss": 0.3896, "step": 6808 }, { "epoch": 0.31246845027763753, "grad_norm": 0.4572508633136749, "learning_rate": 9.819180103326592e-06, "loss": 0.3852, "step": 6809 }, { "epoch": 0.31251434078289203, "grad_norm": 0.4954196810722351, "learning_rate": 9.819114755932491e-06, "loss": 0.5039, "step": 6810 }, { "epoch": 0.3125602312881465, "grad_norm": 0.447406142950058, "learning_rate": 9.819049396949939e-06, "loss": 0.3404, "step": 6811 }, { "epoch": 0.31260612179340097, "grad_norm": 0.46359962224960327, "learning_rate": 9.818984026379093e-06, "loss": 0.3905, "step": 6812 }, { "epoch": 0.3126520122986554, "grad_norm": 0.489170104265213, "learning_rate": 9.818918644220113e-06, "loss": 0.4083, "step": 6813 }, { "epoch": 0.31269790280390986, "grad_norm": 0.5148153305053711, "learning_rate": 9.818853250473153e-06, "loss": 0.4992, "step": 6814 }, { "epoch": 0.31274379330916435, "grad_norm": 0.43294501304626465, "learning_rate": 9.818787845138373e-06, "loss": 0.3482, "step": 6815 }, { "epoch": 0.3127896838144188, "grad_norm": 0.4783685803413391, "learning_rate": 9.818722428215927e-06, "loss": 0.4115, "step": 6816 }, { "epoch": 0.31283557431967324, "grad_norm": 0.45074763894081116, "learning_rate": 9.818656999705977e-06, "loss": 0.3764, "step": 6817 }, { "epoch": 0.31288146482492774, "grad_norm": 0.4706534147262573, "learning_rate": 9.818591559608676e-06, "loss": 0.4095, "step": 6818 }, { "epoch": 0.3129273553301822, "grad_norm": 0.43691709637641907, "learning_rate": 9.818526107924185e-06, "loss": 0.357, "step": 6819 }, { "epoch": 0.3129732458354367, "grad_norm": 0.4433576762676239, "learning_rate": 9.818460644652658e-06, "loss": 0.3544, "step": 6820 }, { "epoch": 0.3130191363406911, "grad_norm": 0.39241915941238403, "learning_rate": 9.818395169794253e-06, "loss": 0.2927, "step": 6821 }, { "epoch": 0.31306502684594556, "grad_norm": 0.4818485975265503, "learning_rate": 9.81832968334913e-06, "loss": 0.3995, "step": 6822 }, { "epoch": 0.31311091735120006, "grad_norm": 0.5008805394172668, "learning_rate": 9.818264185317445e-06, "loss": 0.4308, "step": 6823 }, { "epoch": 0.3131568078564545, "grad_norm": 0.5207908749580383, "learning_rate": 9.818198675699354e-06, "loss": 0.4304, "step": 6824 }, { "epoch": 0.31320269836170894, "grad_norm": 0.5156873464584351, "learning_rate": 9.818133154495017e-06, "loss": 0.5269, "step": 6825 }, { "epoch": 0.31324858886696344, "grad_norm": 0.4534202814102173, "learning_rate": 9.81806762170459e-06, "loss": 0.3903, "step": 6826 }, { "epoch": 0.3132944793722179, "grad_norm": 0.4930480122566223, "learning_rate": 9.81800207732823e-06, "loss": 0.4219, "step": 6827 }, { "epoch": 0.3133403698774723, "grad_norm": 0.4778401255607605, "learning_rate": 9.817936521366097e-06, "loss": 0.4259, "step": 6828 }, { "epoch": 0.3133862603827268, "grad_norm": 0.4639195203781128, "learning_rate": 9.817870953818348e-06, "loss": 0.4056, "step": 6829 }, { "epoch": 0.31343215088798126, "grad_norm": 0.4899890720844269, "learning_rate": 9.817805374685137e-06, "loss": 0.4101, "step": 6830 }, { "epoch": 0.31347804139323576, "grad_norm": 0.4542444944381714, "learning_rate": 9.817739783966627e-06, "loss": 0.404, "step": 6831 }, { "epoch": 0.3135239318984902, "grad_norm": 0.4820408523082733, "learning_rate": 9.817674181662973e-06, "loss": 0.429, "step": 6832 }, { "epoch": 0.31356982240374465, "grad_norm": 0.440933495759964, "learning_rate": 9.817608567774332e-06, "loss": 0.4221, "step": 6833 }, { "epoch": 0.31361571290899914, "grad_norm": 0.4212203323841095, "learning_rate": 9.817542942300864e-06, "loss": 0.3191, "step": 6834 }, { "epoch": 0.3136616034142536, "grad_norm": 0.4479084610939026, "learning_rate": 9.817477305242725e-06, "loss": 0.3881, "step": 6835 }, { "epoch": 0.31370749391950803, "grad_norm": 0.4258545935153961, "learning_rate": 9.817411656600073e-06, "loss": 0.3374, "step": 6836 }, { "epoch": 0.3137533844247625, "grad_norm": 0.4910781681537628, "learning_rate": 9.817345996373068e-06, "loss": 0.4312, "step": 6837 }, { "epoch": 0.31379927493001697, "grad_norm": 0.5013216137886047, "learning_rate": 9.817280324561867e-06, "loss": 0.5181, "step": 6838 }, { "epoch": 0.31384516543527147, "grad_norm": 0.469016432762146, "learning_rate": 9.817214641166624e-06, "loss": 0.4246, "step": 6839 }, { "epoch": 0.3138910559405259, "grad_norm": 0.43212074041366577, "learning_rate": 9.817148946187502e-06, "loss": 0.3472, "step": 6840 }, { "epoch": 0.31393694644578035, "grad_norm": 0.41999009251594543, "learning_rate": 9.817083239624657e-06, "loss": 0.3224, "step": 6841 }, { "epoch": 0.31398283695103485, "grad_norm": 0.4577331840991974, "learning_rate": 9.817017521478247e-06, "loss": 0.418, "step": 6842 }, { "epoch": 0.3140287274562893, "grad_norm": 0.46392470598220825, "learning_rate": 9.81695179174843e-06, "loss": 0.3958, "step": 6843 }, { "epoch": 0.31407461796154373, "grad_norm": 0.4637294411659241, "learning_rate": 9.816886050435366e-06, "loss": 0.4212, "step": 6844 }, { "epoch": 0.31412050846679823, "grad_norm": 0.5168417692184448, "learning_rate": 9.816820297539208e-06, "loss": 0.4553, "step": 6845 }, { "epoch": 0.3141663989720527, "grad_norm": 0.4453659653663635, "learning_rate": 9.81675453306012e-06, "loss": 0.4034, "step": 6846 }, { "epoch": 0.31421228947730717, "grad_norm": 0.47412481904029846, "learning_rate": 9.816688756998255e-06, "loss": 0.4703, "step": 6847 }, { "epoch": 0.3142581799825616, "grad_norm": 0.45725494623184204, "learning_rate": 9.816622969353777e-06, "loss": 0.3651, "step": 6848 }, { "epoch": 0.31430407048781606, "grad_norm": 0.420184463262558, "learning_rate": 9.81655717012684e-06, "loss": 0.3373, "step": 6849 }, { "epoch": 0.31434996099307055, "grad_norm": 0.46347182989120483, "learning_rate": 9.816491359317602e-06, "loss": 0.4177, "step": 6850 }, { "epoch": 0.314395851498325, "grad_norm": 0.43666452169418335, "learning_rate": 9.816425536926223e-06, "loss": 0.3647, "step": 6851 }, { "epoch": 0.31444174200357944, "grad_norm": 0.4481038451194763, "learning_rate": 9.81635970295286e-06, "loss": 0.4295, "step": 6852 }, { "epoch": 0.31448763250883394, "grad_norm": 0.4232562184333801, "learning_rate": 9.816293857397675e-06, "loss": 0.372, "step": 6853 }, { "epoch": 0.3145335230140884, "grad_norm": 0.44476720690727234, "learning_rate": 9.81622800026082e-06, "loss": 0.3777, "step": 6854 }, { "epoch": 0.3145794135193428, "grad_norm": 0.4472750723361969, "learning_rate": 9.816162131542457e-06, "loss": 0.3674, "step": 6855 }, { "epoch": 0.3146253040245973, "grad_norm": 0.508522629737854, "learning_rate": 9.816096251242745e-06, "loss": 0.4854, "step": 6856 }, { "epoch": 0.31467119452985176, "grad_norm": 0.4416707754135132, "learning_rate": 9.816030359361842e-06, "loss": 0.3662, "step": 6857 }, { "epoch": 0.31471708503510626, "grad_norm": 0.44998955726623535, "learning_rate": 9.815964455899904e-06, "loss": 0.3842, "step": 6858 }, { "epoch": 0.3147629755403607, "grad_norm": 0.4699195325374603, "learning_rate": 9.815898540857093e-06, "loss": 0.4812, "step": 6859 }, { "epoch": 0.31480886604561514, "grad_norm": 0.4703376293182373, "learning_rate": 9.815832614233566e-06, "loss": 0.4087, "step": 6860 }, { "epoch": 0.31485475655086964, "grad_norm": 0.49357494711875916, "learning_rate": 9.815766676029481e-06, "loss": 0.5339, "step": 6861 }, { "epoch": 0.3149006470561241, "grad_norm": 0.4573034942150116, "learning_rate": 9.815700726244997e-06, "loss": 0.3443, "step": 6862 }, { "epoch": 0.3149465375613785, "grad_norm": 0.495026171207428, "learning_rate": 9.815634764880273e-06, "loss": 0.474, "step": 6863 }, { "epoch": 0.314992428066633, "grad_norm": 0.45636898279190063, "learning_rate": 9.815568791935467e-06, "loss": 0.4166, "step": 6864 }, { "epoch": 0.31503831857188747, "grad_norm": 0.5281257629394531, "learning_rate": 9.815502807410736e-06, "loss": 0.5191, "step": 6865 }, { "epoch": 0.31508420907714196, "grad_norm": 0.4458218216896057, "learning_rate": 9.815436811306245e-06, "loss": 0.3546, "step": 6866 }, { "epoch": 0.3151300995823964, "grad_norm": 0.49445730447769165, "learning_rate": 9.815370803622145e-06, "loss": 0.4454, "step": 6867 }, { "epoch": 0.31517599008765085, "grad_norm": 0.46657928824424744, "learning_rate": 9.8153047843586e-06, "loss": 0.4503, "step": 6868 }, { "epoch": 0.31522188059290535, "grad_norm": 0.445287823677063, "learning_rate": 9.815238753515764e-06, "loss": 0.3806, "step": 6869 }, { "epoch": 0.3152677710981598, "grad_norm": 0.4831392168998718, "learning_rate": 9.8151727110938e-06, "loss": 0.4556, "step": 6870 }, { "epoch": 0.31531366160341423, "grad_norm": 0.46448814868927, "learning_rate": 9.815106657092866e-06, "loss": 0.3889, "step": 6871 }, { "epoch": 0.31535955210866873, "grad_norm": 0.4358893036842346, "learning_rate": 9.81504059151312e-06, "loss": 0.3642, "step": 6872 }, { "epoch": 0.31540544261392317, "grad_norm": 0.49335893988609314, "learning_rate": 9.81497451435472e-06, "loss": 0.3775, "step": 6873 }, { "epoch": 0.31545133311917767, "grad_norm": 0.47260338068008423, "learning_rate": 9.814908425617827e-06, "loss": 0.365, "step": 6874 }, { "epoch": 0.3154972236244321, "grad_norm": 0.45433372259140015, "learning_rate": 9.814842325302598e-06, "loss": 0.3685, "step": 6875 }, { "epoch": 0.31554311412968655, "grad_norm": 0.44888371229171753, "learning_rate": 9.814776213409194e-06, "loss": 0.3977, "step": 6876 }, { "epoch": 0.31558900463494105, "grad_norm": 0.451658695936203, "learning_rate": 9.81471008993777e-06, "loss": 0.3366, "step": 6877 }, { "epoch": 0.3156348951401955, "grad_norm": 0.4468802511692047, "learning_rate": 9.814643954888492e-06, "loss": 0.398, "step": 6878 }, { "epoch": 0.31568078564544994, "grad_norm": 0.503246009349823, "learning_rate": 9.814577808261511e-06, "loss": 0.4326, "step": 6879 }, { "epoch": 0.31572667615070443, "grad_norm": 0.41447919607162476, "learning_rate": 9.814511650056993e-06, "loss": 0.3594, "step": 6880 }, { "epoch": 0.3157725666559589, "grad_norm": 0.44382134079933167, "learning_rate": 9.814445480275093e-06, "loss": 0.3424, "step": 6881 }, { "epoch": 0.3158184571612133, "grad_norm": 0.45402440428733826, "learning_rate": 9.81437929891597e-06, "loss": 0.3828, "step": 6882 }, { "epoch": 0.3158643476664678, "grad_norm": 0.42428353428840637, "learning_rate": 9.814313105979784e-06, "loss": 0.3192, "step": 6883 }, { "epoch": 0.31591023817172226, "grad_norm": 0.4662676155567169, "learning_rate": 9.814246901466697e-06, "loss": 0.3875, "step": 6884 }, { "epoch": 0.31595612867697676, "grad_norm": 0.4538199007511139, "learning_rate": 9.814180685376864e-06, "loss": 0.3851, "step": 6885 }, { "epoch": 0.3160020191822312, "grad_norm": 0.4505821764469147, "learning_rate": 9.814114457710445e-06, "loss": 0.3876, "step": 6886 }, { "epoch": 0.31604790968748564, "grad_norm": 0.48920732736587524, "learning_rate": 9.814048218467601e-06, "loss": 0.4925, "step": 6887 }, { "epoch": 0.31609380019274014, "grad_norm": 0.43079760670661926, "learning_rate": 9.81398196764849e-06, "loss": 0.323, "step": 6888 }, { "epoch": 0.3161396906979946, "grad_norm": 0.4810311496257782, "learning_rate": 9.813915705253271e-06, "loss": 0.4861, "step": 6889 }, { "epoch": 0.316185581203249, "grad_norm": 0.4472326636314392, "learning_rate": 9.813849431282107e-06, "loss": 0.4089, "step": 6890 }, { "epoch": 0.3162314717085035, "grad_norm": 0.4294922351837158, "learning_rate": 9.813783145735152e-06, "loss": 0.3728, "step": 6891 }, { "epoch": 0.31627736221375796, "grad_norm": 0.5034098625183105, "learning_rate": 9.813716848612568e-06, "loss": 0.4398, "step": 6892 }, { "epoch": 0.31632325271901246, "grad_norm": 0.4683046042919159, "learning_rate": 9.813650539914514e-06, "loss": 0.4353, "step": 6893 }, { "epoch": 0.3163691432242669, "grad_norm": 0.4813278019428253, "learning_rate": 9.81358421964115e-06, "loss": 0.4366, "step": 6894 }, { "epoch": 0.31641503372952134, "grad_norm": 0.5084885954856873, "learning_rate": 9.813517887792637e-06, "loss": 0.425, "step": 6895 }, { "epoch": 0.31646092423477584, "grad_norm": 0.45638856291770935, "learning_rate": 9.81345154436913e-06, "loss": 0.3871, "step": 6896 }, { "epoch": 0.3165068147400303, "grad_norm": 0.4983183741569519, "learning_rate": 9.813385189370792e-06, "loss": 0.4615, "step": 6897 }, { "epoch": 0.3165527052452847, "grad_norm": 0.458781898021698, "learning_rate": 9.813318822797783e-06, "loss": 0.4101, "step": 6898 }, { "epoch": 0.3165985957505392, "grad_norm": 0.4356752634048462, "learning_rate": 9.81325244465026e-06, "loss": 0.3081, "step": 6899 }, { "epoch": 0.31664448625579367, "grad_norm": 0.45443177223205566, "learning_rate": 9.813186054928384e-06, "loss": 0.3498, "step": 6900 }, { "epoch": 0.31669037676104816, "grad_norm": 0.4583868086338043, "learning_rate": 9.813119653632314e-06, "loss": 0.3936, "step": 6901 }, { "epoch": 0.3167362672663026, "grad_norm": 0.44368353486061096, "learning_rate": 9.813053240762213e-06, "loss": 0.3739, "step": 6902 }, { "epoch": 0.31678215777155705, "grad_norm": 0.476655513048172, "learning_rate": 9.812986816318236e-06, "loss": 0.4399, "step": 6903 }, { "epoch": 0.31682804827681155, "grad_norm": 0.4905949831008911, "learning_rate": 9.812920380300545e-06, "loss": 0.4837, "step": 6904 }, { "epoch": 0.316873938782066, "grad_norm": 0.48626771569252014, "learning_rate": 9.812853932709298e-06, "loss": 0.4532, "step": 6905 }, { "epoch": 0.31691982928732043, "grad_norm": 0.4806963801383972, "learning_rate": 9.81278747354466e-06, "loss": 0.4484, "step": 6906 }, { "epoch": 0.31696571979257493, "grad_norm": 0.4692946672439575, "learning_rate": 9.812721002806783e-06, "loss": 0.4232, "step": 6907 }, { "epoch": 0.31701161029782937, "grad_norm": 0.5127679705619812, "learning_rate": 9.812654520495833e-06, "loss": 0.4092, "step": 6908 }, { "epoch": 0.31705750080308387, "grad_norm": 0.5175108909606934, "learning_rate": 9.812588026611968e-06, "loss": 0.4258, "step": 6909 }, { "epoch": 0.3171033913083383, "grad_norm": 0.4932761788368225, "learning_rate": 9.812521521155348e-06, "loss": 0.4263, "step": 6910 }, { "epoch": 0.31714928181359275, "grad_norm": 0.4879387319087982, "learning_rate": 9.812455004126131e-06, "loss": 0.4454, "step": 6911 }, { "epoch": 0.31719517231884725, "grad_norm": 0.46224719285964966, "learning_rate": 9.81238847552448e-06, "loss": 0.4198, "step": 6912 }, { "epoch": 0.3172410628241017, "grad_norm": 0.4714747965335846, "learning_rate": 9.812321935350554e-06, "loss": 0.3862, "step": 6913 }, { "epoch": 0.31728695332935614, "grad_norm": 0.4764077961444855, "learning_rate": 9.81225538360451e-06, "loss": 0.4279, "step": 6914 }, { "epoch": 0.31733284383461063, "grad_norm": 0.4578917622566223, "learning_rate": 9.812188820286513e-06, "loss": 0.4449, "step": 6915 }, { "epoch": 0.3173787343398651, "grad_norm": 0.39900583028793335, "learning_rate": 9.81212224539672e-06, "loss": 0.2864, "step": 6916 }, { "epoch": 0.3174246248451195, "grad_norm": 0.4593629837036133, "learning_rate": 9.81205565893529e-06, "loss": 0.4134, "step": 6917 }, { "epoch": 0.317470515350374, "grad_norm": 0.4725753366947174, "learning_rate": 9.811989060902387e-06, "loss": 0.4788, "step": 6918 }, { "epoch": 0.31751640585562846, "grad_norm": 0.47312673926353455, "learning_rate": 9.811922451298169e-06, "loss": 0.4613, "step": 6919 }, { "epoch": 0.31756229636088296, "grad_norm": 0.48358017206192017, "learning_rate": 9.811855830122795e-06, "loss": 0.4758, "step": 6920 }, { "epoch": 0.3176081868661374, "grad_norm": 0.46165886521339417, "learning_rate": 9.811789197376427e-06, "loss": 0.3356, "step": 6921 }, { "epoch": 0.31765407737139184, "grad_norm": 0.4359027147293091, "learning_rate": 9.811722553059224e-06, "loss": 0.3635, "step": 6922 }, { "epoch": 0.31769996787664634, "grad_norm": 0.45390981435775757, "learning_rate": 9.811655897171348e-06, "loss": 0.3932, "step": 6923 }, { "epoch": 0.3177458583819008, "grad_norm": 0.4628724753856659, "learning_rate": 9.81158922971296e-06, "loss": 0.4826, "step": 6924 }, { "epoch": 0.3177917488871552, "grad_norm": 0.4714392125606537, "learning_rate": 9.811522550684214e-06, "loss": 0.3806, "step": 6925 }, { "epoch": 0.3178376393924097, "grad_norm": 0.485016405582428, "learning_rate": 9.811455860085277e-06, "loss": 0.4665, "step": 6926 }, { "epoch": 0.31788352989766416, "grad_norm": 0.4822465777397156, "learning_rate": 9.811389157916307e-06, "loss": 0.4384, "step": 6927 }, { "epoch": 0.31792942040291866, "grad_norm": 0.46231627464294434, "learning_rate": 9.811322444177464e-06, "loss": 0.3518, "step": 6928 }, { "epoch": 0.3179753109081731, "grad_norm": 0.42591583728790283, "learning_rate": 9.811255718868911e-06, "loss": 0.3583, "step": 6929 }, { "epoch": 0.31802120141342755, "grad_norm": 0.4956151247024536, "learning_rate": 9.811188981990804e-06, "loss": 0.4451, "step": 6930 }, { "epoch": 0.31806709191868204, "grad_norm": 0.43963199853897095, "learning_rate": 9.81112223354331e-06, "loss": 0.3631, "step": 6931 }, { "epoch": 0.3181129824239365, "grad_norm": 0.43790316581726074, "learning_rate": 9.811055473526581e-06, "loss": 0.3464, "step": 6932 }, { "epoch": 0.31815887292919093, "grad_norm": 0.46583110094070435, "learning_rate": 9.810988701940784e-06, "loss": 0.3773, "step": 6933 }, { "epoch": 0.3182047634344454, "grad_norm": 0.4792909622192383, "learning_rate": 9.810921918786078e-06, "loss": 0.3611, "step": 6934 }, { "epoch": 0.31825065393969987, "grad_norm": 0.43365955352783203, "learning_rate": 9.810855124062622e-06, "loss": 0.3427, "step": 6935 }, { "epoch": 0.31829654444495437, "grad_norm": 0.4479745924472809, "learning_rate": 9.810788317770578e-06, "loss": 0.3929, "step": 6936 }, { "epoch": 0.3183424349502088, "grad_norm": 0.45520591735839844, "learning_rate": 9.810721499910106e-06, "loss": 0.3704, "step": 6937 }, { "epoch": 0.31838832545546325, "grad_norm": 0.4315906763076782, "learning_rate": 9.810654670481367e-06, "loss": 0.3478, "step": 6938 }, { "epoch": 0.31843421596071775, "grad_norm": 0.504904568195343, "learning_rate": 9.810587829484524e-06, "loss": 0.4925, "step": 6939 }, { "epoch": 0.3184801064659722, "grad_norm": 0.4537879228591919, "learning_rate": 9.810520976919734e-06, "loss": 0.3708, "step": 6940 }, { "epoch": 0.31852599697122663, "grad_norm": 0.4790514409542084, "learning_rate": 9.81045411278716e-06, "loss": 0.36, "step": 6941 }, { "epoch": 0.31857188747648113, "grad_norm": 0.44858425855636597, "learning_rate": 9.810387237086961e-06, "loss": 0.3354, "step": 6942 }, { "epoch": 0.3186177779817356, "grad_norm": 0.4829810559749603, "learning_rate": 9.8103203498193e-06, "loss": 0.4302, "step": 6943 }, { "epoch": 0.31866366848699, "grad_norm": 0.48292991518974304, "learning_rate": 9.810253450984336e-06, "loss": 0.3937, "step": 6944 }, { "epoch": 0.3187095589922445, "grad_norm": 0.4910351037979126, "learning_rate": 9.810186540582229e-06, "loss": 0.3995, "step": 6945 }, { "epoch": 0.31875544949749895, "grad_norm": 0.4695226848125458, "learning_rate": 9.810119618613144e-06, "loss": 0.3814, "step": 6946 }, { "epoch": 0.31880134000275345, "grad_norm": 0.4637891948223114, "learning_rate": 9.810052685077239e-06, "loss": 0.3707, "step": 6947 }, { "epoch": 0.3188472305080079, "grad_norm": 0.45026934146881104, "learning_rate": 9.809985739974674e-06, "loss": 0.363, "step": 6948 }, { "epoch": 0.31889312101326234, "grad_norm": 0.5132016539573669, "learning_rate": 9.809918783305614e-06, "loss": 0.4293, "step": 6949 }, { "epoch": 0.31893901151851684, "grad_norm": 0.4351544976234436, "learning_rate": 9.809851815070215e-06, "loss": 0.3798, "step": 6950 }, { "epoch": 0.3189849020237713, "grad_norm": 0.48168808221817017, "learning_rate": 9.80978483526864e-06, "loss": 0.4532, "step": 6951 }, { "epoch": 0.3190307925290257, "grad_norm": 0.4781895577907562, "learning_rate": 9.809717843901052e-06, "loss": 0.4552, "step": 6952 }, { "epoch": 0.3190766830342802, "grad_norm": 0.48530498147010803, "learning_rate": 9.80965084096761e-06, "loss": 0.4408, "step": 6953 }, { "epoch": 0.31912257353953466, "grad_norm": 0.4523056745529175, "learning_rate": 9.809583826468477e-06, "loss": 0.3825, "step": 6954 }, { "epoch": 0.31916846404478916, "grad_norm": 0.4712919294834137, "learning_rate": 9.809516800403811e-06, "loss": 0.4002, "step": 6955 }, { "epoch": 0.3192143545500436, "grad_norm": 0.47801870107650757, "learning_rate": 9.809449762773776e-06, "loss": 0.4436, "step": 6956 }, { "epoch": 0.31926024505529804, "grad_norm": 0.45333749055862427, "learning_rate": 9.809382713578533e-06, "loss": 0.367, "step": 6957 }, { "epoch": 0.31930613556055254, "grad_norm": 0.4778808355331421, "learning_rate": 9.80931565281824e-06, "loss": 0.4332, "step": 6958 }, { "epoch": 0.319352026065807, "grad_norm": 0.47561392188072205, "learning_rate": 9.809248580493062e-06, "loss": 0.3716, "step": 6959 }, { "epoch": 0.3193979165710614, "grad_norm": 0.4420454204082489, "learning_rate": 9.80918149660316e-06, "loss": 0.3823, "step": 6960 }, { "epoch": 0.3194438070763159, "grad_norm": 0.48482653498649597, "learning_rate": 9.809114401148694e-06, "loss": 0.3927, "step": 6961 }, { "epoch": 0.31948969758157036, "grad_norm": 0.47465482354164124, "learning_rate": 9.809047294129825e-06, "loss": 0.4422, "step": 6962 }, { "epoch": 0.31953558808682486, "grad_norm": 0.48826512694358826, "learning_rate": 9.808980175546715e-06, "loss": 0.4382, "step": 6963 }, { "epoch": 0.3195814785920793, "grad_norm": 0.4856005609035492, "learning_rate": 9.808913045399527e-06, "loss": 0.4235, "step": 6964 }, { "epoch": 0.31962736909733375, "grad_norm": 0.4142557382583618, "learning_rate": 9.808845903688418e-06, "loss": 0.3019, "step": 6965 }, { "epoch": 0.31967325960258824, "grad_norm": 0.42200809717178345, "learning_rate": 9.808778750413554e-06, "loss": 0.3034, "step": 6966 }, { "epoch": 0.3197191501078427, "grad_norm": 0.5138015151023865, "learning_rate": 9.808711585575094e-06, "loss": 0.4655, "step": 6967 }, { "epoch": 0.31976504061309713, "grad_norm": 0.41709649562835693, "learning_rate": 9.808644409173202e-06, "loss": 0.3122, "step": 6968 }, { "epoch": 0.3198109311183516, "grad_norm": 0.438488245010376, "learning_rate": 9.808577221208036e-06, "loss": 0.4023, "step": 6969 }, { "epoch": 0.31985682162360607, "grad_norm": 0.46840208768844604, "learning_rate": 9.808510021679761e-06, "loss": 0.4192, "step": 6970 }, { "epoch": 0.3199027121288605, "grad_norm": 0.47444257140159607, "learning_rate": 9.808442810588537e-06, "loss": 0.4053, "step": 6971 }, { "epoch": 0.319948602634115, "grad_norm": 0.46105051040649414, "learning_rate": 9.808375587934524e-06, "loss": 0.362, "step": 6972 }, { "epoch": 0.31999449313936945, "grad_norm": 0.4323471188545227, "learning_rate": 9.808308353717887e-06, "loss": 0.327, "step": 6973 }, { "epoch": 0.32004038364462395, "grad_norm": 0.48899421095848083, "learning_rate": 9.808241107938783e-06, "loss": 0.4483, "step": 6974 }, { "epoch": 0.3200862741498784, "grad_norm": 0.4337638020515442, "learning_rate": 9.80817385059738e-06, "loss": 0.382, "step": 6975 }, { "epoch": 0.32013216465513283, "grad_norm": 0.4616484045982361, "learning_rate": 9.808106581693835e-06, "loss": 0.4113, "step": 6976 }, { "epoch": 0.32017805516038733, "grad_norm": 0.45937931537628174, "learning_rate": 9.808039301228311e-06, "loss": 0.3923, "step": 6977 }, { "epoch": 0.3202239456656418, "grad_norm": 0.44955921173095703, "learning_rate": 9.80797200920097e-06, "loss": 0.3513, "step": 6978 }, { "epoch": 0.3202698361708962, "grad_norm": 0.4731086790561676, "learning_rate": 9.807904705611975e-06, "loss": 0.4254, "step": 6979 }, { "epoch": 0.3203157266761507, "grad_norm": 0.4315633475780487, "learning_rate": 9.807837390461486e-06, "loss": 0.3215, "step": 6980 }, { "epoch": 0.32036161718140516, "grad_norm": 0.449117511510849, "learning_rate": 9.807770063749664e-06, "loss": 0.3426, "step": 6981 }, { "epoch": 0.32040750768665965, "grad_norm": 0.4739314615726471, "learning_rate": 9.807702725476673e-06, "loss": 0.4126, "step": 6982 }, { "epoch": 0.3204533981919141, "grad_norm": 0.4968165457248688, "learning_rate": 9.807635375642676e-06, "loss": 0.4268, "step": 6983 }, { "epoch": 0.32049928869716854, "grad_norm": 0.47270330786705017, "learning_rate": 9.807568014247831e-06, "loss": 0.42, "step": 6984 }, { "epoch": 0.32054517920242304, "grad_norm": 0.43920719623565674, "learning_rate": 9.807500641292304e-06, "loss": 0.3528, "step": 6985 }, { "epoch": 0.3205910697076775, "grad_norm": 0.4495665431022644, "learning_rate": 9.807433256776255e-06, "loss": 0.3774, "step": 6986 }, { "epoch": 0.3206369602129319, "grad_norm": 0.4770635664463043, "learning_rate": 9.807365860699844e-06, "loss": 0.4482, "step": 6987 }, { "epoch": 0.3206828507181864, "grad_norm": 0.4722910225391388, "learning_rate": 9.807298453063239e-06, "loss": 0.3922, "step": 6988 }, { "epoch": 0.32072874122344086, "grad_norm": 0.5004497170448303, "learning_rate": 9.807231033866596e-06, "loss": 0.4724, "step": 6989 }, { "epoch": 0.32077463172869536, "grad_norm": 0.44961291551589966, "learning_rate": 9.80716360311008e-06, "loss": 0.3485, "step": 6990 }, { "epoch": 0.3208205222339498, "grad_norm": 0.5045326948165894, "learning_rate": 9.807096160793852e-06, "loss": 0.5059, "step": 6991 }, { "epoch": 0.32086641273920424, "grad_norm": 0.4946959614753723, "learning_rate": 9.807028706918076e-06, "loss": 0.3704, "step": 6992 }, { "epoch": 0.32091230324445874, "grad_norm": 0.44438186287879944, "learning_rate": 9.806961241482913e-06, "loss": 0.3803, "step": 6993 }, { "epoch": 0.3209581937497132, "grad_norm": 0.46101340651512146, "learning_rate": 9.806893764488526e-06, "loss": 0.4262, "step": 6994 }, { "epoch": 0.3210040842549676, "grad_norm": 0.44207680225372314, "learning_rate": 9.806826275935076e-06, "loss": 0.3462, "step": 6995 }, { "epoch": 0.3210499747602221, "grad_norm": 0.5115715265274048, "learning_rate": 9.806758775822724e-06, "loss": 0.4651, "step": 6996 }, { "epoch": 0.32109586526547657, "grad_norm": 0.45746099948883057, "learning_rate": 9.806691264151637e-06, "loss": 0.3775, "step": 6997 }, { "epoch": 0.32114175577073106, "grad_norm": 0.4725476801395416, "learning_rate": 9.806623740921974e-06, "loss": 0.4832, "step": 6998 }, { "epoch": 0.3211876462759855, "grad_norm": 0.5354151725769043, "learning_rate": 9.806556206133897e-06, "loss": 0.4192, "step": 6999 }, { "epoch": 0.32123353678123995, "grad_norm": 0.4477815330028534, "learning_rate": 9.80648865978757e-06, "loss": 0.3633, "step": 7000 }, { "epoch": 0.32127942728649445, "grad_norm": 0.49112147092819214, "learning_rate": 9.806421101883155e-06, "loss": 0.4279, "step": 7001 }, { "epoch": 0.3213253177917489, "grad_norm": 0.40987682342529297, "learning_rate": 9.806353532420812e-06, "loss": 0.315, "step": 7002 }, { "epoch": 0.32137120829700333, "grad_norm": 0.48975443840026855, "learning_rate": 9.806285951400707e-06, "loss": 0.4083, "step": 7003 }, { "epoch": 0.32141709880225783, "grad_norm": 0.4901741147041321, "learning_rate": 9.806218358823003e-06, "loss": 0.4654, "step": 7004 }, { "epoch": 0.32146298930751227, "grad_norm": 0.47973528504371643, "learning_rate": 9.806150754687858e-06, "loss": 0.3852, "step": 7005 }, { "epoch": 0.3215088798127667, "grad_norm": 0.497993141412735, "learning_rate": 9.80608313899544e-06, "loss": 0.471, "step": 7006 }, { "epoch": 0.3215547703180212, "grad_norm": 0.4888631999492645, "learning_rate": 9.806015511745906e-06, "loss": 0.4445, "step": 7007 }, { "epoch": 0.32160066082327565, "grad_norm": 0.5411748886108398, "learning_rate": 9.805947872939424e-06, "loss": 0.5165, "step": 7008 }, { "epoch": 0.32164655132853015, "grad_norm": 0.5264907479286194, "learning_rate": 9.805880222576151e-06, "loss": 0.5388, "step": 7009 }, { "epoch": 0.3216924418337846, "grad_norm": 0.4764230251312256, "learning_rate": 9.805812560656256e-06, "loss": 0.4247, "step": 7010 }, { "epoch": 0.32173833233903903, "grad_norm": 0.4643279016017914, "learning_rate": 9.805744887179897e-06, "loss": 0.3476, "step": 7011 }, { "epoch": 0.32178422284429353, "grad_norm": 0.47345560789108276, "learning_rate": 9.805677202147239e-06, "loss": 0.4037, "step": 7012 }, { "epoch": 0.321830113349548, "grad_norm": 0.4348125457763672, "learning_rate": 9.805609505558444e-06, "loss": 0.3175, "step": 7013 }, { "epoch": 0.3218760038548024, "grad_norm": 0.5154327750205994, "learning_rate": 9.805541797413674e-06, "loss": 0.4469, "step": 7014 }, { "epoch": 0.3219218943600569, "grad_norm": 0.45275041460990906, "learning_rate": 9.805474077713094e-06, "loss": 0.3966, "step": 7015 }, { "epoch": 0.32196778486531136, "grad_norm": 0.49300527572631836, "learning_rate": 9.805406346456864e-06, "loss": 0.4317, "step": 7016 }, { "epoch": 0.32201367537056585, "grad_norm": 0.460650235414505, "learning_rate": 9.80533860364515e-06, "loss": 0.3902, "step": 7017 }, { "epoch": 0.3220595658758203, "grad_norm": 0.5048458576202393, "learning_rate": 9.805270849278112e-06, "loss": 0.4507, "step": 7018 }, { "epoch": 0.32210545638107474, "grad_norm": 0.5237061381340027, "learning_rate": 9.805203083355915e-06, "loss": 0.5366, "step": 7019 }, { "epoch": 0.32215134688632924, "grad_norm": 0.5383538007736206, "learning_rate": 9.805135305878721e-06, "loss": 0.5092, "step": 7020 }, { "epoch": 0.3221972373915837, "grad_norm": 0.450731486082077, "learning_rate": 9.805067516846694e-06, "loss": 0.3733, "step": 7021 }, { "epoch": 0.3222431278968381, "grad_norm": 0.44247156381607056, "learning_rate": 9.804999716259994e-06, "loss": 0.3733, "step": 7022 }, { "epoch": 0.3222890184020926, "grad_norm": 0.4735422432422638, "learning_rate": 9.804931904118787e-06, "loss": 0.3995, "step": 7023 }, { "epoch": 0.32233490890734706, "grad_norm": 0.4424068033695221, "learning_rate": 9.804864080423235e-06, "loss": 0.3455, "step": 7024 }, { "epoch": 0.32238079941260156, "grad_norm": 0.46388566493988037, "learning_rate": 9.804796245173502e-06, "loss": 0.4523, "step": 7025 }, { "epoch": 0.322426689917856, "grad_norm": 0.4692797064781189, "learning_rate": 9.80472839836975e-06, "loss": 0.4066, "step": 7026 }, { "epoch": 0.32247258042311044, "grad_norm": 0.4479910731315613, "learning_rate": 9.804660540012144e-06, "loss": 0.3168, "step": 7027 }, { "epoch": 0.32251847092836494, "grad_norm": 0.46002689003944397, "learning_rate": 9.804592670100844e-06, "loss": 0.3653, "step": 7028 }, { "epoch": 0.3225643614336194, "grad_norm": 0.491105854511261, "learning_rate": 9.804524788636015e-06, "loss": 0.4993, "step": 7029 }, { "epoch": 0.3226102519388738, "grad_norm": 0.45007118582725525, "learning_rate": 9.804456895617822e-06, "loss": 0.3991, "step": 7030 }, { "epoch": 0.3226561424441283, "grad_norm": 0.48447445034980774, "learning_rate": 9.804388991046425e-06, "loss": 0.4252, "step": 7031 }, { "epoch": 0.32270203294938277, "grad_norm": 0.4327069818973541, "learning_rate": 9.80432107492199e-06, "loss": 0.3564, "step": 7032 }, { "epoch": 0.3227479234546372, "grad_norm": 0.47352153062820435, "learning_rate": 9.804253147244677e-06, "loss": 0.5026, "step": 7033 }, { "epoch": 0.3227938139598917, "grad_norm": 0.474072128534317, "learning_rate": 9.804185208014653e-06, "loss": 0.4239, "step": 7034 }, { "epoch": 0.32283970446514615, "grad_norm": 0.484637588262558, "learning_rate": 9.80411725723208e-06, "loss": 0.395, "step": 7035 }, { "epoch": 0.32288559497040065, "grad_norm": 0.5270601511001587, "learning_rate": 9.804049294897121e-06, "loss": 0.4765, "step": 7036 }, { "epoch": 0.3229314854756551, "grad_norm": 0.5001643300056458, "learning_rate": 9.803981321009938e-06, "loss": 0.4406, "step": 7037 }, { "epoch": 0.32297737598090953, "grad_norm": 0.5076088309288025, "learning_rate": 9.8039133355707e-06, "loss": 0.4903, "step": 7038 }, { "epoch": 0.32302326648616403, "grad_norm": 0.43508297204971313, "learning_rate": 9.803845338579563e-06, "loss": 0.3405, "step": 7039 }, { "epoch": 0.32306915699141847, "grad_norm": 0.4845043122768402, "learning_rate": 9.803777330036696e-06, "loss": 0.4611, "step": 7040 }, { "epoch": 0.3231150474966729, "grad_norm": 0.4102739989757538, "learning_rate": 9.80370930994226e-06, "loss": 0.2978, "step": 7041 }, { "epoch": 0.3231609380019274, "grad_norm": 0.49328944087028503, "learning_rate": 9.803641278296418e-06, "loss": 0.4564, "step": 7042 }, { "epoch": 0.32320682850718185, "grad_norm": 0.4268563985824585, "learning_rate": 9.803573235099336e-06, "loss": 0.3137, "step": 7043 }, { "epoch": 0.32325271901243635, "grad_norm": 0.44580456614494324, "learning_rate": 9.803505180351176e-06, "loss": 0.3794, "step": 7044 }, { "epoch": 0.3232986095176908, "grad_norm": 0.5148495435714722, "learning_rate": 9.803437114052103e-06, "loss": 0.4334, "step": 7045 }, { "epoch": 0.32334450002294524, "grad_norm": 0.4483284652233124, "learning_rate": 9.80336903620228e-06, "loss": 0.3684, "step": 7046 }, { "epoch": 0.32339039052819973, "grad_norm": 0.49176308512687683, "learning_rate": 9.803300946801868e-06, "loss": 0.4482, "step": 7047 }, { "epoch": 0.3234362810334542, "grad_norm": 0.42327481508255005, "learning_rate": 9.803232845851037e-06, "loss": 0.3303, "step": 7048 }, { "epoch": 0.3234821715387086, "grad_norm": 0.4596683382987976, "learning_rate": 9.803164733349944e-06, "loss": 0.4131, "step": 7049 }, { "epoch": 0.3235280620439631, "grad_norm": 0.43727219104766846, "learning_rate": 9.803096609298756e-06, "loss": 0.3492, "step": 7050 }, { "epoch": 0.32357395254921756, "grad_norm": 0.46289801597595215, "learning_rate": 9.803028473697637e-06, "loss": 0.3989, "step": 7051 }, { "epoch": 0.32361984305447206, "grad_norm": 0.5220200419425964, "learning_rate": 9.80296032654675e-06, "loss": 0.5431, "step": 7052 }, { "epoch": 0.3236657335597265, "grad_norm": 0.4585173428058624, "learning_rate": 9.80289216784626e-06, "loss": 0.4008, "step": 7053 }, { "epoch": 0.32371162406498094, "grad_norm": 0.46890947222709656, "learning_rate": 9.802823997596328e-06, "loss": 0.4512, "step": 7054 }, { "epoch": 0.32375751457023544, "grad_norm": 0.4436309039592743, "learning_rate": 9.802755815797124e-06, "loss": 0.3756, "step": 7055 }, { "epoch": 0.3238034050754899, "grad_norm": 0.454496294260025, "learning_rate": 9.802687622448805e-06, "loss": 0.4038, "step": 7056 }, { "epoch": 0.3238492955807443, "grad_norm": 0.46063926815986633, "learning_rate": 9.802619417551539e-06, "loss": 0.4051, "step": 7057 }, { "epoch": 0.3238951860859988, "grad_norm": 0.43899863958358765, "learning_rate": 9.80255120110549e-06, "loss": 0.3332, "step": 7058 }, { "epoch": 0.32394107659125326, "grad_norm": 0.4674995541572571, "learning_rate": 9.80248297311082e-06, "loss": 0.4234, "step": 7059 }, { "epoch": 0.3239869670965077, "grad_norm": 0.4729859530925751, "learning_rate": 9.802414733567695e-06, "loss": 0.4144, "step": 7060 }, { "epoch": 0.3240328576017622, "grad_norm": 0.476251482963562, "learning_rate": 9.802346482476277e-06, "loss": 0.4748, "step": 7061 }, { "epoch": 0.32407874810701665, "grad_norm": 0.435566782951355, "learning_rate": 9.802278219836731e-06, "loss": 0.3687, "step": 7062 }, { "epoch": 0.32412463861227114, "grad_norm": 0.46455636620521545, "learning_rate": 9.802209945649224e-06, "loss": 0.4097, "step": 7063 }, { "epoch": 0.3241705291175256, "grad_norm": 0.4642910957336426, "learning_rate": 9.802141659913916e-06, "loss": 0.4161, "step": 7064 }, { "epoch": 0.32421641962278, "grad_norm": 0.478520929813385, "learning_rate": 9.802073362630973e-06, "loss": 0.4581, "step": 7065 }, { "epoch": 0.3242623101280345, "grad_norm": 0.5269163846969604, "learning_rate": 9.802005053800559e-06, "loss": 0.4577, "step": 7066 }, { "epoch": 0.32430820063328897, "grad_norm": 0.4635535180568695, "learning_rate": 9.801936733422839e-06, "loss": 0.401, "step": 7067 }, { "epoch": 0.3243540911385434, "grad_norm": 0.4734187126159668, "learning_rate": 9.801868401497975e-06, "loss": 0.4034, "step": 7068 }, { "epoch": 0.3243999816437979, "grad_norm": 0.45452624559402466, "learning_rate": 9.801800058026136e-06, "loss": 0.4311, "step": 7069 }, { "epoch": 0.32444587214905235, "grad_norm": 0.47766679525375366, "learning_rate": 9.801731703007481e-06, "loss": 0.4112, "step": 7070 }, { "epoch": 0.32449176265430685, "grad_norm": 0.48639923334121704, "learning_rate": 9.801663336442178e-06, "loss": 0.4079, "step": 7071 }, { "epoch": 0.3245376531595613, "grad_norm": 0.478443443775177, "learning_rate": 9.80159495833039e-06, "loss": 0.4015, "step": 7072 }, { "epoch": 0.32458354366481573, "grad_norm": 0.5137795805931091, "learning_rate": 9.80152656867228e-06, "loss": 0.4385, "step": 7073 }, { "epoch": 0.32462943417007023, "grad_norm": 0.5265718102455139, "learning_rate": 9.801458167468015e-06, "loss": 0.3669, "step": 7074 }, { "epoch": 0.3246753246753247, "grad_norm": 0.4520326852798462, "learning_rate": 9.801389754717758e-06, "loss": 0.366, "step": 7075 }, { "epoch": 0.3247212151805791, "grad_norm": 0.4636530578136444, "learning_rate": 9.801321330421676e-06, "loss": 0.3618, "step": 7076 }, { "epoch": 0.3247671056858336, "grad_norm": 0.46683430671691895, "learning_rate": 9.80125289457993e-06, "loss": 0.354, "step": 7077 }, { "epoch": 0.32481299619108805, "grad_norm": 0.5233639478683472, "learning_rate": 9.801184447192686e-06, "loss": 0.4779, "step": 7078 }, { "epoch": 0.32485888669634255, "grad_norm": 0.4272993803024292, "learning_rate": 9.801115988260109e-06, "loss": 0.3138, "step": 7079 }, { "epoch": 0.324904777201597, "grad_norm": 0.46075162291526794, "learning_rate": 9.801047517782362e-06, "loss": 0.3997, "step": 7080 }, { "epoch": 0.32495066770685144, "grad_norm": 0.4669494330883026, "learning_rate": 9.800979035759614e-06, "loss": 0.4474, "step": 7081 }, { "epoch": 0.32499655821210593, "grad_norm": 0.48696446418762207, "learning_rate": 9.800910542192024e-06, "loss": 0.4384, "step": 7082 }, { "epoch": 0.3250424487173604, "grad_norm": 0.4249604344367981, "learning_rate": 9.800842037079761e-06, "loss": 0.3286, "step": 7083 }, { "epoch": 0.3250883392226148, "grad_norm": 0.44789379835128784, "learning_rate": 9.800773520422988e-06, "loss": 0.381, "step": 7084 }, { "epoch": 0.3251342297278693, "grad_norm": 0.45104047656059265, "learning_rate": 9.800704992221867e-06, "loss": 0.3582, "step": 7085 }, { "epoch": 0.32518012023312376, "grad_norm": 0.4855601489543915, "learning_rate": 9.800636452476568e-06, "loss": 0.436, "step": 7086 }, { "epoch": 0.32522601073837826, "grad_norm": 0.48952972888946533, "learning_rate": 9.800567901187254e-06, "loss": 0.4434, "step": 7087 }, { "epoch": 0.3252719012436327, "grad_norm": 0.45446062088012695, "learning_rate": 9.80049933835409e-06, "loss": 0.4054, "step": 7088 }, { "epoch": 0.32531779174888714, "grad_norm": 0.42279475927352905, "learning_rate": 9.800430763977237e-06, "loss": 0.3123, "step": 7089 }, { "epoch": 0.32536368225414164, "grad_norm": 0.4780943691730499, "learning_rate": 9.800362178056866e-06, "loss": 0.4539, "step": 7090 }, { "epoch": 0.3254095727593961, "grad_norm": 0.4395153820514679, "learning_rate": 9.800293580593137e-06, "loss": 0.3645, "step": 7091 }, { "epoch": 0.3254554632646505, "grad_norm": 0.509714663028717, "learning_rate": 9.800224971586219e-06, "loss": 0.4953, "step": 7092 }, { "epoch": 0.325501353769905, "grad_norm": 0.5116782784461975, "learning_rate": 9.800156351036274e-06, "loss": 0.4945, "step": 7093 }, { "epoch": 0.32554724427515946, "grad_norm": 0.4845845401287079, "learning_rate": 9.800087718943467e-06, "loss": 0.4493, "step": 7094 }, { "epoch": 0.3255931347804139, "grad_norm": 0.4175945520401001, "learning_rate": 9.800019075307964e-06, "loss": 0.3195, "step": 7095 }, { "epoch": 0.3256390252856684, "grad_norm": 0.4356710910797119, "learning_rate": 9.799950420129932e-06, "loss": 0.3676, "step": 7096 }, { "epoch": 0.32568491579092285, "grad_norm": 0.4709814786911011, "learning_rate": 9.799881753409532e-06, "loss": 0.467, "step": 7097 }, { "epoch": 0.32573080629617734, "grad_norm": 0.4607873857021332, "learning_rate": 9.799813075146933e-06, "loss": 0.3508, "step": 7098 }, { "epoch": 0.3257766968014318, "grad_norm": 0.541644275188446, "learning_rate": 9.799744385342297e-06, "loss": 0.5019, "step": 7099 }, { "epoch": 0.32582258730668623, "grad_norm": 0.4374476373195648, "learning_rate": 9.79967568399579e-06, "loss": 0.3826, "step": 7100 }, { "epoch": 0.3258684778119407, "grad_norm": 0.422495037317276, "learning_rate": 9.79960697110758e-06, "loss": 0.3211, "step": 7101 }, { "epoch": 0.32591436831719517, "grad_norm": 0.43906939029693604, "learning_rate": 9.79953824667783e-06, "loss": 0.3528, "step": 7102 }, { "epoch": 0.3259602588224496, "grad_norm": 0.4160074293613434, "learning_rate": 9.799469510706703e-06, "loss": 0.3348, "step": 7103 }, { "epoch": 0.3260061493277041, "grad_norm": 0.47561705112457275, "learning_rate": 9.79940076319437e-06, "loss": 0.3957, "step": 7104 }, { "epoch": 0.32605203983295855, "grad_norm": 0.46520480513572693, "learning_rate": 9.799332004140989e-06, "loss": 0.3878, "step": 7105 }, { "epoch": 0.32609793033821305, "grad_norm": 0.5143229365348816, "learning_rate": 9.799263233546731e-06, "loss": 0.4957, "step": 7106 }, { "epoch": 0.3261438208434675, "grad_norm": 0.456076979637146, "learning_rate": 9.79919445141176e-06, "loss": 0.3233, "step": 7107 }, { "epoch": 0.32618971134872193, "grad_norm": 0.439850389957428, "learning_rate": 9.799125657736241e-06, "loss": 0.3266, "step": 7108 }, { "epoch": 0.32623560185397643, "grad_norm": 0.4379720985889435, "learning_rate": 9.799056852520339e-06, "loss": 0.3678, "step": 7109 }, { "epoch": 0.3262814923592309, "grad_norm": 0.45695942640304565, "learning_rate": 9.798988035764219e-06, "loss": 0.3551, "step": 7110 }, { "epoch": 0.3263273828644853, "grad_norm": 0.4401470124721527, "learning_rate": 9.798919207468048e-06, "loss": 0.3616, "step": 7111 }, { "epoch": 0.3263732733697398, "grad_norm": 0.4338792860507965, "learning_rate": 9.798850367631991e-06, "loss": 0.3577, "step": 7112 }, { "epoch": 0.32641916387499426, "grad_norm": 0.469707190990448, "learning_rate": 9.798781516256215e-06, "loss": 0.3933, "step": 7113 }, { "epoch": 0.32646505438024875, "grad_norm": 0.4474703371524811, "learning_rate": 9.798712653340882e-06, "loss": 0.3433, "step": 7114 }, { "epoch": 0.3265109448855032, "grad_norm": 0.45166924595832825, "learning_rate": 9.79864377888616e-06, "loss": 0.3616, "step": 7115 }, { "epoch": 0.32655683539075764, "grad_norm": 0.4328984022140503, "learning_rate": 9.798574892892214e-06, "loss": 0.3671, "step": 7116 }, { "epoch": 0.32660272589601214, "grad_norm": 0.4833146035671234, "learning_rate": 9.79850599535921e-06, "loss": 0.4224, "step": 7117 }, { "epoch": 0.3266486164012666, "grad_norm": 0.4277803897857666, "learning_rate": 9.798437086287316e-06, "loss": 0.3241, "step": 7118 }, { "epoch": 0.326694506906521, "grad_norm": 0.5145024657249451, "learning_rate": 9.798368165676693e-06, "loss": 0.4964, "step": 7119 }, { "epoch": 0.3267403974117755, "grad_norm": 0.42128852009773254, "learning_rate": 9.798299233527508e-06, "loss": 0.3438, "step": 7120 }, { "epoch": 0.32678628791702996, "grad_norm": 0.4736366271972656, "learning_rate": 9.79823028983993e-06, "loss": 0.4334, "step": 7121 }, { "epoch": 0.3268321784222844, "grad_norm": 0.42067641019821167, "learning_rate": 9.79816133461412e-06, "loss": 0.328, "step": 7122 }, { "epoch": 0.3268780689275389, "grad_norm": 0.45127958059310913, "learning_rate": 9.798092367850248e-06, "loss": 0.3417, "step": 7123 }, { "epoch": 0.32692395943279334, "grad_norm": 0.4792923033237457, "learning_rate": 9.798023389548478e-06, "loss": 0.4227, "step": 7124 }, { "epoch": 0.32696984993804784, "grad_norm": 0.5275638699531555, "learning_rate": 9.797954399708975e-06, "loss": 0.4821, "step": 7125 }, { "epoch": 0.3270157404433023, "grad_norm": 0.48499158024787903, "learning_rate": 9.797885398331908e-06, "loss": 0.43, "step": 7126 }, { "epoch": 0.3270616309485567, "grad_norm": 0.6616424322128296, "learning_rate": 9.797816385417438e-06, "loss": 0.3699, "step": 7127 }, { "epoch": 0.3271075214538112, "grad_norm": 0.47677940130233765, "learning_rate": 9.797747360965736e-06, "loss": 0.4036, "step": 7128 }, { "epoch": 0.32715341195906567, "grad_norm": 0.5152416229248047, "learning_rate": 9.797678324976966e-06, "loss": 0.4422, "step": 7129 }, { "epoch": 0.3271993024643201, "grad_norm": 0.4563952088356018, "learning_rate": 9.797609277451294e-06, "loss": 0.3685, "step": 7130 }, { "epoch": 0.3272451929695746, "grad_norm": 0.44995447993278503, "learning_rate": 9.797540218388884e-06, "loss": 0.374, "step": 7131 }, { "epoch": 0.32729108347482905, "grad_norm": 0.4183056354522705, "learning_rate": 9.797471147789905e-06, "loss": 0.3169, "step": 7132 }, { "epoch": 0.32733697398008355, "grad_norm": 0.45963019132614136, "learning_rate": 9.797402065654522e-06, "loss": 0.3875, "step": 7133 }, { "epoch": 0.327382864485338, "grad_norm": 0.48143985867500305, "learning_rate": 9.797332971982902e-06, "loss": 0.4199, "step": 7134 }, { "epoch": 0.32742875499059243, "grad_norm": 0.4704936146736145, "learning_rate": 9.797263866775209e-06, "loss": 0.4185, "step": 7135 }, { "epoch": 0.3274746454958469, "grad_norm": 0.4663526713848114, "learning_rate": 9.79719475003161e-06, "loss": 0.3888, "step": 7136 }, { "epoch": 0.32752053600110137, "grad_norm": 0.45433521270751953, "learning_rate": 9.797125621752273e-06, "loss": 0.386, "step": 7137 }, { "epoch": 0.3275664265063558, "grad_norm": 0.4518144130706787, "learning_rate": 9.797056481937362e-06, "loss": 0.3491, "step": 7138 }, { "epoch": 0.3276123170116103, "grad_norm": 0.5075174570083618, "learning_rate": 9.796987330587045e-06, "loss": 0.4592, "step": 7139 }, { "epoch": 0.32765820751686475, "grad_norm": 0.46694523096084595, "learning_rate": 9.796918167701487e-06, "loss": 0.4264, "step": 7140 }, { "epoch": 0.32770409802211925, "grad_norm": 0.4850994944572449, "learning_rate": 9.796848993280853e-06, "loss": 0.4002, "step": 7141 }, { "epoch": 0.3277499885273737, "grad_norm": 0.4872625172138214, "learning_rate": 9.796779807325313e-06, "loss": 0.3883, "step": 7142 }, { "epoch": 0.32779587903262813, "grad_norm": 0.5141732692718506, "learning_rate": 9.796710609835031e-06, "loss": 0.4552, "step": 7143 }, { "epoch": 0.32784176953788263, "grad_norm": 0.4589981138706207, "learning_rate": 9.796641400810176e-06, "loss": 0.3665, "step": 7144 }, { "epoch": 0.3278876600431371, "grad_norm": 0.5192262530326843, "learning_rate": 9.796572180250909e-06, "loss": 0.4587, "step": 7145 }, { "epoch": 0.3279335505483915, "grad_norm": 0.4339587390422821, "learning_rate": 9.796502948157403e-06, "loss": 0.3545, "step": 7146 }, { "epoch": 0.327979441053646, "grad_norm": 0.5236297249794006, "learning_rate": 9.796433704529818e-06, "loss": 0.4239, "step": 7147 }, { "epoch": 0.32802533155890046, "grad_norm": 0.44440963864326477, "learning_rate": 9.796364449368326e-06, "loss": 0.3881, "step": 7148 }, { "epoch": 0.3280712220641549, "grad_norm": 0.47269517183303833, "learning_rate": 9.79629518267309e-06, "loss": 0.419, "step": 7149 }, { "epoch": 0.3281171125694094, "grad_norm": 0.4474486708641052, "learning_rate": 9.79622590444428e-06, "loss": 0.3476, "step": 7150 }, { "epoch": 0.32816300307466384, "grad_norm": 0.4910537600517273, "learning_rate": 9.796156614682058e-06, "loss": 0.4651, "step": 7151 }, { "epoch": 0.32820889357991834, "grad_norm": 0.4842228591442108, "learning_rate": 9.796087313386594e-06, "loss": 0.4837, "step": 7152 }, { "epoch": 0.3282547840851728, "grad_norm": 0.42983606457710266, "learning_rate": 9.796018000558052e-06, "loss": 0.3841, "step": 7153 }, { "epoch": 0.3283006745904272, "grad_norm": 0.48621898889541626, "learning_rate": 9.795948676196602e-06, "loss": 0.5052, "step": 7154 }, { "epoch": 0.3283465650956817, "grad_norm": 0.5249859690666199, "learning_rate": 9.79587934030241e-06, "loss": 0.4923, "step": 7155 }, { "epoch": 0.32839245560093616, "grad_norm": 0.4727841019630432, "learning_rate": 9.79580999287564e-06, "loss": 0.3707, "step": 7156 }, { "epoch": 0.3284383461061906, "grad_norm": 0.46034321188926697, "learning_rate": 9.795740633916462e-06, "loss": 0.4326, "step": 7157 }, { "epoch": 0.3284842366114451, "grad_norm": 0.478100061416626, "learning_rate": 9.79567126342504e-06, "loss": 0.3785, "step": 7158 }, { "epoch": 0.32853012711669954, "grad_norm": 0.4651584327220917, "learning_rate": 9.795601881401544e-06, "loss": 0.3653, "step": 7159 }, { "epoch": 0.32857601762195404, "grad_norm": 0.4878479838371277, "learning_rate": 9.795532487846138e-06, "loss": 0.4823, "step": 7160 }, { "epoch": 0.3286219081272085, "grad_norm": 0.4737392067909241, "learning_rate": 9.79546308275899e-06, "loss": 0.4219, "step": 7161 }, { "epoch": 0.3286677986324629, "grad_norm": 0.41405847668647766, "learning_rate": 9.795393666140264e-06, "loss": 0.3403, "step": 7162 }, { "epoch": 0.3287136891377174, "grad_norm": 0.45254451036453247, "learning_rate": 9.795324237990134e-06, "loss": 0.3722, "step": 7163 }, { "epoch": 0.32875957964297187, "grad_norm": 0.5143303275108337, "learning_rate": 9.79525479830876e-06, "loss": 0.456, "step": 7164 }, { "epoch": 0.3288054701482263, "grad_norm": 0.5072575211524963, "learning_rate": 9.795185347096312e-06, "loss": 0.4478, "step": 7165 }, { "epoch": 0.3288513606534808, "grad_norm": 0.7139179110527039, "learning_rate": 9.795115884352958e-06, "loss": 0.4245, "step": 7166 }, { "epoch": 0.32889725115873525, "grad_norm": 0.4818011522293091, "learning_rate": 9.795046410078863e-06, "loss": 0.4641, "step": 7167 }, { "epoch": 0.32894314166398975, "grad_norm": 0.4635820686817169, "learning_rate": 9.794976924274195e-06, "loss": 0.3921, "step": 7168 }, { "epoch": 0.3289890321692442, "grad_norm": 0.4369891583919525, "learning_rate": 9.79490742693912e-06, "loss": 0.3806, "step": 7169 }, { "epoch": 0.32903492267449863, "grad_norm": 0.5386903285980225, "learning_rate": 9.794837918073807e-06, "loss": 0.5265, "step": 7170 }, { "epoch": 0.32908081317975313, "grad_norm": 0.4369814693927765, "learning_rate": 9.79476839767842e-06, "loss": 0.317, "step": 7171 }, { "epoch": 0.32912670368500757, "grad_norm": 0.5245389938354492, "learning_rate": 9.79469886575313e-06, "loss": 0.5484, "step": 7172 }, { "epoch": 0.329172594190262, "grad_norm": 0.480056494474411, "learning_rate": 9.794629322298102e-06, "loss": 0.4701, "step": 7173 }, { "epoch": 0.3292184846955165, "grad_norm": 0.4815145432949066, "learning_rate": 9.794559767313505e-06, "loss": 0.4118, "step": 7174 }, { "epoch": 0.32926437520077095, "grad_norm": 0.4611259400844574, "learning_rate": 9.794490200799504e-06, "loss": 0.3807, "step": 7175 }, { "epoch": 0.3293102657060254, "grad_norm": 0.45196035504341125, "learning_rate": 9.794420622756267e-06, "loss": 0.3288, "step": 7176 }, { "epoch": 0.3293561562112799, "grad_norm": 0.4814394414424896, "learning_rate": 9.794351033183962e-06, "loss": 0.4466, "step": 7177 }, { "epoch": 0.32940204671653434, "grad_norm": 0.4677477478981018, "learning_rate": 9.794281432082755e-06, "loss": 0.4389, "step": 7178 }, { "epoch": 0.32944793722178883, "grad_norm": 0.4827670156955719, "learning_rate": 9.794211819452814e-06, "loss": 0.4288, "step": 7179 }, { "epoch": 0.3294938277270433, "grad_norm": 0.489328533411026, "learning_rate": 9.794142195294308e-06, "loss": 0.4772, "step": 7180 }, { "epoch": 0.3295397182322977, "grad_norm": 0.4538560211658478, "learning_rate": 9.794072559607404e-06, "loss": 0.4263, "step": 7181 }, { "epoch": 0.3295856087375522, "grad_norm": 0.457256942987442, "learning_rate": 9.794002912392266e-06, "loss": 0.3747, "step": 7182 }, { "epoch": 0.32963149924280666, "grad_norm": 0.4503881633281708, "learning_rate": 9.793933253649066e-06, "loss": 0.4264, "step": 7183 }, { "epoch": 0.3296773897480611, "grad_norm": 0.5007647275924683, "learning_rate": 9.793863583377969e-06, "loss": 0.4302, "step": 7184 }, { "epoch": 0.3297232802533156, "grad_norm": 0.43484941124916077, "learning_rate": 9.793793901579143e-06, "loss": 0.3447, "step": 7185 }, { "epoch": 0.32976917075857004, "grad_norm": 0.4231371283531189, "learning_rate": 9.793724208252755e-06, "loss": 0.3184, "step": 7186 }, { "epoch": 0.32981506126382454, "grad_norm": 0.4640555679798126, "learning_rate": 9.793654503398973e-06, "loss": 0.433, "step": 7187 }, { "epoch": 0.329860951769079, "grad_norm": 0.47930943965911865, "learning_rate": 9.793584787017966e-06, "loss": 0.4666, "step": 7188 }, { "epoch": 0.3299068422743334, "grad_norm": 0.4279690384864807, "learning_rate": 9.793515059109899e-06, "loss": 0.2937, "step": 7189 }, { "epoch": 0.3299527327795879, "grad_norm": 0.5087365508079529, "learning_rate": 9.793445319674944e-06, "loss": 0.4128, "step": 7190 }, { "epoch": 0.32999862328484236, "grad_norm": 0.4626550078392029, "learning_rate": 9.793375568713264e-06, "loss": 0.3579, "step": 7191 }, { "epoch": 0.3300445137900968, "grad_norm": 0.4642115533351898, "learning_rate": 9.793305806225028e-06, "loss": 0.4468, "step": 7192 }, { "epoch": 0.3300904042953513, "grad_norm": 0.46554937958717346, "learning_rate": 9.793236032210405e-06, "loss": 0.3858, "step": 7193 }, { "epoch": 0.33013629480060575, "grad_norm": 0.48201119899749756, "learning_rate": 9.79316624666956e-06, "loss": 0.3998, "step": 7194 }, { "epoch": 0.33018218530586024, "grad_norm": 0.49105948209762573, "learning_rate": 9.793096449602666e-06, "loss": 0.4233, "step": 7195 }, { "epoch": 0.3302280758111147, "grad_norm": 0.44804123044013977, "learning_rate": 9.793026641009888e-06, "loss": 0.3812, "step": 7196 }, { "epoch": 0.3302739663163691, "grad_norm": 0.47043225169181824, "learning_rate": 9.792956820891392e-06, "loss": 0.3847, "step": 7197 }, { "epoch": 0.3303198568216236, "grad_norm": 0.47808828949928284, "learning_rate": 9.792886989247347e-06, "loss": 0.4185, "step": 7198 }, { "epoch": 0.33036574732687807, "grad_norm": 0.5053940415382385, "learning_rate": 9.792817146077923e-06, "loss": 0.3971, "step": 7199 }, { "epoch": 0.3304116378321325, "grad_norm": 0.49025535583496094, "learning_rate": 9.792747291383287e-06, "loss": 0.4273, "step": 7200 }, { "epoch": 0.330457528337387, "grad_norm": 0.46630677580833435, "learning_rate": 9.792677425163604e-06, "loss": 0.4232, "step": 7201 }, { "epoch": 0.33050341884264145, "grad_norm": 0.4766657054424286, "learning_rate": 9.792607547419046e-06, "loss": 0.4236, "step": 7202 }, { "epoch": 0.33054930934789595, "grad_norm": 0.42645126581192017, "learning_rate": 9.792537658149778e-06, "loss": 0.3416, "step": 7203 }, { "epoch": 0.3305951998531504, "grad_norm": 0.47774538397789, "learning_rate": 9.792467757355973e-06, "loss": 0.3896, "step": 7204 }, { "epoch": 0.33064109035840483, "grad_norm": 0.46343567967414856, "learning_rate": 9.792397845037793e-06, "loss": 0.4005, "step": 7205 }, { "epoch": 0.33068698086365933, "grad_norm": 0.4334917962551117, "learning_rate": 9.792327921195408e-06, "loss": 0.3492, "step": 7206 }, { "epoch": 0.3307328713689138, "grad_norm": 0.4725760817527771, "learning_rate": 9.792257985828988e-06, "loss": 0.4258, "step": 7207 }, { "epoch": 0.3307787618741682, "grad_norm": 0.49206680059432983, "learning_rate": 9.7921880389387e-06, "loss": 0.4647, "step": 7208 }, { "epoch": 0.3308246523794227, "grad_norm": 0.5222856402397156, "learning_rate": 9.792118080524713e-06, "loss": 0.4053, "step": 7209 }, { "epoch": 0.33087054288467715, "grad_norm": 0.473874568939209, "learning_rate": 9.792048110587194e-06, "loss": 0.3893, "step": 7210 }, { "epoch": 0.3309164333899316, "grad_norm": 0.495440274477005, "learning_rate": 9.791978129126309e-06, "loss": 0.4391, "step": 7211 }, { "epoch": 0.3309623238951861, "grad_norm": 0.4714395999908447, "learning_rate": 9.791908136142232e-06, "loss": 0.3767, "step": 7212 }, { "epoch": 0.33100821440044054, "grad_norm": 0.46735507249832153, "learning_rate": 9.791838131635128e-06, "loss": 0.392, "step": 7213 }, { "epoch": 0.33105410490569503, "grad_norm": 0.4522331655025482, "learning_rate": 9.791768115605165e-06, "loss": 0.3825, "step": 7214 }, { "epoch": 0.3310999954109495, "grad_norm": 0.4642760157585144, "learning_rate": 9.791698088052513e-06, "loss": 0.3896, "step": 7215 }, { "epoch": 0.3311458859162039, "grad_norm": 0.45967569947242737, "learning_rate": 9.791628048977337e-06, "loss": 0.3795, "step": 7216 }, { "epoch": 0.3311917764214584, "grad_norm": 0.46094194054603577, "learning_rate": 9.79155799837981e-06, "loss": 0.38, "step": 7217 }, { "epoch": 0.33123766692671286, "grad_norm": 0.4372478425502777, "learning_rate": 9.791487936260098e-06, "loss": 0.3637, "step": 7218 }, { "epoch": 0.3312835574319673, "grad_norm": 0.5078421831130981, "learning_rate": 9.79141786261837e-06, "loss": 0.4898, "step": 7219 }, { "epoch": 0.3313294479372218, "grad_norm": 0.42087045311927795, "learning_rate": 9.791347777454792e-06, "loss": 0.3459, "step": 7220 }, { "epoch": 0.33137533844247624, "grad_norm": 0.4824684262275696, "learning_rate": 9.791277680769537e-06, "loss": 0.4553, "step": 7221 }, { "epoch": 0.33142122894773074, "grad_norm": 0.45815813541412354, "learning_rate": 9.79120757256277e-06, "loss": 0.4095, "step": 7222 }, { "epoch": 0.3314671194529852, "grad_norm": 0.6095938682556152, "learning_rate": 9.791137452834662e-06, "loss": 0.4104, "step": 7223 }, { "epoch": 0.3315130099582396, "grad_norm": 0.5119222402572632, "learning_rate": 9.79106732158538e-06, "loss": 0.5002, "step": 7224 }, { "epoch": 0.3315589004634941, "grad_norm": 0.494268000125885, "learning_rate": 9.790997178815094e-06, "loss": 0.4571, "step": 7225 }, { "epoch": 0.33160479096874856, "grad_norm": 0.5009903311729431, "learning_rate": 9.790927024523972e-06, "loss": 0.4527, "step": 7226 }, { "epoch": 0.331650681474003, "grad_norm": 0.4653403162956238, "learning_rate": 9.79085685871218e-06, "loss": 0.4065, "step": 7227 }, { "epoch": 0.3316965719792575, "grad_norm": 0.4519183337688446, "learning_rate": 9.790786681379894e-06, "loss": 0.3558, "step": 7228 }, { "epoch": 0.33174246248451195, "grad_norm": 0.49175065755844116, "learning_rate": 9.790716492527275e-06, "loss": 0.4353, "step": 7229 }, { "epoch": 0.33178835298976644, "grad_norm": 0.4947044253349304, "learning_rate": 9.790646292154494e-06, "loss": 0.4518, "step": 7230 }, { "epoch": 0.3318342434950209, "grad_norm": 0.44338518381118774, "learning_rate": 9.790576080261722e-06, "loss": 0.3291, "step": 7231 }, { "epoch": 0.33188013400027533, "grad_norm": 0.44278407096862793, "learning_rate": 9.790505856849127e-06, "loss": 0.355, "step": 7232 }, { "epoch": 0.3319260245055298, "grad_norm": 0.5269693732261658, "learning_rate": 9.790435621916877e-06, "loss": 0.3901, "step": 7233 }, { "epoch": 0.33197191501078427, "grad_norm": 0.43144699931144714, "learning_rate": 9.79036537546514e-06, "loss": 0.3214, "step": 7234 }, { "epoch": 0.3320178055160387, "grad_norm": 0.46735692024230957, "learning_rate": 9.790295117494086e-06, "loss": 0.4135, "step": 7235 }, { "epoch": 0.3320636960212932, "grad_norm": 0.4459136426448822, "learning_rate": 9.790224848003885e-06, "loss": 0.3713, "step": 7236 }, { "epoch": 0.33210958652654765, "grad_norm": 0.511888325214386, "learning_rate": 9.790154566994706e-06, "loss": 0.4826, "step": 7237 }, { "epoch": 0.3321554770318021, "grad_norm": 0.519389271736145, "learning_rate": 9.790084274466716e-06, "loss": 0.4835, "step": 7238 }, { "epoch": 0.3322013675370566, "grad_norm": 0.49302688241004944, "learning_rate": 9.790013970420084e-06, "loss": 0.4773, "step": 7239 }, { "epoch": 0.33224725804231103, "grad_norm": 0.5046329498291016, "learning_rate": 9.789943654854983e-06, "loss": 0.4823, "step": 7240 }, { "epoch": 0.33229314854756553, "grad_norm": 0.46639305353164673, "learning_rate": 9.789873327771578e-06, "loss": 0.4214, "step": 7241 }, { "epoch": 0.33233903905282, "grad_norm": 0.5145673751831055, "learning_rate": 9.789802989170039e-06, "loss": 0.4655, "step": 7242 }, { "epoch": 0.3323849295580744, "grad_norm": 0.4844233989715576, "learning_rate": 9.789732639050536e-06, "loss": 0.4006, "step": 7243 }, { "epoch": 0.3324308200633289, "grad_norm": 0.5160293579101562, "learning_rate": 9.789662277413236e-06, "loss": 0.5178, "step": 7244 }, { "epoch": 0.33247671056858336, "grad_norm": 0.46510404348373413, "learning_rate": 9.789591904258311e-06, "loss": 0.4424, "step": 7245 }, { "epoch": 0.3325226010738378, "grad_norm": 0.490192711353302, "learning_rate": 9.78952151958593e-06, "loss": 0.4831, "step": 7246 }, { "epoch": 0.3325684915790923, "grad_norm": 0.46661701798439026, "learning_rate": 9.78945112339626e-06, "loss": 0.3739, "step": 7247 }, { "epoch": 0.33261438208434674, "grad_norm": 0.4853288531303406, "learning_rate": 9.789380715689472e-06, "loss": 0.4102, "step": 7248 }, { "epoch": 0.33266027258960124, "grad_norm": 0.49862831830978394, "learning_rate": 9.789310296465737e-06, "loss": 0.4497, "step": 7249 }, { "epoch": 0.3327061630948557, "grad_norm": 1.1059547662734985, "learning_rate": 9.789239865725219e-06, "loss": 0.4951, "step": 7250 }, { "epoch": 0.3327520536001101, "grad_norm": 0.48300477862358093, "learning_rate": 9.789169423468092e-06, "loss": 0.491, "step": 7251 }, { "epoch": 0.3327979441053646, "grad_norm": 0.4315713942050934, "learning_rate": 9.789098969694525e-06, "loss": 0.3301, "step": 7252 }, { "epoch": 0.33284383461061906, "grad_norm": 0.48701056838035583, "learning_rate": 9.789028504404685e-06, "loss": 0.3717, "step": 7253 }, { "epoch": 0.3328897251158735, "grad_norm": 0.4320523738861084, "learning_rate": 9.788958027598745e-06, "loss": 0.3022, "step": 7254 }, { "epoch": 0.332935615621128, "grad_norm": 0.4756210744380951, "learning_rate": 9.78888753927687e-06, "loss": 0.3696, "step": 7255 }, { "epoch": 0.33298150612638244, "grad_norm": 0.49892500042915344, "learning_rate": 9.788817039439233e-06, "loss": 0.4624, "step": 7256 }, { "epoch": 0.33302739663163694, "grad_norm": 0.5936259627342224, "learning_rate": 9.788746528086002e-06, "loss": 0.481, "step": 7257 }, { "epoch": 0.3330732871368914, "grad_norm": 0.48534366488456726, "learning_rate": 9.788676005217348e-06, "loss": 0.4262, "step": 7258 }, { "epoch": 0.3331191776421458, "grad_norm": 0.4955044388771057, "learning_rate": 9.788605470833438e-06, "loss": 0.398, "step": 7259 }, { "epoch": 0.3331650681474003, "grad_norm": 0.503099262714386, "learning_rate": 9.788534924934445e-06, "loss": 0.4667, "step": 7260 }, { "epoch": 0.33321095865265477, "grad_norm": 0.5332765579223633, "learning_rate": 9.788464367520536e-06, "loss": 0.4635, "step": 7261 }, { "epoch": 0.3332568491579092, "grad_norm": 0.4678882956504822, "learning_rate": 9.78839379859188e-06, "loss": 0.361, "step": 7262 }, { "epoch": 0.3333027396631637, "grad_norm": 0.49001142382621765, "learning_rate": 9.78832321814865e-06, "loss": 0.4849, "step": 7263 }, { "epoch": 0.33334863016841815, "grad_norm": 0.4585002064704895, "learning_rate": 9.788252626191013e-06, "loss": 0.4336, "step": 7264 }, { "epoch": 0.3333945206736726, "grad_norm": 0.430682897567749, "learning_rate": 9.788182022719141e-06, "loss": 0.3388, "step": 7265 }, { "epoch": 0.3334404111789271, "grad_norm": 0.5248414278030396, "learning_rate": 9.788111407733201e-06, "loss": 0.4567, "step": 7266 }, { "epoch": 0.33348630168418153, "grad_norm": 0.4665341377258301, "learning_rate": 9.788040781233364e-06, "loss": 0.407, "step": 7267 }, { "epoch": 0.333532192189436, "grad_norm": 0.4517952501773834, "learning_rate": 9.787970143219802e-06, "loss": 0.3562, "step": 7268 }, { "epoch": 0.33357808269469047, "grad_norm": 0.4285184442996979, "learning_rate": 9.787899493692681e-06, "loss": 0.3468, "step": 7269 }, { "epoch": 0.3336239731999449, "grad_norm": 0.4653485417366028, "learning_rate": 9.787828832652172e-06, "loss": 0.395, "step": 7270 }, { "epoch": 0.3336698637051994, "grad_norm": 0.49116840958595276, "learning_rate": 9.787758160098448e-06, "loss": 0.4869, "step": 7271 }, { "epoch": 0.33371575421045385, "grad_norm": 0.44094935059547424, "learning_rate": 9.787687476031678e-06, "loss": 0.37, "step": 7272 }, { "epoch": 0.3337616447157083, "grad_norm": 0.4531886875629425, "learning_rate": 9.787616780452027e-06, "loss": 0.3624, "step": 7273 }, { "epoch": 0.3338075352209628, "grad_norm": 0.49113717675209045, "learning_rate": 9.78754607335967e-06, "loss": 0.4738, "step": 7274 }, { "epoch": 0.33385342572621723, "grad_norm": 0.450723797082901, "learning_rate": 9.787475354754777e-06, "loss": 0.4091, "step": 7275 }, { "epoch": 0.33389931623147173, "grad_norm": 0.5710439682006836, "learning_rate": 9.787404624637515e-06, "loss": 0.4463, "step": 7276 }, { "epoch": 0.3339452067367262, "grad_norm": 0.4603712856769562, "learning_rate": 9.787333883008057e-06, "loss": 0.3562, "step": 7277 }, { "epoch": 0.3339910972419806, "grad_norm": 0.4705801010131836, "learning_rate": 9.78726312986657e-06, "loss": 0.3933, "step": 7278 }, { "epoch": 0.3340369877472351, "grad_norm": 0.5127866864204407, "learning_rate": 9.787192365213229e-06, "loss": 0.4299, "step": 7279 }, { "epoch": 0.33408287825248956, "grad_norm": 0.47350046038627625, "learning_rate": 9.7871215890482e-06, "loss": 0.401, "step": 7280 }, { "epoch": 0.334128768757744, "grad_norm": 0.47064611315727234, "learning_rate": 9.787050801371654e-06, "loss": 0.4301, "step": 7281 }, { "epoch": 0.3341746592629985, "grad_norm": 0.514356255531311, "learning_rate": 9.786980002183761e-06, "loss": 0.5029, "step": 7282 }, { "epoch": 0.33422054976825294, "grad_norm": 0.5017508268356323, "learning_rate": 9.786909191484692e-06, "loss": 0.4608, "step": 7283 }, { "epoch": 0.33426644027350744, "grad_norm": 0.48175665736198425, "learning_rate": 9.78683836927462e-06, "loss": 0.4587, "step": 7284 }, { "epoch": 0.3343123307787619, "grad_norm": 0.4773286283016205, "learning_rate": 9.78676753555371e-06, "loss": 0.4436, "step": 7285 }, { "epoch": 0.3343582212840163, "grad_norm": 0.4366253614425659, "learning_rate": 9.786696690322135e-06, "loss": 0.3282, "step": 7286 }, { "epoch": 0.3344041117892708, "grad_norm": 0.47915419936180115, "learning_rate": 9.786625833580066e-06, "loss": 0.4337, "step": 7287 }, { "epoch": 0.33445000229452526, "grad_norm": 0.47704559564590454, "learning_rate": 9.786554965327671e-06, "loss": 0.4453, "step": 7288 }, { "epoch": 0.3344958927997797, "grad_norm": 0.43671613931655884, "learning_rate": 9.786484085565123e-06, "loss": 0.3287, "step": 7289 }, { "epoch": 0.3345417833050342, "grad_norm": 0.4726767838001251, "learning_rate": 9.786413194292593e-06, "loss": 0.4294, "step": 7290 }, { "epoch": 0.33458767381028864, "grad_norm": 0.5310972929000854, "learning_rate": 9.786342291510248e-06, "loss": 0.5696, "step": 7291 }, { "epoch": 0.33463356431554314, "grad_norm": 0.5059199333190918, "learning_rate": 9.78627137721826e-06, "loss": 0.4087, "step": 7292 }, { "epoch": 0.3346794548207976, "grad_norm": 0.4523943066596985, "learning_rate": 9.7862004514168e-06, "loss": 0.4209, "step": 7293 }, { "epoch": 0.334725345326052, "grad_norm": 0.46609583497047424, "learning_rate": 9.78612951410604e-06, "loss": 0.4482, "step": 7294 }, { "epoch": 0.3347712358313065, "grad_norm": 0.46719783544540405, "learning_rate": 9.786058565286148e-06, "loss": 0.3592, "step": 7295 }, { "epoch": 0.33481712633656097, "grad_norm": 0.4765491783618927, "learning_rate": 9.785987604957296e-06, "loss": 0.372, "step": 7296 }, { "epoch": 0.3348630168418154, "grad_norm": 0.5152177214622498, "learning_rate": 9.785916633119655e-06, "loss": 0.4995, "step": 7297 }, { "epoch": 0.3349089073470699, "grad_norm": 0.4777143895626068, "learning_rate": 9.785845649773394e-06, "loss": 0.4153, "step": 7298 }, { "epoch": 0.33495479785232435, "grad_norm": 0.4916500151157379, "learning_rate": 9.785774654918684e-06, "loss": 0.5133, "step": 7299 }, { "epoch": 0.3350006883575788, "grad_norm": 0.44841763377189636, "learning_rate": 9.785703648555698e-06, "loss": 0.3407, "step": 7300 }, { "epoch": 0.3350465788628333, "grad_norm": 0.46499189734458923, "learning_rate": 9.785632630684604e-06, "loss": 0.3804, "step": 7301 }, { "epoch": 0.33509246936808773, "grad_norm": 0.48407605290412903, "learning_rate": 9.785561601305574e-06, "loss": 0.4472, "step": 7302 }, { "epoch": 0.33513835987334223, "grad_norm": 0.5582600235939026, "learning_rate": 9.78549056041878e-06, "loss": 0.5371, "step": 7303 }, { "epoch": 0.33518425037859667, "grad_norm": 0.46563172340393066, "learning_rate": 9.78541950802439e-06, "loss": 0.3987, "step": 7304 }, { "epoch": 0.3352301408838511, "grad_norm": 0.4615235924720764, "learning_rate": 9.785348444122577e-06, "loss": 0.4229, "step": 7305 }, { "epoch": 0.3352760313891056, "grad_norm": 0.4656515419483185, "learning_rate": 9.785277368713511e-06, "loss": 0.4111, "step": 7306 }, { "epoch": 0.33532192189436005, "grad_norm": 0.49970924854278564, "learning_rate": 9.785206281797364e-06, "loss": 0.43, "step": 7307 }, { "epoch": 0.3353678123996145, "grad_norm": 0.4648034870624542, "learning_rate": 9.785135183374305e-06, "loss": 0.4315, "step": 7308 }, { "epoch": 0.335413702904869, "grad_norm": 0.4694409966468811, "learning_rate": 9.785064073444506e-06, "loss": 0.4015, "step": 7309 }, { "epoch": 0.33545959341012344, "grad_norm": 0.4168854355812073, "learning_rate": 9.78499295200814e-06, "loss": 0.3232, "step": 7310 }, { "epoch": 0.33550548391537793, "grad_norm": 0.4731890857219696, "learning_rate": 9.784921819065374e-06, "loss": 0.3618, "step": 7311 }, { "epoch": 0.3355513744206324, "grad_norm": 0.47363045811653137, "learning_rate": 9.784850674616382e-06, "loss": 0.4217, "step": 7312 }, { "epoch": 0.3355972649258868, "grad_norm": 0.5044989585876465, "learning_rate": 9.784779518661334e-06, "loss": 0.4715, "step": 7313 }, { "epoch": 0.3356431554311413, "grad_norm": 0.44733887910842896, "learning_rate": 9.784708351200402e-06, "loss": 0.3689, "step": 7314 }, { "epoch": 0.33568904593639576, "grad_norm": 0.45569586753845215, "learning_rate": 9.784637172233757e-06, "loss": 0.4305, "step": 7315 }, { "epoch": 0.3357349364416502, "grad_norm": 0.470212459564209, "learning_rate": 9.78456598176157e-06, "loss": 0.4393, "step": 7316 }, { "epoch": 0.3357808269469047, "grad_norm": 0.45726487040519714, "learning_rate": 9.784494779784011e-06, "loss": 0.4165, "step": 7317 }, { "epoch": 0.33582671745215914, "grad_norm": 0.45731082558631897, "learning_rate": 9.784423566301252e-06, "loss": 0.4185, "step": 7318 }, { "epoch": 0.33587260795741364, "grad_norm": 0.46821147203445435, "learning_rate": 9.784352341313464e-06, "loss": 0.4396, "step": 7319 }, { "epoch": 0.3359184984626681, "grad_norm": 0.4776060879230499, "learning_rate": 9.784281104820819e-06, "loss": 0.4332, "step": 7320 }, { "epoch": 0.3359643889679225, "grad_norm": 0.43097859621047974, "learning_rate": 9.784209856823486e-06, "loss": 0.3142, "step": 7321 }, { "epoch": 0.336010279473177, "grad_norm": 0.4558699131011963, "learning_rate": 9.784138597321641e-06, "loss": 0.3547, "step": 7322 }, { "epoch": 0.33605616997843146, "grad_norm": 0.4798987805843353, "learning_rate": 9.784067326315451e-06, "loss": 0.4673, "step": 7323 }, { "epoch": 0.3361020604836859, "grad_norm": 0.45970186591148376, "learning_rate": 9.78399604380509e-06, "loss": 0.3984, "step": 7324 }, { "epoch": 0.3361479509889404, "grad_norm": 0.4498862326145172, "learning_rate": 9.783924749790728e-06, "loss": 0.3628, "step": 7325 }, { "epoch": 0.33619384149419484, "grad_norm": 0.4933110475540161, "learning_rate": 9.783853444272538e-06, "loss": 0.4336, "step": 7326 }, { "epoch": 0.3362397319994493, "grad_norm": 0.4886942505836487, "learning_rate": 9.783782127250686e-06, "loss": 0.502, "step": 7327 }, { "epoch": 0.3362856225047038, "grad_norm": 0.4729424715042114, "learning_rate": 9.783710798725352e-06, "loss": 0.4605, "step": 7328 }, { "epoch": 0.3363315130099582, "grad_norm": 0.5042920112609863, "learning_rate": 9.783639458696704e-06, "loss": 0.4152, "step": 7329 }, { "epoch": 0.3363774035152127, "grad_norm": 0.4635804295539856, "learning_rate": 9.78356810716491e-06, "loss": 0.381, "step": 7330 }, { "epoch": 0.33642329402046717, "grad_norm": 0.5181156992912292, "learning_rate": 9.783496744130145e-06, "loss": 0.5257, "step": 7331 }, { "epoch": 0.3364691845257216, "grad_norm": 0.4616307318210602, "learning_rate": 9.783425369592581e-06, "loss": 0.4525, "step": 7332 }, { "epoch": 0.3365150750309761, "grad_norm": 0.4939948320388794, "learning_rate": 9.783353983552388e-06, "loss": 0.4589, "step": 7333 }, { "epoch": 0.33656096553623055, "grad_norm": 0.4589994251728058, "learning_rate": 9.783282586009738e-06, "loss": 0.4259, "step": 7334 }, { "epoch": 0.336606856041485, "grad_norm": 0.45634517073631287, "learning_rate": 9.783211176964805e-06, "loss": 0.3807, "step": 7335 }, { "epoch": 0.3366527465467395, "grad_norm": 0.46722501516342163, "learning_rate": 9.783139756417757e-06, "loss": 0.4704, "step": 7336 }, { "epoch": 0.33669863705199393, "grad_norm": 0.4854772984981537, "learning_rate": 9.783068324368767e-06, "loss": 0.4293, "step": 7337 }, { "epoch": 0.33674452755724843, "grad_norm": 0.45999211072921753, "learning_rate": 9.782996880818008e-06, "loss": 0.3623, "step": 7338 }, { "epoch": 0.33679041806250287, "grad_norm": 0.4588179588317871, "learning_rate": 9.782925425765652e-06, "loss": 0.3652, "step": 7339 }, { "epoch": 0.3368363085677573, "grad_norm": 0.46099618077278137, "learning_rate": 9.782853959211869e-06, "loss": 0.4297, "step": 7340 }, { "epoch": 0.3368821990730118, "grad_norm": 0.4382844567298889, "learning_rate": 9.782782481156832e-06, "loss": 0.3752, "step": 7341 }, { "epoch": 0.33692808957826625, "grad_norm": 0.4364856481552124, "learning_rate": 9.782710991600712e-06, "loss": 0.3582, "step": 7342 }, { "epoch": 0.3369739800835207, "grad_norm": 0.5402190089225769, "learning_rate": 9.782639490543682e-06, "loss": 0.5645, "step": 7343 }, { "epoch": 0.3370198705887752, "grad_norm": 0.5003296732902527, "learning_rate": 9.782567977985913e-06, "loss": 0.4358, "step": 7344 }, { "epoch": 0.33706576109402964, "grad_norm": 0.46167850494384766, "learning_rate": 9.782496453927578e-06, "loss": 0.3983, "step": 7345 }, { "epoch": 0.33711165159928413, "grad_norm": 0.4567517042160034, "learning_rate": 9.78242491836885e-06, "loss": 0.3937, "step": 7346 }, { "epoch": 0.3371575421045386, "grad_norm": 0.4671885371208191, "learning_rate": 9.782353371309896e-06, "loss": 0.391, "step": 7347 }, { "epoch": 0.337203432609793, "grad_norm": 0.4809706509113312, "learning_rate": 9.782281812750894e-06, "loss": 0.4286, "step": 7348 }, { "epoch": 0.3372493231150475, "grad_norm": 0.4873986840248108, "learning_rate": 9.782210242692012e-06, "loss": 0.4265, "step": 7349 }, { "epoch": 0.33729521362030196, "grad_norm": 0.46643853187561035, "learning_rate": 9.782138661133425e-06, "loss": 0.3784, "step": 7350 }, { "epoch": 0.3373411041255564, "grad_norm": 0.4696289598941803, "learning_rate": 9.782067068075303e-06, "loss": 0.3923, "step": 7351 }, { "epoch": 0.3373869946308109, "grad_norm": 0.4900300204753876, "learning_rate": 9.78199546351782e-06, "loss": 0.408, "step": 7352 }, { "epoch": 0.33743288513606534, "grad_norm": 0.4681951403617859, "learning_rate": 9.781923847461146e-06, "loss": 0.3547, "step": 7353 }, { "epoch": 0.3374787756413198, "grad_norm": 0.5273519158363342, "learning_rate": 9.781852219905454e-06, "loss": 0.5323, "step": 7354 }, { "epoch": 0.3375246661465743, "grad_norm": 0.4322478771209717, "learning_rate": 9.781780580850918e-06, "loss": 0.3641, "step": 7355 }, { "epoch": 0.3375705566518287, "grad_norm": 0.482887327671051, "learning_rate": 9.781708930297707e-06, "loss": 0.4685, "step": 7356 }, { "epoch": 0.3376164471570832, "grad_norm": 0.4598298966884613, "learning_rate": 9.781637268245998e-06, "loss": 0.435, "step": 7357 }, { "epoch": 0.33766233766233766, "grad_norm": 0.46907344460487366, "learning_rate": 9.781565594695958e-06, "loss": 0.4061, "step": 7358 }, { "epoch": 0.3377082281675921, "grad_norm": 0.4659236669540405, "learning_rate": 9.781493909647763e-06, "loss": 0.3572, "step": 7359 }, { "epoch": 0.3377541186728466, "grad_norm": 0.4964559078216553, "learning_rate": 9.781422213101583e-06, "loss": 0.4637, "step": 7360 }, { "epoch": 0.33780000917810105, "grad_norm": 0.4614852964878082, "learning_rate": 9.781350505057594e-06, "loss": 0.4129, "step": 7361 }, { "epoch": 0.3378458996833555, "grad_norm": 0.4575325548648834, "learning_rate": 9.781278785515964e-06, "loss": 0.3943, "step": 7362 }, { "epoch": 0.33789179018861, "grad_norm": 0.4790130853652954, "learning_rate": 9.781207054476868e-06, "loss": 0.3808, "step": 7363 }, { "epoch": 0.33793768069386443, "grad_norm": 0.47188782691955566, "learning_rate": 9.781135311940479e-06, "loss": 0.4109, "step": 7364 }, { "epoch": 0.3379835711991189, "grad_norm": 0.4460643529891968, "learning_rate": 9.781063557906967e-06, "loss": 0.384, "step": 7365 }, { "epoch": 0.33802946170437337, "grad_norm": 0.4549757242202759, "learning_rate": 9.780991792376507e-06, "loss": 0.3717, "step": 7366 }, { "epoch": 0.3380753522096278, "grad_norm": 0.4437856674194336, "learning_rate": 9.780920015349272e-06, "loss": 0.28, "step": 7367 }, { "epoch": 0.3381212427148823, "grad_norm": 0.48330235481262207, "learning_rate": 9.780848226825431e-06, "loss": 0.4135, "step": 7368 }, { "epoch": 0.33816713322013675, "grad_norm": 0.46891406178474426, "learning_rate": 9.78077642680516e-06, "loss": 0.3988, "step": 7369 }, { "epoch": 0.3382130237253912, "grad_norm": 0.5104764699935913, "learning_rate": 9.78070461528863e-06, "loss": 0.4929, "step": 7370 }, { "epoch": 0.3382589142306457, "grad_norm": 0.47054728865623474, "learning_rate": 9.780632792276013e-06, "loss": 0.4311, "step": 7371 }, { "epoch": 0.33830480473590013, "grad_norm": 0.463750958442688, "learning_rate": 9.780560957767485e-06, "loss": 0.3682, "step": 7372 }, { "epoch": 0.33835069524115463, "grad_norm": 0.48049643635749817, "learning_rate": 9.780489111763216e-06, "loss": 0.4498, "step": 7373 }, { "epoch": 0.3383965857464091, "grad_norm": 0.46395596861839294, "learning_rate": 9.780417254263379e-06, "loss": 0.3965, "step": 7374 }, { "epoch": 0.3384424762516635, "grad_norm": 0.48248291015625, "learning_rate": 9.780345385268149e-06, "loss": 0.3944, "step": 7375 }, { "epoch": 0.338488366756918, "grad_norm": 0.4716425836086273, "learning_rate": 9.780273504777695e-06, "loss": 0.3583, "step": 7376 }, { "epoch": 0.33853425726217246, "grad_norm": 0.4617311358451843, "learning_rate": 9.780201612792192e-06, "loss": 0.4188, "step": 7377 }, { "epoch": 0.3385801477674269, "grad_norm": 0.4742300808429718, "learning_rate": 9.780129709311815e-06, "loss": 0.3849, "step": 7378 }, { "epoch": 0.3386260382726814, "grad_norm": 0.4515502154827118, "learning_rate": 9.780057794336733e-06, "loss": 0.4069, "step": 7379 }, { "epoch": 0.33867192877793584, "grad_norm": 0.4433266818523407, "learning_rate": 9.77998586786712e-06, "loss": 0.3805, "step": 7380 }, { "epoch": 0.33871781928319034, "grad_norm": 0.4708617329597473, "learning_rate": 9.77991392990315e-06, "loss": 0.3756, "step": 7381 }, { "epoch": 0.3387637097884448, "grad_norm": 0.4890288710594177, "learning_rate": 9.779841980444997e-06, "loss": 0.4572, "step": 7382 }, { "epoch": 0.3388096002936992, "grad_norm": 0.4627659320831299, "learning_rate": 9.779770019492832e-06, "loss": 0.3503, "step": 7383 }, { "epoch": 0.3388554907989537, "grad_norm": 0.46248921751976013, "learning_rate": 9.779698047046829e-06, "loss": 0.3711, "step": 7384 }, { "epoch": 0.33890138130420816, "grad_norm": 0.45156726241111755, "learning_rate": 9.77962606310716e-06, "loss": 0.4164, "step": 7385 }, { "epoch": 0.3389472718094626, "grad_norm": 0.5241009593009949, "learning_rate": 9.779554067673998e-06, "loss": 0.4313, "step": 7386 }, { "epoch": 0.3389931623147171, "grad_norm": 0.4329284727573395, "learning_rate": 9.779482060747518e-06, "loss": 0.351, "step": 7387 }, { "epoch": 0.33903905281997154, "grad_norm": 0.472797691822052, "learning_rate": 9.779410042327892e-06, "loss": 0.4198, "step": 7388 }, { "epoch": 0.339084943325226, "grad_norm": 0.468295693397522, "learning_rate": 9.779338012415294e-06, "loss": 0.3968, "step": 7389 }, { "epoch": 0.3391308338304805, "grad_norm": 0.43124014139175415, "learning_rate": 9.779265971009894e-06, "loss": 0.3049, "step": 7390 }, { "epoch": 0.3391767243357349, "grad_norm": 0.4837438762187958, "learning_rate": 9.779193918111872e-06, "loss": 0.4124, "step": 7391 }, { "epoch": 0.3392226148409894, "grad_norm": 0.4602155089378357, "learning_rate": 9.779121853721392e-06, "loss": 0.4022, "step": 7392 }, { "epoch": 0.33926850534624386, "grad_norm": 0.4885324239730835, "learning_rate": 9.779049777838636e-06, "loss": 0.5222, "step": 7393 }, { "epoch": 0.3393143958514983, "grad_norm": 0.43079251050949097, "learning_rate": 9.778977690463772e-06, "loss": 0.3498, "step": 7394 }, { "epoch": 0.3393602863567528, "grad_norm": 0.4765048921108246, "learning_rate": 9.778905591596975e-06, "loss": 0.4488, "step": 7395 }, { "epoch": 0.33940617686200725, "grad_norm": 0.48552173376083374, "learning_rate": 9.778833481238419e-06, "loss": 0.4377, "step": 7396 }, { "epoch": 0.3394520673672617, "grad_norm": 0.4365825951099396, "learning_rate": 9.778761359388275e-06, "loss": 0.4156, "step": 7397 }, { "epoch": 0.3394979578725162, "grad_norm": 0.4548175632953644, "learning_rate": 9.77868922604672e-06, "loss": 0.376, "step": 7398 }, { "epoch": 0.33954384837777063, "grad_norm": 0.49979400634765625, "learning_rate": 9.778617081213925e-06, "loss": 0.4958, "step": 7399 }, { "epoch": 0.3395897388830251, "grad_norm": 0.47651001811027527, "learning_rate": 9.778544924890064e-06, "loss": 0.4356, "step": 7400 }, { "epoch": 0.33963562938827957, "grad_norm": 0.4891176223754883, "learning_rate": 9.77847275707531e-06, "loss": 0.4214, "step": 7401 }, { "epoch": 0.339681519893534, "grad_norm": 0.47928473353385925, "learning_rate": 9.77840057776984e-06, "loss": 0.4383, "step": 7402 }, { "epoch": 0.3397274103987885, "grad_norm": 0.5314728021621704, "learning_rate": 9.778328386973821e-06, "loss": 0.6025, "step": 7403 }, { "epoch": 0.33977330090404295, "grad_norm": 0.46432963013648987, "learning_rate": 9.778256184687432e-06, "loss": 0.4022, "step": 7404 }, { "epoch": 0.3398191914092974, "grad_norm": 0.42920351028442383, "learning_rate": 9.778183970910846e-06, "loss": 0.3159, "step": 7405 }, { "epoch": 0.3398650819145519, "grad_norm": 0.4578544497489929, "learning_rate": 9.778111745644234e-06, "loss": 0.4192, "step": 7406 }, { "epoch": 0.33991097241980633, "grad_norm": 0.5304063558578491, "learning_rate": 9.778039508887771e-06, "loss": 0.4783, "step": 7407 }, { "epoch": 0.33995686292506083, "grad_norm": 0.4821876585483551, "learning_rate": 9.777967260641631e-06, "loss": 0.4547, "step": 7408 }, { "epoch": 0.3400027534303153, "grad_norm": 0.42716121673583984, "learning_rate": 9.777895000905988e-06, "loss": 0.3188, "step": 7409 }, { "epoch": 0.3400486439355697, "grad_norm": 0.47628113627433777, "learning_rate": 9.777822729681017e-06, "loss": 0.4419, "step": 7410 }, { "epoch": 0.3400945344408242, "grad_norm": 0.48569783568382263, "learning_rate": 9.777750446966889e-06, "loss": 0.4191, "step": 7411 }, { "epoch": 0.34014042494607866, "grad_norm": 0.49629464745521545, "learning_rate": 9.777678152763779e-06, "loss": 0.4734, "step": 7412 }, { "epoch": 0.3401863154513331, "grad_norm": 0.4593360722064972, "learning_rate": 9.77760584707186e-06, "loss": 0.3641, "step": 7413 }, { "epoch": 0.3402322059565876, "grad_norm": 0.4248492419719696, "learning_rate": 9.777533529891309e-06, "loss": 0.3361, "step": 7414 }, { "epoch": 0.34027809646184204, "grad_norm": 0.4597489535808563, "learning_rate": 9.777461201222296e-06, "loss": 0.3887, "step": 7415 }, { "epoch": 0.3403239869670965, "grad_norm": 0.45317384600639343, "learning_rate": 9.777388861064998e-06, "loss": 0.3714, "step": 7416 }, { "epoch": 0.340369877472351, "grad_norm": 0.4567270576953888, "learning_rate": 9.777316509419586e-06, "loss": 0.4214, "step": 7417 }, { "epoch": 0.3404157679776054, "grad_norm": 0.4635644853115082, "learning_rate": 9.777244146286236e-06, "loss": 0.403, "step": 7418 }, { "epoch": 0.3404616584828599, "grad_norm": 0.44588348269462585, "learning_rate": 9.777171771665122e-06, "loss": 0.3936, "step": 7419 }, { "epoch": 0.34050754898811436, "grad_norm": 0.4618290066719055, "learning_rate": 9.777099385556418e-06, "loss": 0.3959, "step": 7420 }, { "epoch": 0.3405534394933688, "grad_norm": 0.4413503408432007, "learning_rate": 9.777026987960296e-06, "loss": 0.3685, "step": 7421 }, { "epoch": 0.3405993299986233, "grad_norm": 0.4958931505680084, "learning_rate": 9.776954578876933e-06, "loss": 0.5016, "step": 7422 }, { "epoch": 0.34064522050387774, "grad_norm": 0.4384816586971283, "learning_rate": 9.776882158306502e-06, "loss": 0.326, "step": 7423 }, { "epoch": 0.3406911110091322, "grad_norm": 0.4481096863746643, "learning_rate": 9.776809726249177e-06, "loss": 0.3409, "step": 7424 }, { "epoch": 0.3407370015143867, "grad_norm": 0.5002566576004028, "learning_rate": 9.776737282705134e-06, "loss": 0.4446, "step": 7425 }, { "epoch": 0.3407828920196411, "grad_norm": 0.49928605556488037, "learning_rate": 9.776664827674542e-06, "loss": 0.5451, "step": 7426 }, { "epoch": 0.3408287825248956, "grad_norm": 0.4513339102268219, "learning_rate": 9.77659236115758e-06, "loss": 0.4233, "step": 7427 }, { "epoch": 0.34087467303015007, "grad_norm": 0.47260984778404236, "learning_rate": 9.77651988315442e-06, "loss": 0.3183, "step": 7428 }, { "epoch": 0.3409205635354045, "grad_norm": 0.4430692195892334, "learning_rate": 9.776447393665239e-06, "loss": 0.3944, "step": 7429 }, { "epoch": 0.340966454040659, "grad_norm": 0.5236510038375854, "learning_rate": 9.776374892690209e-06, "loss": 0.4704, "step": 7430 }, { "epoch": 0.34101234454591345, "grad_norm": 0.5233997106552124, "learning_rate": 9.776302380229506e-06, "loss": 0.4617, "step": 7431 }, { "epoch": 0.3410582350511679, "grad_norm": 0.472449392080307, "learning_rate": 9.776229856283302e-06, "loss": 0.3668, "step": 7432 }, { "epoch": 0.3411041255564224, "grad_norm": 0.4406496286392212, "learning_rate": 9.776157320851771e-06, "loss": 0.3931, "step": 7433 }, { "epoch": 0.34115001606167683, "grad_norm": 0.4443661570549011, "learning_rate": 9.77608477393509e-06, "loss": 0.3288, "step": 7434 }, { "epoch": 0.34119590656693133, "grad_norm": 0.43557700514793396, "learning_rate": 9.776012215533434e-06, "loss": 0.3403, "step": 7435 }, { "epoch": 0.34124179707218577, "grad_norm": 0.48722273111343384, "learning_rate": 9.775939645646975e-06, "loss": 0.4342, "step": 7436 }, { "epoch": 0.3412876875774402, "grad_norm": 0.43956923484802246, "learning_rate": 9.775867064275888e-06, "loss": 0.3229, "step": 7437 }, { "epoch": 0.3413335780826947, "grad_norm": 0.4262770712375641, "learning_rate": 9.775794471420348e-06, "loss": 0.3336, "step": 7438 }, { "epoch": 0.34137946858794915, "grad_norm": 0.4993220567703247, "learning_rate": 9.77572186708053e-06, "loss": 0.4723, "step": 7439 }, { "epoch": 0.3414253590932036, "grad_norm": 0.4437876343727112, "learning_rate": 9.775649251256609e-06, "loss": 0.4048, "step": 7440 }, { "epoch": 0.3414712495984581, "grad_norm": 0.4749569892883301, "learning_rate": 9.775576623948758e-06, "loss": 0.3839, "step": 7441 }, { "epoch": 0.34151714010371254, "grad_norm": 0.4711102843284607, "learning_rate": 9.775503985157153e-06, "loss": 0.446, "step": 7442 }, { "epoch": 0.341563030608967, "grad_norm": 0.5104339718818665, "learning_rate": 9.775431334881966e-06, "loss": 0.5018, "step": 7443 }, { "epoch": 0.3416089211142215, "grad_norm": 0.5113730430603027, "learning_rate": 9.775358673123376e-06, "loss": 0.5161, "step": 7444 }, { "epoch": 0.3416548116194759, "grad_norm": 0.5042397379875183, "learning_rate": 9.775285999881554e-06, "loss": 0.4196, "step": 7445 }, { "epoch": 0.3417007021247304, "grad_norm": 0.4670191705226898, "learning_rate": 9.775213315156677e-06, "loss": 0.3446, "step": 7446 }, { "epoch": 0.34174659262998486, "grad_norm": 0.49151450395584106, "learning_rate": 9.77514061894892e-06, "loss": 0.4573, "step": 7447 }, { "epoch": 0.3417924831352393, "grad_norm": 0.4387458264827728, "learning_rate": 9.775067911258455e-06, "loss": 0.4224, "step": 7448 }, { "epoch": 0.3418383736404938, "grad_norm": 0.4623500108718872, "learning_rate": 9.774995192085461e-06, "loss": 0.3767, "step": 7449 }, { "epoch": 0.34188426414574824, "grad_norm": 0.46996501088142395, "learning_rate": 9.77492246143011e-06, "loss": 0.4493, "step": 7450 }, { "epoch": 0.3419301546510027, "grad_norm": 0.4974955916404724, "learning_rate": 9.774849719292577e-06, "loss": 0.4775, "step": 7451 }, { "epoch": 0.3419760451562572, "grad_norm": 0.5088875889778137, "learning_rate": 9.774776965673036e-06, "loss": 0.4994, "step": 7452 }, { "epoch": 0.3420219356615116, "grad_norm": 0.49207568168640137, "learning_rate": 9.774704200571665e-06, "loss": 0.4823, "step": 7453 }, { "epoch": 0.3420678261667661, "grad_norm": 0.48566123843193054, "learning_rate": 9.774631423988638e-06, "loss": 0.3988, "step": 7454 }, { "epoch": 0.34211371667202056, "grad_norm": 0.4807029962539673, "learning_rate": 9.774558635924127e-06, "loss": 0.435, "step": 7455 }, { "epoch": 0.342159607177275, "grad_norm": 0.467380166053772, "learning_rate": 9.774485836378311e-06, "loss": 0.3394, "step": 7456 }, { "epoch": 0.3422054976825295, "grad_norm": 0.459309458732605, "learning_rate": 9.774413025351364e-06, "loss": 0.4007, "step": 7457 }, { "epoch": 0.34225138818778394, "grad_norm": 0.4063344895839691, "learning_rate": 9.774340202843459e-06, "loss": 0.3032, "step": 7458 }, { "epoch": 0.3422972786930384, "grad_norm": 0.4463087320327759, "learning_rate": 9.774267368854775e-06, "loss": 0.3259, "step": 7459 }, { "epoch": 0.3423431691982929, "grad_norm": 0.4934060871601105, "learning_rate": 9.774194523385482e-06, "loss": 0.4326, "step": 7460 }, { "epoch": 0.3423890597035473, "grad_norm": 0.4699383080005646, "learning_rate": 9.77412166643576e-06, "loss": 0.4409, "step": 7461 }, { "epoch": 0.3424349502088018, "grad_norm": 0.45647281408309937, "learning_rate": 9.77404879800578e-06, "loss": 0.3772, "step": 7462 }, { "epoch": 0.34248084071405627, "grad_norm": 0.44446781277656555, "learning_rate": 9.773975918095722e-06, "loss": 0.4095, "step": 7463 }, { "epoch": 0.3425267312193107, "grad_norm": 0.4419586956501007, "learning_rate": 9.773903026705756e-06, "loss": 0.3584, "step": 7464 }, { "epoch": 0.3425726217245652, "grad_norm": 0.46052002906799316, "learning_rate": 9.773830123836061e-06, "loss": 0.3994, "step": 7465 }, { "epoch": 0.34261851222981965, "grad_norm": 0.5097686648368835, "learning_rate": 9.773757209486811e-06, "loss": 0.4763, "step": 7466 }, { "epoch": 0.3426644027350741, "grad_norm": 0.4929233193397522, "learning_rate": 9.773684283658182e-06, "loss": 0.4249, "step": 7467 }, { "epoch": 0.3427102932403286, "grad_norm": 0.4204244017601013, "learning_rate": 9.773611346350348e-06, "loss": 0.3391, "step": 7468 }, { "epoch": 0.34275618374558303, "grad_norm": 0.6642082929611206, "learning_rate": 9.773538397563487e-06, "loss": 0.4263, "step": 7469 }, { "epoch": 0.3428020742508375, "grad_norm": 0.470769464969635, "learning_rate": 9.773465437297772e-06, "loss": 0.3931, "step": 7470 }, { "epoch": 0.34284796475609197, "grad_norm": 0.510100245475769, "learning_rate": 9.773392465553379e-06, "loss": 0.4977, "step": 7471 }, { "epoch": 0.3428938552613464, "grad_norm": 0.4964597821235657, "learning_rate": 9.773319482330483e-06, "loss": 0.4551, "step": 7472 }, { "epoch": 0.3429397457666009, "grad_norm": 0.5028560161590576, "learning_rate": 9.773246487629262e-06, "loss": 0.4151, "step": 7473 }, { "epoch": 0.34298563627185535, "grad_norm": 0.5038970708847046, "learning_rate": 9.77317348144989e-06, "loss": 0.3966, "step": 7474 }, { "epoch": 0.3430315267771098, "grad_norm": 0.5261569023132324, "learning_rate": 9.77310046379254e-06, "loss": 0.4499, "step": 7475 }, { "epoch": 0.3430774172823643, "grad_norm": 0.4408392906188965, "learning_rate": 9.77302743465739e-06, "loss": 0.3591, "step": 7476 }, { "epoch": 0.34312330778761874, "grad_norm": 0.4895802140235901, "learning_rate": 9.772954394044616e-06, "loss": 0.4207, "step": 7477 }, { "epoch": 0.3431691982928732, "grad_norm": 0.6594173312187195, "learning_rate": 9.772881341954393e-06, "loss": 0.4924, "step": 7478 }, { "epoch": 0.3432150887981277, "grad_norm": 0.46530622243881226, "learning_rate": 9.772808278386896e-06, "loss": 0.4068, "step": 7479 }, { "epoch": 0.3432609793033821, "grad_norm": 0.4976832866668701, "learning_rate": 9.772735203342303e-06, "loss": 0.4494, "step": 7480 }, { "epoch": 0.3433068698086366, "grad_norm": 0.4775434732437134, "learning_rate": 9.772662116820788e-06, "loss": 0.4023, "step": 7481 }, { "epoch": 0.34335276031389106, "grad_norm": 0.5039447546005249, "learning_rate": 9.772589018822524e-06, "loss": 0.4189, "step": 7482 }, { "epoch": 0.3433986508191455, "grad_norm": 0.535153865814209, "learning_rate": 9.772515909347694e-06, "loss": 0.5446, "step": 7483 }, { "epoch": 0.3434445413244, "grad_norm": 0.472126305103302, "learning_rate": 9.772442788396466e-06, "loss": 0.4248, "step": 7484 }, { "epoch": 0.34349043182965444, "grad_norm": 0.483585387468338, "learning_rate": 9.772369655969021e-06, "loss": 0.4117, "step": 7485 }, { "epoch": 0.3435363223349089, "grad_norm": 0.4720342755317688, "learning_rate": 9.772296512065534e-06, "loss": 0.4871, "step": 7486 }, { "epoch": 0.3435822128401634, "grad_norm": 0.4842703640460968, "learning_rate": 9.77222335668618e-06, "loss": 0.4475, "step": 7487 }, { "epoch": 0.3436281033454178, "grad_norm": 0.44866934418678284, "learning_rate": 9.772150189831133e-06, "loss": 0.3661, "step": 7488 }, { "epoch": 0.3436739938506723, "grad_norm": 0.4554039239883423, "learning_rate": 9.77207701150057e-06, "loss": 0.4475, "step": 7489 }, { "epoch": 0.34371988435592676, "grad_norm": 0.4484030306339264, "learning_rate": 9.77200382169467e-06, "loss": 0.4091, "step": 7490 }, { "epoch": 0.3437657748611812, "grad_norm": 0.5099649429321289, "learning_rate": 9.771930620413608e-06, "loss": 0.5278, "step": 7491 }, { "epoch": 0.3438116653664357, "grad_norm": 0.5197622179985046, "learning_rate": 9.771857407657558e-06, "loss": 0.4491, "step": 7492 }, { "epoch": 0.34385755587169015, "grad_norm": 0.4589492678642273, "learning_rate": 9.771784183426695e-06, "loss": 0.4519, "step": 7493 }, { "epoch": 0.3439034463769446, "grad_norm": 0.46764957904815674, "learning_rate": 9.7717109477212e-06, "loss": 0.4426, "step": 7494 }, { "epoch": 0.3439493368821991, "grad_norm": 0.4547639787197113, "learning_rate": 9.771637700541246e-06, "loss": 0.3956, "step": 7495 }, { "epoch": 0.34399522738745353, "grad_norm": 0.5215696692466736, "learning_rate": 9.771564441887007e-06, "loss": 0.4559, "step": 7496 }, { "epoch": 0.344041117892708, "grad_norm": 0.4635773003101349, "learning_rate": 9.771491171758663e-06, "loss": 0.4242, "step": 7497 }, { "epoch": 0.34408700839796247, "grad_norm": 0.4817538857460022, "learning_rate": 9.771417890156388e-06, "loss": 0.494, "step": 7498 }, { "epoch": 0.3441328989032169, "grad_norm": 0.45221272110939026, "learning_rate": 9.77134459708036e-06, "loss": 0.3741, "step": 7499 }, { "epoch": 0.3441787894084714, "grad_norm": 0.47556164860725403, "learning_rate": 9.771271292530753e-06, "loss": 0.4498, "step": 7500 }, { "epoch": 0.34422467991372585, "grad_norm": 0.5360143780708313, "learning_rate": 9.771197976507745e-06, "loss": 0.4889, "step": 7501 }, { "epoch": 0.3442705704189803, "grad_norm": 0.4882326126098633, "learning_rate": 9.77112464901151e-06, "loss": 0.3097, "step": 7502 }, { "epoch": 0.3443164609242348, "grad_norm": 0.5099873542785645, "learning_rate": 9.771051310042227e-06, "loss": 0.458, "step": 7503 }, { "epoch": 0.34436235142948923, "grad_norm": 0.5317543745040894, "learning_rate": 9.770977959600072e-06, "loss": 0.5257, "step": 7504 }, { "epoch": 0.3444082419347437, "grad_norm": 0.4684421420097351, "learning_rate": 9.770904597685222e-06, "loss": 0.4284, "step": 7505 }, { "epoch": 0.3444541324399982, "grad_norm": 0.4865078926086426, "learning_rate": 9.77083122429785e-06, "loss": 0.3681, "step": 7506 }, { "epoch": 0.3445000229452526, "grad_norm": 0.5494277477264404, "learning_rate": 9.770757839438135e-06, "loss": 0.4223, "step": 7507 }, { "epoch": 0.3445459134505071, "grad_norm": 0.45232754945755005, "learning_rate": 9.770684443106254e-06, "loss": 0.3585, "step": 7508 }, { "epoch": 0.34459180395576156, "grad_norm": 0.4730750024318695, "learning_rate": 9.770611035302383e-06, "loss": 0.4006, "step": 7509 }, { "epoch": 0.344637694461016, "grad_norm": 0.45557841658592224, "learning_rate": 9.770537616026697e-06, "loss": 0.375, "step": 7510 }, { "epoch": 0.3446835849662705, "grad_norm": 0.4844571053981781, "learning_rate": 9.770464185279374e-06, "loss": 0.4796, "step": 7511 }, { "epoch": 0.34472947547152494, "grad_norm": 0.4509688913822174, "learning_rate": 9.77039074306059e-06, "loss": 0.397, "step": 7512 }, { "epoch": 0.3447753659767794, "grad_norm": 0.4386076033115387, "learning_rate": 9.77031728937052e-06, "loss": 0.3526, "step": 7513 }, { "epoch": 0.3448212564820339, "grad_norm": 0.4587988555431366, "learning_rate": 9.770243824209346e-06, "loss": 0.4188, "step": 7514 }, { "epoch": 0.3448671469872883, "grad_norm": 0.44408902525901794, "learning_rate": 9.770170347577239e-06, "loss": 0.3547, "step": 7515 }, { "epoch": 0.3449130374925428, "grad_norm": 0.4457341730594635, "learning_rate": 9.770096859474378e-06, "loss": 0.3843, "step": 7516 }, { "epoch": 0.34495892799779726, "grad_norm": 0.4639625549316406, "learning_rate": 9.77002335990094e-06, "loss": 0.385, "step": 7517 }, { "epoch": 0.3450048185030517, "grad_norm": 0.42840731143951416, "learning_rate": 9.7699498488571e-06, "loss": 0.3852, "step": 7518 }, { "epoch": 0.3450507090083062, "grad_norm": 0.443139910697937, "learning_rate": 9.769876326343038e-06, "loss": 0.4059, "step": 7519 }, { "epoch": 0.34509659951356064, "grad_norm": 0.4856567680835724, "learning_rate": 9.769802792358929e-06, "loss": 0.4479, "step": 7520 }, { "epoch": 0.3451424900188151, "grad_norm": 0.47379782795906067, "learning_rate": 9.769729246904949e-06, "loss": 0.4116, "step": 7521 }, { "epoch": 0.3451883805240696, "grad_norm": 0.4640233814716339, "learning_rate": 9.769655689981274e-06, "loss": 0.4809, "step": 7522 }, { "epoch": 0.345234271029324, "grad_norm": 0.459486722946167, "learning_rate": 9.769582121588084e-06, "loss": 0.4004, "step": 7523 }, { "epoch": 0.3452801615345785, "grad_norm": 0.4644634425640106, "learning_rate": 9.769508541725555e-06, "loss": 0.3768, "step": 7524 }, { "epoch": 0.34532605203983296, "grad_norm": 0.46861618757247925, "learning_rate": 9.769434950393862e-06, "loss": 0.4209, "step": 7525 }, { "epoch": 0.3453719425450874, "grad_norm": 0.4838896691799164, "learning_rate": 9.769361347593182e-06, "loss": 0.5021, "step": 7526 }, { "epoch": 0.3454178330503419, "grad_norm": 0.4666491150856018, "learning_rate": 9.769287733323696e-06, "loss": 0.4339, "step": 7527 }, { "epoch": 0.34546372355559635, "grad_norm": 0.42675402760505676, "learning_rate": 9.769214107585576e-06, "loss": 0.3755, "step": 7528 }, { "epoch": 0.3455096140608508, "grad_norm": 0.5362486839294434, "learning_rate": 9.769140470379002e-06, "loss": 0.5166, "step": 7529 }, { "epoch": 0.3455555045661053, "grad_norm": 0.5078483819961548, "learning_rate": 9.769066821704151e-06, "loss": 0.4289, "step": 7530 }, { "epoch": 0.34560139507135973, "grad_norm": 0.46759679913520813, "learning_rate": 9.7689931615612e-06, "loss": 0.4006, "step": 7531 }, { "epoch": 0.34564728557661417, "grad_norm": 0.48502886295318604, "learning_rate": 9.768919489950324e-06, "loss": 0.4283, "step": 7532 }, { "epoch": 0.34569317608186867, "grad_norm": 0.4736391007900238, "learning_rate": 9.768845806871704e-06, "loss": 0.4122, "step": 7533 }, { "epoch": 0.3457390665871231, "grad_norm": 0.5229737162590027, "learning_rate": 9.76877211232551e-06, "loss": 0.4758, "step": 7534 }, { "epoch": 0.3457849570923776, "grad_norm": 0.40566006302833557, "learning_rate": 9.76869840631193e-06, "loss": 0.317, "step": 7535 }, { "epoch": 0.34583084759763205, "grad_norm": 0.4811519682407379, "learning_rate": 9.768624688831132e-06, "loss": 0.4311, "step": 7536 }, { "epoch": 0.3458767381028865, "grad_norm": 0.4791409969329834, "learning_rate": 9.768550959883296e-06, "loss": 0.3871, "step": 7537 }, { "epoch": 0.345922628608141, "grad_norm": 0.46593907475471497, "learning_rate": 9.768477219468602e-06, "loss": 0.3786, "step": 7538 }, { "epoch": 0.34596851911339543, "grad_norm": 0.45886313915252686, "learning_rate": 9.768403467587225e-06, "loss": 0.3734, "step": 7539 }, { "epoch": 0.3460144096186499, "grad_norm": 0.4441809058189392, "learning_rate": 9.768329704239342e-06, "loss": 0.3372, "step": 7540 }, { "epoch": 0.3460603001239044, "grad_norm": 0.5206360816955566, "learning_rate": 9.76825592942513e-06, "loss": 0.4563, "step": 7541 }, { "epoch": 0.3461061906291588, "grad_norm": 0.4262565076351166, "learning_rate": 9.76818214314477e-06, "loss": 0.3201, "step": 7542 }, { "epoch": 0.3461520811344133, "grad_norm": 0.4973420202732086, "learning_rate": 9.768108345398436e-06, "loss": 0.4333, "step": 7543 }, { "epoch": 0.34619797163966776, "grad_norm": 0.44933706521987915, "learning_rate": 9.768034536186304e-06, "loss": 0.3556, "step": 7544 }, { "epoch": 0.3462438621449222, "grad_norm": 0.4575684368610382, "learning_rate": 9.767960715508557e-06, "loss": 0.3872, "step": 7545 }, { "epoch": 0.3462897526501767, "grad_norm": 0.4663599729537964, "learning_rate": 9.767886883365367e-06, "loss": 0.3769, "step": 7546 }, { "epoch": 0.34633564315543114, "grad_norm": 0.45803332328796387, "learning_rate": 9.767813039756915e-06, "loss": 0.4193, "step": 7547 }, { "epoch": 0.3463815336606856, "grad_norm": 0.48722875118255615, "learning_rate": 9.767739184683377e-06, "loss": 0.4366, "step": 7548 }, { "epoch": 0.3464274241659401, "grad_norm": 0.4429193437099457, "learning_rate": 9.767665318144932e-06, "loss": 0.3692, "step": 7549 }, { "epoch": 0.3464733146711945, "grad_norm": 0.46046680212020874, "learning_rate": 9.767591440141755e-06, "loss": 0.3868, "step": 7550 }, { "epoch": 0.346519205176449, "grad_norm": 0.44924381375312805, "learning_rate": 9.767517550674026e-06, "loss": 0.313, "step": 7551 }, { "epoch": 0.34656509568170346, "grad_norm": 0.4696842133998871, "learning_rate": 9.767443649741921e-06, "loss": 0.4281, "step": 7552 }, { "epoch": 0.3466109861869579, "grad_norm": 0.47532036900520325, "learning_rate": 9.76736973734562e-06, "loss": 0.4298, "step": 7553 }, { "epoch": 0.3466568766922124, "grad_norm": 0.47718507051467896, "learning_rate": 9.7672958134853e-06, "loss": 0.4406, "step": 7554 }, { "epoch": 0.34670276719746684, "grad_norm": 0.47704941034317017, "learning_rate": 9.767221878161136e-06, "loss": 0.4042, "step": 7555 }, { "epoch": 0.3467486577027213, "grad_norm": 0.4445193111896515, "learning_rate": 9.767147931373309e-06, "loss": 0.3546, "step": 7556 }, { "epoch": 0.3467945482079758, "grad_norm": 0.4629325270652771, "learning_rate": 9.767073973121997e-06, "loss": 0.3604, "step": 7557 }, { "epoch": 0.3468404387132302, "grad_norm": 0.4590432345867157, "learning_rate": 9.767000003407375e-06, "loss": 0.3934, "step": 7558 }, { "epoch": 0.34688632921848467, "grad_norm": 0.451778382062912, "learning_rate": 9.766926022229623e-06, "loss": 0.3826, "step": 7559 }, { "epoch": 0.34693221972373917, "grad_norm": 0.44265544414520264, "learning_rate": 9.766852029588917e-06, "loss": 0.4331, "step": 7560 }, { "epoch": 0.3469781102289936, "grad_norm": 0.46604517102241516, "learning_rate": 9.76677802548544e-06, "loss": 0.501, "step": 7561 }, { "epoch": 0.3470240007342481, "grad_norm": 0.4993465542793274, "learning_rate": 9.766704009919364e-06, "loss": 0.4682, "step": 7562 }, { "epoch": 0.34706989123950255, "grad_norm": 0.4657362103462219, "learning_rate": 9.766629982890869e-06, "loss": 0.4157, "step": 7563 }, { "epoch": 0.347115781744757, "grad_norm": 0.5058092474937439, "learning_rate": 9.766555944400134e-06, "loss": 0.4851, "step": 7564 }, { "epoch": 0.3471616722500115, "grad_norm": 0.43176037073135376, "learning_rate": 9.766481894447334e-06, "loss": 0.3812, "step": 7565 }, { "epoch": 0.34720756275526593, "grad_norm": 0.506644606590271, "learning_rate": 9.766407833032652e-06, "loss": 0.5175, "step": 7566 }, { "epoch": 0.3472534532605204, "grad_norm": 0.4476737380027771, "learning_rate": 9.766333760156263e-06, "loss": 0.3327, "step": 7567 }, { "epoch": 0.34729934376577487, "grad_norm": 0.48623380064964294, "learning_rate": 9.766259675818345e-06, "loss": 0.3904, "step": 7568 }, { "epoch": 0.3473452342710293, "grad_norm": 0.45771583914756775, "learning_rate": 9.766185580019077e-06, "loss": 0.3821, "step": 7569 }, { "epoch": 0.3473911247762838, "grad_norm": 0.46506422758102417, "learning_rate": 9.766111472758637e-06, "loss": 0.4236, "step": 7570 }, { "epoch": 0.34743701528153825, "grad_norm": 0.4524925649166107, "learning_rate": 9.766037354037202e-06, "loss": 0.3786, "step": 7571 }, { "epoch": 0.3474829057867927, "grad_norm": 0.4716859459877014, "learning_rate": 9.765963223854955e-06, "loss": 0.4596, "step": 7572 }, { "epoch": 0.3475287962920472, "grad_norm": 0.4681797921657562, "learning_rate": 9.765889082212068e-06, "loss": 0.4272, "step": 7573 }, { "epoch": 0.34757468679730164, "grad_norm": 0.47106581926345825, "learning_rate": 9.765814929108721e-06, "loss": 0.453, "step": 7574 }, { "epoch": 0.3476205773025561, "grad_norm": 0.47277775406837463, "learning_rate": 9.765740764545096e-06, "loss": 0.4526, "step": 7575 }, { "epoch": 0.3476664678078106, "grad_norm": 0.48712101578712463, "learning_rate": 9.765666588521366e-06, "loss": 0.4492, "step": 7576 }, { "epoch": 0.347712358313065, "grad_norm": 0.41602057218551636, "learning_rate": 9.765592401037713e-06, "loss": 0.3263, "step": 7577 }, { "epoch": 0.3477582488183195, "grad_norm": 0.4814780056476593, "learning_rate": 9.765518202094313e-06, "loss": 0.4366, "step": 7578 }, { "epoch": 0.34780413932357396, "grad_norm": 0.46363043785095215, "learning_rate": 9.765443991691347e-06, "loss": 0.417, "step": 7579 }, { "epoch": 0.3478500298288284, "grad_norm": 0.44514644145965576, "learning_rate": 9.765369769828994e-06, "loss": 0.3675, "step": 7580 }, { "epoch": 0.3478959203340829, "grad_norm": 0.42944419384002686, "learning_rate": 9.765295536507429e-06, "loss": 0.3409, "step": 7581 }, { "epoch": 0.34794181083933734, "grad_norm": 0.46850618720054626, "learning_rate": 9.765221291726833e-06, "loss": 0.4455, "step": 7582 }, { "epoch": 0.3479877013445918, "grad_norm": 0.4708006978034973, "learning_rate": 9.765147035487381e-06, "loss": 0.404, "step": 7583 }, { "epoch": 0.3480335918498463, "grad_norm": 0.4630293846130371, "learning_rate": 9.765072767789257e-06, "loss": 0.3902, "step": 7584 }, { "epoch": 0.3480794823551007, "grad_norm": 0.48770028352737427, "learning_rate": 9.764998488632635e-06, "loss": 0.5014, "step": 7585 }, { "epoch": 0.3481253728603552, "grad_norm": 0.4378030002117157, "learning_rate": 9.764924198017696e-06, "loss": 0.3357, "step": 7586 }, { "epoch": 0.34817126336560966, "grad_norm": 0.43347692489624023, "learning_rate": 9.76484989594462e-06, "loss": 0.3606, "step": 7587 }, { "epoch": 0.3482171538708641, "grad_norm": 0.48702043294906616, "learning_rate": 9.764775582413581e-06, "loss": 0.4239, "step": 7588 }, { "epoch": 0.3482630443761186, "grad_norm": 0.4599001109600067, "learning_rate": 9.764701257424764e-06, "loss": 0.4388, "step": 7589 }, { "epoch": 0.34830893488137304, "grad_norm": 0.43673670291900635, "learning_rate": 9.764626920978342e-06, "loss": 0.3221, "step": 7590 }, { "epoch": 0.3483548253866275, "grad_norm": 0.4766339063644409, "learning_rate": 9.764552573074496e-06, "loss": 0.4216, "step": 7591 }, { "epoch": 0.348400715891882, "grad_norm": 0.4625072479248047, "learning_rate": 9.764478213713405e-06, "loss": 0.405, "step": 7592 }, { "epoch": 0.3484466063971364, "grad_norm": 0.4607468545436859, "learning_rate": 9.764403842895246e-06, "loss": 0.401, "step": 7593 }, { "epoch": 0.34849249690239087, "grad_norm": 0.4497205913066864, "learning_rate": 9.764329460620203e-06, "loss": 0.3526, "step": 7594 }, { "epoch": 0.34853838740764537, "grad_norm": 0.47558921575546265, "learning_rate": 9.76425506688845e-06, "loss": 0.4318, "step": 7595 }, { "epoch": 0.3485842779128998, "grad_norm": 0.4221167266368866, "learning_rate": 9.764180661700167e-06, "loss": 0.3213, "step": 7596 }, { "epoch": 0.3486301684181543, "grad_norm": 0.495102196931839, "learning_rate": 9.76410624505553e-06, "loss": 0.4529, "step": 7597 }, { "epoch": 0.34867605892340875, "grad_norm": 0.4522758722305298, "learning_rate": 9.764031816954726e-06, "loss": 0.4188, "step": 7598 }, { "epoch": 0.3487219494286632, "grad_norm": 0.4883578419685364, "learning_rate": 9.763957377397927e-06, "loss": 0.4327, "step": 7599 }, { "epoch": 0.3487678399339177, "grad_norm": 0.481063574552536, "learning_rate": 9.763882926385314e-06, "loss": 0.4431, "step": 7600 }, { "epoch": 0.34881373043917213, "grad_norm": 0.4394407570362091, "learning_rate": 9.763808463917067e-06, "loss": 0.336, "step": 7601 }, { "epoch": 0.3488596209444266, "grad_norm": 0.47730714082717896, "learning_rate": 9.763733989993362e-06, "loss": 0.4527, "step": 7602 }, { "epoch": 0.34890551144968107, "grad_norm": 0.4249570071697235, "learning_rate": 9.763659504614382e-06, "loss": 0.3607, "step": 7603 }, { "epoch": 0.3489514019549355, "grad_norm": 0.43443503975868225, "learning_rate": 9.763585007780302e-06, "loss": 0.3925, "step": 7604 }, { "epoch": 0.34899729246019, "grad_norm": 0.4808576703071594, "learning_rate": 9.763510499491307e-06, "loss": 0.488, "step": 7605 }, { "epoch": 0.34904318296544445, "grad_norm": 0.42800503969192505, "learning_rate": 9.763435979747572e-06, "loss": 0.3532, "step": 7606 }, { "epoch": 0.3490890734706989, "grad_norm": 0.4606592059135437, "learning_rate": 9.763361448549276e-06, "loss": 0.4119, "step": 7607 }, { "epoch": 0.3491349639759534, "grad_norm": 0.46513354778289795, "learning_rate": 9.7632869058966e-06, "loss": 0.392, "step": 7608 }, { "epoch": 0.34918085448120784, "grad_norm": 0.48260772228240967, "learning_rate": 9.76321235178972e-06, "loss": 0.4427, "step": 7609 }, { "epoch": 0.3492267449864623, "grad_norm": 0.46555444598197937, "learning_rate": 9.763137786228819e-06, "loss": 0.4097, "step": 7610 }, { "epoch": 0.3492726354917168, "grad_norm": 0.4430099129676819, "learning_rate": 9.763063209214074e-06, "loss": 0.3802, "step": 7611 }, { "epoch": 0.3493185259969712, "grad_norm": 0.48142874240875244, "learning_rate": 9.762988620745666e-06, "loss": 0.4731, "step": 7612 }, { "epoch": 0.3493644165022257, "grad_norm": 0.4732450842857361, "learning_rate": 9.762914020823772e-06, "loss": 0.4733, "step": 7613 }, { "epoch": 0.34941030700748016, "grad_norm": 0.42975950241088867, "learning_rate": 9.762839409448575e-06, "loss": 0.4021, "step": 7614 }, { "epoch": 0.3494561975127346, "grad_norm": 0.44928207993507385, "learning_rate": 9.762764786620254e-06, "loss": 0.4275, "step": 7615 }, { "epoch": 0.3495020880179891, "grad_norm": 0.4540664851665497, "learning_rate": 9.762690152338984e-06, "loss": 0.3657, "step": 7616 }, { "epoch": 0.34954797852324354, "grad_norm": 0.46384164690971375, "learning_rate": 9.762615506604947e-06, "loss": 0.4554, "step": 7617 }, { "epoch": 0.349593869028498, "grad_norm": 0.47765716910362244, "learning_rate": 9.762540849418325e-06, "loss": 0.4297, "step": 7618 }, { "epoch": 0.3496397595337525, "grad_norm": 0.4374117851257324, "learning_rate": 9.762466180779295e-06, "loss": 0.3794, "step": 7619 }, { "epoch": 0.3496856500390069, "grad_norm": 0.4981018900871277, "learning_rate": 9.762391500688035e-06, "loss": 0.4546, "step": 7620 }, { "epoch": 0.34973154054426137, "grad_norm": 0.44146597385406494, "learning_rate": 9.762316809144728e-06, "loss": 0.4038, "step": 7621 }, { "epoch": 0.34977743104951586, "grad_norm": 0.46910905838012695, "learning_rate": 9.762242106149553e-06, "loss": 0.4241, "step": 7622 }, { "epoch": 0.3498233215547703, "grad_norm": 0.45875227451324463, "learning_rate": 9.762167391702688e-06, "loss": 0.41, "step": 7623 }, { "epoch": 0.3498692120600248, "grad_norm": 0.48692455887794495, "learning_rate": 9.762092665804313e-06, "loss": 0.4912, "step": 7624 }, { "epoch": 0.34991510256527925, "grad_norm": 0.46123653650283813, "learning_rate": 9.762017928454608e-06, "loss": 0.4409, "step": 7625 }, { "epoch": 0.3499609930705337, "grad_norm": 0.4709842801094055, "learning_rate": 9.761943179653752e-06, "loss": 0.3886, "step": 7626 }, { "epoch": 0.3500068835757882, "grad_norm": 0.4526197910308838, "learning_rate": 9.761868419401928e-06, "loss": 0.3963, "step": 7627 }, { "epoch": 0.35005277408104263, "grad_norm": 0.44349461793899536, "learning_rate": 9.761793647699311e-06, "loss": 0.3245, "step": 7628 }, { "epoch": 0.35009866458629707, "grad_norm": 0.5441825985908508, "learning_rate": 9.761718864546083e-06, "loss": 0.3911, "step": 7629 }, { "epoch": 0.35014455509155157, "grad_norm": 0.45233970880508423, "learning_rate": 9.761644069942426e-06, "loss": 0.3987, "step": 7630 }, { "epoch": 0.350190445596806, "grad_norm": 0.48667091131210327, "learning_rate": 9.761569263888515e-06, "loss": 0.4647, "step": 7631 }, { "epoch": 0.3502363361020605, "grad_norm": 0.4708206355571747, "learning_rate": 9.761494446384537e-06, "loss": 0.43, "step": 7632 }, { "epoch": 0.35028222660731495, "grad_norm": 0.4773523807525635, "learning_rate": 9.761419617430663e-06, "loss": 0.4534, "step": 7633 }, { "epoch": 0.3503281171125694, "grad_norm": 0.4876518249511719, "learning_rate": 9.761344777027081e-06, "loss": 0.4542, "step": 7634 }, { "epoch": 0.3503740076178239, "grad_norm": 0.46373939514160156, "learning_rate": 9.761269925173965e-06, "loss": 0.3625, "step": 7635 }, { "epoch": 0.35041989812307833, "grad_norm": 0.493642121553421, "learning_rate": 9.7611950618715e-06, "loss": 0.4174, "step": 7636 }, { "epoch": 0.3504657886283328, "grad_norm": 0.4417996406555176, "learning_rate": 9.761120187119863e-06, "loss": 0.3771, "step": 7637 }, { "epoch": 0.3505116791335873, "grad_norm": 0.4484770894050598, "learning_rate": 9.761045300919234e-06, "loss": 0.3818, "step": 7638 }, { "epoch": 0.3505575696388417, "grad_norm": 0.44239673018455505, "learning_rate": 9.760970403269794e-06, "loss": 0.3683, "step": 7639 }, { "epoch": 0.3506034601440962, "grad_norm": 0.4582446217536926, "learning_rate": 9.760895494171721e-06, "loss": 0.4083, "step": 7640 }, { "epoch": 0.35064935064935066, "grad_norm": 0.475626140832901, "learning_rate": 9.760820573625199e-06, "loss": 0.4032, "step": 7641 }, { "epoch": 0.3506952411546051, "grad_norm": 0.46180489659309387, "learning_rate": 9.760745641630405e-06, "loss": 0.3767, "step": 7642 }, { "epoch": 0.3507411316598596, "grad_norm": 0.46454235911369324, "learning_rate": 9.760670698187521e-06, "loss": 0.3942, "step": 7643 }, { "epoch": 0.35078702216511404, "grad_norm": 0.4947590231895447, "learning_rate": 9.760595743296726e-06, "loss": 0.4151, "step": 7644 }, { "epoch": 0.3508329126703685, "grad_norm": 0.43232065439224243, "learning_rate": 9.760520776958202e-06, "loss": 0.3852, "step": 7645 }, { "epoch": 0.350878803175623, "grad_norm": 0.44100961089134216, "learning_rate": 9.760445799172125e-06, "loss": 0.3756, "step": 7646 }, { "epoch": 0.3509246936808774, "grad_norm": 0.47151312232017517, "learning_rate": 9.760370809938681e-06, "loss": 0.4546, "step": 7647 }, { "epoch": 0.35097058418613186, "grad_norm": 0.4657520651817322, "learning_rate": 9.760295809258048e-06, "loss": 0.4345, "step": 7648 }, { "epoch": 0.35101647469138636, "grad_norm": 0.5069671869277954, "learning_rate": 9.760220797130404e-06, "loss": 0.4271, "step": 7649 }, { "epoch": 0.3510623651966408, "grad_norm": 0.4813474118709564, "learning_rate": 9.76014577355593e-06, "loss": 0.4842, "step": 7650 }, { "epoch": 0.3511082557018953, "grad_norm": 0.4638098180294037, "learning_rate": 9.760070738534813e-06, "loss": 0.4126, "step": 7651 }, { "epoch": 0.35115414620714974, "grad_norm": 0.501701831817627, "learning_rate": 9.759995692067224e-06, "loss": 0.3932, "step": 7652 }, { "epoch": 0.3512000367124042, "grad_norm": 0.4455215632915497, "learning_rate": 9.759920634153348e-06, "loss": 0.3458, "step": 7653 }, { "epoch": 0.3512459272176587, "grad_norm": 0.4620753526687622, "learning_rate": 9.759845564793367e-06, "loss": 0.4218, "step": 7654 }, { "epoch": 0.3512918177229131, "grad_norm": 0.6094580292701721, "learning_rate": 9.759770483987459e-06, "loss": 0.4281, "step": 7655 }, { "epoch": 0.35133770822816757, "grad_norm": 0.44705837965011597, "learning_rate": 9.759695391735805e-06, "loss": 0.3885, "step": 7656 }, { "epoch": 0.35138359873342206, "grad_norm": 0.444299578666687, "learning_rate": 9.759620288038585e-06, "loss": 0.3727, "step": 7657 }, { "epoch": 0.3514294892386765, "grad_norm": 0.4475720226764679, "learning_rate": 9.75954517289598e-06, "loss": 0.3575, "step": 7658 }, { "epoch": 0.351475379743931, "grad_norm": 0.47326457500457764, "learning_rate": 9.759470046308172e-06, "loss": 0.4009, "step": 7659 }, { "epoch": 0.35152127024918545, "grad_norm": 0.4519866108894348, "learning_rate": 9.75939490827534e-06, "loss": 0.3705, "step": 7660 }, { "epoch": 0.3515671607544399, "grad_norm": 0.4684807062149048, "learning_rate": 9.759319758797668e-06, "loss": 0.4154, "step": 7661 }, { "epoch": 0.3516130512596944, "grad_norm": 0.49699339270591736, "learning_rate": 9.759244597875331e-06, "loss": 0.4872, "step": 7662 }, { "epoch": 0.35165894176494883, "grad_norm": 0.4818732440471649, "learning_rate": 9.759169425508514e-06, "loss": 0.464, "step": 7663 }, { "epoch": 0.35170483227020327, "grad_norm": 0.4806285500526428, "learning_rate": 9.759094241697395e-06, "loss": 0.383, "step": 7664 }, { "epoch": 0.35175072277545777, "grad_norm": 0.4402625262737274, "learning_rate": 9.75901904644216e-06, "loss": 0.4168, "step": 7665 }, { "epoch": 0.3517966132807122, "grad_norm": 0.44949936866760254, "learning_rate": 9.758943839742982e-06, "loss": 0.4153, "step": 7666 }, { "epoch": 0.3518425037859667, "grad_norm": 0.42444729804992676, "learning_rate": 9.758868621600048e-06, "loss": 0.348, "step": 7667 }, { "epoch": 0.35188839429122115, "grad_norm": 0.44733190536499023, "learning_rate": 9.758793392013538e-06, "loss": 0.4005, "step": 7668 }, { "epoch": 0.3519342847964756, "grad_norm": 0.4605877101421356, "learning_rate": 9.75871815098363e-06, "loss": 0.4127, "step": 7669 }, { "epoch": 0.3519801753017301, "grad_norm": 0.48094403743743896, "learning_rate": 9.758642898510509e-06, "loss": 0.4713, "step": 7670 }, { "epoch": 0.35202606580698453, "grad_norm": 0.45184120535850525, "learning_rate": 9.758567634594351e-06, "loss": 0.3871, "step": 7671 }, { "epoch": 0.352071956312239, "grad_norm": 0.5021167993545532, "learning_rate": 9.758492359235341e-06, "loss": 0.5071, "step": 7672 }, { "epoch": 0.3521178468174935, "grad_norm": 0.44302135705947876, "learning_rate": 9.75841707243366e-06, "loss": 0.3998, "step": 7673 }, { "epoch": 0.3521637373227479, "grad_norm": 0.49103161692619324, "learning_rate": 9.758341774189484e-06, "loss": 0.379, "step": 7674 }, { "epoch": 0.3522096278280024, "grad_norm": 0.433456152677536, "learning_rate": 9.758266464503002e-06, "loss": 0.3236, "step": 7675 }, { "epoch": 0.35225551833325686, "grad_norm": 0.44734370708465576, "learning_rate": 9.758191143374388e-06, "loss": 0.4047, "step": 7676 }, { "epoch": 0.3523014088385113, "grad_norm": 0.5386069416999817, "learning_rate": 9.758115810803827e-06, "loss": 0.5584, "step": 7677 }, { "epoch": 0.3523472993437658, "grad_norm": 0.48044177889823914, "learning_rate": 9.758040466791499e-06, "loss": 0.5166, "step": 7678 }, { "epoch": 0.35239318984902024, "grad_norm": 0.4861387610435486, "learning_rate": 9.757965111337586e-06, "loss": 0.3863, "step": 7679 }, { "epoch": 0.3524390803542747, "grad_norm": 0.4583209753036499, "learning_rate": 9.757889744442268e-06, "loss": 0.3483, "step": 7680 }, { "epoch": 0.3524849708595292, "grad_norm": 0.4502546489238739, "learning_rate": 9.757814366105727e-06, "loss": 0.3488, "step": 7681 }, { "epoch": 0.3525308613647836, "grad_norm": 0.47257286310195923, "learning_rate": 9.757738976328144e-06, "loss": 0.4316, "step": 7682 }, { "epoch": 0.35257675187003806, "grad_norm": 0.44128793478012085, "learning_rate": 9.7576635751097e-06, "loss": 0.3447, "step": 7683 }, { "epoch": 0.35262264237529256, "grad_norm": 0.46079134941101074, "learning_rate": 9.757588162450577e-06, "loss": 0.3577, "step": 7684 }, { "epoch": 0.352668532880547, "grad_norm": 0.4709286391735077, "learning_rate": 9.757512738350957e-06, "loss": 0.422, "step": 7685 }, { "epoch": 0.3527144233858015, "grad_norm": 0.49472177028656006, "learning_rate": 9.75743730281102e-06, "loss": 0.4517, "step": 7686 }, { "epoch": 0.35276031389105594, "grad_norm": 0.45751404762268066, "learning_rate": 9.757361855830945e-06, "loss": 0.3875, "step": 7687 }, { "epoch": 0.3528062043963104, "grad_norm": 0.47235843539237976, "learning_rate": 9.757286397410919e-06, "loss": 0.4256, "step": 7688 }, { "epoch": 0.3528520949015649, "grad_norm": 0.47484615445137024, "learning_rate": 9.75721092755112e-06, "loss": 0.4535, "step": 7689 }, { "epoch": 0.3528979854068193, "grad_norm": 0.46440690755844116, "learning_rate": 9.757135446251732e-06, "loss": 0.3657, "step": 7690 }, { "epoch": 0.35294387591207377, "grad_norm": 0.5233825445175171, "learning_rate": 9.75705995351293e-06, "loss": 0.4062, "step": 7691 }, { "epoch": 0.35298976641732827, "grad_norm": 0.46515750885009766, "learning_rate": 9.756984449334905e-06, "loss": 0.4359, "step": 7692 }, { "epoch": 0.3530356569225827, "grad_norm": 0.4670040011405945, "learning_rate": 9.75690893371783e-06, "loss": 0.4315, "step": 7693 }, { "epoch": 0.3530815474278372, "grad_norm": 0.4625621736049652, "learning_rate": 9.756833406661892e-06, "loss": 0.374, "step": 7694 }, { "epoch": 0.35312743793309165, "grad_norm": 0.4773547649383545, "learning_rate": 9.756757868167273e-06, "loss": 0.4316, "step": 7695 }, { "epoch": 0.3531733284383461, "grad_norm": 0.48153454065322876, "learning_rate": 9.75668231823415e-06, "loss": 0.3806, "step": 7696 }, { "epoch": 0.3532192189436006, "grad_norm": 0.47327664494514465, "learning_rate": 9.756606756862708e-06, "loss": 0.4027, "step": 7697 }, { "epoch": 0.35326510944885503, "grad_norm": 0.4210459887981415, "learning_rate": 9.756531184053128e-06, "loss": 0.333, "step": 7698 }, { "epoch": 0.3533109999541095, "grad_norm": 0.4121096134185791, "learning_rate": 9.756455599805591e-06, "loss": 0.3196, "step": 7699 }, { "epoch": 0.35335689045936397, "grad_norm": 0.4854283630847931, "learning_rate": 9.75638000412028e-06, "loss": 0.4747, "step": 7700 }, { "epoch": 0.3534027809646184, "grad_norm": 0.5128400325775146, "learning_rate": 9.756304396997376e-06, "loss": 0.4958, "step": 7701 }, { "epoch": 0.3534486714698729, "grad_norm": 0.47235602140426636, "learning_rate": 9.756228778437061e-06, "loss": 0.4246, "step": 7702 }, { "epoch": 0.35349456197512735, "grad_norm": 0.4501318335533142, "learning_rate": 9.756153148439518e-06, "loss": 0.3874, "step": 7703 }, { "epoch": 0.3535404524803818, "grad_norm": 0.4421451985836029, "learning_rate": 9.756077507004927e-06, "loss": 0.358, "step": 7704 }, { "epoch": 0.3535863429856363, "grad_norm": 0.4623696208000183, "learning_rate": 9.75600185413347e-06, "loss": 0.4097, "step": 7705 }, { "epoch": 0.35363223349089073, "grad_norm": 0.4466571807861328, "learning_rate": 9.75592618982533e-06, "loss": 0.3721, "step": 7706 }, { "epoch": 0.3536781239961452, "grad_norm": 0.509890079498291, "learning_rate": 9.75585051408069e-06, "loss": 0.5093, "step": 7707 }, { "epoch": 0.3537240145013997, "grad_norm": 0.4390869140625, "learning_rate": 9.75577482689973e-06, "loss": 0.3904, "step": 7708 }, { "epoch": 0.3537699050066541, "grad_norm": 0.4590704143047333, "learning_rate": 9.755699128282631e-06, "loss": 0.4314, "step": 7709 }, { "epoch": 0.35381579551190856, "grad_norm": 0.5847171545028687, "learning_rate": 9.755623418229577e-06, "loss": 0.4429, "step": 7710 }, { "epoch": 0.35386168601716306, "grad_norm": 0.44052451848983765, "learning_rate": 9.755547696740751e-06, "loss": 0.4015, "step": 7711 }, { "epoch": 0.3539075765224175, "grad_norm": 0.43914538621902466, "learning_rate": 9.755471963816331e-06, "loss": 0.3486, "step": 7712 }, { "epoch": 0.353953467027672, "grad_norm": 0.44835731387138367, "learning_rate": 9.755396219456503e-06, "loss": 0.4374, "step": 7713 }, { "epoch": 0.35399935753292644, "grad_norm": 0.4606400430202484, "learning_rate": 9.755320463661449e-06, "loss": 0.4279, "step": 7714 }, { "epoch": 0.3540452480381809, "grad_norm": 0.4537215232849121, "learning_rate": 9.75524469643135e-06, "loss": 0.3369, "step": 7715 }, { "epoch": 0.3540911385434354, "grad_norm": 0.4954622983932495, "learning_rate": 9.755168917766387e-06, "loss": 0.3899, "step": 7716 }, { "epoch": 0.3541370290486898, "grad_norm": 0.47796550393104553, "learning_rate": 9.755093127666746e-06, "loss": 0.4118, "step": 7717 }, { "epoch": 0.35418291955394426, "grad_norm": 0.43439486622810364, "learning_rate": 9.755017326132603e-06, "loss": 0.3434, "step": 7718 }, { "epoch": 0.35422881005919876, "grad_norm": 0.4611569344997406, "learning_rate": 9.754941513164146e-06, "loss": 0.3655, "step": 7719 }, { "epoch": 0.3542747005644532, "grad_norm": 0.46086350083351135, "learning_rate": 9.754865688761556e-06, "loss": 0.3756, "step": 7720 }, { "epoch": 0.3543205910697077, "grad_norm": 0.45835474133491516, "learning_rate": 9.754789852925015e-06, "loss": 0.4078, "step": 7721 }, { "epoch": 0.35436648157496214, "grad_norm": 0.4975372850894928, "learning_rate": 9.754714005654704e-06, "loss": 0.4873, "step": 7722 }, { "epoch": 0.3544123720802166, "grad_norm": 0.7710484862327576, "learning_rate": 9.754638146950806e-06, "loss": 0.4407, "step": 7723 }, { "epoch": 0.3544582625854711, "grad_norm": 0.49734094738960266, "learning_rate": 9.754562276813505e-06, "loss": 0.4624, "step": 7724 }, { "epoch": 0.3545041530907255, "grad_norm": 0.40934163331985474, "learning_rate": 9.754486395242981e-06, "loss": 0.2775, "step": 7725 }, { "epoch": 0.35455004359597997, "grad_norm": 0.49207213521003723, "learning_rate": 9.75441050223942e-06, "loss": 0.4016, "step": 7726 }, { "epoch": 0.35459593410123447, "grad_norm": 0.5139963626861572, "learning_rate": 9.754334597803002e-06, "loss": 0.4973, "step": 7727 }, { "epoch": 0.3546418246064889, "grad_norm": 0.45089462399482727, "learning_rate": 9.754258681933907e-06, "loss": 0.4124, "step": 7728 }, { "epoch": 0.3546877151117434, "grad_norm": 0.398920476436615, "learning_rate": 9.754182754632322e-06, "loss": 0.3441, "step": 7729 }, { "epoch": 0.35473360561699785, "grad_norm": 0.46400922536849976, "learning_rate": 9.754106815898427e-06, "loss": 0.4032, "step": 7730 }, { "epoch": 0.3547794961222523, "grad_norm": 0.45743653178215027, "learning_rate": 9.754030865732408e-06, "loss": 0.3683, "step": 7731 }, { "epoch": 0.3548253866275068, "grad_norm": 0.5279353857040405, "learning_rate": 9.753954904134444e-06, "loss": 0.5009, "step": 7732 }, { "epoch": 0.35487127713276123, "grad_norm": 0.49791938066482544, "learning_rate": 9.75387893110472e-06, "loss": 0.3971, "step": 7733 }, { "epoch": 0.3549171676380157, "grad_norm": 0.48502689599990845, "learning_rate": 9.753802946643414e-06, "loss": 0.4203, "step": 7734 }, { "epoch": 0.35496305814327017, "grad_norm": 0.5431489944458008, "learning_rate": 9.753726950750714e-06, "loss": 0.4644, "step": 7735 }, { "epoch": 0.3550089486485246, "grad_norm": 0.5550913214683533, "learning_rate": 9.753650943426802e-06, "loss": 0.4091, "step": 7736 }, { "epoch": 0.35505483915377906, "grad_norm": 0.5059146285057068, "learning_rate": 9.75357492467186e-06, "loss": 0.5144, "step": 7737 }, { "epoch": 0.35510072965903355, "grad_norm": 0.4584423899650574, "learning_rate": 9.75349889448607e-06, "loss": 0.3674, "step": 7738 }, { "epoch": 0.355146620164288, "grad_norm": 0.4817931354045868, "learning_rate": 9.753422852869615e-06, "loss": 0.3707, "step": 7739 }, { "epoch": 0.3551925106695425, "grad_norm": 0.47430333495140076, "learning_rate": 9.753346799822678e-06, "loss": 0.3851, "step": 7740 }, { "epoch": 0.35523840117479694, "grad_norm": 0.4733114242553711, "learning_rate": 9.753270735345441e-06, "loss": 0.3991, "step": 7741 }, { "epoch": 0.3552842916800514, "grad_norm": 0.5028703212738037, "learning_rate": 9.75319465943809e-06, "loss": 0.5023, "step": 7742 }, { "epoch": 0.3553301821853059, "grad_norm": 0.473196417093277, "learning_rate": 9.753118572100805e-06, "loss": 0.433, "step": 7743 }, { "epoch": 0.3553760726905603, "grad_norm": 0.560771644115448, "learning_rate": 9.75304247333377e-06, "loss": 0.4082, "step": 7744 }, { "epoch": 0.35542196319581476, "grad_norm": 0.5072433352470398, "learning_rate": 9.75296636313717e-06, "loss": 0.4519, "step": 7745 }, { "epoch": 0.35546785370106926, "grad_norm": 0.4948049485683441, "learning_rate": 9.752890241511184e-06, "loss": 0.4845, "step": 7746 }, { "epoch": 0.3555137442063237, "grad_norm": 0.4300050735473633, "learning_rate": 9.752814108455998e-06, "loss": 0.3378, "step": 7747 }, { "epoch": 0.3555596347115782, "grad_norm": 0.42569500207901, "learning_rate": 9.752737963971794e-06, "loss": 0.2983, "step": 7748 }, { "epoch": 0.35560552521683264, "grad_norm": 0.4897690713405609, "learning_rate": 9.752661808058755e-06, "loss": 0.5083, "step": 7749 }, { "epoch": 0.3556514157220871, "grad_norm": 0.4683336913585663, "learning_rate": 9.752585640717064e-06, "loss": 0.349, "step": 7750 }, { "epoch": 0.3556973062273416, "grad_norm": 0.4705594778060913, "learning_rate": 9.752509461946904e-06, "loss": 0.4051, "step": 7751 }, { "epoch": 0.355743196732596, "grad_norm": 0.4658791720867157, "learning_rate": 9.752433271748461e-06, "loss": 0.4634, "step": 7752 }, { "epoch": 0.35578908723785047, "grad_norm": 0.4247986674308777, "learning_rate": 9.752357070121915e-06, "loss": 0.3238, "step": 7753 }, { "epoch": 0.35583497774310496, "grad_norm": 0.46490156650543213, "learning_rate": 9.752280857067447e-06, "loss": 0.4495, "step": 7754 }, { "epoch": 0.3558808682483594, "grad_norm": 0.4360750615596771, "learning_rate": 9.752204632585248e-06, "loss": 0.3791, "step": 7755 }, { "epoch": 0.3559267587536139, "grad_norm": 0.485958456993103, "learning_rate": 9.752128396675493e-06, "loss": 0.4704, "step": 7756 }, { "epoch": 0.35597264925886835, "grad_norm": 0.4442468583583832, "learning_rate": 9.752052149338371e-06, "loss": 0.3851, "step": 7757 }, { "epoch": 0.3560185397641228, "grad_norm": 0.467679888010025, "learning_rate": 9.751975890574062e-06, "loss": 0.4581, "step": 7758 }, { "epoch": 0.3560644302693773, "grad_norm": 0.4374620318412781, "learning_rate": 9.751899620382753e-06, "loss": 0.3498, "step": 7759 }, { "epoch": 0.35611032077463173, "grad_norm": 0.49808269739151, "learning_rate": 9.751823338764624e-06, "loss": 0.4232, "step": 7760 }, { "epoch": 0.35615621127988617, "grad_norm": 0.417671799659729, "learning_rate": 9.751747045719859e-06, "loss": 0.3399, "step": 7761 }, { "epoch": 0.35620210178514067, "grad_norm": 0.4990861713886261, "learning_rate": 9.751670741248642e-06, "loss": 0.4508, "step": 7762 }, { "epoch": 0.3562479922903951, "grad_norm": 0.46671056747436523, "learning_rate": 9.751594425351156e-06, "loss": 0.3873, "step": 7763 }, { "epoch": 0.35629388279564955, "grad_norm": 0.45195499062538147, "learning_rate": 9.751518098027586e-06, "loss": 0.3415, "step": 7764 }, { "epoch": 0.35633977330090405, "grad_norm": 0.46051305532455444, "learning_rate": 9.751441759278115e-06, "loss": 0.4018, "step": 7765 }, { "epoch": 0.3563856638061585, "grad_norm": 0.4263821244239807, "learning_rate": 9.751365409102926e-06, "loss": 0.3276, "step": 7766 }, { "epoch": 0.356431554311413, "grad_norm": 0.427919864654541, "learning_rate": 9.751289047502201e-06, "loss": 0.3558, "step": 7767 }, { "epoch": 0.35647744481666743, "grad_norm": 0.47430238127708435, "learning_rate": 9.751212674476129e-06, "loss": 0.4146, "step": 7768 }, { "epoch": 0.3565233353219219, "grad_norm": 0.4680333733558655, "learning_rate": 9.751136290024888e-06, "loss": 0.3563, "step": 7769 }, { "epoch": 0.3565692258271764, "grad_norm": 0.46310386061668396, "learning_rate": 9.751059894148662e-06, "loss": 0.4163, "step": 7770 }, { "epoch": 0.3566151163324308, "grad_norm": 0.44909363985061646, "learning_rate": 9.750983486847638e-06, "loss": 0.3926, "step": 7771 }, { "epoch": 0.35666100683768526, "grad_norm": 0.4788450002670288, "learning_rate": 9.750907068121999e-06, "loss": 0.4853, "step": 7772 }, { "epoch": 0.35670689734293975, "grad_norm": 0.47060585021972656, "learning_rate": 9.750830637971928e-06, "loss": 0.4284, "step": 7773 }, { "epoch": 0.3567527878481942, "grad_norm": 0.4730147123336792, "learning_rate": 9.750754196397607e-06, "loss": 0.403, "step": 7774 }, { "epoch": 0.3567986783534487, "grad_norm": 0.4556496739387512, "learning_rate": 9.750677743399223e-06, "loss": 0.3824, "step": 7775 }, { "epoch": 0.35684456885870314, "grad_norm": 0.45856332778930664, "learning_rate": 9.750601278976958e-06, "loss": 0.3533, "step": 7776 }, { "epoch": 0.3568904593639576, "grad_norm": 0.4799988567829132, "learning_rate": 9.750524803130996e-06, "loss": 0.4198, "step": 7777 }, { "epoch": 0.3569363498692121, "grad_norm": 0.46967068314552307, "learning_rate": 9.75044831586152e-06, "loss": 0.3939, "step": 7778 }, { "epoch": 0.3569822403744665, "grad_norm": 0.44241034984588623, "learning_rate": 9.750371817168718e-06, "loss": 0.3768, "step": 7779 }, { "epoch": 0.35702813087972096, "grad_norm": 0.4487215280532837, "learning_rate": 9.750295307052769e-06, "loss": 0.381, "step": 7780 }, { "epoch": 0.35707402138497546, "grad_norm": 0.4378577470779419, "learning_rate": 9.750218785513859e-06, "loss": 0.3629, "step": 7781 }, { "epoch": 0.3571199118902299, "grad_norm": 0.42855754494667053, "learning_rate": 9.750142252552172e-06, "loss": 0.3382, "step": 7782 }, { "epoch": 0.3571658023954844, "grad_norm": 0.47133708000183105, "learning_rate": 9.750065708167894e-06, "loss": 0.3997, "step": 7783 }, { "epoch": 0.35721169290073884, "grad_norm": 0.4432305693626404, "learning_rate": 9.749989152361205e-06, "loss": 0.3429, "step": 7784 }, { "epoch": 0.3572575834059933, "grad_norm": 0.44957345724105835, "learning_rate": 9.749912585132292e-06, "loss": 0.4048, "step": 7785 }, { "epoch": 0.3573034739112478, "grad_norm": 0.44443538784980774, "learning_rate": 9.749836006481338e-06, "loss": 0.35, "step": 7786 }, { "epoch": 0.3573493644165022, "grad_norm": 0.470224529504776, "learning_rate": 9.749759416408528e-06, "loss": 0.4396, "step": 7787 }, { "epoch": 0.35739525492175667, "grad_norm": 0.47980430722236633, "learning_rate": 9.749682814914046e-06, "loss": 0.4742, "step": 7788 }, { "epoch": 0.35744114542701116, "grad_norm": 0.45683225989341736, "learning_rate": 9.749606201998075e-06, "loss": 0.4464, "step": 7789 }, { "epoch": 0.3574870359322656, "grad_norm": 0.5236878991127014, "learning_rate": 9.7495295776608e-06, "loss": 0.4779, "step": 7790 }, { "epoch": 0.3575329264375201, "grad_norm": 0.5000109672546387, "learning_rate": 9.749452941902405e-06, "loss": 0.4626, "step": 7791 }, { "epoch": 0.35757881694277455, "grad_norm": 0.4409351944923401, "learning_rate": 9.749376294723076e-06, "loss": 0.3602, "step": 7792 }, { "epoch": 0.357624707448029, "grad_norm": 0.45808207988739014, "learning_rate": 9.749299636122996e-06, "loss": 0.3734, "step": 7793 }, { "epoch": 0.3576705979532835, "grad_norm": 0.41938477754592896, "learning_rate": 9.749222966102347e-06, "loss": 0.3579, "step": 7794 }, { "epoch": 0.35771648845853793, "grad_norm": 0.43687528371810913, "learning_rate": 9.749146284661318e-06, "loss": 0.3778, "step": 7795 }, { "epoch": 0.35776237896379237, "grad_norm": 0.8167298436164856, "learning_rate": 9.74906959180009e-06, "loss": 0.411, "step": 7796 }, { "epoch": 0.35780826946904687, "grad_norm": 0.49253273010253906, "learning_rate": 9.74899288751885e-06, "loss": 0.4197, "step": 7797 }, { "epoch": 0.3578541599743013, "grad_norm": 0.47256723046302795, "learning_rate": 9.74891617181778e-06, "loss": 0.4295, "step": 7798 }, { "epoch": 0.35790005047955575, "grad_norm": 0.45953455567359924, "learning_rate": 9.748839444697065e-06, "loss": 0.4315, "step": 7799 }, { "epoch": 0.35794594098481025, "grad_norm": 0.45818227529525757, "learning_rate": 9.748762706156889e-06, "loss": 0.4282, "step": 7800 }, { "epoch": 0.3579918314900647, "grad_norm": 0.4343860149383545, "learning_rate": 9.74868595619744e-06, "loss": 0.365, "step": 7801 }, { "epoch": 0.3580377219953192, "grad_norm": 0.44190502166748047, "learning_rate": 9.748609194818897e-06, "loss": 0.341, "step": 7802 }, { "epoch": 0.35808361250057363, "grad_norm": 0.47795814275741577, "learning_rate": 9.74853242202145e-06, "loss": 0.4164, "step": 7803 }, { "epoch": 0.3581295030058281, "grad_norm": 0.5763908624649048, "learning_rate": 9.748455637805279e-06, "loss": 0.413, "step": 7804 }, { "epoch": 0.3581753935110826, "grad_norm": 0.4751807451248169, "learning_rate": 9.748378842170573e-06, "loss": 0.4114, "step": 7805 }, { "epoch": 0.358221284016337, "grad_norm": 0.5041840076446533, "learning_rate": 9.748302035117513e-06, "loss": 0.4775, "step": 7806 }, { "epoch": 0.35826717452159146, "grad_norm": 0.46125584840774536, "learning_rate": 9.748225216646287e-06, "loss": 0.3733, "step": 7807 }, { "epoch": 0.35831306502684596, "grad_norm": 0.4681585431098938, "learning_rate": 9.748148386757077e-06, "loss": 0.3873, "step": 7808 }, { "epoch": 0.3583589555321004, "grad_norm": 0.4707982540130615, "learning_rate": 9.748071545450068e-06, "loss": 0.3958, "step": 7809 }, { "epoch": 0.3584048460373549, "grad_norm": 0.4922158718109131, "learning_rate": 9.747994692725446e-06, "loss": 0.4335, "step": 7810 }, { "epoch": 0.35845073654260934, "grad_norm": 0.5148698687553406, "learning_rate": 9.747917828583396e-06, "loss": 0.4249, "step": 7811 }, { "epoch": 0.3584966270478638, "grad_norm": 0.5053566098213196, "learning_rate": 9.747840953024102e-06, "loss": 0.4917, "step": 7812 }, { "epoch": 0.3585425175531183, "grad_norm": 0.5171065330505371, "learning_rate": 9.747764066047746e-06, "loss": 0.5039, "step": 7813 }, { "epoch": 0.3585884080583727, "grad_norm": 0.4390053451061249, "learning_rate": 9.74768716765452e-06, "loss": 0.3239, "step": 7814 }, { "epoch": 0.35863429856362716, "grad_norm": 0.46710720658302307, "learning_rate": 9.747610257844602e-06, "loss": 0.4435, "step": 7815 }, { "epoch": 0.35868018906888166, "grad_norm": 0.4621230661869049, "learning_rate": 9.74753333661818e-06, "loss": 0.3884, "step": 7816 }, { "epoch": 0.3587260795741361, "grad_norm": 0.4727157652378082, "learning_rate": 9.747456403975441e-06, "loss": 0.4455, "step": 7817 }, { "epoch": 0.3587719700793906, "grad_norm": 0.4456329345703125, "learning_rate": 9.747379459916566e-06, "loss": 0.4084, "step": 7818 }, { "epoch": 0.35881786058464504, "grad_norm": 0.5070009827613831, "learning_rate": 9.747302504441742e-06, "loss": 0.4945, "step": 7819 }, { "epoch": 0.3588637510898995, "grad_norm": 0.4655400216579437, "learning_rate": 9.747225537551153e-06, "loss": 0.4418, "step": 7820 }, { "epoch": 0.358909641595154, "grad_norm": 0.435698002576828, "learning_rate": 9.747148559244986e-06, "loss": 0.3099, "step": 7821 }, { "epoch": 0.3589555321004084, "grad_norm": 0.4887925982475281, "learning_rate": 9.747071569523423e-06, "loss": 0.4227, "step": 7822 }, { "epoch": 0.35900142260566287, "grad_norm": 0.47531765699386597, "learning_rate": 9.746994568386653e-06, "loss": 0.3816, "step": 7823 }, { "epoch": 0.35904731311091737, "grad_norm": 0.48864513635635376, "learning_rate": 9.746917555834858e-06, "loss": 0.4512, "step": 7824 }, { "epoch": 0.3590932036161718, "grad_norm": 0.46279168128967285, "learning_rate": 9.746840531868228e-06, "loss": 0.3494, "step": 7825 }, { "epoch": 0.35913909412142625, "grad_norm": 0.5045490264892578, "learning_rate": 9.746763496486939e-06, "loss": 0.4512, "step": 7826 }, { "epoch": 0.35918498462668075, "grad_norm": 0.4746337831020355, "learning_rate": 9.746686449691185e-06, "loss": 0.4295, "step": 7827 }, { "epoch": 0.3592308751319352, "grad_norm": 0.4743708074092865, "learning_rate": 9.74660939148115e-06, "loss": 0.418, "step": 7828 }, { "epoch": 0.3592767656371897, "grad_norm": 0.4391838014125824, "learning_rate": 9.746532321857014e-06, "loss": 0.3247, "step": 7829 }, { "epoch": 0.35932265614244413, "grad_norm": 0.4868067800998688, "learning_rate": 9.746455240818967e-06, "loss": 0.4604, "step": 7830 }, { "epoch": 0.3593685466476986, "grad_norm": 0.4518764615058899, "learning_rate": 9.746378148367194e-06, "loss": 0.3991, "step": 7831 }, { "epoch": 0.35941443715295307, "grad_norm": 0.485799640417099, "learning_rate": 9.746301044501877e-06, "loss": 0.461, "step": 7832 }, { "epoch": 0.3594603276582075, "grad_norm": 0.44626227021217346, "learning_rate": 9.746223929223208e-06, "loss": 0.379, "step": 7833 }, { "epoch": 0.35950621816346195, "grad_norm": 0.45438483357429504, "learning_rate": 9.746146802531367e-06, "loss": 0.4061, "step": 7834 }, { "epoch": 0.35955210866871645, "grad_norm": 0.46258604526519775, "learning_rate": 9.746069664426539e-06, "loss": 0.3958, "step": 7835 }, { "epoch": 0.3595979991739709, "grad_norm": 0.47691595554351807, "learning_rate": 9.745992514908912e-06, "loss": 0.4187, "step": 7836 }, { "epoch": 0.3596438896792254, "grad_norm": 0.5064228773117065, "learning_rate": 9.745915353978672e-06, "loss": 0.4322, "step": 7837 }, { "epoch": 0.35968978018447983, "grad_norm": 0.5110331773757935, "learning_rate": 9.745838181636003e-06, "loss": 0.5039, "step": 7838 }, { "epoch": 0.3597356706897343, "grad_norm": 0.505457878112793, "learning_rate": 9.74576099788109e-06, "loss": 0.522, "step": 7839 }, { "epoch": 0.3597815611949888, "grad_norm": 0.45371755957603455, "learning_rate": 9.74568380271412e-06, "loss": 0.389, "step": 7840 }, { "epoch": 0.3598274517002432, "grad_norm": 0.483137845993042, "learning_rate": 9.74560659613528e-06, "loss": 0.4681, "step": 7841 }, { "epoch": 0.35987334220549766, "grad_norm": 0.446307510137558, "learning_rate": 9.745529378144752e-06, "loss": 0.3258, "step": 7842 }, { "epoch": 0.35991923271075216, "grad_norm": 0.49514302611351013, "learning_rate": 9.745452148742725e-06, "loss": 0.498, "step": 7843 }, { "epoch": 0.3599651232160066, "grad_norm": 0.48106908798217773, "learning_rate": 9.745374907929382e-06, "loss": 0.4203, "step": 7844 }, { "epoch": 0.3600110137212611, "grad_norm": 0.4576026201248169, "learning_rate": 9.745297655704911e-06, "loss": 0.4265, "step": 7845 }, { "epoch": 0.36005690422651554, "grad_norm": 0.4870549440383911, "learning_rate": 9.745220392069497e-06, "loss": 0.421, "step": 7846 }, { "epoch": 0.36010279473177, "grad_norm": 0.4612952768802643, "learning_rate": 9.745143117023323e-06, "loss": 0.409, "step": 7847 }, { "epoch": 0.3601486852370245, "grad_norm": 0.46829164028167725, "learning_rate": 9.74506583056658e-06, "loss": 0.3973, "step": 7848 }, { "epoch": 0.3601945757422789, "grad_norm": 0.4556679427623749, "learning_rate": 9.74498853269945e-06, "loss": 0.3589, "step": 7849 }, { "epoch": 0.36024046624753336, "grad_norm": 0.4898398816585541, "learning_rate": 9.744911223422122e-06, "loss": 0.4219, "step": 7850 }, { "epoch": 0.36028635675278786, "grad_norm": 0.4399726986885071, "learning_rate": 9.744833902734778e-06, "loss": 0.3717, "step": 7851 }, { "epoch": 0.3603322472580423, "grad_norm": 0.469087690114975, "learning_rate": 9.744756570637608e-06, "loss": 0.4642, "step": 7852 }, { "epoch": 0.36037813776329675, "grad_norm": 0.5035542249679565, "learning_rate": 9.744679227130793e-06, "loss": 0.4944, "step": 7853 }, { "epoch": 0.36042402826855124, "grad_norm": 0.4788002371788025, "learning_rate": 9.744601872214525e-06, "loss": 0.4799, "step": 7854 }, { "epoch": 0.3604699187738057, "grad_norm": 0.46393391489982605, "learning_rate": 9.744524505888985e-06, "loss": 0.4026, "step": 7855 }, { "epoch": 0.3605158092790602, "grad_norm": 0.4808913469314575, "learning_rate": 9.744447128154362e-06, "loss": 0.3827, "step": 7856 }, { "epoch": 0.3605616997843146, "grad_norm": 0.4478481411933899, "learning_rate": 9.74436973901084e-06, "loss": 0.3692, "step": 7857 }, { "epoch": 0.36060759028956907, "grad_norm": 0.47368311882019043, "learning_rate": 9.744292338458606e-06, "loss": 0.4095, "step": 7858 }, { "epoch": 0.36065348079482357, "grad_norm": 0.4372916519641876, "learning_rate": 9.744214926497849e-06, "loss": 0.3465, "step": 7859 }, { "epoch": 0.360699371300078, "grad_norm": 0.467933714389801, "learning_rate": 9.74413750312875e-06, "loss": 0.4185, "step": 7860 }, { "epoch": 0.36074526180533245, "grad_norm": 0.4576081335544586, "learning_rate": 9.744060068351498e-06, "loss": 0.4262, "step": 7861 }, { "epoch": 0.36079115231058695, "grad_norm": 0.48531052470207214, "learning_rate": 9.743982622166277e-06, "loss": 0.412, "step": 7862 }, { "epoch": 0.3608370428158414, "grad_norm": 0.4853992164134979, "learning_rate": 9.743905164573278e-06, "loss": 0.4408, "step": 7863 }, { "epoch": 0.3608829333210959, "grad_norm": 0.44082745909690857, "learning_rate": 9.743827695572682e-06, "loss": 0.3457, "step": 7864 }, { "epoch": 0.36092882382635033, "grad_norm": 0.45144349336624146, "learning_rate": 9.74375021516468e-06, "loss": 0.3713, "step": 7865 }, { "epoch": 0.3609747143316048, "grad_norm": 0.5377853512763977, "learning_rate": 9.743672723349453e-06, "loss": 0.4886, "step": 7866 }, { "epoch": 0.36102060483685927, "grad_norm": 0.46922898292541504, "learning_rate": 9.743595220127194e-06, "loss": 0.3851, "step": 7867 }, { "epoch": 0.3610664953421137, "grad_norm": 0.46638351678848267, "learning_rate": 9.743517705498083e-06, "loss": 0.3693, "step": 7868 }, { "epoch": 0.36111238584736816, "grad_norm": 0.45565682649612427, "learning_rate": 9.743440179462309e-06, "loss": 0.3911, "step": 7869 }, { "epoch": 0.36115827635262265, "grad_norm": 0.49627816677093506, "learning_rate": 9.743362642020059e-06, "loss": 0.4815, "step": 7870 }, { "epoch": 0.3612041668578771, "grad_norm": 0.46992817521095276, "learning_rate": 9.743285093171518e-06, "loss": 0.4337, "step": 7871 }, { "epoch": 0.3612500573631316, "grad_norm": 0.4777149260044098, "learning_rate": 9.743207532916873e-06, "loss": 0.3943, "step": 7872 }, { "epoch": 0.36129594786838604, "grad_norm": 0.47868648171424866, "learning_rate": 9.743129961256315e-06, "loss": 0.4345, "step": 7873 }, { "epoch": 0.3613418383736405, "grad_norm": 0.509576141834259, "learning_rate": 9.743052378190021e-06, "loss": 0.5466, "step": 7874 }, { "epoch": 0.361387728878895, "grad_norm": 0.45552173256874084, "learning_rate": 9.742974783718187e-06, "loss": 0.4081, "step": 7875 }, { "epoch": 0.3614336193841494, "grad_norm": 0.5026187300682068, "learning_rate": 9.742897177840992e-06, "loss": 0.4541, "step": 7876 }, { "epoch": 0.36147950988940386, "grad_norm": 0.4564884305000305, "learning_rate": 9.74281956055863e-06, "loss": 0.3673, "step": 7877 }, { "epoch": 0.36152540039465836, "grad_norm": 0.4623177945613861, "learning_rate": 9.742741931871282e-06, "loss": 0.3979, "step": 7878 }, { "epoch": 0.3615712908999128, "grad_norm": 0.47553497552871704, "learning_rate": 9.742664291779137e-06, "loss": 0.3921, "step": 7879 }, { "epoch": 0.3616171814051673, "grad_norm": 0.48133906722068787, "learning_rate": 9.742586640282382e-06, "loss": 0.4832, "step": 7880 }, { "epoch": 0.36166307191042174, "grad_norm": 0.4605104327201843, "learning_rate": 9.7425089773812e-06, "loss": 0.3594, "step": 7881 }, { "epoch": 0.3617089624156762, "grad_norm": 0.4825238287448883, "learning_rate": 9.742431303075783e-06, "loss": 0.4512, "step": 7882 }, { "epoch": 0.3617548529209307, "grad_norm": 0.48937928676605225, "learning_rate": 9.742353617366315e-06, "loss": 0.439, "step": 7883 }, { "epoch": 0.3618007434261851, "grad_norm": 0.4327901303768158, "learning_rate": 9.742275920252982e-06, "loss": 0.3527, "step": 7884 }, { "epoch": 0.36184663393143957, "grad_norm": 0.4557718336582184, "learning_rate": 9.742198211735974e-06, "loss": 0.3619, "step": 7885 }, { "epoch": 0.36189252443669406, "grad_norm": 0.48134058713912964, "learning_rate": 9.742120491815475e-06, "loss": 0.4231, "step": 7886 }, { "epoch": 0.3619384149419485, "grad_norm": 0.47641870379447937, "learning_rate": 9.742042760491673e-06, "loss": 0.3821, "step": 7887 }, { "epoch": 0.36198430544720295, "grad_norm": 0.47012123465538025, "learning_rate": 9.741965017764756e-06, "loss": 0.4714, "step": 7888 }, { "epoch": 0.36203019595245745, "grad_norm": 0.4664852023124695, "learning_rate": 9.741887263634909e-06, "loss": 0.4504, "step": 7889 }, { "epoch": 0.3620760864577119, "grad_norm": 0.46899867057800293, "learning_rate": 9.741809498102319e-06, "loss": 0.4597, "step": 7890 }, { "epoch": 0.3621219769629664, "grad_norm": 0.4533671736717224, "learning_rate": 9.741731721167173e-06, "loss": 0.3454, "step": 7891 }, { "epoch": 0.3621678674682208, "grad_norm": 0.4495394229888916, "learning_rate": 9.74165393282966e-06, "loss": 0.3776, "step": 7892 }, { "epoch": 0.36221375797347527, "grad_norm": 0.4663272798061371, "learning_rate": 9.741576133089967e-06, "loss": 0.3986, "step": 7893 }, { "epoch": 0.36225964847872977, "grad_norm": 0.4765564203262329, "learning_rate": 9.741498321948278e-06, "loss": 0.4229, "step": 7894 }, { "epoch": 0.3623055389839842, "grad_norm": 0.45327427983283997, "learning_rate": 9.741420499404782e-06, "loss": 0.4103, "step": 7895 }, { "epoch": 0.36235142948923865, "grad_norm": 0.44875141978263855, "learning_rate": 9.741342665459667e-06, "loss": 0.398, "step": 7896 }, { "epoch": 0.36239731999449315, "grad_norm": 0.47597238421440125, "learning_rate": 9.74126482011312e-06, "loss": 0.43, "step": 7897 }, { "epoch": 0.3624432104997476, "grad_norm": 0.49926310777664185, "learning_rate": 9.741186963365326e-06, "loss": 0.4118, "step": 7898 }, { "epoch": 0.3624891010050021, "grad_norm": 0.4713848829269409, "learning_rate": 9.741109095216473e-06, "loss": 0.409, "step": 7899 }, { "epoch": 0.36253499151025653, "grad_norm": 0.4256654381752014, "learning_rate": 9.74103121566675e-06, "loss": 0.3286, "step": 7900 }, { "epoch": 0.362580882015511, "grad_norm": 0.41809964179992676, "learning_rate": 9.740953324716345e-06, "loss": 0.3188, "step": 7901 }, { "epoch": 0.3626267725207655, "grad_norm": 0.4738941788673401, "learning_rate": 9.74087542236544e-06, "loss": 0.4476, "step": 7902 }, { "epoch": 0.3626726630260199, "grad_norm": 0.6595299243927002, "learning_rate": 9.740797508614228e-06, "loss": 0.3794, "step": 7903 }, { "epoch": 0.36271855353127436, "grad_norm": 0.44186052680015564, "learning_rate": 9.740719583462894e-06, "loss": 0.3737, "step": 7904 }, { "epoch": 0.36276444403652885, "grad_norm": 0.48513317108154297, "learning_rate": 9.740641646911627e-06, "loss": 0.3911, "step": 7905 }, { "epoch": 0.3628103345417833, "grad_norm": 0.4776601493358612, "learning_rate": 9.74056369896061e-06, "loss": 0.4289, "step": 7906 }, { "epoch": 0.3628562250470378, "grad_norm": 0.4962489902973175, "learning_rate": 9.740485739610037e-06, "loss": 0.4146, "step": 7907 }, { "epoch": 0.36290211555229224, "grad_norm": 0.4357406795024872, "learning_rate": 9.74040776886009e-06, "loss": 0.3859, "step": 7908 }, { "epoch": 0.3629480060575467, "grad_norm": 0.5032374262809753, "learning_rate": 9.740329786710959e-06, "loss": 0.4832, "step": 7909 }, { "epoch": 0.3629938965628012, "grad_norm": 0.45092374086380005, "learning_rate": 9.740251793162829e-06, "loss": 0.3764, "step": 7910 }, { "epoch": 0.3630397870680556, "grad_norm": 0.4660680294036865, "learning_rate": 9.74017378821589e-06, "loss": 0.4938, "step": 7911 }, { "epoch": 0.36308567757331006, "grad_norm": 0.4867881238460541, "learning_rate": 9.74009577187033e-06, "loss": 0.4193, "step": 7912 }, { "epoch": 0.36313156807856456, "grad_norm": 0.44200262427330017, "learning_rate": 9.740017744126337e-06, "loss": 0.3725, "step": 7913 }, { "epoch": 0.363177458583819, "grad_norm": 0.45537450909614563, "learning_rate": 9.739939704984095e-06, "loss": 0.3911, "step": 7914 }, { "epoch": 0.36322334908907344, "grad_norm": 0.49914830923080444, "learning_rate": 9.739861654443796e-06, "loss": 0.504, "step": 7915 }, { "epoch": 0.36326923959432794, "grad_norm": 0.4785454273223877, "learning_rate": 9.739783592505625e-06, "loss": 0.4042, "step": 7916 }, { "epoch": 0.3633151300995824, "grad_norm": 0.4819380044937134, "learning_rate": 9.73970551916977e-06, "loss": 0.4635, "step": 7917 }, { "epoch": 0.3633610206048369, "grad_norm": 0.43958884477615356, "learning_rate": 9.739627434436419e-06, "loss": 0.3693, "step": 7918 }, { "epoch": 0.3634069111100913, "grad_norm": 0.47859689593315125, "learning_rate": 9.73954933830576e-06, "loss": 0.4124, "step": 7919 }, { "epoch": 0.36345280161534577, "grad_norm": 0.49066272377967834, "learning_rate": 9.73947123077798e-06, "loss": 0.4944, "step": 7920 }, { "epoch": 0.36349869212060026, "grad_norm": 0.47038328647613525, "learning_rate": 9.739393111853268e-06, "loss": 0.4648, "step": 7921 }, { "epoch": 0.3635445826258547, "grad_norm": 0.4451914429664612, "learning_rate": 9.739314981531811e-06, "loss": 0.3687, "step": 7922 }, { "epoch": 0.36359047313110915, "grad_norm": 0.458329975605011, "learning_rate": 9.739236839813799e-06, "loss": 0.4393, "step": 7923 }, { "epoch": 0.36363636363636365, "grad_norm": 0.4524228274822235, "learning_rate": 9.739158686699417e-06, "loss": 0.3649, "step": 7924 }, { "epoch": 0.3636822541416181, "grad_norm": 0.4961557388305664, "learning_rate": 9.739080522188854e-06, "loss": 0.4529, "step": 7925 }, { "epoch": 0.3637281446468726, "grad_norm": 0.46224445104599, "learning_rate": 9.739002346282298e-06, "loss": 0.4069, "step": 7926 }, { "epoch": 0.36377403515212703, "grad_norm": 0.4642353057861328, "learning_rate": 9.738924158979937e-06, "loss": 0.3756, "step": 7927 }, { "epoch": 0.36381992565738147, "grad_norm": 0.523690402507782, "learning_rate": 9.73884596028196e-06, "loss": 0.4648, "step": 7928 }, { "epoch": 0.36386581616263597, "grad_norm": 0.4337020516395569, "learning_rate": 9.738767750188554e-06, "loss": 0.3333, "step": 7929 }, { "epoch": 0.3639117066678904, "grad_norm": 0.4346909523010254, "learning_rate": 9.738689528699906e-06, "loss": 0.3229, "step": 7930 }, { "epoch": 0.36395759717314485, "grad_norm": 0.4247654676437378, "learning_rate": 9.738611295816205e-06, "loss": 0.3347, "step": 7931 }, { "epoch": 0.36400348767839935, "grad_norm": 0.4586181640625, "learning_rate": 9.73853305153764e-06, "loss": 0.4501, "step": 7932 }, { "epoch": 0.3640493781836538, "grad_norm": 0.4227321147918701, "learning_rate": 9.738454795864398e-06, "loss": 0.3257, "step": 7933 }, { "epoch": 0.3640952686889083, "grad_norm": 0.8993532657623291, "learning_rate": 9.73837652879667e-06, "loss": 0.4287, "step": 7934 }, { "epoch": 0.36414115919416273, "grad_norm": 0.4714774489402771, "learning_rate": 9.738298250334639e-06, "loss": 0.398, "step": 7935 }, { "epoch": 0.3641870496994172, "grad_norm": 0.45705947279930115, "learning_rate": 9.738219960478499e-06, "loss": 0.4146, "step": 7936 }, { "epoch": 0.3642329402046717, "grad_norm": 0.449897825717926, "learning_rate": 9.738141659228433e-06, "loss": 0.3609, "step": 7937 }, { "epoch": 0.3642788307099261, "grad_norm": 0.45911169052124023, "learning_rate": 9.738063346584631e-06, "loss": 0.36, "step": 7938 }, { "epoch": 0.36432472121518056, "grad_norm": 0.46098849177360535, "learning_rate": 9.737985022547285e-06, "loss": 0.3791, "step": 7939 }, { "epoch": 0.36437061172043506, "grad_norm": 0.44855889678001404, "learning_rate": 9.737906687116578e-06, "loss": 0.375, "step": 7940 }, { "epoch": 0.3644165022256895, "grad_norm": 0.479921817779541, "learning_rate": 9.737828340292703e-06, "loss": 0.4512, "step": 7941 }, { "epoch": 0.36446239273094394, "grad_norm": 0.4942409098148346, "learning_rate": 9.737749982075844e-06, "loss": 0.4476, "step": 7942 }, { "epoch": 0.36450828323619844, "grad_norm": 0.5237117409706116, "learning_rate": 9.737671612466192e-06, "loss": 0.409, "step": 7943 }, { "epoch": 0.3645541737414529, "grad_norm": 0.45513567328453064, "learning_rate": 9.737593231463937e-06, "loss": 0.3522, "step": 7944 }, { "epoch": 0.3646000642467074, "grad_norm": 0.5181505680084229, "learning_rate": 9.737514839069263e-06, "loss": 0.5133, "step": 7945 }, { "epoch": 0.3646459547519618, "grad_norm": 0.4719248116016388, "learning_rate": 9.737436435282362e-06, "loss": 0.3977, "step": 7946 }, { "epoch": 0.36469184525721626, "grad_norm": 0.5288018584251404, "learning_rate": 9.737358020103422e-06, "loss": 0.534, "step": 7947 }, { "epoch": 0.36473773576247076, "grad_norm": 0.4950268268585205, "learning_rate": 9.73727959353263e-06, "loss": 0.4654, "step": 7948 }, { "epoch": 0.3647836262677252, "grad_norm": 0.44992390275001526, "learning_rate": 9.737201155570177e-06, "loss": 0.4266, "step": 7949 }, { "epoch": 0.36482951677297965, "grad_norm": 0.4925025403499603, "learning_rate": 9.73712270621625e-06, "loss": 0.4857, "step": 7950 }, { "epoch": 0.36487540727823414, "grad_norm": 0.5202951431274414, "learning_rate": 9.737044245471036e-06, "loss": 0.4753, "step": 7951 }, { "epoch": 0.3649212977834886, "grad_norm": 0.4630596935749054, "learning_rate": 9.736965773334727e-06, "loss": 0.4286, "step": 7952 }, { "epoch": 0.3649671882887431, "grad_norm": 0.4617745280265808, "learning_rate": 9.736887289807511e-06, "loss": 0.4045, "step": 7953 }, { "epoch": 0.3650130787939975, "grad_norm": 0.4143560230731964, "learning_rate": 9.736808794889577e-06, "loss": 0.2965, "step": 7954 }, { "epoch": 0.36505896929925197, "grad_norm": 0.5091694593429565, "learning_rate": 9.736730288581111e-06, "loss": 0.4693, "step": 7955 }, { "epoch": 0.36510485980450647, "grad_norm": 0.4266557991504669, "learning_rate": 9.736651770882304e-06, "loss": 0.3079, "step": 7956 }, { "epoch": 0.3651507503097609, "grad_norm": 0.5005029439926147, "learning_rate": 9.736573241793345e-06, "loss": 0.4579, "step": 7957 }, { "epoch": 0.36519664081501535, "grad_norm": 0.4957512319087982, "learning_rate": 9.736494701314421e-06, "loss": 0.4787, "step": 7958 }, { "epoch": 0.36524253132026985, "grad_norm": 0.4641667306423187, "learning_rate": 9.736416149445724e-06, "loss": 0.3949, "step": 7959 }, { "epoch": 0.3652884218255243, "grad_norm": 0.4726676642894745, "learning_rate": 9.736337586187438e-06, "loss": 0.3537, "step": 7960 }, { "epoch": 0.3653343123307788, "grad_norm": 0.505027174949646, "learning_rate": 9.736259011539758e-06, "loss": 0.452, "step": 7961 }, { "epoch": 0.36538020283603323, "grad_norm": 0.4358639717102051, "learning_rate": 9.73618042550287e-06, "loss": 0.3539, "step": 7962 }, { "epoch": 0.36542609334128767, "grad_norm": 0.47085753083229065, "learning_rate": 9.73610182807696e-06, "loss": 0.4127, "step": 7963 }, { "epoch": 0.36547198384654217, "grad_norm": 0.5275803208351135, "learning_rate": 9.736023219262223e-06, "loss": 0.4694, "step": 7964 }, { "epoch": 0.3655178743517966, "grad_norm": 1.7177000045776367, "learning_rate": 9.735944599058842e-06, "loss": 0.3908, "step": 7965 }, { "epoch": 0.36556376485705105, "grad_norm": 0.4439907670021057, "learning_rate": 9.735865967467011e-06, "loss": 0.3698, "step": 7966 }, { "epoch": 0.36560965536230555, "grad_norm": 0.4635026454925537, "learning_rate": 9.735787324486916e-06, "loss": 0.3921, "step": 7967 }, { "epoch": 0.36565554586756, "grad_norm": 0.6005432605743408, "learning_rate": 9.735708670118748e-06, "loss": 0.5426, "step": 7968 }, { "epoch": 0.3657014363728145, "grad_norm": 0.47062450647354126, "learning_rate": 9.735630004362695e-06, "loss": 0.3658, "step": 7969 }, { "epoch": 0.36574732687806893, "grad_norm": 0.46867257356643677, "learning_rate": 9.735551327218946e-06, "loss": 0.411, "step": 7970 }, { "epoch": 0.3657932173833234, "grad_norm": 0.5301876664161682, "learning_rate": 9.735472638687691e-06, "loss": 0.5041, "step": 7971 }, { "epoch": 0.3658391078885779, "grad_norm": 0.4820837080478668, "learning_rate": 9.735393938769118e-06, "loss": 0.3985, "step": 7972 }, { "epoch": 0.3658849983938323, "grad_norm": 0.45955127477645874, "learning_rate": 9.735315227463418e-06, "loss": 0.3785, "step": 7973 }, { "epoch": 0.36593088889908676, "grad_norm": 0.4670088589191437, "learning_rate": 9.73523650477078e-06, "loss": 0.3965, "step": 7974 }, { "epoch": 0.36597677940434126, "grad_norm": 0.4967685639858246, "learning_rate": 9.735157770691391e-06, "loss": 0.4598, "step": 7975 }, { "epoch": 0.3660226699095957, "grad_norm": 0.49250465631484985, "learning_rate": 9.735079025225443e-06, "loss": 0.459, "step": 7976 }, { "epoch": 0.36606856041485014, "grad_norm": 0.47690075635910034, "learning_rate": 9.735000268373125e-06, "loss": 0.396, "step": 7977 }, { "epoch": 0.36611445092010464, "grad_norm": 0.4766157269477844, "learning_rate": 9.734921500134623e-06, "loss": 0.4069, "step": 7978 }, { "epoch": 0.3661603414253591, "grad_norm": 0.45853912830352783, "learning_rate": 9.734842720510132e-06, "loss": 0.3928, "step": 7979 }, { "epoch": 0.3662062319306136, "grad_norm": 0.4406105577945709, "learning_rate": 9.734763929499839e-06, "loss": 0.4138, "step": 7980 }, { "epoch": 0.366252122435868, "grad_norm": 0.49110254645347595, "learning_rate": 9.734685127103932e-06, "loss": 0.4184, "step": 7981 }, { "epoch": 0.36629801294112246, "grad_norm": 0.4742239713668823, "learning_rate": 9.734606313322601e-06, "loss": 0.3729, "step": 7982 }, { "epoch": 0.36634390344637696, "grad_norm": 0.486428827047348, "learning_rate": 9.734527488156037e-06, "loss": 0.4794, "step": 7983 }, { "epoch": 0.3663897939516314, "grad_norm": 0.4881732165813446, "learning_rate": 9.734448651604427e-06, "loss": 0.5084, "step": 7984 }, { "epoch": 0.36643568445688585, "grad_norm": 0.4637189507484436, "learning_rate": 9.734369803667965e-06, "loss": 0.4171, "step": 7985 }, { "epoch": 0.36648157496214034, "grad_norm": 0.5018987655639648, "learning_rate": 9.734290944346837e-06, "loss": 0.439, "step": 7986 }, { "epoch": 0.3665274654673948, "grad_norm": 0.497918963432312, "learning_rate": 9.734212073641231e-06, "loss": 0.4926, "step": 7987 }, { "epoch": 0.3665733559726493, "grad_norm": 0.42361322045326233, "learning_rate": 9.734133191551342e-06, "loss": 0.3401, "step": 7988 }, { "epoch": 0.3666192464779037, "grad_norm": 0.4947502613067627, "learning_rate": 9.734054298077355e-06, "loss": 0.3858, "step": 7989 }, { "epoch": 0.36666513698315817, "grad_norm": 0.43561750650405884, "learning_rate": 9.733975393219462e-06, "loss": 0.3477, "step": 7990 }, { "epoch": 0.36671102748841267, "grad_norm": 0.45899152755737305, "learning_rate": 9.733896476977853e-06, "loss": 0.414, "step": 7991 }, { "epoch": 0.3667569179936671, "grad_norm": 0.45467379689216614, "learning_rate": 9.733817549352716e-06, "loss": 0.39, "step": 7992 }, { "epoch": 0.36680280849892155, "grad_norm": 0.4570685625076294, "learning_rate": 9.733738610344244e-06, "loss": 0.3725, "step": 7993 }, { "epoch": 0.36684869900417605, "grad_norm": 0.4900420904159546, "learning_rate": 9.733659659952621e-06, "loss": 0.4643, "step": 7994 }, { "epoch": 0.3668945895094305, "grad_norm": 0.4762815535068512, "learning_rate": 9.733580698178045e-06, "loss": 0.409, "step": 7995 }, { "epoch": 0.366940480014685, "grad_norm": 0.44649678468704224, "learning_rate": 9.733501725020698e-06, "loss": 0.3446, "step": 7996 }, { "epoch": 0.36698637051993943, "grad_norm": 0.4945557415485382, "learning_rate": 9.733422740480774e-06, "loss": 0.4329, "step": 7997 }, { "epoch": 0.3670322610251939, "grad_norm": 0.4655408561229706, "learning_rate": 9.733343744558462e-06, "loss": 0.4047, "step": 7998 }, { "epoch": 0.36707815153044837, "grad_norm": 0.46442753076553345, "learning_rate": 9.733264737253954e-06, "loss": 0.4269, "step": 7999 }, { "epoch": 0.3671240420357028, "grad_norm": 0.49488914012908936, "learning_rate": 9.733185718567435e-06, "loss": 0.4925, "step": 8000 }, { "epoch": 0.36716993254095726, "grad_norm": 0.45052027702331543, "learning_rate": 9.733106688499101e-06, "loss": 0.3999, "step": 8001 }, { "epoch": 0.36721582304621175, "grad_norm": 0.4414416551589966, "learning_rate": 9.733027647049138e-06, "loss": 0.4014, "step": 8002 }, { "epoch": 0.3672617135514662, "grad_norm": 0.4591817259788513, "learning_rate": 9.732948594217737e-06, "loss": 0.4023, "step": 8003 }, { "epoch": 0.36730760405672064, "grad_norm": 0.4903697669506073, "learning_rate": 9.73286953000509e-06, "loss": 0.419, "step": 8004 }, { "epoch": 0.36735349456197514, "grad_norm": 0.45190897583961487, "learning_rate": 9.732790454411384e-06, "loss": 0.3792, "step": 8005 }, { "epoch": 0.3673993850672296, "grad_norm": 0.4193302392959595, "learning_rate": 9.732711367436812e-06, "loss": 0.3274, "step": 8006 }, { "epoch": 0.3674452755724841, "grad_norm": 0.4411812722682953, "learning_rate": 9.73263226908156e-06, "loss": 0.3432, "step": 8007 }, { "epoch": 0.3674911660777385, "grad_norm": 0.44763392210006714, "learning_rate": 9.732553159345824e-06, "loss": 0.3955, "step": 8008 }, { "epoch": 0.36753705658299296, "grad_norm": 0.4963451325893402, "learning_rate": 9.73247403822979e-06, "loss": 0.3898, "step": 8009 }, { "epoch": 0.36758294708824746, "grad_norm": 0.44995906949043274, "learning_rate": 9.732394905733649e-06, "loss": 0.3448, "step": 8010 }, { "epoch": 0.3676288375935019, "grad_norm": 0.42833393812179565, "learning_rate": 9.732315761857594e-06, "loss": 0.3531, "step": 8011 }, { "epoch": 0.36767472809875634, "grad_norm": 0.48283034563064575, "learning_rate": 9.73223660660181e-06, "loss": 0.4453, "step": 8012 }, { "epoch": 0.36772061860401084, "grad_norm": 0.5067793726921082, "learning_rate": 9.732157439966494e-06, "loss": 0.5105, "step": 8013 }, { "epoch": 0.3677665091092653, "grad_norm": 0.46469753980636597, "learning_rate": 9.73207826195183e-06, "loss": 0.4428, "step": 8014 }, { "epoch": 0.3678123996145198, "grad_norm": 0.518373966217041, "learning_rate": 9.731999072558014e-06, "loss": 0.4657, "step": 8015 }, { "epoch": 0.3678582901197742, "grad_norm": 0.4774475395679474, "learning_rate": 9.731919871785231e-06, "loss": 0.4674, "step": 8016 }, { "epoch": 0.36790418062502867, "grad_norm": 0.4660840928554535, "learning_rate": 9.731840659633675e-06, "loss": 0.4101, "step": 8017 }, { "epoch": 0.36795007113028316, "grad_norm": 0.4802314043045044, "learning_rate": 9.731761436103536e-06, "loss": 0.4052, "step": 8018 }, { "epoch": 0.3679959616355376, "grad_norm": 0.48872867226600647, "learning_rate": 9.731682201195005e-06, "loss": 0.4028, "step": 8019 }, { "epoch": 0.36804185214079205, "grad_norm": 0.4654121696949005, "learning_rate": 9.73160295490827e-06, "loss": 0.4347, "step": 8020 }, { "epoch": 0.36808774264604655, "grad_norm": 0.4395206868648529, "learning_rate": 9.731523697243523e-06, "loss": 0.3254, "step": 8021 }, { "epoch": 0.368133633151301, "grad_norm": 0.5039449334144592, "learning_rate": 9.731444428200957e-06, "loss": 0.5234, "step": 8022 }, { "epoch": 0.3681795236565555, "grad_norm": 0.4755762219429016, "learning_rate": 9.73136514778076e-06, "loss": 0.481, "step": 8023 }, { "epoch": 0.3682254141618099, "grad_norm": 0.4889451563358307, "learning_rate": 9.731285855983121e-06, "loss": 0.3608, "step": 8024 }, { "epoch": 0.36827130466706437, "grad_norm": 0.47623538970947266, "learning_rate": 9.731206552808234e-06, "loss": 0.4812, "step": 8025 }, { "epoch": 0.36831719517231887, "grad_norm": 0.4962598979473114, "learning_rate": 9.731127238256288e-06, "loss": 0.474, "step": 8026 }, { "epoch": 0.3683630856775733, "grad_norm": 0.4775947034358978, "learning_rate": 9.731047912327475e-06, "loss": 0.4698, "step": 8027 }, { "epoch": 0.36840897618282775, "grad_norm": 0.4568853974342346, "learning_rate": 9.730968575021985e-06, "loss": 0.4123, "step": 8028 }, { "epoch": 0.36845486668808225, "grad_norm": 0.4713488817214966, "learning_rate": 9.730889226340007e-06, "loss": 0.4179, "step": 8029 }, { "epoch": 0.3685007571933367, "grad_norm": 0.4416423439979553, "learning_rate": 9.730809866281737e-06, "loss": 0.3621, "step": 8030 }, { "epoch": 0.36854664769859113, "grad_norm": 0.46662425994873047, "learning_rate": 9.73073049484736e-06, "loss": 0.3324, "step": 8031 }, { "epoch": 0.36859253820384563, "grad_norm": 0.4579404592514038, "learning_rate": 9.730651112037069e-06, "loss": 0.4361, "step": 8032 }, { "epoch": 0.3686384287091001, "grad_norm": 0.4758075475692749, "learning_rate": 9.730571717851055e-06, "loss": 0.3991, "step": 8033 }, { "epoch": 0.36868431921435457, "grad_norm": 0.45792677998542786, "learning_rate": 9.73049231228951e-06, "loss": 0.3729, "step": 8034 }, { "epoch": 0.368730209719609, "grad_norm": 0.4402088224887848, "learning_rate": 9.730412895352623e-06, "loss": 0.42, "step": 8035 }, { "epoch": 0.36877610022486346, "grad_norm": 0.5387215614318848, "learning_rate": 9.730333467040587e-06, "loss": 0.4696, "step": 8036 }, { "epoch": 0.36882199073011795, "grad_norm": 0.45912817120552063, "learning_rate": 9.730254027353592e-06, "loss": 0.4138, "step": 8037 }, { "epoch": 0.3688678812353724, "grad_norm": 0.5113257169723511, "learning_rate": 9.730174576291828e-06, "loss": 0.4988, "step": 8038 }, { "epoch": 0.36891377174062684, "grad_norm": 0.47522401809692383, "learning_rate": 9.730095113855487e-06, "loss": 0.4432, "step": 8039 }, { "epoch": 0.36895966224588134, "grad_norm": 0.4611656069755554, "learning_rate": 9.73001564004476e-06, "loss": 0.3948, "step": 8040 }, { "epoch": 0.3690055527511358, "grad_norm": 0.4605605900287628, "learning_rate": 9.729936154859839e-06, "loss": 0.4, "step": 8041 }, { "epoch": 0.3690514432563903, "grad_norm": 0.5019216537475586, "learning_rate": 9.729856658300914e-06, "loss": 0.4318, "step": 8042 }, { "epoch": 0.3690973337616447, "grad_norm": 0.4993398189544678, "learning_rate": 9.729777150368177e-06, "loss": 0.4073, "step": 8043 }, { "epoch": 0.36914322426689916, "grad_norm": 0.41586291790008545, "learning_rate": 9.729697631061819e-06, "loss": 0.3428, "step": 8044 }, { "epoch": 0.36918911477215366, "grad_norm": 0.4678192436695099, "learning_rate": 9.72961810038203e-06, "loss": 0.3977, "step": 8045 }, { "epoch": 0.3692350052774081, "grad_norm": 0.8091902732849121, "learning_rate": 9.729538558329002e-06, "loss": 0.5191, "step": 8046 }, { "epoch": 0.36928089578266254, "grad_norm": 0.45810991525650024, "learning_rate": 9.729459004902927e-06, "loss": 0.3857, "step": 8047 }, { "epoch": 0.36932678628791704, "grad_norm": 0.46541959047317505, "learning_rate": 9.729379440103996e-06, "loss": 0.4144, "step": 8048 }, { "epoch": 0.3693726767931715, "grad_norm": 0.49193358421325684, "learning_rate": 9.7292998639324e-06, "loss": 0.3851, "step": 8049 }, { "epoch": 0.369418567298426, "grad_norm": 0.4602566957473755, "learning_rate": 9.72922027638833e-06, "loss": 0.3705, "step": 8050 }, { "epoch": 0.3694644578036804, "grad_norm": 0.46869969367980957, "learning_rate": 9.729140677471978e-06, "loss": 0.401, "step": 8051 }, { "epoch": 0.36951034830893487, "grad_norm": 0.4285276234149933, "learning_rate": 9.729061067183534e-06, "loss": 0.336, "step": 8052 }, { "epoch": 0.36955623881418936, "grad_norm": 0.45687344670295715, "learning_rate": 9.728981445523193e-06, "loss": 0.3566, "step": 8053 }, { "epoch": 0.3696021293194438, "grad_norm": 0.47448474168777466, "learning_rate": 9.728901812491142e-06, "loss": 0.3981, "step": 8054 }, { "epoch": 0.36964801982469825, "grad_norm": 0.44643843173980713, "learning_rate": 9.728822168087577e-06, "loss": 0.3645, "step": 8055 }, { "epoch": 0.36969391032995275, "grad_norm": 0.42920738458633423, "learning_rate": 9.728742512312686e-06, "loss": 0.3616, "step": 8056 }, { "epoch": 0.3697398008352072, "grad_norm": 0.46367207169532776, "learning_rate": 9.728662845166661e-06, "loss": 0.4826, "step": 8057 }, { "epoch": 0.36978569134046163, "grad_norm": 0.482564777135849, "learning_rate": 9.728583166649694e-06, "loss": 0.3564, "step": 8058 }, { "epoch": 0.36983158184571613, "grad_norm": 0.5418311953544617, "learning_rate": 9.728503476761978e-06, "loss": 0.5025, "step": 8059 }, { "epoch": 0.36987747235097057, "grad_norm": 0.4505927562713623, "learning_rate": 9.728423775503703e-06, "loss": 0.399, "step": 8060 }, { "epoch": 0.36992336285622507, "grad_norm": 0.4269683361053467, "learning_rate": 9.728344062875061e-06, "loss": 0.3556, "step": 8061 }, { "epoch": 0.3699692533614795, "grad_norm": 0.5010607242584229, "learning_rate": 9.728264338876245e-06, "loss": 0.3637, "step": 8062 }, { "epoch": 0.37001514386673395, "grad_norm": 0.4785168468952179, "learning_rate": 9.728184603507445e-06, "loss": 0.4574, "step": 8063 }, { "epoch": 0.37006103437198845, "grad_norm": 0.4363779127597809, "learning_rate": 9.728104856768853e-06, "loss": 0.3622, "step": 8064 }, { "epoch": 0.3701069248772429, "grad_norm": 0.4038386046886444, "learning_rate": 9.72802509866066e-06, "loss": 0.3071, "step": 8065 }, { "epoch": 0.37015281538249734, "grad_norm": 0.4707005023956299, "learning_rate": 9.72794532918306e-06, "loss": 0.4239, "step": 8066 }, { "epoch": 0.37019870588775183, "grad_norm": 0.4823363125324249, "learning_rate": 9.727865548336245e-06, "loss": 0.4239, "step": 8067 }, { "epoch": 0.3702445963930063, "grad_norm": 0.450297474861145, "learning_rate": 9.727785756120402e-06, "loss": 0.3783, "step": 8068 }, { "epoch": 0.3702904868982608, "grad_norm": 0.5109229683876038, "learning_rate": 9.72770595253573e-06, "loss": 0.4809, "step": 8069 }, { "epoch": 0.3703363774035152, "grad_norm": 0.4691585600376129, "learning_rate": 9.727626137582416e-06, "loss": 0.3848, "step": 8070 }, { "epoch": 0.37038226790876966, "grad_norm": 0.46388256549835205, "learning_rate": 9.727546311260651e-06, "loss": 0.4518, "step": 8071 }, { "epoch": 0.37042815841402416, "grad_norm": 0.47018569707870483, "learning_rate": 9.727466473570632e-06, "loss": 0.3919, "step": 8072 }, { "epoch": 0.3704740489192786, "grad_norm": 0.43438851833343506, "learning_rate": 9.727386624512546e-06, "loss": 0.3402, "step": 8073 }, { "epoch": 0.37051993942453304, "grad_norm": 0.4712080955505371, "learning_rate": 9.727306764086591e-06, "loss": 0.4324, "step": 8074 }, { "epoch": 0.37056582992978754, "grad_norm": 0.48610761761665344, "learning_rate": 9.727226892292952e-06, "loss": 0.4226, "step": 8075 }, { "epoch": 0.370611720435042, "grad_norm": 0.4481717050075531, "learning_rate": 9.727147009131824e-06, "loss": 0.3749, "step": 8076 }, { "epoch": 0.3706576109402965, "grad_norm": 0.4812678098678589, "learning_rate": 9.727067114603402e-06, "loss": 0.4426, "step": 8077 }, { "epoch": 0.3707035014455509, "grad_norm": 0.44761893153190613, "learning_rate": 9.726987208707873e-06, "loss": 0.3825, "step": 8078 }, { "epoch": 0.37074939195080536, "grad_norm": 0.4644887149333954, "learning_rate": 9.72690729144543e-06, "loss": 0.4108, "step": 8079 }, { "epoch": 0.37079528245605986, "grad_norm": 0.4892721176147461, "learning_rate": 9.726827362816272e-06, "loss": 0.4612, "step": 8080 }, { "epoch": 0.3708411729613143, "grad_norm": 0.47281813621520996, "learning_rate": 9.726747422820582e-06, "loss": 0.3941, "step": 8081 }, { "epoch": 0.37088706346656874, "grad_norm": 0.4691774249076843, "learning_rate": 9.726667471458558e-06, "loss": 0.4512, "step": 8082 }, { "epoch": 0.37093295397182324, "grad_norm": 0.4633810818195343, "learning_rate": 9.726587508730389e-06, "loss": 0.4224, "step": 8083 }, { "epoch": 0.3709788444770777, "grad_norm": 0.45314016938209534, "learning_rate": 9.726507534636271e-06, "loss": 0.4136, "step": 8084 }, { "epoch": 0.3710247349823322, "grad_norm": 0.45336097478866577, "learning_rate": 9.726427549176393e-06, "loss": 0.4142, "step": 8085 }, { "epoch": 0.3710706254875866, "grad_norm": 0.4467010796070099, "learning_rate": 9.726347552350948e-06, "loss": 0.3816, "step": 8086 }, { "epoch": 0.37111651599284107, "grad_norm": 0.45809540152549744, "learning_rate": 9.72626754416013e-06, "loss": 0.4042, "step": 8087 }, { "epoch": 0.37116240649809557, "grad_norm": 0.526657223701477, "learning_rate": 9.72618752460413e-06, "loss": 0.3665, "step": 8088 }, { "epoch": 0.37120829700335, "grad_norm": 0.48596763610839844, "learning_rate": 9.726107493683139e-06, "loss": 0.4757, "step": 8089 }, { "epoch": 0.37125418750860445, "grad_norm": 0.4452357590198517, "learning_rate": 9.726027451397351e-06, "loss": 0.4281, "step": 8090 }, { "epoch": 0.37130007801385895, "grad_norm": 0.441429078578949, "learning_rate": 9.725947397746961e-06, "loss": 0.349, "step": 8091 }, { "epoch": 0.3713459685191134, "grad_norm": 0.517781674861908, "learning_rate": 9.725867332732157e-06, "loss": 0.5259, "step": 8092 }, { "epoch": 0.37139185902436783, "grad_norm": 0.487958699464798, "learning_rate": 9.725787256353134e-06, "loss": 0.4962, "step": 8093 }, { "epoch": 0.37143774952962233, "grad_norm": 0.46005934476852417, "learning_rate": 9.725707168610085e-06, "loss": 0.3649, "step": 8094 }, { "epoch": 0.37148364003487677, "grad_norm": 0.48686784505844116, "learning_rate": 9.725627069503201e-06, "loss": 0.42, "step": 8095 }, { "epoch": 0.37152953054013127, "grad_norm": 0.45250093936920166, "learning_rate": 9.725546959032676e-06, "loss": 0.3806, "step": 8096 }, { "epoch": 0.3715754210453857, "grad_norm": 0.4368855953216553, "learning_rate": 9.725466837198702e-06, "loss": 0.3566, "step": 8097 }, { "epoch": 0.37162131155064015, "grad_norm": 0.49231866002082825, "learning_rate": 9.725386704001471e-06, "loss": 0.451, "step": 8098 }, { "epoch": 0.37166720205589465, "grad_norm": 0.45820948481559753, "learning_rate": 9.725306559441177e-06, "loss": 0.3865, "step": 8099 }, { "epoch": 0.3717130925611491, "grad_norm": 0.4619249999523163, "learning_rate": 9.725226403518011e-06, "loss": 0.4132, "step": 8100 }, { "epoch": 0.37175898306640354, "grad_norm": 0.4838564097881317, "learning_rate": 9.725146236232168e-06, "loss": 0.4788, "step": 8101 }, { "epoch": 0.37180487357165803, "grad_norm": 0.43584656715393066, "learning_rate": 9.725066057583839e-06, "loss": 0.3823, "step": 8102 }, { "epoch": 0.3718507640769125, "grad_norm": 0.43821296095848083, "learning_rate": 9.724985867573218e-06, "loss": 0.3435, "step": 8103 }, { "epoch": 0.371896654582167, "grad_norm": 0.4871059060096741, "learning_rate": 9.724905666200496e-06, "loss": 0.431, "step": 8104 }, { "epoch": 0.3719425450874214, "grad_norm": 0.47945672273635864, "learning_rate": 9.724825453465867e-06, "loss": 0.4234, "step": 8105 }, { "epoch": 0.37198843559267586, "grad_norm": 0.46462875604629517, "learning_rate": 9.724745229369526e-06, "loss": 0.3931, "step": 8106 }, { "epoch": 0.37203432609793036, "grad_norm": 0.4686892330646515, "learning_rate": 9.724664993911662e-06, "loss": 0.3871, "step": 8107 }, { "epoch": 0.3720802166031848, "grad_norm": 0.4590422213077545, "learning_rate": 9.724584747092471e-06, "loss": 0.3704, "step": 8108 }, { "epoch": 0.37212610710843924, "grad_norm": 0.43814969062805176, "learning_rate": 9.724504488912144e-06, "loss": 0.3134, "step": 8109 }, { "epoch": 0.37217199761369374, "grad_norm": 0.4633161723613739, "learning_rate": 9.724424219370876e-06, "loss": 0.3491, "step": 8110 }, { "epoch": 0.3722178881189482, "grad_norm": 0.4515477418899536, "learning_rate": 9.724343938468857e-06, "loss": 0.3521, "step": 8111 }, { "epoch": 0.3722637786242027, "grad_norm": 0.4554186761379242, "learning_rate": 9.724263646206284e-06, "loss": 0.4451, "step": 8112 }, { "epoch": 0.3723096691294571, "grad_norm": 0.47241196036338806, "learning_rate": 9.724183342583347e-06, "loss": 0.3444, "step": 8113 }, { "epoch": 0.37235555963471156, "grad_norm": 0.44862687587738037, "learning_rate": 9.724103027600239e-06, "loss": 0.3639, "step": 8114 }, { "epoch": 0.37240145013996606, "grad_norm": 0.4485190510749817, "learning_rate": 9.724022701257155e-06, "loss": 0.3434, "step": 8115 }, { "epoch": 0.3724473406452205, "grad_norm": 0.4773459732532501, "learning_rate": 9.723942363554288e-06, "loss": 0.4407, "step": 8116 }, { "epoch": 0.37249323115047495, "grad_norm": 0.46536871790885925, "learning_rate": 9.723862014491829e-06, "loss": 0.4085, "step": 8117 }, { "epoch": 0.37253912165572944, "grad_norm": 0.4543834924697876, "learning_rate": 9.723781654069973e-06, "loss": 0.334, "step": 8118 }, { "epoch": 0.3725850121609839, "grad_norm": 0.4743187725543976, "learning_rate": 9.723701282288914e-06, "loss": 0.4437, "step": 8119 }, { "epoch": 0.37263090266623833, "grad_norm": 0.5024212002754211, "learning_rate": 9.723620899148844e-06, "loss": 0.4955, "step": 8120 }, { "epoch": 0.3726767931714928, "grad_norm": 0.47235605120658875, "learning_rate": 9.723540504649957e-06, "loss": 0.4826, "step": 8121 }, { "epoch": 0.37272268367674727, "grad_norm": 0.45504891872406006, "learning_rate": 9.723460098792446e-06, "loss": 0.354, "step": 8122 }, { "epoch": 0.37276857418200177, "grad_norm": 0.46430978178977966, "learning_rate": 9.723379681576504e-06, "loss": 0.4321, "step": 8123 }, { "epoch": 0.3728144646872562, "grad_norm": 0.4982248842716217, "learning_rate": 9.723299253002324e-06, "loss": 0.4926, "step": 8124 }, { "epoch": 0.37286035519251065, "grad_norm": 0.44990095496177673, "learning_rate": 9.723218813070099e-06, "loss": 0.3483, "step": 8125 }, { "epoch": 0.37290624569776515, "grad_norm": 0.42564651370048523, "learning_rate": 9.723138361780024e-06, "loss": 0.3379, "step": 8126 }, { "epoch": 0.3729521362030196, "grad_norm": 0.427450031042099, "learning_rate": 9.723057899132293e-06, "loss": 0.3139, "step": 8127 }, { "epoch": 0.37299802670827403, "grad_norm": 0.4523105025291443, "learning_rate": 9.722977425127099e-06, "loss": 0.438, "step": 8128 }, { "epoch": 0.37304391721352853, "grad_norm": 0.4399386942386627, "learning_rate": 9.722896939764634e-06, "loss": 0.3582, "step": 8129 }, { "epoch": 0.373089807718783, "grad_norm": 0.5143728256225586, "learning_rate": 9.722816443045091e-06, "loss": 0.4195, "step": 8130 }, { "epoch": 0.37313569822403747, "grad_norm": 0.4279252588748932, "learning_rate": 9.722735934968667e-06, "loss": 0.3418, "step": 8131 }, { "epoch": 0.3731815887292919, "grad_norm": 0.44557589292526245, "learning_rate": 9.722655415535554e-06, "loss": 0.3673, "step": 8132 }, { "epoch": 0.37322747923454636, "grad_norm": 0.4313417673110962, "learning_rate": 9.722574884745944e-06, "loss": 0.3499, "step": 8133 }, { "epoch": 0.37327336973980085, "grad_norm": 0.45331770181655884, "learning_rate": 9.72249434260003e-06, "loss": 0.4123, "step": 8134 }, { "epoch": 0.3733192602450553, "grad_norm": 0.49247288703918457, "learning_rate": 9.72241378909801e-06, "loss": 0.497, "step": 8135 }, { "epoch": 0.37336515075030974, "grad_norm": 0.5737965703010559, "learning_rate": 9.722333224240074e-06, "loss": 0.3849, "step": 8136 }, { "epoch": 0.37341104125556424, "grad_norm": 0.48814693093299866, "learning_rate": 9.722252648026421e-06, "loss": 0.4537, "step": 8137 }, { "epoch": 0.3734569317608187, "grad_norm": 0.4780582785606384, "learning_rate": 9.722172060457236e-06, "loss": 0.4296, "step": 8138 }, { "epoch": 0.3735028222660732, "grad_norm": 0.4632754921913147, "learning_rate": 9.722091461532718e-06, "loss": 0.3281, "step": 8139 }, { "epoch": 0.3735487127713276, "grad_norm": 0.46364662051200867, "learning_rate": 9.722010851253064e-06, "loss": 0.3693, "step": 8140 }, { "epoch": 0.37359460327658206, "grad_norm": 0.45502471923828125, "learning_rate": 9.721930229618461e-06, "loss": 0.3474, "step": 8141 }, { "epoch": 0.37364049378183656, "grad_norm": 0.5249725580215454, "learning_rate": 9.721849596629106e-06, "loss": 0.4211, "step": 8142 }, { "epoch": 0.373686384287091, "grad_norm": 0.4458678364753723, "learning_rate": 9.721768952285195e-06, "loss": 0.3686, "step": 8143 }, { "epoch": 0.37373227479234544, "grad_norm": 0.46885237097740173, "learning_rate": 9.721688296586917e-06, "loss": 0.3746, "step": 8144 }, { "epoch": 0.37377816529759994, "grad_norm": 0.4442320764064789, "learning_rate": 9.721607629534473e-06, "loss": 0.3726, "step": 8145 }, { "epoch": 0.3738240558028544, "grad_norm": 0.4607435464859009, "learning_rate": 9.72152695112805e-06, "loss": 0.3858, "step": 8146 }, { "epoch": 0.3738699463081088, "grad_norm": 0.45024415850639343, "learning_rate": 9.721446261367844e-06, "loss": 0.3547, "step": 8147 }, { "epoch": 0.3739158368133633, "grad_norm": 0.4726341962814331, "learning_rate": 9.721365560254052e-06, "loss": 0.4038, "step": 8148 }, { "epoch": 0.37396172731861776, "grad_norm": 0.4787328243255615, "learning_rate": 9.721284847786865e-06, "loss": 0.3894, "step": 8149 }, { "epoch": 0.37400761782387226, "grad_norm": 0.4535009562969208, "learning_rate": 9.721204123966479e-06, "loss": 0.4018, "step": 8150 }, { "epoch": 0.3740535083291267, "grad_norm": 0.4782507121562958, "learning_rate": 9.721123388793085e-06, "loss": 0.4268, "step": 8151 }, { "epoch": 0.37409939883438115, "grad_norm": 0.481377512216568, "learning_rate": 9.72104264226688e-06, "loss": 0.3855, "step": 8152 }, { "epoch": 0.37414528933963564, "grad_norm": 0.47644633054733276, "learning_rate": 9.720961884388058e-06, "loss": 0.3527, "step": 8153 }, { "epoch": 0.3741911798448901, "grad_norm": 0.44543230533599854, "learning_rate": 9.720881115156813e-06, "loss": 0.3561, "step": 8154 }, { "epoch": 0.37423707035014453, "grad_norm": 0.4528518319129944, "learning_rate": 9.720800334573339e-06, "loss": 0.346, "step": 8155 }, { "epoch": 0.374282960855399, "grad_norm": 0.4602299630641937, "learning_rate": 9.72071954263783e-06, "loss": 0.3263, "step": 8156 }, { "epoch": 0.37432885136065347, "grad_norm": 0.4406090974807739, "learning_rate": 9.720638739350479e-06, "loss": 0.3746, "step": 8157 }, { "epoch": 0.37437474186590797, "grad_norm": 0.42417415976524353, "learning_rate": 9.720557924711483e-06, "loss": 0.3472, "step": 8158 }, { "epoch": 0.3744206323711624, "grad_norm": 0.41825437545776367, "learning_rate": 9.720477098721034e-06, "loss": 0.3116, "step": 8159 }, { "epoch": 0.37446652287641685, "grad_norm": 0.48889315128326416, "learning_rate": 9.72039626137933e-06, "loss": 0.422, "step": 8160 }, { "epoch": 0.37451241338167135, "grad_norm": 0.4643462002277374, "learning_rate": 9.720315412686561e-06, "loss": 0.3873, "step": 8161 }, { "epoch": 0.3745583038869258, "grad_norm": 0.4061015248298645, "learning_rate": 9.720234552642921e-06, "loss": 0.3423, "step": 8162 }, { "epoch": 0.37460419439218023, "grad_norm": 0.4500768780708313, "learning_rate": 9.720153681248612e-06, "loss": 0.3727, "step": 8163 }, { "epoch": 0.37465008489743473, "grad_norm": 0.4545755088329315, "learning_rate": 9.72007279850382e-06, "loss": 0.3921, "step": 8164 }, { "epoch": 0.3746959754026892, "grad_norm": 0.49224841594696045, "learning_rate": 9.71999190440874e-06, "loss": 0.4213, "step": 8165 }, { "epoch": 0.37474186590794367, "grad_norm": 0.48643729090690613, "learning_rate": 9.719910998963574e-06, "loss": 0.4964, "step": 8166 }, { "epoch": 0.3747877564131981, "grad_norm": 0.5026622414588928, "learning_rate": 9.719830082168508e-06, "loss": 0.5113, "step": 8167 }, { "epoch": 0.37483364691845256, "grad_norm": 0.45205119252204895, "learning_rate": 9.719749154023743e-06, "loss": 0.3411, "step": 8168 }, { "epoch": 0.37487953742370705, "grad_norm": 0.46454983949661255, "learning_rate": 9.719668214529468e-06, "loss": 0.3569, "step": 8169 }, { "epoch": 0.3749254279289615, "grad_norm": 0.44269686937332153, "learning_rate": 9.719587263685882e-06, "loss": 0.3974, "step": 8170 }, { "epoch": 0.37497131843421594, "grad_norm": 0.5285033583641052, "learning_rate": 9.719506301493179e-06, "loss": 0.4325, "step": 8171 }, { "epoch": 0.37501720893947044, "grad_norm": 0.45297661423683167, "learning_rate": 9.719425327951552e-06, "loss": 0.3534, "step": 8172 }, { "epoch": 0.3750630994447249, "grad_norm": 0.4935588240623474, "learning_rate": 9.719344343061197e-06, "loss": 0.4017, "step": 8173 }, { "epoch": 0.3751089899499794, "grad_norm": 0.4304918646812439, "learning_rate": 9.719263346822308e-06, "loss": 0.3535, "step": 8174 }, { "epoch": 0.3751548804552338, "grad_norm": 0.4949699640274048, "learning_rate": 9.71918233923508e-06, "loss": 0.4339, "step": 8175 }, { "epoch": 0.37520077096048826, "grad_norm": 0.4781631827354431, "learning_rate": 9.719101320299708e-06, "loss": 0.4488, "step": 8176 }, { "epoch": 0.37524666146574276, "grad_norm": 0.4888651669025421, "learning_rate": 9.719020290016386e-06, "loss": 0.4003, "step": 8177 }, { "epoch": 0.3752925519709972, "grad_norm": 0.4683087170124054, "learning_rate": 9.718939248385311e-06, "loss": 0.4143, "step": 8178 }, { "epoch": 0.37533844247625164, "grad_norm": 0.46588730812072754, "learning_rate": 9.718858195406675e-06, "loss": 0.388, "step": 8179 }, { "epoch": 0.37538433298150614, "grad_norm": 0.4829517602920532, "learning_rate": 9.718777131080676e-06, "loss": 0.4694, "step": 8180 }, { "epoch": 0.3754302234867606, "grad_norm": 0.4469701945781708, "learning_rate": 9.718696055407506e-06, "loss": 0.3786, "step": 8181 }, { "epoch": 0.375476113992015, "grad_norm": 0.5979227423667908, "learning_rate": 9.718614968387361e-06, "loss": 0.5714, "step": 8182 }, { "epoch": 0.3755220044972695, "grad_norm": 0.45635634660720825, "learning_rate": 9.718533870020436e-06, "loss": 0.3049, "step": 8183 }, { "epoch": 0.37556789500252397, "grad_norm": 0.45526766777038574, "learning_rate": 9.718452760306928e-06, "loss": 0.3959, "step": 8184 }, { "epoch": 0.37561378550777846, "grad_norm": 0.4375819265842438, "learning_rate": 9.718371639247031e-06, "loss": 0.332, "step": 8185 }, { "epoch": 0.3756596760130329, "grad_norm": 0.49725911021232605, "learning_rate": 9.718290506840937e-06, "loss": 0.4112, "step": 8186 }, { "epoch": 0.37570556651828735, "grad_norm": 0.5130476355552673, "learning_rate": 9.718209363088844e-06, "loss": 0.3664, "step": 8187 }, { "epoch": 0.37575145702354185, "grad_norm": 0.48302939534187317, "learning_rate": 9.718128207990947e-06, "loss": 0.3991, "step": 8188 }, { "epoch": 0.3757973475287963, "grad_norm": 0.4441150426864624, "learning_rate": 9.71804704154744e-06, "loss": 0.3049, "step": 8189 }, { "epoch": 0.37584323803405073, "grad_norm": 0.43720924854278564, "learning_rate": 9.717965863758521e-06, "loss": 0.3473, "step": 8190 }, { "epoch": 0.37588912853930523, "grad_norm": 0.45557448267936707, "learning_rate": 9.71788467462438e-06, "loss": 0.3919, "step": 8191 }, { "epoch": 0.37593501904455967, "grad_norm": 0.46237924695014954, "learning_rate": 9.717803474145217e-06, "loss": 0.4151, "step": 8192 }, { "epoch": 0.37598090954981417, "grad_norm": 0.4438866078853607, "learning_rate": 9.717722262321227e-06, "loss": 0.3338, "step": 8193 }, { "epoch": 0.3760268000550686, "grad_norm": 0.5185517072677612, "learning_rate": 9.717641039152603e-06, "loss": 0.5049, "step": 8194 }, { "epoch": 0.37607269056032305, "grad_norm": 0.4654289186000824, "learning_rate": 9.71755980463954e-06, "loss": 0.3356, "step": 8195 }, { "epoch": 0.37611858106557755, "grad_norm": 0.47097015380859375, "learning_rate": 9.717478558782236e-06, "loss": 0.3879, "step": 8196 }, { "epoch": 0.376164471570832, "grad_norm": 0.5115286707878113, "learning_rate": 9.717397301580885e-06, "loss": 0.5489, "step": 8197 }, { "epoch": 0.37621036207608644, "grad_norm": 0.4676591157913208, "learning_rate": 9.717316033035679e-06, "loss": 0.3835, "step": 8198 }, { "epoch": 0.37625625258134093, "grad_norm": 0.44990721344947815, "learning_rate": 9.71723475314682e-06, "loss": 0.3384, "step": 8199 }, { "epoch": 0.3763021430865954, "grad_norm": 0.48706746101379395, "learning_rate": 9.7171534619145e-06, "loss": 0.4235, "step": 8200 }, { "epoch": 0.3763480335918499, "grad_norm": 0.48613953590393066, "learning_rate": 9.717072159338914e-06, "loss": 0.4891, "step": 8201 }, { "epoch": 0.3763939240971043, "grad_norm": 0.5020565390586853, "learning_rate": 9.716990845420259e-06, "loss": 0.4717, "step": 8202 }, { "epoch": 0.37643981460235876, "grad_norm": 0.4507388770580292, "learning_rate": 9.71690952015873e-06, "loss": 0.3763, "step": 8203 }, { "epoch": 0.37648570510761326, "grad_norm": 0.47889065742492676, "learning_rate": 9.716828183554521e-06, "loss": 0.4376, "step": 8204 }, { "epoch": 0.3765315956128677, "grad_norm": 0.460827112197876, "learning_rate": 9.716746835607829e-06, "loss": 0.3815, "step": 8205 }, { "epoch": 0.37657748611812214, "grad_norm": 0.4715343117713928, "learning_rate": 9.716665476318851e-06, "loss": 0.3768, "step": 8206 }, { "epoch": 0.37662337662337664, "grad_norm": 0.449844628572464, "learning_rate": 9.71658410568778e-06, "loss": 0.3932, "step": 8207 }, { "epoch": 0.3766692671286311, "grad_norm": 0.46386027336120605, "learning_rate": 9.716502723714813e-06, "loss": 0.4176, "step": 8208 }, { "epoch": 0.3767151576338855, "grad_norm": 0.4416763484477997, "learning_rate": 9.716421330400146e-06, "loss": 0.3637, "step": 8209 }, { "epoch": 0.37676104813914, "grad_norm": 0.44814369082450867, "learning_rate": 9.716339925743972e-06, "loss": 0.3526, "step": 8210 }, { "epoch": 0.37680693864439446, "grad_norm": 0.48289379477500916, "learning_rate": 9.716258509746492e-06, "loss": 0.4202, "step": 8211 }, { "epoch": 0.37685282914964896, "grad_norm": 0.49049267172813416, "learning_rate": 9.716177082407897e-06, "loss": 0.4116, "step": 8212 }, { "epoch": 0.3768987196549034, "grad_norm": 0.4465479850769043, "learning_rate": 9.716095643728385e-06, "loss": 0.3506, "step": 8213 }, { "epoch": 0.37694461016015784, "grad_norm": 0.47375378012657166, "learning_rate": 9.716014193708153e-06, "loss": 0.4015, "step": 8214 }, { "epoch": 0.37699050066541234, "grad_norm": 0.47693172097206116, "learning_rate": 9.715932732347392e-06, "loss": 0.4315, "step": 8215 }, { "epoch": 0.3770363911706668, "grad_norm": 0.47348132729530334, "learning_rate": 9.715851259646302e-06, "loss": 0.4407, "step": 8216 }, { "epoch": 0.3770822816759212, "grad_norm": 0.48790687322616577, "learning_rate": 9.715769775605081e-06, "loss": 0.4909, "step": 8217 }, { "epoch": 0.3771281721811757, "grad_norm": 0.418795645236969, "learning_rate": 9.71568828022392e-06, "loss": 0.338, "step": 8218 }, { "epoch": 0.37717406268643017, "grad_norm": 0.4578612446784973, "learning_rate": 9.715606773503017e-06, "loss": 0.3925, "step": 8219 }, { "epoch": 0.37721995319168466, "grad_norm": 0.45661234855651855, "learning_rate": 9.715525255442569e-06, "loss": 0.3501, "step": 8220 }, { "epoch": 0.3772658436969391, "grad_norm": 0.4631122350692749, "learning_rate": 9.715443726042767e-06, "loss": 0.4087, "step": 8221 }, { "epoch": 0.37731173420219355, "grad_norm": 0.44347864389419556, "learning_rate": 9.715362185303817e-06, "loss": 0.3735, "step": 8222 }, { "epoch": 0.37735762470744805, "grad_norm": 0.4790317118167877, "learning_rate": 9.715280633225906e-06, "loss": 0.4146, "step": 8223 }, { "epoch": 0.3774035152127025, "grad_norm": 0.4675402045249939, "learning_rate": 9.715199069809234e-06, "loss": 0.4484, "step": 8224 }, { "epoch": 0.37744940571795693, "grad_norm": 0.4524977505207062, "learning_rate": 9.715117495053996e-06, "loss": 0.3488, "step": 8225 }, { "epoch": 0.37749529622321143, "grad_norm": 0.43830931186676025, "learning_rate": 9.715035908960387e-06, "loss": 0.3731, "step": 8226 }, { "epoch": 0.37754118672846587, "grad_norm": 0.46418023109436035, "learning_rate": 9.714954311528608e-06, "loss": 0.4149, "step": 8227 }, { "epoch": 0.37758707723372037, "grad_norm": 0.5092841982841492, "learning_rate": 9.714872702758848e-06, "loss": 0.4191, "step": 8228 }, { "epoch": 0.3776329677389748, "grad_norm": 0.4970175325870514, "learning_rate": 9.714791082651311e-06, "loss": 0.4023, "step": 8229 }, { "epoch": 0.37767885824422925, "grad_norm": 0.4844402074813843, "learning_rate": 9.714709451206187e-06, "loss": 0.4583, "step": 8230 }, { "epoch": 0.37772474874948375, "grad_norm": 0.4584631025791168, "learning_rate": 9.714627808423675e-06, "loss": 0.3986, "step": 8231 }, { "epoch": 0.3777706392547382, "grad_norm": 0.4738438129425049, "learning_rate": 9.714546154303972e-06, "loss": 0.3893, "step": 8232 }, { "epoch": 0.37781652975999264, "grad_norm": 0.4953833818435669, "learning_rate": 9.714464488847274e-06, "loss": 0.4488, "step": 8233 }, { "epoch": 0.37786242026524713, "grad_norm": 0.4003993570804596, "learning_rate": 9.714382812053777e-06, "loss": 0.281, "step": 8234 }, { "epoch": 0.3779083107705016, "grad_norm": 0.5398240685462952, "learning_rate": 9.714301123923675e-06, "loss": 0.4551, "step": 8235 }, { "epoch": 0.377954201275756, "grad_norm": 0.5457197427749634, "learning_rate": 9.71421942445717e-06, "loss": 0.4189, "step": 8236 }, { "epoch": 0.3780000917810105, "grad_norm": 0.4626288414001465, "learning_rate": 9.714137713654452e-06, "loss": 0.3555, "step": 8237 }, { "epoch": 0.37804598228626496, "grad_norm": 0.41474246978759766, "learning_rate": 9.714055991515722e-06, "loss": 0.3186, "step": 8238 }, { "epoch": 0.37809187279151946, "grad_norm": 0.456434428691864, "learning_rate": 9.713974258041175e-06, "loss": 0.4133, "step": 8239 }, { "epoch": 0.3781377632967739, "grad_norm": 0.40926459431648254, "learning_rate": 9.713892513231007e-06, "loss": 0.2841, "step": 8240 }, { "epoch": 0.37818365380202834, "grad_norm": 0.4456164538860321, "learning_rate": 9.713810757085415e-06, "loss": 0.3693, "step": 8241 }, { "epoch": 0.37822954430728284, "grad_norm": 0.43123140931129456, "learning_rate": 9.713728989604598e-06, "loss": 0.3622, "step": 8242 }, { "epoch": 0.3782754348125373, "grad_norm": 0.4761943221092224, "learning_rate": 9.71364721078875e-06, "loss": 0.4717, "step": 8243 }, { "epoch": 0.3783213253177917, "grad_norm": 0.4528585374355316, "learning_rate": 9.713565420638067e-06, "loss": 0.4023, "step": 8244 }, { "epoch": 0.3783672158230462, "grad_norm": 0.49487781524658203, "learning_rate": 9.713483619152746e-06, "loss": 0.4649, "step": 8245 }, { "epoch": 0.37841310632830066, "grad_norm": 0.4666244685649872, "learning_rate": 9.713401806332986e-06, "loss": 0.365, "step": 8246 }, { "epoch": 0.37845899683355516, "grad_norm": 0.46442312002182007, "learning_rate": 9.713319982178981e-06, "loss": 0.3467, "step": 8247 }, { "epoch": 0.3785048873388096, "grad_norm": 0.4666978120803833, "learning_rate": 9.71323814669093e-06, "loss": 0.439, "step": 8248 }, { "epoch": 0.37855077784406405, "grad_norm": 0.464617520570755, "learning_rate": 9.713156299869028e-06, "loss": 0.4004, "step": 8249 }, { "epoch": 0.37859666834931854, "grad_norm": 0.5115286111831665, "learning_rate": 9.713074441713475e-06, "loss": 0.4607, "step": 8250 }, { "epoch": 0.378642558854573, "grad_norm": 0.4645697772502899, "learning_rate": 9.712992572224463e-06, "loss": 0.4062, "step": 8251 }, { "epoch": 0.37868844935982743, "grad_norm": 0.52003014087677, "learning_rate": 9.712910691402194e-06, "loss": 0.498, "step": 8252 }, { "epoch": 0.3787343398650819, "grad_norm": 0.440660297870636, "learning_rate": 9.712828799246858e-06, "loss": 0.3562, "step": 8253 }, { "epoch": 0.37878023037033637, "grad_norm": 0.472378134727478, "learning_rate": 9.71274689575866e-06, "loss": 0.4419, "step": 8254 }, { "epoch": 0.37882612087559087, "grad_norm": 0.45117640495300293, "learning_rate": 9.712664980937791e-06, "loss": 0.4224, "step": 8255 }, { "epoch": 0.3788720113808453, "grad_norm": 0.5475156903266907, "learning_rate": 9.71258305478445e-06, "loss": 0.5655, "step": 8256 }, { "epoch": 0.37891790188609975, "grad_norm": 0.5003858208656311, "learning_rate": 9.712501117298835e-06, "loss": 0.4582, "step": 8257 }, { "epoch": 0.37896379239135425, "grad_norm": 0.4168681800365448, "learning_rate": 9.712419168481142e-06, "loss": 0.3305, "step": 8258 }, { "epoch": 0.3790096828966087, "grad_norm": 0.5392186045646667, "learning_rate": 9.712337208331568e-06, "loss": 0.4708, "step": 8259 }, { "epoch": 0.37905557340186313, "grad_norm": 0.49951058626174927, "learning_rate": 9.71225523685031e-06, "loss": 0.4676, "step": 8260 }, { "epoch": 0.37910146390711763, "grad_norm": 0.4714858829975128, "learning_rate": 9.712173254037567e-06, "loss": 0.4164, "step": 8261 }, { "epoch": 0.3791473544123721, "grad_norm": 0.5070141553878784, "learning_rate": 9.712091259893532e-06, "loss": 0.4381, "step": 8262 }, { "epoch": 0.37919324491762657, "grad_norm": 0.4539435803890228, "learning_rate": 9.712009254418407e-06, "loss": 0.3811, "step": 8263 }, { "epoch": 0.379239135422881, "grad_norm": 0.4992663264274597, "learning_rate": 9.711927237612386e-06, "loss": 0.4175, "step": 8264 }, { "epoch": 0.37928502592813546, "grad_norm": 0.46159428358078003, "learning_rate": 9.711845209475665e-06, "loss": 0.3727, "step": 8265 }, { "epoch": 0.37933091643338995, "grad_norm": 0.44057777523994446, "learning_rate": 9.711763170008447e-06, "loss": 0.3485, "step": 8266 }, { "epoch": 0.3793768069386444, "grad_norm": 0.46408411860466003, "learning_rate": 9.711681119210923e-06, "loss": 0.36, "step": 8267 }, { "epoch": 0.37942269744389884, "grad_norm": 0.471848726272583, "learning_rate": 9.711599057083294e-06, "loss": 0.3848, "step": 8268 }, { "epoch": 0.37946858794915334, "grad_norm": 0.4797298312187195, "learning_rate": 9.711516983625757e-06, "loss": 0.3684, "step": 8269 }, { "epoch": 0.3795144784544078, "grad_norm": 0.8952170610427856, "learning_rate": 9.711434898838508e-06, "loss": 0.4252, "step": 8270 }, { "epoch": 0.3795603689596622, "grad_norm": 0.4285070300102234, "learning_rate": 9.711352802721744e-06, "loss": 0.3429, "step": 8271 }, { "epoch": 0.3796062594649167, "grad_norm": 0.4940952658653259, "learning_rate": 9.711270695275666e-06, "loss": 0.4656, "step": 8272 }, { "epoch": 0.37965214997017116, "grad_norm": 0.5163105130195618, "learning_rate": 9.711188576500465e-06, "loss": 0.4868, "step": 8273 }, { "epoch": 0.37969804047542566, "grad_norm": 0.4435252547264099, "learning_rate": 9.711106446396345e-06, "loss": 0.3222, "step": 8274 }, { "epoch": 0.3797439309806801, "grad_norm": 0.4323263466358185, "learning_rate": 9.7110243049635e-06, "loss": 0.3481, "step": 8275 }, { "epoch": 0.37978982148593454, "grad_norm": 0.45938247442245483, "learning_rate": 9.71094215220213e-06, "loss": 0.3663, "step": 8276 }, { "epoch": 0.37983571199118904, "grad_norm": 0.5354594588279724, "learning_rate": 9.710859988112429e-06, "loss": 0.4911, "step": 8277 }, { "epoch": 0.3798816024964435, "grad_norm": 0.479200541973114, "learning_rate": 9.710777812694598e-06, "loss": 0.4471, "step": 8278 }, { "epoch": 0.3799274930016979, "grad_norm": 0.44190752506256104, "learning_rate": 9.710695625948832e-06, "loss": 0.3578, "step": 8279 }, { "epoch": 0.3799733835069524, "grad_norm": 0.45690789818763733, "learning_rate": 9.710613427875331e-06, "loss": 0.3934, "step": 8280 }, { "epoch": 0.38001927401220686, "grad_norm": 0.47667229175567627, "learning_rate": 9.710531218474291e-06, "loss": 0.4304, "step": 8281 }, { "epoch": 0.38006516451746136, "grad_norm": 0.4858250319957733, "learning_rate": 9.71044899774591e-06, "loss": 0.4131, "step": 8282 }, { "epoch": 0.3801110550227158, "grad_norm": 0.4979327321052551, "learning_rate": 9.710366765690384e-06, "loss": 0.4392, "step": 8283 }, { "epoch": 0.38015694552797025, "grad_norm": 0.49216383695602417, "learning_rate": 9.710284522307914e-06, "loss": 0.4238, "step": 8284 }, { "epoch": 0.38020283603322474, "grad_norm": 0.41371646523475647, "learning_rate": 9.710202267598697e-06, "loss": 0.3432, "step": 8285 }, { "epoch": 0.3802487265384792, "grad_norm": 0.4837815761566162, "learning_rate": 9.71012000156293e-06, "loss": 0.4219, "step": 8286 }, { "epoch": 0.38029461704373363, "grad_norm": 0.45832619071006775, "learning_rate": 9.71003772420081e-06, "loss": 0.352, "step": 8287 }, { "epoch": 0.3803405075489881, "grad_norm": 0.4829460382461548, "learning_rate": 9.709955435512538e-06, "loss": 0.4202, "step": 8288 }, { "epoch": 0.38038639805424257, "grad_norm": 0.4729141891002655, "learning_rate": 9.709873135498307e-06, "loss": 0.4027, "step": 8289 }, { "epoch": 0.38043228855949707, "grad_norm": 0.5346100926399231, "learning_rate": 9.709790824158318e-06, "loss": 0.4894, "step": 8290 }, { "epoch": 0.3804781790647515, "grad_norm": 0.5437172055244446, "learning_rate": 9.70970850149277e-06, "loss": 0.5451, "step": 8291 }, { "epoch": 0.38052406957000595, "grad_norm": 0.45044800639152527, "learning_rate": 9.709626167501858e-06, "loss": 0.3225, "step": 8292 }, { "epoch": 0.38056996007526045, "grad_norm": 0.4613552689552307, "learning_rate": 9.709543822185781e-06, "loss": 0.4099, "step": 8293 }, { "epoch": 0.3806158505805149, "grad_norm": 0.47125422954559326, "learning_rate": 9.70946146554474e-06, "loss": 0.2868, "step": 8294 }, { "epoch": 0.38066174108576933, "grad_norm": 0.4469430446624756, "learning_rate": 9.709379097578929e-06, "loss": 0.3564, "step": 8295 }, { "epoch": 0.38070763159102383, "grad_norm": 0.5077095627784729, "learning_rate": 9.709296718288548e-06, "loss": 0.4828, "step": 8296 }, { "epoch": 0.3807535220962783, "grad_norm": 0.4666096866130829, "learning_rate": 9.709214327673795e-06, "loss": 0.4056, "step": 8297 }, { "epoch": 0.3807994126015327, "grad_norm": 0.4739914834499359, "learning_rate": 9.709131925734866e-06, "loss": 0.3924, "step": 8298 }, { "epoch": 0.3808453031067872, "grad_norm": 0.46668705344200134, "learning_rate": 9.709049512471962e-06, "loss": 0.3901, "step": 8299 }, { "epoch": 0.38089119361204166, "grad_norm": 0.4913577735424042, "learning_rate": 9.70896708788528e-06, "loss": 0.4143, "step": 8300 }, { "epoch": 0.38093708411729615, "grad_norm": 0.4827563762664795, "learning_rate": 9.70888465197502e-06, "loss": 0.4219, "step": 8301 }, { "epoch": 0.3809829746225506, "grad_norm": 0.44982078671455383, "learning_rate": 9.708802204741377e-06, "loss": 0.3535, "step": 8302 }, { "epoch": 0.38102886512780504, "grad_norm": 0.4478099048137665, "learning_rate": 9.70871974618455e-06, "loss": 0.4027, "step": 8303 }, { "epoch": 0.38107475563305954, "grad_norm": 0.48432034254074097, "learning_rate": 9.70863727630474e-06, "loss": 0.4367, "step": 8304 }, { "epoch": 0.381120646138314, "grad_norm": 0.4797355532646179, "learning_rate": 9.708554795102141e-06, "loss": 0.4354, "step": 8305 }, { "epoch": 0.3811665366435684, "grad_norm": 0.4524461030960083, "learning_rate": 9.708472302576957e-06, "loss": 0.4401, "step": 8306 }, { "epoch": 0.3812124271488229, "grad_norm": 0.45739978551864624, "learning_rate": 9.70838979872938e-06, "loss": 0.3756, "step": 8307 }, { "epoch": 0.38125831765407736, "grad_norm": 0.48678842186927795, "learning_rate": 9.708307283559614e-06, "loss": 0.483, "step": 8308 }, { "epoch": 0.38130420815933186, "grad_norm": 0.4844021499156952, "learning_rate": 9.708224757067854e-06, "loss": 0.4392, "step": 8309 }, { "epoch": 0.3813500986645863, "grad_norm": 0.5173817276954651, "learning_rate": 9.7081422192543e-06, "loss": 0.4988, "step": 8310 }, { "epoch": 0.38139598916984074, "grad_norm": 0.4789606034755707, "learning_rate": 9.70805967011915e-06, "loss": 0.4551, "step": 8311 }, { "epoch": 0.38144187967509524, "grad_norm": 0.42956599593162537, "learning_rate": 9.7079771096626e-06, "loss": 0.3246, "step": 8312 }, { "epoch": 0.3814877701803497, "grad_norm": 0.49112486839294434, "learning_rate": 9.707894537884854e-06, "loss": 0.4657, "step": 8313 }, { "epoch": 0.3815336606856041, "grad_norm": 0.45202475786209106, "learning_rate": 9.707811954786106e-06, "loss": 0.4416, "step": 8314 }, { "epoch": 0.3815795511908586, "grad_norm": 0.4294533133506775, "learning_rate": 9.707729360366555e-06, "loss": 0.3074, "step": 8315 }, { "epoch": 0.38162544169611307, "grad_norm": 0.4875158965587616, "learning_rate": 9.707646754626404e-06, "loss": 0.4598, "step": 8316 }, { "epoch": 0.38167133220136756, "grad_norm": 0.4753047227859497, "learning_rate": 9.707564137565845e-06, "loss": 0.4554, "step": 8317 }, { "epoch": 0.381717222706622, "grad_norm": 0.44825419783592224, "learning_rate": 9.707481509185082e-06, "loss": 0.4122, "step": 8318 }, { "epoch": 0.38176311321187645, "grad_norm": 0.43962204456329346, "learning_rate": 9.70739886948431e-06, "loss": 0.3636, "step": 8319 }, { "epoch": 0.38180900371713095, "grad_norm": 0.4450395405292511, "learning_rate": 9.70731621846373e-06, "loss": 0.3952, "step": 8320 }, { "epoch": 0.3818548942223854, "grad_norm": 0.4353902041912079, "learning_rate": 9.707233556123542e-06, "loss": 0.3541, "step": 8321 }, { "epoch": 0.38190078472763983, "grad_norm": 0.43953320384025574, "learning_rate": 9.70715088246394e-06, "loss": 0.3376, "step": 8322 }, { "epoch": 0.38194667523289433, "grad_norm": 0.4470181167125702, "learning_rate": 9.707068197485128e-06, "loss": 0.3843, "step": 8323 }, { "epoch": 0.38199256573814877, "grad_norm": 0.47696831822395325, "learning_rate": 9.706985501187302e-06, "loss": 0.4273, "step": 8324 }, { "epoch": 0.3820384562434032, "grad_norm": 0.49272671341896057, "learning_rate": 9.706902793570661e-06, "loss": 0.5135, "step": 8325 }, { "epoch": 0.3820843467486577, "grad_norm": 0.4752046465873718, "learning_rate": 9.706820074635405e-06, "loss": 0.4341, "step": 8326 }, { "epoch": 0.38213023725391215, "grad_norm": 0.4485021233558655, "learning_rate": 9.706737344381732e-06, "loss": 0.3836, "step": 8327 }, { "epoch": 0.38217612775916665, "grad_norm": 0.44785410165786743, "learning_rate": 9.706654602809839e-06, "loss": 0.3488, "step": 8328 }, { "epoch": 0.3822220182644211, "grad_norm": 0.464387983083725, "learning_rate": 9.70657184991993e-06, "loss": 0.396, "step": 8329 }, { "epoch": 0.38226790876967554, "grad_norm": 0.48688462376594543, "learning_rate": 9.7064890857122e-06, "loss": 0.4341, "step": 8330 }, { "epoch": 0.38231379927493003, "grad_norm": 0.42955482006073, "learning_rate": 9.70640631018685e-06, "loss": 0.3605, "step": 8331 }, { "epoch": 0.3823596897801845, "grad_norm": 0.434162974357605, "learning_rate": 9.706323523344077e-06, "loss": 0.3592, "step": 8332 }, { "epoch": 0.3824055802854389, "grad_norm": 0.49073097109794617, "learning_rate": 9.706240725184083e-06, "loss": 0.4413, "step": 8333 }, { "epoch": 0.3824514707906934, "grad_norm": 0.4110638201236725, "learning_rate": 9.706157915707064e-06, "loss": 0.2926, "step": 8334 }, { "epoch": 0.38249736129594786, "grad_norm": 0.4811594486236572, "learning_rate": 9.706075094913222e-06, "loss": 0.4186, "step": 8335 }, { "epoch": 0.38254325180120236, "grad_norm": 0.4778303802013397, "learning_rate": 9.705992262802753e-06, "loss": 0.4189, "step": 8336 }, { "epoch": 0.3825891423064568, "grad_norm": 0.4732328951358795, "learning_rate": 9.705909419375859e-06, "loss": 0.4317, "step": 8337 }, { "epoch": 0.38263503281171124, "grad_norm": 0.45637059211730957, "learning_rate": 9.705826564632736e-06, "loss": 0.3987, "step": 8338 }, { "epoch": 0.38268092331696574, "grad_norm": 0.44102656841278076, "learning_rate": 9.705743698573586e-06, "loss": 0.3642, "step": 8339 }, { "epoch": 0.3827268138222202, "grad_norm": 0.4993564188480377, "learning_rate": 9.70566082119861e-06, "loss": 0.4381, "step": 8340 }, { "epoch": 0.3827727043274746, "grad_norm": 0.4486505687236786, "learning_rate": 9.705577932508004e-06, "loss": 0.3664, "step": 8341 }, { "epoch": 0.3828185948327291, "grad_norm": 0.4665676951408386, "learning_rate": 9.705495032501966e-06, "loss": 0.3959, "step": 8342 }, { "epoch": 0.38286448533798356, "grad_norm": 0.5160298347473145, "learning_rate": 9.705412121180699e-06, "loss": 0.5282, "step": 8343 }, { "epoch": 0.38291037584323806, "grad_norm": 0.4632991850376129, "learning_rate": 9.7053291985444e-06, "loss": 0.4255, "step": 8344 }, { "epoch": 0.3829562663484925, "grad_norm": 0.46649202704429626, "learning_rate": 9.705246264593271e-06, "loss": 0.4196, "step": 8345 }, { "epoch": 0.38300215685374694, "grad_norm": 0.4724917411804199, "learning_rate": 9.705163319327508e-06, "loss": 0.4375, "step": 8346 }, { "epoch": 0.38304804735900144, "grad_norm": 0.4525358974933624, "learning_rate": 9.705080362747314e-06, "loss": 0.3612, "step": 8347 }, { "epoch": 0.3830939378642559, "grad_norm": 0.443567156791687, "learning_rate": 9.704997394852887e-06, "loss": 0.3399, "step": 8348 }, { "epoch": 0.3831398283695103, "grad_norm": 0.4305554926395416, "learning_rate": 9.704914415644425e-06, "loss": 0.3343, "step": 8349 }, { "epoch": 0.3831857188747648, "grad_norm": 0.44661766290664673, "learning_rate": 9.704831425122128e-06, "loss": 0.3465, "step": 8350 }, { "epoch": 0.38323160938001927, "grad_norm": 0.4470031261444092, "learning_rate": 9.704748423286196e-06, "loss": 0.3771, "step": 8351 }, { "epoch": 0.3832774998852737, "grad_norm": 0.4797007441520691, "learning_rate": 9.70466541013683e-06, "loss": 0.4049, "step": 8352 }, { "epoch": 0.3833233903905282, "grad_norm": 0.49246424436569214, "learning_rate": 9.70458238567423e-06, "loss": 0.4165, "step": 8353 }, { "epoch": 0.38336928089578265, "grad_norm": 0.448569118976593, "learning_rate": 9.704499349898593e-06, "loss": 0.3709, "step": 8354 }, { "epoch": 0.38341517140103715, "grad_norm": 0.43227648735046387, "learning_rate": 9.704416302810118e-06, "loss": 0.3323, "step": 8355 }, { "epoch": 0.3834610619062916, "grad_norm": 0.5418111681938171, "learning_rate": 9.70433324440901e-06, "loss": 0.4384, "step": 8356 }, { "epoch": 0.38350695241154603, "grad_norm": 0.5218394994735718, "learning_rate": 9.704250174695463e-06, "loss": 0.4795, "step": 8357 }, { "epoch": 0.38355284291680053, "grad_norm": 0.4497133493423462, "learning_rate": 9.704167093669678e-06, "loss": 0.3759, "step": 8358 }, { "epoch": 0.38359873342205497, "grad_norm": 0.47627773880958557, "learning_rate": 9.704084001331858e-06, "loss": 0.395, "step": 8359 }, { "epoch": 0.3836446239273094, "grad_norm": 0.49893948435783386, "learning_rate": 9.7040008976822e-06, "loss": 0.4328, "step": 8360 }, { "epoch": 0.3836905144325639, "grad_norm": 0.46872392296791077, "learning_rate": 9.703917782720904e-06, "loss": 0.4086, "step": 8361 }, { "epoch": 0.38373640493781835, "grad_norm": 0.4398308992385864, "learning_rate": 9.70383465644817e-06, "loss": 0.3391, "step": 8362 }, { "epoch": 0.38378229544307285, "grad_norm": 0.43760430812835693, "learning_rate": 9.703751518864199e-06, "loss": 0.3523, "step": 8363 }, { "epoch": 0.3838281859483273, "grad_norm": 0.4730702340602875, "learning_rate": 9.703668369969191e-06, "loss": 0.3992, "step": 8364 }, { "epoch": 0.38387407645358174, "grad_norm": 0.45412975549697876, "learning_rate": 9.703585209763344e-06, "loss": 0.4617, "step": 8365 }, { "epoch": 0.38391996695883623, "grad_norm": 0.4601110816001892, "learning_rate": 9.70350203824686e-06, "loss": 0.3668, "step": 8366 }, { "epoch": 0.3839658574640907, "grad_norm": 0.4649891257286072, "learning_rate": 9.703418855419937e-06, "loss": 0.4254, "step": 8367 }, { "epoch": 0.3840117479693451, "grad_norm": 0.47691312432289124, "learning_rate": 9.703335661282776e-06, "loss": 0.4247, "step": 8368 }, { "epoch": 0.3840576384745996, "grad_norm": 0.4574812948703766, "learning_rate": 9.703252455835576e-06, "loss": 0.333, "step": 8369 }, { "epoch": 0.38410352897985406, "grad_norm": 0.46248960494995117, "learning_rate": 9.70316923907854e-06, "loss": 0.415, "step": 8370 }, { "epoch": 0.38414941948510856, "grad_norm": 0.5206280946731567, "learning_rate": 9.703086011011867e-06, "loss": 0.4653, "step": 8371 }, { "epoch": 0.384195309990363, "grad_norm": 0.47378215193748474, "learning_rate": 9.703002771635755e-06, "loss": 0.4273, "step": 8372 }, { "epoch": 0.38424120049561744, "grad_norm": 0.5185543298721313, "learning_rate": 9.702919520950406e-06, "loss": 0.481, "step": 8373 }, { "epoch": 0.38428709100087194, "grad_norm": 0.48190850019454956, "learning_rate": 9.70283625895602e-06, "loss": 0.4519, "step": 8374 }, { "epoch": 0.3843329815061264, "grad_norm": 0.4631403982639313, "learning_rate": 9.702752985652798e-06, "loss": 0.4326, "step": 8375 }, { "epoch": 0.3843788720113808, "grad_norm": 0.4552483558654785, "learning_rate": 9.702669701040938e-06, "loss": 0.3611, "step": 8376 }, { "epoch": 0.3844247625166353, "grad_norm": 0.4930436909198761, "learning_rate": 9.702586405120642e-06, "loss": 0.385, "step": 8377 }, { "epoch": 0.38447065302188976, "grad_norm": 0.429897278547287, "learning_rate": 9.702503097892112e-06, "loss": 0.3455, "step": 8378 }, { "epoch": 0.38451654352714426, "grad_norm": 0.4308724105358124, "learning_rate": 9.702419779355542e-06, "loss": 0.3798, "step": 8379 }, { "epoch": 0.3845624340323987, "grad_norm": 0.49082064628601074, "learning_rate": 9.702336449511138e-06, "loss": 0.4063, "step": 8380 }, { "epoch": 0.38460832453765315, "grad_norm": 0.47112369537353516, "learning_rate": 9.7022531083591e-06, "loss": 0.4269, "step": 8381 }, { "epoch": 0.38465421504290764, "grad_norm": 0.45445263385772705, "learning_rate": 9.702169755899627e-06, "loss": 0.3778, "step": 8382 }, { "epoch": 0.3847001055481621, "grad_norm": 0.45491957664489746, "learning_rate": 9.70208639213292e-06, "loss": 0.4068, "step": 8383 }, { "epoch": 0.38474599605341653, "grad_norm": 0.47046777606010437, "learning_rate": 9.702003017059179e-06, "loss": 0.3934, "step": 8384 }, { "epoch": 0.384791886558671, "grad_norm": 0.47704777121543884, "learning_rate": 9.701919630678604e-06, "loss": 0.4147, "step": 8385 }, { "epoch": 0.38483777706392547, "grad_norm": 0.4478757679462433, "learning_rate": 9.701836232991397e-06, "loss": 0.3877, "step": 8386 }, { "epoch": 0.3848836675691799, "grad_norm": 0.4703952670097351, "learning_rate": 9.701752823997759e-06, "loss": 0.4235, "step": 8387 }, { "epoch": 0.3849295580744344, "grad_norm": 0.46203720569610596, "learning_rate": 9.701669403697887e-06, "loss": 0.441, "step": 8388 }, { "epoch": 0.38497544857968885, "grad_norm": 0.4345662295818329, "learning_rate": 9.701585972091984e-06, "loss": 0.4014, "step": 8389 }, { "epoch": 0.38502133908494335, "grad_norm": 0.4861225485801697, "learning_rate": 9.701502529180253e-06, "loss": 0.4328, "step": 8390 }, { "epoch": 0.3850672295901978, "grad_norm": 0.5345048308372498, "learning_rate": 9.70141907496289e-06, "loss": 0.5577, "step": 8391 }, { "epoch": 0.38511312009545223, "grad_norm": 0.46646860241889954, "learning_rate": 9.7013356094401e-06, "loss": 0.3742, "step": 8392 }, { "epoch": 0.38515901060070673, "grad_norm": 0.46382835507392883, "learning_rate": 9.70125213261208e-06, "loss": 0.4333, "step": 8393 }, { "epoch": 0.3852049011059612, "grad_norm": 0.4557649791240692, "learning_rate": 9.701168644479033e-06, "loss": 0.3744, "step": 8394 }, { "epoch": 0.3852507916112156, "grad_norm": 0.44920802116394043, "learning_rate": 9.70108514504116e-06, "loss": 0.3411, "step": 8395 }, { "epoch": 0.3852966821164701, "grad_norm": 0.49356749653816223, "learning_rate": 9.70100163429866e-06, "loss": 0.4903, "step": 8396 }, { "epoch": 0.38534257262172456, "grad_norm": 0.44686180353164673, "learning_rate": 9.700918112251733e-06, "loss": 0.4096, "step": 8397 }, { "epoch": 0.38538846312697905, "grad_norm": 0.48263344168663025, "learning_rate": 9.700834578900583e-06, "loss": 0.4149, "step": 8398 }, { "epoch": 0.3854343536322335, "grad_norm": 0.49760544300079346, "learning_rate": 9.70075103424541e-06, "loss": 0.5004, "step": 8399 }, { "epoch": 0.38548024413748794, "grad_norm": 0.4403175413608551, "learning_rate": 9.700667478286415e-06, "loss": 0.3843, "step": 8400 }, { "epoch": 0.38552613464274244, "grad_norm": 0.4831075370311737, "learning_rate": 9.700583911023798e-06, "loss": 0.3931, "step": 8401 }, { "epoch": 0.3855720251479969, "grad_norm": 0.47512587904930115, "learning_rate": 9.700500332457758e-06, "loss": 0.4417, "step": 8402 }, { "epoch": 0.3856179156532513, "grad_norm": 0.47712501883506775, "learning_rate": 9.700416742588501e-06, "loss": 0.4216, "step": 8403 }, { "epoch": 0.3856638061585058, "grad_norm": 0.48236021399497986, "learning_rate": 9.700333141416224e-06, "loss": 0.4912, "step": 8404 }, { "epoch": 0.38570969666376026, "grad_norm": 0.4740074574947357, "learning_rate": 9.70024952894113e-06, "loss": 0.4072, "step": 8405 }, { "epoch": 0.38575558716901476, "grad_norm": 0.4570198357105255, "learning_rate": 9.700165905163419e-06, "loss": 0.3705, "step": 8406 }, { "epoch": 0.3858014776742692, "grad_norm": 0.45355919003486633, "learning_rate": 9.70008227008329e-06, "loss": 0.3839, "step": 8407 }, { "epoch": 0.38584736817952364, "grad_norm": 0.45618879795074463, "learning_rate": 9.69999862370095e-06, "loss": 0.4008, "step": 8408 }, { "epoch": 0.38589325868477814, "grad_norm": 0.48290762305259705, "learning_rate": 9.699914966016595e-06, "loss": 0.3982, "step": 8409 }, { "epoch": 0.3859391491900326, "grad_norm": 0.47827664017677307, "learning_rate": 9.699831297030429e-06, "loss": 0.4636, "step": 8410 }, { "epoch": 0.385985039695287, "grad_norm": 0.46036893129348755, "learning_rate": 9.699747616742651e-06, "loss": 0.4115, "step": 8411 }, { "epoch": 0.3860309302005415, "grad_norm": 0.4618206024169922, "learning_rate": 9.699663925153463e-06, "loss": 0.418, "step": 8412 }, { "epoch": 0.38607682070579596, "grad_norm": 0.4307831823825836, "learning_rate": 9.699580222263068e-06, "loss": 0.3225, "step": 8413 }, { "epoch": 0.3861227112110504, "grad_norm": 0.44958049058914185, "learning_rate": 9.699496508071665e-06, "loss": 0.351, "step": 8414 }, { "epoch": 0.3861686017163049, "grad_norm": 0.4616737961769104, "learning_rate": 9.699412782579454e-06, "loss": 0.4309, "step": 8415 }, { "epoch": 0.38621449222155935, "grad_norm": 0.4471718668937683, "learning_rate": 9.69932904578664e-06, "loss": 0.3834, "step": 8416 }, { "epoch": 0.38626038272681384, "grad_norm": 0.4755763113498688, "learning_rate": 9.699245297693424e-06, "loss": 0.4484, "step": 8417 }, { "epoch": 0.3863062732320683, "grad_norm": 0.42185693979263306, "learning_rate": 9.699161538300006e-06, "loss": 0.3188, "step": 8418 }, { "epoch": 0.38635216373732273, "grad_norm": 0.4859219789505005, "learning_rate": 9.699077767606587e-06, "loss": 0.4504, "step": 8419 }, { "epoch": 0.3863980542425772, "grad_norm": 0.4808337688446045, "learning_rate": 9.698993985613367e-06, "loss": 0.3962, "step": 8420 }, { "epoch": 0.38644394474783167, "grad_norm": 0.4514528810977936, "learning_rate": 9.698910192320552e-06, "loss": 0.3964, "step": 8421 }, { "epoch": 0.3864898352530861, "grad_norm": 0.44106897711753845, "learning_rate": 9.698826387728341e-06, "loss": 0.3675, "step": 8422 }, { "epoch": 0.3865357257583406, "grad_norm": 0.4566441476345062, "learning_rate": 9.698742571836934e-06, "loss": 0.4168, "step": 8423 }, { "epoch": 0.38658161626359505, "grad_norm": 0.4769282937049866, "learning_rate": 9.698658744646536e-06, "loss": 0.4725, "step": 8424 }, { "epoch": 0.38662750676884955, "grad_norm": 0.5071470141410828, "learning_rate": 9.698574906157346e-06, "loss": 0.4504, "step": 8425 }, { "epoch": 0.386673397274104, "grad_norm": 0.47008535265922546, "learning_rate": 9.698491056369566e-06, "loss": 0.3983, "step": 8426 }, { "epoch": 0.38671928777935843, "grad_norm": 0.48331886529922485, "learning_rate": 9.698407195283396e-06, "loss": 0.4095, "step": 8427 }, { "epoch": 0.38676517828461293, "grad_norm": 0.46814489364624023, "learning_rate": 9.698323322899044e-06, "loss": 0.4155, "step": 8428 }, { "epoch": 0.3868110687898674, "grad_norm": 0.41404491662979126, "learning_rate": 9.698239439216703e-06, "loss": 0.3112, "step": 8429 }, { "epoch": 0.3868569592951218, "grad_norm": 0.4392063319683075, "learning_rate": 9.69815554423658e-06, "loss": 0.375, "step": 8430 }, { "epoch": 0.3869028498003763, "grad_norm": 0.5238365530967712, "learning_rate": 9.698071637958877e-06, "loss": 0.4396, "step": 8431 }, { "epoch": 0.38694874030563076, "grad_norm": 0.44936665892601013, "learning_rate": 9.697987720383795e-06, "loss": 0.3931, "step": 8432 }, { "epoch": 0.38699463081088525, "grad_norm": 0.4627906382083893, "learning_rate": 9.697903791511534e-06, "loss": 0.4581, "step": 8433 }, { "epoch": 0.3870405213161397, "grad_norm": 0.45458608865737915, "learning_rate": 9.697819851342296e-06, "loss": 0.3489, "step": 8434 }, { "epoch": 0.38708641182139414, "grad_norm": 0.4643838107585907, "learning_rate": 9.697735899876286e-06, "loss": 0.4042, "step": 8435 }, { "epoch": 0.38713230232664864, "grad_norm": 0.45791617035865784, "learning_rate": 9.697651937113703e-06, "loss": 0.3752, "step": 8436 }, { "epoch": 0.3871781928319031, "grad_norm": 0.4958685040473938, "learning_rate": 9.697567963054748e-06, "loss": 0.4622, "step": 8437 }, { "epoch": 0.3872240833371575, "grad_norm": 0.4513900578022003, "learning_rate": 9.697483977699626e-06, "loss": 0.351, "step": 8438 }, { "epoch": 0.387269973842412, "grad_norm": 0.4998222589492798, "learning_rate": 9.697399981048539e-06, "loss": 0.4314, "step": 8439 }, { "epoch": 0.38731586434766646, "grad_norm": 0.5018230676651001, "learning_rate": 9.697315973101686e-06, "loss": 0.4611, "step": 8440 }, { "epoch": 0.3873617548529209, "grad_norm": 0.4948044419288635, "learning_rate": 9.697231953859271e-06, "loss": 0.5221, "step": 8441 }, { "epoch": 0.3874076453581754, "grad_norm": 0.5019229054450989, "learning_rate": 9.697147923321496e-06, "loss": 0.4145, "step": 8442 }, { "epoch": 0.38745353586342984, "grad_norm": 0.4509308338165283, "learning_rate": 9.697063881488561e-06, "loss": 0.3844, "step": 8443 }, { "epoch": 0.38749942636868434, "grad_norm": 0.4653271734714508, "learning_rate": 9.696979828360673e-06, "loss": 0.4791, "step": 8444 }, { "epoch": 0.3875453168739388, "grad_norm": 0.5491061210632324, "learning_rate": 9.696895763938027e-06, "loss": 0.5448, "step": 8445 }, { "epoch": 0.3875912073791932, "grad_norm": 0.49758028984069824, "learning_rate": 9.696811688220833e-06, "loss": 0.4972, "step": 8446 }, { "epoch": 0.3876370978844477, "grad_norm": 0.4888775050640106, "learning_rate": 9.696727601209287e-06, "loss": 0.492, "step": 8447 }, { "epoch": 0.38768298838970217, "grad_norm": 0.4525541663169861, "learning_rate": 9.696643502903594e-06, "loss": 0.3484, "step": 8448 }, { "epoch": 0.3877288788949566, "grad_norm": 0.4701160192489624, "learning_rate": 9.696559393303954e-06, "loss": 0.4824, "step": 8449 }, { "epoch": 0.3877747694002111, "grad_norm": 0.45301681756973267, "learning_rate": 9.696475272410574e-06, "loss": 0.4303, "step": 8450 }, { "epoch": 0.38782065990546555, "grad_norm": 0.49915072321891785, "learning_rate": 9.696391140223651e-06, "loss": 0.4596, "step": 8451 }, { "epoch": 0.38786655041072005, "grad_norm": 0.4563049376010895, "learning_rate": 9.69630699674339e-06, "loss": 0.3533, "step": 8452 }, { "epoch": 0.3879124409159745, "grad_norm": 0.44588980078697205, "learning_rate": 9.696222841969993e-06, "loss": 0.3923, "step": 8453 }, { "epoch": 0.38795833142122893, "grad_norm": 0.47425606846809387, "learning_rate": 9.696138675903663e-06, "loss": 0.3236, "step": 8454 }, { "epoch": 0.38800422192648343, "grad_norm": 0.48490166664123535, "learning_rate": 9.6960544985446e-06, "loss": 0.4331, "step": 8455 }, { "epoch": 0.38805011243173787, "grad_norm": 0.48784756660461426, "learning_rate": 9.69597030989301e-06, "loss": 0.439, "step": 8456 }, { "epoch": 0.3880960029369923, "grad_norm": 0.48858046531677246, "learning_rate": 9.695886109949093e-06, "loss": 0.4961, "step": 8457 }, { "epoch": 0.3881418934422468, "grad_norm": 0.48567572236061096, "learning_rate": 9.69580189871305e-06, "loss": 0.4434, "step": 8458 }, { "epoch": 0.38818778394750125, "grad_norm": 0.4231758713722229, "learning_rate": 9.695717676185088e-06, "loss": 0.3258, "step": 8459 }, { "epoch": 0.38823367445275575, "grad_norm": 0.5134083032608032, "learning_rate": 9.695633442365406e-06, "loss": 0.4745, "step": 8460 }, { "epoch": 0.3882795649580102, "grad_norm": 0.45244917273521423, "learning_rate": 9.695549197254208e-06, "loss": 0.3711, "step": 8461 }, { "epoch": 0.38832545546326463, "grad_norm": 0.48285987973213196, "learning_rate": 9.695464940851696e-06, "loss": 0.4354, "step": 8462 }, { "epoch": 0.38837134596851913, "grad_norm": 0.4743655323982239, "learning_rate": 9.695380673158072e-06, "loss": 0.4325, "step": 8463 }, { "epoch": 0.3884172364737736, "grad_norm": 0.49074769020080566, "learning_rate": 9.695296394173539e-06, "loss": 0.4708, "step": 8464 }, { "epoch": 0.388463126979028, "grad_norm": 0.4766613841056824, "learning_rate": 9.6952121038983e-06, "loss": 0.4008, "step": 8465 }, { "epoch": 0.3885090174842825, "grad_norm": 0.4954339265823364, "learning_rate": 9.69512780233256e-06, "loss": 0.4165, "step": 8466 }, { "epoch": 0.38855490798953696, "grad_norm": 0.44261690974235535, "learning_rate": 9.695043489476517e-06, "loss": 0.3501, "step": 8467 }, { "epoch": 0.38860079849479146, "grad_norm": 0.4264901280403137, "learning_rate": 9.694959165330378e-06, "loss": 0.3331, "step": 8468 }, { "epoch": 0.3886466890000459, "grad_norm": 0.5148507952690125, "learning_rate": 9.694874829894343e-06, "loss": 0.4608, "step": 8469 }, { "epoch": 0.38869257950530034, "grad_norm": 0.48227229714393616, "learning_rate": 9.694790483168617e-06, "loss": 0.4412, "step": 8470 }, { "epoch": 0.38873847001055484, "grad_norm": 0.4578356444835663, "learning_rate": 9.694706125153399e-06, "loss": 0.3351, "step": 8471 }, { "epoch": 0.3887843605158093, "grad_norm": 0.4463386535644531, "learning_rate": 9.694621755848896e-06, "loss": 0.3749, "step": 8472 }, { "epoch": 0.3888302510210637, "grad_norm": 0.48371225595474243, "learning_rate": 9.69453737525531e-06, "loss": 0.4609, "step": 8473 }, { "epoch": 0.3888761415263182, "grad_norm": 0.47581446170806885, "learning_rate": 9.694452983372841e-06, "loss": 0.37, "step": 8474 }, { "epoch": 0.38892203203157266, "grad_norm": 0.49874043464660645, "learning_rate": 9.694368580201698e-06, "loss": 0.4723, "step": 8475 }, { "epoch": 0.3889679225368271, "grad_norm": 0.4929570257663727, "learning_rate": 9.694284165742077e-06, "loss": 0.4854, "step": 8476 }, { "epoch": 0.3890138130420816, "grad_norm": 0.4662057161331177, "learning_rate": 9.694199739994185e-06, "loss": 0.374, "step": 8477 }, { "epoch": 0.38905970354733604, "grad_norm": 0.4705458879470825, "learning_rate": 9.694115302958225e-06, "loss": 0.4088, "step": 8478 }, { "epoch": 0.38910559405259054, "grad_norm": 0.5173704624176025, "learning_rate": 9.694030854634398e-06, "loss": 0.4742, "step": 8479 }, { "epoch": 0.389151484557845, "grad_norm": 0.4635837972164154, "learning_rate": 9.693946395022908e-06, "loss": 0.4277, "step": 8480 }, { "epoch": 0.3891973750630994, "grad_norm": 0.46322330832481384, "learning_rate": 9.693861924123959e-06, "loss": 0.3846, "step": 8481 }, { "epoch": 0.3892432655683539, "grad_norm": 0.4779599905014038, "learning_rate": 9.693777441937755e-06, "loss": 0.4831, "step": 8482 }, { "epoch": 0.38928915607360837, "grad_norm": 0.4560511112213135, "learning_rate": 9.693692948464495e-06, "loss": 0.4042, "step": 8483 }, { "epoch": 0.3893350465788628, "grad_norm": 0.5237135887145996, "learning_rate": 9.693608443704388e-06, "loss": 0.4776, "step": 8484 }, { "epoch": 0.3893809370841173, "grad_norm": 0.5197299718856812, "learning_rate": 9.69352392765763e-06, "loss": 0.6065, "step": 8485 }, { "epoch": 0.38942682758937175, "grad_norm": 0.4203716218471527, "learning_rate": 9.693439400324431e-06, "loss": 0.3024, "step": 8486 }, { "epoch": 0.38947271809462625, "grad_norm": 0.4687536060810089, "learning_rate": 9.693354861704991e-06, "loss": 0.4207, "step": 8487 }, { "epoch": 0.3895186085998807, "grad_norm": 0.45304107666015625, "learning_rate": 9.693270311799515e-06, "loss": 0.3672, "step": 8488 }, { "epoch": 0.38956449910513513, "grad_norm": 0.4355490505695343, "learning_rate": 9.693185750608204e-06, "loss": 0.3401, "step": 8489 }, { "epoch": 0.38961038961038963, "grad_norm": 0.43713536858558655, "learning_rate": 9.69310117813126e-06, "loss": 0.375, "step": 8490 }, { "epoch": 0.38965628011564407, "grad_norm": 0.439841091632843, "learning_rate": 9.693016594368893e-06, "loss": 0.3499, "step": 8491 }, { "epoch": 0.3897021706208985, "grad_norm": 0.49400460720062256, "learning_rate": 9.6929319993213e-06, "loss": 0.4991, "step": 8492 }, { "epoch": 0.389748061126153, "grad_norm": 0.4501924514770508, "learning_rate": 9.692847392988686e-06, "loss": 0.3877, "step": 8493 }, { "epoch": 0.38979395163140745, "grad_norm": 0.5039815306663513, "learning_rate": 9.692762775371259e-06, "loss": 0.4918, "step": 8494 }, { "epoch": 0.38983984213666195, "grad_norm": 0.4276253581047058, "learning_rate": 9.692678146469214e-06, "loss": 0.329, "step": 8495 }, { "epoch": 0.3898857326419164, "grad_norm": 0.43700525164604187, "learning_rate": 9.692593506282762e-06, "loss": 0.3587, "step": 8496 }, { "epoch": 0.38993162314717084, "grad_norm": 0.4485294818878174, "learning_rate": 9.692508854812102e-06, "loss": 0.3581, "step": 8497 }, { "epoch": 0.38997751365242533, "grad_norm": 0.47604647278785706, "learning_rate": 9.692424192057441e-06, "loss": 0.3977, "step": 8498 }, { "epoch": 0.3900234041576798, "grad_norm": 0.4553724229335785, "learning_rate": 9.692339518018978e-06, "loss": 0.3736, "step": 8499 }, { "epoch": 0.3900692946629342, "grad_norm": 0.45458129048347473, "learning_rate": 9.692254832696921e-06, "loss": 0.3968, "step": 8500 }, { "epoch": 0.3901151851681887, "grad_norm": 0.46565794944763184, "learning_rate": 9.692170136091472e-06, "loss": 0.4006, "step": 8501 }, { "epoch": 0.39016107567344316, "grad_norm": 0.5006628632545471, "learning_rate": 9.692085428202835e-06, "loss": 0.4566, "step": 8502 }, { "epoch": 0.3902069661786976, "grad_norm": 0.48373353481292725, "learning_rate": 9.692000709031214e-06, "loss": 0.4101, "step": 8503 }, { "epoch": 0.3902528566839521, "grad_norm": 0.48382923007011414, "learning_rate": 9.691915978576812e-06, "loss": 0.4183, "step": 8504 }, { "epoch": 0.39029874718920654, "grad_norm": 0.4850400984287262, "learning_rate": 9.691831236839832e-06, "loss": 0.4086, "step": 8505 }, { "epoch": 0.39034463769446104, "grad_norm": 0.46354368329048157, "learning_rate": 9.69174648382048e-06, "loss": 0.4245, "step": 8506 }, { "epoch": 0.3903905281997155, "grad_norm": 0.47254428267478943, "learning_rate": 9.691661719518955e-06, "loss": 0.4764, "step": 8507 }, { "epoch": 0.3904364187049699, "grad_norm": 0.46487656235694885, "learning_rate": 9.691576943935468e-06, "loss": 0.4001, "step": 8508 }, { "epoch": 0.3904823092102244, "grad_norm": 0.4596210718154907, "learning_rate": 9.691492157070217e-06, "loss": 0.3776, "step": 8509 }, { "epoch": 0.39052819971547886, "grad_norm": 0.4168436527252197, "learning_rate": 9.691407358923409e-06, "loss": 0.3023, "step": 8510 }, { "epoch": 0.3905740902207333, "grad_norm": 0.45442837476730347, "learning_rate": 9.691322549495247e-06, "loss": 0.3876, "step": 8511 }, { "epoch": 0.3906199807259878, "grad_norm": 0.4610825777053833, "learning_rate": 9.691237728785934e-06, "loss": 0.4181, "step": 8512 }, { "epoch": 0.39066587123124225, "grad_norm": 0.4814831614494324, "learning_rate": 9.691152896795676e-06, "loss": 0.4046, "step": 8513 }, { "epoch": 0.39071176173649674, "grad_norm": 0.43903297185897827, "learning_rate": 9.691068053524675e-06, "loss": 0.4178, "step": 8514 }, { "epoch": 0.3907576522417512, "grad_norm": 0.4849167466163635, "learning_rate": 9.690983198973136e-06, "loss": 0.4721, "step": 8515 }, { "epoch": 0.39080354274700563, "grad_norm": 0.48738500475883484, "learning_rate": 9.690898333141264e-06, "loss": 0.3764, "step": 8516 }, { "epoch": 0.3908494332522601, "grad_norm": 0.45710647106170654, "learning_rate": 9.690813456029262e-06, "loss": 0.4057, "step": 8517 }, { "epoch": 0.39089532375751457, "grad_norm": 0.46582651138305664, "learning_rate": 9.690728567637332e-06, "loss": 0.4071, "step": 8518 }, { "epoch": 0.390941214262769, "grad_norm": 0.4486205577850342, "learning_rate": 9.690643667965682e-06, "loss": 0.3692, "step": 8519 }, { "epoch": 0.3909871047680235, "grad_norm": 0.44895729422569275, "learning_rate": 9.690558757014514e-06, "loss": 0.3793, "step": 8520 }, { "epoch": 0.39103299527327795, "grad_norm": 0.515789270401001, "learning_rate": 9.690473834784032e-06, "loss": 0.4626, "step": 8521 }, { "epoch": 0.39107888577853245, "grad_norm": 0.43202105164527893, "learning_rate": 9.690388901274442e-06, "loss": 0.3591, "step": 8522 }, { "epoch": 0.3911247762837869, "grad_norm": 0.46173349022865295, "learning_rate": 9.690303956485946e-06, "loss": 0.4006, "step": 8523 }, { "epoch": 0.39117066678904133, "grad_norm": 0.4614937901496887, "learning_rate": 9.69021900041875e-06, "loss": 0.3967, "step": 8524 }, { "epoch": 0.39121655729429583, "grad_norm": 0.4738675355911255, "learning_rate": 9.690134033073056e-06, "loss": 0.4006, "step": 8525 }, { "epoch": 0.3912624477995503, "grad_norm": 0.4683053493499756, "learning_rate": 9.690049054449071e-06, "loss": 0.4394, "step": 8526 }, { "epoch": 0.3913083383048047, "grad_norm": 0.4362429678440094, "learning_rate": 9.689964064546999e-06, "loss": 0.3818, "step": 8527 }, { "epoch": 0.3913542288100592, "grad_norm": 0.431314617395401, "learning_rate": 9.689879063367042e-06, "loss": 0.3552, "step": 8528 }, { "epoch": 0.39140011931531365, "grad_norm": 0.4697848856449127, "learning_rate": 9.689794050909408e-06, "loss": 0.5138, "step": 8529 }, { "epoch": 0.3914460098205681, "grad_norm": 0.4638991951942444, "learning_rate": 9.689709027174299e-06, "loss": 0.435, "step": 8530 }, { "epoch": 0.3914919003258226, "grad_norm": 0.44568994641304016, "learning_rate": 9.689623992161918e-06, "loss": 0.3829, "step": 8531 }, { "epoch": 0.39153779083107704, "grad_norm": 0.4965907335281372, "learning_rate": 9.689538945872473e-06, "loss": 0.3303, "step": 8532 }, { "epoch": 0.39158368133633153, "grad_norm": 0.47154876589775085, "learning_rate": 9.689453888306167e-06, "loss": 0.3669, "step": 8533 }, { "epoch": 0.391629571841586, "grad_norm": 0.47644510865211487, "learning_rate": 9.689368819463204e-06, "loss": 0.4193, "step": 8534 }, { "epoch": 0.3916754623468404, "grad_norm": 0.43990036845207214, "learning_rate": 9.68928373934379e-06, "loss": 0.3319, "step": 8535 }, { "epoch": 0.3917213528520949, "grad_norm": 0.47022491693496704, "learning_rate": 9.689198647948126e-06, "loss": 0.4163, "step": 8536 }, { "epoch": 0.39176724335734936, "grad_norm": 0.4544909596443176, "learning_rate": 9.68911354527642e-06, "loss": 0.4216, "step": 8537 }, { "epoch": 0.3918131338626038, "grad_norm": 0.4939616322517395, "learning_rate": 9.689028431328877e-06, "loss": 0.4723, "step": 8538 }, { "epoch": 0.3918590243678583, "grad_norm": 0.46473363041877747, "learning_rate": 9.6889433061057e-06, "loss": 0.4063, "step": 8539 }, { "epoch": 0.39190491487311274, "grad_norm": 0.5054740309715271, "learning_rate": 9.688858169607095e-06, "loss": 0.4681, "step": 8540 }, { "epoch": 0.39195080537836724, "grad_norm": 0.43713995814323425, "learning_rate": 9.688773021833266e-06, "loss": 0.3545, "step": 8541 }, { "epoch": 0.3919966958836217, "grad_norm": 0.47141167521476746, "learning_rate": 9.688687862784416e-06, "loss": 0.4337, "step": 8542 }, { "epoch": 0.3920425863888761, "grad_norm": 0.5062408447265625, "learning_rate": 9.688602692460754e-06, "loss": 0.5004, "step": 8543 }, { "epoch": 0.3920884768941306, "grad_norm": 0.5107776522636414, "learning_rate": 9.688517510862481e-06, "loss": 0.5179, "step": 8544 }, { "epoch": 0.39213436739938506, "grad_norm": 0.477706640958786, "learning_rate": 9.688432317989802e-06, "loss": 0.4731, "step": 8545 }, { "epoch": 0.3921802579046395, "grad_norm": 0.4755861759185791, "learning_rate": 9.688347113842924e-06, "loss": 0.4613, "step": 8546 }, { "epoch": 0.392226148409894, "grad_norm": 0.44089922308921814, "learning_rate": 9.688261898422052e-06, "loss": 0.3978, "step": 8547 }, { "epoch": 0.39227203891514845, "grad_norm": 0.4510617256164551, "learning_rate": 9.688176671727387e-06, "loss": 0.4135, "step": 8548 }, { "epoch": 0.39231792942040294, "grad_norm": 0.4418678879737854, "learning_rate": 9.68809143375914e-06, "loss": 0.3326, "step": 8549 }, { "epoch": 0.3923638199256574, "grad_norm": 0.46288397908210754, "learning_rate": 9.68800618451751e-06, "loss": 0.3966, "step": 8550 }, { "epoch": 0.39240971043091183, "grad_norm": 0.4673866331577301, "learning_rate": 9.687920924002705e-06, "loss": 0.426, "step": 8551 }, { "epoch": 0.3924556009361663, "grad_norm": 0.46155846118927, "learning_rate": 9.687835652214932e-06, "loss": 0.3853, "step": 8552 }, { "epoch": 0.39250149144142077, "grad_norm": 0.494147926568985, "learning_rate": 9.687750369154391e-06, "loss": 0.4917, "step": 8553 }, { "epoch": 0.3925473819466752, "grad_norm": 0.48230621218681335, "learning_rate": 9.687665074821291e-06, "loss": 0.4006, "step": 8554 }, { "epoch": 0.3925932724519297, "grad_norm": 0.473418265581131, "learning_rate": 9.687579769215837e-06, "loss": 0.4253, "step": 8555 }, { "epoch": 0.39263916295718415, "grad_norm": 0.5901590585708618, "learning_rate": 9.687494452338232e-06, "loss": 0.3793, "step": 8556 }, { "epoch": 0.39268505346243865, "grad_norm": 0.46326932311058044, "learning_rate": 9.687409124188682e-06, "loss": 0.399, "step": 8557 }, { "epoch": 0.3927309439676931, "grad_norm": 0.4534607231616974, "learning_rate": 9.687323784767394e-06, "loss": 0.4032, "step": 8558 }, { "epoch": 0.39277683447294753, "grad_norm": 0.42637670040130615, "learning_rate": 9.68723843407457e-06, "loss": 0.3167, "step": 8559 }, { "epoch": 0.39282272497820203, "grad_norm": 0.4752059280872345, "learning_rate": 9.687153072110418e-06, "loss": 0.3901, "step": 8560 }, { "epoch": 0.3928686154834565, "grad_norm": 0.4701560139656067, "learning_rate": 9.68706769887514e-06, "loss": 0.4818, "step": 8561 }, { "epoch": 0.3929145059887109, "grad_norm": 0.4895174205303192, "learning_rate": 9.686982314368946e-06, "loss": 0.5422, "step": 8562 }, { "epoch": 0.3929603964939654, "grad_norm": 0.435077041387558, "learning_rate": 9.686896918592037e-06, "loss": 0.3287, "step": 8563 }, { "epoch": 0.39300628699921986, "grad_norm": 0.4535768926143646, "learning_rate": 9.686811511544622e-06, "loss": 0.3589, "step": 8564 }, { "epoch": 0.3930521775044743, "grad_norm": 0.4414762556552887, "learning_rate": 9.686726093226904e-06, "loss": 0.4104, "step": 8565 }, { "epoch": 0.3930980680097288, "grad_norm": 0.5018694996833801, "learning_rate": 9.686640663639087e-06, "loss": 0.4713, "step": 8566 }, { "epoch": 0.39314395851498324, "grad_norm": 0.45490220189094543, "learning_rate": 9.68655522278138e-06, "loss": 0.4358, "step": 8567 }, { "epoch": 0.39318984902023774, "grad_norm": 0.5319716930389404, "learning_rate": 9.686469770653987e-06, "loss": 0.3918, "step": 8568 }, { "epoch": 0.3932357395254922, "grad_norm": 0.4732978940010071, "learning_rate": 9.686384307257113e-06, "loss": 0.429, "step": 8569 }, { "epoch": 0.3932816300307466, "grad_norm": 0.47988659143447876, "learning_rate": 9.686298832590963e-06, "loss": 0.432, "step": 8570 }, { "epoch": 0.3933275205360011, "grad_norm": 0.444713830947876, "learning_rate": 9.686213346655745e-06, "loss": 0.3807, "step": 8571 }, { "epoch": 0.39337341104125556, "grad_norm": 0.44191858172416687, "learning_rate": 9.686127849451661e-06, "loss": 0.3998, "step": 8572 }, { "epoch": 0.39341930154651, "grad_norm": 0.49021926522254944, "learning_rate": 9.68604234097892e-06, "loss": 0.4851, "step": 8573 }, { "epoch": 0.3934651920517645, "grad_norm": 0.4527379274368286, "learning_rate": 9.685956821237726e-06, "loss": 0.4, "step": 8574 }, { "epoch": 0.39351108255701894, "grad_norm": 0.45258426666259766, "learning_rate": 9.685871290228284e-06, "loss": 0.3384, "step": 8575 }, { "epoch": 0.39355697306227344, "grad_norm": 0.4111131727695465, "learning_rate": 9.685785747950801e-06, "loss": 0.3023, "step": 8576 }, { "epoch": 0.3936028635675279, "grad_norm": 0.4864082932472229, "learning_rate": 9.685700194405482e-06, "loss": 0.4327, "step": 8577 }, { "epoch": 0.3936487540727823, "grad_norm": 0.4577995538711548, "learning_rate": 9.685614629592533e-06, "loss": 0.3697, "step": 8578 }, { "epoch": 0.3936946445780368, "grad_norm": 0.45337432622909546, "learning_rate": 9.685529053512159e-06, "loss": 0.4147, "step": 8579 }, { "epoch": 0.39374053508329127, "grad_norm": 0.43586283922195435, "learning_rate": 9.685443466164567e-06, "loss": 0.3508, "step": 8580 }, { "epoch": 0.3937864255885457, "grad_norm": 0.43408092856407166, "learning_rate": 9.685357867549963e-06, "loss": 0.3576, "step": 8581 }, { "epoch": 0.3938323160938002, "grad_norm": 0.5230141282081604, "learning_rate": 9.68527225766855e-06, "loss": 0.5352, "step": 8582 }, { "epoch": 0.39387820659905465, "grad_norm": 0.4405350387096405, "learning_rate": 9.685186636520538e-06, "loss": 0.3796, "step": 8583 }, { "epoch": 0.39392409710430915, "grad_norm": 0.45898011326789856, "learning_rate": 9.68510100410613e-06, "loss": 0.412, "step": 8584 }, { "epoch": 0.3939699876095636, "grad_norm": 0.5374167561531067, "learning_rate": 9.685015360425532e-06, "loss": 0.4964, "step": 8585 }, { "epoch": 0.39401587811481803, "grad_norm": 0.4404294788837433, "learning_rate": 9.684929705478951e-06, "loss": 0.34, "step": 8586 }, { "epoch": 0.39406176862007253, "grad_norm": 0.8178809881210327, "learning_rate": 9.684844039266594e-06, "loss": 0.4853, "step": 8587 }, { "epoch": 0.39410765912532697, "grad_norm": 0.45292502641677856, "learning_rate": 9.684758361788664e-06, "loss": 0.3611, "step": 8588 }, { "epoch": 0.3941535496305814, "grad_norm": 0.47277024388313293, "learning_rate": 9.68467267304537e-06, "loss": 0.4334, "step": 8589 }, { "epoch": 0.3941994401358359, "grad_norm": 0.4607960283756256, "learning_rate": 9.684586973036915e-06, "loss": 0.3686, "step": 8590 }, { "epoch": 0.39424533064109035, "grad_norm": 0.4308449923992157, "learning_rate": 9.684501261763509e-06, "loss": 0.3531, "step": 8591 }, { "epoch": 0.3942912211463448, "grad_norm": 0.48661860823631287, "learning_rate": 9.684415539225355e-06, "loss": 0.4151, "step": 8592 }, { "epoch": 0.3943371116515993, "grad_norm": 0.4583309292793274, "learning_rate": 9.68432980542266e-06, "loss": 0.373, "step": 8593 }, { "epoch": 0.39438300215685373, "grad_norm": 0.49229541420936584, "learning_rate": 9.684244060355629e-06, "loss": 0.4696, "step": 8594 }, { "epoch": 0.39442889266210823, "grad_norm": 0.4471207559108734, "learning_rate": 9.684158304024472e-06, "loss": 0.3559, "step": 8595 }, { "epoch": 0.3944747831673627, "grad_norm": 0.4282625615596771, "learning_rate": 9.684072536429392e-06, "loss": 0.3334, "step": 8596 }, { "epoch": 0.3945206736726171, "grad_norm": 0.4677380323410034, "learning_rate": 9.683986757570594e-06, "loss": 0.4136, "step": 8597 }, { "epoch": 0.3945665641778716, "grad_norm": 0.4334053695201874, "learning_rate": 9.683900967448288e-06, "loss": 0.3456, "step": 8598 }, { "epoch": 0.39461245468312606, "grad_norm": 0.5461660623550415, "learning_rate": 9.683815166062678e-06, "loss": 0.5009, "step": 8599 }, { "epoch": 0.3946583451883805, "grad_norm": 0.48494023084640503, "learning_rate": 9.683729353413971e-06, "loss": 0.4338, "step": 8600 }, { "epoch": 0.394704235693635, "grad_norm": 0.47498902678489685, "learning_rate": 9.683643529502372e-06, "loss": 0.4112, "step": 8601 }, { "epoch": 0.39475012619888944, "grad_norm": 0.4361850917339325, "learning_rate": 9.68355769432809e-06, "loss": 0.2788, "step": 8602 }, { "epoch": 0.39479601670414394, "grad_norm": 0.4567633867263794, "learning_rate": 9.683471847891327e-06, "loss": 0.3805, "step": 8603 }, { "epoch": 0.3948419072093984, "grad_norm": 0.5143073201179504, "learning_rate": 9.683385990192295e-06, "loss": 0.5381, "step": 8604 }, { "epoch": 0.3948877977146528, "grad_norm": 0.46747028827667236, "learning_rate": 9.683300121231199e-06, "loss": 0.4511, "step": 8605 }, { "epoch": 0.3949336882199073, "grad_norm": 0.47865384817123413, "learning_rate": 9.683214241008242e-06, "loss": 0.4808, "step": 8606 }, { "epoch": 0.39497957872516176, "grad_norm": 0.4456637501716614, "learning_rate": 9.683128349523633e-06, "loss": 0.3755, "step": 8607 }, { "epoch": 0.3950254692304162, "grad_norm": 0.4276733994483948, "learning_rate": 9.683042446777579e-06, "loss": 0.3112, "step": 8608 }, { "epoch": 0.3950713597356707, "grad_norm": 0.47264665365219116, "learning_rate": 9.682956532770286e-06, "loss": 0.4235, "step": 8609 }, { "epoch": 0.39511725024092514, "grad_norm": 0.4751649498939514, "learning_rate": 9.68287060750196e-06, "loss": 0.3473, "step": 8610 }, { "epoch": 0.39516314074617964, "grad_norm": 0.48298001289367676, "learning_rate": 9.682784670972808e-06, "loss": 0.3829, "step": 8611 }, { "epoch": 0.3952090312514341, "grad_norm": 0.5119484663009644, "learning_rate": 9.682698723183036e-06, "loss": 0.5422, "step": 8612 }, { "epoch": 0.3952549217566885, "grad_norm": 0.44060173630714417, "learning_rate": 9.682612764132854e-06, "loss": 0.3846, "step": 8613 }, { "epoch": 0.395300812261943, "grad_norm": 0.45631831884384155, "learning_rate": 9.682526793822465e-06, "loss": 0.4581, "step": 8614 }, { "epoch": 0.39534670276719747, "grad_norm": 0.4429319500923157, "learning_rate": 9.682440812252077e-06, "loss": 0.3627, "step": 8615 }, { "epoch": 0.3953925932724519, "grad_norm": 0.4876386225223541, "learning_rate": 9.682354819421896e-06, "loss": 0.5217, "step": 8616 }, { "epoch": 0.3954384837777064, "grad_norm": 0.436872273683548, "learning_rate": 9.68226881533213e-06, "loss": 0.3166, "step": 8617 }, { "epoch": 0.39548437428296085, "grad_norm": 0.5447903275489807, "learning_rate": 9.682182799982985e-06, "loss": 0.5565, "step": 8618 }, { "epoch": 0.3955302647882153, "grad_norm": 0.44715312123298645, "learning_rate": 9.682096773374668e-06, "loss": 0.3737, "step": 8619 }, { "epoch": 0.3955761552934698, "grad_norm": 0.4636843204498291, "learning_rate": 9.682010735507387e-06, "loss": 0.4234, "step": 8620 }, { "epoch": 0.39562204579872423, "grad_norm": 0.49817508459091187, "learning_rate": 9.681924686381348e-06, "loss": 0.552, "step": 8621 }, { "epoch": 0.39566793630397873, "grad_norm": 0.4809209704399109, "learning_rate": 9.681838625996755e-06, "loss": 0.4036, "step": 8622 }, { "epoch": 0.39571382680923317, "grad_norm": 0.43769770860671997, "learning_rate": 9.68175255435382e-06, "loss": 0.3464, "step": 8623 }, { "epoch": 0.3957597173144876, "grad_norm": 0.4978189170360565, "learning_rate": 9.681666471452748e-06, "loss": 0.3743, "step": 8624 }, { "epoch": 0.3958056078197421, "grad_norm": 0.42524853348731995, "learning_rate": 9.681580377293744e-06, "loss": 0.3209, "step": 8625 }, { "epoch": 0.39585149832499655, "grad_norm": 0.4592619836330414, "learning_rate": 9.68149427187702e-06, "loss": 0.3811, "step": 8626 }, { "epoch": 0.395897388830251, "grad_norm": 0.49542543292045593, "learning_rate": 9.681408155202776e-06, "loss": 0.4291, "step": 8627 }, { "epoch": 0.3959432793355055, "grad_norm": 0.47730204463005066, "learning_rate": 9.681322027271225e-06, "loss": 0.4148, "step": 8628 }, { "epoch": 0.39598916984075994, "grad_norm": 0.46925047039985657, "learning_rate": 9.68123588808257e-06, "loss": 0.3972, "step": 8629 }, { "epoch": 0.39603506034601443, "grad_norm": 0.47826483845710754, "learning_rate": 9.681149737637023e-06, "loss": 0.404, "step": 8630 }, { "epoch": 0.3960809508512689, "grad_norm": 0.47326555848121643, "learning_rate": 9.681063575934786e-06, "loss": 0.4716, "step": 8631 }, { "epoch": 0.3961268413565233, "grad_norm": 0.46842607855796814, "learning_rate": 9.68097740297607e-06, "loss": 0.3945, "step": 8632 }, { "epoch": 0.3961727318617778, "grad_norm": 0.444974809885025, "learning_rate": 9.68089121876108e-06, "loss": 0.3945, "step": 8633 }, { "epoch": 0.39621862236703226, "grad_norm": 0.4635046720504761, "learning_rate": 9.680805023290022e-06, "loss": 0.3563, "step": 8634 }, { "epoch": 0.3962645128722867, "grad_norm": 0.6594986915588379, "learning_rate": 9.680718816563108e-06, "loss": 0.452, "step": 8635 }, { "epoch": 0.3963104033775412, "grad_norm": 0.4828979969024658, "learning_rate": 9.680632598580543e-06, "loss": 0.3869, "step": 8636 }, { "epoch": 0.39635629388279564, "grad_norm": 0.4403561055660248, "learning_rate": 9.680546369342532e-06, "loss": 0.3422, "step": 8637 }, { "epoch": 0.39640218438805014, "grad_norm": 0.4655972421169281, "learning_rate": 9.680460128849285e-06, "loss": 0.4474, "step": 8638 }, { "epoch": 0.3964480748933046, "grad_norm": 0.5102384686470032, "learning_rate": 9.680373877101008e-06, "loss": 0.5371, "step": 8639 }, { "epoch": 0.396493965398559, "grad_norm": 0.5012203454971313, "learning_rate": 9.68028761409791e-06, "loss": 0.4112, "step": 8640 }, { "epoch": 0.3965398559038135, "grad_norm": 0.49299463629722595, "learning_rate": 9.680201339840197e-06, "loss": 0.4381, "step": 8641 }, { "epoch": 0.39658574640906796, "grad_norm": 0.45829281210899353, "learning_rate": 9.680115054328077e-06, "loss": 0.4173, "step": 8642 }, { "epoch": 0.3966316369143224, "grad_norm": 0.4389673173427582, "learning_rate": 9.680028757561757e-06, "loss": 0.3092, "step": 8643 }, { "epoch": 0.3966775274195769, "grad_norm": 0.4829205870628357, "learning_rate": 9.679942449541444e-06, "loss": 0.4208, "step": 8644 }, { "epoch": 0.39672341792483135, "grad_norm": 0.45412907004356384, "learning_rate": 9.679856130267348e-06, "loss": 0.3746, "step": 8645 }, { "epoch": 0.3967693084300858, "grad_norm": 0.44100072979927063, "learning_rate": 9.679769799739674e-06, "loss": 0.3858, "step": 8646 }, { "epoch": 0.3968151989353403, "grad_norm": 0.4650171399116516, "learning_rate": 9.67968345795863e-06, "loss": 0.3736, "step": 8647 }, { "epoch": 0.3968610894405947, "grad_norm": 0.4790247678756714, "learning_rate": 9.679597104924426e-06, "loss": 0.436, "step": 8648 }, { "epoch": 0.3969069799458492, "grad_norm": 0.43356242775917053, "learning_rate": 9.679510740637268e-06, "loss": 0.35, "step": 8649 }, { "epoch": 0.39695287045110367, "grad_norm": 0.47927725315093994, "learning_rate": 9.679424365097363e-06, "loss": 0.4029, "step": 8650 }, { "epoch": 0.3969987609563581, "grad_norm": 0.46591562032699585, "learning_rate": 9.67933797830492e-06, "loss": 0.4259, "step": 8651 }, { "epoch": 0.3970446514616126, "grad_norm": 0.551018476486206, "learning_rate": 9.679251580260142e-06, "loss": 0.4364, "step": 8652 }, { "epoch": 0.39709054196686705, "grad_norm": 0.5083770155906677, "learning_rate": 9.679165170963244e-06, "loss": 0.5282, "step": 8653 }, { "epoch": 0.3971364324721215, "grad_norm": 0.4843112826347351, "learning_rate": 9.679078750414432e-06, "loss": 0.4272, "step": 8654 }, { "epoch": 0.397182322977376, "grad_norm": 0.4888418912887573, "learning_rate": 9.67899231861391e-06, "loss": 0.4052, "step": 8655 }, { "epoch": 0.39722821348263043, "grad_norm": 0.449928879737854, "learning_rate": 9.67890587556189e-06, "loss": 0.3592, "step": 8656 }, { "epoch": 0.39727410398788493, "grad_norm": 0.43793001770973206, "learning_rate": 9.678819421258577e-06, "loss": 0.3789, "step": 8657 }, { "epoch": 0.3973199944931394, "grad_norm": 0.47794607281684875, "learning_rate": 9.67873295570418e-06, "loss": 0.4464, "step": 8658 }, { "epoch": 0.3973658849983938, "grad_norm": 0.43191906809806824, "learning_rate": 9.678646478898905e-06, "loss": 0.3627, "step": 8659 }, { "epoch": 0.3974117755036483, "grad_norm": 0.44276878237724304, "learning_rate": 9.678559990842964e-06, "loss": 0.3922, "step": 8660 }, { "epoch": 0.39745766600890275, "grad_norm": 0.49213507771492004, "learning_rate": 9.678473491536563e-06, "loss": 0.4544, "step": 8661 }, { "epoch": 0.3975035565141572, "grad_norm": 0.4666652977466583, "learning_rate": 9.678386980979909e-06, "loss": 0.4494, "step": 8662 }, { "epoch": 0.3975494470194117, "grad_norm": 0.4337114691734314, "learning_rate": 9.678300459173212e-06, "loss": 0.3423, "step": 8663 }, { "epoch": 0.39759533752466614, "grad_norm": 0.47524869441986084, "learning_rate": 9.678213926116677e-06, "loss": 0.4516, "step": 8664 }, { "epoch": 0.39764122802992063, "grad_norm": 0.4766837954521179, "learning_rate": 9.678127381810516e-06, "loss": 0.3835, "step": 8665 }, { "epoch": 0.3976871185351751, "grad_norm": 0.4313611686229706, "learning_rate": 9.678040826254935e-06, "loss": 0.3573, "step": 8666 }, { "epoch": 0.3977330090404295, "grad_norm": 0.4806932210922241, "learning_rate": 9.67795425945014e-06, "loss": 0.4467, "step": 8667 }, { "epoch": 0.397778899545684, "grad_norm": 0.46154314279556274, "learning_rate": 9.677867681396343e-06, "loss": 0.3718, "step": 8668 }, { "epoch": 0.39782479005093846, "grad_norm": 0.46274620294570923, "learning_rate": 9.677781092093749e-06, "loss": 0.3601, "step": 8669 }, { "epoch": 0.3978706805561929, "grad_norm": 0.49439704418182373, "learning_rate": 9.677694491542571e-06, "loss": 0.4287, "step": 8670 }, { "epoch": 0.3979165710614474, "grad_norm": 0.4502599537372589, "learning_rate": 9.677607879743011e-06, "loss": 0.3675, "step": 8671 }, { "epoch": 0.39796246156670184, "grad_norm": 0.46441757678985596, "learning_rate": 9.677521256695282e-06, "loss": 0.3799, "step": 8672 }, { "epoch": 0.39800835207195634, "grad_norm": 0.4544665813446045, "learning_rate": 9.67743462239959e-06, "loss": 0.3737, "step": 8673 }, { "epoch": 0.3980542425772108, "grad_norm": 0.4876054525375366, "learning_rate": 9.677347976856145e-06, "loss": 0.4533, "step": 8674 }, { "epoch": 0.3981001330824652, "grad_norm": 0.43368858098983765, "learning_rate": 9.677261320065155e-06, "loss": 0.3311, "step": 8675 }, { "epoch": 0.3981460235877197, "grad_norm": 0.486748069524765, "learning_rate": 9.677174652026825e-06, "loss": 0.4155, "step": 8676 }, { "epoch": 0.39819191409297416, "grad_norm": 0.4976727068424225, "learning_rate": 9.677087972741368e-06, "loss": 0.4726, "step": 8677 }, { "epoch": 0.3982378045982286, "grad_norm": 0.433361291885376, "learning_rate": 9.67700128220899e-06, "loss": 0.3223, "step": 8678 }, { "epoch": 0.3982836951034831, "grad_norm": 0.4959779381752014, "learning_rate": 9.676914580429898e-06, "loss": 0.4811, "step": 8679 }, { "epoch": 0.39832958560873755, "grad_norm": 0.5422683954238892, "learning_rate": 9.676827867404306e-06, "loss": 0.4972, "step": 8680 }, { "epoch": 0.398375476113992, "grad_norm": 0.4244866669178009, "learning_rate": 9.676741143132417e-06, "loss": 0.3469, "step": 8681 }, { "epoch": 0.3984213666192465, "grad_norm": 0.5025492310523987, "learning_rate": 9.676654407614442e-06, "loss": 0.4004, "step": 8682 }, { "epoch": 0.39846725712450093, "grad_norm": 0.5294975638389587, "learning_rate": 9.67656766085059e-06, "loss": 0.49, "step": 8683 }, { "epoch": 0.3985131476297554, "grad_norm": 0.4405219852924347, "learning_rate": 9.676480902841068e-06, "loss": 0.4164, "step": 8684 }, { "epoch": 0.39855903813500987, "grad_norm": 0.48667651414871216, "learning_rate": 9.676394133586086e-06, "loss": 0.3533, "step": 8685 }, { "epoch": 0.3986049286402643, "grad_norm": 0.5046839714050293, "learning_rate": 9.67630735308585e-06, "loss": 0.4598, "step": 8686 }, { "epoch": 0.3986508191455188, "grad_norm": 0.4614804983139038, "learning_rate": 9.676220561340573e-06, "loss": 0.4477, "step": 8687 }, { "epoch": 0.39869670965077325, "grad_norm": 0.4799266457557678, "learning_rate": 9.676133758350462e-06, "loss": 0.4523, "step": 8688 }, { "epoch": 0.3987426001560277, "grad_norm": 0.5135653018951416, "learning_rate": 9.676046944115723e-06, "loss": 0.4636, "step": 8689 }, { "epoch": 0.3987884906612822, "grad_norm": 0.45647597312927246, "learning_rate": 9.675960118636567e-06, "loss": 0.3858, "step": 8690 }, { "epoch": 0.39883438116653663, "grad_norm": 0.46854186058044434, "learning_rate": 9.675873281913205e-06, "loss": 0.3809, "step": 8691 }, { "epoch": 0.39888027167179113, "grad_norm": 0.4501729905605316, "learning_rate": 9.67578643394584e-06, "loss": 0.3927, "step": 8692 }, { "epoch": 0.3989261621770456, "grad_norm": 0.4693083167076111, "learning_rate": 9.675699574734686e-06, "loss": 0.3742, "step": 8693 }, { "epoch": 0.3989720526823, "grad_norm": 0.44620537757873535, "learning_rate": 9.67561270427995e-06, "loss": 0.3338, "step": 8694 }, { "epoch": 0.3990179431875545, "grad_norm": 0.4076438248157501, "learning_rate": 9.675525822581841e-06, "loss": 0.3323, "step": 8695 }, { "epoch": 0.39906383369280896, "grad_norm": 0.48204880952835083, "learning_rate": 9.675438929640568e-06, "loss": 0.3734, "step": 8696 }, { "epoch": 0.3991097241980634, "grad_norm": 0.5527828335762024, "learning_rate": 9.675352025456341e-06, "loss": 0.4371, "step": 8697 }, { "epoch": 0.3991556147033179, "grad_norm": 0.4597312808036804, "learning_rate": 9.675265110029368e-06, "loss": 0.415, "step": 8698 }, { "epoch": 0.39920150520857234, "grad_norm": 0.47532808780670166, "learning_rate": 9.675178183359858e-06, "loss": 0.4687, "step": 8699 }, { "epoch": 0.39924739571382684, "grad_norm": 0.5203988552093506, "learning_rate": 9.675091245448019e-06, "loss": 0.4401, "step": 8700 }, { "epoch": 0.3992932862190813, "grad_norm": 0.4371298849582672, "learning_rate": 9.675004296294062e-06, "loss": 0.416, "step": 8701 }, { "epoch": 0.3993391767243357, "grad_norm": 0.4408140480518341, "learning_rate": 9.674917335898194e-06, "loss": 0.3721, "step": 8702 }, { "epoch": 0.3993850672295902, "grad_norm": 0.4458071291446686, "learning_rate": 9.674830364260625e-06, "loss": 0.3857, "step": 8703 }, { "epoch": 0.39943095773484466, "grad_norm": 0.4959181249141693, "learning_rate": 9.674743381381567e-06, "loss": 0.4705, "step": 8704 }, { "epoch": 0.3994768482400991, "grad_norm": 0.5098963975906372, "learning_rate": 9.674656387261224e-06, "loss": 0.4585, "step": 8705 }, { "epoch": 0.3995227387453536, "grad_norm": 0.49235400557518005, "learning_rate": 9.67456938189981e-06, "loss": 0.4994, "step": 8706 }, { "epoch": 0.39956862925060804, "grad_norm": 0.4554671347141266, "learning_rate": 9.674482365297529e-06, "loss": 0.3529, "step": 8707 }, { "epoch": 0.3996145197558625, "grad_norm": 0.5342531204223633, "learning_rate": 9.674395337454594e-06, "loss": 0.4535, "step": 8708 }, { "epoch": 0.399660410261117, "grad_norm": 0.44364133477211, "learning_rate": 9.674308298371215e-06, "loss": 0.3678, "step": 8709 }, { "epoch": 0.3997063007663714, "grad_norm": 0.47711804509162903, "learning_rate": 9.674221248047599e-06, "loss": 0.4373, "step": 8710 }, { "epoch": 0.3997521912716259, "grad_norm": 0.4616168439388275, "learning_rate": 9.674134186483957e-06, "loss": 0.3191, "step": 8711 }, { "epoch": 0.39979808177688037, "grad_norm": 0.469154417514801, "learning_rate": 9.674047113680496e-06, "loss": 0.3913, "step": 8712 }, { "epoch": 0.3998439722821348, "grad_norm": 0.5800721645355225, "learning_rate": 9.673960029637429e-06, "loss": 0.5304, "step": 8713 }, { "epoch": 0.3998898627873893, "grad_norm": 0.4438539147377014, "learning_rate": 9.67387293435496e-06, "loss": 0.4213, "step": 8714 }, { "epoch": 0.39993575329264375, "grad_norm": 0.4614817202091217, "learning_rate": 9.673785827833303e-06, "loss": 0.4235, "step": 8715 }, { "epoch": 0.3999816437978982, "grad_norm": 0.4509181082248688, "learning_rate": 9.673698710072668e-06, "loss": 0.4535, "step": 8716 }, { "epoch": 0.4000275343031527, "grad_norm": 0.45474687218666077, "learning_rate": 9.673611581073262e-06, "loss": 0.3889, "step": 8717 }, { "epoch": 0.40007342480840713, "grad_norm": 0.4203234314918518, "learning_rate": 9.673524440835294e-06, "loss": 0.3204, "step": 8718 }, { "epoch": 0.4001193153136616, "grad_norm": 0.4642297625541687, "learning_rate": 9.673437289358975e-06, "loss": 0.4379, "step": 8719 }, { "epoch": 0.40016520581891607, "grad_norm": 0.5028543472290039, "learning_rate": 9.673350126644513e-06, "loss": 0.5018, "step": 8720 }, { "epoch": 0.4002110963241705, "grad_norm": 0.44732141494750977, "learning_rate": 9.673262952692122e-06, "loss": 0.3907, "step": 8721 }, { "epoch": 0.400256986829425, "grad_norm": 0.4514525532722473, "learning_rate": 9.673175767502006e-06, "loss": 0.4066, "step": 8722 }, { "epoch": 0.40030287733467945, "grad_norm": 0.5037662386894226, "learning_rate": 9.673088571074378e-06, "loss": 0.4965, "step": 8723 }, { "epoch": 0.4003487678399339, "grad_norm": 0.4511241316795349, "learning_rate": 9.673001363409447e-06, "loss": 0.3874, "step": 8724 }, { "epoch": 0.4003946583451884, "grad_norm": 0.4596615731716156, "learning_rate": 9.672914144507423e-06, "loss": 0.4276, "step": 8725 }, { "epoch": 0.40044054885044283, "grad_norm": 0.4645096957683563, "learning_rate": 9.672826914368515e-06, "loss": 0.3884, "step": 8726 }, { "epoch": 0.40048643935569733, "grad_norm": 0.49575841426849365, "learning_rate": 9.672739672992933e-06, "loss": 0.4398, "step": 8727 }, { "epoch": 0.4005323298609518, "grad_norm": 0.4681076407432556, "learning_rate": 9.672652420380887e-06, "loss": 0.3918, "step": 8728 }, { "epoch": 0.4005782203662062, "grad_norm": 0.5176234245300293, "learning_rate": 9.672565156532585e-06, "loss": 0.4654, "step": 8729 }, { "epoch": 0.4006241108714607, "grad_norm": 0.4722677767276764, "learning_rate": 9.672477881448241e-06, "loss": 0.4331, "step": 8730 }, { "epoch": 0.40067000137671516, "grad_norm": 0.48463112115859985, "learning_rate": 9.672390595128061e-06, "loss": 0.445, "step": 8731 }, { "epoch": 0.4007158918819696, "grad_norm": 0.46203476190567017, "learning_rate": 9.672303297572256e-06, "loss": 0.3796, "step": 8732 }, { "epoch": 0.4007617823872241, "grad_norm": 0.4735702574253082, "learning_rate": 9.672215988781038e-06, "loss": 0.4534, "step": 8733 }, { "epoch": 0.40080767289247854, "grad_norm": 0.49243053793907166, "learning_rate": 9.672128668754613e-06, "loss": 0.4332, "step": 8734 }, { "epoch": 0.400853563397733, "grad_norm": 0.43404310941696167, "learning_rate": 9.672041337493194e-06, "loss": 0.372, "step": 8735 }, { "epoch": 0.4008994539029875, "grad_norm": 0.7410294413566589, "learning_rate": 9.67195399499699e-06, "loss": 0.3726, "step": 8736 }, { "epoch": 0.4009453444082419, "grad_norm": 0.43253272771835327, "learning_rate": 9.671866641266212e-06, "loss": 0.3663, "step": 8737 }, { "epoch": 0.4009912349134964, "grad_norm": 0.4867757260799408, "learning_rate": 9.671779276301067e-06, "loss": 0.4927, "step": 8738 }, { "epoch": 0.40103712541875086, "grad_norm": 0.46213769912719727, "learning_rate": 9.67169190010177e-06, "loss": 0.4026, "step": 8739 }, { "epoch": 0.4010830159240053, "grad_norm": 0.4710848927497864, "learning_rate": 9.671604512668526e-06, "loss": 0.4045, "step": 8740 }, { "epoch": 0.4011289064292598, "grad_norm": 0.4646565020084381, "learning_rate": 9.67151711400155e-06, "loss": 0.4326, "step": 8741 }, { "epoch": 0.40117479693451424, "grad_norm": 0.4739971160888672, "learning_rate": 9.671429704101048e-06, "loss": 0.386, "step": 8742 }, { "epoch": 0.4012206874397687, "grad_norm": 0.46422308683395386, "learning_rate": 9.671342282967233e-06, "loss": 0.3734, "step": 8743 }, { "epoch": 0.4012665779450232, "grad_norm": 0.4172900915145874, "learning_rate": 9.671254850600314e-06, "loss": 0.3392, "step": 8744 }, { "epoch": 0.4013124684502776, "grad_norm": 0.4752531945705414, "learning_rate": 9.6711674070005e-06, "loss": 0.376, "step": 8745 }, { "epoch": 0.4013583589555321, "grad_norm": 0.4242478311061859, "learning_rate": 9.671079952168004e-06, "loss": 0.3683, "step": 8746 }, { "epoch": 0.40140424946078657, "grad_norm": 0.46339577436447144, "learning_rate": 9.670992486103034e-06, "loss": 0.4493, "step": 8747 }, { "epoch": 0.401450139966041, "grad_norm": 0.45612382888793945, "learning_rate": 9.670905008805801e-06, "loss": 0.4149, "step": 8748 }, { "epoch": 0.4014960304712955, "grad_norm": 0.471419095993042, "learning_rate": 9.670817520276517e-06, "loss": 0.4376, "step": 8749 }, { "epoch": 0.40154192097654995, "grad_norm": 0.45914721488952637, "learning_rate": 9.670730020515391e-06, "loss": 0.391, "step": 8750 }, { "epoch": 0.4015878114818044, "grad_norm": 0.4708364009857178, "learning_rate": 9.670642509522632e-06, "loss": 0.3835, "step": 8751 }, { "epoch": 0.4016337019870589, "grad_norm": 0.5328819155693054, "learning_rate": 9.670554987298453e-06, "loss": 0.5547, "step": 8752 }, { "epoch": 0.40167959249231333, "grad_norm": 0.4802168607711792, "learning_rate": 9.670467453843062e-06, "loss": 0.4126, "step": 8753 }, { "epoch": 0.40172548299756783, "grad_norm": 0.474074125289917, "learning_rate": 9.670379909156674e-06, "loss": 0.3766, "step": 8754 }, { "epoch": 0.40177137350282227, "grad_norm": 0.4740825891494751, "learning_rate": 9.670292353239493e-06, "loss": 0.4881, "step": 8755 }, { "epoch": 0.4018172640080767, "grad_norm": 0.7397067546844482, "learning_rate": 9.670204786091734e-06, "loss": 0.4164, "step": 8756 }, { "epoch": 0.4018631545133312, "grad_norm": 0.43916967511177063, "learning_rate": 9.670117207713606e-06, "loss": 0.3594, "step": 8757 }, { "epoch": 0.40190904501858565, "grad_norm": 0.44764959812164307, "learning_rate": 9.67002961810532e-06, "loss": 0.4159, "step": 8758 }, { "epoch": 0.4019549355238401, "grad_norm": 0.5043580532073975, "learning_rate": 9.669942017267087e-06, "loss": 0.4939, "step": 8759 }, { "epoch": 0.4020008260290946, "grad_norm": 0.5208820700645447, "learning_rate": 9.669854405199118e-06, "loss": 0.4505, "step": 8760 }, { "epoch": 0.40204671653434904, "grad_norm": 0.5180500745773315, "learning_rate": 9.669766781901623e-06, "loss": 0.5205, "step": 8761 }, { "epoch": 0.40209260703960353, "grad_norm": 0.49732547998428345, "learning_rate": 9.669679147374813e-06, "loss": 0.4714, "step": 8762 }, { "epoch": 0.402138497544858, "grad_norm": 0.48268479108810425, "learning_rate": 9.669591501618897e-06, "loss": 0.4714, "step": 8763 }, { "epoch": 0.4021843880501124, "grad_norm": 1.4454619884490967, "learning_rate": 9.669503844634087e-06, "loss": 0.5187, "step": 8764 }, { "epoch": 0.4022302785553669, "grad_norm": 0.4634663164615631, "learning_rate": 9.669416176420595e-06, "loss": 0.4357, "step": 8765 }, { "epoch": 0.40227616906062136, "grad_norm": 0.4607052803039551, "learning_rate": 9.669328496978632e-06, "loss": 0.3477, "step": 8766 }, { "epoch": 0.4023220595658758, "grad_norm": 0.4328441023826599, "learning_rate": 9.669240806308406e-06, "loss": 0.3492, "step": 8767 }, { "epoch": 0.4023679500711303, "grad_norm": 0.42733681201934814, "learning_rate": 9.66915310441013e-06, "loss": 0.348, "step": 8768 }, { "epoch": 0.40241384057638474, "grad_norm": 0.5779761672019958, "learning_rate": 9.669065391284013e-06, "loss": 0.4813, "step": 8769 }, { "epoch": 0.4024597310816392, "grad_norm": 0.48003625869750977, "learning_rate": 9.668977666930269e-06, "loss": 0.4887, "step": 8770 }, { "epoch": 0.4025056215868937, "grad_norm": 0.455434650182724, "learning_rate": 9.668889931349107e-06, "loss": 0.4662, "step": 8771 }, { "epoch": 0.4025515120921481, "grad_norm": 0.47613126039505005, "learning_rate": 9.668802184540737e-06, "loss": 0.3716, "step": 8772 }, { "epoch": 0.4025974025974026, "grad_norm": 0.4700065851211548, "learning_rate": 9.668714426505373e-06, "loss": 0.371, "step": 8773 }, { "epoch": 0.40264329310265706, "grad_norm": 0.47429323196411133, "learning_rate": 9.668626657243223e-06, "loss": 0.4006, "step": 8774 }, { "epoch": 0.4026891836079115, "grad_norm": 0.48311731219291687, "learning_rate": 9.668538876754501e-06, "loss": 0.4503, "step": 8775 }, { "epoch": 0.402735074113166, "grad_norm": 0.49212297797203064, "learning_rate": 9.668451085039416e-06, "loss": 0.4663, "step": 8776 }, { "epoch": 0.40278096461842045, "grad_norm": 0.45360979437828064, "learning_rate": 9.668363282098178e-06, "loss": 0.4231, "step": 8777 }, { "epoch": 0.4028268551236749, "grad_norm": 0.4916783571243286, "learning_rate": 9.668275467931e-06, "loss": 0.3849, "step": 8778 }, { "epoch": 0.4028727456289294, "grad_norm": 0.4797839820384979, "learning_rate": 9.668187642538095e-06, "loss": 0.3894, "step": 8779 }, { "epoch": 0.4029186361341838, "grad_norm": 0.5041285157203674, "learning_rate": 9.668099805919671e-06, "loss": 0.3729, "step": 8780 }, { "epoch": 0.4029645266394383, "grad_norm": 0.4648650288581848, "learning_rate": 9.668011958075939e-06, "loss": 0.3926, "step": 8781 }, { "epoch": 0.40301041714469277, "grad_norm": 0.4691621959209442, "learning_rate": 9.667924099007113e-06, "loss": 0.4071, "step": 8782 }, { "epoch": 0.4030563076499472, "grad_norm": 0.44032666087150574, "learning_rate": 9.667836228713401e-06, "loss": 0.3243, "step": 8783 }, { "epoch": 0.4031021981552017, "grad_norm": 0.47945475578308105, "learning_rate": 9.667748347195019e-06, "loss": 0.4283, "step": 8784 }, { "epoch": 0.40314808866045615, "grad_norm": 0.48606324195861816, "learning_rate": 9.667660454452173e-06, "loss": 0.4477, "step": 8785 }, { "epoch": 0.4031939791657106, "grad_norm": 0.4703650176525116, "learning_rate": 9.667572550485076e-06, "loss": 0.3507, "step": 8786 }, { "epoch": 0.4032398696709651, "grad_norm": 0.44076669216156006, "learning_rate": 9.667484635293943e-06, "loss": 0.3461, "step": 8787 }, { "epoch": 0.40328576017621953, "grad_norm": 0.47779592871665955, "learning_rate": 9.66739670887898e-06, "loss": 0.4538, "step": 8788 }, { "epoch": 0.40333165068147403, "grad_norm": 0.4094685912132263, "learning_rate": 9.667308771240402e-06, "loss": 0.3259, "step": 8789 }, { "epoch": 0.40337754118672847, "grad_norm": 0.5172468423843384, "learning_rate": 9.66722082237842e-06, "loss": 0.5105, "step": 8790 }, { "epoch": 0.4034234316919829, "grad_norm": 0.48521021008491516, "learning_rate": 9.667132862293245e-06, "loss": 0.4166, "step": 8791 }, { "epoch": 0.4034693221972374, "grad_norm": 0.5032215118408203, "learning_rate": 9.667044890985088e-06, "loss": 0.4611, "step": 8792 }, { "epoch": 0.40351521270249185, "grad_norm": 0.4498281478881836, "learning_rate": 9.666956908454161e-06, "loss": 0.4297, "step": 8793 }, { "epoch": 0.4035611032077463, "grad_norm": 0.43463563919067383, "learning_rate": 9.666868914700676e-06, "loss": 0.3188, "step": 8794 }, { "epoch": 0.4036069937130008, "grad_norm": 0.43224555253982544, "learning_rate": 9.666780909724843e-06, "loss": 0.3612, "step": 8795 }, { "epoch": 0.40365288421825524, "grad_norm": 0.4897977411746979, "learning_rate": 9.666692893526874e-06, "loss": 0.4817, "step": 8796 }, { "epoch": 0.4036987747235097, "grad_norm": 0.4552345275878906, "learning_rate": 9.666604866106983e-06, "loss": 0.382, "step": 8797 }, { "epoch": 0.4037446652287642, "grad_norm": 0.5175498723983765, "learning_rate": 9.66651682746538e-06, "loss": 0.515, "step": 8798 }, { "epoch": 0.4037905557340186, "grad_norm": 0.5001868009567261, "learning_rate": 9.666428777602278e-06, "loss": 0.4702, "step": 8799 }, { "epoch": 0.4038364462392731, "grad_norm": 0.4322042763233185, "learning_rate": 9.666340716517886e-06, "loss": 0.3908, "step": 8800 }, { "epoch": 0.40388233674452756, "grad_norm": 0.4481142461299896, "learning_rate": 9.666252644212417e-06, "loss": 0.4312, "step": 8801 }, { "epoch": 0.403928227249782, "grad_norm": 0.4381375014781952, "learning_rate": 9.666164560686084e-06, "loss": 0.3569, "step": 8802 }, { "epoch": 0.4039741177550365, "grad_norm": 0.4626046121120453, "learning_rate": 9.666076465939099e-06, "loss": 0.4468, "step": 8803 }, { "epoch": 0.40402000826029094, "grad_norm": 0.4291236698627472, "learning_rate": 9.665988359971671e-06, "loss": 0.3875, "step": 8804 }, { "epoch": 0.4040658987655454, "grad_norm": 0.4435580372810364, "learning_rate": 9.665900242784014e-06, "loss": 0.3764, "step": 8805 }, { "epoch": 0.4041117892707999, "grad_norm": 0.476551353931427, "learning_rate": 9.665812114376339e-06, "loss": 0.4777, "step": 8806 }, { "epoch": 0.4041576797760543, "grad_norm": 0.44785547256469727, "learning_rate": 9.665723974748859e-06, "loss": 0.3756, "step": 8807 }, { "epoch": 0.4042035702813088, "grad_norm": 0.4439668357372284, "learning_rate": 9.665635823901787e-06, "loss": 0.3895, "step": 8808 }, { "epoch": 0.40424946078656326, "grad_norm": 0.4501616954803467, "learning_rate": 9.66554766183533e-06, "loss": 0.3971, "step": 8809 }, { "epoch": 0.4042953512918177, "grad_norm": 0.46380820870399475, "learning_rate": 9.665459488549707e-06, "loss": 0.4009, "step": 8810 }, { "epoch": 0.4043412417970722, "grad_norm": 0.4688146412372589, "learning_rate": 9.665371304045124e-06, "loss": 0.3796, "step": 8811 }, { "epoch": 0.40438713230232665, "grad_norm": 0.44879040122032166, "learning_rate": 9.665283108321796e-06, "loss": 0.3576, "step": 8812 }, { "epoch": 0.4044330228075811, "grad_norm": 0.4635031223297119, "learning_rate": 9.665194901379934e-06, "loss": 0.4298, "step": 8813 }, { "epoch": 0.4044789133128356, "grad_norm": 0.5085549354553223, "learning_rate": 9.665106683219751e-06, "loss": 0.4608, "step": 8814 }, { "epoch": 0.40452480381809003, "grad_norm": 0.5086290836334229, "learning_rate": 9.66501845384146e-06, "loss": 0.4306, "step": 8815 }, { "epoch": 0.4045706943233445, "grad_norm": 0.46002769470214844, "learning_rate": 9.66493021324527e-06, "loss": 0.3792, "step": 8816 }, { "epoch": 0.40461658482859897, "grad_norm": 0.4461497366428375, "learning_rate": 9.664841961431396e-06, "loss": 0.3697, "step": 8817 }, { "epoch": 0.4046624753338534, "grad_norm": 0.4822692275047302, "learning_rate": 9.664753698400048e-06, "loss": 0.3977, "step": 8818 }, { "epoch": 0.4047083658391079, "grad_norm": 0.4605186879634857, "learning_rate": 9.664665424151442e-06, "loss": 0.3969, "step": 8819 }, { "epoch": 0.40475425634436235, "grad_norm": 0.46697866916656494, "learning_rate": 9.664577138685786e-06, "loss": 0.3928, "step": 8820 }, { "epoch": 0.4048001468496168, "grad_norm": 0.506500244140625, "learning_rate": 9.664488842003297e-06, "loss": 0.5086, "step": 8821 }, { "epoch": 0.4048460373548713, "grad_norm": 0.411395788192749, "learning_rate": 9.66440053410418e-06, "loss": 0.3056, "step": 8822 }, { "epoch": 0.40489192786012573, "grad_norm": 0.47621437907218933, "learning_rate": 9.664312214988656e-06, "loss": 0.4473, "step": 8823 }, { "epoch": 0.4049378183653802, "grad_norm": 0.4494498670101166, "learning_rate": 9.664223884656932e-06, "loss": 0.3808, "step": 8824 }, { "epoch": 0.4049837088706347, "grad_norm": 0.47695329785346985, "learning_rate": 9.664135543109222e-06, "loss": 0.4421, "step": 8825 }, { "epoch": 0.4050295993758891, "grad_norm": 0.5242432355880737, "learning_rate": 9.664047190345736e-06, "loss": 0.5188, "step": 8826 }, { "epoch": 0.4050754898811436, "grad_norm": 0.4797147512435913, "learning_rate": 9.663958826366691e-06, "loss": 0.4586, "step": 8827 }, { "epoch": 0.40512138038639806, "grad_norm": 0.45458757877349854, "learning_rate": 9.663870451172295e-06, "loss": 0.4057, "step": 8828 }, { "epoch": 0.4051672708916525, "grad_norm": 0.45657458901405334, "learning_rate": 9.663782064762763e-06, "loss": 0.3862, "step": 8829 }, { "epoch": 0.405213161396907, "grad_norm": 0.4464982748031616, "learning_rate": 9.663693667138307e-06, "loss": 0.3651, "step": 8830 }, { "epoch": 0.40525905190216144, "grad_norm": 0.4419625699520111, "learning_rate": 9.66360525829914e-06, "loss": 0.3426, "step": 8831 }, { "epoch": 0.4053049424074159, "grad_norm": 0.4469248354434967, "learning_rate": 9.663516838245475e-06, "loss": 0.3777, "step": 8832 }, { "epoch": 0.4053508329126704, "grad_norm": 0.45046088099479675, "learning_rate": 9.663428406977521e-06, "loss": 0.3817, "step": 8833 }, { "epoch": 0.4053967234179248, "grad_norm": 0.42754629254341125, "learning_rate": 9.663339964495496e-06, "loss": 0.3373, "step": 8834 }, { "epoch": 0.4054426139231793, "grad_norm": 0.5050519108772278, "learning_rate": 9.663251510799611e-06, "loss": 0.4941, "step": 8835 }, { "epoch": 0.40548850442843376, "grad_norm": 0.4624300003051758, "learning_rate": 9.663163045890076e-06, "loss": 0.4599, "step": 8836 }, { "epoch": 0.4055343949336882, "grad_norm": 0.4650493562221527, "learning_rate": 9.663074569767106e-06, "loss": 0.434, "step": 8837 }, { "epoch": 0.4055802854389427, "grad_norm": 0.48047754168510437, "learning_rate": 9.662986082430914e-06, "loss": 0.422, "step": 8838 }, { "epoch": 0.40562617594419714, "grad_norm": 0.48945415019989014, "learning_rate": 9.662897583881712e-06, "loss": 0.4009, "step": 8839 }, { "epoch": 0.4056720664494516, "grad_norm": 0.5040608048439026, "learning_rate": 9.662809074119713e-06, "loss": 0.5, "step": 8840 }, { "epoch": 0.4057179569547061, "grad_norm": 0.43869921565055847, "learning_rate": 9.66272055314513e-06, "loss": 0.3554, "step": 8841 }, { "epoch": 0.4057638474599605, "grad_norm": 0.46926331520080566, "learning_rate": 9.662632020958174e-06, "loss": 0.3831, "step": 8842 }, { "epoch": 0.405809737965215, "grad_norm": 0.4961845576763153, "learning_rate": 9.662543477559062e-06, "loss": 0.4681, "step": 8843 }, { "epoch": 0.40585562847046946, "grad_norm": 0.440050333738327, "learning_rate": 9.662454922948004e-06, "loss": 0.3751, "step": 8844 }, { "epoch": 0.4059015189757239, "grad_norm": 0.4777848422527313, "learning_rate": 9.66236635712521e-06, "loss": 0.4162, "step": 8845 }, { "epoch": 0.4059474094809784, "grad_norm": 0.4768102765083313, "learning_rate": 9.662277780090901e-06, "loss": 0.421, "step": 8846 }, { "epoch": 0.40599329998623285, "grad_norm": 0.5259941816329956, "learning_rate": 9.662189191845284e-06, "loss": 0.531, "step": 8847 }, { "epoch": 0.4060391904914873, "grad_norm": 0.4325702488422394, "learning_rate": 9.662100592388572e-06, "loss": 0.379, "step": 8848 }, { "epoch": 0.4060850809967418, "grad_norm": 0.4171409010887146, "learning_rate": 9.66201198172098e-06, "loss": 0.3519, "step": 8849 }, { "epoch": 0.40613097150199623, "grad_norm": 0.49433276057243347, "learning_rate": 9.661923359842721e-06, "loss": 0.3644, "step": 8850 }, { "epoch": 0.4061768620072507, "grad_norm": 0.44720640778541565, "learning_rate": 9.661834726754007e-06, "loss": 0.3493, "step": 8851 }, { "epoch": 0.40622275251250517, "grad_norm": 0.7170031070709229, "learning_rate": 9.661746082455053e-06, "loss": 0.4727, "step": 8852 }, { "epoch": 0.4062686430177596, "grad_norm": 0.4148782789707184, "learning_rate": 9.661657426946071e-06, "loss": 0.3584, "step": 8853 }, { "epoch": 0.4063145335230141, "grad_norm": 0.47836795449256897, "learning_rate": 9.661568760227273e-06, "loss": 0.4052, "step": 8854 }, { "epoch": 0.40636042402826855, "grad_norm": 0.4515821039676666, "learning_rate": 9.661480082298872e-06, "loss": 0.414, "step": 8855 }, { "epoch": 0.406406314533523, "grad_norm": 0.46457260847091675, "learning_rate": 9.661391393161085e-06, "loss": 0.3409, "step": 8856 }, { "epoch": 0.4064522050387775, "grad_norm": 0.4624975323677063, "learning_rate": 9.661302692814123e-06, "loss": 0.395, "step": 8857 }, { "epoch": 0.40649809554403193, "grad_norm": 0.48750466108322144, "learning_rate": 9.6612139812582e-06, "loss": 0.4434, "step": 8858 }, { "epoch": 0.4065439860492864, "grad_norm": 0.5120342969894409, "learning_rate": 9.661125258493527e-06, "loss": 0.436, "step": 8859 }, { "epoch": 0.4065898765545409, "grad_norm": 0.45142799615859985, "learning_rate": 9.661036524520317e-06, "loss": 0.4019, "step": 8860 }, { "epoch": 0.4066357670597953, "grad_norm": 0.5066254138946533, "learning_rate": 9.660947779338787e-06, "loss": 0.4435, "step": 8861 }, { "epoch": 0.4066816575650498, "grad_norm": 0.45592033863067627, "learning_rate": 9.660859022949149e-06, "loss": 0.4223, "step": 8862 }, { "epoch": 0.40672754807030426, "grad_norm": 0.4346959888935089, "learning_rate": 9.660770255351616e-06, "loss": 0.3779, "step": 8863 }, { "epoch": 0.4067734385755587, "grad_norm": 0.45302462577819824, "learning_rate": 9.660681476546401e-06, "loss": 0.3893, "step": 8864 }, { "epoch": 0.4068193290808132, "grad_norm": 0.46152085065841675, "learning_rate": 9.660592686533719e-06, "loss": 0.3773, "step": 8865 }, { "epoch": 0.40686521958606764, "grad_norm": 0.4670110046863556, "learning_rate": 9.660503885313781e-06, "loss": 0.3767, "step": 8866 }, { "epoch": 0.4069111100913221, "grad_norm": 0.47388020157814026, "learning_rate": 9.660415072886805e-06, "loss": 0.426, "step": 8867 }, { "epoch": 0.4069570005965766, "grad_norm": 0.5018221139907837, "learning_rate": 9.660326249252998e-06, "loss": 0.4972, "step": 8868 }, { "epoch": 0.407002891101831, "grad_norm": 0.4418553411960602, "learning_rate": 9.66023741441258e-06, "loss": 0.3659, "step": 8869 }, { "epoch": 0.4070487816070855, "grad_norm": 0.5037994980812073, "learning_rate": 9.660148568365759e-06, "loss": 0.5028, "step": 8870 }, { "epoch": 0.40709467211233996, "grad_norm": 0.45036739110946655, "learning_rate": 9.660059711112754e-06, "loss": 0.3592, "step": 8871 }, { "epoch": 0.4071405626175944, "grad_norm": 0.47340333461761475, "learning_rate": 9.659970842653774e-06, "loss": 0.3885, "step": 8872 }, { "epoch": 0.4071864531228489, "grad_norm": 0.46310076117515564, "learning_rate": 9.659881962989037e-06, "loss": 0.4387, "step": 8873 }, { "epoch": 0.40723234362810334, "grad_norm": 0.4902832508087158, "learning_rate": 9.659793072118753e-06, "loss": 0.4212, "step": 8874 }, { "epoch": 0.4072782341333578, "grad_norm": 0.45954540371894836, "learning_rate": 9.659704170043137e-06, "loss": 0.3867, "step": 8875 }, { "epoch": 0.4073241246386123, "grad_norm": 0.47316038608551025, "learning_rate": 9.659615256762404e-06, "loss": 0.4236, "step": 8876 }, { "epoch": 0.4073700151438667, "grad_norm": 0.5014979243278503, "learning_rate": 9.659526332276767e-06, "loss": 0.4968, "step": 8877 }, { "epoch": 0.4074159056491212, "grad_norm": 0.4483209550380707, "learning_rate": 9.65943739658644e-06, "loss": 0.3236, "step": 8878 }, { "epoch": 0.40746179615437567, "grad_norm": 0.5122126936912537, "learning_rate": 9.659348449691637e-06, "loss": 0.5046, "step": 8879 }, { "epoch": 0.4075076866596301, "grad_norm": 0.4964318871498108, "learning_rate": 9.659259491592571e-06, "loss": 0.4214, "step": 8880 }, { "epoch": 0.4075535771648846, "grad_norm": 0.43395060300827026, "learning_rate": 9.659170522289457e-06, "loss": 0.369, "step": 8881 }, { "epoch": 0.40759946767013905, "grad_norm": 0.48802536725997925, "learning_rate": 9.659081541782507e-06, "loss": 0.4032, "step": 8882 }, { "epoch": 0.4076453581753935, "grad_norm": 0.4703458547592163, "learning_rate": 9.658992550071937e-06, "loss": 0.4511, "step": 8883 }, { "epoch": 0.407691248680648, "grad_norm": 0.45683303475379944, "learning_rate": 9.658903547157961e-06, "loss": 0.3857, "step": 8884 }, { "epoch": 0.40773713918590243, "grad_norm": 0.4692900776863098, "learning_rate": 9.65881453304079e-06, "loss": 0.4329, "step": 8885 }, { "epoch": 0.4077830296911569, "grad_norm": 0.4641232192516327, "learning_rate": 9.658725507720644e-06, "loss": 0.4199, "step": 8886 }, { "epoch": 0.40782892019641137, "grad_norm": 0.46960094571113586, "learning_rate": 9.658636471197731e-06, "loss": 0.3754, "step": 8887 }, { "epoch": 0.4078748107016658, "grad_norm": 0.5141364932060242, "learning_rate": 9.658547423472268e-06, "loss": 0.4944, "step": 8888 }, { "epoch": 0.4079207012069203, "grad_norm": 0.47083625197410583, "learning_rate": 9.658458364544469e-06, "loss": 0.4758, "step": 8889 }, { "epoch": 0.40796659171217475, "grad_norm": 0.4520561397075653, "learning_rate": 9.658369294414547e-06, "loss": 0.3993, "step": 8890 }, { "epoch": 0.4080124822174292, "grad_norm": 0.4793906509876251, "learning_rate": 9.658280213082717e-06, "loss": 0.3692, "step": 8891 }, { "epoch": 0.4080583727226837, "grad_norm": 0.4123370051383972, "learning_rate": 9.658191120549196e-06, "loss": 0.3098, "step": 8892 }, { "epoch": 0.40810426322793814, "grad_norm": 0.44171273708343506, "learning_rate": 9.658102016814192e-06, "loss": 0.3776, "step": 8893 }, { "epoch": 0.4081501537331926, "grad_norm": 0.4890217185020447, "learning_rate": 9.658012901877925e-06, "loss": 0.3832, "step": 8894 }, { "epoch": 0.4081960442384471, "grad_norm": 0.44416534900665283, "learning_rate": 9.657923775740607e-06, "loss": 0.3525, "step": 8895 }, { "epoch": 0.4082419347437015, "grad_norm": 0.4593847692012787, "learning_rate": 9.657834638402453e-06, "loss": 0.3494, "step": 8896 }, { "epoch": 0.408287825248956, "grad_norm": 0.43646371364593506, "learning_rate": 9.657745489863675e-06, "loss": 0.3672, "step": 8897 }, { "epoch": 0.40833371575421046, "grad_norm": 0.47590407729148865, "learning_rate": 9.65765633012449e-06, "loss": 0.4208, "step": 8898 }, { "epoch": 0.4083796062594649, "grad_norm": 0.4629414975643158, "learning_rate": 9.657567159185111e-06, "loss": 0.3724, "step": 8899 }, { "epoch": 0.4084254967647194, "grad_norm": 0.43548890948295593, "learning_rate": 9.657477977045753e-06, "loss": 0.3367, "step": 8900 }, { "epoch": 0.40847138726997384, "grad_norm": 0.4981532692909241, "learning_rate": 9.657388783706632e-06, "loss": 0.3757, "step": 8901 }, { "epoch": 0.4085172777752283, "grad_norm": 0.5075209140777588, "learning_rate": 9.657299579167958e-06, "loss": 0.4951, "step": 8902 }, { "epoch": 0.4085631682804828, "grad_norm": 0.448089063167572, "learning_rate": 9.657210363429949e-06, "loss": 0.3878, "step": 8903 }, { "epoch": 0.4086090587857372, "grad_norm": 0.4841988980770111, "learning_rate": 9.657121136492822e-06, "loss": 0.44, "step": 8904 }, { "epoch": 0.4086549492909917, "grad_norm": 0.49064114689826965, "learning_rate": 9.657031898356785e-06, "loss": 0.4792, "step": 8905 }, { "epoch": 0.40870083979624616, "grad_norm": 0.5222851037979126, "learning_rate": 9.656942649022059e-06, "loss": 0.5046, "step": 8906 }, { "epoch": 0.4087467303015006, "grad_norm": 0.4698149561882019, "learning_rate": 9.656853388488855e-06, "loss": 0.3668, "step": 8907 }, { "epoch": 0.4087926208067551, "grad_norm": 0.45221269130706787, "learning_rate": 9.656764116757387e-06, "loss": 0.3472, "step": 8908 }, { "epoch": 0.40883851131200954, "grad_norm": 0.47569355368614197, "learning_rate": 9.65667483382787e-06, "loss": 0.4016, "step": 8909 }, { "epoch": 0.408884401817264, "grad_norm": 0.44347915053367615, "learning_rate": 9.656585539700523e-06, "loss": 0.345, "step": 8910 }, { "epoch": 0.4089302923225185, "grad_norm": 0.45892956852912903, "learning_rate": 9.656496234375554e-06, "loss": 0.4293, "step": 8911 }, { "epoch": 0.4089761828277729, "grad_norm": 0.5029703974723816, "learning_rate": 9.656406917853183e-06, "loss": 0.4494, "step": 8912 }, { "epoch": 0.40902207333302737, "grad_norm": 0.4619259536266327, "learning_rate": 9.656317590133624e-06, "loss": 0.3803, "step": 8913 }, { "epoch": 0.40906796383828187, "grad_norm": 0.5027748346328735, "learning_rate": 9.65622825121709e-06, "loss": 0.4413, "step": 8914 }, { "epoch": 0.4091138543435363, "grad_norm": 0.43719255924224854, "learning_rate": 9.656138901103795e-06, "loss": 0.3505, "step": 8915 }, { "epoch": 0.4091597448487908, "grad_norm": 0.49089735746383667, "learning_rate": 9.656049539793958e-06, "loss": 0.4474, "step": 8916 }, { "epoch": 0.40920563535404525, "grad_norm": 0.43796178698539734, "learning_rate": 9.655960167287788e-06, "loss": 0.3338, "step": 8917 }, { "epoch": 0.4092515258592997, "grad_norm": 0.48327669501304626, "learning_rate": 9.655870783585505e-06, "loss": 0.4916, "step": 8918 }, { "epoch": 0.4092974163645542, "grad_norm": 0.5065589547157288, "learning_rate": 9.655781388687324e-06, "loss": 0.5143, "step": 8919 }, { "epoch": 0.40934330686980863, "grad_norm": 0.6615613102912903, "learning_rate": 9.655691982593455e-06, "loss": 0.4162, "step": 8920 }, { "epoch": 0.4093891973750631, "grad_norm": 0.4805823564529419, "learning_rate": 9.655602565304119e-06, "loss": 0.4021, "step": 8921 }, { "epoch": 0.40943508788031757, "grad_norm": 0.47744014859199524, "learning_rate": 9.655513136819528e-06, "loss": 0.4232, "step": 8922 }, { "epoch": 0.409480978385572, "grad_norm": 0.4743710160255432, "learning_rate": 9.655423697139897e-06, "loss": 0.3534, "step": 8923 }, { "epoch": 0.4095268688908265, "grad_norm": 0.47602227330207825, "learning_rate": 9.65533424626544e-06, "loss": 0.3696, "step": 8924 }, { "epoch": 0.40957275939608095, "grad_norm": 0.4405398964881897, "learning_rate": 9.655244784196373e-06, "loss": 0.3676, "step": 8925 }, { "epoch": 0.4096186499013354, "grad_norm": 0.4972655773162842, "learning_rate": 9.655155310932914e-06, "loss": 0.4846, "step": 8926 }, { "epoch": 0.4096645404065899, "grad_norm": 0.44851911067962646, "learning_rate": 9.655065826475272e-06, "loss": 0.4186, "step": 8927 }, { "epoch": 0.40971043091184434, "grad_norm": 0.45139557123184204, "learning_rate": 9.654976330823669e-06, "loss": 0.3977, "step": 8928 }, { "epoch": 0.4097563214170988, "grad_norm": 0.5475239753723145, "learning_rate": 9.654886823978316e-06, "loss": 0.5654, "step": 8929 }, { "epoch": 0.4098022119223533, "grad_norm": 0.45348426699638367, "learning_rate": 9.65479730593943e-06, "loss": 0.4009, "step": 8930 }, { "epoch": 0.4098481024276077, "grad_norm": 0.4306449592113495, "learning_rate": 9.654707776707224e-06, "loss": 0.3363, "step": 8931 }, { "epoch": 0.4098939929328622, "grad_norm": 0.42864498496055603, "learning_rate": 9.654618236281916e-06, "loss": 0.33, "step": 8932 }, { "epoch": 0.40993988343811666, "grad_norm": 0.4650707542896271, "learning_rate": 9.65452868466372e-06, "loss": 0.4095, "step": 8933 }, { "epoch": 0.4099857739433711, "grad_norm": 0.499290406703949, "learning_rate": 9.654439121852852e-06, "loss": 0.4491, "step": 8934 }, { "epoch": 0.4100316644486256, "grad_norm": 0.46717971563339233, "learning_rate": 9.654349547849527e-06, "loss": 0.4191, "step": 8935 }, { "epoch": 0.41007755495388004, "grad_norm": 0.47403067350387573, "learning_rate": 9.65425996265396e-06, "loss": 0.3985, "step": 8936 }, { "epoch": 0.4101234454591345, "grad_norm": 0.4315500557422638, "learning_rate": 9.654170366266367e-06, "loss": 0.3364, "step": 8937 }, { "epoch": 0.410169335964389, "grad_norm": 0.49038535356521606, "learning_rate": 9.654080758686962e-06, "loss": 0.458, "step": 8938 }, { "epoch": 0.4102152264696434, "grad_norm": 0.5044361352920532, "learning_rate": 9.653991139915961e-06, "loss": 0.4644, "step": 8939 }, { "epoch": 0.41026111697489787, "grad_norm": 0.505952775478363, "learning_rate": 9.653901509953582e-06, "loss": 0.4936, "step": 8940 }, { "epoch": 0.41030700748015236, "grad_norm": 0.4509478807449341, "learning_rate": 9.653811868800038e-06, "loss": 0.4125, "step": 8941 }, { "epoch": 0.4103528979854068, "grad_norm": 0.45194777846336365, "learning_rate": 9.653722216455545e-06, "loss": 0.3793, "step": 8942 }, { "epoch": 0.4103987884906613, "grad_norm": 0.45763230323791504, "learning_rate": 9.65363255292032e-06, "loss": 0.388, "step": 8943 }, { "epoch": 0.41044467899591575, "grad_norm": 0.5256699919700623, "learning_rate": 9.653542878194578e-06, "loss": 0.5126, "step": 8944 }, { "epoch": 0.4104905695011702, "grad_norm": 0.47213244438171387, "learning_rate": 9.653453192278532e-06, "loss": 0.4268, "step": 8945 }, { "epoch": 0.4105364600064247, "grad_norm": 0.4997270107269287, "learning_rate": 9.6533634951724e-06, "loss": 0.4214, "step": 8946 }, { "epoch": 0.41058235051167913, "grad_norm": 0.483229398727417, "learning_rate": 9.653273786876399e-06, "loss": 0.4796, "step": 8947 }, { "epoch": 0.41062824101693357, "grad_norm": 0.4692580997943878, "learning_rate": 9.653184067390741e-06, "loss": 0.4213, "step": 8948 }, { "epoch": 0.41067413152218807, "grad_norm": 0.4542502164840698, "learning_rate": 9.653094336715645e-06, "loss": 0.3964, "step": 8949 }, { "epoch": 0.4107200220274425, "grad_norm": 0.48807159066200256, "learning_rate": 9.653004594851325e-06, "loss": 0.4329, "step": 8950 }, { "epoch": 0.410765912532697, "grad_norm": 0.4923957586288452, "learning_rate": 9.652914841797998e-06, "loss": 0.4287, "step": 8951 }, { "epoch": 0.41081180303795145, "grad_norm": 0.49464336037635803, "learning_rate": 9.65282507755588e-06, "loss": 0.4271, "step": 8952 }, { "epoch": 0.4108576935432059, "grad_norm": 0.4886745512485504, "learning_rate": 9.652735302125186e-06, "loss": 0.4384, "step": 8953 }, { "epoch": 0.4109035840484604, "grad_norm": 0.449872761964798, "learning_rate": 9.652645515506132e-06, "loss": 0.389, "step": 8954 }, { "epoch": 0.41094947455371483, "grad_norm": 0.4313916563987732, "learning_rate": 9.652555717698933e-06, "loss": 0.4083, "step": 8955 }, { "epoch": 0.4109953650589693, "grad_norm": 0.4735181927680969, "learning_rate": 9.652465908703807e-06, "loss": 0.3884, "step": 8956 }, { "epoch": 0.4110412555642238, "grad_norm": 0.4801724851131439, "learning_rate": 9.652376088520967e-06, "loss": 0.4158, "step": 8957 }, { "epoch": 0.4110871460694782, "grad_norm": 0.44262146949768066, "learning_rate": 9.652286257150633e-06, "loss": 0.4059, "step": 8958 }, { "epoch": 0.4111330365747327, "grad_norm": 0.4646487236022949, "learning_rate": 9.652196414593018e-06, "loss": 0.3928, "step": 8959 }, { "epoch": 0.41117892707998716, "grad_norm": 0.5538948774337769, "learning_rate": 9.65210656084834e-06, "loss": 0.6343, "step": 8960 }, { "epoch": 0.4112248175852416, "grad_norm": 0.4833605885505676, "learning_rate": 9.652016695916813e-06, "loss": 0.4251, "step": 8961 }, { "epoch": 0.4112707080904961, "grad_norm": 0.4419155418872833, "learning_rate": 9.651926819798652e-06, "loss": 0.4092, "step": 8962 }, { "epoch": 0.41131659859575054, "grad_norm": 0.6728895306587219, "learning_rate": 9.651836932494079e-06, "loss": 0.3942, "step": 8963 }, { "epoch": 0.411362489101005, "grad_norm": 0.42597320675849915, "learning_rate": 9.651747034003302e-06, "loss": 0.3767, "step": 8964 }, { "epoch": 0.4114083796062595, "grad_norm": 0.4316471815109253, "learning_rate": 9.651657124326546e-06, "loss": 0.3967, "step": 8965 }, { "epoch": 0.4114542701115139, "grad_norm": 0.4501246511936188, "learning_rate": 9.651567203464021e-06, "loss": 0.3691, "step": 8966 }, { "epoch": 0.4115001606167684, "grad_norm": 0.432313472032547, "learning_rate": 9.651477271415944e-06, "loss": 0.3508, "step": 8967 }, { "epoch": 0.41154605112202286, "grad_norm": 0.4484502077102661, "learning_rate": 9.651387328182532e-06, "loss": 0.3657, "step": 8968 }, { "epoch": 0.4115919416272773, "grad_norm": 0.5222544074058533, "learning_rate": 9.651297373764003e-06, "loss": 0.4591, "step": 8969 }, { "epoch": 0.4116378321325318, "grad_norm": 0.44669589400291443, "learning_rate": 9.651207408160572e-06, "loss": 0.3488, "step": 8970 }, { "epoch": 0.41168372263778624, "grad_norm": 0.4638034701347351, "learning_rate": 9.651117431372455e-06, "loss": 0.3627, "step": 8971 }, { "epoch": 0.4117296131430407, "grad_norm": 0.5077080726623535, "learning_rate": 9.651027443399866e-06, "loss": 0.4146, "step": 8972 }, { "epoch": 0.4117755036482952, "grad_norm": 0.4447862505912781, "learning_rate": 9.650937444243027e-06, "loss": 0.3864, "step": 8973 }, { "epoch": 0.4118213941535496, "grad_norm": 0.4611743688583374, "learning_rate": 9.650847433902149e-06, "loss": 0.3561, "step": 8974 }, { "epoch": 0.41186728465880407, "grad_norm": 0.4529739022254944, "learning_rate": 9.650757412377452e-06, "loss": 0.3564, "step": 8975 }, { "epoch": 0.41191317516405856, "grad_norm": 0.4539554715156555, "learning_rate": 9.650667379669151e-06, "loss": 0.4295, "step": 8976 }, { "epoch": 0.411959065669313, "grad_norm": 0.46761810779571533, "learning_rate": 9.650577335777463e-06, "loss": 0.3738, "step": 8977 }, { "epoch": 0.4120049561745675, "grad_norm": 0.46458902955055237, "learning_rate": 9.650487280702603e-06, "loss": 0.3613, "step": 8978 }, { "epoch": 0.41205084667982195, "grad_norm": 0.4038749039173126, "learning_rate": 9.650397214444792e-06, "loss": 0.2992, "step": 8979 }, { "epoch": 0.4120967371850764, "grad_norm": 0.467714786529541, "learning_rate": 9.65030713700424e-06, "loss": 0.366, "step": 8980 }, { "epoch": 0.4121426276903309, "grad_norm": 0.4536033868789673, "learning_rate": 9.65021704838117e-06, "loss": 0.3437, "step": 8981 }, { "epoch": 0.41218851819558533, "grad_norm": 0.4523737132549286, "learning_rate": 9.650126948575793e-06, "loss": 0.3393, "step": 8982 }, { "epoch": 0.41223440870083977, "grad_norm": 0.46972012519836426, "learning_rate": 9.65003683758833e-06, "loss": 0.4243, "step": 8983 }, { "epoch": 0.41228029920609427, "grad_norm": 0.462490439414978, "learning_rate": 9.649946715418994e-06, "loss": 0.3413, "step": 8984 }, { "epoch": 0.4123261897113487, "grad_norm": 0.4883803725242615, "learning_rate": 9.649856582068006e-06, "loss": 0.459, "step": 8985 }, { "epoch": 0.4123720802166032, "grad_norm": 0.47559982538223267, "learning_rate": 9.64976643753558e-06, "loss": 0.4373, "step": 8986 }, { "epoch": 0.41241797072185765, "grad_norm": 0.4913952946662903, "learning_rate": 9.649676281821932e-06, "loss": 0.409, "step": 8987 }, { "epoch": 0.4124638612271121, "grad_norm": 0.46557748317718506, "learning_rate": 9.64958611492728e-06, "loss": 0.3908, "step": 8988 }, { "epoch": 0.4125097517323666, "grad_norm": 0.45035800337791443, "learning_rate": 9.649495936851843e-06, "loss": 0.4594, "step": 8989 }, { "epoch": 0.41255564223762103, "grad_norm": 0.47361722588539124, "learning_rate": 9.649405747595834e-06, "loss": 0.3937, "step": 8990 }, { "epoch": 0.4126015327428755, "grad_norm": 0.47661158442497253, "learning_rate": 9.64931554715947e-06, "loss": 0.432, "step": 8991 }, { "epoch": 0.41264742324813, "grad_norm": 0.5061835050582886, "learning_rate": 9.649225335542974e-06, "loss": 0.4547, "step": 8992 }, { "epoch": 0.4126933137533844, "grad_norm": 0.4640252888202667, "learning_rate": 9.649135112746554e-06, "loss": 0.4239, "step": 8993 }, { "epoch": 0.4127392042586389, "grad_norm": 0.48689496517181396, "learning_rate": 9.649044878770432e-06, "loss": 0.414, "step": 8994 }, { "epoch": 0.41278509476389336, "grad_norm": 0.48968270421028137, "learning_rate": 9.648954633614827e-06, "loss": 0.3765, "step": 8995 }, { "epoch": 0.4128309852691478, "grad_norm": 0.45853233337402344, "learning_rate": 9.648864377279952e-06, "loss": 0.3139, "step": 8996 }, { "epoch": 0.4128768757744023, "grad_norm": 0.5381628274917603, "learning_rate": 9.648774109766024e-06, "loss": 0.5339, "step": 8997 }, { "epoch": 0.41292276627965674, "grad_norm": 0.4467434287071228, "learning_rate": 9.648683831073264e-06, "loss": 0.3395, "step": 8998 }, { "epoch": 0.4129686567849112, "grad_norm": 0.4439462125301361, "learning_rate": 9.648593541201885e-06, "loss": 0.3316, "step": 8999 }, { "epoch": 0.4130145472901657, "grad_norm": 0.4597897529602051, "learning_rate": 9.648503240152105e-06, "loss": 0.3633, "step": 9000 }, { "epoch": 0.4130604377954201, "grad_norm": 0.45608511567115784, "learning_rate": 9.648412927924143e-06, "loss": 0.3649, "step": 9001 }, { "epoch": 0.41310632830067456, "grad_norm": 0.4434874653816223, "learning_rate": 9.648322604518215e-06, "loss": 0.3797, "step": 9002 }, { "epoch": 0.41315221880592906, "grad_norm": 0.5455503463745117, "learning_rate": 9.64823226993454e-06, "loss": 0.5055, "step": 9003 }, { "epoch": 0.4131981093111835, "grad_norm": 0.4706743657588959, "learning_rate": 9.64814192417333e-06, "loss": 0.3866, "step": 9004 }, { "epoch": 0.413243999816438, "grad_norm": 0.4459630846977234, "learning_rate": 9.648051567234806e-06, "loss": 0.4126, "step": 9005 }, { "epoch": 0.41328989032169244, "grad_norm": 0.4447766840457916, "learning_rate": 9.647961199119187e-06, "loss": 0.3818, "step": 9006 }, { "epoch": 0.4133357808269469, "grad_norm": 0.46116307377815247, "learning_rate": 9.647870819826685e-06, "loss": 0.3769, "step": 9007 }, { "epoch": 0.4133816713322014, "grad_norm": 0.466164231300354, "learning_rate": 9.647780429357524e-06, "loss": 0.4502, "step": 9008 }, { "epoch": 0.4134275618374558, "grad_norm": 0.4497338831424713, "learning_rate": 9.647690027711916e-06, "loss": 0.3357, "step": 9009 }, { "epoch": 0.41347345234271027, "grad_norm": 0.4638221859931946, "learning_rate": 9.647599614890081e-06, "loss": 0.3774, "step": 9010 }, { "epoch": 0.41351934284796477, "grad_norm": 0.4725976586341858, "learning_rate": 9.647509190892237e-06, "loss": 0.4332, "step": 9011 }, { "epoch": 0.4135652333532192, "grad_norm": 0.4279395341873169, "learning_rate": 9.647418755718599e-06, "loss": 0.372, "step": 9012 }, { "epoch": 0.4136111238584737, "grad_norm": 0.4862496256828308, "learning_rate": 9.647328309369385e-06, "loss": 0.4566, "step": 9013 }, { "epoch": 0.41365701436372815, "grad_norm": 0.4215000569820404, "learning_rate": 9.647237851844814e-06, "loss": 0.3125, "step": 9014 }, { "epoch": 0.4137029048689826, "grad_norm": 0.4699840843677521, "learning_rate": 9.647147383145103e-06, "loss": 0.4562, "step": 9015 }, { "epoch": 0.4137487953742371, "grad_norm": 0.4386625289916992, "learning_rate": 9.647056903270469e-06, "loss": 0.3528, "step": 9016 }, { "epoch": 0.41379468587949153, "grad_norm": 0.43476006388664246, "learning_rate": 9.646966412221128e-06, "loss": 0.3503, "step": 9017 }, { "epoch": 0.413840576384746, "grad_norm": 0.4651428163051605, "learning_rate": 9.646875909997301e-06, "loss": 0.355, "step": 9018 }, { "epoch": 0.41388646689000047, "grad_norm": 0.4806995391845703, "learning_rate": 9.646785396599206e-06, "loss": 0.4185, "step": 9019 }, { "epoch": 0.4139323573952549, "grad_norm": 0.4785299599170685, "learning_rate": 9.646694872027055e-06, "loss": 0.4414, "step": 9020 }, { "epoch": 0.4139782479005094, "grad_norm": 0.4907771348953247, "learning_rate": 9.646604336281072e-06, "loss": 0.3659, "step": 9021 }, { "epoch": 0.41402413840576385, "grad_norm": 0.5225874185562134, "learning_rate": 9.64651378936147e-06, "loss": 0.5623, "step": 9022 }, { "epoch": 0.4140700289110183, "grad_norm": 0.4605194628238678, "learning_rate": 9.646423231268471e-06, "loss": 0.3689, "step": 9023 }, { "epoch": 0.4141159194162728, "grad_norm": 0.4852149784564972, "learning_rate": 9.646332662002288e-06, "loss": 0.5034, "step": 9024 }, { "epoch": 0.41416180992152724, "grad_norm": 0.5123778581619263, "learning_rate": 9.646242081563144e-06, "loss": 0.5514, "step": 9025 }, { "epoch": 0.4142077004267817, "grad_norm": 0.5407106280326843, "learning_rate": 9.646151489951252e-06, "loss": 0.4827, "step": 9026 }, { "epoch": 0.4142535909320362, "grad_norm": 0.4818034768104553, "learning_rate": 9.646060887166833e-06, "loss": 0.432, "step": 9027 }, { "epoch": 0.4142994814372906, "grad_norm": 0.48240476846694946, "learning_rate": 9.645970273210105e-06, "loss": 0.4935, "step": 9028 }, { "epoch": 0.41434537194254506, "grad_norm": 0.5163688659667969, "learning_rate": 9.645879648081284e-06, "loss": 0.4852, "step": 9029 }, { "epoch": 0.41439126244779956, "grad_norm": 0.4567596912384033, "learning_rate": 9.645789011780588e-06, "loss": 0.3627, "step": 9030 }, { "epoch": 0.414437152953054, "grad_norm": 0.5446053743362427, "learning_rate": 9.645698364308236e-06, "loss": 0.497, "step": 9031 }, { "epoch": 0.4144830434583085, "grad_norm": 0.46646878123283386, "learning_rate": 9.645607705664446e-06, "loss": 0.4309, "step": 9032 }, { "epoch": 0.41452893396356294, "grad_norm": 0.44527146220207214, "learning_rate": 9.645517035849435e-06, "loss": 0.3512, "step": 9033 }, { "epoch": 0.4145748244688174, "grad_norm": 0.4800756573677063, "learning_rate": 9.645426354863424e-06, "loss": 0.4695, "step": 9034 }, { "epoch": 0.4146207149740719, "grad_norm": 0.4417162239551544, "learning_rate": 9.645335662706626e-06, "loss": 0.3374, "step": 9035 }, { "epoch": 0.4146666054793263, "grad_norm": 0.4372383654117584, "learning_rate": 9.645244959379263e-06, "loss": 0.3465, "step": 9036 }, { "epoch": 0.41471249598458076, "grad_norm": 0.49670517444610596, "learning_rate": 9.645154244881551e-06, "loss": 0.4593, "step": 9037 }, { "epoch": 0.41475838648983526, "grad_norm": 0.5172892212867737, "learning_rate": 9.645063519213711e-06, "loss": 0.461, "step": 9038 }, { "epoch": 0.4148042769950897, "grad_norm": 0.461757093667984, "learning_rate": 9.644972782375956e-06, "loss": 0.4068, "step": 9039 }, { "epoch": 0.4148501675003442, "grad_norm": 0.49969741702079773, "learning_rate": 9.644882034368511e-06, "loss": 0.468, "step": 9040 }, { "epoch": 0.41489605800559864, "grad_norm": 0.4867217540740967, "learning_rate": 9.644791275191589e-06, "loss": 0.3975, "step": 9041 }, { "epoch": 0.4149419485108531, "grad_norm": 0.4218943417072296, "learning_rate": 9.64470050484541e-06, "loss": 0.3978, "step": 9042 }, { "epoch": 0.4149878390161076, "grad_norm": 0.4625298082828522, "learning_rate": 9.644609723330193e-06, "loss": 0.3579, "step": 9043 }, { "epoch": 0.415033729521362, "grad_norm": 0.44674423336982727, "learning_rate": 9.644518930646154e-06, "loss": 0.3903, "step": 9044 }, { "epoch": 0.41507962002661647, "grad_norm": 0.48712071776390076, "learning_rate": 9.644428126793514e-06, "loss": 0.5092, "step": 9045 }, { "epoch": 0.41512551053187097, "grad_norm": 0.4480718970298767, "learning_rate": 9.64433731177249e-06, "loss": 0.3893, "step": 9046 }, { "epoch": 0.4151714010371254, "grad_norm": 0.46100783348083496, "learning_rate": 9.644246485583301e-06, "loss": 0.4156, "step": 9047 }, { "epoch": 0.4152172915423799, "grad_norm": 0.482197105884552, "learning_rate": 9.644155648226163e-06, "loss": 0.4626, "step": 9048 }, { "epoch": 0.41526318204763435, "grad_norm": 0.5053380727767944, "learning_rate": 9.644064799701298e-06, "loss": 0.4673, "step": 9049 }, { "epoch": 0.4153090725528888, "grad_norm": 0.4576834738254547, "learning_rate": 9.643973940008924e-06, "loss": 0.3836, "step": 9050 }, { "epoch": 0.4153549630581433, "grad_norm": 0.425549179315567, "learning_rate": 9.643883069149255e-06, "loss": 0.3438, "step": 9051 }, { "epoch": 0.41540085356339773, "grad_norm": 0.4614979028701782, "learning_rate": 9.643792187122516e-06, "loss": 0.3793, "step": 9052 }, { "epoch": 0.4154467440686522, "grad_norm": 0.48205986618995667, "learning_rate": 9.643701293928922e-06, "loss": 0.4028, "step": 9053 }, { "epoch": 0.41549263457390667, "grad_norm": 0.48817959427833557, "learning_rate": 9.643610389568692e-06, "loss": 0.4144, "step": 9054 }, { "epoch": 0.4155385250791611, "grad_norm": 0.448244571685791, "learning_rate": 9.643519474042042e-06, "loss": 0.4054, "step": 9055 }, { "epoch": 0.4155844155844156, "grad_norm": 0.5007070302963257, "learning_rate": 9.643428547349195e-06, "loss": 0.4517, "step": 9056 }, { "epoch": 0.41563030608967005, "grad_norm": 0.4293174147605896, "learning_rate": 9.64333760949037e-06, "loss": 0.3692, "step": 9057 }, { "epoch": 0.4156761965949245, "grad_norm": 0.4343571066856384, "learning_rate": 9.64324666046578e-06, "loss": 0.3502, "step": 9058 }, { "epoch": 0.415722087100179, "grad_norm": 0.4221457839012146, "learning_rate": 9.643155700275647e-06, "loss": 0.3291, "step": 9059 }, { "epoch": 0.41576797760543344, "grad_norm": 0.46484214067459106, "learning_rate": 9.643064728920192e-06, "loss": 0.4107, "step": 9060 }, { "epoch": 0.4158138681106879, "grad_norm": 0.5128991603851318, "learning_rate": 9.642973746399633e-06, "loss": 0.5017, "step": 9061 }, { "epoch": 0.4158597586159424, "grad_norm": 0.5106416940689087, "learning_rate": 9.642882752714187e-06, "loss": 0.479, "step": 9062 }, { "epoch": 0.4159056491211968, "grad_norm": 0.4736003577709198, "learning_rate": 9.64279174786407e-06, "loss": 0.3928, "step": 9063 }, { "epoch": 0.41595153962645126, "grad_norm": 0.49389421939849854, "learning_rate": 9.642700731849507e-06, "loss": 0.4692, "step": 9064 }, { "epoch": 0.41599743013170576, "grad_norm": 0.4896695017814636, "learning_rate": 9.642609704670713e-06, "loss": 0.4646, "step": 9065 }, { "epoch": 0.4160433206369602, "grad_norm": 0.6674607396125793, "learning_rate": 9.64251866632791e-06, "loss": 0.3912, "step": 9066 }, { "epoch": 0.4160892111422147, "grad_norm": 0.4925616681575775, "learning_rate": 9.642427616821313e-06, "loss": 0.4665, "step": 9067 }, { "epoch": 0.41613510164746914, "grad_norm": 0.46515190601348877, "learning_rate": 9.642336556151141e-06, "loss": 0.3909, "step": 9068 }, { "epoch": 0.4161809921527236, "grad_norm": 0.45093727111816406, "learning_rate": 9.642245484317619e-06, "loss": 0.3877, "step": 9069 }, { "epoch": 0.4162268826579781, "grad_norm": 0.5061994791030884, "learning_rate": 9.642154401320958e-06, "loss": 0.4438, "step": 9070 }, { "epoch": 0.4162727731632325, "grad_norm": 0.49421414732933044, "learning_rate": 9.642063307161383e-06, "loss": 0.3592, "step": 9071 }, { "epoch": 0.41631866366848697, "grad_norm": 0.5362238883972168, "learning_rate": 9.641972201839109e-06, "loss": 0.4679, "step": 9072 }, { "epoch": 0.41636455417374146, "grad_norm": 0.4559922516345978, "learning_rate": 9.641881085354358e-06, "loss": 0.4111, "step": 9073 }, { "epoch": 0.4164104446789959, "grad_norm": 0.4967464804649353, "learning_rate": 9.641789957707348e-06, "loss": 0.4857, "step": 9074 }, { "epoch": 0.4164563351842504, "grad_norm": 0.4805856943130493, "learning_rate": 9.641698818898298e-06, "loss": 0.4534, "step": 9075 }, { "epoch": 0.41650222568950485, "grad_norm": 0.46756449341773987, "learning_rate": 9.641607668927426e-06, "loss": 0.419, "step": 9076 }, { "epoch": 0.4165481161947593, "grad_norm": 0.451858252286911, "learning_rate": 9.641516507794955e-06, "loss": 0.3637, "step": 9077 }, { "epoch": 0.4165940067000138, "grad_norm": 0.4719327390193939, "learning_rate": 9.6414253355011e-06, "loss": 0.416, "step": 9078 }, { "epoch": 0.41663989720526823, "grad_norm": 0.44034528732299805, "learning_rate": 9.641334152046082e-06, "loss": 0.3617, "step": 9079 }, { "epoch": 0.41668578771052267, "grad_norm": 0.5202441811561584, "learning_rate": 9.64124295743012e-06, "loss": 0.5065, "step": 9080 }, { "epoch": 0.41673167821577717, "grad_norm": 0.47937485575675964, "learning_rate": 9.641151751653435e-06, "loss": 0.4285, "step": 9081 }, { "epoch": 0.4167775687210316, "grad_norm": 0.47166207432746887, "learning_rate": 9.641060534716245e-06, "loss": 0.436, "step": 9082 }, { "epoch": 0.4168234592262861, "grad_norm": 0.4816824793815613, "learning_rate": 9.640969306618768e-06, "loss": 0.4389, "step": 9083 }, { "epoch": 0.41686934973154055, "grad_norm": 0.45850440859794617, "learning_rate": 9.640878067361224e-06, "loss": 0.3283, "step": 9084 }, { "epoch": 0.416915240236795, "grad_norm": 0.4667164385318756, "learning_rate": 9.640786816943834e-06, "loss": 0.4307, "step": 9085 }, { "epoch": 0.4169611307420495, "grad_norm": 0.43857523798942566, "learning_rate": 9.640695555366817e-06, "loss": 0.3858, "step": 9086 }, { "epoch": 0.41700702124730393, "grad_norm": 0.4880298674106598, "learning_rate": 9.64060428263039e-06, "loss": 0.4606, "step": 9087 }, { "epoch": 0.4170529117525584, "grad_norm": 0.4753496050834656, "learning_rate": 9.640512998734774e-06, "loss": 0.4369, "step": 9088 }, { "epoch": 0.4170988022578129, "grad_norm": 0.4901658296585083, "learning_rate": 9.640421703680191e-06, "loss": 0.4804, "step": 9089 }, { "epoch": 0.4171446927630673, "grad_norm": 0.44069623947143555, "learning_rate": 9.640330397466856e-06, "loss": 0.3321, "step": 9090 }, { "epoch": 0.41719058326832176, "grad_norm": 0.46980416774749756, "learning_rate": 9.640239080094995e-06, "loss": 0.4239, "step": 9091 }, { "epoch": 0.41723647377357626, "grad_norm": 0.5105448365211487, "learning_rate": 9.640147751564819e-06, "loss": 0.521, "step": 9092 }, { "epoch": 0.4172823642788307, "grad_norm": 0.5242206454277039, "learning_rate": 9.640056411876554e-06, "loss": 0.4081, "step": 9093 }, { "epoch": 0.4173282547840852, "grad_norm": 0.45659470558166504, "learning_rate": 9.639965061030418e-06, "loss": 0.4055, "step": 9094 }, { "epoch": 0.41737414528933964, "grad_norm": 0.4579678773880005, "learning_rate": 9.63987369902663e-06, "loss": 0.3584, "step": 9095 }, { "epoch": 0.4174200357945941, "grad_norm": 0.4552818238735199, "learning_rate": 9.63978232586541e-06, "loss": 0.3537, "step": 9096 }, { "epoch": 0.4174659262998486, "grad_norm": 0.47954949736595154, "learning_rate": 9.63969094154698e-06, "loss": 0.4565, "step": 9097 }, { "epoch": 0.417511816805103, "grad_norm": 0.47457948327064514, "learning_rate": 9.639599546071554e-06, "loss": 0.3638, "step": 9098 }, { "epoch": 0.41755770731035746, "grad_norm": 0.49206864833831787, "learning_rate": 9.639508139439358e-06, "loss": 0.5135, "step": 9099 }, { "epoch": 0.41760359781561196, "grad_norm": 0.5259010791778564, "learning_rate": 9.639416721650609e-06, "loss": 0.5508, "step": 9100 }, { "epoch": 0.4176494883208664, "grad_norm": 0.6092624664306641, "learning_rate": 9.639325292705526e-06, "loss": 0.6375, "step": 9101 }, { "epoch": 0.4176953788261209, "grad_norm": 0.4506482779979706, "learning_rate": 9.639233852604331e-06, "loss": 0.368, "step": 9102 }, { "epoch": 0.41774126933137534, "grad_norm": 0.4400605261325836, "learning_rate": 9.639142401347242e-06, "loss": 0.3434, "step": 9103 }, { "epoch": 0.4177871598366298, "grad_norm": 0.5243903994560242, "learning_rate": 9.63905093893448e-06, "loss": 0.4622, "step": 9104 }, { "epoch": 0.4178330503418843, "grad_norm": 0.47471651434898376, "learning_rate": 9.638959465366264e-06, "loss": 0.4276, "step": 9105 }, { "epoch": 0.4178789408471387, "grad_norm": 0.4387849271297455, "learning_rate": 9.638867980642815e-06, "loss": 0.3598, "step": 9106 }, { "epoch": 0.41792483135239317, "grad_norm": 0.4695335030555725, "learning_rate": 9.638776484764351e-06, "loss": 0.3938, "step": 9107 }, { "epoch": 0.41797072185764766, "grad_norm": 0.45224127173423767, "learning_rate": 9.638684977731096e-06, "loss": 0.3562, "step": 9108 }, { "epoch": 0.4180166123629021, "grad_norm": 0.5024235844612122, "learning_rate": 9.638593459543265e-06, "loss": 0.4783, "step": 9109 }, { "epoch": 0.4180625028681566, "grad_norm": 0.45329779386520386, "learning_rate": 9.638501930201083e-06, "loss": 0.4151, "step": 9110 }, { "epoch": 0.41810839337341105, "grad_norm": 0.4989628493785858, "learning_rate": 9.638410389704768e-06, "loss": 0.462, "step": 9111 }, { "epoch": 0.4181542838786655, "grad_norm": 0.42240384221076965, "learning_rate": 9.638318838054538e-06, "loss": 0.2958, "step": 9112 }, { "epoch": 0.41820017438392, "grad_norm": 0.46871256828308105, "learning_rate": 9.638227275250618e-06, "loss": 0.4324, "step": 9113 }, { "epoch": 0.41824606488917443, "grad_norm": 0.4595646262168884, "learning_rate": 9.638135701293223e-06, "loss": 0.4537, "step": 9114 }, { "epoch": 0.41829195539442887, "grad_norm": 0.4329849183559418, "learning_rate": 9.638044116182577e-06, "loss": 0.3948, "step": 9115 }, { "epoch": 0.41833784589968337, "grad_norm": 0.5041369199752808, "learning_rate": 9.637952519918898e-06, "loss": 0.4718, "step": 9116 }, { "epoch": 0.4183837364049378, "grad_norm": 0.4626680016517639, "learning_rate": 9.637860912502406e-06, "loss": 0.4633, "step": 9117 }, { "epoch": 0.41842962691019225, "grad_norm": 0.45026925206184387, "learning_rate": 9.637769293933324e-06, "loss": 0.3674, "step": 9118 }, { "epoch": 0.41847551741544675, "grad_norm": 0.5466172695159912, "learning_rate": 9.637677664211868e-06, "loss": 0.4562, "step": 9119 }, { "epoch": 0.4185214079207012, "grad_norm": 0.46066147089004517, "learning_rate": 9.637586023338264e-06, "loss": 0.4645, "step": 9120 }, { "epoch": 0.4185672984259557, "grad_norm": 0.45379945635795593, "learning_rate": 9.637494371312726e-06, "loss": 0.4386, "step": 9121 }, { "epoch": 0.41861318893121013, "grad_norm": 0.4638664126396179, "learning_rate": 9.63740270813548e-06, "loss": 0.3932, "step": 9122 }, { "epoch": 0.4186590794364646, "grad_norm": 0.4590039551258087, "learning_rate": 9.637311033806744e-06, "loss": 0.3518, "step": 9123 }, { "epoch": 0.4187049699417191, "grad_norm": 0.49647775292396545, "learning_rate": 9.637219348326738e-06, "loss": 0.577, "step": 9124 }, { "epoch": 0.4187508604469735, "grad_norm": 0.5052891969680786, "learning_rate": 9.637127651695684e-06, "loss": 0.4724, "step": 9125 }, { "epoch": 0.41879675095222796, "grad_norm": 0.49633723497390747, "learning_rate": 9.6370359439138e-06, "loss": 0.4776, "step": 9126 }, { "epoch": 0.41884264145748246, "grad_norm": 0.5114006996154785, "learning_rate": 9.636944224981308e-06, "loss": 0.5153, "step": 9127 }, { "epoch": 0.4188885319627369, "grad_norm": 0.42441409826278687, "learning_rate": 9.63685249489843e-06, "loss": 0.3575, "step": 9128 }, { "epoch": 0.4189344224679914, "grad_norm": 0.4883800745010376, "learning_rate": 9.636760753665384e-06, "loss": 0.4491, "step": 9129 }, { "epoch": 0.41898031297324584, "grad_norm": 0.4798250198364258, "learning_rate": 9.636669001282393e-06, "loss": 0.3934, "step": 9130 }, { "epoch": 0.4190262034785003, "grad_norm": 0.4608350694179535, "learning_rate": 9.636577237749676e-06, "loss": 0.3904, "step": 9131 }, { "epoch": 0.4190720939837548, "grad_norm": 0.5018261671066284, "learning_rate": 9.636485463067453e-06, "loss": 0.5107, "step": 9132 }, { "epoch": 0.4191179844890092, "grad_norm": 0.4269445538520813, "learning_rate": 9.636393677235946e-06, "loss": 0.3553, "step": 9133 }, { "epoch": 0.41916387499426366, "grad_norm": 0.48879799246788025, "learning_rate": 9.636301880255376e-06, "loss": 0.4282, "step": 9134 }, { "epoch": 0.41920976549951816, "grad_norm": 0.4451883137226105, "learning_rate": 9.636210072125962e-06, "loss": 0.4078, "step": 9135 }, { "epoch": 0.4192556560047726, "grad_norm": 0.4177812933921814, "learning_rate": 9.636118252847929e-06, "loss": 0.3452, "step": 9136 }, { "epoch": 0.4193015465100271, "grad_norm": 0.480985552072525, "learning_rate": 9.636026422421492e-06, "loss": 0.371, "step": 9137 }, { "epoch": 0.41934743701528154, "grad_norm": 0.46856895089149475, "learning_rate": 9.635934580846875e-06, "loss": 0.4205, "step": 9138 }, { "epoch": 0.419393327520536, "grad_norm": 0.44325849413871765, "learning_rate": 9.6358427281243e-06, "loss": 0.3488, "step": 9139 }, { "epoch": 0.4194392180257905, "grad_norm": 0.4497213363647461, "learning_rate": 9.635750864253986e-06, "loss": 0.3653, "step": 9140 }, { "epoch": 0.4194851085310449, "grad_norm": 0.46376290917396545, "learning_rate": 9.635658989236154e-06, "loss": 0.4238, "step": 9141 }, { "epoch": 0.41953099903629937, "grad_norm": 0.4417935609817505, "learning_rate": 9.635567103071024e-06, "loss": 0.3823, "step": 9142 }, { "epoch": 0.41957688954155387, "grad_norm": 0.47996753454208374, "learning_rate": 9.63547520575882e-06, "loss": 0.4199, "step": 9143 }, { "epoch": 0.4196227800468083, "grad_norm": 0.47316473722457886, "learning_rate": 9.63538329729976e-06, "loss": 0.4307, "step": 9144 }, { "epoch": 0.4196686705520628, "grad_norm": 0.46204081177711487, "learning_rate": 9.635291377694066e-06, "loss": 0.3833, "step": 9145 }, { "epoch": 0.41971456105731725, "grad_norm": 0.4463976323604584, "learning_rate": 9.63519944694196e-06, "loss": 0.3962, "step": 9146 }, { "epoch": 0.4197604515625717, "grad_norm": 0.4870917499065399, "learning_rate": 9.635107505043662e-06, "loss": 0.4372, "step": 9147 }, { "epoch": 0.4198063420678262, "grad_norm": 0.5178536772727966, "learning_rate": 9.63501555199939e-06, "loss": 0.5154, "step": 9148 }, { "epoch": 0.41985223257308063, "grad_norm": 0.44570448994636536, "learning_rate": 9.634923587809374e-06, "loss": 0.3513, "step": 9149 }, { "epoch": 0.4198981230783351, "grad_norm": 0.47698888182640076, "learning_rate": 9.634831612473828e-06, "loss": 0.3736, "step": 9150 }, { "epoch": 0.41994401358358957, "grad_norm": 0.4657828211784363, "learning_rate": 9.634739625992972e-06, "loss": 0.4458, "step": 9151 }, { "epoch": 0.419989904088844, "grad_norm": 0.4672933518886566, "learning_rate": 9.634647628367033e-06, "loss": 0.4115, "step": 9152 }, { "epoch": 0.42003579459409845, "grad_norm": 0.4705240726470947, "learning_rate": 9.634555619596228e-06, "loss": 0.4102, "step": 9153 }, { "epoch": 0.42008168509935295, "grad_norm": 0.4999903440475464, "learning_rate": 9.63446359968078e-06, "loss": 0.4769, "step": 9154 }, { "epoch": 0.4201275756046074, "grad_norm": 0.521174967288971, "learning_rate": 9.634371568620908e-06, "loss": 0.539, "step": 9155 }, { "epoch": 0.4201734661098619, "grad_norm": 0.47343260049819946, "learning_rate": 9.634279526416836e-06, "loss": 0.4257, "step": 9156 }, { "epoch": 0.42021935661511634, "grad_norm": 0.4652255177497864, "learning_rate": 9.634187473068786e-06, "loss": 0.3951, "step": 9157 }, { "epoch": 0.4202652471203708, "grad_norm": 0.4572575092315674, "learning_rate": 9.634095408576975e-06, "loss": 0.3682, "step": 9158 }, { "epoch": 0.4203111376256253, "grad_norm": 0.4927283525466919, "learning_rate": 9.634003332941629e-06, "loss": 0.4164, "step": 9159 }, { "epoch": 0.4203570281308797, "grad_norm": 0.499971866607666, "learning_rate": 9.633911246162966e-06, "loss": 0.3992, "step": 9160 }, { "epoch": 0.42040291863613416, "grad_norm": 0.4839976727962494, "learning_rate": 9.63381914824121e-06, "loss": 0.4543, "step": 9161 }, { "epoch": 0.42044880914138866, "grad_norm": 0.45867499709129333, "learning_rate": 9.633727039176583e-06, "loss": 0.3848, "step": 9162 }, { "epoch": 0.4204946996466431, "grad_norm": 0.42866039276123047, "learning_rate": 9.633634918969304e-06, "loss": 0.3316, "step": 9163 }, { "epoch": 0.4205405901518976, "grad_norm": 0.44626402854919434, "learning_rate": 9.633542787619593e-06, "loss": 0.3421, "step": 9164 }, { "epoch": 0.42058648065715204, "grad_norm": 0.45159152150154114, "learning_rate": 9.633450645127676e-06, "loss": 0.4199, "step": 9165 }, { "epoch": 0.4206323711624065, "grad_norm": 0.4681631922721863, "learning_rate": 9.633358491493771e-06, "loss": 0.3974, "step": 9166 }, { "epoch": 0.420678261667661, "grad_norm": 0.4973943829536438, "learning_rate": 9.633266326718104e-06, "loss": 0.434, "step": 9167 }, { "epoch": 0.4207241521729154, "grad_norm": 0.5066848993301392, "learning_rate": 9.633174150800893e-06, "loss": 0.4319, "step": 9168 }, { "epoch": 0.42077004267816986, "grad_norm": 0.4840337634086609, "learning_rate": 9.633081963742358e-06, "loss": 0.3641, "step": 9169 }, { "epoch": 0.42081593318342436, "grad_norm": 0.47228360176086426, "learning_rate": 9.632989765542725e-06, "loss": 0.4501, "step": 9170 }, { "epoch": 0.4208618236886788, "grad_norm": 0.46816036105155945, "learning_rate": 9.632897556202214e-06, "loss": 0.4044, "step": 9171 }, { "epoch": 0.4209077141939333, "grad_norm": 0.4344419836997986, "learning_rate": 9.632805335721047e-06, "loss": 0.3297, "step": 9172 }, { "epoch": 0.42095360469918774, "grad_norm": 0.469348281621933, "learning_rate": 9.632713104099442e-06, "loss": 0.3914, "step": 9173 }, { "epoch": 0.4209994952044422, "grad_norm": 0.47385624051094055, "learning_rate": 9.632620861337627e-06, "loss": 0.3633, "step": 9174 }, { "epoch": 0.4210453857096967, "grad_norm": 0.4502538740634918, "learning_rate": 9.632528607435822e-06, "loss": 0.397, "step": 9175 }, { "epoch": 0.4210912762149511, "grad_norm": 0.4432981610298157, "learning_rate": 9.632436342394246e-06, "loss": 0.3933, "step": 9176 }, { "epoch": 0.42113716672020557, "grad_norm": 0.44716376066207886, "learning_rate": 9.632344066213122e-06, "loss": 0.3806, "step": 9177 }, { "epoch": 0.42118305722546007, "grad_norm": 0.46810510754585266, "learning_rate": 9.632251778892674e-06, "loss": 0.4201, "step": 9178 }, { "epoch": 0.4212289477307145, "grad_norm": 0.42006680369377136, "learning_rate": 9.632159480433122e-06, "loss": 0.3457, "step": 9179 }, { "epoch": 0.42127483823596895, "grad_norm": 0.4443822503089905, "learning_rate": 9.632067170834687e-06, "loss": 0.3501, "step": 9180 }, { "epoch": 0.42132072874122345, "grad_norm": 0.47762802243232727, "learning_rate": 9.631974850097595e-06, "loss": 0.3165, "step": 9181 }, { "epoch": 0.4213666192464779, "grad_norm": 0.4917104244232178, "learning_rate": 9.631882518222064e-06, "loss": 0.4505, "step": 9182 }, { "epoch": 0.4214125097517324, "grad_norm": 0.4593959152698517, "learning_rate": 9.631790175208317e-06, "loss": 0.3495, "step": 9183 }, { "epoch": 0.42145840025698683, "grad_norm": 0.5756860971450806, "learning_rate": 9.631697821056578e-06, "loss": 0.3994, "step": 9184 }, { "epoch": 0.4215042907622413, "grad_norm": 0.44248494505882263, "learning_rate": 9.631605455767066e-06, "loss": 0.3622, "step": 9185 }, { "epoch": 0.42155018126749577, "grad_norm": 0.4316681921482086, "learning_rate": 9.631513079340006e-06, "loss": 0.3223, "step": 9186 }, { "epoch": 0.4215960717727502, "grad_norm": 0.5344674587249756, "learning_rate": 9.631420691775617e-06, "loss": 0.4768, "step": 9187 }, { "epoch": 0.42164196227800466, "grad_norm": 0.45224109292030334, "learning_rate": 9.631328293074123e-06, "loss": 0.3535, "step": 9188 }, { "epoch": 0.42168785278325915, "grad_norm": 0.4437558948993683, "learning_rate": 9.631235883235748e-06, "loss": 0.3745, "step": 9189 }, { "epoch": 0.4217337432885136, "grad_norm": 0.49107006192207336, "learning_rate": 9.63114346226071e-06, "loss": 0.4793, "step": 9190 }, { "epoch": 0.4217796337937681, "grad_norm": 0.4538659453392029, "learning_rate": 9.631051030149236e-06, "loss": 0.3959, "step": 9191 }, { "epoch": 0.42182552429902254, "grad_norm": 0.47982704639434814, "learning_rate": 9.630958586901545e-06, "loss": 0.4302, "step": 9192 }, { "epoch": 0.421871414804277, "grad_norm": 0.4702761173248291, "learning_rate": 9.63086613251786e-06, "loss": 0.4455, "step": 9193 }, { "epoch": 0.4219173053095315, "grad_norm": 0.43576353788375854, "learning_rate": 9.630773666998404e-06, "loss": 0.3259, "step": 9194 }, { "epoch": 0.4219631958147859, "grad_norm": 0.4703406095504761, "learning_rate": 9.630681190343398e-06, "loss": 0.4793, "step": 9195 }, { "epoch": 0.42200908632004036, "grad_norm": 0.4469132423400879, "learning_rate": 9.630588702553065e-06, "loss": 0.4254, "step": 9196 }, { "epoch": 0.42205497682529486, "grad_norm": 0.48336511850357056, "learning_rate": 9.630496203627628e-06, "loss": 0.4489, "step": 9197 }, { "epoch": 0.4221008673305493, "grad_norm": 0.46181246638298035, "learning_rate": 9.63040369356731e-06, "loss": 0.4561, "step": 9198 }, { "epoch": 0.4221467578358038, "grad_norm": 0.48599866032600403, "learning_rate": 9.630311172372331e-06, "loss": 0.4654, "step": 9199 }, { "epoch": 0.42219264834105824, "grad_norm": 0.5396682620048523, "learning_rate": 9.630218640042917e-06, "loss": 0.3289, "step": 9200 }, { "epoch": 0.4222385388463127, "grad_norm": 0.4663090109825134, "learning_rate": 9.630126096579287e-06, "loss": 0.4402, "step": 9201 }, { "epoch": 0.4222844293515672, "grad_norm": 0.4351460039615631, "learning_rate": 9.630033541981664e-06, "loss": 0.3594, "step": 9202 }, { "epoch": 0.4223303198568216, "grad_norm": 0.41514748334884644, "learning_rate": 9.629940976250273e-06, "loss": 0.3182, "step": 9203 }, { "epoch": 0.42237621036207607, "grad_norm": 0.4399113655090332, "learning_rate": 9.629848399385335e-06, "loss": 0.3948, "step": 9204 }, { "epoch": 0.42242210086733056, "grad_norm": 0.5144763588905334, "learning_rate": 9.629755811387074e-06, "loss": 0.459, "step": 9205 }, { "epoch": 0.422467991372585, "grad_norm": 0.45055699348449707, "learning_rate": 9.62966321225571e-06, "loss": 0.3578, "step": 9206 }, { "epoch": 0.42251388187783945, "grad_norm": 0.46767714619636536, "learning_rate": 9.629570601991467e-06, "loss": 0.4006, "step": 9207 }, { "epoch": 0.42255977238309395, "grad_norm": 0.4586738348007202, "learning_rate": 9.62947798059457e-06, "loss": 0.3616, "step": 9208 }, { "epoch": 0.4226056628883484, "grad_norm": 0.4546884298324585, "learning_rate": 9.629385348065237e-06, "loss": 0.4148, "step": 9209 }, { "epoch": 0.4226515533936029, "grad_norm": 0.46073809266090393, "learning_rate": 9.629292704403694e-06, "loss": 0.3829, "step": 9210 }, { "epoch": 0.42269744389885733, "grad_norm": 0.4920588433742523, "learning_rate": 9.629200049610163e-06, "loss": 0.3888, "step": 9211 }, { "epoch": 0.42274333440411177, "grad_norm": 0.44922736287117004, "learning_rate": 9.629107383684868e-06, "loss": 0.4143, "step": 9212 }, { "epoch": 0.42278922490936627, "grad_norm": 0.46897852420806885, "learning_rate": 9.629014706628031e-06, "loss": 0.3605, "step": 9213 }, { "epoch": 0.4228351154146207, "grad_norm": 0.4589211940765381, "learning_rate": 9.628922018439872e-06, "loss": 0.3584, "step": 9214 }, { "epoch": 0.42288100591987515, "grad_norm": 0.5066733360290527, "learning_rate": 9.628829319120618e-06, "loss": 0.5265, "step": 9215 }, { "epoch": 0.42292689642512965, "grad_norm": 0.4305788278579712, "learning_rate": 9.62873660867049e-06, "loss": 0.3751, "step": 9216 }, { "epoch": 0.4229727869303841, "grad_norm": 0.4506034851074219, "learning_rate": 9.628643887089712e-06, "loss": 0.3819, "step": 9217 }, { "epoch": 0.4230186774356386, "grad_norm": 0.5699796080589294, "learning_rate": 9.628551154378508e-06, "loss": 0.4476, "step": 9218 }, { "epoch": 0.42306456794089303, "grad_norm": 0.47635558247566223, "learning_rate": 9.628458410537096e-06, "loss": 0.3653, "step": 9219 }, { "epoch": 0.4231104584461475, "grad_norm": 0.48368704319000244, "learning_rate": 9.628365655565703e-06, "loss": 0.4219, "step": 9220 }, { "epoch": 0.423156348951402, "grad_norm": 0.4511227607727051, "learning_rate": 9.628272889464554e-06, "loss": 0.4061, "step": 9221 }, { "epoch": 0.4232022394566564, "grad_norm": 0.42040878534317017, "learning_rate": 9.628180112233867e-06, "loss": 0.3503, "step": 9222 }, { "epoch": 0.42324812996191086, "grad_norm": 0.43288448452949524, "learning_rate": 9.628087323873867e-06, "loss": 0.3426, "step": 9223 }, { "epoch": 0.42329402046716536, "grad_norm": 0.4799714982509613, "learning_rate": 9.627994524384779e-06, "loss": 0.4161, "step": 9224 }, { "epoch": 0.4233399109724198, "grad_norm": 0.4229944944381714, "learning_rate": 9.627901713766825e-06, "loss": 0.3422, "step": 9225 }, { "epoch": 0.4233858014776743, "grad_norm": 0.43074628710746765, "learning_rate": 9.627808892020227e-06, "loss": 0.3511, "step": 9226 }, { "epoch": 0.42343169198292874, "grad_norm": 0.44593873620033264, "learning_rate": 9.627716059145209e-06, "loss": 0.411, "step": 9227 }, { "epoch": 0.4234775824881832, "grad_norm": 0.4194604456424713, "learning_rate": 9.627623215141995e-06, "loss": 0.3466, "step": 9228 }, { "epoch": 0.4235234729934377, "grad_norm": 0.4713863730430603, "learning_rate": 9.627530360010808e-06, "loss": 0.4614, "step": 9229 }, { "epoch": 0.4235693634986921, "grad_norm": 0.5067123770713806, "learning_rate": 9.627437493751871e-06, "loss": 0.5093, "step": 9230 }, { "epoch": 0.42361525400394656, "grad_norm": 0.4608362317085266, "learning_rate": 9.627344616365407e-06, "loss": 0.4213, "step": 9231 }, { "epoch": 0.42366114450920106, "grad_norm": 0.4694509208202362, "learning_rate": 9.627251727851638e-06, "loss": 0.369, "step": 9232 }, { "epoch": 0.4237070350144555, "grad_norm": 0.5229730606079102, "learning_rate": 9.627158828210793e-06, "loss": 0.4363, "step": 9233 }, { "epoch": 0.42375292551970994, "grad_norm": 0.4506397843360901, "learning_rate": 9.627065917443088e-06, "loss": 0.3655, "step": 9234 }, { "epoch": 0.42379881602496444, "grad_norm": 0.4641627073287964, "learning_rate": 9.626972995548751e-06, "loss": 0.4375, "step": 9235 }, { "epoch": 0.4238447065302189, "grad_norm": 0.44912189245224, "learning_rate": 9.626880062528004e-06, "loss": 0.4098, "step": 9236 }, { "epoch": 0.4238905970354734, "grad_norm": 0.4723019301891327, "learning_rate": 9.62678711838107e-06, "loss": 0.3611, "step": 9237 }, { "epoch": 0.4239364875407278, "grad_norm": 0.40141505002975464, "learning_rate": 9.626694163108175e-06, "loss": 0.2889, "step": 9238 }, { "epoch": 0.42398237804598227, "grad_norm": 0.5123217105865479, "learning_rate": 9.626601196709539e-06, "loss": 0.4302, "step": 9239 }, { "epoch": 0.42402826855123676, "grad_norm": 0.4635584354400635, "learning_rate": 9.626508219185388e-06, "loss": 0.4033, "step": 9240 }, { "epoch": 0.4240741590564912, "grad_norm": 0.43918484449386597, "learning_rate": 9.626415230535947e-06, "loss": 0.3181, "step": 9241 }, { "epoch": 0.42412004956174565, "grad_norm": 0.46661925315856934, "learning_rate": 9.626322230761434e-06, "loss": 0.4369, "step": 9242 }, { "epoch": 0.42416594006700015, "grad_norm": 0.4900226891040802, "learning_rate": 9.626229219862078e-06, "loss": 0.4706, "step": 9243 }, { "epoch": 0.4242118305722546, "grad_norm": 0.5329292416572571, "learning_rate": 9.6261361978381e-06, "loss": 0.4179, "step": 9244 }, { "epoch": 0.4242577210775091, "grad_norm": 0.4939621388912201, "learning_rate": 9.626043164689726e-06, "loss": 0.3436, "step": 9245 }, { "epoch": 0.42430361158276353, "grad_norm": 0.4607374966144562, "learning_rate": 9.625950120417177e-06, "loss": 0.4247, "step": 9246 }, { "epoch": 0.42434950208801797, "grad_norm": 0.46404802799224854, "learning_rate": 9.625857065020678e-06, "loss": 0.3985, "step": 9247 }, { "epoch": 0.42439539259327247, "grad_norm": 0.4763891398906708, "learning_rate": 9.625763998500452e-06, "loss": 0.4214, "step": 9248 }, { "epoch": 0.4244412830985269, "grad_norm": 0.4844805598258972, "learning_rate": 9.625670920856725e-06, "loss": 0.3892, "step": 9249 }, { "epoch": 0.42448717360378135, "grad_norm": 0.4685736298561096, "learning_rate": 9.62557783208972e-06, "loss": 0.4103, "step": 9250 }, { "epoch": 0.42453306410903585, "grad_norm": 0.8583676218986511, "learning_rate": 9.625484732199656e-06, "loss": 0.5084, "step": 9251 }, { "epoch": 0.4245789546142903, "grad_norm": 0.4483937621116638, "learning_rate": 9.625391621186767e-06, "loss": 0.3639, "step": 9252 }, { "epoch": 0.4246248451195448, "grad_norm": 0.4661558270454407, "learning_rate": 9.625298499051267e-06, "loss": 0.401, "step": 9253 }, { "epoch": 0.42467073562479923, "grad_norm": 0.4873422384262085, "learning_rate": 9.625205365793385e-06, "loss": 0.4293, "step": 9254 }, { "epoch": 0.4247166261300537, "grad_norm": 0.5073668956756592, "learning_rate": 9.625112221413343e-06, "loss": 0.3772, "step": 9255 }, { "epoch": 0.4247625166353082, "grad_norm": 0.47060680389404297, "learning_rate": 9.625019065911368e-06, "loss": 0.392, "step": 9256 }, { "epoch": 0.4248084071405626, "grad_norm": 0.5233715772628784, "learning_rate": 9.624925899287679e-06, "loss": 0.4704, "step": 9257 }, { "epoch": 0.42485429764581706, "grad_norm": 0.4383015036582947, "learning_rate": 9.624832721542504e-06, "loss": 0.3227, "step": 9258 }, { "epoch": 0.42490018815107156, "grad_norm": 0.4938104450702667, "learning_rate": 9.624739532676065e-06, "loss": 0.4378, "step": 9259 }, { "epoch": 0.424946078656326, "grad_norm": 0.4555651843547821, "learning_rate": 9.62464633268859e-06, "loss": 0.4051, "step": 9260 }, { "epoch": 0.4249919691615805, "grad_norm": 0.4442608654499054, "learning_rate": 9.624553121580298e-06, "loss": 0.3711, "step": 9261 }, { "epoch": 0.42503785966683494, "grad_norm": 0.47801512479782104, "learning_rate": 9.624459899351416e-06, "loss": 0.4486, "step": 9262 }, { "epoch": 0.4250837501720894, "grad_norm": 0.4152389466762543, "learning_rate": 9.624366666002168e-06, "loss": 0.3147, "step": 9263 }, { "epoch": 0.4251296406773439, "grad_norm": 0.44813215732574463, "learning_rate": 9.624273421532776e-06, "loss": 0.3317, "step": 9264 }, { "epoch": 0.4251755311825983, "grad_norm": 0.5241290926933289, "learning_rate": 9.624180165943467e-06, "loss": 0.5044, "step": 9265 }, { "epoch": 0.42522142168785276, "grad_norm": 0.8696032762527466, "learning_rate": 9.624086899234465e-06, "loss": 0.5278, "step": 9266 }, { "epoch": 0.42526731219310726, "grad_norm": 0.47406888008117676, "learning_rate": 9.62399362140599e-06, "loss": 0.4308, "step": 9267 }, { "epoch": 0.4253132026983617, "grad_norm": 0.49069467186927795, "learning_rate": 9.623900332458273e-06, "loss": 0.4554, "step": 9268 }, { "epoch": 0.42535909320361615, "grad_norm": 0.47076311707496643, "learning_rate": 9.623807032391533e-06, "loss": 0.4368, "step": 9269 }, { "epoch": 0.42540498370887064, "grad_norm": 0.4652973413467407, "learning_rate": 9.623713721205998e-06, "loss": 0.4413, "step": 9270 }, { "epoch": 0.4254508742141251, "grad_norm": 0.45212292671203613, "learning_rate": 9.62362039890189e-06, "loss": 0.3793, "step": 9271 }, { "epoch": 0.4254967647193796, "grad_norm": 0.5371978878974915, "learning_rate": 9.623527065479434e-06, "loss": 0.5501, "step": 9272 }, { "epoch": 0.425542655224634, "grad_norm": 0.5082307457923889, "learning_rate": 9.623433720938854e-06, "loss": 0.3749, "step": 9273 }, { "epoch": 0.42558854572988847, "grad_norm": 0.5133089423179626, "learning_rate": 9.623340365280378e-06, "loss": 0.3987, "step": 9274 }, { "epoch": 0.42563443623514297, "grad_norm": 0.47863346338272095, "learning_rate": 9.623246998504226e-06, "loss": 0.4279, "step": 9275 }, { "epoch": 0.4256803267403974, "grad_norm": 0.4552866220474243, "learning_rate": 9.623153620610623e-06, "loss": 0.3884, "step": 9276 }, { "epoch": 0.42572621724565185, "grad_norm": 0.45817872881889343, "learning_rate": 9.623060231599796e-06, "loss": 0.3832, "step": 9277 }, { "epoch": 0.42577210775090635, "grad_norm": 0.45008647441864014, "learning_rate": 9.622966831471967e-06, "loss": 0.3713, "step": 9278 }, { "epoch": 0.4258179982561608, "grad_norm": 0.4441007375717163, "learning_rate": 9.622873420227363e-06, "loss": 0.3495, "step": 9279 }, { "epoch": 0.4258638887614153, "grad_norm": 0.46405982971191406, "learning_rate": 9.622779997866208e-06, "loss": 0.3886, "step": 9280 }, { "epoch": 0.42590977926666973, "grad_norm": 0.49822187423706055, "learning_rate": 9.622686564388724e-06, "loss": 0.3776, "step": 9281 }, { "epoch": 0.4259556697719242, "grad_norm": 0.4638362526893616, "learning_rate": 9.622593119795139e-06, "loss": 0.4053, "step": 9282 }, { "epoch": 0.42600156027717867, "grad_norm": 0.4604857861995697, "learning_rate": 9.622499664085676e-06, "loss": 0.4014, "step": 9283 }, { "epoch": 0.4260474507824331, "grad_norm": 0.5440638065338135, "learning_rate": 9.62240619726056e-06, "loss": 0.5325, "step": 9284 }, { "epoch": 0.42609334128768755, "grad_norm": 0.46557363867759705, "learning_rate": 9.622312719320017e-06, "loss": 0.387, "step": 9285 }, { "epoch": 0.42613923179294205, "grad_norm": 0.4589885175228119, "learning_rate": 9.62221923026427e-06, "loss": 0.4085, "step": 9286 }, { "epoch": 0.4261851222981965, "grad_norm": 0.6126189827919006, "learning_rate": 9.622125730093546e-06, "loss": 0.5568, "step": 9287 }, { "epoch": 0.426231012803451, "grad_norm": 0.4489326477050781, "learning_rate": 9.622032218808066e-06, "loss": 0.3142, "step": 9288 }, { "epoch": 0.42627690330870543, "grad_norm": 0.4269963502883911, "learning_rate": 9.621938696408059e-06, "loss": 0.3284, "step": 9289 }, { "epoch": 0.4263227938139599, "grad_norm": 0.45649832487106323, "learning_rate": 9.621845162893748e-06, "loss": 0.4132, "step": 9290 }, { "epoch": 0.4263686843192144, "grad_norm": 0.46363863348960876, "learning_rate": 9.621751618265356e-06, "loss": 0.4587, "step": 9291 }, { "epoch": 0.4264145748244688, "grad_norm": 0.471809446811676, "learning_rate": 9.621658062523113e-06, "loss": 0.4442, "step": 9292 }, { "epoch": 0.42646046532972326, "grad_norm": 0.4662055969238281, "learning_rate": 9.62156449566724e-06, "loss": 0.4064, "step": 9293 }, { "epoch": 0.42650635583497776, "grad_norm": 0.5097374320030212, "learning_rate": 9.621470917697962e-06, "loss": 0.4923, "step": 9294 }, { "epoch": 0.4265522463402322, "grad_norm": 0.44528651237487793, "learning_rate": 9.621377328615505e-06, "loss": 0.3651, "step": 9295 }, { "epoch": 0.42659813684548664, "grad_norm": 0.4545913636684418, "learning_rate": 9.621283728420095e-06, "loss": 0.397, "step": 9296 }, { "epoch": 0.42664402735074114, "grad_norm": 0.4809154272079468, "learning_rate": 9.621190117111956e-06, "loss": 0.4121, "step": 9297 }, { "epoch": 0.4266899178559956, "grad_norm": 0.465979665517807, "learning_rate": 9.621096494691313e-06, "loss": 0.3999, "step": 9298 }, { "epoch": 0.4267358083612501, "grad_norm": 0.4242551326751709, "learning_rate": 9.621002861158393e-06, "loss": 0.3317, "step": 9299 }, { "epoch": 0.4267816988665045, "grad_norm": 0.41084274649620056, "learning_rate": 9.620909216513418e-06, "loss": 0.2963, "step": 9300 }, { "epoch": 0.42682758937175896, "grad_norm": 0.44750308990478516, "learning_rate": 9.620815560756615e-06, "loss": 0.341, "step": 9301 }, { "epoch": 0.42687347987701346, "grad_norm": 0.45712295174598694, "learning_rate": 9.620721893888208e-06, "loss": 0.4317, "step": 9302 }, { "epoch": 0.4269193703822679, "grad_norm": 0.47288960218429565, "learning_rate": 9.620628215908424e-06, "loss": 0.4755, "step": 9303 }, { "epoch": 0.42696526088752235, "grad_norm": 0.5106072425842285, "learning_rate": 9.620534526817488e-06, "loss": 0.4701, "step": 9304 }, { "epoch": 0.42701115139277684, "grad_norm": 0.4548209607601166, "learning_rate": 9.620440826615624e-06, "loss": 0.3885, "step": 9305 }, { "epoch": 0.4270570418980313, "grad_norm": 0.4438328444957733, "learning_rate": 9.62034711530306e-06, "loss": 0.3867, "step": 9306 }, { "epoch": 0.4271029324032858, "grad_norm": 0.46092694997787476, "learning_rate": 9.620253392880017e-06, "loss": 0.4293, "step": 9307 }, { "epoch": 0.4271488229085402, "grad_norm": 0.42802494764328003, "learning_rate": 9.620159659346722e-06, "loss": 0.362, "step": 9308 }, { "epoch": 0.42719471341379467, "grad_norm": 0.4766315221786499, "learning_rate": 9.620065914703403e-06, "loss": 0.4794, "step": 9309 }, { "epoch": 0.42724060391904917, "grad_norm": 0.4250241816043854, "learning_rate": 9.619972158950284e-06, "loss": 0.3648, "step": 9310 }, { "epoch": 0.4272864944243036, "grad_norm": 0.4395454525947571, "learning_rate": 9.61987839208759e-06, "loss": 0.3661, "step": 9311 }, { "epoch": 0.42733238492955805, "grad_norm": 0.44705507159233093, "learning_rate": 9.619784614115545e-06, "loss": 0.3744, "step": 9312 }, { "epoch": 0.42737827543481255, "grad_norm": 0.44999632239341736, "learning_rate": 9.619690825034376e-06, "loss": 0.3945, "step": 9313 }, { "epoch": 0.427424165940067, "grad_norm": 0.46289411187171936, "learning_rate": 9.61959702484431e-06, "loss": 0.4242, "step": 9314 }, { "epoch": 0.4274700564453215, "grad_norm": 0.5039185285568237, "learning_rate": 9.619503213545571e-06, "loss": 0.4496, "step": 9315 }, { "epoch": 0.42751594695057593, "grad_norm": 0.46557557582855225, "learning_rate": 9.619409391138384e-06, "loss": 0.4462, "step": 9316 }, { "epoch": 0.4275618374558304, "grad_norm": 0.44910427927970886, "learning_rate": 9.619315557622977e-06, "loss": 0.4513, "step": 9317 }, { "epoch": 0.42760772796108487, "grad_norm": 0.4621914327144623, "learning_rate": 9.619221712999572e-06, "loss": 0.4038, "step": 9318 }, { "epoch": 0.4276536184663393, "grad_norm": 0.43913882970809937, "learning_rate": 9.619127857268398e-06, "loss": 0.3546, "step": 9319 }, { "epoch": 0.42769950897159376, "grad_norm": 0.4547036588191986, "learning_rate": 9.619033990429678e-06, "loss": 0.4286, "step": 9320 }, { "epoch": 0.42774539947684825, "grad_norm": 0.4780140817165375, "learning_rate": 9.618940112483639e-06, "loss": 0.382, "step": 9321 }, { "epoch": 0.4277912899821027, "grad_norm": 0.4566645622253418, "learning_rate": 9.618846223430508e-06, "loss": 0.365, "step": 9322 }, { "epoch": 0.42783718048735714, "grad_norm": 0.4559045135974884, "learning_rate": 9.61875232327051e-06, "loss": 0.4314, "step": 9323 }, { "epoch": 0.42788307099261164, "grad_norm": 0.4224149286746979, "learning_rate": 9.61865841200387e-06, "loss": 0.3271, "step": 9324 }, { "epoch": 0.4279289614978661, "grad_norm": 0.48331961035728455, "learning_rate": 9.618564489630813e-06, "loss": 0.4475, "step": 9325 }, { "epoch": 0.4279748520031206, "grad_norm": 0.4809322655200958, "learning_rate": 9.618470556151566e-06, "loss": 0.3672, "step": 9326 }, { "epoch": 0.428020742508375, "grad_norm": 0.4940967559814453, "learning_rate": 9.618376611566355e-06, "loss": 0.4648, "step": 9327 }, { "epoch": 0.42806663301362946, "grad_norm": 0.4519594609737396, "learning_rate": 9.618282655875409e-06, "loss": 0.3651, "step": 9328 }, { "epoch": 0.42811252351888396, "grad_norm": 0.4870782494544983, "learning_rate": 9.618188689078947e-06, "loss": 0.4205, "step": 9329 }, { "epoch": 0.4281584140241384, "grad_norm": 0.4744412899017334, "learning_rate": 9.618094711177201e-06, "loss": 0.4473, "step": 9330 }, { "epoch": 0.42820430452939284, "grad_norm": 0.43273308873176575, "learning_rate": 9.618000722170392e-06, "loss": 0.3325, "step": 9331 }, { "epoch": 0.42825019503464734, "grad_norm": 0.538695752620697, "learning_rate": 9.617906722058751e-06, "loss": 0.4849, "step": 9332 }, { "epoch": 0.4282960855399018, "grad_norm": 0.5047197937965393, "learning_rate": 9.617812710842502e-06, "loss": 0.4393, "step": 9333 }, { "epoch": 0.4283419760451563, "grad_norm": 0.45876622200012207, "learning_rate": 9.617718688521872e-06, "loss": 0.4118, "step": 9334 }, { "epoch": 0.4283878665504107, "grad_norm": 0.5221706628799438, "learning_rate": 9.617624655097084e-06, "loss": 0.4447, "step": 9335 }, { "epoch": 0.42843375705566517, "grad_norm": 0.4644910991191864, "learning_rate": 9.617530610568365e-06, "loss": 0.4468, "step": 9336 }, { "epoch": 0.42847964756091966, "grad_norm": 0.47872182726860046, "learning_rate": 9.617436554935943e-06, "loss": 0.4131, "step": 9337 }, { "epoch": 0.4285255380661741, "grad_norm": 0.43918725848197937, "learning_rate": 9.617342488200045e-06, "loss": 0.3405, "step": 9338 }, { "epoch": 0.42857142857142855, "grad_norm": 0.4436374008655548, "learning_rate": 9.617248410360895e-06, "loss": 0.365, "step": 9339 }, { "epoch": 0.42861731907668305, "grad_norm": 0.45412611961364746, "learning_rate": 9.617154321418718e-06, "loss": 0.4159, "step": 9340 }, { "epoch": 0.4286632095819375, "grad_norm": 0.4963136315345764, "learning_rate": 9.617060221373743e-06, "loss": 0.4397, "step": 9341 }, { "epoch": 0.428709100087192, "grad_norm": 0.43968525528907776, "learning_rate": 9.616966110226196e-06, "loss": 0.3423, "step": 9342 }, { "epoch": 0.4287549905924464, "grad_norm": 0.41697973012924194, "learning_rate": 9.616871987976302e-06, "loss": 0.3143, "step": 9343 }, { "epoch": 0.42880088109770087, "grad_norm": 0.4994734525680542, "learning_rate": 9.616777854624288e-06, "loss": 0.5338, "step": 9344 }, { "epoch": 0.42884677160295537, "grad_norm": 0.436317503452301, "learning_rate": 9.616683710170383e-06, "loss": 0.3557, "step": 9345 }, { "epoch": 0.4288926621082098, "grad_norm": 0.4690050482749939, "learning_rate": 9.616589554614807e-06, "loss": 0.4209, "step": 9346 }, { "epoch": 0.42893855261346425, "grad_norm": 0.5130156874656677, "learning_rate": 9.616495387957791e-06, "loss": 0.4243, "step": 9347 }, { "epoch": 0.42898444311871875, "grad_norm": 0.4453177750110626, "learning_rate": 9.616401210199562e-06, "loss": 0.3717, "step": 9348 }, { "epoch": 0.4290303336239732, "grad_norm": 0.4651983380317688, "learning_rate": 9.616307021340345e-06, "loss": 0.4732, "step": 9349 }, { "epoch": 0.4290762241292277, "grad_norm": 0.4337430000305176, "learning_rate": 9.616212821380367e-06, "loss": 0.4131, "step": 9350 }, { "epoch": 0.42912211463448213, "grad_norm": 0.4609726667404175, "learning_rate": 9.616118610319851e-06, "loss": 0.3984, "step": 9351 }, { "epoch": 0.4291680051397366, "grad_norm": 0.47717565298080444, "learning_rate": 9.61602438815903e-06, "loss": 0.4544, "step": 9352 }, { "epoch": 0.4292138956449911, "grad_norm": 0.4874224364757538, "learning_rate": 9.615930154898126e-06, "loss": 0.4817, "step": 9353 }, { "epoch": 0.4292597861502455, "grad_norm": 0.5027182102203369, "learning_rate": 9.615835910537365e-06, "loss": 0.4354, "step": 9354 }, { "epoch": 0.42930567665549996, "grad_norm": 0.4737550616264343, "learning_rate": 9.615741655076978e-06, "loss": 0.3733, "step": 9355 }, { "epoch": 0.42935156716075445, "grad_norm": 0.4546607434749603, "learning_rate": 9.615647388517188e-06, "loss": 0.4453, "step": 9356 }, { "epoch": 0.4293974576660089, "grad_norm": 0.46273118257522583, "learning_rate": 9.615553110858222e-06, "loss": 0.3763, "step": 9357 }, { "epoch": 0.42944334817126334, "grad_norm": 0.43113821744918823, "learning_rate": 9.61545882210031e-06, "loss": 0.3462, "step": 9358 }, { "epoch": 0.42948923867651784, "grad_norm": 0.4763573408126831, "learning_rate": 9.615364522243674e-06, "loss": 0.4053, "step": 9359 }, { "epoch": 0.4295351291817723, "grad_norm": 0.48686087131500244, "learning_rate": 9.615270211288544e-06, "loss": 0.4547, "step": 9360 }, { "epoch": 0.4295810196870268, "grad_norm": 0.41065531969070435, "learning_rate": 9.615175889235145e-06, "loss": 0.2928, "step": 9361 }, { "epoch": 0.4296269101922812, "grad_norm": 0.437703400850296, "learning_rate": 9.615081556083705e-06, "loss": 0.3571, "step": 9362 }, { "epoch": 0.42967280069753566, "grad_norm": 0.4673965275287628, "learning_rate": 9.61498721183445e-06, "loss": 0.3705, "step": 9363 }, { "epoch": 0.42971869120279016, "grad_norm": 0.43763649463653564, "learning_rate": 9.614892856487608e-06, "loss": 0.3493, "step": 9364 }, { "epoch": 0.4297645817080446, "grad_norm": 0.514441728591919, "learning_rate": 9.614798490043405e-06, "loss": 0.502, "step": 9365 }, { "epoch": 0.42981047221329904, "grad_norm": 0.45282578468322754, "learning_rate": 9.614704112502066e-06, "loss": 0.41, "step": 9366 }, { "epoch": 0.42985636271855354, "grad_norm": 0.4755251109600067, "learning_rate": 9.614609723863824e-06, "loss": 0.3975, "step": 9367 }, { "epoch": 0.429902253223808, "grad_norm": 0.5186518430709839, "learning_rate": 9.614515324128899e-06, "loss": 0.5104, "step": 9368 }, { "epoch": 0.4299481437290625, "grad_norm": 0.46767401695251465, "learning_rate": 9.614420913297523e-06, "loss": 0.4207, "step": 9369 }, { "epoch": 0.4299940342343169, "grad_norm": 0.4562342166900635, "learning_rate": 9.614326491369918e-06, "loss": 0.4023, "step": 9370 }, { "epoch": 0.43003992473957137, "grad_norm": 0.5214709639549255, "learning_rate": 9.614232058346317e-06, "loss": 0.5009, "step": 9371 }, { "epoch": 0.43008581524482586, "grad_norm": 0.43774959444999695, "learning_rate": 9.614137614226944e-06, "loss": 0.3507, "step": 9372 }, { "epoch": 0.4301317057500803, "grad_norm": 0.4659118950366974, "learning_rate": 9.614043159012025e-06, "loss": 0.4417, "step": 9373 }, { "epoch": 0.43017759625533475, "grad_norm": 0.4844622313976288, "learning_rate": 9.613948692701788e-06, "loss": 0.4637, "step": 9374 }, { "epoch": 0.43022348676058925, "grad_norm": 0.4660356640815735, "learning_rate": 9.613854215296463e-06, "loss": 0.4089, "step": 9375 }, { "epoch": 0.4302693772658437, "grad_norm": 0.5010260343551636, "learning_rate": 9.613759726796274e-06, "loss": 0.4641, "step": 9376 }, { "epoch": 0.4303152677710982, "grad_norm": 0.45488470792770386, "learning_rate": 9.613665227201448e-06, "loss": 0.4029, "step": 9377 }, { "epoch": 0.43036115827635263, "grad_norm": 0.5302449464797974, "learning_rate": 9.613570716512214e-06, "loss": 0.6093, "step": 9378 }, { "epoch": 0.43040704878160707, "grad_norm": 0.45343151688575745, "learning_rate": 9.613476194728798e-06, "loss": 0.3532, "step": 9379 }, { "epoch": 0.43045293928686157, "grad_norm": 0.42666423320770264, "learning_rate": 9.613381661851428e-06, "loss": 0.3315, "step": 9380 }, { "epoch": 0.430498829792116, "grad_norm": 0.40548163652420044, "learning_rate": 9.613287117880331e-06, "loss": 0.3252, "step": 9381 }, { "epoch": 0.43054472029737045, "grad_norm": 0.5159562230110168, "learning_rate": 9.613192562815734e-06, "loss": 0.4775, "step": 9382 }, { "epoch": 0.43059061080262495, "grad_norm": 0.4368624687194824, "learning_rate": 9.613097996657866e-06, "loss": 0.3283, "step": 9383 }, { "epoch": 0.4306365013078794, "grad_norm": 0.4870242476463318, "learning_rate": 9.613003419406954e-06, "loss": 0.4814, "step": 9384 }, { "epoch": 0.43068239181313384, "grad_norm": 0.4523240029811859, "learning_rate": 9.612908831063223e-06, "loss": 0.4047, "step": 9385 }, { "epoch": 0.43072828231838833, "grad_norm": 0.4423651397228241, "learning_rate": 9.612814231626903e-06, "loss": 0.3166, "step": 9386 }, { "epoch": 0.4307741728236428, "grad_norm": 0.4728807508945465, "learning_rate": 9.612719621098219e-06, "loss": 0.4265, "step": 9387 }, { "epoch": 0.4308200633288973, "grad_norm": 0.4348207116127014, "learning_rate": 9.612624999477403e-06, "loss": 0.3906, "step": 9388 }, { "epoch": 0.4308659538341517, "grad_norm": 0.47058945894241333, "learning_rate": 9.612530366764677e-06, "loss": 0.4664, "step": 9389 }, { "epoch": 0.43091184433940616, "grad_norm": 0.4843747317790985, "learning_rate": 9.612435722960271e-06, "loss": 0.4426, "step": 9390 }, { "epoch": 0.43095773484466066, "grad_norm": 0.46338361501693726, "learning_rate": 9.612341068064415e-06, "loss": 0.3525, "step": 9391 }, { "epoch": 0.4310036253499151, "grad_norm": 0.4496888518333435, "learning_rate": 9.612246402077335e-06, "loss": 0.3451, "step": 9392 }, { "epoch": 0.43104951585516954, "grad_norm": 0.4750821888446808, "learning_rate": 9.612151724999254e-06, "loss": 0.4109, "step": 9393 }, { "epoch": 0.43109540636042404, "grad_norm": 0.47097453474998474, "learning_rate": 9.612057036830407e-06, "loss": 0.4209, "step": 9394 }, { "epoch": 0.4311412968656785, "grad_norm": 0.45879799127578735, "learning_rate": 9.611962337571018e-06, "loss": 0.3614, "step": 9395 }, { "epoch": 0.431187187370933, "grad_norm": 0.43468353152275085, "learning_rate": 9.611867627221314e-06, "loss": 0.3744, "step": 9396 }, { "epoch": 0.4312330778761874, "grad_norm": 0.4649869501590729, "learning_rate": 9.611772905781526e-06, "loss": 0.4359, "step": 9397 }, { "epoch": 0.43127896838144186, "grad_norm": 0.47029486298561096, "learning_rate": 9.611678173251876e-06, "loss": 0.4371, "step": 9398 }, { "epoch": 0.43132485888669636, "grad_norm": 0.45058026909828186, "learning_rate": 9.6115834296326e-06, "loss": 0.3673, "step": 9399 }, { "epoch": 0.4313707493919508, "grad_norm": 0.5006216764450073, "learning_rate": 9.61148867492392e-06, "loss": 0.5004, "step": 9400 }, { "epoch": 0.43141663989720525, "grad_norm": 0.504587709903717, "learning_rate": 9.611393909126063e-06, "loss": 0.4078, "step": 9401 }, { "epoch": 0.43146253040245974, "grad_norm": 0.46144551038742065, "learning_rate": 9.61129913223926e-06, "loss": 0.4672, "step": 9402 }, { "epoch": 0.4315084209077142, "grad_norm": 0.4192473590373993, "learning_rate": 9.611204344263739e-06, "loss": 0.323, "step": 9403 }, { "epoch": 0.4315543114129687, "grad_norm": 0.45751240849494934, "learning_rate": 9.611109545199726e-06, "loss": 0.3884, "step": 9404 }, { "epoch": 0.4316002019182231, "grad_norm": 0.49888166785240173, "learning_rate": 9.61101473504745e-06, "loss": 0.4231, "step": 9405 }, { "epoch": 0.43164609242347757, "grad_norm": 0.455972284078598, "learning_rate": 9.61091991380714e-06, "loss": 0.4122, "step": 9406 }, { "epoch": 0.43169198292873207, "grad_norm": 0.4863723814487457, "learning_rate": 9.610825081479021e-06, "loss": 0.4429, "step": 9407 }, { "epoch": 0.4317378734339865, "grad_norm": 0.44898533821105957, "learning_rate": 9.610730238063324e-06, "loss": 0.3827, "step": 9408 }, { "epoch": 0.43178376393924095, "grad_norm": 0.5116301774978638, "learning_rate": 9.610635383560276e-06, "loss": 0.4845, "step": 9409 }, { "epoch": 0.43182965444449545, "grad_norm": 0.463850200176239, "learning_rate": 9.610540517970105e-06, "loss": 0.382, "step": 9410 }, { "epoch": 0.4318755449497499, "grad_norm": 0.4139117896556854, "learning_rate": 9.610445641293037e-06, "loss": 0.3493, "step": 9411 }, { "epoch": 0.43192143545500433, "grad_norm": 0.43511831760406494, "learning_rate": 9.610350753529306e-06, "loss": 0.3891, "step": 9412 }, { "epoch": 0.43196732596025883, "grad_norm": 0.5639481544494629, "learning_rate": 9.610255854679134e-06, "loss": 0.6202, "step": 9413 }, { "epoch": 0.4320132164655133, "grad_norm": 0.4561227560043335, "learning_rate": 9.610160944742752e-06, "loss": 0.364, "step": 9414 }, { "epoch": 0.43205910697076777, "grad_norm": 0.48013460636138916, "learning_rate": 9.61006602372039e-06, "loss": 0.4019, "step": 9415 }, { "epoch": 0.4321049974760222, "grad_norm": 0.4754844009876251, "learning_rate": 9.609971091612272e-06, "loss": 0.3712, "step": 9416 }, { "epoch": 0.43215088798127665, "grad_norm": 0.48912543058395386, "learning_rate": 9.60987614841863e-06, "loss": 0.4797, "step": 9417 }, { "epoch": 0.43219677848653115, "grad_norm": 0.4525083303451538, "learning_rate": 9.60978119413969e-06, "loss": 0.4012, "step": 9418 }, { "epoch": 0.4322426689917856, "grad_norm": 0.4672132730484009, "learning_rate": 9.609686228775682e-06, "loss": 0.4231, "step": 9419 }, { "epoch": 0.43228855949704004, "grad_norm": 0.45324018597602844, "learning_rate": 9.609591252326831e-06, "loss": 0.397, "step": 9420 }, { "epoch": 0.43233445000229453, "grad_norm": 0.49847131967544556, "learning_rate": 9.60949626479337e-06, "loss": 0.5175, "step": 9421 }, { "epoch": 0.432380340507549, "grad_norm": 0.4509052336215973, "learning_rate": 9.609401266175524e-06, "loss": 0.3989, "step": 9422 }, { "epoch": 0.4324262310128035, "grad_norm": 0.45550236105918884, "learning_rate": 9.609306256473522e-06, "loss": 0.3819, "step": 9423 }, { "epoch": 0.4324721215180579, "grad_norm": 0.41421663761138916, "learning_rate": 9.609211235687595e-06, "loss": 0.3217, "step": 9424 }, { "epoch": 0.43251801202331236, "grad_norm": 0.4731671214103699, "learning_rate": 9.60911620381797e-06, "loss": 0.4454, "step": 9425 }, { "epoch": 0.43256390252856686, "grad_norm": 0.44386017322540283, "learning_rate": 9.609021160864873e-06, "loss": 0.3974, "step": 9426 }, { "epoch": 0.4326097930338213, "grad_norm": 0.4222288131713867, "learning_rate": 9.608926106828537e-06, "loss": 0.3068, "step": 9427 }, { "epoch": 0.43265568353907574, "grad_norm": 0.4673410654067993, "learning_rate": 9.608831041709188e-06, "loss": 0.3837, "step": 9428 }, { "epoch": 0.43270157404433024, "grad_norm": 0.4927833676338196, "learning_rate": 9.608735965507055e-06, "loss": 0.4184, "step": 9429 }, { "epoch": 0.4327474645495847, "grad_norm": 0.46952155232429504, "learning_rate": 9.608640878222367e-06, "loss": 0.4006, "step": 9430 }, { "epoch": 0.4327933550548392, "grad_norm": 0.4492369592189789, "learning_rate": 9.60854577985535e-06, "loss": 0.3567, "step": 9431 }, { "epoch": 0.4328392455600936, "grad_norm": 0.49307572841644287, "learning_rate": 9.608450670406235e-06, "loss": 0.4771, "step": 9432 }, { "epoch": 0.43288513606534806, "grad_norm": 0.9059242606163025, "learning_rate": 9.608355549875252e-06, "loss": 0.3824, "step": 9433 }, { "epoch": 0.43293102657060256, "grad_norm": 0.44346991181373596, "learning_rate": 9.608260418262628e-06, "loss": 0.3705, "step": 9434 }, { "epoch": 0.432976917075857, "grad_norm": 0.4912658631801605, "learning_rate": 9.608165275568593e-06, "loss": 0.4065, "step": 9435 }, { "epoch": 0.43302280758111145, "grad_norm": 0.4544029235839844, "learning_rate": 9.608070121793373e-06, "loss": 0.4197, "step": 9436 }, { "epoch": 0.43306869808636594, "grad_norm": 0.4280281364917755, "learning_rate": 9.6079749569372e-06, "loss": 0.3548, "step": 9437 }, { "epoch": 0.4331145885916204, "grad_norm": 0.46020108461380005, "learning_rate": 9.607879781000303e-06, "loss": 0.4141, "step": 9438 }, { "epoch": 0.4331604790968749, "grad_norm": 0.48227033019065857, "learning_rate": 9.607784593982906e-06, "loss": 0.4336, "step": 9439 }, { "epoch": 0.4332063696021293, "grad_norm": 0.5099912881851196, "learning_rate": 9.607689395885244e-06, "loss": 0.4812, "step": 9440 }, { "epoch": 0.43325226010738377, "grad_norm": 0.5185948014259338, "learning_rate": 9.607594186707542e-06, "loss": 0.425, "step": 9441 }, { "epoch": 0.43329815061263827, "grad_norm": 0.4974026381969452, "learning_rate": 9.607498966450031e-06, "loss": 0.4347, "step": 9442 }, { "epoch": 0.4333440411178927, "grad_norm": 0.46012112498283386, "learning_rate": 9.607403735112938e-06, "loss": 0.3862, "step": 9443 }, { "epoch": 0.43338993162314715, "grad_norm": 0.5346178412437439, "learning_rate": 9.607308492696494e-06, "loss": 0.5133, "step": 9444 }, { "epoch": 0.43343582212840165, "grad_norm": 0.43566057085990906, "learning_rate": 9.607213239200926e-06, "loss": 0.372, "step": 9445 }, { "epoch": 0.4334817126336561, "grad_norm": 0.46320590376853943, "learning_rate": 9.607117974626465e-06, "loss": 0.4043, "step": 9446 }, { "epoch": 0.43352760313891053, "grad_norm": 0.5251379013061523, "learning_rate": 9.607022698973341e-06, "loss": 0.4748, "step": 9447 }, { "epoch": 0.43357349364416503, "grad_norm": 0.4413606524467468, "learning_rate": 9.60692741224178e-06, "loss": 0.4233, "step": 9448 }, { "epoch": 0.4336193841494195, "grad_norm": 0.4972039759159088, "learning_rate": 9.606832114432012e-06, "loss": 0.4904, "step": 9449 }, { "epoch": 0.43366527465467397, "grad_norm": 0.4795592725276947, "learning_rate": 9.606736805544266e-06, "loss": 0.4393, "step": 9450 }, { "epoch": 0.4337111651599284, "grad_norm": 0.9199171662330627, "learning_rate": 9.606641485578773e-06, "loss": 0.5734, "step": 9451 }, { "epoch": 0.43375705566518286, "grad_norm": 0.4427761733531952, "learning_rate": 9.606546154535763e-06, "loss": 0.3629, "step": 9452 }, { "epoch": 0.43380294617043735, "grad_norm": 0.4971867501735687, "learning_rate": 9.606450812415461e-06, "loss": 0.4502, "step": 9453 }, { "epoch": 0.4338488366756918, "grad_norm": 0.4610409438610077, "learning_rate": 9.6063554592181e-06, "loss": 0.3953, "step": 9454 }, { "epoch": 0.43389472718094624, "grad_norm": 0.540839433670044, "learning_rate": 9.606260094943907e-06, "loss": 0.5324, "step": 9455 }, { "epoch": 0.43394061768620074, "grad_norm": 0.46103745698928833, "learning_rate": 9.606164719593111e-06, "loss": 0.3814, "step": 9456 }, { "epoch": 0.4339865081914552, "grad_norm": 0.4704090356826782, "learning_rate": 9.606069333165945e-06, "loss": 0.4242, "step": 9457 }, { "epoch": 0.4340323986967097, "grad_norm": 0.4563893675804138, "learning_rate": 9.605973935662635e-06, "loss": 0.3891, "step": 9458 }, { "epoch": 0.4340782892019641, "grad_norm": 0.5015137791633606, "learning_rate": 9.605878527083411e-06, "loss": 0.3907, "step": 9459 }, { "epoch": 0.43412417970721856, "grad_norm": 0.4614434242248535, "learning_rate": 9.605783107428503e-06, "loss": 0.3574, "step": 9460 }, { "epoch": 0.43417007021247306, "grad_norm": 0.44149383902549744, "learning_rate": 9.605687676698141e-06, "loss": 0.3515, "step": 9461 }, { "epoch": 0.4342159607177275, "grad_norm": 0.4034615457057953, "learning_rate": 9.605592234892553e-06, "loss": 0.2984, "step": 9462 }, { "epoch": 0.43426185122298194, "grad_norm": 0.49119535088539124, "learning_rate": 9.60549678201197e-06, "loss": 0.4548, "step": 9463 }, { "epoch": 0.43430774172823644, "grad_norm": 0.4659927487373352, "learning_rate": 9.60540131805662e-06, "loss": 0.3694, "step": 9464 }, { "epoch": 0.4343536322334909, "grad_norm": 0.4542892873287201, "learning_rate": 9.605305843026734e-06, "loss": 0.4375, "step": 9465 }, { "epoch": 0.4343995227387454, "grad_norm": 0.4805331826210022, "learning_rate": 9.605210356922538e-06, "loss": 0.466, "step": 9466 }, { "epoch": 0.4344454132439998, "grad_norm": 0.46991226077079773, "learning_rate": 9.60511485974427e-06, "loss": 0.4049, "step": 9467 }, { "epoch": 0.43449130374925427, "grad_norm": 0.48190444707870483, "learning_rate": 9.60501935149215e-06, "loss": 0.432, "step": 9468 }, { "epoch": 0.43453719425450876, "grad_norm": 0.5730679035186768, "learning_rate": 9.604923832166413e-06, "loss": 0.4493, "step": 9469 }, { "epoch": 0.4345830847597632, "grad_norm": 0.45229271054267883, "learning_rate": 9.604828301767286e-06, "loss": 0.367, "step": 9470 }, { "epoch": 0.43462897526501765, "grad_norm": 0.4923461079597473, "learning_rate": 9.604732760295004e-06, "loss": 0.4005, "step": 9471 }, { "epoch": 0.43467486577027215, "grad_norm": 0.4399645924568176, "learning_rate": 9.60463720774979e-06, "loss": 0.3317, "step": 9472 }, { "epoch": 0.4347207562755266, "grad_norm": 0.4814951419830322, "learning_rate": 9.604541644131878e-06, "loss": 0.3795, "step": 9473 }, { "epoch": 0.43476664678078103, "grad_norm": 0.43064799904823303, "learning_rate": 9.604446069441496e-06, "loss": 0.348, "step": 9474 }, { "epoch": 0.4348125372860355, "grad_norm": 0.4646523892879486, "learning_rate": 9.604350483678875e-06, "loss": 0.3811, "step": 9475 }, { "epoch": 0.43485842779128997, "grad_norm": 0.47420167922973633, "learning_rate": 9.604254886844243e-06, "loss": 0.4106, "step": 9476 }, { "epoch": 0.43490431829654447, "grad_norm": 0.5082074403762817, "learning_rate": 9.604159278937833e-06, "loss": 0.46, "step": 9477 }, { "epoch": 0.4349502088017989, "grad_norm": 0.4871568977832794, "learning_rate": 9.60406365995987e-06, "loss": 0.4379, "step": 9478 }, { "epoch": 0.43499609930705335, "grad_norm": 0.4797271192073822, "learning_rate": 9.603968029910589e-06, "loss": 0.3855, "step": 9479 }, { "epoch": 0.43504198981230785, "grad_norm": 0.43549373745918274, "learning_rate": 9.603872388790218e-06, "loss": 0.3664, "step": 9480 }, { "epoch": 0.4350878803175623, "grad_norm": 0.4948456287384033, "learning_rate": 9.603776736598988e-06, "loss": 0.4371, "step": 9481 }, { "epoch": 0.43513377082281673, "grad_norm": 0.4821363389492035, "learning_rate": 9.603681073337126e-06, "loss": 0.434, "step": 9482 }, { "epoch": 0.43517966132807123, "grad_norm": 0.45945584774017334, "learning_rate": 9.603585399004866e-06, "loss": 0.3649, "step": 9483 }, { "epoch": 0.4352255518333257, "grad_norm": 0.4420243203639984, "learning_rate": 9.603489713602434e-06, "loss": 0.3929, "step": 9484 }, { "epoch": 0.4352714423385802, "grad_norm": 0.457217812538147, "learning_rate": 9.603394017130063e-06, "loss": 0.3579, "step": 9485 }, { "epoch": 0.4353173328438346, "grad_norm": 0.4531521201133728, "learning_rate": 9.603298309587982e-06, "loss": 0.3579, "step": 9486 }, { "epoch": 0.43536322334908906, "grad_norm": 0.4106457531452179, "learning_rate": 9.60320259097642e-06, "loss": 0.3202, "step": 9487 }, { "epoch": 0.43540911385434355, "grad_norm": 0.460921972990036, "learning_rate": 9.603106861295613e-06, "loss": 0.4365, "step": 9488 }, { "epoch": 0.435455004359598, "grad_norm": 0.4275890588760376, "learning_rate": 9.603011120545783e-06, "loss": 0.3401, "step": 9489 }, { "epoch": 0.43550089486485244, "grad_norm": 0.46876978874206543, "learning_rate": 9.602915368727166e-06, "loss": 0.4684, "step": 9490 }, { "epoch": 0.43554678537010694, "grad_norm": 0.4690188765525818, "learning_rate": 9.602819605839988e-06, "loss": 0.4397, "step": 9491 }, { "epoch": 0.4355926758753614, "grad_norm": 0.48514583706855774, "learning_rate": 9.602723831884485e-06, "loss": 0.4298, "step": 9492 }, { "epoch": 0.4356385663806159, "grad_norm": 0.4991267919540405, "learning_rate": 9.60262804686088e-06, "loss": 0.4446, "step": 9493 }, { "epoch": 0.4356844568858703, "grad_norm": 0.4434119462966919, "learning_rate": 9.602532250769409e-06, "loss": 0.3573, "step": 9494 }, { "epoch": 0.43573034739112476, "grad_norm": 0.4420779049396515, "learning_rate": 9.6024364436103e-06, "loss": 0.3557, "step": 9495 }, { "epoch": 0.43577623789637926, "grad_norm": 0.4561111330986023, "learning_rate": 9.602340625383785e-06, "loss": 0.4103, "step": 9496 }, { "epoch": 0.4358221284016337, "grad_norm": 0.5396824479103088, "learning_rate": 9.602244796090092e-06, "loss": 0.438, "step": 9497 }, { "epoch": 0.43586801890688814, "grad_norm": 0.5025354623794556, "learning_rate": 9.602148955729454e-06, "loss": 0.4624, "step": 9498 }, { "epoch": 0.43591390941214264, "grad_norm": 0.4464099407196045, "learning_rate": 9.602053104302101e-06, "loss": 0.353, "step": 9499 }, { "epoch": 0.4359597999173971, "grad_norm": 0.47576573491096497, "learning_rate": 9.60195724180826e-06, "loss": 0.3878, "step": 9500 }, { "epoch": 0.4360056904226515, "grad_norm": 0.4834880232810974, "learning_rate": 9.601861368248165e-06, "loss": 0.444, "step": 9501 }, { "epoch": 0.436051580927906, "grad_norm": 0.4710325002670288, "learning_rate": 9.601765483622048e-06, "loss": 0.48, "step": 9502 }, { "epoch": 0.43609747143316047, "grad_norm": 0.4765704572200775, "learning_rate": 9.601669587930134e-06, "loss": 0.4363, "step": 9503 }, { "epoch": 0.43614336193841496, "grad_norm": 0.47389891743659973, "learning_rate": 9.60157368117266e-06, "loss": 0.4445, "step": 9504 }, { "epoch": 0.4361892524436694, "grad_norm": 0.4966842830181122, "learning_rate": 9.601477763349853e-06, "loss": 0.4541, "step": 9505 }, { "epoch": 0.43623514294892385, "grad_norm": 0.503948986530304, "learning_rate": 9.601381834461942e-06, "loss": 0.4241, "step": 9506 }, { "epoch": 0.43628103345417835, "grad_norm": 0.44321370124816895, "learning_rate": 9.601285894509162e-06, "loss": 0.3601, "step": 9507 }, { "epoch": 0.4363269239594328, "grad_norm": 0.48795321583747864, "learning_rate": 9.601189943491742e-06, "loss": 0.4362, "step": 9508 }, { "epoch": 0.43637281446468723, "grad_norm": 0.5016379952430725, "learning_rate": 9.601093981409911e-06, "loss": 0.4245, "step": 9509 }, { "epoch": 0.43641870496994173, "grad_norm": 0.4635215103626251, "learning_rate": 9.600998008263902e-06, "loss": 0.4033, "step": 9510 }, { "epoch": 0.43646459547519617, "grad_norm": 0.46343687176704407, "learning_rate": 9.600902024053944e-06, "loss": 0.3665, "step": 9511 }, { "epoch": 0.43651048598045067, "grad_norm": 0.46449822187423706, "learning_rate": 9.60080602878027e-06, "loss": 0.3782, "step": 9512 }, { "epoch": 0.4365563764857051, "grad_norm": 0.5395718216896057, "learning_rate": 9.600710022443109e-06, "loss": 0.5637, "step": 9513 }, { "epoch": 0.43660226699095955, "grad_norm": 0.4605312943458557, "learning_rate": 9.600614005042693e-06, "loss": 0.387, "step": 9514 }, { "epoch": 0.43664815749621405, "grad_norm": 0.47935715317726135, "learning_rate": 9.600517976579251e-06, "loss": 0.4268, "step": 9515 }, { "epoch": 0.4366940480014685, "grad_norm": 0.48694777488708496, "learning_rate": 9.600421937053015e-06, "loss": 0.3775, "step": 9516 }, { "epoch": 0.43673993850672294, "grad_norm": 0.48647522926330566, "learning_rate": 9.600325886464217e-06, "loss": 0.4895, "step": 9517 }, { "epoch": 0.43678582901197743, "grad_norm": 0.5025939345359802, "learning_rate": 9.600229824813087e-06, "loss": 0.457, "step": 9518 }, { "epoch": 0.4368317195172319, "grad_norm": 0.4780937433242798, "learning_rate": 9.600133752099856e-06, "loss": 0.3984, "step": 9519 }, { "epoch": 0.4368776100224864, "grad_norm": 0.4750468134880066, "learning_rate": 9.600037668324756e-06, "loss": 0.4292, "step": 9520 }, { "epoch": 0.4369235005277408, "grad_norm": 0.46017226576805115, "learning_rate": 9.599941573488016e-06, "loss": 0.44, "step": 9521 }, { "epoch": 0.43696939103299526, "grad_norm": 0.49469447135925293, "learning_rate": 9.59984546758987e-06, "loss": 0.4668, "step": 9522 }, { "epoch": 0.43701528153824976, "grad_norm": 0.42648306488990784, "learning_rate": 9.599749350630547e-06, "loss": 0.3352, "step": 9523 }, { "epoch": 0.4370611720435042, "grad_norm": 0.5039868354797363, "learning_rate": 9.599653222610278e-06, "loss": 0.5306, "step": 9524 }, { "epoch": 0.43710706254875864, "grad_norm": 0.4735119640827179, "learning_rate": 9.599557083529295e-06, "loss": 0.4487, "step": 9525 }, { "epoch": 0.43715295305401314, "grad_norm": 0.4705061912536621, "learning_rate": 9.599460933387828e-06, "loss": 0.4126, "step": 9526 }, { "epoch": 0.4371988435592676, "grad_norm": 0.47537651658058167, "learning_rate": 9.599364772186109e-06, "loss": 0.4323, "step": 9527 }, { "epoch": 0.4372447340645221, "grad_norm": 0.4777722656726837, "learning_rate": 9.59926859992437e-06, "loss": 0.4177, "step": 9528 }, { "epoch": 0.4372906245697765, "grad_norm": 0.4627701938152313, "learning_rate": 9.599172416602843e-06, "loss": 0.4882, "step": 9529 }, { "epoch": 0.43733651507503096, "grad_norm": 0.41366854310035706, "learning_rate": 9.599076222221755e-06, "loss": 0.3336, "step": 9530 }, { "epoch": 0.43738240558028546, "grad_norm": 0.4612693786621094, "learning_rate": 9.59898001678134e-06, "loss": 0.4008, "step": 9531 }, { "epoch": 0.4374282960855399, "grad_norm": 0.44290438294410706, "learning_rate": 9.598883800281833e-06, "loss": 0.345, "step": 9532 }, { "epoch": 0.43747418659079434, "grad_norm": 0.46944841742515564, "learning_rate": 9.59878757272346e-06, "loss": 0.3832, "step": 9533 }, { "epoch": 0.43752007709604884, "grad_norm": 0.520319402217865, "learning_rate": 9.598691334106453e-06, "loss": 0.5183, "step": 9534 }, { "epoch": 0.4375659676013033, "grad_norm": 0.45137467980384827, "learning_rate": 9.598595084431046e-06, "loss": 0.3357, "step": 9535 }, { "epoch": 0.4376118581065577, "grad_norm": 0.4630063474178314, "learning_rate": 9.59849882369747e-06, "loss": 0.4132, "step": 9536 }, { "epoch": 0.4376577486118122, "grad_norm": 0.4519720673561096, "learning_rate": 9.598402551905955e-06, "loss": 0.387, "step": 9537 }, { "epoch": 0.43770363911706667, "grad_norm": 0.4464069902896881, "learning_rate": 9.598306269056733e-06, "loss": 0.3556, "step": 9538 }, { "epoch": 0.43774952962232117, "grad_norm": 0.4828057289123535, "learning_rate": 9.598209975150035e-06, "loss": 0.4224, "step": 9539 }, { "epoch": 0.4377954201275756, "grad_norm": 0.48324036598205566, "learning_rate": 9.598113670186096e-06, "loss": 0.4474, "step": 9540 }, { "epoch": 0.43784131063283005, "grad_norm": 0.4475082755088806, "learning_rate": 9.598017354165141e-06, "loss": 0.3249, "step": 9541 }, { "epoch": 0.43788720113808455, "grad_norm": 0.4720956087112427, "learning_rate": 9.597921027087408e-06, "loss": 0.4081, "step": 9542 }, { "epoch": 0.437933091643339, "grad_norm": 0.4305553436279297, "learning_rate": 9.597824688953125e-06, "loss": 0.3958, "step": 9543 }, { "epoch": 0.43797898214859343, "grad_norm": 0.4112338721752167, "learning_rate": 9.597728339762526e-06, "loss": 0.3558, "step": 9544 }, { "epoch": 0.43802487265384793, "grad_norm": 0.46954888105392456, "learning_rate": 9.59763197951584e-06, "loss": 0.4346, "step": 9545 }, { "epoch": 0.43807076315910237, "grad_norm": 0.4459943473339081, "learning_rate": 9.5975356082133e-06, "loss": 0.3925, "step": 9546 }, { "epoch": 0.43811665366435687, "grad_norm": 0.48806485533714294, "learning_rate": 9.597439225855139e-06, "loss": 0.4316, "step": 9547 }, { "epoch": 0.4381625441696113, "grad_norm": 0.46897292137145996, "learning_rate": 9.597342832441587e-06, "loss": 0.4863, "step": 9548 }, { "epoch": 0.43820843467486575, "grad_norm": 0.4176226854324341, "learning_rate": 9.597246427972878e-06, "loss": 0.3357, "step": 9549 }, { "epoch": 0.43825432518012025, "grad_norm": 0.46972376108169556, "learning_rate": 9.59715001244924e-06, "loss": 0.3881, "step": 9550 }, { "epoch": 0.4383002156853747, "grad_norm": 0.4630694091320038, "learning_rate": 9.597053585870909e-06, "loss": 0.4316, "step": 9551 }, { "epoch": 0.43834610619062914, "grad_norm": 0.506456732749939, "learning_rate": 9.596957148238113e-06, "loss": 0.5058, "step": 9552 }, { "epoch": 0.43839199669588363, "grad_norm": 0.44118791818618774, "learning_rate": 9.596860699551087e-06, "loss": 0.3391, "step": 9553 }, { "epoch": 0.4384378872011381, "grad_norm": 0.48329615592956543, "learning_rate": 9.596764239810061e-06, "loss": 0.4639, "step": 9554 }, { "epoch": 0.4384837777063926, "grad_norm": 0.47136417031288147, "learning_rate": 9.596667769015269e-06, "loss": 0.4262, "step": 9555 }, { "epoch": 0.438529668211647, "grad_norm": 0.4728092849254608, "learning_rate": 9.596571287166942e-06, "loss": 0.4678, "step": 9556 }, { "epoch": 0.43857555871690146, "grad_norm": 0.4291680157184601, "learning_rate": 9.59647479426531e-06, "loss": 0.351, "step": 9557 }, { "epoch": 0.43862144922215596, "grad_norm": 0.47856372594833374, "learning_rate": 9.596378290310609e-06, "loss": 0.4745, "step": 9558 }, { "epoch": 0.4386673397274104, "grad_norm": 0.4719753861427307, "learning_rate": 9.596281775303066e-06, "loss": 0.4166, "step": 9559 }, { "epoch": 0.43871323023266484, "grad_norm": 0.4732882082462311, "learning_rate": 9.596185249242918e-06, "loss": 0.4545, "step": 9560 }, { "epoch": 0.43875912073791934, "grad_norm": 0.4735700190067291, "learning_rate": 9.596088712130396e-06, "loss": 0.4461, "step": 9561 }, { "epoch": 0.4388050112431738, "grad_norm": 0.4803396463394165, "learning_rate": 9.59599216396573e-06, "loss": 0.4025, "step": 9562 }, { "epoch": 0.4388509017484282, "grad_norm": 0.7367607951164246, "learning_rate": 9.595895604749153e-06, "loss": 0.5485, "step": 9563 }, { "epoch": 0.4388967922536827, "grad_norm": 0.4681912660598755, "learning_rate": 9.595799034480898e-06, "loss": 0.4451, "step": 9564 }, { "epoch": 0.43894268275893716, "grad_norm": 0.44611018896102905, "learning_rate": 9.595702453161198e-06, "loss": 0.3619, "step": 9565 }, { "epoch": 0.43898857326419166, "grad_norm": 0.4557933807373047, "learning_rate": 9.595605860790282e-06, "loss": 0.4256, "step": 9566 }, { "epoch": 0.4390344637694461, "grad_norm": 0.4834311604499817, "learning_rate": 9.595509257368386e-06, "loss": 0.4134, "step": 9567 }, { "epoch": 0.43908035427470055, "grad_norm": 0.5164036154747009, "learning_rate": 9.595412642895738e-06, "loss": 0.5809, "step": 9568 }, { "epoch": 0.43912624477995504, "grad_norm": 0.48303714394569397, "learning_rate": 9.595316017372576e-06, "loss": 0.4757, "step": 9569 }, { "epoch": 0.4391721352852095, "grad_norm": 0.47512075304985046, "learning_rate": 9.595219380799128e-06, "loss": 0.3888, "step": 9570 }, { "epoch": 0.43921802579046393, "grad_norm": 0.4493614137172699, "learning_rate": 9.595122733175626e-06, "loss": 0.3954, "step": 9571 }, { "epoch": 0.4392639162957184, "grad_norm": 0.4429350197315216, "learning_rate": 9.595026074502307e-06, "loss": 0.4018, "step": 9572 }, { "epoch": 0.43930980680097287, "grad_norm": 0.47695687413215637, "learning_rate": 9.594929404779398e-06, "loss": 0.4396, "step": 9573 }, { "epoch": 0.43935569730622737, "grad_norm": 0.4400349259376526, "learning_rate": 9.594832724007137e-06, "loss": 0.367, "step": 9574 }, { "epoch": 0.4394015878114818, "grad_norm": 0.4291749894618988, "learning_rate": 9.594736032185751e-06, "loss": 0.3441, "step": 9575 }, { "epoch": 0.43944747831673625, "grad_norm": 0.48809078335762024, "learning_rate": 9.594639329315476e-06, "loss": 0.457, "step": 9576 }, { "epoch": 0.43949336882199075, "grad_norm": 0.47735485434532166, "learning_rate": 9.594542615396542e-06, "loss": 0.4483, "step": 9577 }, { "epoch": 0.4395392593272452, "grad_norm": 0.4509314298629761, "learning_rate": 9.594445890429185e-06, "loss": 0.3491, "step": 9578 }, { "epoch": 0.43958514983249963, "grad_norm": 0.47399163246154785, "learning_rate": 9.594349154413635e-06, "loss": 0.4207, "step": 9579 }, { "epoch": 0.43963104033775413, "grad_norm": 0.44592374563217163, "learning_rate": 9.594252407350126e-06, "loss": 0.3543, "step": 9580 }, { "epoch": 0.4396769308430086, "grad_norm": 0.4471324384212494, "learning_rate": 9.59415564923889e-06, "loss": 0.3725, "step": 9581 }, { "epoch": 0.43972282134826307, "grad_norm": 0.5123616456985474, "learning_rate": 9.594058880080157e-06, "loss": 0.5099, "step": 9582 }, { "epoch": 0.4397687118535175, "grad_norm": 0.4294447898864746, "learning_rate": 9.593962099874166e-06, "loss": 0.3653, "step": 9583 }, { "epoch": 0.43981460235877196, "grad_norm": 0.41964370012283325, "learning_rate": 9.593865308621144e-06, "loss": 0.3352, "step": 9584 }, { "epoch": 0.43986049286402645, "grad_norm": 0.4615685045719147, "learning_rate": 9.593768506321326e-06, "loss": 0.4276, "step": 9585 }, { "epoch": 0.4399063833692809, "grad_norm": 0.47215625643730164, "learning_rate": 9.593671692974944e-06, "loss": 0.4478, "step": 9586 }, { "epoch": 0.43995227387453534, "grad_norm": 0.44181594252586365, "learning_rate": 9.593574868582232e-06, "loss": 0.3995, "step": 9587 }, { "epoch": 0.43999816437978984, "grad_norm": 0.4748929738998413, "learning_rate": 9.593478033143422e-06, "loss": 0.4311, "step": 9588 }, { "epoch": 0.4400440548850443, "grad_norm": 0.4751283526420593, "learning_rate": 9.593381186658748e-06, "loss": 0.4445, "step": 9589 }, { "epoch": 0.4400899453902987, "grad_norm": 0.438077449798584, "learning_rate": 9.593284329128441e-06, "loss": 0.3401, "step": 9590 }, { "epoch": 0.4401358358955532, "grad_norm": 0.4550577700138092, "learning_rate": 9.593187460552734e-06, "loss": 0.4084, "step": 9591 }, { "epoch": 0.44018172640080766, "grad_norm": 0.46328914165496826, "learning_rate": 9.593090580931862e-06, "loss": 0.3899, "step": 9592 }, { "epoch": 0.44022761690606216, "grad_norm": 0.5900828242301941, "learning_rate": 9.592993690266058e-06, "loss": 0.3853, "step": 9593 }, { "epoch": 0.4402735074113166, "grad_norm": 0.4365132451057434, "learning_rate": 9.592896788555553e-06, "loss": 0.3784, "step": 9594 }, { "epoch": 0.44031939791657104, "grad_norm": 0.45450422167778015, "learning_rate": 9.59279987580058e-06, "loss": 0.3958, "step": 9595 }, { "epoch": 0.44036528842182554, "grad_norm": 0.4881937503814697, "learning_rate": 9.592702952001372e-06, "loss": 0.3977, "step": 9596 }, { "epoch": 0.44041117892708, "grad_norm": 0.4581640362739563, "learning_rate": 9.592606017158165e-06, "loss": 0.3548, "step": 9597 }, { "epoch": 0.4404570694323344, "grad_norm": 0.47902989387512207, "learning_rate": 9.592509071271188e-06, "loss": 0.415, "step": 9598 }, { "epoch": 0.4405029599375889, "grad_norm": 0.4827384650707245, "learning_rate": 9.592412114340677e-06, "loss": 0.401, "step": 9599 }, { "epoch": 0.44054885044284336, "grad_norm": 0.48581916093826294, "learning_rate": 9.592315146366865e-06, "loss": 0.4201, "step": 9600 }, { "epoch": 0.44059474094809786, "grad_norm": 0.4750884771347046, "learning_rate": 9.592218167349985e-06, "loss": 0.4305, "step": 9601 }, { "epoch": 0.4406406314533523, "grad_norm": 0.47014978528022766, "learning_rate": 9.592121177290267e-06, "loss": 0.3972, "step": 9602 }, { "epoch": 0.44068652195860675, "grad_norm": 0.44357892870903015, "learning_rate": 9.592024176187948e-06, "loss": 0.4019, "step": 9603 }, { "epoch": 0.44073241246386125, "grad_norm": 0.47168901562690735, "learning_rate": 9.591927164043263e-06, "loss": 0.4105, "step": 9604 }, { "epoch": 0.4407783029691157, "grad_norm": 0.45505568385124207, "learning_rate": 9.591830140856438e-06, "loss": 0.3724, "step": 9605 }, { "epoch": 0.44082419347437013, "grad_norm": 0.4602077007293701, "learning_rate": 9.591733106627713e-06, "loss": 0.4212, "step": 9606 }, { "epoch": 0.4408700839796246, "grad_norm": 0.4131883978843689, "learning_rate": 9.591636061357318e-06, "loss": 0.296, "step": 9607 }, { "epoch": 0.44091597448487907, "grad_norm": 0.4569132626056671, "learning_rate": 9.591539005045489e-06, "loss": 0.4232, "step": 9608 }, { "epoch": 0.44096186499013357, "grad_norm": 0.4728423058986664, "learning_rate": 9.591441937692457e-06, "loss": 0.4557, "step": 9609 }, { "epoch": 0.441007755495388, "grad_norm": 0.48723241686820984, "learning_rate": 9.591344859298456e-06, "loss": 0.4776, "step": 9610 }, { "epoch": 0.44105364600064245, "grad_norm": 0.469016432762146, "learning_rate": 9.59124776986372e-06, "loss": 0.3925, "step": 9611 }, { "epoch": 0.44109953650589695, "grad_norm": 0.46609246730804443, "learning_rate": 9.591150669388482e-06, "loss": 0.392, "step": 9612 }, { "epoch": 0.4411454270111514, "grad_norm": 0.4937361478805542, "learning_rate": 9.591053557872975e-06, "loss": 0.4476, "step": 9613 }, { "epoch": 0.44119131751640583, "grad_norm": 0.49860453605651855, "learning_rate": 9.590956435317436e-06, "loss": 0.4802, "step": 9614 }, { "epoch": 0.44123720802166033, "grad_norm": 0.47362595796585083, "learning_rate": 9.590859301722093e-06, "loss": 0.4158, "step": 9615 }, { "epoch": 0.4412830985269148, "grad_norm": 0.48048558831214905, "learning_rate": 9.590762157087182e-06, "loss": 0.4388, "step": 9616 }, { "epoch": 0.4413289890321692, "grad_norm": 0.5018331408500671, "learning_rate": 9.590665001412938e-06, "loss": 0.4515, "step": 9617 }, { "epoch": 0.4413748795374237, "grad_norm": 0.45968523621559143, "learning_rate": 9.590567834699593e-06, "loss": 0.4522, "step": 9618 }, { "epoch": 0.44142077004267816, "grad_norm": 0.445666640996933, "learning_rate": 9.590470656947382e-06, "loss": 0.3746, "step": 9619 }, { "epoch": 0.44146666054793265, "grad_norm": 0.4866577088832855, "learning_rate": 9.590373468156538e-06, "loss": 0.4338, "step": 9620 }, { "epoch": 0.4415125510531871, "grad_norm": 0.4516773819923401, "learning_rate": 9.590276268327295e-06, "loss": 0.399, "step": 9621 }, { "epoch": 0.44155844155844154, "grad_norm": 0.4766620695590973, "learning_rate": 9.590179057459884e-06, "loss": 0.4171, "step": 9622 }, { "epoch": 0.44160433206369604, "grad_norm": 0.3945341408252716, "learning_rate": 9.590081835554542e-06, "loss": 0.29, "step": 9623 }, { "epoch": 0.4416502225689505, "grad_norm": 0.5096600651741028, "learning_rate": 9.589984602611501e-06, "loss": 0.4799, "step": 9624 }, { "epoch": 0.4416961130742049, "grad_norm": 0.4714215397834778, "learning_rate": 9.589887358630998e-06, "loss": 0.4063, "step": 9625 }, { "epoch": 0.4417420035794594, "grad_norm": 0.47318416833877563, "learning_rate": 9.589790103613263e-06, "loss": 0.4342, "step": 9626 }, { "epoch": 0.44178789408471386, "grad_norm": 0.4896180033683777, "learning_rate": 9.589692837558534e-06, "loss": 0.4109, "step": 9627 }, { "epoch": 0.44183378458996836, "grad_norm": 0.4745769202709198, "learning_rate": 9.589595560467037e-06, "loss": 0.4208, "step": 9628 }, { "epoch": 0.4418796750952228, "grad_norm": 0.44588232040405273, "learning_rate": 9.589498272339016e-06, "loss": 0.3602, "step": 9629 }, { "epoch": 0.44192556560047724, "grad_norm": 0.46436935663223267, "learning_rate": 9.589400973174698e-06, "loss": 0.4071, "step": 9630 }, { "epoch": 0.44197145610573174, "grad_norm": 0.48432761430740356, "learning_rate": 9.589303662974319e-06, "loss": 0.3974, "step": 9631 }, { "epoch": 0.4420173466109862, "grad_norm": 0.5417534112930298, "learning_rate": 9.589206341738113e-06, "loss": 0.5975, "step": 9632 }, { "epoch": 0.4420632371162406, "grad_norm": 0.45078712701797485, "learning_rate": 9.589109009466316e-06, "loss": 0.4189, "step": 9633 }, { "epoch": 0.4421091276214951, "grad_norm": 0.45233264565467834, "learning_rate": 9.589011666159159e-06, "loss": 0.4251, "step": 9634 }, { "epoch": 0.44215501812674957, "grad_norm": 0.521564781665802, "learning_rate": 9.588914311816877e-06, "loss": 0.4916, "step": 9635 }, { "epoch": 0.44220090863200406, "grad_norm": 0.5074800252914429, "learning_rate": 9.588816946439705e-06, "loss": 0.5138, "step": 9636 }, { "epoch": 0.4422467991372585, "grad_norm": 0.45381203293800354, "learning_rate": 9.588719570027875e-06, "loss": 0.3815, "step": 9637 }, { "epoch": 0.44229268964251295, "grad_norm": 0.480557918548584, "learning_rate": 9.588622182581625e-06, "loss": 0.4628, "step": 9638 }, { "epoch": 0.44233858014776745, "grad_norm": 0.477324903011322, "learning_rate": 9.588524784101185e-06, "loss": 0.4028, "step": 9639 }, { "epoch": 0.4423844706530219, "grad_norm": 0.4725293219089508, "learning_rate": 9.588427374586792e-06, "loss": 0.4111, "step": 9640 }, { "epoch": 0.44243036115827633, "grad_norm": 0.4444078803062439, "learning_rate": 9.588329954038679e-06, "loss": 0.3974, "step": 9641 }, { "epoch": 0.44247625166353083, "grad_norm": 0.45349982380867004, "learning_rate": 9.588232522457081e-06, "loss": 0.3895, "step": 9642 }, { "epoch": 0.44252214216878527, "grad_norm": 0.44622552394866943, "learning_rate": 9.588135079842232e-06, "loss": 0.4195, "step": 9643 }, { "epoch": 0.44256803267403977, "grad_norm": 0.4689968526363373, "learning_rate": 9.588037626194365e-06, "loss": 0.419, "step": 9644 }, { "epoch": 0.4426139231792942, "grad_norm": 0.46988803148269653, "learning_rate": 9.587940161513716e-06, "loss": 0.3757, "step": 9645 }, { "epoch": 0.44265981368454865, "grad_norm": 0.4673752188682556, "learning_rate": 9.587842685800519e-06, "loss": 0.4555, "step": 9646 }, { "epoch": 0.44270570418980315, "grad_norm": 0.43658646941185, "learning_rate": 9.587745199055009e-06, "loss": 0.3212, "step": 9647 }, { "epoch": 0.4427515946950576, "grad_norm": 0.39917293190956116, "learning_rate": 9.587647701277418e-06, "loss": 0.2871, "step": 9648 }, { "epoch": 0.44279748520031204, "grad_norm": 0.4589487314224243, "learning_rate": 9.587550192467984e-06, "loss": 0.4084, "step": 9649 }, { "epoch": 0.44284337570556653, "grad_norm": 0.45076534152030945, "learning_rate": 9.58745267262694e-06, "loss": 0.3552, "step": 9650 }, { "epoch": 0.442889266210821, "grad_norm": 0.5135691165924072, "learning_rate": 9.587355141754519e-06, "loss": 0.5182, "step": 9651 }, { "epoch": 0.4429351567160754, "grad_norm": 0.47803282737731934, "learning_rate": 9.587257599850957e-06, "loss": 0.4357, "step": 9652 }, { "epoch": 0.4429810472213299, "grad_norm": 0.441232293844223, "learning_rate": 9.587160046916488e-06, "loss": 0.3583, "step": 9653 }, { "epoch": 0.44302693772658436, "grad_norm": 0.4099386930465698, "learning_rate": 9.587062482951345e-06, "loss": 0.2851, "step": 9654 }, { "epoch": 0.44307282823183886, "grad_norm": 0.42625892162323, "learning_rate": 9.586964907955768e-06, "loss": 0.3393, "step": 9655 }, { "epoch": 0.4431187187370933, "grad_norm": 0.5025972127914429, "learning_rate": 9.586867321929987e-06, "loss": 0.4622, "step": 9656 }, { "epoch": 0.44316460924234774, "grad_norm": 0.4824918210506439, "learning_rate": 9.586769724874236e-06, "loss": 0.4678, "step": 9657 }, { "epoch": 0.44321049974760224, "grad_norm": 0.4923332631587982, "learning_rate": 9.586672116788753e-06, "loss": 0.4995, "step": 9658 }, { "epoch": 0.4432563902528567, "grad_norm": 0.4466186463832855, "learning_rate": 9.58657449767377e-06, "loss": 0.3339, "step": 9659 }, { "epoch": 0.4433022807581111, "grad_norm": 0.4873707592487335, "learning_rate": 9.586476867529525e-06, "loss": 0.4248, "step": 9660 }, { "epoch": 0.4433481712633656, "grad_norm": 0.5259816646575928, "learning_rate": 9.58637922635625e-06, "loss": 0.4431, "step": 9661 }, { "epoch": 0.44339406176862006, "grad_norm": 0.42020919919013977, "learning_rate": 9.58628157415418e-06, "loss": 0.372, "step": 9662 }, { "epoch": 0.44343995227387456, "grad_norm": 0.4119405448436737, "learning_rate": 9.58618391092355e-06, "loss": 0.3331, "step": 9663 }, { "epoch": 0.443485842779129, "grad_norm": 0.47012048959732056, "learning_rate": 9.586086236664595e-06, "loss": 0.4227, "step": 9664 }, { "epoch": 0.44353173328438344, "grad_norm": 0.8761385679244995, "learning_rate": 9.58598855137755e-06, "loss": 0.4267, "step": 9665 }, { "epoch": 0.44357762378963794, "grad_norm": 0.49415332078933716, "learning_rate": 9.58589085506265e-06, "loss": 0.4361, "step": 9666 }, { "epoch": 0.4436235142948924, "grad_norm": 0.4912410080432892, "learning_rate": 9.58579314772013e-06, "loss": 0.3832, "step": 9667 }, { "epoch": 0.4436694048001468, "grad_norm": 0.46939942240715027, "learning_rate": 9.585695429350225e-06, "loss": 0.38, "step": 9668 }, { "epoch": 0.4437152953054013, "grad_norm": 0.5038236379623413, "learning_rate": 9.585597699953169e-06, "loss": 0.4197, "step": 9669 }, { "epoch": 0.44376118581065577, "grad_norm": 0.4478367865085602, "learning_rate": 9.5854999595292e-06, "loss": 0.3755, "step": 9670 }, { "epoch": 0.44380707631591026, "grad_norm": 0.44914358854293823, "learning_rate": 9.585402208078548e-06, "loss": 0.4188, "step": 9671 }, { "epoch": 0.4438529668211647, "grad_norm": 0.5287940502166748, "learning_rate": 9.585304445601454e-06, "loss": 0.3888, "step": 9672 }, { "epoch": 0.44389885732641915, "grad_norm": 0.46928712725639343, "learning_rate": 9.585206672098145e-06, "loss": 0.363, "step": 9673 }, { "epoch": 0.44394474783167365, "grad_norm": 0.49741995334625244, "learning_rate": 9.585108887568865e-06, "loss": 0.4185, "step": 9674 }, { "epoch": 0.4439906383369281, "grad_norm": 0.4508717358112335, "learning_rate": 9.585011092013845e-06, "loss": 0.3754, "step": 9675 }, { "epoch": 0.44403652884218253, "grad_norm": 0.5743631720542908, "learning_rate": 9.58491328543332e-06, "loss": 0.3973, "step": 9676 }, { "epoch": 0.44408241934743703, "grad_norm": 0.4918135106563568, "learning_rate": 9.584815467827525e-06, "loss": 0.42, "step": 9677 }, { "epoch": 0.44412830985269147, "grad_norm": 0.4639934003353119, "learning_rate": 9.584717639196696e-06, "loss": 0.389, "step": 9678 }, { "epoch": 0.4441742003579459, "grad_norm": 0.4706595838069916, "learning_rate": 9.584619799541069e-06, "loss": 0.4092, "step": 9679 }, { "epoch": 0.4442200908632004, "grad_norm": 0.4610098898410797, "learning_rate": 9.584521948860878e-06, "loss": 0.4672, "step": 9680 }, { "epoch": 0.44426598136845485, "grad_norm": 0.43970662355422974, "learning_rate": 9.584424087156358e-06, "loss": 0.3839, "step": 9681 }, { "epoch": 0.44431187187370935, "grad_norm": 0.4475744664669037, "learning_rate": 9.584326214427744e-06, "loss": 0.3711, "step": 9682 }, { "epoch": 0.4443577623789638, "grad_norm": 0.41338759660720825, "learning_rate": 9.584228330675273e-06, "loss": 0.4046, "step": 9683 }, { "epoch": 0.44440365288421824, "grad_norm": 0.4646712839603424, "learning_rate": 9.584130435899181e-06, "loss": 0.3965, "step": 9684 }, { "epoch": 0.44444954338947273, "grad_norm": 0.4934080243110657, "learning_rate": 9.584032530099701e-06, "loss": 0.4669, "step": 9685 }, { "epoch": 0.4444954338947272, "grad_norm": 0.5081644058227539, "learning_rate": 9.583934613277071e-06, "loss": 0.49, "step": 9686 }, { "epoch": 0.4445413243999816, "grad_norm": 0.4256170988082886, "learning_rate": 9.583836685431524e-06, "loss": 0.3619, "step": 9687 }, { "epoch": 0.4445872149052361, "grad_norm": 0.4500099718570709, "learning_rate": 9.583738746563296e-06, "loss": 0.322, "step": 9688 }, { "epoch": 0.44463310541049056, "grad_norm": 0.4437118172645569, "learning_rate": 9.583640796672622e-06, "loss": 0.4048, "step": 9689 }, { "epoch": 0.44467899591574506, "grad_norm": 0.436017245054245, "learning_rate": 9.58354283575974e-06, "loss": 0.3937, "step": 9690 }, { "epoch": 0.4447248864209995, "grad_norm": 0.4234682023525238, "learning_rate": 9.583444863824885e-06, "loss": 0.3169, "step": 9691 }, { "epoch": 0.44477077692625394, "grad_norm": 0.45163267850875854, "learning_rate": 9.58334688086829e-06, "loss": 0.3767, "step": 9692 }, { "epoch": 0.44481666743150844, "grad_norm": 0.4595990478992462, "learning_rate": 9.583248886890192e-06, "loss": 0.3929, "step": 9693 }, { "epoch": 0.4448625579367629, "grad_norm": 0.4723793864250183, "learning_rate": 9.583150881890828e-06, "loss": 0.4233, "step": 9694 }, { "epoch": 0.4449084484420173, "grad_norm": 0.4567941725254059, "learning_rate": 9.583052865870433e-06, "loss": 0.3847, "step": 9695 }, { "epoch": 0.4449543389472718, "grad_norm": 0.4242376685142517, "learning_rate": 9.582954838829242e-06, "loss": 0.3075, "step": 9696 }, { "epoch": 0.44500022945252626, "grad_norm": 0.45160433650016785, "learning_rate": 9.58285680076749e-06, "loss": 0.3498, "step": 9697 }, { "epoch": 0.44504611995778076, "grad_norm": 0.42402368783950806, "learning_rate": 9.582758751685416e-06, "loss": 0.3124, "step": 9698 }, { "epoch": 0.4450920104630352, "grad_norm": 0.4305524528026581, "learning_rate": 9.582660691583251e-06, "loss": 0.3335, "step": 9699 }, { "epoch": 0.44513790096828965, "grad_norm": 0.4245362877845764, "learning_rate": 9.582562620461234e-06, "loss": 0.322, "step": 9700 }, { "epoch": 0.44518379147354414, "grad_norm": 0.45082196593284607, "learning_rate": 9.5824645383196e-06, "loss": 0.3475, "step": 9701 }, { "epoch": 0.4452296819787986, "grad_norm": 0.44884252548217773, "learning_rate": 9.582366445158586e-06, "loss": 0.3881, "step": 9702 }, { "epoch": 0.44527557248405303, "grad_norm": 0.42796602845191956, "learning_rate": 9.582268340978426e-06, "loss": 0.364, "step": 9703 }, { "epoch": 0.4453214629893075, "grad_norm": 0.5100491642951965, "learning_rate": 9.582170225779359e-06, "loss": 0.5006, "step": 9704 }, { "epoch": 0.44536735349456197, "grad_norm": 0.4466865360736847, "learning_rate": 9.582072099561617e-06, "loss": 0.313, "step": 9705 }, { "epoch": 0.4454132439998164, "grad_norm": 0.4920097589492798, "learning_rate": 9.581973962325438e-06, "loss": 0.423, "step": 9706 }, { "epoch": 0.4454591345050709, "grad_norm": 0.48109546303749084, "learning_rate": 9.581875814071057e-06, "loss": 0.4311, "step": 9707 }, { "epoch": 0.44550502501032535, "grad_norm": 0.4446703791618347, "learning_rate": 9.581777654798712e-06, "loss": 0.4136, "step": 9708 }, { "epoch": 0.44555091551557985, "grad_norm": 0.4389550983905792, "learning_rate": 9.581679484508637e-06, "loss": 0.401, "step": 9709 }, { "epoch": 0.4455968060208343, "grad_norm": 0.8718675971031189, "learning_rate": 9.581581303201069e-06, "loss": 0.5176, "step": 9710 }, { "epoch": 0.44564269652608873, "grad_norm": 0.4206651747226715, "learning_rate": 9.581483110876243e-06, "loss": 0.2982, "step": 9711 }, { "epoch": 0.44568858703134323, "grad_norm": 0.4238147437572479, "learning_rate": 9.581384907534399e-06, "loss": 0.3167, "step": 9712 }, { "epoch": 0.4457344775365977, "grad_norm": 0.4877219796180725, "learning_rate": 9.58128669317577e-06, "loss": 0.4763, "step": 9713 }, { "epoch": 0.4457803680418521, "grad_norm": 0.4501168131828308, "learning_rate": 9.581188467800589e-06, "loss": 0.393, "step": 9714 }, { "epoch": 0.4458262585471066, "grad_norm": 0.4622237980365753, "learning_rate": 9.581090231409098e-06, "loss": 0.3658, "step": 9715 }, { "epoch": 0.44587214905236106, "grad_norm": 0.45816364884376526, "learning_rate": 9.58099198400153e-06, "loss": 0.4164, "step": 9716 }, { "epoch": 0.44591803955761555, "grad_norm": 0.47169187664985657, "learning_rate": 9.580893725578122e-06, "loss": 0.3562, "step": 9717 }, { "epoch": 0.44596393006287, "grad_norm": 0.46834126114845276, "learning_rate": 9.58079545613911e-06, "loss": 0.4316, "step": 9718 }, { "epoch": 0.44600982056812444, "grad_norm": 0.50584876537323, "learning_rate": 9.580697175684733e-06, "loss": 0.4767, "step": 9719 }, { "epoch": 0.44605571107337894, "grad_norm": 0.45500805974006653, "learning_rate": 9.580598884215223e-06, "loss": 0.3598, "step": 9720 }, { "epoch": 0.4461016015786334, "grad_norm": 0.46660587191581726, "learning_rate": 9.580500581730818e-06, "loss": 0.3811, "step": 9721 }, { "epoch": 0.4461474920838878, "grad_norm": 0.41970840096473694, "learning_rate": 9.580402268231756e-06, "loss": 0.3358, "step": 9722 }, { "epoch": 0.4461933825891423, "grad_norm": 0.43053188920021057, "learning_rate": 9.58030394371827e-06, "loss": 0.3876, "step": 9723 }, { "epoch": 0.44623927309439676, "grad_norm": 0.4564133286476135, "learning_rate": 9.5802056081906e-06, "loss": 0.4138, "step": 9724 }, { "epoch": 0.44628516359965126, "grad_norm": 0.45494577288627625, "learning_rate": 9.580107261648983e-06, "loss": 0.4176, "step": 9725 }, { "epoch": 0.4463310541049057, "grad_norm": 0.4483310580253601, "learning_rate": 9.580008904093652e-06, "loss": 0.3746, "step": 9726 }, { "epoch": 0.44637694461016014, "grad_norm": 0.46922314167022705, "learning_rate": 9.579910535524843e-06, "loss": 0.4402, "step": 9727 }, { "epoch": 0.44642283511541464, "grad_norm": 0.49520522356033325, "learning_rate": 9.579812155942798e-06, "loss": 0.4259, "step": 9728 }, { "epoch": 0.4464687256206691, "grad_norm": 0.4410932958126068, "learning_rate": 9.57971376534775e-06, "loss": 0.3668, "step": 9729 }, { "epoch": 0.4465146161259235, "grad_norm": 0.44762057065963745, "learning_rate": 9.579615363739934e-06, "loss": 0.3704, "step": 9730 }, { "epoch": 0.446560506631178, "grad_norm": 0.4395647943019867, "learning_rate": 9.579516951119587e-06, "loss": 0.3822, "step": 9731 }, { "epoch": 0.44660639713643246, "grad_norm": 0.4863123893737793, "learning_rate": 9.57941852748695e-06, "loss": 0.4338, "step": 9732 }, { "epoch": 0.44665228764168696, "grad_norm": 0.4585941731929779, "learning_rate": 9.579320092842254e-06, "loss": 0.4042, "step": 9733 }, { "epoch": 0.4466981781469414, "grad_norm": 0.5000231862068176, "learning_rate": 9.57922164718574e-06, "loss": 0.4862, "step": 9734 }, { "epoch": 0.44674406865219585, "grad_norm": 0.46738728880882263, "learning_rate": 9.579123190517645e-06, "loss": 0.3494, "step": 9735 }, { "epoch": 0.44678995915745034, "grad_norm": 0.5346416234970093, "learning_rate": 9.579024722838202e-06, "loss": 0.542, "step": 9736 }, { "epoch": 0.4468358496627048, "grad_norm": 0.43725869059562683, "learning_rate": 9.57892624414765e-06, "loss": 0.3412, "step": 9737 }, { "epoch": 0.44688174016795923, "grad_norm": 0.466680109500885, "learning_rate": 9.578827754446226e-06, "loss": 0.4109, "step": 9738 }, { "epoch": 0.4469276306732137, "grad_norm": 0.44553911685943604, "learning_rate": 9.578729253734165e-06, "loss": 0.3729, "step": 9739 }, { "epoch": 0.44697352117846817, "grad_norm": 0.4994473159313202, "learning_rate": 9.578630742011708e-06, "loss": 0.5378, "step": 9740 }, { "epoch": 0.4470194116837226, "grad_norm": 0.4441971480846405, "learning_rate": 9.578532219279087e-06, "loss": 0.3986, "step": 9741 }, { "epoch": 0.4470653021889771, "grad_norm": 0.46104422211647034, "learning_rate": 9.578433685536541e-06, "loss": 0.4017, "step": 9742 }, { "epoch": 0.44711119269423155, "grad_norm": 0.45616480708122253, "learning_rate": 9.578335140784308e-06, "loss": 0.4138, "step": 9743 }, { "epoch": 0.44715708319948605, "grad_norm": 0.43608972430229187, "learning_rate": 9.578236585022622e-06, "loss": 0.3771, "step": 9744 }, { "epoch": 0.4472029737047405, "grad_norm": 0.42397522926330566, "learning_rate": 9.578138018251723e-06, "loss": 0.3225, "step": 9745 }, { "epoch": 0.44724886420999493, "grad_norm": 0.4208241403102875, "learning_rate": 9.578039440471848e-06, "loss": 0.3517, "step": 9746 }, { "epoch": 0.44729475471524943, "grad_norm": 0.47919583320617676, "learning_rate": 9.577940851683232e-06, "loss": 0.4654, "step": 9747 }, { "epoch": 0.4473406452205039, "grad_norm": 0.40554603934288025, "learning_rate": 9.577842251886114e-06, "loss": 0.2959, "step": 9748 }, { "epoch": 0.4473865357257583, "grad_norm": 0.4671021103858948, "learning_rate": 9.577743641080729e-06, "loss": 0.4055, "step": 9749 }, { "epoch": 0.4474324262310128, "grad_norm": 0.4165404438972473, "learning_rate": 9.577645019267315e-06, "loss": 0.3016, "step": 9750 }, { "epoch": 0.44747831673626726, "grad_norm": 0.42552512884140015, "learning_rate": 9.57754638644611e-06, "loss": 0.3225, "step": 9751 }, { "epoch": 0.44752420724152175, "grad_norm": 0.4466932415962219, "learning_rate": 9.577447742617352e-06, "loss": 0.3876, "step": 9752 }, { "epoch": 0.4475700977467762, "grad_norm": 0.49270591139793396, "learning_rate": 9.577349087781277e-06, "loss": 0.3693, "step": 9753 }, { "epoch": 0.44761598825203064, "grad_norm": 0.4785556495189667, "learning_rate": 9.57725042193812e-06, "loss": 0.3814, "step": 9754 }, { "epoch": 0.44766187875728514, "grad_norm": 0.5062205195426941, "learning_rate": 9.57715174508812e-06, "loss": 0.458, "step": 9755 }, { "epoch": 0.4477077692625396, "grad_norm": 0.4693039357662201, "learning_rate": 9.577053057231517e-06, "loss": 0.4285, "step": 9756 }, { "epoch": 0.447753659767794, "grad_norm": 0.45526188611984253, "learning_rate": 9.576954358368545e-06, "loss": 0.3891, "step": 9757 }, { "epoch": 0.4477995502730485, "grad_norm": 0.4523662328720093, "learning_rate": 9.576855648499441e-06, "loss": 0.394, "step": 9758 }, { "epoch": 0.44784544077830296, "grad_norm": 0.49156418442726135, "learning_rate": 9.576756927624445e-06, "loss": 0.437, "step": 9759 }, { "epoch": 0.44789133128355746, "grad_norm": 0.489103227853775, "learning_rate": 9.576658195743793e-06, "loss": 0.4246, "step": 9760 }, { "epoch": 0.4479372217888119, "grad_norm": 0.4653508961200714, "learning_rate": 9.576559452857724e-06, "loss": 0.4119, "step": 9761 }, { "epoch": 0.44798311229406634, "grad_norm": 0.4668499529361725, "learning_rate": 9.576460698966471e-06, "loss": 0.4023, "step": 9762 }, { "epoch": 0.44802900279932084, "grad_norm": 0.4694982171058655, "learning_rate": 9.576361934070275e-06, "loss": 0.4737, "step": 9763 }, { "epoch": 0.4480748933045753, "grad_norm": 0.5218654870986938, "learning_rate": 9.576263158169375e-06, "loss": 0.5114, "step": 9764 }, { "epoch": 0.4481207838098297, "grad_norm": 0.473928302526474, "learning_rate": 9.576164371264004e-06, "loss": 0.4333, "step": 9765 }, { "epoch": 0.4481666743150842, "grad_norm": 0.4562360644340515, "learning_rate": 9.576065573354402e-06, "loss": 0.4024, "step": 9766 }, { "epoch": 0.44821256482033867, "grad_norm": 0.47578755021095276, "learning_rate": 9.575966764440809e-06, "loss": 0.3949, "step": 9767 }, { "epoch": 0.4482584553255931, "grad_norm": 0.4578250050544739, "learning_rate": 9.575867944523459e-06, "loss": 0.4231, "step": 9768 }, { "epoch": 0.4483043458308476, "grad_norm": 0.4514824151992798, "learning_rate": 9.57576911360259e-06, "loss": 0.3135, "step": 9769 }, { "epoch": 0.44835023633610205, "grad_norm": 0.4930398166179657, "learning_rate": 9.57567027167844e-06, "loss": 0.4383, "step": 9770 }, { "epoch": 0.44839612684135655, "grad_norm": 0.44422447681427, "learning_rate": 9.575571418751248e-06, "loss": 0.336, "step": 9771 }, { "epoch": 0.448442017346611, "grad_norm": 0.49810612201690674, "learning_rate": 9.575472554821251e-06, "loss": 0.4321, "step": 9772 }, { "epoch": 0.44848790785186543, "grad_norm": 0.4803476333618164, "learning_rate": 9.575373679888687e-06, "loss": 0.4501, "step": 9773 }, { "epoch": 0.44853379835711993, "grad_norm": 0.5777461528778076, "learning_rate": 9.575274793953792e-06, "loss": 0.3456, "step": 9774 }, { "epoch": 0.44857968886237437, "grad_norm": 0.514886736869812, "learning_rate": 9.575175897016806e-06, "loss": 0.4582, "step": 9775 }, { "epoch": 0.4486255793676288, "grad_norm": 0.48452407121658325, "learning_rate": 9.575076989077965e-06, "loss": 0.4077, "step": 9776 }, { "epoch": 0.4486714698728833, "grad_norm": 0.467336505651474, "learning_rate": 9.57497807013751e-06, "loss": 0.3709, "step": 9777 }, { "epoch": 0.44871736037813775, "grad_norm": 0.4870755672454834, "learning_rate": 9.574879140195674e-06, "loss": 0.4272, "step": 9778 }, { "epoch": 0.44876325088339225, "grad_norm": 0.4655212163925171, "learning_rate": 9.574780199252699e-06, "loss": 0.407, "step": 9779 }, { "epoch": 0.4488091413886467, "grad_norm": 0.4765538275241852, "learning_rate": 9.574681247308821e-06, "loss": 0.3801, "step": 9780 }, { "epoch": 0.44885503189390114, "grad_norm": 0.5006573796272278, "learning_rate": 9.574582284364279e-06, "loss": 0.4318, "step": 9781 }, { "epoch": 0.44890092239915563, "grad_norm": 0.46182742714881897, "learning_rate": 9.57448331041931e-06, "loss": 0.3596, "step": 9782 }, { "epoch": 0.4489468129044101, "grad_norm": 0.5996143817901611, "learning_rate": 9.574384325474152e-06, "loss": 0.3796, "step": 9783 }, { "epoch": 0.4489927034096645, "grad_norm": 0.4312472343444824, "learning_rate": 9.574285329529044e-06, "loss": 0.3661, "step": 9784 }, { "epoch": 0.449038593914919, "grad_norm": 0.44791820645332336, "learning_rate": 9.574186322584223e-06, "loss": 0.3896, "step": 9785 }, { "epoch": 0.44908448442017346, "grad_norm": 0.4694032371044159, "learning_rate": 9.574087304639928e-06, "loss": 0.4215, "step": 9786 }, { "epoch": 0.44913037492542796, "grad_norm": 0.4973786473274231, "learning_rate": 9.573988275696396e-06, "loss": 0.5019, "step": 9787 }, { "epoch": 0.4491762654306824, "grad_norm": 0.5054702758789062, "learning_rate": 9.573889235753865e-06, "loss": 0.5471, "step": 9788 }, { "epoch": 0.44922215593593684, "grad_norm": 0.507432222366333, "learning_rate": 9.573790184812576e-06, "loss": 0.4276, "step": 9789 }, { "epoch": 0.44926804644119134, "grad_norm": 0.41484472155570984, "learning_rate": 9.573691122872766e-06, "loss": 0.3224, "step": 9790 }, { "epoch": 0.4493139369464458, "grad_norm": 0.46572345495224, "learning_rate": 9.57359204993467e-06, "loss": 0.4246, "step": 9791 }, { "epoch": 0.4493598274517002, "grad_norm": 0.47469544410705566, "learning_rate": 9.573492965998529e-06, "loss": 0.3825, "step": 9792 }, { "epoch": 0.4494057179569547, "grad_norm": 0.47324785590171814, "learning_rate": 9.573393871064581e-06, "loss": 0.3947, "step": 9793 }, { "epoch": 0.44945160846220916, "grad_norm": 0.445686936378479, "learning_rate": 9.573294765133064e-06, "loss": 0.4203, "step": 9794 }, { "epoch": 0.4494974989674636, "grad_norm": 0.4478321671485901, "learning_rate": 9.573195648204216e-06, "loss": 0.3496, "step": 9795 }, { "epoch": 0.4495433894727181, "grad_norm": 0.4292794167995453, "learning_rate": 9.573096520278277e-06, "loss": 0.3274, "step": 9796 }, { "epoch": 0.44958927997797254, "grad_norm": 0.45690569281578064, "learning_rate": 9.572997381355485e-06, "loss": 0.3853, "step": 9797 }, { "epoch": 0.44963517048322704, "grad_norm": 0.5141229033470154, "learning_rate": 9.572898231436077e-06, "loss": 0.4877, "step": 9798 }, { "epoch": 0.4496810609884815, "grad_norm": 0.4356003701686859, "learning_rate": 9.57279907052029e-06, "loss": 0.3691, "step": 9799 }, { "epoch": 0.4497269514937359, "grad_norm": 0.4536591172218323, "learning_rate": 9.572699898608366e-06, "loss": 0.3818, "step": 9800 }, { "epoch": 0.4497728419989904, "grad_norm": 0.4678402543067932, "learning_rate": 9.572600715700542e-06, "loss": 0.446, "step": 9801 }, { "epoch": 0.44981873250424487, "grad_norm": 0.4600459635257721, "learning_rate": 9.572501521797056e-06, "loss": 0.4137, "step": 9802 }, { "epoch": 0.4498646230094993, "grad_norm": 0.4637428820133209, "learning_rate": 9.572402316898147e-06, "loss": 0.3978, "step": 9803 }, { "epoch": 0.4499105135147538, "grad_norm": 0.470689594745636, "learning_rate": 9.572303101004052e-06, "loss": 0.4394, "step": 9804 }, { "epoch": 0.44995640402000825, "grad_norm": 0.5019899010658264, "learning_rate": 9.572203874115014e-06, "loss": 0.5465, "step": 9805 }, { "epoch": 0.45000229452526275, "grad_norm": 0.47686177492141724, "learning_rate": 9.572104636231267e-06, "loss": 0.4644, "step": 9806 }, { "epoch": 0.4500481850305172, "grad_norm": 0.45432478189468384, "learning_rate": 9.572005387353052e-06, "loss": 0.3873, "step": 9807 }, { "epoch": 0.45009407553577163, "grad_norm": 0.4617711901664734, "learning_rate": 9.571906127480605e-06, "loss": 0.3755, "step": 9808 }, { "epoch": 0.45013996604102613, "grad_norm": 0.4422835111618042, "learning_rate": 9.571806856614169e-06, "loss": 0.3231, "step": 9809 }, { "epoch": 0.45018585654628057, "grad_norm": 0.47981172800064087, "learning_rate": 9.57170757475398e-06, "loss": 0.4628, "step": 9810 }, { "epoch": 0.450231747051535, "grad_norm": 0.43930914998054504, "learning_rate": 9.571608281900278e-06, "loss": 0.3544, "step": 9811 }, { "epoch": 0.4502776375567895, "grad_norm": 0.4524284899234772, "learning_rate": 9.5715089780533e-06, "loss": 0.4185, "step": 9812 }, { "epoch": 0.45032352806204395, "grad_norm": 0.46251779794692993, "learning_rate": 9.571409663213285e-06, "loss": 0.4108, "step": 9813 }, { "epoch": 0.45036941856729845, "grad_norm": 0.4838874638080597, "learning_rate": 9.571310337380474e-06, "loss": 0.4312, "step": 9814 }, { "epoch": 0.4504153090725529, "grad_norm": 0.46627694368362427, "learning_rate": 9.571211000555102e-06, "loss": 0.4183, "step": 9815 }, { "epoch": 0.45046119957780734, "grad_norm": 0.4588378667831421, "learning_rate": 9.571111652737413e-06, "loss": 0.4412, "step": 9816 }, { "epoch": 0.45050709008306183, "grad_norm": 0.4505850076675415, "learning_rate": 9.571012293927642e-06, "loss": 0.408, "step": 9817 }, { "epoch": 0.4505529805883163, "grad_norm": 0.5060086250305176, "learning_rate": 9.570912924126028e-06, "loss": 0.5212, "step": 9818 }, { "epoch": 0.4505988710935707, "grad_norm": 0.45375683903694153, "learning_rate": 9.570813543332813e-06, "loss": 0.3957, "step": 9819 }, { "epoch": 0.4506447615988252, "grad_norm": 0.44547316431999207, "learning_rate": 9.570714151548233e-06, "loss": 0.3901, "step": 9820 }, { "epoch": 0.45069065210407966, "grad_norm": 0.46824169158935547, "learning_rate": 9.570614748772529e-06, "loss": 0.4406, "step": 9821 }, { "epoch": 0.45073654260933416, "grad_norm": 0.5276828408241272, "learning_rate": 9.570515335005937e-06, "loss": 0.4566, "step": 9822 }, { "epoch": 0.4507824331145886, "grad_norm": 0.45947471261024475, "learning_rate": 9.5704159102487e-06, "loss": 0.4443, "step": 9823 }, { "epoch": 0.45082832361984304, "grad_norm": 0.46542027592658997, "learning_rate": 9.570316474501055e-06, "loss": 0.398, "step": 9824 }, { "epoch": 0.45087421412509754, "grad_norm": 0.4361060857772827, "learning_rate": 9.57021702776324e-06, "loss": 0.3282, "step": 9825 }, { "epoch": 0.450920104630352, "grad_norm": 0.41409507393836975, "learning_rate": 9.570117570035496e-06, "loss": 0.3092, "step": 9826 }, { "epoch": 0.4509659951356064, "grad_norm": 0.44705623388290405, "learning_rate": 9.570018101318062e-06, "loss": 0.341, "step": 9827 }, { "epoch": 0.4510118856408609, "grad_norm": 0.46038907766342163, "learning_rate": 9.569918621611177e-06, "loss": 0.3685, "step": 9828 }, { "epoch": 0.45105777614611536, "grad_norm": 0.44764235615730286, "learning_rate": 9.56981913091508e-06, "loss": 0.3608, "step": 9829 }, { "epoch": 0.4511036666513698, "grad_norm": 0.5124440789222717, "learning_rate": 9.56971962923001e-06, "loss": 0.4526, "step": 9830 }, { "epoch": 0.4511495571566243, "grad_norm": 0.49712926149368286, "learning_rate": 9.569620116556206e-06, "loss": 0.4519, "step": 9831 }, { "epoch": 0.45119544766187875, "grad_norm": 0.42241939902305603, "learning_rate": 9.569520592893908e-06, "loss": 0.3309, "step": 9832 }, { "epoch": 0.45124133816713324, "grad_norm": 0.4900880455970764, "learning_rate": 9.569421058243355e-06, "loss": 0.4743, "step": 9833 }, { "epoch": 0.4512872286723877, "grad_norm": 0.46104082465171814, "learning_rate": 9.569321512604788e-06, "loss": 0.3815, "step": 9834 }, { "epoch": 0.45133311917764213, "grad_norm": 0.4504721462726593, "learning_rate": 9.569221955978444e-06, "loss": 0.3956, "step": 9835 }, { "epoch": 0.4513790096828966, "grad_norm": 0.4503343105316162, "learning_rate": 9.569122388364564e-06, "loss": 0.3717, "step": 9836 }, { "epoch": 0.45142490018815107, "grad_norm": 0.4914402365684509, "learning_rate": 9.569022809763385e-06, "loss": 0.475, "step": 9837 }, { "epoch": 0.4514707906934055, "grad_norm": 0.4153030812740326, "learning_rate": 9.56892322017515e-06, "loss": 0.3246, "step": 9838 }, { "epoch": 0.45151668119866, "grad_norm": 0.4414408504962921, "learning_rate": 9.568823619600095e-06, "loss": 0.3837, "step": 9839 }, { "epoch": 0.45156257170391445, "grad_norm": 0.4685708284378052, "learning_rate": 9.56872400803846e-06, "loss": 0.3944, "step": 9840 }, { "epoch": 0.45160846220916895, "grad_norm": 0.5370137691497803, "learning_rate": 9.56862438549049e-06, "loss": 0.4182, "step": 9841 }, { "epoch": 0.4516543527144234, "grad_norm": 0.42223286628723145, "learning_rate": 9.568524751956417e-06, "loss": 0.3517, "step": 9842 }, { "epoch": 0.45170024321967783, "grad_norm": 0.45244261622428894, "learning_rate": 9.568425107436485e-06, "loss": 0.427, "step": 9843 }, { "epoch": 0.45174613372493233, "grad_norm": 0.4518932104110718, "learning_rate": 9.568325451930932e-06, "loss": 0.3763, "step": 9844 }, { "epoch": 0.4517920242301868, "grad_norm": 0.46881550550460815, "learning_rate": 9.568225785439999e-06, "loss": 0.439, "step": 9845 }, { "epoch": 0.4518379147354412, "grad_norm": 0.42553529143333435, "learning_rate": 9.568126107963924e-06, "loss": 0.3393, "step": 9846 }, { "epoch": 0.4518838052406957, "grad_norm": 0.4750100076198578, "learning_rate": 9.568026419502947e-06, "loss": 0.478, "step": 9847 }, { "epoch": 0.45192969574595016, "grad_norm": 0.48248741030693054, "learning_rate": 9.567926720057308e-06, "loss": 0.4431, "step": 9848 }, { "epoch": 0.45197558625120465, "grad_norm": 0.45480674505233765, "learning_rate": 9.567827009627248e-06, "loss": 0.3805, "step": 9849 }, { "epoch": 0.4520214767564591, "grad_norm": 0.4876725673675537, "learning_rate": 9.567727288213005e-06, "loss": 0.4161, "step": 9850 }, { "epoch": 0.45206736726171354, "grad_norm": 0.47479158639907837, "learning_rate": 9.567627555814821e-06, "loss": 0.394, "step": 9851 }, { "epoch": 0.45211325776696804, "grad_norm": 0.4589460790157318, "learning_rate": 9.567527812432932e-06, "loss": 0.3451, "step": 9852 }, { "epoch": 0.4521591482722225, "grad_norm": 0.46952447295188904, "learning_rate": 9.56742805806758e-06, "loss": 0.4182, "step": 9853 }, { "epoch": 0.4522050387774769, "grad_norm": 0.4585420489311218, "learning_rate": 9.567328292719007e-06, "loss": 0.3487, "step": 9854 }, { "epoch": 0.4522509292827314, "grad_norm": 0.4362148642539978, "learning_rate": 9.56722851638745e-06, "loss": 0.3445, "step": 9855 }, { "epoch": 0.45229681978798586, "grad_norm": 0.4377683103084564, "learning_rate": 9.56712872907315e-06, "loss": 0.3838, "step": 9856 }, { "epoch": 0.4523427102932403, "grad_norm": 0.4936470687389374, "learning_rate": 9.567028930776347e-06, "loss": 0.4501, "step": 9857 }, { "epoch": 0.4523886007984948, "grad_norm": 0.46923792362213135, "learning_rate": 9.566929121497282e-06, "loss": 0.4205, "step": 9858 }, { "epoch": 0.45243449130374924, "grad_norm": 0.44804394245147705, "learning_rate": 9.56682930123619e-06, "loss": 0.3546, "step": 9859 }, { "epoch": 0.45248038180900374, "grad_norm": 0.481635183095932, "learning_rate": 9.56672946999332e-06, "loss": 0.455, "step": 9860 }, { "epoch": 0.4525262723142582, "grad_norm": 0.48465442657470703, "learning_rate": 9.566629627768904e-06, "loss": 0.4507, "step": 9861 }, { "epoch": 0.4525721628195126, "grad_norm": 0.46866318583488464, "learning_rate": 9.566529774563185e-06, "loss": 0.4042, "step": 9862 }, { "epoch": 0.4526180533247671, "grad_norm": 0.48285502195358276, "learning_rate": 9.566429910376402e-06, "loss": 0.4134, "step": 9863 }, { "epoch": 0.45266394383002156, "grad_norm": 0.483559787273407, "learning_rate": 9.566330035208799e-06, "loss": 0.4087, "step": 9864 }, { "epoch": 0.452709834335276, "grad_norm": 0.46345236897468567, "learning_rate": 9.566230149060611e-06, "loss": 0.3767, "step": 9865 }, { "epoch": 0.4527557248405305, "grad_norm": 0.4565286636352539, "learning_rate": 9.566130251932081e-06, "loss": 0.4095, "step": 9866 }, { "epoch": 0.45280161534578495, "grad_norm": 0.4623097777366638, "learning_rate": 9.566030343823452e-06, "loss": 0.3953, "step": 9867 }, { "epoch": 0.45284750585103944, "grad_norm": 0.4674161374568939, "learning_rate": 9.565930424734959e-06, "loss": 0.4443, "step": 9868 }, { "epoch": 0.4528933963562939, "grad_norm": 0.46204760670661926, "learning_rate": 9.565830494666843e-06, "loss": 0.3836, "step": 9869 }, { "epoch": 0.45293928686154833, "grad_norm": 0.49224013090133667, "learning_rate": 9.565730553619348e-06, "loss": 0.446, "step": 9870 }, { "epoch": 0.4529851773668028, "grad_norm": 0.47723156213760376, "learning_rate": 9.56563060159271e-06, "loss": 0.3997, "step": 9871 }, { "epoch": 0.45303106787205727, "grad_norm": 0.4757527709007263, "learning_rate": 9.565530638587172e-06, "loss": 0.3889, "step": 9872 }, { "epoch": 0.4530769583773117, "grad_norm": 0.5213149785995483, "learning_rate": 9.565430664602974e-06, "loss": 0.4369, "step": 9873 }, { "epoch": 0.4531228488825662, "grad_norm": 0.47971388697624207, "learning_rate": 9.565330679640357e-06, "loss": 0.4328, "step": 9874 }, { "epoch": 0.45316873938782065, "grad_norm": 0.4483637809753418, "learning_rate": 9.565230683699559e-06, "loss": 0.3169, "step": 9875 }, { "epoch": 0.45321462989307515, "grad_norm": 0.4834587872028351, "learning_rate": 9.565130676780822e-06, "loss": 0.4584, "step": 9876 }, { "epoch": 0.4532605203983296, "grad_norm": 0.4770866334438324, "learning_rate": 9.565030658884388e-06, "loss": 0.4218, "step": 9877 }, { "epoch": 0.45330641090358403, "grad_norm": 0.4676073491573334, "learning_rate": 9.564930630010495e-06, "loss": 0.4257, "step": 9878 }, { "epoch": 0.45335230140883853, "grad_norm": 0.5158021450042725, "learning_rate": 9.564830590159383e-06, "loss": 0.404, "step": 9879 }, { "epoch": 0.453398191914093, "grad_norm": 0.4540286660194397, "learning_rate": 9.564730539331297e-06, "loss": 0.3928, "step": 9880 }, { "epoch": 0.4534440824193474, "grad_norm": 0.42820119857788086, "learning_rate": 9.564630477526474e-06, "loss": 0.2926, "step": 9881 }, { "epoch": 0.4534899729246019, "grad_norm": 0.44111403822898865, "learning_rate": 9.564530404745156e-06, "loss": 0.3866, "step": 9882 }, { "epoch": 0.45353586342985636, "grad_norm": 0.4540165364742279, "learning_rate": 9.56443032098758e-06, "loss": 0.3712, "step": 9883 }, { "epoch": 0.4535817539351108, "grad_norm": 0.7026460766792297, "learning_rate": 9.564330226253992e-06, "loss": 0.3399, "step": 9884 }, { "epoch": 0.4536276444403653, "grad_norm": 0.4685457944869995, "learning_rate": 9.564230120544631e-06, "loss": 0.4083, "step": 9885 }, { "epoch": 0.45367353494561974, "grad_norm": 0.5072273015975952, "learning_rate": 9.564130003859735e-06, "loss": 0.4957, "step": 9886 }, { "epoch": 0.45371942545087424, "grad_norm": 0.4591566324234009, "learning_rate": 9.564029876199549e-06, "loss": 0.4106, "step": 9887 }, { "epoch": 0.4537653159561287, "grad_norm": 0.4739809036254883, "learning_rate": 9.563929737564311e-06, "loss": 0.496, "step": 9888 }, { "epoch": 0.4538112064613831, "grad_norm": 0.43732163310050964, "learning_rate": 9.56382958795426e-06, "loss": 0.4069, "step": 9889 }, { "epoch": 0.4538570969666376, "grad_norm": 0.49638670682907104, "learning_rate": 9.56372942736964e-06, "loss": 0.4804, "step": 9890 }, { "epoch": 0.45390298747189206, "grad_norm": 0.5043736100196838, "learning_rate": 9.563629255810693e-06, "loss": 0.481, "step": 9891 }, { "epoch": 0.4539488779771465, "grad_norm": 0.44765156507492065, "learning_rate": 9.563529073277659e-06, "loss": 0.3717, "step": 9892 }, { "epoch": 0.453994768482401, "grad_norm": 0.42711880803108215, "learning_rate": 9.563428879770775e-06, "loss": 0.3024, "step": 9893 }, { "epoch": 0.45404065898765544, "grad_norm": 0.4751996397972107, "learning_rate": 9.563328675290287e-06, "loss": 0.4168, "step": 9894 }, { "epoch": 0.45408654949290994, "grad_norm": 0.4565356969833374, "learning_rate": 9.563228459836432e-06, "loss": 0.3708, "step": 9895 }, { "epoch": 0.4541324399981644, "grad_norm": 0.4587240517139435, "learning_rate": 9.563128233409453e-06, "loss": 0.4059, "step": 9896 }, { "epoch": 0.4541783305034188, "grad_norm": 0.47915181517601013, "learning_rate": 9.56302799600959e-06, "loss": 0.4249, "step": 9897 }, { "epoch": 0.4542242210086733, "grad_norm": 0.5254428386688232, "learning_rate": 9.562927747637087e-06, "loss": 0.4322, "step": 9898 }, { "epoch": 0.45427011151392777, "grad_norm": 0.43688803911209106, "learning_rate": 9.562827488292181e-06, "loss": 0.3284, "step": 9899 }, { "epoch": 0.4543160020191822, "grad_norm": 0.5806544423103333, "learning_rate": 9.562727217975116e-06, "loss": 0.5325, "step": 9900 }, { "epoch": 0.4543618925244367, "grad_norm": 0.4851280152797699, "learning_rate": 9.562626936686133e-06, "loss": 0.4538, "step": 9901 }, { "epoch": 0.45440778302969115, "grad_norm": 0.4443572461605072, "learning_rate": 9.56252664442547e-06, "loss": 0.3763, "step": 9902 }, { "epoch": 0.45445367353494565, "grad_norm": 0.45183512568473816, "learning_rate": 9.562426341193372e-06, "loss": 0.3728, "step": 9903 }, { "epoch": 0.4544995640402001, "grad_norm": 0.47146010398864746, "learning_rate": 9.562326026990077e-06, "loss": 0.3879, "step": 9904 }, { "epoch": 0.45454545454545453, "grad_norm": 0.5086910128593445, "learning_rate": 9.56222570181583e-06, "loss": 0.4595, "step": 9905 }, { "epoch": 0.45459134505070903, "grad_norm": 0.4530327022075653, "learning_rate": 9.562125365670869e-06, "loss": 0.3851, "step": 9906 }, { "epoch": 0.45463723555596347, "grad_norm": 0.4583710730075836, "learning_rate": 9.562025018555437e-06, "loss": 0.4029, "step": 9907 }, { "epoch": 0.4546831260612179, "grad_norm": 0.48848941922187805, "learning_rate": 9.561924660469773e-06, "loss": 0.4501, "step": 9908 }, { "epoch": 0.4547290165664724, "grad_norm": 0.4487016201019287, "learning_rate": 9.561824291414121e-06, "loss": 0.3288, "step": 9909 }, { "epoch": 0.45477490707172685, "grad_norm": 0.47554028034210205, "learning_rate": 9.56172391138872e-06, "loss": 0.4681, "step": 9910 }, { "epoch": 0.4548207975769813, "grad_norm": 0.48326393961906433, "learning_rate": 9.561623520393815e-06, "loss": 0.3799, "step": 9911 }, { "epoch": 0.4548666880822358, "grad_norm": 0.4342602789402008, "learning_rate": 9.561523118429642e-06, "loss": 0.364, "step": 9912 }, { "epoch": 0.45491257858749024, "grad_norm": 0.4346505403518677, "learning_rate": 9.561422705496449e-06, "loss": 0.3384, "step": 9913 }, { "epoch": 0.45495846909274473, "grad_norm": 0.45098307728767395, "learning_rate": 9.561322281594473e-06, "loss": 0.352, "step": 9914 }, { "epoch": 0.4550043595979992, "grad_norm": 0.4688247740268707, "learning_rate": 9.561221846723956e-06, "loss": 0.3906, "step": 9915 }, { "epoch": 0.4550502501032536, "grad_norm": 0.46275565028190613, "learning_rate": 9.56112140088514e-06, "loss": 0.4409, "step": 9916 }, { "epoch": 0.4550961406085081, "grad_norm": 0.4721828103065491, "learning_rate": 9.561020944078266e-06, "loss": 0.3938, "step": 9917 }, { "epoch": 0.45514203111376256, "grad_norm": 0.42936527729034424, "learning_rate": 9.560920476303577e-06, "loss": 0.3459, "step": 9918 }, { "epoch": 0.455187921619017, "grad_norm": 0.48279470205307007, "learning_rate": 9.560819997561314e-06, "loss": 0.4704, "step": 9919 }, { "epoch": 0.4552338121242715, "grad_norm": 0.47656986117362976, "learning_rate": 9.560719507851717e-06, "loss": 0.4162, "step": 9920 }, { "epoch": 0.45527970262952594, "grad_norm": 0.43818405270576477, "learning_rate": 9.560619007175031e-06, "loss": 0.3781, "step": 9921 }, { "epoch": 0.45532559313478044, "grad_norm": 0.4752810597419739, "learning_rate": 9.560518495531495e-06, "loss": 0.4059, "step": 9922 }, { "epoch": 0.4553714836400349, "grad_norm": 0.47806331515312195, "learning_rate": 9.56041797292135e-06, "loss": 0.3955, "step": 9923 }, { "epoch": 0.4554173741452893, "grad_norm": 0.4881223142147064, "learning_rate": 9.56031743934484e-06, "loss": 0.4556, "step": 9924 }, { "epoch": 0.4554632646505438, "grad_norm": 0.8381417989730835, "learning_rate": 9.560216894802206e-06, "loss": 0.4527, "step": 9925 }, { "epoch": 0.45550915515579826, "grad_norm": 0.48614510893821716, "learning_rate": 9.56011633929369e-06, "loss": 0.438, "step": 9926 }, { "epoch": 0.4555550456610527, "grad_norm": 0.45397496223449707, "learning_rate": 9.560015772819533e-06, "loss": 0.3989, "step": 9927 }, { "epoch": 0.4556009361663072, "grad_norm": 0.4825592339038849, "learning_rate": 9.559915195379976e-06, "loss": 0.4072, "step": 9928 }, { "epoch": 0.45564682667156164, "grad_norm": 0.4833358824253082, "learning_rate": 9.559814606975265e-06, "loss": 0.4568, "step": 9929 }, { "epoch": 0.45569271717681614, "grad_norm": 0.49016672372817993, "learning_rate": 9.559714007605637e-06, "loss": 0.4164, "step": 9930 }, { "epoch": 0.4557386076820706, "grad_norm": 0.48718762397766113, "learning_rate": 9.559613397271335e-06, "loss": 0.4291, "step": 9931 }, { "epoch": 0.455784498187325, "grad_norm": 0.538737416267395, "learning_rate": 9.559512775972604e-06, "loss": 0.589, "step": 9932 }, { "epoch": 0.4558303886925795, "grad_norm": 0.5791155099868774, "learning_rate": 9.559412143709681e-06, "loss": 0.5949, "step": 9933 }, { "epoch": 0.45587627919783397, "grad_norm": 0.48282018303871155, "learning_rate": 9.559311500482814e-06, "loss": 0.4559, "step": 9934 }, { "epoch": 0.4559221697030884, "grad_norm": 0.486346572637558, "learning_rate": 9.55921084629224e-06, "loss": 0.3804, "step": 9935 }, { "epoch": 0.4559680602083429, "grad_norm": 0.468128502368927, "learning_rate": 9.559110181138203e-06, "loss": 0.4217, "step": 9936 }, { "epoch": 0.45601395071359735, "grad_norm": 0.45098164677619934, "learning_rate": 9.559009505020944e-06, "loss": 0.3715, "step": 9937 }, { "epoch": 0.45605984121885185, "grad_norm": 0.4745773673057556, "learning_rate": 9.558908817940707e-06, "loss": 0.4245, "step": 9938 }, { "epoch": 0.4561057317241063, "grad_norm": 0.4722738564014435, "learning_rate": 9.558808119897733e-06, "loss": 0.4258, "step": 9939 }, { "epoch": 0.45615162222936073, "grad_norm": 0.4512943625450134, "learning_rate": 9.558707410892265e-06, "loss": 0.3406, "step": 9940 }, { "epoch": 0.45619751273461523, "grad_norm": 0.4353412687778473, "learning_rate": 9.558606690924543e-06, "loss": 0.354, "step": 9941 }, { "epoch": 0.45624340323986967, "grad_norm": 0.4160791039466858, "learning_rate": 9.558505959994813e-06, "loss": 0.2931, "step": 9942 }, { "epoch": 0.4562892937451241, "grad_norm": 0.45235979557037354, "learning_rate": 9.558405218103313e-06, "loss": 0.4079, "step": 9943 }, { "epoch": 0.4563351842503786, "grad_norm": 0.4443121552467346, "learning_rate": 9.558304465250287e-06, "loss": 0.3518, "step": 9944 }, { "epoch": 0.45638107475563305, "grad_norm": 0.519679844379425, "learning_rate": 9.558203701435979e-06, "loss": 0.4479, "step": 9945 }, { "epoch": 0.4564269652608875, "grad_norm": 0.46667245030403137, "learning_rate": 9.558102926660629e-06, "loss": 0.3999, "step": 9946 }, { "epoch": 0.456472855766142, "grad_norm": 0.4934587776660919, "learning_rate": 9.55800214092448e-06, "loss": 0.4411, "step": 9947 }, { "epoch": 0.45651874627139644, "grad_norm": 0.5010746121406555, "learning_rate": 9.557901344227774e-06, "loss": 0.5098, "step": 9948 }, { "epoch": 0.45656463677665093, "grad_norm": 0.46531081199645996, "learning_rate": 9.557800536570754e-06, "loss": 0.452, "step": 9949 }, { "epoch": 0.4566105272819054, "grad_norm": 0.4719448983669281, "learning_rate": 9.557699717953661e-06, "loss": 0.4586, "step": 9950 }, { "epoch": 0.4566564177871598, "grad_norm": 0.5225871801376343, "learning_rate": 9.55759888837674e-06, "loss": 0.4823, "step": 9951 }, { "epoch": 0.4567023082924143, "grad_norm": 0.4800194203853607, "learning_rate": 9.557498047840232e-06, "loss": 0.4404, "step": 9952 }, { "epoch": 0.45674819879766876, "grad_norm": 0.45608821511268616, "learning_rate": 9.55739719634438e-06, "loss": 0.4175, "step": 9953 }, { "epoch": 0.4567940893029232, "grad_norm": 0.45326918363571167, "learning_rate": 9.557296333889425e-06, "loss": 0.3586, "step": 9954 }, { "epoch": 0.4568399798081777, "grad_norm": 0.4494493901729584, "learning_rate": 9.55719546047561e-06, "loss": 0.3591, "step": 9955 }, { "epoch": 0.45688587031343214, "grad_norm": 0.4518440365791321, "learning_rate": 9.557094576103181e-06, "loss": 0.4096, "step": 9956 }, { "epoch": 0.45693176081868664, "grad_norm": 0.520563006401062, "learning_rate": 9.556993680772375e-06, "loss": 0.5272, "step": 9957 }, { "epoch": 0.4569776513239411, "grad_norm": 0.507922887802124, "learning_rate": 9.55689277448344e-06, "loss": 0.4813, "step": 9958 }, { "epoch": 0.4570235418291955, "grad_norm": 0.4522140622138977, "learning_rate": 9.556791857236613e-06, "loss": 0.4013, "step": 9959 }, { "epoch": 0.45706943233445, "grad_norm": 0.4685266315937042, "learning_rate": 9.556690929032142e-06, "loss": 0.4915, "step": 9960 }, { "epoch": 0.45711532283970446, "grad_norm": 0.4657999873161316, "learning_rate": 9.556589989870265e-06, "loss": 0.4688, "step": 9961 }, { "epoch": 0.4571612133449589, "grad_norm": 0.4579140841960907, "learning_rate": 9.556489039751231e-06, "loss": 0.413, "step": 9962 }, { "epoch": 0.4572071038502134, "grad_norm": 0.45779508352279663, "learning_rate": 9.556388078675276e-06, "loss": 0.4171, "step": 9963 }, { "epoch": 0.45725299435546785, "grad_norm": 0.42638781666755676, "learning_rate": 9.556287106642647e-06, "loss": 0.3325, "step": 9964 }, { "epoch": 0.45729888486072234, "grad_norm": 0.47515642642974854, "learning_rate": 9.556186123653587e-06, "loss": 0.4014, "step": 9965 }, { "epoch": 0.4573447753659768, "grad_norm": 0.47173863649368286, "learning_rate": 9.556085129708334e-06, "loss": 0.4416, "step": 9966 }, { "epoch": 0.45739066587123123, "grad_norm": 0.4339618384838104, "learning_rate": 9.555984124807136e-06, "loss": 0.3945, "step": 9967 }, { "epoch": 0.4574365563764857, "grad_norm": 0.4697257876396179, "learning_rate": 9.555883108950234e-06, "loss": 0.4495, "step": 9968 }, { "epoch": 0.45748244688174017, "grad_norm": 0.45135417580604553, "learning_rate": 9.55578208213787e-06, "loss": 0.3734, "step": 9969 }, { "epoch": 0.4575283373869946, "grad_norm": 0.4891314208507538, "learning_rate": 9.555681044370288e-06, "loss": 0.4063, "step": 9970 }, { "epoch": 0.4575742278922491, "grad_norm": 0.47307920455932617, "learning_rate": 9.555579995647732e-06, "loss": 0.4372, "step": 9971 }, { "epoch": 0.45762011839750355, "grad_norm": 0.4372274577617645, "learning_rate": 9.555478935970443e-06, "loss": 0.4241, "step": 9972 }, { "epoch": 0.457666008902758, "grad_norm": 0.47360700368881226, "learning_rate": 9.555377865338666e-06, "loss": 0.4516, "step": 9973 }, { "epoch": 0.4577118994080125, "grad_norm": 0.45366328954696655, "learning_rate": 9.555276783752643e-06, "loss": 0.3862, "step": 9974 }, { "epoch": 0.45775778991326693, "grad_norm": 0.4219350218772888, "learning_rate": 9.555175691212617e-06, "loss": 0.3428, "step": 9975 }, { "epoch": 0.45780368041852143, "grad_norm": 0.48012861609458923, "learning_rate": 9.55507458771883e-06, "loss": 0.4389, "step": 9976 }, { "epoch": 0.4578495709237759, "grad_norm": 0.4771484136581421, "learning_rate": 9.554973473271526e-06, "loss": 0.3932, "step": 9977 }, { "epoch": 0.4578954614290303, "grad_norm": 0.43656250834465027, "learning_rate": 9.55487234787095e-06, "loss": 0.3869, "step": 9978 }, { "epoch": 0.4579413519342848, "grad_norm": 0.4799874722957611, "learning_rate": 9.554771211517343e-06, "loss": 0.3874, "step": 9979 }, { "epoch": 0.45798724243953925, "grad_norm": 0.4342607855796814, "learning_rate": 9.554670064210948e-06, "loss": 0.3585, "step": 9980 }, { "epoch": 0.4580331329447937, "grad_norm": 0.47616249322891235, "learning_rate": 9.55456890595201e-06, "loss": 0.4598, "step": 9981 }, { "epoch": 0.4580790234500482, "grad_norm": 0.4475398361682892, "learning_rate": 9.554467736740771e-06, "loss": 0.3992, "step": 9982 }, { "epoch": 0.45812491395530264, "grad_norm": 0.4943822920322418, "learning_rate": 9.554366556577474e-06, "loss": 0.5044, "step": 9983 }, { "epoch": 0.45817080446055714, "grad_norm": 0.5043686032295227, "learning_rate": 9.554265365462364e-06, "loss": 0.5225, "step": 9984 }, { "epoch": 0.4582166949658116, "grad_norm": 0.44076502323150635, "learning_rate": 9.55416416339568e-06, "loss": 0.36, "step": 9985 }, { "epoch": 0.458262585471066, "grad_norm": 0.4140150249004364, "learning_rate": 9.554062950377672e-06, "loss": 0.3421, "step": 9986 }, { "epoch": 0.4583084759763205, "grad_norm": 0.46010980010032654, "learning_rate": 9.553961726408578e-06, "loss": 0.3757, "step": 9987 }, { "epoch": 0.45835436648157496, "grad_norm": 0.44241753220558167, "learning_rate": 9.553860491488644e-06, "loss": 0.3274, "step": 9988 }, { "epoch": 0.4584002569868294, "grad_norm": 0.43740734457969666, "learning_rate": 9.553759245618112e-06, "loss": 0.3575, "step": 9989 }, { "epoch": 0.4584461474920839, "grad_norm": 1.5007730722427368, "learning_rate": 9.553657988797228e-06, "loss": 0.5251, "step": 9990 }, { "epoch": 0.45849203799733834, "grad_norm": 0.5382006764411926, "learning_rate": 9.553556721026233e-06, "loss": 0.4733, "step": 9991 }, { "epoch": 0.45853792850259284, "grad_norm": 0.44896650314331055, "learning_rate": 9.55345544230537e-06, "loss": 0.3813, "step": 9992 }, { "epoch": 0.4585838190078473, "grad_norm": 0.49121126532554626, "learning_rate": 9.553354152634886e-06, "loss": 0.4258, "step": 9993 }, { "epoch": 0.4586297095131017, "grad_norm": 0.42866453528404236, "learning_rate": 9.55325285201502e-06, "loss": 0.3065, "step": 9994 }, { "epoch": 0.4586756000183562, "grad_norm": 0.5034466981887817, "learning_rate": 9.553151540446019e-06, "loss": 0.462, "step": 9995 }, { "epoch": 0.45872149052361066, "grad_norm": 0.48079365491867065, "learning_rate": 9.553050217928126e-06, "loss": 0.4196, "step": 9996 }, { "epoch": 0.4587673810288651, "grad_norm": 0.48676690459251404, "learning_rate": 9.552948884461584e-06, "loss": 0.393, "step": 9997 }, { "epoch": 0.4588132715341196, "grad_norm": 0.4471967816352844, "learning_rate": 9.552847540046636e-06, "loss": 0.414, "step": 9998 }, { "epoch": 0.45885916203937405, "grad_norm": 0.4555295705795288, "learning_rate": 9.552746184683528e-06, "loss": 0.3607, "step": 9999 }, { "epoch": 0.4589050525446285, "grad_norm": 0.49966102838516235, "learning_rate": 9.552644818372501e-06, "loss": 0.4345, "step": 10000 }, { "epoch": 0.458950943049883, "grad_norm": 0.4645128548145294, "learning_rate": 9.552543441113801e-06, "loss": 0.3502, "step": 10001 }, { "epoch": 0.45899683355513743, "grad_norm": 0.5004390478134155, "learning_rate": 9.552442052907672e-06, "loss": 0.4586, "step": 10002 }, { "epoch": 0.4590427240603919, "grad_norm": 0.5824493765830994, "learning_rate": 9.552340653754353e-06, "loss": 0.4122, "step": 10003 }, { "epoch": 0.45908861456564637, "grad_norm": 0.42458784580230713, "learning_rate": 9.552239243654094e-06, "loss": 0.3161, "step": 10004 }, { "epoch": 0.4591345050709008, "grad_norm": 0.5094610452651978, "learning_rate": 9.552137822607135e-06, "loss": 0.4611, "step": 10005 }, { "epoch": 0.4591803955761553, "grad_norm": 0.5777984261512756, "learning_rate": 9.552036390613724e-06, "loss": 0.5842, "step": 10006 }, { "epoch": 0.45922628608140975, "grad_norm": 0.4628232717514038, "learning_rate": 9.551934947674099e-06, "loss": 0.4088, "step": 10007 }, { "epoch": 0.4592721765866642, "grad_norm": 0.4700241982936859, "learning_rate": 9.55183349378851e-06, "loss": 0.3928, "step": 10008 }, { "epoch": 0.4593180670919187, "grad_norm": 0.4583022892475128, "learning_rate": 9.551732028957195e-06, "loss": 0.3695, "step": 10009 }, { "epoch": 0.45936395759717313, "grad_norm": 0.43629127740859985, "learning_rate": 9.551630553180401e-06, "loss": 0.3745, "step": 10010 }, { "epoch": 0.45940984810242763, "grad_norm": 0.4497649669647217, "learning_rate": 9.551529066458375e-06, "loss": 0.3664, "step": 10011 }, { "epoch": 0.4594557386076821, "grad_norm": 0.43785974383354187, "learning_rate": 9.551427568791357e-06, "loss": 0.328, "step": 10012 }, { "epoch": 0.4595016291129365, "grad_norm": 0.44024258852005005, "learning_rate": 9.551326060179593e-06, "loss": 0.3499, "step": 10013 }, { "epoch": 0.459547519618191, "grad_norm": 0.4628494381904602, "learning_rate": 9.551224540623324e-06, "loss": 0.4056, "step": 10014 }, { "epoch": 0.45959341012344546, "grad_norm": 0.4891040623188019, "learning_rate": 9.551123010122798e-06, "loss": 0.4342, "step": 10015 }, { "epoch": 0.4596393006286999, "grad_norm": 0.4771755039691925, "learning_rate": 9.551021468678256e-06, "loss": 0.4586, "step": 10016 }, { "epoch": 0.4596851911339544, "grad_norm": 0.450152724981308, "learning_rate": 9.550919916289945e-06, "loss": 0.3853, "step": 10017 }, { "epoch": 0.45973108163920884, "grad_norm": 0.4463956654071808, "learning_rate": 9.550818352958108e-06, "loss": 0.3803, "step": 10018 }, { "epoch": 0.45977697214446334, "grad_norm": 0.5033095479011536, "learning_rate": 9.55071677868299e-06, "loss": 0.5156, "step": 10019 }, { "epoch": 0.4598228626497178, "grad_norm": 0.4749075472354889, "learning_rate": 9.550615193464831e-06, "loss": 0.4763, "step": 10020 }, { "epoch": 0.4598687531549722, "grad_norm": 0.4581708610057831, "learning_rate": 9.55051359730388e-06, "loss": 0.3961, "step": 10021 }, { "epoch": 0.4599146436602267, "grad_norm": 0.4889867603778839, "learning_rate": 9.550411990200381e-06, "loss": 0.4524, "step": 10022 }, { "epoch": 0.45996053416548116, "grad_norm": 0.46329137682914734, "learning_rate": 9.550310372154579e-06, "loss": 0.3885, "step": 10023 }, { "epoch": 0.4600064246707356, "grad_norm": 0.455548495054245, "learning_rate": 9.550208743166715e-06, "loss": 0.4026, "step": 10024 }, { "epoch": 0.4600523151759901, "grad_norm": 0.4738123416900635, "learning_rate": 9.550107103237034e-06, "loss": 0.4446, "step": 10025 }, { "epoch": 0.46009820568124454, "grad_norm": 0.5240274667739868, "learning_rate": 9.550005452365783e-06, "loss": 0.4979, "step": 10026 }, { "epoch": 0.46014409618649904, "grad_norm": 0.43227049708366394, "learning_rate": 9.549903790553203e-06, "loss": 0.3814, "step": 10027 }, { "epoch": 0.4601899866917535, "grad_norm": 0.46555179357528687, "learning_rate": 9.549802117799542e-06, "loss": 0.4336, "step": 10028 }, { "epoch": 0.4602358771970079, "grad_norm": 0.4735663831233978, "learning_rate": 9.549700434105042e-06, "loss": 0.4299, "step": 10029 }, { "epoch": 0.4602817677022624, "grad_norm": 0.45129549503326416, "learning_rate": 9.54959873946995e-06, "loss": 0.3625, "step": 10030 }, { "epoch": 0.46032765820751687, "grad_norm": 0.5759739875793457, "learning_rate": 9.549497033894509e-06, "loss": 0.3094, "step": 10031 }, { "epoch": 0.4603735487127713, "grad_norm": 0.4574528932571411, "learning_rate": 9.549395317378961e-06, "loss": 0.4236, "step": 10032 }, { "epoch": 0.4604194392180258, "grad_norm": 0.4939922094345093, "learning_rate": 9.549293589923554e-06, "loss": 0.4679, "step": 10033 }, { "epoch": 0.46046532972328025, "grad_norm": 0.4289783239364624, "learning_rate": 9.549191851528533e-06, "loss": 0.3391, "step": 10034 }, { "epoch": 0.4605112202285347, "grad_norm": 0.4712502360343933, "learning_rate": 9.549090102194141e-06, "loss": 0.4229, "step": 10035 }, { "epoch": 0.4605571107337892, "grad_norm": 0.4966989755630493, "learning_rate": 9.548988341920623e-06, "loss": 0.4486, "step": 10036 }, { "epoch": 0.46060300123904363, "grad_norm": 0.4693969190120697, "learning_rate": 9.54888657070822e-06, "loss": 0.3969, "step": 10037 }, { "epoch": 0.46064889174429813, "grad_norm": 0.45313915610313416, "learning_rate": 9.548784788557186e-06, "loss": 0.3803, "step": 10038 }, { "epoch": 0.46069478224955257, "grad_norm": 0.8935025930404663, "learning_rate": 9.548682995467758e-06, "loss": 0.4941, "step": 10039 }, { "epoch": 0.460740672754807, "grad_norm": 0.4878140687942505, "learning_rate": 9.548581191440183e-06, "loss": 0.395, "step": 10040 }, { "epoch": 0.4607865632600615, "grad_norm": 0.5029117465019226, "learning_rate": 9.548479376474704e-06, "loss": 0.4907, "step": 10041 }, { "epoch": 0.46083245376531595, "grad_norm": 0.45163223147392273, "learning_rate": 9.54837755057157e-06, "loss": 0.341, "step": 10042 }, { "epoch": 0.4608783442705704, "grad_norm": 0.44898027181625366, "learning_rate": 9.548275713731023e-06, "loss": 0.3525, "step": 10043 }, { "epoch": 0.4609242347758249, "grad_norm": 0.46235594153404236, "learning_rate": 9.54817386595331e-06, "loss": 0.3689, "step": 10044 }, { "epoch": 0.46097012528107933, "grad_norm": 0.48003360629081726, "learning_rate": 9.548072007238672e-06, "loss": 0.4639, "step": 10045 }, { "epoch": 0.46101601578633383, "grad_norm": 0.4563031494617462, "learning_rate": 9.547970137587355e-06, "loss": 0.415, "step": 10046 }, { "epoch": 0.4610619062915883, "grad_norm": 0.4135788381099701, "learning_rate": 9.547868256999608e-06, "loss": 0.3119, "step": 10047 }, { "epoch": 0.4611077967968427, "grad_norm": 0.4257953464984894, "learning_rate": 9.547766365475673e-06, "loss": 0.339, "step": 10048 }, { "epoch": 0.4611536873020972, "grad_norm": 0.4664488434791565, "learning_rate": 9.547664463015793e-06, "loss": 0.4404, "step": 10049 }, { "epoch": 0.46119957780735166, "grad_norm": 0.43049153685569763, "learning_rate": 9.547562549620217e-06, "loss": 0.3518, "step": 10050 }, { "epoch": 0.4612454683126061, "grad_norm": 0.4459921419620514, "learning_rate": 9.547460625289188e-06, "loss": 0.3748, "step": 10051 }, { "epoch": 0.4612913588178606, "grad_norm": 0.45962008833885193, "learning_rate": 9.547358690022952e-06, "loss": 0.3849, "step": 10052 }, { "epoch": 0.46133724932311504, "grad_norm": 0.46512913703918457, "learning_rate": 9.547256743821753e-06, "loss": 0.4191, "step": 10053 }, { "epoch": 0.46138313982836954, "grad_norm": 0.4879626929759979, "learning_rate": 9.547154786685836e-06, "loss": 0.4551, "step": 10054 }, { "epoch": 0.461429030333624, "grad_norm": 0.4872152805328369, "learning_rate": 9.547052818615448e-06, "loss": 0.467, "step": 10055 }, { "epoch": 0.4614749208388784, "grad_norm": 0.4161750376224518, "learning_rate": 9.546950839610831e-06, "loss": 0.3203, "step": 10056 }, { "epoch": 0.4615208113441329, "grad_norm": 0.4910643398761749, "learning_rate": 9.546848849672235e-06, "loss": 0.4535, "step": 10057 }, { "epoch": 0.46156670184938736, "grad_norm": 0.4327855408191681, "learning_rate": 9.5467468487999e-06, "loss": 0.3518, "step": 10058 }, { "epoch": 0.4616125923546418, "grad_norm": 0.44933268427848816, "learning_rate": 9.546644836994077e-06, "loss": 0.3708, "step": 10059 }, { "epoch": 0.4616584828598963, "grad_norm": 0.47097790241241455, "learning_rate": 9.546542814255006e-06, "loss": 0.4399, "step": 10060 }, { "epoch": 0.46170437336515074, "grad_norm": 0.5800557732582092, "learning_rate": 9.546440780582935e-06, "loss": 0.4243, "step": 10061 }, { "epoch": 0.4617502638704052, "grad_norm": 0.4938318729400635, "learning_rate": 9.546338735978108e-06, "loss": 0.4192, "step": 10062 }, { "epoch": 0.4617961543756597, "grad_norm": 0.508450984954834, "learning_rate": 9.546236680440772e-06, "loss": 0.5059, "step": 10063 }, { "epoch": 0.4618420448809141, "grad_norm": 0.4875849783420563, "learning_rate": 9.546134613971172e-06, "loss": 0.4661, "step": 10064 }, { "epoch": 0.4618879353861686, "grad_norm": 0.45299094915390015, "learning_rate": 9.546032536569552e-06, "loss": 0.3789, "step": 10065 }, { "epoch": 0.46193382589142307, "grad_norm": 0.41441550850868225, "learning_rate": 9.545930448236158e-06, "loss": 0.3209, "step": 10066 }, { "epoch": 0.4619797163966775, "grad_norm": 0.4659954309463501, "learning_rate": 9.545828348971238e-06, "loss": 0.4362, "step": 10067 }, { "epoch": 0.462025606901932, "grad_norm": 0.4999721050262451, "learning_rate": 9.545726238775036e-06, "loss": 0.4306, "step": 10068 }, { "epoch": 0.46207149740718645, "grad_norm": 0.44158822298049927, "learning_rate": 9.545624117647794e-06, "loss": 0.3768, "step": 10069 }, { "epoch": 0.4621173879124409, "grad_norm": 0.45205965638160706, "learning_rate": 9.545521985589763e-06, "loss": 0.3594, "step": 10070 }, { "epoch": 0.4621632784176954, "grad_norm": 0.4988010823726654, "learning_rate": 9.545419842601185e-06, "loss": 0.5267, "step": 10071 }, { "epoch": 0.46220916892294983, "grad_norm": 0.43586310744285583, "learning_rate": 9.545317688682308e-06, "loss": 0.3744, "step": 10072 }, { "epoch": 0.46225505942820433, "grad_norm": 0.5058405995368958, "learning_rate": 9.545215523833375e-06, "loss": 0.4243, "step": 10073 }, { "epoch": 0.46230094993345877, "grad_norm": 0.45228537917137146, "learning_rate": 9.545113348054634e-06, "loss": 0.3641, "step": 10074 }, { "epoch": 0.4623468404387132, "grad_norm": 0.4844028353691101, "learning_rate": 9.54501116134633e-06, "loss": 0.4161, "step": 10075 }, { "epoch": 0.4623927309439677, "grad_norm": 0.5068084597587585, "learning_rate": 9.544908963708708e-06, "loss": 0.4654, "step": 10076 }, { "epoch": 0.46243862144922215, "grad_norm": 0.4577224850654602, "learning_rate": 9.544806755142016e-06, "loss": 0.3672, "step": 10077 }, { "epoch": 0.4624845119544766, "grad_norm": 0.4572412371635437, "learning_rate": 9.544704535646497e-06, "loss": 0.4306, "step": 10078 }, { "epoch": 0.4625304024597311, "grad_norm": 0.519980251789093, "learning_rate": 9.544602305222397e-06, "loss": 0.4995, "step": 10079 }, { "epoch": 0.46257629296498554, "grad_norm": 0.44654372334480286, "learning_rate": 9.544500063869964e-06, "loss": 0.4219, "step": 10080 }, { "epoch": 0.46262218347024003, "grad_norm": 0.5094678401947021, "learning_rate": 9.544397811589441e-06, "loss": 0.4552, "step": 10081 }, { "epoch": 0.4626680739754945, "grad_norm": 0.526958703994751, "learning_rate": 9.544295548381077e-06, "loss": 0.4772, "step": 10082 }, { "epoch": 0.4627139644807489, "grad_norm": 0.5276633501052856, "learning_rate": 9.544193274245118e-06, "loss": 0.4472, "step": 10083 }, { "epoch": 0.4627598549860034, "grad_norm": 0.4546002149581909, "learning_rate": 9.544090989181806e-06, "loss": 0.387, "step": 10084 }, { "epoch": 0.46280574549125786, "grad_norm": 0.45366042852401733, "learning_rate": 9.543988693191389e-06, "loss": 0.3801, "step": 10085 }, { "epoch": 0.4628516359965123, "grad_norm": 0.4777795970439911, "learning_rate": 9.543886386274116e-06, "loss": 0.4108, "step": 10086 }, { "epoch": 0.4628975265017668, "grad_norm": 0.4432874619960785, "learning_rate": 9.543784068430228e-06, "loss": 0.378, "step": 10087 }, { "epoch": 0.46294341700702124, "grad_norm": 0.5480496883392334, "learning_rate": 9.543681739659974e-06, "loss": 0.5073, "step": 10088 }, { "epoch": 0.4629893075122757, "grad_norm": 0.5434205532073975, "learning_rate": 9.543579399963598e-06, "loss": 0.4025, "step": 10089 }, { "epoch": 0.4630351980175302, "grad_norm": 0.46511849761009216, "learning_rate": 9.54347704934135e-06, "loss": 0.3929, "step": 10090 }, { "epoch": 0.4630810885227846, "grad_norm": 0.46351978182792664, "learning_rate": 9.543374687793472e-06, "loss": 0.4117, "step": 10091 }, { "epoch": 0.4631269790280391, "grad_norm": 0.46285396814346313, "learning_rate": 9.543272315320213e-06, "loss": 0.4013, "step": 10092 }, { "epoch": 0.46317286953329356, "grad_norm": 0.45155471563339233, "learning_rate": 9.543169931921817e-06, "loss": 0.3618, "step": 10093 }, { "epoch": 0.463218760038548, "grad_norm": 0.4584038555622101, "learning_rate": 9.543067537598532e-06, "loss": 0.3709, "step": 10094 }, { "epoch": 0.4632646505438025, "grad_norm": 0.48299309611320496, "learning_rate": 9.542965132350603e-06, "loss": 0.4702, "step": 10095 }, { "epoch": 0.46331054104905695, "grad_norm": 0.49806708097457886, "learning_rate": 9.542862716178275e-06, "loss": 0.4814, "step": 10096 }, { "epoch": 0.4633564315543114, "grad_norm": 0.48939889669418335, "learning_rate": 9.5427602890818e-06, "loss": 0.4914, "step": 10097 }, { "epoch": 0.4634023220595659, "grad_norm": 0.4154219329357147, "learning_rate": 9.542657851061417e-06, "loss": 0.3038, "step": 10098 }, { "epoch": 0.4634482125648203, "grad_norm": 0.43353354930877686, "learning_rate": 9.542555402117376e-06, "loss": 0.381, "step": 10099 }, { "epoch": 0.4634941030700748, "grad_norm": 0.43858015537261963, "learning_rate": 9.542452942249924e-06, "loss": 0.3336, "step": 10100 }, { "epoch": 0.46353999357532927, "grad_norm": 0.47354984283447266, "learning_rate": 9.542350471459304e-06, "loss": 0.3993, "step": 10101 }, { "epoch": 0.4635858840805837, "grad_norm": 0.4408109784126282, "learning_rate": 9.542247989745768e-06, "loss": 0.3207, "step": 10102 }, { "epoch": 0.4636317745858382, "grad_norm": 0.47126561403274536, "learning_rate": 9.542145497109557e-06, "loss": 0.4249, "step": 10103 }, { "epoch": 0.46367766509109265, "grad_norm": 0.4833935797214508, "learning_rate": 9.54204299355092e-06, "loss": 0.3718, "step": 10104 }, { "epoch": 0.4637235555963471, "grad_norm": 0.4889027774333954, "learning_rate": 9.541940479070104e-06, "loss": 0.4329, "step": 10105 }, { "epoch": 0.4637694461016016, "grad_norm": 0.4404420256614685, "learning_rate": 9.541837953667353e-06, "loss": 0.3362, "step": 10106 }, { "epoch": 0.46381533660685603, "grad_norm": 0.45375174283981323, "learning_rate": 9.541735417342916e-06, "loss": 0.3671, "step": 10107 }, { "epoch": 0.46386122711211053, "grad_norm": 0.48985207080841064, "learning_rate": 9.541632870097039e-06, "loss": 0.4456, "step": 10108 }, { "epoch": 0.463907117617365, "grad_norm": 0.42800143361091614, "learning_rate": 9.541530311929968e-06, "loss": 0.3287, "step": 10109 }, { "epoch": 0.4639530081226194, "grad_norm": 0.4616200923919678, "learning_rate": 9.54142774284195e-06, "loss": 0.4028, "step": 10110 }, { "epoch": 0.4639988986278739, "grad_norm": 0.4615591764450073, "learning_rate": 9.541325162833232e-06, "loss": 0.3619, "step": 10111 }, { "epoch": 0.46404478913312835, "grad_norm": 0.5035247206687927, "learning_rate": 9.54122257190406e-06, "loss": 0.3915, "step": 10112 }, { "epoch": 0.4640906796383828, "grad_norm": 0.40776196122169495, "learning_rate": 9.54111997005468e-06, "loss": 0.3218, "step": 10113 }, { "epoch": 0.4641365701436373, "grad_norm": 0.504343569278717, "learning_rate": 9.541017357285342e-06, "loss": 0.4219, "step": 10114 }, { "epoch": 0.46418246064889174, "grad_norm": 0.5200358629226685, "learning_rate": 9.54091473359629e-06, "loss": 0.5024, "step": 10115 }, { "epoch": 0.46422835115414623, "grad_norm": 0.46448755264282227, "learning_rate": 9.540812098987768e-06, "loss": 0.3645, "step": 10116 }, { "epoch": 0.4642742416594007, "grad_norm": 0.49726325273513794, "learning_rate": 9.540709453460028e-06, "loss": 0.4559, "step": 10117 }, { "epoch": 0.4643201321646551, "grad_norm": 0.48025810718536377, "learning_rate": 9.540606797013317e-06, "loss": 0.4823, "step": 10118 }, { "epoch": 0.4643660226699096, "grad_norm": 0.5020802021026611, "learning_rate": 9.540504129647878e-06, "loss": 0.4306, "step": 10119 }, { "epoch": 0.46441191317516406, "grad_norm": 0.4576500654220581, "learning_rate": 9.54040145136396e-06, "loss": 0.3812, "step": 10120 }, { "epoch": 0.4644578036804185, "grad_norm": 0.4354941248893738, "learning_rate": 9.540298762161808e-06, "loss": 0.3377, "step": 10121 }, { "epoch": 0.464503694185673, "grad_norm": 0.4891124665737152, "learning_rate": 9.540196062041672e-06, "loss": 0.4525, "step": 10122 }, { "epoch": 0.46454958469092744, "grad_norm": 0.48238879442214966, "learning_rate": 9.540093351003797e-06, "loss": 0.3812, "step": 10123 }, { "epoch": 0.4645954751961819, "grad_norm": 0.4946328401565552, "learning_rate": 9.539990629048428e-06, "loss": 0.437, "step": 10124 }, { "epoch": 0.4646413657014364, "grad_norm": 0.4605189263820648, "learning_rate": 9.539887896175816e-06, "loss": 0.3833, "step": 10125 }, { "epoch": 0.4646872562066908, "grad_norm": 0.455402672290802, "learning_rate": 9.539785152386208e-06, "loss": 0.4505, "step": 10126 }, { "epoch": 0.4647331467119453, "grad_norm": 0.4873480200767517, "learning_rate": 9.53968239767985e-06, "loss": 0.3642, "step": 10127 }, { "epoch": 0.46477903721719976, "grad_norm": 0.41558223962783813, "learning_rate": 9.539579632056985e-06, "loss": 0.3062, "step": 10128 }, { "epoch": 0.4648249277224542, "grad_norm": 0.43191471695899963, "learning_rate": 9.539476855517866e-06, "loss": 0.337, "step": 10129 }, { "epoch": 0.4648708182277087, "grad_norm": 0.4598858952522278, "learning_rate": 9.539374068062739e-06, "loss": 0.3958, "step": 10130 }, { "epoch": 0.46491670873296315, "grad_norm": 0.424194872379303, "learning_rate": 9.539271269691847e-06, "loss": 0.3619, "step": 10131 }, { "epoch": 0.4649625992382176, "grad_norm": 0.4774252772331238, "learning_rate": 9.539168460405444e-06, "loss": 0.4249, "step": 10132 }, { "epoch": 0.4650084897434721, "grad_norm": 0.4371560215950012, "learning_rate": 9.53906564020377e-06, "loss": 0.4019, "step": 10133 }, { "epoch": 0.46505438024872653, "grad_norm": 0.4690553843975067, "learning_rate": 9.538962809087078e-06, "loss": 0.4406, "step": 10134 }, { "epoch": 0.465100270753981, "grad_norm": 0.47569596767425537, "learning_rate": 9.53885996705561e-06, "loss": 0.3947, "step": 10135 }, { "epoch": 0.46514616125923547, "grad_norm": 0.4826330542564392, "learning_rate": 9.53875711410962e-06, "loss": 0.4479, "step": 10136 }, { "epoch": 0.4651920517644899, "grad_norm": 0.5102675557136536, "learning_rate": 9.53865425024935e-06, "loss": 0.4498, "step": 10137 }, { "epoch": 0.4652379422697444, "grad_norm": 0.5118948817253113, "learning_rate": 9.53855137547505e-06, "loss": 0.4493, "step": 10138 }, { "epoch": 0.46528383277499885, "grad_norm": 0.4750429093837738, "learning_rate": 9.538448489786964e-06, "loss": 0.4453, "step": 10139 }, { "epoch": 0.4653297232802533, "grad_norm": 0.47009220719337463, "learning_rate": 9.538345593185343e-06, "loss": 0.4075, "step": 10140 }, { "epoch": 0.4653756137855078, "grad_norm": 0.48859694600105286, "learning_rate": 9.538242685670434e-06, "loss": 0.4574, "step": 10141 }, { "epoch": 0.46542150429076223, "grad_norm": 0.46785372495651245, "learning_rate": 9.538139767242482e-06, "loss": 0.4051, "step": 10142 }, { "epoch": 0.46546739479601673, "grad_norm": 0.48553353548049927, "learning_rate": 9.538036837901737e-06, "loss": 0.4099, "step": 10143 }, { "epoch": 0.4655132853012712, "grad_norm": 0.47608351707458496, "learning_rate": 9.537933897648445e-06, "loss": 0.4638, "step": 10144 }, { "epoch": 0.4655591758065256, "grad_norm": 0.42500007152557373, "learning_rate": 9.537830946482856e-06, "loss": 0.346, "step": 10145 }, { "epoch": 0.4656050663117801, "grad_norm": 0.4008198082447052, "learning_rate": 9.537727984405214e-06, "loss": 0.2771, "step": 10146 }, { "epoch": 0.46565095681703456, "grad_norm": 0.4721977710723877, "learning_rate": 9.53762501141577e-06, "loss": 0.4136, "step": 10147 }, { "epoch": 0.465696847322289, "grad_norm": 0.44315576553344727, "learning_rate": 9.53752202751477e-06, "loss": 0.3474, "step": 10148 }, { "epoch": 0.4657427378275435, "grad_norm": 0.5014727115631104, "learning_rate": 9.53741903270246e-06, "loss": 0.4673, "step": 10149 }, { "epoch": 0.46578862833279794, "grad_norm": 0.44032490253448486, "learning_rate": 9.537316026979088e-06, "loss": 0.3678, "step": 10150 }, { "epoch": 0.4658345188380524, "grad_norm": 0.48446351289749146, "learning_rate": 9.537213010344906e-06, "loss": 0.4935, "step": 10151 }, { "epoch": 0.4658804093433069, "grad_norm": 0.4451989233493805, "learning_rate": 9.537109982800157e-06, "loss": 0.3976, "step": 10152 }, { "epoch": 0.4659262998485613, "grad_norm": 0.4543994069099426, "learning_rate": 9.537006944345092e-06, "loss": 0.3456, "step": 10153 }, { "epoch": 0.4659721903538158, "grad_norm": 0.46358808875083923, "learning_rate": 9.536903894979955e-06, "loss": 0.4122, "step": 10154 }, { "epoch": 0.46601808085907026, "grad_norm": 0.4269695281982422, "learning_rate": 9.536800834704998e-06, "loss": 0.3582, "step": 10155 }, { "epoch": 0.4660639713643247, "grad_norm": 0.41859170794487, "learning_rate": 9.536697763520465e-06, "loss": 0.3425, "step": 10156 }, { "epoch": 0.4661098618695792, "grad_norm": 0.49543049931526184, "learning_rate": 9.536594681426608e-06, "loss": 0.4629, "step": 10157 }, { "epoch": 0.46615575237483364, "grad_norm": 0.41522207856178284, "learning_rate": 9.536491588423671e-06, "loss": 0.3175, "step": 10158 }, { "epoch": 0.4662016428800881, "grad_norm": 0.41735395789146423, "learning_rate": 9.536388484511902e-06, "loss": 0.3178, "step": 10159 }, { "epoch": 0.4662475333853426, "grad_norm": 0.4680722951889038, "learning_rate": 9.536285369691554e-06, "loss": 0.3714, "step": 10160 }, { "epoch": 0.466293423890597, "grad_norm": 0.47423529624938965, "learning_rate": 9.53618224396287e-06, "loss": 0.3745, "step": 10161 }, { "epoch": 0.4663393143958515, "grad_norm": 0.4534178376197815, "learning_rate": 9.536079107326099e-06, "loss": 0.3747, "step": 10162 }, { "epoch": 0.46638520490110597, "grad_norm": 0.4683798849582672, "learning_rate": 9.535975959781489e-06, "loss": 0.3924, "step": 10163 }, { "epoch": 0.4664310954063604, "grad_norm": 0.48483073711395264, "learning_rate": 9.535872801329289e-06, "loss": 0.4296, "step": 10164 }, { "epoch": 0.4664769859116149, "grad_norm": 0.47956332564353943, "learning_rate": 9.535769631969745e-06, "loss": 0.4267, "step": 10165 }, { "epoch": 0.46652287641686935, "grad_norm": 0.491567462682724, "learning_rate": 9.535666451703109e-06, "loss": 0.4816, "step": 10166 }, { "epoch": 0.4665687669221238, "grad_norm": 0.45649975538253784, "learning_rate": 9.535563260529624e-06, "loss": 0.4043, "step": 10167 }, { "epoch": 0.4666146574273783, "grad_norm": 0.4739347994327545, "learning_rate": 9.535460058449543e-06, "loss": 0.4243, "step": 10168 }, { "epoch": 0.46666054793263273, "grad_norm": 0.4462469816207886, "learning_rate": 9.53535684546311e-06, "loss": 0.4154, "step": 10169 }, { "epoch": 0.4667064384378872, "grad_norm": 0.5060902833938599, "learning_rate": 9.535253621570578e-06, "loss": 0.413, "step": 10170 }, { "epoch": 0.46675232894314167, "grad_norm": 0.46325913071632385, "learning_rate": 9.53515038677219e-06, "loss": 0.3567, "step": 10171 }, { "epoch": 0.4667982194483961, "grad_norm": 0.4650494456291199, "learning_rate": 9.535047141068199e-06, "loss": 0.3631, "step": 10172 }, { "epoch": 0.4668441099536506, "grad_norm": 0.4779733419418335, "learning_rate": 9.534943884458847e-06, "loss": 0.4563, "step": 10173 }, { "epoch": 0.46689000045890505, "grad_norm": 0.45985865592956543, "learning_rate": 9.534840616944389e-06, "loss": 0.3753, "step": 10174 }, { "epoch": 0.4669358909641595, "grad_norm": 0.4448172450065613, "learning_rate": 9.53473733852507e-06, "loss": 0.3499, "step": 10175 }, { "epoch": 0.466981781469414, "grad_norm": 0.4870678782463074, "learning_rate": 9.534634049201142e-06, "loss": 0.4437, "step": 10176 }, { "epoch": 0.46702767197466843, "grad_norm": 0.502816915512085, "learning_rate": 9.534530748972846e-06, "loss": 0.4155, "step": 10177 }, { "epoch": 0.4670735624799229, "grad_norm": 0.452913373708725, "learning_rate": 9.534427437840436e-06, "loss": 0.3465, "step": 10178 }, { "epoch": 0.4671194529851774, "grad_norm": 0.42497164011001587, "learning_rate": 9.53432411580416e-06, "loss": 0.3269, "step": 10179 }, { "epoch": 0.4671653434904318, "grad_norm": 0.49143680930137634, "learning_rate": 9.534220782864264e-06, "loss": 0.4783, "step": 10180 }, { "epoch": 0.4672112339956863, "grad_norm": 0.44599488377571106, "learning_rate": 9.534117439020999e-06, "loss": 0.3457, "step": 10181 }, { "epoch": 0.46725712450094076, "grad_norm": 0.4745642840862274, "learning_rate": 9.534014084274612e-06, "loss": 0.4089, "step": 10182 }, { "epoch": 0.4673030150061952, "grad_norm": 0.5003796219825745, "learning_rate": 9.533910718625354e-06, "loss": 0.4759, "step": 10183 }, { "epoch": 0.4673489055114497, "grad_norm": 0.43620696663856506, "learning_rate": 9.533807342073469e-06, "loss": 0.3626, "step": 10184 }, { "epoch": 0.46739479601670414, "grad_norm": 0.4845469295978546, "learning_rate": 9.53370395461921e-06, "loss": 0.4311, "step": 10185 }, { "epoch": 0.4674406865219586, "grad_norm": 0.47875112295150757, "learning_rate": 9.533600556262823e-06, "loss": 0.4171, "step": 10186 }, { "epoch": 0.4674865770272131, "grad_norm": 0.4743548035621643, "learning_rate": 9.533497147004558e-06, "loss": 0.4694, "step": 10187 }, { "epoch": 0.4675324675324675, "grad_norm": 0.4402513802051544, "learning_rate": 9.533393726844662e-06, "loss": 0.3554, "step": 10188 }, { "epoch": 0.467578358037722, "grad_norm": 0.43777450919151306, "learning_rate": 9.533290295783387e-06, "loss": 0.3824, "step": 10189 }, { "epoch": 0.46762424854297646, "grad_norm": 0.4674980044364929, "learning_rate": 9.533186853820978e-06, "loss": 0.4826, "step": 10190 }, { "epoch": 0.4676701390482309, "grad_norm": 0.4540131986141205, "learning_rate": 9.533083400957686e-06, "loss": 0.3677, "step": 10191 }, { "epoch": 0.4677160295534854, "grad_norm": 0.43238818645477295, "learning_rate": 9.532979937193758e-06, "loss": 0.3242, "step": 10192 }, { "epoch": 0.46776192005873984, "grad_norm": 0.49364128708839417, "learning_rate": 9.532876462529445e-06, "loss": 0.4689, "step": 10193 }, { "epoch": 0.4678078105639943, "grad_norm": 0.4342409372329712, "learning_rate": 9.532772976964994e-06, "loss": 0.2977, "step": 10194 }, { "epoch": 0.4678537010692488, "grad_norm": 0.427198201417923, "learning_rate": 9.532669480500655e-06, "loss": 0.3365, "step": 10195 }, { "epoch": 0.4678995915745032, "grad_norm": 0.4457751512527466, "learning_rate": 9.532565973136678e-06, "loss": 0.3594, "step": 10196 }, { "epoch": 0.4679454820797577, "grad_norm": 0.5074992179870605, "learning_rate": 9.532462454873308e-06, "loss": 0.415, "step": 10197 }, { "epoch": 0.46799137258501217, "grad_norm": 0.43412360548973083, "learning_rate": 9.532358925710799e-06, "loss": 0.3679, "step": 10198 }, { "epoch": 0.4680372630902666, "grad_norm": 0.4655688405036926, "learning_rate": 9.532255385649394e-06, "loss": 0.4313, "step": 10199 }, { "epoch": 0.4680831535955211, "grad_norm": 0.46152544021606445, "learning_rate": 9.532151834689347e-06, "loss": 0.4158, "step": 10200 }, { "epoch": 0.46812904410077555, "grad_norm": 0.4291996657848358, "learning_rate": 9.532048272830906e-06, "loss": 0.3904, "step": 10201 }, { "epoch": 0.46817493460603, "grad_norm": 0.44603246450424194, "learning_rate": 9.531944700074316e-06, "loss": 0.3269, "step": 10202 }, { "epoch": 0.4682208251112845, "grad_norm": 0.48220697045326233, "learning_rate": 9.531841116419831e-06, "loss": 0.4626, "step": 10203 }, { "epoch": 0.46826671561653893, "grad_norm": 0.46037623286247253, "learning_rate": 9.5317375218677e-06, "loss": 0.4074, "step": 10204 }, { "epoch": 0.4683126061217934, "grad_norm": 0.47353342175483704, "learning_rate": 9.531633916418168e-06, "loss": 0.4856, "step": 10205 }, { "epoch": 0.46835849662704787, "grad_norm": 0.4755588173866272, "learning_rate": 9.53153030007149e-06, "loss": 0.4093, "step": 10206 }, { "epoch": 0.4684043871323023, "grad_norm": 0.4176962077617645, "learning_rate": 9.531426672827909e-06, "loss": 0.2916, "step": 10207 }, { "epoch": 0.4684502776375568, "grad_norm": 0.4827055335044861, "learning_rate": 9.531323034687679e-06, "loss": 0.4857, "step": 10208 }, { "epoch": 0.46849616814281125, "grad_norm": 0.43406835198402405, "learning_rate": 9.531219385651045e-06, "loss": 0.3524, "step": 10209 }, { "epoch": 0.4685420586480657, "grad_norm": 0.4584280252456665, "learning_rate": 9.531115725718261e-06, "loss": 0.4167, "step": 10210 }, { "epoch": 0.4685879491533202, "grad_norm": 0.49388375878334045, "learning_rate": 9.531012054889572e-06, "loss": 0.4283, "step": 10211 }, { "epoch": 0.46863383965857464, "grad_norm": 0.4825495183467865, "learning_rate": 9.53090837316523e-06, "loss": 0.3636, "step": 10212 }, { "epoch": 0.4686797301638291, "grad_norm": 0.46548572182655334, "learning_rate": 9.530804680545482e-06, "loss": 0.364, "step": 10213 }, { "epoch": 0.4687256206690836, "grad_norm": 0.4603481590747833, "learning_rate": 9.530700977030578e-06, "loss": 0.4266, "step": 10214 }, { "epoch": 0.468771511174338, "grad_norm": 0.4530159831047058, "learning_rate": 9.53059726262077e-06, "loss": 0.3879, "step": 10215 }, { "epoch": 0.4688174016795925, "grad_norm": 0.4745256304740906, "learning_rate": 9.530493537316306e-06, "loss": 0.4746, "step": 10216 }, { "epoch": 0.46886329218484696, "grad_norm": 0.5069655776023865, "learning_rate": 9.530389801117432e-06, "loss": 0.4944, "step": 10217 }, { "epoch": 0.4689091826901014, "grad_norm": 0.4569256007671356, "learning_rate": 9.530286054024403e-06, "loss": 0.3674, "step": 10218 }, { "epoch": 0.4689550731953559, "grad_norm": 0.46480974555015564, "learning_rate": 9.530182296037464e-06, "loss": 0.4174, "step": 10219 }, { "epoch": 0.46900096370061034, "grad_norm": 0.5011894702911377, "learning_rate": 9.530078527156869e-06, "loss": 0.4719, "step": 10220 }, { "epoch": 0.4690468542058648, "grad_norm": 0.4615441858768463, "learning_rate": 9.529974747382862e-06, "loss": 0.4254, "step": 10221 }, { "epoch": 0.4690927447111193, "grad_norm": 0.5117265582084656, "learning_rate": 9.529870956715697e-06, "loss": 0.4158, "step": 10222 }, { "epoch": 0.4691386352163737, "grad_norm": 0.48618847131729126, "learning_rate": 9.529767155155622e-06, "loss": 0.4272, "step": 10223 }, { "epoch": 0.4691845257216282, "grad_norm": 0.40197300910949707, "learning_rate": 9.529663342702886e-06, "loss": 0.3239, "step": 10224 }, { "epoch": 0.46923041622688266, "grad_norm": 0.43460094928741455, "learning_rate": 9.52955951935774e-06, "loss": 0.4016, "step": 10225 }, { "epoch": 0.4692763067321371, "grad_norm": 0.46816888451576233, "learning_rate": 9.529455685120432e-06, "loss": 0.4068, "step": 10226 }, { "epoch": 0.4693221972373916, "grad_norm": 0.4750359058380127, "learning_rate": 9.529351839991215e-06, "loss": 0.4378, "step": 10227 }, { "epoch": 0.46936808774264605, "grad_norm": 0.45918720960617065, "learning_rate": 9.529247983970333e-06, "loss": 0.3774, "step": 10228 }, { "epoch": 0.4694139782479005, "grad_norm": 0.5184150338172913, "learning_rate": 9.529144117058041e-06, "loss": 0.3301, "step": 10229 }, { "epoch": 0.469459868753155, "grad_norm": 0.478085458278656, "learning_rate": 9.529040239254587e-06, "loss": 0.3812, "step": 10230 }, { "epoch": 0.4695057592584094, "grad_norm": 0.4576238691806793, "learning_rate": 9.52893635056022e-06, "loss": 0.3832, "step": 10231 }, { "epoch": 0.4695516497636639, "grad_norm": 0.44175854325294495, "learning_rate": 9.52883245097519e-06, "loss": 0.373, "step": 10232 }, { "epoch": 0.46959754026891837, "grad_norm": 0.4565293490886688, "learning_rate": 9.528728540499747e-06, "loss": 0.4443, "step": 10233 }, { "epoch": 0.4696434307741728, "grad_norm": 0.4721430242061615, "learning_rate": 9.528624619134143e-06, "loss": 0.427, "step": 10234 }, { "epoch": 0.4696893212794273, "grad_norm": 0.4381866455078125, "learning_rate": 9.528520686878624e-06, "loss": 0.3493, "step": 10235 }, { "epoch": 0.46973521178468175, "grad_norm": 0.5155766010284424, "learning_rate": 9.528416743733445e-06, "loss": 0.4975, "step": 10236 }, { "epoch": 0.4697811022899362, "grad_norm": 0.44764846563339233, "learning_rate": 9.52831278969885e-06, "loss": 0.3981, "step": 10237 }, { "epoch": 0.4698269927951907, "grad_norm": 0.4705737233161926, "learning_rate": 9.528208824775093e-06, "loss": 0.4123, "step": 10238 }, { "epoch": 0.46987288330044513, "grad_norm": 0.5041414499282837, "learning_rate": 9.528104848962423e-06, "loss": 0.4097, "step": 10239 }, { "epoch": 0.4699187738056996, "grad_norm": 0.4294908046722412, "learning_rate": 9.52800086226109e-06, "loss": 0.3758, "step": 10240 }, { "epoch": 0.46996466431095407, "grad_norm": 0.46984410285949707, "learning_rate": 9.527896864671343e-06, "loss": 0.4259, "step": 10241 }, { "epoch": 0.4700105548162085, "grad_norm": 0.4776791036128998, "learning_rate": 9.527792856193434e-06, "loss": 0.4033, "step": 10242 }, { "epoch": 0.470056445321463, "grad_norm": 0.4898414611816406, "learning_rate": 9.52768883682761e-06, "loss": 0.4652, "step": 10243 }, { "epoch": 0.47010233582671745, "grad_norm": 0.46027126908302307, "learning_rate": 9.527584806574127e-06, "loss": 0.3806, "step": 10244 }, { "epoch": 0.4701482263319719, "grad_norm": 0.4511317014694214, "learning_rate": 9.52748076543323e-06, "loss": 0.4283, "step": 10245 }, { "epoch": 0.4701941168372264, "grad_norm": 0.4421726167201996, "learning_rate": 9.527376713405169e-06, "loss": 0.3843, "step": 10246 }, { "epoch": 0.47024000734248084, "grad_norm": 0.4418502748012543, "learning_rate": 9.527272650490198e-06, "loss": 0.3694, "step": 10247 }, { "epoch": 0.4702858978477353, "grad_norm": 0.4714941084384918, "learning_rate": 9.527168576688565e-06, "loss": 0.3938, "step": 10248 }, { "epoch": 0.4703317883529898, "grad_norm": 0.49469122290611267, "learning_rate": 9.52706449200052e-06, "loss": 0.4831, "step": 10249 }, { "epoch": 0.4703776788582442, "grad_norm": 0.4973783493041992, "learning_rate": 9.526960396426313e-06, "loss": 0.4478, "step": 10250 }, { "epoch": 0.4704235693634987, "grad_norm": 0.48669660091400146, "learning_rate": 9.526856289966196e-06, "loss": 0.4281, "step": 10251 }, { "epoch": 0.47046945986875316, "grad_norm": 0.46653491258621216, "learning_rate": 9.526752172620417e-06, "loss": 0.446, "step": 10252 }, { "epoch": 0.4705153503740076, "grad_norm": 0.48437002301216125, "learning_rate": 9.52664804438923e-06, "loss": 0.4706, "step": 10253 }, { "epoch": 0.4705612408792621, "grad_norm": 0.4409119188785553, "learning_rate": 9.526543905272882e-06, "loss": 0.3761, "step": 10254 }, { "epoch": 0.47060713138451654, "grad_norm": 0.46961262822151184, "learning_rate": 9.526439755271624e-06, "loss": 0.4061, "step": 10255 }, { "epoch": 0.470653021889771, "grad_norm": 0.4741775095462799, "learning_rate": 9.526335594385707e-06, "loss": 0.469, "step": 10256 }, { "epoch": 0.4706989123950255, "grad_norm": 0.4921030104160309, "learning_rate": 9.526231422615382e-06, "loss": 0.426, "step": 10257 }, { "epoch": 0.4707448029002799, "grad_norm": 0.44580015540122986, "learning_rate": 9.526127239960899e-06, "loss": 0.4274, "step": 10258 }, { "epoch": 0.4707906934055344, "grad_norm": 0.4783933758735657, "learning_rate": 9.526023046422508e-06, "loss": 0.4075, "step": 10259 }, { "epoch": 0.47083658391078886, "grad_norm": 0.4656791090965271, "learning_rate": 9.52591884200046e-06, "loss": 0.4411, "step": 10260 }, { "epoch": 0.4708824744160433, "grad_norm": 0.4863322377204895, "learning_rate": 9.525814626695005e-06, "loss": 0.4826, "step": 10261 }, { "epoch": 0.4709283649212978, "grad_norm": 0.45109036564826965, "learning_rate": 9.525710400506395e-06, "loss": 0.421, "step": 10262 }, { "epoch": 0.47097425542655225, "grad_norm": 0.4539772868156433, "learning_rate": 9.525606163434879e-06, "loss": 0.3467, "step": 10263 }, { "epoch": 0.4710201459318067, "grad_norm": 0.4710644781589508, "learning_rate": 9.52550191548071e-06, "loss": 0.4561, "step": 10264 }, { "epoch": 0.4710660364370612, "grad_norm": 0.5383159518241882, "learning_rate": 9.525397656644137e-06, "loss": 0.5325, "step": 10265 }, { "epoch": 0.47111192694231563, "grad_norm": 0.5490121245384216, "learning_rate": 9.52529338692541e-06, "loss": 0.5469, "step": 10266 }, { "epoch": 0.47115781744757007, "grad_norm": 0.4448584020137787, "learning_rate": 9.525189106324781e-06, "loss": 0.402, "step": 10267 }, { "epoch": 0.47120370795282457, "grad_norm": 0.5430421829223633, "learning_rate": 9.5250848148425e-06, "loss": 0.4333, "step": 10268 }, { "epoch": 0.471249598458079, "grad_norm": 0.47605228424072266, "learning_rate": 9.524980512478819e-06, "loss": 0.4455, "step": 10269 }, { "epoch": 0.4712954889633335, "grad_norm": 0.4852641224861145, "learning_rate": 9.524876199233988e-06, "loss": 0.4392, "step": 10270 }, { "epoch": 0.47134137946858795, "grad_norm": 0.5136424899101257, "learning_rate": 9.524771875108255e-06, "loss": 0.4962, "step": 10271 }, { "epoch": 0.4713872699738424, "grad_norm": 0.4403326213359833, "learning_rate": 9.524667540101877e-06, "loss": 0.3098, "step": 10272 }, { "epoch": 0.4714331604790969, "grad_norm": 0.4392525255680084, "learning_rate": 9.524563194215101e-06, "loss": 0.3844, "step": 10273 }, { "epoch": 0.47147905098435133, "grad_norm": 0.4410269260406494, "learning_rate": 9.524458837448178e-06, "loss": 0.3493, "step": 10274 }, { "epoch": 0.4715249414896058, "grad_norm": 0.40997621417045593, "learning_rate": 9.52435446980136e-06, "loss": 0.3285, "step": 10275 }, { "epoch": 0.4715708319948603, "grad_norm": 0.4391801059246063, "learning_rate": 9.524250091274898e-06, "loss": 0.3405, "step": 10276 }, { "epoch": 0.4716167225001147, "grad_norm": 0.46179893612861633, "learning_rate": 9.524145701869041e-06, "loss": 0.391, "step": 10277 }, { "epoch": 0.4716626130053692, "grad_norm": 0.4987298250198364, "learning_rate": 9.524041301584042e-06, "loss": 0.4279, "step": 10278 }, { "epoch": 0.47170850351062366, "grad_norm": 0.42264726758003235, "learning_rate": 9.52393689042015e-06, "loss": 0.3186, "step": 10279 }, { "epoch": 0.4717543940158781, "grad_norm": 0.4643816351890564, "learning_rate": 9.523832468377621e-06, "loss": 0.4201, "step": 10280 }, { "epoch": 0.4718002845211326, "grad_norm": 0.4555171728134155, "learning_rate": 9.5237280354567e-06, "loss": 0.367, "step": 10281 }, { "epoch": 0.47184617502638704, "grad_norm": 0.4316999018192291, "learning_rate": 9.523623591657643e-06, "loss": 0.3305, "step": 10282 }, { "epoch": 0.4718920655316415, "grad_norm": 0.5543466210365295, "learning_rate": 9.523519136980696e-06, "loss": 0.6002, "step": 10283 }, { "epoch": 0.471937956036896, "grad_norm": 0.46315932273864746, "learning_rate": 9.523414671426115e-06, "loss": 0.3714, "step": 10284 }, { "epoch": 0.4719838465421504, "grad_norm": 0.45800137519836426, "learning_rate": 9.523310194994148e-06, "loss": 0.3558, "step": 10285 }, { "epoch": 0.4720297370474049, "grad_norm": 0.43640342354774475, "learning_rate": 9.523205707685049e-06, "loss": 0.3481, "step": 10286 }, { "epoch": 0.47207562755265936, "grad_norm": 0.5094229578971863, "learning_rate": 9.523101209499068e-06, "loss": 0.4948, "step": 10287 }, { "epoch": 0.4721215180579138, "grad_norm": 0.44438436627388, "learning_rate": 9.522996700436456e-06, "loss": 0.3976, "step": 10288 }, { "epoch": 0.4721674085631683, "grad_norm": 0.4255135655403137, "learning_rate": 9.522892180497462e-06, "loss": 0.3419, "step": 10289 }, { "epoch": 0.47221329906842274, "grad_norm": 0.44764408469200134, "learning_rate": 9.522787649682342e-06, "loss": 0.322, "step": 10290 }, { "epoch": 0.4722591895736772, "grad_norm": 0.43707945942878723, "learning_rate": 9.522683107991345e-06, "loss": 0.3792, "step": 10291 }, { "epoch": 0.4723050800789317, "grad_norm": 0.458928644657135, "learning_rate": 9.522578555424722e-06, "loss": 0.4478, "step": 10292 }, { "epoch": 0.4723509705841861, "grad_norm": 0.4472564160823822, "learning_rate": 9.522473991982726e-06, "loss": 0.4004, "step": 10293 }, { "epoch": 0.47239686108944057, "grad_norm": 0.5485993027687073, "learning_rate": 9.522369417665606e-06, "loss": 0.4856, "step": 10294 }, { "epoch": 0.47244275159469507, "grad_norm": 0.4359759986400604, "learning_rate": 9.522264832473615e-06, "loss": 0.3413, "step": 10295 }, { "epoch": 0.4724886420999495, "grad_norm": 0.4886433184146881, "learning_rate": 9.522160236407005e-06, "loss": 0.4406, "step": 10296 }, { "epoch": 0.472534532605204, "grad_norm": 0.4512285590171814, "learning_rate": 9.522055629466028e-06, "loss": 0.3789, "step": 10297 }, { "epoch": 0.47258042311045845, "grad_norm": 0.5037420988082886, "learning_rate": 9.521951011650932e-06, "loss": 0.499, "step": 10298 }, { "epoch": 0.4726263136157129, "grad_norm": 0.5104064345359802, "learning_rate": 9.521846382961973e-06, "loss": 0.3686, "step": 10299 }, { "epoch": 0.4726722041209674, "grad_norm": 0.44444432854652405, "learning_rate": 9.521741743399399e-06, "loss": 0.3436, "step": 10300 }, { "epoch": 0.47271809462622183, "grad_norm": 0.4367820918560028, "learning_rate": 9.521637092963465e-06, "loss": 0.3725, "step": 10301 }, { "epoch": 0.47276398513147627, "grad_norm": 0.47296854853630066, "learning_rate": 9.521532431654418e-06, "loss": 0.4431, "step": 10302 }, { "epoch": 0.47280987563673077, "grad_norm": 0.43497464060783386, "learning_rate": 9.521427759472516e-06, "loss": 0.3652, "step": 10303 }, { "epoch": 0.4728557661419852, "grad_norm": 0.4277838468551636, "learning_rate": 9.521323076418006e-06, "loss": 0.3574, "step": 10304 }, { "epoch": 0.4729016566472397, "grad_norm": 0.45304176211357117, "learning_rate": 9.52121838249114e-06, "loss": 0.3415, "step": 10305 }, { "epoch": 0.47294754715249415, "grad_norm": 0.5273206233978271, "learning_rate": 9.52111367769217e-06, "loss": 0.4002, "step": 10306 }, { "epoch": 0.4729934376577486, "grad_norm": 0.4759596288204193, "learning_rate": 9.52100896202135e-06, "loss": 0.4662, "step": 10307 }, { "epoch": 0.4730393281630031, "grad_norm": 0.44349730014801025, "learning_rate": 9.520904235478932e-06, "loss": 0.329, "step": 10308 }, { "epoch": 0.47308521866825753, "grad_norm": 0.4337792694568634, "learning_rate": 9.520799498065162e-06, "loss": 0.374, "step": 10309 }, { "epoch": 0.473131109173512, "grad_norm": 0.43011367321014404, "learning_rate": 9.520694749780299e-06, "loss": 0.3557, "step": 10310 }, { "epoch": 0.4731769996787665, "grad_norm": 0.4629124104976654, "learning_rate": 9.520589990624591e-06, "loss": 0.442, "step": 10311 }, { "epoch": 0.4732228901840209, "grad_norm": 0.42186805605888367, "learning_rate": 9.520485220598291e-06, "loss": 0.3164, "step": 10312 }, { "epoch": 0.4732687806892754, "grad_norm": 0.4764348268508911, "learning_rate": 9.52038043970165e-06, "loss": 0.4198, "step": 10313 }, { "epoch": 0.47331467119452986, "grad_norm": 0.47077757120132446, "learning_rate": 9.520275647934922e-06, "loss": 0.4076, "step": 10314 }, { "epoch": 0.4733605616997843, "grad_norm": 0.443193256855011, "learning_rate": 9.520170845298358e-06, "loss": 0.3577, "step": 10315 }, { "epoch": 0.4734064522050388, "grad_norm": 0.4883959889411926, "learning_rate": 9.520066031792209e-06, "loss": 0.4595, "step": 10316 }, { "epoch": 0.47345234271029324, "grad_norm": 0.45426440238952637, "learning_rate": 9.519961207416727e-06, "loss": 0.3288, "step": 10317 }, { "epoch": 0.4734982332155477, "grad_norm": 0.44965294003486633, "learning_rate": 9.519856372172165e-06, "loss": 0.4036, "step": 10318 }, { "epoch": 0.4735441237208022, "grad_norm": 0.45954135060310364, "learning_rate": 9.519751526058776e-06, "loss": 0.4165, "step": 10319 }, { "epoch": 0.4735900142260566, "grad_norm": 0.45515939593315125, "learning_rate": 9.51964666907681e-06, "loss": 0.3682, "step": 10320 }, { "epoch": 0.4736359047313111, "grad_norm": 0.5606639385223389, "learning_rate": 9.519541801226522e-06, "loss": 0.5414, "step": 10321 }, { "epoch": 0.47368179523656556, "grad_norm": 0.5108808279037476, "learning_rate": 9.519436922508161e-06, "loss": 0.4288, "step": 10322 }, { "epoch": 0.47372768574182, "grad_norm": 0.4635060131549835, "learning_rate": 9.51933203292198e-06, "loss": 0.3776, "step": 10323 }, { "epoch": 0.4737735762470745, "grad_norm": 0.43096524477005005, "learning_rate": 9.519227132468232e-06, "loss": 0.336, "step": 10324 }, { "epoch": 0.47381946675232894, "grad_norm": 0.42057979106903076, "learning_rate": 9.51912222114717e-06, "loss": 0.3425, "step": 10325 }, { "epoch": 0.4738653572575834, "grad_norm": 0.45328864455223083, "learning_rate": 9.519017298959045e-06, "loss": 0.3719, "step": 10326 }, { "epoch": 0.4739112477628379, "grad_norm": 0.4315711259841919, "learning_rate": 9.518912365904109e-06, "loss": 0.3673, "step": 10327 }, { "epoch": 0.4739571382680923, "grad_norm": 0.4419160783290863, "learning_rate": 9.518807421982616e-06, "loss": 0.3609, "step": 10328 }, { "epoch": 0.47400302877334677, "grad_norm": 0.44125065207481384, "learning_rate": 9.518702467194816e-06, "loss": 0.4238, "step": 10329 }, { "epoch": 0.47404891927860127, "grad_norm": 0.445504367351532, "learning_rate": 9.518597501540965e-06, "loss": 0.3638, "step": 10330 }, { "epoch": 0.4740948097838557, "grad_norm": 0.46772706508636475, "learning_rate": 9.51849252502131e-06, "loss": 0.4668, "step": 10331 }, { "epoch": 0.4741407002891102, "grad_norm": 0.48574432730674744, "learning_rate": 9.51838753763611e-06, "loss": 0.4196, "step": 10332 }, { "epoch": 0.47418659079436465, "grad_norm": 0.44626715779304504, "learning_rate": 9.518282539385611e-06, "loss": 0.3901, "step": 10333 }, { "epoch": 0.4742324812996191, "grad_norm": 0.42279523611068726, "learning_rate": 9.518177530270069e-06, "loss": 0.3583, "step": 10334 }, { "epoch": 0.4742783718048736, "grad_norm": 0.45353010296821594, "learning_rate": 9.518072510289737e-06, "loss": 0.4176, "step": 10335 }, { "epoch": 0.47432426231012803, "grad_norm": 0.43583959341049194, "learning_rate": 9.517967479444867e-06, "loss": 0.3751, "step": 10336 }, { "epoch": 0.4743701528153825, "grad_norm": 0.4660477340221405, "learning_rate": 9.517862437735709e-06, "loss": 0.4001, "step": 10337 }, { "epoch": 0.47441604332063697, "grad_norm": 0.4626091420650482, "learning_rate": 9.51775738516252e-06, "loss": 0.3803, "step": 10338 }, { "epoch": 0.4744619338258914, "grad_norm": 0.44223880767822266, "learning_rate": 9.51765232172555e-06, "loss": 0.3913, "step": 10339 }, { "epoch": 0.4745078243311459, "grad_norm": 0.44942721724510193, "learning_rate": 9.517547247425051e-06, "loss": 0.4065, "step": 10340 }, { "epoch": 0.47455371483640035, "grad_norm": 0.4664980471134186, "learning_rate": 9.517442162261277e-06, "loss": 0.4304, "step": 10341 }, { "epoch": 0.4745996053416548, "grad_norm": 0.433765172958374, "learning_rate": 9.51733706623448e-06, "loss": 0.3561, "step": 10342 }, { "epoch": 0.4746454958469093, "grad_norm": 0.49178147315979004, "learning_rate": 9.517231959344914e-06, "loss": 0.4427, "step": 10343 }, { "epoch": 0.47469138635216374, "grad_norm": 0.43830791115760803, "learning_rate": 9.51712684159283e-06, "loss": 0.3729, "step": 10344 }, { "epoch": 0.4747372768574182, "grad_norm": 0.4705291986465454, "learning_rate": 9.517021712978482e-06, "loss": 0.4293, "step": 10345 }, { "epoch": 0.4747831673626727, "grad_norm": 0.4658603370189667, "learning_rate": 9.516916573502121e-06, "loss": 0.4178, "step": 10346 }, { "epoch": 0.4748290578679271, "grad_norm": 0.49700331687927246, "learning_rate": 9.516811423164003e-06, "loss": 0.4768, "step": 10347 }, { "epoch": 0.4748749483731816, "grad_norm": 0.45584195852279663, "learning_rate": 9.51670626196438e-06, "loss": 0.3917, "step": 10348 }, { "epoch": 0.47492083887843606, "grad_norm": 0.5152073502540588, "learning_rate": 9.516601089903502e-06, "loss": 0.5275, "step": 10349 }, { "epoch": 0.4749667293836905, "grad_norm": 0.41748711466789246, "learning_rate": 9.516495906981624e-06, "loss": 0.3217, "step": 10350 }, { "epoch": 0.475012619888945, "grad_norm": 0.4511125981807709, "learning_rate": 9.516390713198999e-06, "loss": 0.4312, "step": 10351 }, { "epoch": 0.47505851039419944, "grad_norm": 0.4734383523464203, "learning_rate": 9.51628550855588e-06, "loss": 0.3898, "step": 10352 }, { "epoch": 0.4751044008994539, "grad_norm": 0.49554720520973206, "learning_rate": 9.51618029305252e-06, "loss": 0.4827, "step": 10353 }, { "epoch": 0.4751502914047084, "grad_norm": 0.43140262365341187, "learning_rate": 9.516075066689169e-06, "loss": 0.3552, "step": 10354 }, { "epoch": 0.4751961819099628, "grad_norm": 0.4343739151954651, "learning_rate": 9.515969829466087e-06, "loss": 0.3541, "step": 10355 }, { "epoch": 0.47524207241521726, "grad_norm": 0.49409523606300354, "learning_rate": 9.515864581383521e-06, "loss": 0.447, "step": 10356 }, { "epoch": 0.47528796292047176, "grad_norm": 0.4462185502052307, "learning_rate": 9.515759322441727e-06, "loss": 0.3996, "step": 10357 }, { "epoch": 0.4753338534257262, "grad_norm": 0.450898140668869, "learning_rate": 9.515654052640955e-06, "loss": 0.4085, "step": 10358 }, { "epoch": 0.4753797439309807, "grad_norm": 0.5207261443138123, "learning_rate": 9.515548771981461e-06, "loss": 0.4161, "step": 10359 }, { "epoch": 0.47542563443623514, "grad_norm": 0.4385071396827698, "learning_rate": 9.515443480463498e-06, "loss": 0.3645, "step": 10360 }, { "epoch": 0.4754715249414896, "grad_norm": 0.5149874687194824, "learning_rate": 9.515338178087316e-06, "loss": 0.5163, "step": 10361 }, { "epoch": 0.4755174154467441, "grad_norm": 0.4715368151664734, "learning_rate": 9.515232864853175e-06, "loss": 0.4108, "step": 10362 }, { "epoch": 0.4755633059519985, "grad_norm": 0.4490543603897095, "learning_rate": 9.515127540761321e-06, "loss": 0.3611, "step": 10363 }, { "epoch": 0.47560919645725297, "grad_norm": 0.45235684514045715, "learning_rate": 9.515022205812011e-06, "loss": 0.4326, "step": 10364 }, { "epoch": 0.47565508696250747, "grad_norm": 0.45228105783462524, "learning_rate": 9.514916860005497e-06, "loss": 0.35, "step": 10365 }, { "epoch": 0.4757009774677619, "grad_norm": 0.4305766522884369, "learning_rate": 9.514811503342033e-06, "loss": 0.3919, "step": 10366 }, { "epoch": 0.4757468679730164, "grad_norm": 0.4573293924331665, "learning_rate": 9.514706135821874e-06, "loss": 0.3812, "step": 10367 }, { "epoch": 0.47579275847827085, "grad_norm": 0.45627254247665405, "learning_rate": 9.51460075744527e-06, "loss": 0.3904, "step": 10368 }, { "epoch": 0.4758386489835253, "grad_norm": 0.4555344879627228, "learning_rate": 9.514495368212478e-06, "loss": 0.4001, "step": 10369 }, { "epoch": 0.4758845394887798, "grad_norm": 0.48171466588974, "learning_rate": 9.514389968123747e-06, "loss": 0.4767, "step": 10370 }, { "epoch": 0.47593042999403423, "grad_norm": 0.47452256083488464, "learning_rate": 9.514284557179334e-06, "loss": 0.4533, "step": 10371 }, { "epoch": 0.4759763204992887, "grad_norm": 0.4739775061607361, "learning_rate": 9.51417913537949e-06, "loss": 0.4212, "step": 10372 }, { "epoch": 0.47602221100454317, "grad_norm": 0.4628604054450989, "learning_rate": 9.514073702724473e-06, "loss": 0.417, "step": 10373 }, { "epoch": 0.4760681015097976, "grad_norm": 0.4621262550354004, "learning_rate": 9.513968259214531e-06, "loss": 0.3887, "step": 10374 }, { "epoch": 0.4761139920150521, "grad_norm": 0.45815667510032654, "learning_rate": 9.51386280484992e-06, "loss": 0.4496, "step": 10375 }, { "epoch": 0.47615988252030655, "grad_norm": 0.47607067227363586, "learning_rate": 9.513757339630895e-06, "loss": 0.4337, "step": 10376 }, { "epoch": 0.476205773025561, "grad_norm": 0.4208063781261444, "learning_rate": 9.513651863557707e-06, "loss": 0.324, "step": 10377 }, { "epoch": 0.4762516635308155, "grad_norm": 0.41242238879203796, "learning_rate": 9.513546376630612e-06, "loss": 0.3007, "step": 10378 }, { "epoch": 0.47629755403606994, "grad_norm": 0.46094265580177307, "learning_rate": 9.51344087884986e-06, "loss": 0.3832, "step": 10379 }, { "epoch": 0.4763434445413244, "grad_norm": 0.4570407569408417, "learning_rate": 9.513335370215709e-06, "loss": 0.4495, "step": 10380 }, { "epoch": 0.4763893350465789, "grad_norm": 0.451535165309906, "learning_rate": 9.513229850728411e-06, "loss": 0.4328, "step": 10381 }, { "epoch": 0.4764352255518333, "grad_norm": 0.4502072334289551, "learning_rate": 9.513124320388218e-06, "loss": 0.4257, "step": 10382 }, { "epoch": 0.47648111605708776, "grad_norm": 0.49615052342414856, "learning_rate": 9.513018779195387e-06, "loss": 0.4467, "step": 10383 }, { "epoch": 0.47652700656234226, "grad_norm": 0.43216705322265625, "learning_rate": 9.512913227150171e-06, "loss": 0.3653, "step": 10384 }, { "epoch": 0.4765728970675967, "grad_norm": 0.4520290493965149, "learning_rate": 9.512807664252821e-06, "loss": 0.363, "step": 10385 }, { "epoch": 0.4766187875728512, "grad_norm": 0.4575624465942383, "learning_rate": 9.512702090503593e-06, "loss": 0.4093, "step": 10386 }, { "epoch": 0.47666467807810564, "grad_norm": 0.4549427032470703, "learning_rate": 9.512596505902742e-06, "loss": 0.4027, "step": 10387 }, { "epoch": 0.4767105685833601, "grad_norm": 0.44714000821113586, "learning_rate": 9.512490910450521e-06, "loss": 0.3318, "step": 10388 }, { "epoch": 0.4767564590886146, "grad_norm": 0.474368691444397, "learning_rate": 9.512385304147183e-06, "loss": 0.4436, "step": 10389 }, { "epoch": 0.476802349593869, "grad_norm": 0.4662497639656067, "learning_rate": 9.512279686992982e-06, "loss": 0.3604, "step": 10390 }, { "epoch": 0.47684824009912347, "grad_norm": 0.4281292259693146, "learning_rate": 9.512174058988171e-06, "loss": 0.304, "step": 10391 }, { "epoch": 0.47689413060437796, "grad_norm": 0.4604879915714264, "learning_rate": 9.512068420133006e-06, "loss": 0.3846, "step": 10392 }, { "epoch": 0.4769400211096324, "grad_norm": 0.49535027146339417, "learning_rate": 9.511962770427742e-06, "loss": 0.3673, "step": 10393 }, { "epoch": 0.4769859116148869, "grad_norm": 0.5369259715080261, "learning_rate": 9.511857109872633e-06, "loss": 0.4326, "step": 10394 }, { "epoch": 0.47703180212014135, "grad_norm": 0.4738069474697113, "learning_rate": 9.511751438467928e-06, "loss": 0.3636, "step": 10395 }, { "epoch": 0.4770776926253958, "grad_norm": 0.46687376499176025, "learning_rate": 9.511645756213887e-06, "loss": 0.4106, "step": 10396 }, { "epoch": 0.4771235831306503, "grad_norm": 0.46414706110954285, "learning_rate": 9.511540063110763e-06, "loss": 0.4249, "step": 10397 }, { "epoch": 0.47716947363590473, "grad_norm": 0.453397661447525, "learning_rate": 9.511434359158807e-06, "loss": 0.3995, "step": 10398 }, { "epoch": 0.47721536414115917, "grad_norm": 0.44518303871154785, "learning_rate": 9.511328644358275e-06, "loss": 0.3732, "step": 10399 }, { "epoch": 0.47726125464641367, "grad_norm": 0.46980151534080505, "learning_rate": 9.511222918709422e-06, "loss": 0.4471, "step": 10400 }, { "epoch": 0.4773071451516681, "grad_norm": 0.46569621562957764, "learning_rate": 9.511117182212502e-06, "loss": 0.3176, "step": 10401 }, { "epoch": 0.4773530356569226, "grad_norm": 0.4824908673763275, "learning_rate": 9.511011434867768e-06, "loss": 0.4412, "step": 10402 }, { "epoch": 0.47739892616217705, "grad_norm": 0.44572189450263977, "learning_rate": 9.510905676675476e-06, "loss": 0.3869, "step": 10403 }, { "epoch": 0.4774448166674315, "grad_norm": 0.45096150040626526, "learning_rate": 9.510799907635879e-06, "loss": 0.3817, "step": 10404 }, { "epoch": 0.477490707172686, "grad_norm": 0.46703019738197327, "learning_rate": 9.510694127749232e-06, "loss": 0.4425, "step": 10405 }, { "epoch": 0.47753659767794043, "grad_norm": 0.47779399156570435, "learning_rate": 9.510588337015789e-06, "loss": 0.4049, "step": 10406 }, { "epoch": 0.4775824881831949, "grad_norm": 0.49011799693107605, "learning_rate": 9.510482535435805e-06, "loss": 0.439, "step": 10407 }, { "epoch": 0.4776283786884494, "grad_norm": 0.4545629620552063, "learning_rate": 9.510376723009535e-06, "loss": 0.3812, "step": 10408 }, { "epoch": 0.4776742691937038, "grad_norm": 0.41130968928337097, "learning_rate": 9.51027089973723e-06, "loss": 0.3595, "step": 10409 }, { "epoch": 0.4777201596989583, "grad_norm": 0.37521857023239136, "learning_rate": 9.51016506561915e-06, "loss": 0.2613, "step": 10410 }, { "epoch": 0.47776605020421276, "grad_norm": 0.4440804719924927, "learning_rate": 9.510059220655546e-06, "loss": 0.3196, "step": 10411 }, { "epoch": 0.4778119407094672, "grad_norm": 0.4269392192363739, "learning_rate": 9.50995336484667e-06, "loss": 0.3093, "step": 10412 }, { "epoch": 0.4778578312147217, "grad_norm": 0.4689266085624695, "learning_rate": 9.509847498192782e-06, "loss": 0.3996, "step": 10413 }, { "epoch": 0.47790372171997614, "grad_norm": 0.5067508816719055, "learning_rate": 9.509741620694135e-06, "loss": 0.4835, "step": 10414 }, { "epoch": 0.4779496122252306, "grad_norm": 0.5038485527038574, "learning_rate": 9.509635732350982e-06, "loss": 0.4798, "step": 10415 }, { "epoch": 0.4779955027304851, "grad_norm": 0.5294581055641174, "learning_rate": 9.509529833163576e-06, "loss": 0.5106, "step": 10416 }, { "epoch": 0.4780413932357395, "grad_norm": 0.46726858615875244, "learning_rate": 9.509423923132177e-06, "loss": 0.4051, "step": 10417 }, { "epoch": 0.47808728374099396, "grad_norm": 0.5157381296157837, "learning_rate": 9.509318002257036e-06, "loss": 0.485, "step": 10418 }, { "epoch": 0.47813317424624846, "grad_norm": 0.46835318207740784, "learning_rate": 9.509212070538407e-06, "loss": 0.3957, "step": 10419 }, { "epoch": 0.4781790647515029, "grad_norm": 0.47847121953964233, "learning_rate": 9.509106127976546e-06, "loss": 0.4101, "step": 10420 }, { "epoch": 0.4782249552567574, "grad_norm": 0.502334475517273, "learning_rate": 9.50900017457171e-06, "loss": 0.5169, "step": 10421 }, { "epoch": 0.47827084576201184, "grad_norm": 0.45988163352012634, "learning_rate": 9.50889421032415e-06, "loss": 0.4024, "step": 10422 }, { "epoch": 0.4783167362672663, "grad_norm": 0.496215283870697, "learning_rate": 9.508788235234123e-06, "loss": 0.4842, "step": 10423 }, { "epoch": 0.4783626267725208, "grad_norm": 0.43937432765960693, "learning_rate": 9.508682249301883e-06, "loss": 0.3463, "step": 10424 }, { "epoch": 0.4784085172777752, "grad_norm": 0.4643916189670563, "learning_rate": 9.508576252527686e-06, "loss": 0.4199, "step": 10425 }, { "epoch": 0.47845440778302967, "grad_norm": 0.49748629331588745, "learning_rate": 9.508470244911784e-06, "loss": 0.4291, "step": 10426 }, { "epoch": 0.47850029828828416, "grad_norm": 0.4719739556312561, "learning_rate": 9.508364226454436e-06, "loss": 0.3515, "step": 10427 }, { "epoch": 0.4785461887935386, "grad_norm": 0.46450793743133545, "learning_rate": 9.508258197155896e-06, "loss": 0.3917, "step": 10428 }, { "epoch": 0.4785920792987931, "grad_norm": 0.4392354190349579, "learning_rate": 9.508152157016415e-06, "loss": 0.3933, "step": 10429 }, { "epoch": 0.47863796980404755, "grad_norm": 0.47227659821510315, "learning_rate": 9.508046106036253e-06, "loss": 0.4181, "step": 10430 }, { "epoch": 0.478683860309302, "grad_norm": 0.4628347158432007, "learning_rate": 9.50794004421566e-06, "loss": 0.4008, "step": 10431 }, { "epoch": 0.4787297508145565, "grad_norm": 0.5100054144859314, "learning_rate": 9.507833971554898e-06, "loss": 0.4806, "step": 10432 }, { "epoch": 0.47877564131981093, "grad_norm": 0.4490429759025574, "learning_rate": 9.507727888054215e-06, "loss": 0.3962, "step": 10433 }, { "epoch": 0.47882153182506537, "grad_norm": 0.4828034043312073, "learning_rate": 9.507621793713871e-06, "loss": 0.462, "step": 10434 }, { "epoch": 0.47886742233031987, "grad_norm": 0.4738355576992035, "learning_rate": 9.507515688534119e-06, "loss": 0.4342, "step": 10435 }, { "epoch": 0.4789133128355743, "grad_norm": 0.4522918164730072, "learning_rate": 9.507409572515211e-06, "loss": 0.3792, "step": 10436 }, { "epoch": 0.4789592033408288, "grad_norm": 0.4803236126899719, "learning_rate": 9.50730344565741e-06, "loss": 0.4594, "step": 10437 }, { "epoch": 0.47900509384608325, "grad_norm": 0.433921217918396, "learning_rate": 9.507197307960966e-06, "loss": 0.311, "step": 10438 }, { "epoch": 0.4790509843513377, "grad_norm": 0.5136846899986267, "learning_rate": 9.507091159426134e-06, "loss": 0.4486, "step": 10439 }, { "epoch": 0.4790968748565922, "grad_norm": 0.4746669828891754, "learning_rate": 9.506985000053168e-06, "loss": 0.4328, "step": 10440 }, { "epoch": 0.47914276536184663, "grad_norm": 0.4642753601074219, "learning_rate": 9.506878829842329e-06, "loss": 0.3835, "step": 10441 }, { "epoch": 0.4791886558671011, "grad_norm": 0.45964664220809937, "learning_rate": 9.506772648793866e-06, "loss": 0.3879, "step": 10442 }, { "epoch": 0.4792345463723556, "grad_norm": 0.48215681314468384, "learning_rate": 9.50666645690804e-06, "loss": 0.4792, "step": 10443 }, { "epoch": 0.47928043687761, "grad_norm": 0.4566616714000702, "learning_rate": 9.506560254185103e-06, "loss": 0.3838, "step": 10444 }, { "epoch": 0.47932632738286446, "grad_norm": 0.4943268895149231, "learning_rate": 9.50645404062531e-06, "loss": 0.4459, "step": 10445 }, { "epoch": 0.47937221788811896, "grad_norm": 0.4451558291912079, "learning_rate": 9.506347816228918e-06, "loss": 0.3927, "step": 10446 }, { "epoch": 0.4794181083933734, "grad_norm": 0.4583683907985687, "learning_rate": 9.506241580996181e-06, "loss": 0.4457, "step": 10447 }, { "epoch": 0.4794639988986279, "grad_norm": 0.44078630208969116, "learning_rate": 9.506135334927355e-06, "loss": 0.3949, "step": 10448 }, { "epoch": 0.47950988940388234, "grad_norm": 0.41707995533943176, "learning_rate": 9.506029078022696e-06, "loss": 0.3386, "step": 10449 }, { "epoch": 0.4795557799091368, "grad_norm": 0.4449442923069, "learning_rate": 9.50592281028246e-06, "loss": 0.3752, "step": 10450 }, { "epoch": 0.4796016704143913, "grad_norm": 0.3976631164550781, "learning_rate": 9.505816531706901e-06, "loss": 0.27, "step": 10451 }, { "epoch": 0.4796475609196457, "grad_norm": 0.45820823311805725, "learning_rate": 9.505710242296275e-06, "loss": 0.378, "step": 10452 }, { "epoch": 0.47969345142490016, "grad_norm": 0.49073418974876404, "learning_rate": 9.505603942050839e-06, "loss": 0.4245, "step": 10453 }, { "epoch": 0.47973934193015466, "grad_norm": 0.4454165995121002, "learning_rate": 9.505497630970845e-06, "loss": 0.3778, "step": 10454 }, { "epoch": 0.4797852324354091, "grad_norm": 0.5252615809440613, "learning_rate": 9.505391309056555e-06, "loss": 0.5217, "step": 10455 }, { "epoch": 0.4798311229406636, "grad_norm": 0.49363264441490173, "learning_rate": 9.505284976308218e-06, "loss": 0.5159, "step": 10456 }, { "epoch": 0.47987701344591804, "grad_norm": 0.4703807830810547, "learning_rate": 9.505178632726093e-06, "loss": 0.4229, "step": 10457 }, { "epoch": 0.4799229039511725, "grad_norm": 0.4552096426486969, "learning_rate": 9.505072278310437e-06, "loss": 0.436, "step": 10458 }, { "epoch": 0.479968794456427, "grad_norm": 0.45829489827156067, "learning_rate": 9.5049659130615e-06, "loss": 0.3731, "step": 10459 }, { "epoch": 0.4800146849616814, "grad_norm": 0.445065438747406, "learning_rate": 9.504859536979546e-06, "loss": 0.3966, "step": 10460 }, { "epoch": 0.48006057546693587, "grad_norm": 0.5642270445823669, "learning_rate": 9.504753150064825e-06, "loss": 0.4826, "step": 10461 }, { "epoch": 0.48010646597219037, "grad_norm": 0.464288592338562, "learning_rate": 9.504646752317594e-06, "loss": 0.4127, "step": 10462 }, { "epoch": 0.4801523564774448, "grad_norm": 0.44272831082344055, "learning_rate": 9.504540343738111e-06, "loss": 0.4207, "step": 10463 }, { "epoch": 0.4801982469826993, "grad_norm": 0.43861475586891174, "learning_rate": 9.504433924326628e-06, "loss": 0.3548, "step": 10464 }, { "epoch": 0.48024413748795375, "grad_norm": 0.466901957988739, "learning_rate": 9.504327494083405e-06, "loss": 0.4029, "step": 10465 }, { "epoch": 0.4802900279932082, "grad_norm": 0.46454697847366333, "learning_rate": 9.504221053008694e-06, "loss": 0.3889, "step": 10466 }, { "epoch": 0.4803359184984627, "grad_norm": 0.43511927127838135, "learning_rate": 9.504114601102755e-06, "loss": 0.3384, "step": 10467 }, { "epoch": 0.48038180900371713, "grad_norm": 0.4771687388420105, "learning_rate": 9.50400813836584e-06, "loss": 0.4094, "step": 10468 }, { "epoch": 0.4804276995089716, "grad_norm": 0.4551457166671753, "learning_rate": 9.503901664798208e-06, "loss": 0.3711, "step": 10469 }, { "epoch": 0.48047359001422607, "grad_norm": 0.4445315897464752, "learning_rate": 9.503795180400113e-06, "loss": 0.3907, "step": 10470 }, { "epoch": 0.4805194805194805, "grad_norm": 0.48052436113357544, "learning_rate": 9.503688685171813e-06, "loss": 0.4886, "step": 10471 }, { "epoch": 0.48056537102473496, "grad_norm": 0.48993992805480957, "learning_rate": 9.503582179113562e-06, "loss": 0.38, "step": 10472 }, { "epoch": 0.48061126152998945, "grad_norm": 0.4691629707813263, "learning_rate": 9.503475662225618e-06, "loss": 0.3833, "step": 10473 }, { "epoch": 0.4806571520352439, "grad_norm": 0.4672612249851227, "learning_rate": 9.503369134508237e-06, "loss": 0.411, "step": 10474 }, { "epoch": 0.4807030425404984, "grad_norm": 0.4373226761817932, "learning_rate": 9.503262595961676e-06, "loss": 0.4001, "step": 10475 }, { "epoch": 0.48074893304575284, "grad_norm": 0.4269385039806366, "learning_rate": 9.503156046586186e-06, "loss": 0.3212, "step": 10476 }, { "epoch": 0.4807948235510073, "grad_norm": 0.4578581154346466, "learning_rate": 9.503049486382028e-06, "loss": 0.4274, "step": 10477 }, { "epoch": 0.4808407140562618, "grad_norm": 0.48944443464279175, "learning_rate": 9.50294291534946e-06, "loss": 0.3831, "step": 10478 }, { "epoch": 0.4808866045615162, "grad_norm": 0.46075639128685, "learning_rate": 9.502836333488732e-06, "loss": 0.4088, "step": 10479 }, { "epoch": 0.48093249506677066, "grad_norm": 0.4391716420650482, "learning_rate": 9.502729740800105e-06, "loss": 0.3774, "step": 10480 }, { "epoch": 0.48097838557202516, "grad_norm": 0.48299962282180786, "learning_rate": 9.502623137283835e-06, "loss": 0.4833, "step": 10481 }, { "epoch": 0.4810242760772796, "grad_norm": 0.4231841266155243, "learning_rate": 9.502516522940176e-06, "loss": 0.3107, "step": 10482 }, { "epoch": 0.4810701665825341, "grad_norm": 0.47765496373176575, "learning_rate": 9.502409897769388e-06, "loss": 0.4071, "step": 10483 }, { "epoch": 0.48111605708778854, "grad_norm": 0.4894293248653412, "learning_rate": 9.502303261771721e-06, "loss": 0.3773, "step": 10484 }, { "epoch": 0.481161947593043, "grad_norm": 0.44916993379592896, "learning_rate": 9.502196614947439e-06, "loss": 0.3895, "step": 10485 }, { "epoch": 0.4812078380982975, "grad_norm": 0.5385277271270752, "learning_rate": 9.502089957296794e-06, "loss": 0.5212, "step": 10486 }, { "epoch": 0.4812537286035519, "grad_norm": 0.4333556592464447, "learning_rate": 9.501983288820044e-06, "loss": 0.3637, "step": 10487 }, { "epoch": 0.48129961910880636, "grad_norm": 0.43650519847869873, "learning_rate": 9.501876609517444e-06, "loss": 0.3821, "step": 10488 }, { "epoch": 0.48134550961406086, "grad_norm": 0.4622440040111542, "learning_rate": 9.501769919389252e-06, "loss": 0.4373, "step": 10489 }, { "epoch": 0.4813914001193153, "grad_norm": 0.4766353666782379, "learning_rate": 9.501663218435726e-06, "loss": 0.4478, "step": 10490 }, { "epoch": 0.4814372906245698, "grad_norm": 0.4908713400363922, "learning_rate": 9.50155650665712e-06, "loss": 0.5197, "step": 10491 }, { "epoch": 0.48148318112982424, "grad_norm": 0.4828174412250519, "learning_rate": 9.50144978405369e-06, "loss": 0.4673, "step": 10492 }, { "epoch": 0.4815290716350787, "grad_norm": 0.4744407832622528, "learning_rate": 9.501343050625695e-06, "loss": 0.4248, "step": 10493 }, { "epoch": 0.4815749621403332, "grad_norm": 0.4782252013683319, "learning_rate": 9.501236306373389e-06, "loss": 0.4626, "step": 10494 }, { "epoch": 0.4816208526455876, "grad_norm": 0.4637541174888611, "learning_rate": 9.501129551297032e-06, "loss": 0.4179, "step": 10495 }, { "epoch": 0.48166674315084207, "grad_norm": 0.4931493401527405, "learning_rate": 9.501022785396878e-06, "loss": 0.4008, "step": 10496 }, { "epoch": 0.48171263365609657, "grad_norm": 0.48418864607810974, "learning_rate": 9.500916008673187e-06, "loss": 0.4416, "step": 10497 }, { "epoch": 0.481758524161351, "grad_norm": 0.47030889987945557, "learning_rate": 9.50080922112621e-06, "loss": 0.4116, "step": 10498 }, { "epoch": 0.48180441466660545, "grad_norm": 0.45532140135765076, "learning_rate": 9.50070242275621e-06, "loss": 0.3612, "step": 10499 }, { "epoch": 0.48185030517185995, "grad_norm": 0.4816846549510956, "learning_rate": 9.500595613563441e-06, "loss": 0.4048, "step": 10500 }, { "epoch": 0.4818961956771144, "grad_norm": 0.44284936785697937, "learning_rate": 9.50048879354816e-06, "loss": 0.3405, "step": 10501 }, { "epoch": 0.4819420861823689, "grad_norm": 0.5109581351280212, "learning_rate": 9.500381962710624e-06, "loss": 0.4817, "step": 10502 }, { "epoch": 0.48198797668762333, "grad_norm": 0.44986802339553833, "learning_rate": 9.500275121051087e-06, "loss": 0.3545, "step": 10503 }, { "epoch": 0.4820338671928778, "grad_norm": 0.5242726802825928, "learning_rate": 9.500168268569812e-06, "loss": 0.4571, "step": 10504 }, { "epoch": 0.48207975769813227, "grad_norm": 0.4705042243003845, "learning_rate": 9.50006140526705e-06, "loss": 0.3932, "step": 10505 }, { "epoch": 0.4821256482033867, "grad_norm": 0.4624444544315338, "learning_rate": 9.499954531143061e-06, "loss": 0.3465, "step": 10506 }, { "epoch": 0.48217153870864116, "grad_norm": 0.4773663580417633, "learning_rate": 9.499847646198104e-06, "loss": 0.4933, "step": 10507 }, { "epoch": 0.48221742921389565, "grad_norm": 0.49081405997276306, "learning_rate": 9.499740750432432e-06, "loss": 0.3814, "step": 10508 }, { "epoch": 0.4822633197191501, "grad_norm": 0.4599322974681854, "learning_rate": 9.499633843846303e-06, "loss": 0.3385, "step": 10509 }, { "epoch": 0.4823092102244046, "grad_norm": 0.4541787803173065, "learning_rate": 9.499526926439974e-06, "loss": 0.3435, "step": 10510 }, { "epoch": 0.48235510072965904, "grad_norm": 0.4409223198890686, "learning_rate": 9.499419998213706e-06, "loss": 0.3293, "step": 10511 }, { "epoch": 0.4824009912349135, "grad_norm": 0.46895596385002136, "learning_rate": 9.49931305916775e-06, "loss": 0.452, "step": 10512 }, { "epoch": 0.482446881740168, "grad_norm": 0.47766542434692383, "learning_rate": 9.499206109302368e-06, "loss": 0.3616, "step": 10513 }, { "epoch": 0.4824927722454224, "grad_norm": 0.48215818405151367, "learning_rate": 9.499099148617815e-06, "loss": 0.4581, "step": 10514 }, { "epoch": 0.48253866275067686, "grad_norm": 0.4555628001689911, "learning_rate": 9.498992177114347e-06, "loss": 0.4677, "step": 10515 }, { "epoch": 0.48258455325593136, "grad_norm": 0.4614100158214569, "learning_rate": 9.498885194792225e-06, "loss": 0.421, "step": 10516 }, { "epoch": 0.4826304437611858, "grad_norm": 0.49830806255340576, "learning_rate": 9.498778201651702e-06, "loss": 0.3402, "step": 10517 }, { "epoch": 0.4826763342664403, "grad_norm": 0.48189035058021545, "learning_rate": 9.498671197693038e-06, "loss": 0.3849, "step": 10518 }, { "epoch": 0.48272222477169474, "grad_norm": 0.45526474714279175, "learning_rate": 9.49856418291649e-06, "loss": 0.3397, "step": 10519 }, { "epoch": 0.4827681152769492, "grad_norm": 0.45288729667663574, "learning_rate": 9.498457157322314e-06, "loss": 0.4046, "step": 10520 }, { "epoch": 0.4828140057822037, "grad_norm": 0.46893730759620667, "learning_rate": 9.498350120910769e-06, "loss": 0.3723, "step": 10521 }, { "epoch": 0.4828598962874581, "grad_norm": 0.48233258724212646, "learning_rate": 9.498243073682111e-06, "loss": 0.4054, "step": 10522 }, { "epoch": 0.48290578679271257, "grad_norm": 0.46372032165527344, "learning_rate": 9.498136015636599e-06, "loss": 0.3859, "step": 10523 }, { "epoch": 0.48295167729796706, "grad_norm": 0.4478050470352173, "learning_rate": 9.49802894677449e-06, "loss": 0.3784, "step": 10524 }, { "epoch": 0.4829975678032215, "grad_norm": 0.5011600255966187, "learning_rate": 9.497921867096039e-06, "loss": 0.4296, "step": 10525 }, { "epoch": 0.483043458308476, "grad_norm": 0.4738364517688751, "learning_rate": 9.497814776601506e-06, "loss": 0.4038, "step": 10526 }, { "epoch": 0.48308934881373045, "grad_norm": 0.4783861041069031, "learning_rate": 9.49770767529115e-06, "loss": 0.4039, "step": 10527 }, { "epoch": 0.4831352393189849, "grad_norm": 0.512788712978363, "learning_rate": 9.497600563165224e-06, "loss": 0.4844, "step": 10528 }, { "epoch": 0.4831811298242394, "grad_norm": 0.4720323979854584, "learning_rate": 9.497493440223991e-06, "loss": 0.3864, "step": 10529 }, { "epoch": 0.48322702032949383, "grad_norm": 0.4891703426837921, "learning_rate": 9.497386306467703e-06, "loss": 0.4253, "step": 10530 }, { "epoch": 0.48327291083474827, "grad_norm": 0.46546056866645813, "learning_rate": 9.497279161896621e-06, "loss": 0.4531, "step": 10531 }, { "epoch": 0.48331880134000277, "grad_norm": 0.47407132387161255, "learning_rate": 9.497172006511003e-06, "loss": 0.4052, "step": 10532 }, { "epoch": 0.4833646918452572, "grad_norm": 0.47243431210517883, "learning_rate": 9.497064840311105e-06, "loss": 0.3758, "step": 10533 }, { "epoch": 0.48341058235051165, "grad_norm": 0.495991587638855, "learning_rate": 9.496957663297186e-06, "loss": 0.4253, "step": 10534 }, { "epoch": 0.48345647285576615, "grad_norm": 0.48500409722328186, "learning_rate": 9.496850475469502e-06, "loss": 0.4046, "step": 10535 }, { "epoch": 0.4835023633610206, "grad_norm": 0.42323896288871765, "learning_rate": 9.496743276828313e-06, "loss": 0.3403, "step": 10536 }, { "epoch": 0.4835482538662751, "grad_norm": 0.4268997609615326, "learning_rate": 9.496636067373875e-06, "loss": 0.357, "step": 10537 }, { "epoch": 0.48359414437152953, "grad_norm": 0.46194788813591003, "learning_rate": 9.496528847106447e-06, "loss": 0.4241, "step": 10538 }, { "epoch": 0.483640034876784, "grad_norm": 0.41879960894584656, "learning_rate": 9.496421616026285e-06, "loss": 0.3563, "step": 10539 }, { "epoch": 0.4836859253820385, "grad_norm": 0.47671058773994446, "learning_rate": 9.496314374133649e-06, "loss": 0.3823, "step": 10540 }, { "epoch": 0.4837318158872929, "grad_norm": 0.5220320820808411, "learning_rate": 9.496207121428795e-06, "loss": 0.5725, "step": 10541 }, { "epoch": 0.48377770639254736, "grad_norm": 0.4554789364337921, "learning_rate": 9.496099857911984e-06, "loss": 0.4219, "step": 10542 }, { "epoch": 0.48382359689780186, "grad_norm": 0.42972588539123535, "learning_rate": 9.495992583583471e-06, "loss": 0.3613, "step": 10543 }, { "epoch": 0.4838694874030563, "grad_norm": 0.4622933566570282, "learning_rate": 9.495885298443515e-06, "loss": 0.3811, "step": 10544 }, { "epoch": 0.4839153779083108, "grad_norm": 0.5069906711578369, "learning_rate": 9.495778002492374e-06, "loss": 0.4743, "step": 10545 }, { "epoch": 0.48396126841356524, "grad_norm": 0.5044757127761841, "learning_rate": 9.495670695730304e-06, "loss": 0.5002, "step": 10546 }, { "epoch": 0.4840071589188197, "grad_norm": 0.5277027487754822, "learning_rate": 9.495563378157564e-06, "loss": 0.5758, "step": 10547 }, { "epoch": 0.4840530494240742, "grad_norm": 0.4643923044204712, "learning_rate": 9.495456049774416e-06, "loss": 0.4404, "step": 10548 }, { "epoch": 0.4840989399293286, "grad_norm": 0.49092474579811096, "learning_rate": 9.495348710581113e-06, "loss": 0.4437, "step": 10549 }, { "epoch": 0.48414483043458306, "grad_norm": 0.48186805844306946, "learning_rate": 9.495241360577916e-06, "loss": 0.3862, "step": 10550 }, { "epoch": 0.48419072093983756, "grad_norm": 0.49741244316101074, "learning_rate": 9.495133999765083e-06, "loss": 0.4555, "step": 10551 }, { "epoch": 0.484236611445092, "grad_norm": 0.447372704744339, "learning_rate": 9.495026628142872e-06, "loss": 0.3559, "step": 10552 }, { "epoch": 0.4842825019503465, "grad_norm": 0.4247765839099884, "learning_rate": 9.494919245711539e-06, "loss": 0.3681, "step": 10553 }, { "epoch": 0.48432839245560094, "grad_norm": 0.45308107137680054, "learning_rate": 9.494811852471344e-06, "loss": 0.3715, "step": 10554 }, { "epoch": 0.4843742829608554, "grad_norm": 0.5200189352035522, "learning_rate": 9.494704448422544e-06, "loss": 0.4981, "step": 10555 }, { "epoch": 0.4844201734661099, "grad_norm": 0.45749276876449585, "learning_rate": 9.4945970335654e-06, "loss": 0.3655, "step": 10556 }, { "epoch": 0.4844660639713643, "grad_norm": 0.4648534059524536, "learning_rate": 9.494489607900168e-06, "loss": 0.3552, "step": 10557 }, { "epoch": 0.48451195447661877, "grad_norm": 0.4545661509037018, "learning_rate": 9.494382171427108e-06, "loss": 0.3629, "step": 10558 }, { "epoch": 0.48455784498187326, "grad_norm": 0.555922269821167, "learning_rate": 9.494274724146477e-06, "loss": 0.5095, "step": 10559 }, { "epoch": 0.4846037354871277, "grad_norm": 0.4687761068344116, "learning_rate": 9.494167266058534e-06, "loss": 0.3892, "step": 10560 }, { "epoch": 0.48464962599238215, "grad_norm": 0.45815131068229675, "learning_rate": 9.494059797163536e-06, "loss": 0.3839, "step": 10561 }, { "epoch": 0.48469551649763665, "grad_norm": 0.4146079421043396, "learning_rate": 9.493952317461743e-06, "loss": 0.3361, "step": 10562 }, { "epoch": 0.4847414070028911, "grad_norm": 0.4430672228336334, "learning_rate": 9.493844826953414e-06, "loss": 0.3602, "step": 10563 }, { "epoch": 0.4847872975081456, "grad_norm": 0.4650081396102905, "learning_rate": 9.493737325638804e-06, "loss": 0.3953, "step": 10564 }, { "epoch": 0.48483318801340003, "grad_norm": 0.4787168502807617, "learning_rate": 9.493629813518175e-06, "loss": 0.4182, "step": 10565 }, { "epoch": 0.48487907851865447, "grad_norm": 0.5862655639648438, "learning_rate": 9.493522290591786e-06, "loss": 0.4181, "step": 10566 }, { "epoch": 0.48492496902390897, "grad_norm": 0.4384766221046448, "learning_rate": 9.493414756859893e-06, "loss": 0.3364, "step": 10567 }, { "epoch": 0.4849708595291634, "grad_norm": 0.5203227400779724, "learning_rate": 9.493307212322756e-06, "loss": 0.5114, "step": 10568 }, { "epoch": 0.48501675003441785, "grad_norm": 0.44314152002334595, "learning_rate": 9.493199656980634e-06, "loss": 0.3823, "step": 10569 }, { "epoch": 0.48506264053967235, "grad_norm": 0.45190590620040894, "learning_rate": 9.493092090833782e-06, "loss": 0.3966, "step": 10570 }, { "epoch": 0.4851085310449268, "grad_norm": 0.49951040744781494, "learning_rate": 9.492984513882465e-06, "loss": 0.5049, "step": 10571 }, { "epoch": 0.4851544215501813, "grad_norm": 0.4859614670276642, "learning_rate": 9.492876926126937e-06, "loss": 0.4033, "step": 10572 }, { "epoch": 0.48520031205543573, "grad_norm": 0.4782193601131439, "learning_rate": 9.492769327567456e-06, "loss": 0.4687, "step": 10573 }, { "epoch": 0.4852462025606902, "grad_norm": 0.46321114897727966, "learning_rate": 9.492661718204286e-06, "loss": 0.413, "step": 10574 }, { "epoch": 0.4852920930659447, "grad_norm": 0.4493845999240875, "learning_rate": 9.49255409803768e-06, "loss": 0.3359, "step": 10575 }, { "epoch": 0.4853379835711991, "grad_norm": 0.47707003355026245, "learning_rate": 9.492446467067902e-06, "loss": 0.4981, "step": 10576 }, { "epoch": 0.48538387407645356, "grad_norm": 0.443598210811615, "learning_rate": 9.492338825295207e-06, "loss": 0.4085, "step": 10577 }, { "epoch": 0.48542976458170806, "grad_norm": 0.45336687564849854, "learning_rate": 9.492231172719854e-06, "loss": 0.3886, "step": 10578 }, { "epoch": 0.4854756550869625, "grad_norm": 0.45578062534332275, "learning_rate": 9.492123509342104e-06, "loss": 0.414, "step": 10579 }, { "epoch": 0.485521545592217, "grad_norm": 0.4789424240589142, "learning_rate": 9.492015835162213e-06, "loss": 0.452, "step": 10580 }, { "epoch": 0.48556743609747144, "grad_norm": 0.46922335028648376, "learning_rate": 9.491908150180442e-06, "loss": 0.409, "step": 10581 }, { "epoch": 0.4856133266027259, "grad_norm": 0.46665775775909424, "learning_rate": 9.491800454397051e-06, "loss": 0.4309, "step": 10582 }, { "epoch": 0.4856592171079804, "grad_norm": 0.4767134487628937, "learning_rate": 9.491692747812296e-06, "loss": 0.4193, "step": 10583 }, { "epoch": 0.4857051076132348, "grad_norm": 0.4366234540939331, "learning_rate": 9.49158503042644e-06, "loss": 0.3475, "step": 10584 }, { "epoch": 0.48575099811848926, "grad_norm": 0.42120498418807983, "learning_rate": 9.491477302239737e-06, "loss": 0.3228, "step": 10585 }, { "epoch": 0.48579688862374376, "grad_norm": 0.4392586350440979, "learning_rate": 9.49136956325245e-06, "loss": 0.3343, "step": 10586 }, { "epoch": 0.4858427791289982, "grad_norm": 0.47664669156074524, "learning_rate": 9.491261813464837e-06, "loss": 0.4366, "step": 10587 }, { "epoch": 0.48588866963425265, "grad_norm": 0.4301667809486389, "learning_rate": 9.491154052877154e-06, "loss": 0.3805, "step": 10588 }, { "epoch": 0.48593456013950714, "grad_norm": 0.4611995816230774, "learning_rate": 9.491046281489667e-06, "loss": 0.3998, "step": 10589 }, { "epoch": 0.4859804506447616, "grad_norm": 0.4516323506832123, "learning_rate": 9.490938499302627e-06, "loss": 0.4378, "step": 10590 }, { "epoch": 0.4860263411500161, "grad_norm": 0.47884055972099304, "learning_rate": 9.4908307063163e-06, "loss": 0.4323, "step": 10591 }, { "epoch": 0.4860722316552705, "grad_norm": 0.5051462054252625, "learning_rate": 9.490722902530943e-06, "loss": 0.4771, "step": 10592 }, { "epoch": 0.48611812216052497, "grad_norm": 0.47771400213241577, "learning_rate": 9.490615087946812e-06, "loss": 0.4078, "step": 10593 }, { "epoch": 0.48616401266577947, "grad_norm": 0.47820213437080383, "learning_rate": 9.490507262564172e-06, "loss": 0.4374, "step": 10594 }, { "epoch": 0.4862099031710339, "grad_norm": 0.45286113023757935, "learning_rate": 9.490399426383277e-06, "loss": 0.4117, "step": 10595 }, { "epoch": 0.48625579367628835, "grad_norm": 0.5176018476486206, "learning_rate": 9.49029157940439e-06, "loss": 0.4869, "step": 10596 }, { "epoch": 0.48630168418154285, "grad_norm": 0.4529692828655243, "learning_rate": 9.490183721627767e-06, "loss": 0.3432, "step": 10597 }, { "epoch": 0.4863475746867973, "grad_norm": 0.49915948510169983, "learning_rate": 9.490075853053673e-06, "loss": 0.4814, "step": 10598 }, { "epoch": 0.4863934651920518, "grad_norm": 0.48007404804229736, "learning_rate": 9.48996797368236e-06, "loss": 0.4559, "step": 10599 }, { "epoch": 0.48643935569730623, "grad_norm": 0.46602025628089905, "learning_rate": 9.489860083514094e-06, "loss": 0.3751, "step": 10600 }, { "epoch": 0.4864852462025607, "grad_norm": 0.46526169776916504, "learning_rate": 9.48975218254913e-06, "loss": 0.404, "step": 10601 }, { "epoch": 0.48653113670781517, "grad_norm": 0.4285773038864136, "learning_rate": 9.489644270787728e-06, "loss": 0.3255, "step": 10602 }, { "epoch": 0.4865770272130696, "grad_norm": 0.4511367976665497, "learning_rate": 9.48953634823015e-06, "loss": 0.4547, "step": 10603 }, { "epoch": 0.48662291771832406, "grad_norm": 0.4331052899360657, "learning_rate": 9.489428414876654e-06, "loss": 0.3531, "step": 10604 }, { "epoch": 0.48666880822357855, "grad_norm": 0.4630088210105896, "learning_rate": 9.489320470727499e-06, "loss": 0.4049, "step": 10605 }, { "epoch": 0.486714698728833, "grad_norm": 0.45251232385635376, "learning_rate": 9.489212515782946e-06, "loss": 0.401, "step": 10606 }, { "epoch": 0.4867605892340875, "grad_norm": 0.4439692497253418, "learning_rate": 9.489104550043251e-06, "loss": 0.3855, "step": 10607 }, { "epoch": 0.48680647973934194, "grad_norm": 0.43586409091949463, "learning_rate": 9.488996573508679e-06, "loss": 0.3428, "step": 10608 }, { "epoch": 0.4868523702445964, "grad_norm": 0.5121430158615112, "learning_rate": 9.488888586179487e-06, "loss": 0.4365, "step": 10609 }, { "epoch": 0.4868982607498509, "grad_norm": 0.4647507965564728, "learning_rate": 9.488780588055936e-06, "loss": 0.4203, "step": 10610 }, { "epoch": 0.4869441512551053, "grad_norm": 0.4112704396247864, "learning_rate": 9.48867257913828e-06, "loss": 0.2968, "step": 10611 }, { "epoch": 0.48699004176035976, "grad_norm": 0.46082592010498047, "learning_rate": 9.488564559426787e-06, "loss": 0.4453, "step": 10612 }, { "epoch": 0.48703593226561426, "grad_norm": 0.48928314447402954, "learning_rate": 9.48845652892171e-06, "loss": 0.4928, "step": 10613 }, { "epoch": 0.4870818227708687, "grad_norm": 0.45143210887908936, "learning_rate": 9.488348487623313e-06, "loss": 0.4082, "step": 10614 }, { "epoch": 0.4871277132761232, "grad_norm": 0.46791449189186096, "learning_rate": 9.488240435531854e-06, "loss": 0.4305, "step": 10615 }, { "epoch": 0.48717360378137764, "grad_norm": 0.45897534489631653, "learning_rate": 9.488132372647596e-06, "loss": 0.4325, "step": 10616 }, { "epoch": 0.4872194942866321, "grad_norm": 0.48604848980903625, "learning_rate": 9.488024298970792e-06, "loss": 0.4039, "step": 10617 }, { "epoch": 0.4872653847918866, "grad_norm": 0.521153450012207, "learning_rate": 9.487916214501709e-06, "loss": 0.4017, "step": 10618 }, { "epoch": 0.487311275297141, "grad_norm": 0.4453768730163574, "learning_rate": 9.487808119240602e-06, "loss": 0.4487, "step": 10619 }, { "epoch": 0.48735716580239546, "grad_norm": 0.4747428894042969, "learning_rate": 9.487700013187734e-06, "loss": 0.4266, "step": 10620 }, { "epoch": 0.48740305630764996, "grad_norm": 0.44069811701774597, "learning_rate": 9.487591896343364e-06, "loss": 0.3339, "step": 10621 }, { "epoch": 0.4874489468129044, "grad_norm": 0.5257259607315063, "learning_rate": 9.487483768707753e-06, "loss": 0.5003, "step": 10622 }, { "epoch": 0.48749483731815885, "grad_norm": 0.4492780864238739, "learning_rate": 9.487375630281156e-06, "loss": 0.3613, "step": 10623 }, { "epoch": 0.48754072782341334, "grad_norm": 0.48792123794555664, "learning_rate": 9.487267481063839e-06, "loss": 0.4324, "step": 10624 }, { "epoch": 0.4875866183286678, "grad_norm": 0.47915229201316833, "learning_rate": 9.48715932105606e-06, "loss": 0.4347, "step": 10625 }, { "epoch": 0.4876325088339223, "grad_norm": 0.452625572681427, "learning_rate": 9.48705115025808e-06, "loss": 0.41, "step": 10626 }, { "epoch": 0.4876783993391767, "grad_norm": 0.5111326575279236, "learning_rate": 9.486942968670157e-06, "loss": 0.4949, "step": 10627 }, { "epoch": 0.48772428984443117, "grad_norm": 0.5307044982910156, "learning_rate": 9.486834776292552e-06, "loss": 0.5728, "step": 10628 }, { "epoch": 0.48777018034968567, "grad_norm": 0.4536900818347931, "learning_rate": 9.486726573125526e-06, "loss": 0.3901, "step": 10629 }, { "epoch": 0.4878160708549401, "grad_norm": 0.4589734375476837, "learning_rate": 9.48661835916934e-06, "loss": 0.3918, "step": 10630 }, { "epoch": 0.48786196136019455, "grad_norm": 0.4557384252548218, "learning_rate": 9.486510134424251e-06, "loss": 0.4146, "step": 10631 }, { "epoch": 0.48790785186544905, "grad_norm": 0.44064903259277344, "learning_rate": 9.48640189889052e-06, "loss": 0.3445, "step": 10632 }, { "epoch": 0.4879537423707035, "grad_norm": 0.440535306930542, "learning_rate": 9.486293652568413e-06, "loss": 0.3971, "step": 10633 }, { "epoch": 0.487999632875958, "grad_norm": 0.43332499265670776, "learning_rate": 9.48618539545818e-06, "loss": 0.4, "step": 10634 }, { "epoch": 0.48804552338121243, "grad_norm": 0.4713751971721649, "learning_rate": 9.48607712756009e-06, "loss": 0.4134, "step": 10635 }, { "epoch": 0.4880914138864669, "grad_norm": 0.43473419547080994, "learning_rate": 9.485968848874401e-06, "loss": 0.3481, "step": 10636 }, { "epoch": 0.48813730439172137, "grad_norm": 0.4714726507663727, "learning_rate": 9.485860559401372e-06, "loss": 0.4216, "step": 10637 }, { "epoch": 0.4881831948969758, "grad_norm": 0.44355708360671997, "learning_rate": 9.485752259141264e-06, "loss": 0.3728, "step": 10638 }, { "epoch": 0.48822908540223026, "grad_norm": 0.4756859242916107, "learning_rate": 9.485643948094336e-06, "loss": 0.4963, "step": 10639 }, { "epoch": 0.48827497590748475, "grad_norm": 0.46483999490737915, "learning_rate": 9.485535626260852e-06, "loss": 0.3987, "step": 10640 }, { "epoch": 0.4883208664127392, "grad_norm": 0.48093196749687195, "learning_rate": 9.485427293641069e-06, "loss": 0.4531, "step": 10641 }, { "epoch": 0.4883667569179937, "grad_norm": 0.48590660095214844, "learning_rate": 9.485318950235252e-06, "loss": 0.476, "step": 10642 }, { "epoch": 0.48841264742324814, "grad_norm": 0.487311452627182, "learning_rate": 9.485210596043655e-06, "loss": 0.4561, "step": 10643 }, { "epoch": 0.4884585379285026, "grad_norm": 0.47221386432647705, "learning_rate": 9.485102231066545e-06, "loss": 0.4587, "step": 10644 }, { "epoch": 0.4885044284337571, "grad_norm": 0.43011292815208435, "learning_rate": 9.484993855304177e-06, "loss": 0.3676, "step": 10645 }, { "epoch": 0.4885503189390115, "grad_norm": 0.5184840559959412, "learning_rate": 9.484885468756815e-06, "loss": 0.5442, "step": 10646 }, { "epoch": 0.48859620944426596, "grad_norm": 0.4347049593925476, "learning_rate": 9.48477707142472e-06, "loss": 0.3366, "step": 10647 }, { "epoch": 0.48864209994952046, "grad_norm": 0.4783165454864502, "learning_rate": 9.48466866330815e-06, "loss": 0.413, "step": 10648 }, { "epoch": 0.4886879904547749, "grad_norm": 0.4483320713043213, "learning_rate": 9.484560244407369e-06, "loss": 0.3751, "step": 10649 }, { "epoch": 0.48873388096002934, "grad_norm": 0.4035055339336395, "learning_rate": 9.484451814722635e-06, "loss": 0.293, "step": 10650 }, { "epoch": 0.48877977146528384, "grad_norm": 0.46935850381851196, "learning_rate": 9.48434337425421e-06, "loss": 0.4051, "step": 10651 }, { "epoch": 0.4888256619705383, "grad_norm": 0.49506109952926636, "learning_rate": 9.484234923002356e-06, "loss": 0.4865, "step": 10652 }, { "epoch": 0.4888715524757928, "grad_norm": 0.5216750502586365, "learning_rate": 9.48412646096733e-06, "loss": 0.6004, "step": 10653 }, { "epoch": 0.4889174429810472, "grad_norm": 0.4630841612815857, "learning_rate": 9.484017988149396e-06, "loss": 0.3929, "step": 10654 }, { "epoch": 0.48896333348630167, "grad_norm": 0.45899176597595215, "learning_rate": 9.483909504548814e-06, "loss": 0.4132, "step": 10655 }, { "epoch": 0.48900922399155616, "grad_norm": 0.4924745261669159, "learning_rate": 9.483801010165844e-06, "loss": 0.4558, "step": 10656 }, { "epoch": 0.4890551144968106, "grad_norm": 0.42792290449142456, "learning_rate": 9.483692505000749e-06, "loss": 0.3295, "step": 10657 }, { "epoch": 0.48910100500206505, "grad_norm": 0.49291712045669556, "learning_rate": 9.483583989053788e-06, "loss": 0.5214, "step": 10658 }, { "epoch": 0.48914689550731955, "grad_norm": 0.4470479488372803, "learning_rate": 9.483475462325221e-06, "loss": 0.3972, "step": 10659 }, { "epoch": 0.489192786012574, "grad_norm": 0.43819937109947205, "learning_rate": 9.483366924815312e-06, "loss": 0.3487, "step": 10660 }, { "epoch": 0.4892386765178285, "grad_norm": 0.4256936013698578, "learning_rate": 9.48325837652432e-06, "loss": 0.3338, "step": 10661 }, { "epoch": 0.48928456702308293, "grad_norm": 0.8580310940742493, "learning_rate": 9.483149817452508e-06, "loss": 0.4282, "step": 10662 }, { "epoch": 0.48933045752833737, "grad_norm": 0.505344569683075, "learning_rate": 9.483041247600135e-06, "loss": 0.5029, "step": 10663 }, { "epoch": 0.48937634803359187, "grad_norm": 0.44550150632858276, "learning_rate": 9.48293266696746e-06, "loss": 0.3384, "step": 10664 }, { "epoch": 0.4894222385388463, "grad_norm": 0.41006457805633545, "learning_rate": 9.48282407555475e-06, "loss": 0.314, "step": 10665 }, { "epoch": 0.48946812904410075, "grad_norm": 0.5112209320068359, "learning_rate": 9.482715473362263e-06, "loss": 0.4939, "step": 10666 }, { "epoch": 0.48951401954935525, "grad_norm": 0.44809049367904663, "learning_rate": 9.48260686039026e-06, "loss": 0.3873, "step": 10667 }, { "epoch": 0.4895599100546097, "grad_norm": 0.43120285868644714, "learning_rate": 9.482498236639002e-06, "loss": 0.362, "step": 10668 }, { "epoch": 0.4896058005598642, "grad_norm": 0.45697617530822754, "learning_rate": 9.48238960210875e-06, "loss": 0.3977, "step": 10669 }, { "epoch": 0.48965169106511863, "grad_norm": 0.4931700527667999, "learning_rate": 9.482280956799765e-06, "loss": 0.4953, "step": 10670 }, { "epoch": 0.4896975815703731, "grad_norm": 0.471383273601532, "learning_rate": 9.48217230071231e-06, "loss": 0.4039, "step": 10671 }, { "epoch": 0.4897434720756276, "grad_norm": 0.44583895802497864, "learning_rate": 9.482063633846642e-06, "loss": 0.4038, "step": 10672 }, { "epoch": 0.489789362580882, "grad_norm": 0.45217204093933105, "learning_rate": 9.48195495620303e-06, "loss": 0.3917, "step": 10673 }, { "epoch": 0.48983525308613646, "grad_norm": 0.5157490968704224, "learning_rate": 9.48184626778173e-06, "loss": 0.4222, "step": 10674 }, { "epoch": 0.48988114359139096, "grad_norm": 0.5463132262229919, "learning_rate": 9.481737568583003e-06, "loss": 0.4731, "step": 10675 }, { "epoch": 0.4899270340966454, "grad_norm": 0.48229992389678955, "learning_rate": 9.481628858607111e-06, "loss": 0.3891, "step": 10676 }, { "epoch": 0.48997292460189984, "grad_norm": 0.5011181831359863, "learning_rate": 9.481520137854318e-06, "loss": 0.4392, "step": 10677 }, { "epoch": 0.49001881510715434, "grad_norm": 0.48628997802734375, "learning_rate": 9.481411406324883e-06, "loss": 0.4644, "step": 10678 }, { "epoch": 0.4900647056124088, "grad_norm": 0.4589502811431885, "learning_rate": 9.481302664019067e-06, "loss": 0.4061, "step": 10679 }, { "epoch": 0.4901105961176633, "grad_norm": 0.4357941448688507, "learning_rate": 9.481193910937132e-06, "loss": 0.329, "step": 10680 }, { "epoch": 0.4901564866229177, "grad_norm": 0.4721980094909668, "learning_rate": 9.48108514707934e-06, "loss": 0.4175, "step": 10681 }, { "epoch": 0.49020237712817216, "grad_norm": 0.47352492809295654, "learning_rate": 9.480976372445954e-06, "loss": 0.41, "step": 10682 }, { "epoch": 0.49024826763342666, "grad_norm": 0.45808619260787964, "learning_rate": 9.480867587037235e-06, "loss": 0.41, "step": 10683 }, { "epoch": 0.4902941581386811, "grad_norm": 0.47846418619155884, "learning_rate": 9.480758790853442e-06, "loss": 0.4784, "step": 10684 }, { "epoch": 0.49034004864393554, "grad_norm": 0.4267057478427887, "learning_rate": 9.480649983894838e-06, "loss": 0.3286, "step": 10685 }, { "epoch": 0.49038593914919004, "grad_norm": 0.4665498733520508, "learning_rate": 9.480541166161685e-06, "loss": 0.476, "step": 10686 }, { "epoch": 0.4904318296544445, "grad_norm": 0.45767974853515625, "learning_rate": 9.480432337654244e-06, "loss": 0.3493, "step": 10687 }, { "epoch": 0.490477720159699, "grad_norm": 0.5075933933258057, "learning_rate": 9.48032349837278e-06, "loss": 0.5108, "step": 10688 }, { "epoch": 0.4905236106649534, "grad_norm": 0.4356735050678253, "learning_rate": 9.48021464831755e-06, "loss": 0.3335, "step": 10689 }, { "epoch": 0.49056950117020787, "grad_norm": 0.4836972653865814, "learning_rate": 9.480105787488818e-06, "loss": 0.4355, "step": 10690 }, { "epoch": 0.49061539167546236, "grad_norm": 0.46491938829421997, "learning_rate": 9.479996915886844e-06, "loss": 0.4266, "step": 10691 }, { "epoch": 0.4906612821807168, "grad_norm": 0.43625667691230774, "learning_rate": 9.479888033511892e-06, "loss": 0.3522, "step": 10692 }, { "epoch": 0.49070717268597125, "grad_norm": 0.46028441190719604, "learning_rate": 9.479779140364225e-06, "loss": 0.4166, "step": 10693 }, { "epoch": 0.49075306319122575, "grad_norm": 0.4954717457294464, "learning_rate": 9.479670236444104e-06, "loss": 0.5178, "step": 10694 }, { "epoch": 0.4907989536964802, "grad_norm": 0.45140647888183594, "learning_rate": 9.479561321751787e-06, "loss": 0.3438, "step": 10695 }, { "epoch": 0.4908448442017347, "grad_norm": 0.4738351106643677, "learning_rate": 9.479452396287539e-06, "loss": 0.4298, "step": 10696 }, { "epoch": 0.49089073470698913, "grad_norm": 0.45133888721466064, "learning_rate": 9.479343460051622e-06, "loss": 0.4058, "step": 10697 }, { "epoch": 0.49093662521224357, "grad_norm": 0.435988187789917, "learning_rate": 9.479234513044299e-06, "loss": 0.3259, "step": 10698 }, { "epoch": 0.49098251571749807, "grad_norm": 0.4549507796764374, "learning_rate": 9.479125555265829e-06, "loss": 0.3961, "step": 10699 }, { "epoch": 0.4910284062227525, "grad_norm": 0.46372732520103455, "learning_rate": 9.479016586716476e-06, "loss": 0.3804, "step": 10700 }, { "epoch": 0.49107429672800695, "grad_norm": 0.40998783707618713, "learning_rate": 9.478907607396504e-06, "loss": 0.3145, "step": 10701 }, { "epoch": 0.49112018723326145, "grad_norm": 0.4988030791282654, "learning_rate": 9.478798617306171e-06, "loss": 0.4894, "step": 10702 }, { "epoch": 0.4911660777385159, "grad_norm": 0.46360355615615845, "learning_rate": 9.47868961644574e-06, "loss": 0.3937, "step": 10703 }, { "epoch": 0.4912119682437704, "grad_norm": 0.5051340460777283, "learning_rate": 9.478580604815475e-06, "loss": 0.4093, "step": 10704 }, { "epoch": 0.49125785874902483, "grad_norm": 0.47017911076545715, "learning_rate": 9.478471582415638e-06, "loss": 0.4045, "step": 10705 }, { "epoch": 0.4913037492542793, "grad_norm": 0.469716340303421, "learning_rate": 9.478362549246488e-06, "loss": 0.4113, "step": 10706 }, { "epoch": 0.4913496397595338, "grad_norm": 0.4343683123588562, "learning_rate": 9.478253505308291e-06, "loss": 0.386, "step": 10707 }, { "epoch": 0.4913955302647882, "grad_norm": 0.4531431794166565, "learning_rate": 9.478144450601308e-06, "loss": 0.3852, "step": 10708 }, { "epoch": 0.49144142077004266, "grad_norm": 0.46602678298950195, "learning_rate": 9.478035385125802e-06, "loss": 0.4207, "step": 10709 }, { "epoch": 0.49148731127529716, "grad_norm": 0.4904780983924866, "learning_rate": 9.47792630888203e-06, "loss": 0.4395, "step": 10710 }, { "epoch": 0.4915332017805516, "grad_norm": 0.4122215509414673, "learning_rate": 9.477817221870263e-06, "loss": 0.3141, "step": 10711 }, { "epoch": 0.49157909228580604, "grad_norm": 0.4632011950016022, "learning_rate": 9.477708124090756e-06, "loss": 0.452, "step": 10712 }, { "epoch": 0.49162498279106054, "grad_norm": 0.4378023147583008, "learning_rate": 9.477599015543774e-06, "loss": 0.41, "step": 10713 }, { "epoch": 0.491670873296315, "grad_norm": 0.49732813239097595, "learning_rate": 9.477489896229583e-06, "loss": 0.4237, "step": 10714 }, { "epoch": 0.4917167638015695, "grad_norm": 0.4453243613243103, "learning_rate": 9.477380766148438e-06, "loss": 0.3801, "step": 10715 }, { "epoch": 0.4917626543068239, "grad_norm": 0.44639262557029724, "learning_rate": 9.477271625300608e-06, "loss": 0.3914, "step": 10716 }, { "epoch": 0.49180854481207836, "grad_norm": 0.45876771211624146, "learning_rate": 9.477162473686353e-06, "loss": 0.4365, "step": 10717 }, { "epoch": 0.49185443531733286, "grad_norm": 0.4284830689430237, "learning_rate": 9.477053311305932e-06, "loss": 0.3459, "step": 10718 }, { "epoch": 0.4919003258225873, "grad_norm": 0.4694904386997223, "learning_rate": 9.476944138159615e-06, "loss": 0.4294, "step": 10719 }, { "epoch": 0.49194621632784175, "grad_norm": 0.4418640434741974, "learning_rate": 9.476834954247657e-06, "loss": 0.3728, "step": 10720 }, { "epoch": 0.49199210683309624, "grad_norm": 0.47429221868515015, "learning_rate": 9.476725759570324e-06, "loss": 0.4423, "step": 10721 }, { "epoch": 0.4920379973383507, "grad_norm": 0.4893615245819092, "learning_rate": 9.47661655412788e-06, "loss": 0.5006, "step": 10722 }, { "epoch": 0.4920838878436052, "grad_norm": 0.48861318826675415, "learning_rate": 9.476507337920586e-06, "loss": 0.4628, "step": 10723 }, { "epoch": 0.4921297783488596, "grad_norm": 0.4213911294937134, "learning_rate": 9.476398110948703e-06, "loss": 0.336, "step": 10724 }, { "epoch": 0.49217566885411407, "grad_norm": 0.46832531690597534, "learning_rate": 9.476288873212498e-06, "loss": 0.4058, "step": 10725 }, { "epoch": 0.49222155935936857, "grad_norm": 0.4661771059036255, "learning_rate": 9.47617962471223e-06, "loss": 0.3812, "step": 10726 }, { "epoch": 0.492267449864623, "grad_norm": 0.46775874495506287, "learning_rate": 9.476070365448162e-06, "loss": 0.4248, "step": 10727 }, { "epoch": 0.49231334036987745, "grad_norm": 0.43332505226135254, "learning_rate": 9.475961095420557e-06, "loss": 0.3788, "step": 10728 }, { "epoch": 0.49235923087513195, "grad_norm": 0.4617195427417755, "learning_rate": 9.47585181462968e-06, "loss": 0.4286, "step": 10729 }, { "epoch": 0.4924051213803864, "grad_norm": 0.43549850583076477, "learning_rate": 9.47574252307579e-06, "loss": 0.3835, "step": 10730 }, { "epoch": 0.4924510118856409, "grad_norm": 0.46854934096336365, "learning_rate": 9.475633220759153e-06, "loss": 0.4168, "step": 10731 }, { "epoch": 0.49249690239089533, "grad_norm": 0.4645409882068634, "learning_rate": 9.47552390768003e-06, "loss": 0.3976, "step": 10732 }, { "epoch": 0.4925427928961498, "grad_norm": 0.47584426403045654, "learning_rate": 9.475414583838686e-06, "loss": 0.4163, "step": 10733 }, { "epoch": 0.49258868340140427, "grad_norm": 0.48771488666534424, "learning_rate": 9.47530524923538e-06, "loss": 0.4566, "step": 10734 }, { "epoch": 0.4926345739066587, "grad_norm": 0.5110533237457275, "learning_rate": 9.47519590387038e-06, "loss": 0.5134, "step": 10735 }, { "epoch": 0.49268046441191315, "grad_norm": 0.4774998128414154, "learning_rate": 9.475086547743945e-06, "loss": 0.4421, "step": 10736 }, { "epoch": 0.49272635491716765, "grad_norm": 0.4370076358318329, "learning_rate": 9.474977180856339e-06, "loss": 0.3821, "step": 10737 }, { "epoch": 0.4927722454224221, "grad_norm": 0.46614742279052734, "learning_rate": 9.474867803207826e-06, "loss": 0.3748, "step": 10738 }, { "epoch": 0.49281813592767654, "grad_norm": 0.42606380581855774, "learning_rate": 9.474758414798668e-06, "loss": 0.3502, "step": 10739 }, { "epoch": 0.49286402643293103, "grad_norm": 0.46804261207580566, "learning_rate": 9.474649015629126e-06, "loss": 0.3631, "step": 10740 }, { "epoch": 0.4929099169381855, "grad_norm": 0.48882976174354553, "learning_rate": 9.474539605699468e-06, "loss": 0.3939, "step": 10741 }, { "epoch": 0.49295580744344, "grad_norm": 0.4956061542034149, "learning_rate": 9.474430185009953e-06, "loss": 0.4319, "step": 10742 }, { "epoch": 0.4930016979486944, "grad_norm": 0.4858354330062866, "learning_rate": 9.474320753560849e-06, "loss": 0.4353, "step": 10743 }, { "epoch": 0.49304758845394886, "grad_norm": 0.4654906392097473, "learning_rate": 9.474211311352411e-06, "loss": 0.438, "step": 10744 }, { "epoch": 0.49309347895920336, "grad_norm": 0.4981384873390198, "learning_rate": 9.47410185838491e-06, "loss": 0.4793, "step": 10745 }, { "epoch": 0.4931393694644578, "grad_norm": 0.5082470774650574, "learning_rate": 9.473992394658604e-06, "loss": 0.3928, "step": 10746 }, { "epoch": 0.49318525996971224, "grad_norm": 0.5102579593658447, "learning_rate": 9.473882920173761e-06, "loss": 0.3757, "step": 10747 }, { "epoch": 0.49323115047496674, "grad_norm": 0.4583779573440552, "learning_rate": 9.473773434930639e-06, "loss": 0.4549, "step": 10748 }, { "epoch": 0.4932770409802212, "grad_norm": 0.471036821603775, "learning_rate": 9.473663938929504e-06, "loss": 0.3451, "step": 10749 }, { "epoch": 0.4933229314854757, "grad_norm": 0.43662160634994507, "learning_rate": 9.47355443217062e-06, "loss": 0.3752, "step": 10750 }, { "epoch": 0.4933688219907301, "grad_norm": 0.4624191224575043, "learning_rate": 9.47344491465425e-06, "loss": 0.3744, "step": 10751 }, { "epoch": 0.49341471249598456, "grad_norm": 0.4741438031196594, "learning_rate": 9.473335386380657e-06, "loss": 0.3788, "step": 10752 }, { "epoch": 0.49346060300123906, "grad_norm": 0.422494500875473, "learning_rate": 9.473225847350103e-06, "loss": 0.3237, "step": 10753 }, { "epoch": 0.4935064935064935, "grad_norm": 0.48873844742774963, "learning_rate": 9.473116297562854e-06, "loss": 0.4374, "step": 10754 }, { "epoch": 0.49355238401174795, "grad_norm": 0.5160603523254395, "learning_rate": 9.47300673701917e-06, "loss": 0.4394, "step": 10755 }, { "epoch": 0.49359827451700244, "grad_norm": 0.4478933811187744, "learning_rate": 9.472897165719318e-06, "loss": 0.3853, "step": 10756 }, { "epoch": 0.4936441650222569, "grad_norm": 0.4606477916240692, "learning_rate": 9.47278758366356e-06, "loss": 0.4096, "step": 10757 }, { "epoch": 0.4936900555275114, "grad_norm": 0.5313938856124878, "learning_rate": 9.472677990852159e-06, "loss": 0.4168, "step": 10758 }, { "epoch": 0.4937359460327658, "grad_norm": 0.4804835021495819, "learning_rate": 9.472568387285378e-06, "loss": 0.4234, "step": 10759 }, { "epoch": 0.49378183653802027, "grad_norm": 0.46497994661331177, "learning_rate": 9.472458772963485e-06, "loss": 0.4534, "step": 10760 }, { "epoch": 0.49382772704327477, "grad_norm": 0.4685143828392029, "learning_rate": 9.472349147886736e-06, "loss": 0.453, "step": 10761 }, { "epoch": 0.4938736175485292, "grad_norm": 0.43738800287246704, "learning_rate": 9.472239512055402e-06, "loss": 0.3224, "step": 10762 }, { "epoch": 0.49391950805378365, "grad_norm": 0.4840410351753235, "learning_rate": 9.472129865469742e-06, "loss": 0.3878, "step": 10763 }, { "epoch": 0.49396539855903815, "grad_norm": 0.44155141711235046, "learning_rate": 9.472020208130022e-06, "loss": 0.3661, "step": 10764 }, { "epoch": 0.4940112890642926, "grad_norm": 0.45112836360931396, "learning_rate": 9.471910540036503e-06, "loss": 0.374, "step": 10765 }, { "epoch": 0.49405717956954703, "grad_norm": 0.47623029351234436, "learning_rate": 9.471800861189453e-06, "loss": 0.4311, "step": 10766 }, { "epoch": 0.49410307007480153, "grad_norm": 0.4813067615032196, "learning_rate": 9.471691171589132e-06, "loss": 0.4031, "step": 10767 }, { "epoch": 0.494148960580056, "grad_norm": 0.47675156593322754, "learning_rate": 9.471581471235805e-06, "loss": 0.4343, "step": 10768 }, { "epoch": 0.49419485108531047, "grad_norm": 0.4491693377494812, "learning_rate": 9.471471760129735e-06, "loss": 0.433, "step": 10769 }, { "epoch": 0.4942407415905649, "grad_norm": 0.4628542363643646, "learning_rate": 9.47136203827119e-06, "loss": 0.456, "step": 10770 }, { "epoch": 0.49428663209581936, "grad_norm": 0.46257779002189636, "learning_rate": 9.471252305660429e-06, "loss": 0.43, "step": 10771 }, { "epoch": 0.49433252260107385, "grad_norm": 0.49147915840148926, "learning_rate": 9.471142562297714e-06, "loss": 0.4763, "step": 10772 }, { "epoch": 0.4943784131063283, "grad_norm": 0.4747050404548645, "learning_rate": 9.471032808183316e-06, "loss": 0.4369, "step": 10773 }, { "epoch": 0.49442430361158274, "grad_norm": 0.4625003933906555, "learning_rate": 9.470923043317493e-06, "loss": 0.3201, "step": 10774 }, { "epoch": 0.49447019411683724, "grad_norm": 0.48790210485458374, "learning_rate": 9.470813267700512e-06, "loss": 0.4353, "step": 10775 }, { "epoch": 0.4945160846220917, "grad_norm": 0.4906661808490753, "learning_rate": 9.470703481332638e-06, "loss": 0.4954, "step": 10776 }, { "epoch": 0.4945619751273462, "grad_norm": 0.5141158699989319, "learning_rate": 9.47059368421413e-06, "loss": 0.5236, "step": 10777 }, { "epoch": 0.4946078656326006, "grad_norm": 0.46352094411849976, "learning_rate": 9.470483876345256e-06, "loss": 0.4038, "step": 10778 }, { "epoch": 0.49465375613785506, "grad_norm": 0.4780634641647339, "learning_rate": 9.47037405772628e-06, "loss": 0.4054, "step": 10779 }, { "epoch": 0.49469964664310956, "grad_norm": 0.4802604615688324, "learning_rate": 9.470264228357465e-06, "loss": 0.4441, "step": 10780 }, { "epoch": 0.494745537148364, "grad_norm": 0.49443528056144714, "learning_rate": 9.470154388239076e-06, "loss": 0.5114, "step": 10781 }, { "epoch": 0.49479142765361844, "grad_norm": 0.4826546609401703, "learning_rate": 9.470044537371376e-06, "loss": 0.4254, "step": 10782 }, { "epoch": 0.49483731815887294, "grad_norm": 0.5457146167755127, "learning_rate": 9.469934675754629e-06, "loss": 0.4759, "step": 10783 }, { "epoch": 0.4948832086641274, "grad_norm": 0.47386622428894043, "learning_rate": 9.4698248033891e-06, "loss": 0.4428, "step": 10784 }, { "epoch": 0.4949290991693819, "grad_norm": 0.4439961910247803, "learning_rate": 9.469714920275055e-06, "loss": 0.3831, "step": 10785 }, { "epoch": 0.4949749896746363, "grad_norm": 0.4569987654685974, "learning_rate": 9.469605026412754e-06, "loss": 0.3715, "step": 10786 }, { "epoch": 0.49502088017989077, "grad_norm": 0.49769458174705505, "learning_rate": 9.469495121802463e-06, "loss": 0.5095, "step": 10787 }, { "epoch": 0.49506677068514526, "grad_norm": 0.5076336860656738, "learning_rate": 9.469385206444449e-06, "loss": 0.4933, "step": 10788 }, { "epoch": 0.4951126611903997, "grad_norm": 0.4258926808834076, "learning_rate": 9.469275280338974e-06, "loss": 0.333, "step": 10789 }, { "epoch": 0.49515855169565415, "grad_norm": 0.4661965072154999, "learning_rate": 9.469165343486302e-06, "loss": 0.389, "step": 10790 }, { "epoch": 0.49520444220090865, "grad_norm": 0.46809253096580505, "learning_rate": 9.469055395886697e-06, "loss": 0.4094, "step": 10791 }, { "epoch": 0.4952503327061631, "grad_norm": 0.4882303476333618, "learning_rate": 9.468945437540424e-06, "loss": 0.3824, "step": 10792 }, { "epoch": 0.49529622321141753, "grad_norm": 0.4730532765388489, "learning_rate": 9.46883546844775e-06, "loss": 0.3591, "step": 10793 }, { "epoch": 0.49534211371667203, "grad_norm": 0.48045167326927185, "learning_rate": 9.468725488608936e-06, "loss": 0.3237, "step": 10794 }, { "epoch": 0.49538800422192647, "grad_norm": 0.5047442317008972, "learning_rate": 9.468615498024247e-06, "loss": 0.4381, "step": 10795 }, { "epoch": 0.49543389472718097, "grad_norm": 0.49691057205200195, "learning_rate": 9.468505496693947e-06, "loss": 0.4573, "step": 10796 }, { "epoch": 0.4954797852324354, "grad_norm": 0.424167662858963, "learning_rate": 9.468395484618305e-06, "loss": 0.3622, "step": 10797 }, { "epoch": 0.49552567573768985, "grad_norm": 0.47169479727745056, "learning_rate": 9.46828546179758e-06, "loss": 0.3843, "step": 10798 }, { "epoch": 0.49557156624294435, "grad_norm": 0.5038683414459229, "learning_rate": 9.468175428232038e-06, "loss": 0.4184, "step": 10799 }, { "epoch": 0.4956174567481988, "grad_norm": 0.4351760745048523, "learning_rate": 9.468065383921945e-06, "loss": 0.342, "step": 10800 }, { "epoch": 0.49566334725345323, "grad_norm": 0.4330252408981323, "learning_rate": 9.467955328867564e-06, "loss": 0.3461, "step": 10801 }, { "epoch": 0.49570923775870773, "grad_norm": 0.4928853511810303, "learning_rate": 9.467845263069163e-06, "loss": 0.449, "step": 10802 }, { "epoch": 0.4957551282639622, "grad_norm": 0.43306809663772583, "learning_rate": 9.467735186527002e-06, "loss": 0.3363, "step": 10803 }, { "epoch": 0.4958010187692167, "grad_norm": 0.4502425193786621, "learning_rate": 9.467625099241349e-06, "loss": 0.3988, "step": 10804 }, { "epoch": 0.4958469092744711, "grad_norm": 0.44554266333580017, "learning_rate": 9.467515001212466e-06, "loss": 0.3805, "step": 10805 }, { "epoch": 0.49589279977972556, "grad_norm": 0.5150051712989807, "learning_rate": 9.467404892440621e-06, "loss": 0.5178, "step": 10806 }, { "epoch": 0.49593869028498005, "grad_norm": 0.4569566249847412, "learning_rate": 9.467294772926077e-06, "loss": 0.3887, "step": 10807 }, { "epoch": 0.4959845807902345, "grad_norm": 0.44354185461997986, "learning_rate": 9.467184642669099e-06, "loss": 0.3555, "step": 10808 }, { "epoch": 0.49603047129548894, "grad_norm": 0.46612823009490967, "learning_rate": 9.467074501669952e-06, "loss": 0.4138, "step": 10809 }, { "epoch": 0.49607636180074344, "grad_norm": 0.4554126560688019, "learning_rate": 9.4669643499289e-06, "loss": 0.3604, "step": 10810 }, { "epoch": 0.4961222523059979, "grad_norm": 0.43482455611228943, "learning_rate": 9.466854187446211e-06, "loss": 0.4205, "step": 10811 }, { "epoch": 0.4961681428112524, "grad_norm": 0.442158579826355, "learning_rate": 9.466744014222145e-06, "loss": 0.3966, "step": 10812 }, { "epoch": 0.4962140333165068, "grad_norm": 0.4627056121826172, "learning_rate": 9.466633830256969e-06, "loss": 0.4574, "step": 10813 }, { "epoch": 0.49625992382176126, "grad_norm": 0.46705663204193115, "learning_rate": 9.46652363555095e-06, "loss": 0.4315, "step": 10814 }, { "epoch": 0.49630581432701576, "grad_norm": 0.44923144578933716, "learning_rate": 9.46641343010435e-06, "loss": 0.3555, "step": 10815 }, { "epoch": 0.4963517048322702, "grad_norm": 0.4127691388130188, "learning_rate": 9.466303213917437e-06, "loss": 0.3065, "step": 10816 }, { "epoch": 0.49639759533752464, "grad_norm": 0.448308527469635, "learning_rate": 9.466192986990472e-06, "loss": 0.4083, "step": 10817 }, { "epoch": 0.49644348584277914, "grad_norm": 0.4670823812484741, "learning_rate": 9.466082749323724e-06, "loss": 0.4168, "step": 10818 }, { "epoch": 0.4964893763480336, "grad_norm": 0.42175057530403137, "learning_rate": 9.465972500917456e-06, "loss": 0.3077, "step": 10819 }, { "epoch": 0.4965352668532881, "grad_norm": 0.48070865869522095, "learning_rate": 9.465862241771932e-06, "loss": 0.4887, "step": 10820 }, { "epoch": 0.4965811573585425, "grad_norm": 0.43024805188179016, "learning_rate": 9.465751971887422e-06, "loss": 0.3409, "step": 10821 }, { "epoch": 0.49662704786379697, "grad_norm": 0.429681658744812, "learning_rate": 9.465641691264186e-06, "loss": 0.3514, "step": 10822 }, { "epoch": 0.49667293836905146, "grad_norm": 0.4429875314235687, "learning_rate": 9.465531399902492e-06, "loss": 0.3687, "step": 10823 }, { "epoch": 0.4967188288743059, "grad_norm": 0.5693091750144958, "learning_rate": 9.465421097802603e-06, "loss": 0.4011, "step": 10824 }, { "epoch": 0.49676471937956035, "grad_norm": 0.45220044255256653, "learning_rate": 9.465310784964787e-06, "loss": 0.3353, "step": 10825 }, { "epoch": 0.49681060988481485, "grad_norm": 0.5061473250389099, "learning_rate": 9.465200461389308e-06, "loss": 0.4995, "step": 10826 }, { "epoch": 0.4968565003900693, "grad_norm": 0.48667842149734497, "learning_rate": 9.46509012707643e-06, "loss": 0.412, "step": 10827 }, { "epoch": 0.49690239089532373, "grad_norm": 0.6104325652122498, "learning_rate": 9.46497978202642e-06, "loss": 0.3776, "step": 10828 }, { "epoch": 0.49694828140057823, "grad_norm": 0.4795345366001129, "learning_rate": 9.464869426239544e-06, "loss": 0.4681, "step": 10829 }, { "epoch": 0.49699417190583267, "grad_norm": 0.557748019695282, "learning_rate": 9.464759059716064e-06, "loss": 0.4729, "step": 10830 }, { "epoch": 0.49704006241108717, "grad_norm": 0.4785155951976776, "learning_rate": 9.46464868245625e-06, "loss": 0.4545, "step": 10831 }, { "epoch": 0.4970859529163416, "grad_norm": 0.47897031903266907, "learning_rate": 9.464538294460363e-06, "loss": 0.3708, "step": 10832 }, { "epoch": 0.49713184342159605, "grad_norm": 0.5082811117172241, "learning_rate": 9.464427895728672e-06, "loss": 0.5203, "step": 10833 }, { "epoch": 0.49717773392685055, "grad_norm": 0.435053288936615, "learning_rate": 9.464317486261439e-06, "loss": 0.3852, "step": 10834 }, { "epoch": 0.497223624432105, "grad_norm": 0.44608721137046814, "learning_rate": 9.464207066058933e-06, "loss": 0.3908, "step": 10835 }, { "epoch": 0.49726951493735944, "grad_norm": 0.4874432682991028, "learning_rate": 9.464096635121417e-06, "loss": 0.4701, "step": 10836 }, { "epoch": 0.49731540544261393, "grad_norm": 0.47423055768013, "learning_rate": 9.463986193449157e-06, "loss": 0.3958, "step": 10837 }, { "epoch": 0.4973612959478684, "grad_norm": 0.4569908082485199, "learning_rate": 9.463875741042421e-06, "loss": 0.3969, "step": 10838 }, { "epoch": 0.4974071864531229, "grad_norm": 0.4475950300693512, "learning_rate": 9.463765277901471e-06, "loss": 0.3766, "step": 10839 }, { "epoch": 0.4974530769583773, "grad_norm": 0.4636659324169159, "learning_rate": 9.463654804026575e-06, "loss": 0.4222, "step": 10840 }, { "epoch": 0.49749896746363176, "grad_norm": 0.4321160912513733, "learning_rate": 9.463544319417999e-06, "loss": 0.3752, "step": 10841 }, { "epoch": 0.49754485796888626, "grad_norm": 0.48935502767562866, "learning_rate": 9.463433824076007e-06, "loss": 0.4471, "step": 10842 }, { "epoch": 0.4975907484741407, "grad_norm": 0.46794259548187256, "learning_rate": 9.463323318000864e-06, "loss": 0.4017, "step": 10843 }, { "epoch": 0.49763663897939514, "grad_norm": 0.5219445824623108, "learning_rate": 9.463212801192838e-06, "loss": 0.5316, "step": 10844 }, { "epoch": 0.49768252948464964, "grad_norm": 0.4497554302215576, "learning_rate": 9.463102273652194e-06, "loss": 0.379, "step": 10845 }, { "epoch": 0.4977284199899041, "grad_norm": 0.4437973201274872, "learning_rate": 9.462991735379196e-06, "loss": 0.402, "step": 10846 }, { "epoch": 0.4977743104951586, "grad_norm": 0.4793253242969513, "learning_rate": 9.462881186374115e-06, "loss": 0.4369, "step": 10847 }, { "epoch": 0.497820201000413, "grad_norm": 0.46814313530921936, "learning_rate": 9.46277062663721e-06, "loss": 0.4732, "step": 10848 }, { "epoch": 0.49786609150566746, "grad_norm": 0.4704726040363312, "learning_rate": 9.462660056168753e-06, "loss": 0.4177, "step": 10849 }, { "epoch": 0.49791198201092196, "grad_norm": 0.47194790840148926, "learning_rate": 9.462549474969004e-06, "loss": 0.4517, "step": 10850 }, { "epoch": 0.4979578725161764, "grad_norm": 0.5083450078964233, "learning_rate": 9.462438883038234e-06, "loss": 0.3601, "step": 10851 }, { "epoch": 0.49800376302143085, "grad_norm": 0.4686483144760132, "learning_rate": 9.462328280376705e-06, "loss": 0.4115, "step": 10852 }, { "epoch": 0.49804965352668534, "grad_norm": 0.4468414783477783, "learning_rate": 9.462217666984687e-06, "loss": 0.3686, "step": 10853 }, { "epoch": 0.4980955440319398, "grad_norm": 0.4344910681247711, "learning_rate": 9.462107042862443e-06, "loss": 0.3288, "step": 10854 }, { "epoch": 0.4981414345371942, "grad_norm": 0.4448080062866211, "learning_rate": 9.461996408010239e-06, "loss": 0.403, "step": 10855 }, { "epoch": 0.4981873250424487, "grad_norm": 0.5039364099502563, "learning_rate": 9.461885762428343e-06, "loss": 0.4173, "step": 10856 }, { "epoch": 0.49823321554770317, "grad_norm": 0.4316632151603699, "learning_rate": 9.46177510611702e-06, "loss": 0.3294, "step": 10857 }, { "epoch": 0.49827910605295767, "grad_norm": 0.45275360345840454, "learning_rate": 9.461664439076534e-06, "loss": 0.3848, "step": 10858 }, { "epoch": 0.4983249965582121, "grad_norm": 0.45665857195854187, "learning_rate": 9.461553761307156e-06, "loss": 0.3552, "step": 10859 }, { "epoch": 0.49837088706346655, "grad_norm": 0.4466211199760437, "learning_rate": 9.461443072809146e-06, "loss": 0.412, "step": 10860 }, { "epoch": 0.49841677756872105, "grad_norm": 0.4732917249202728, "learning_rate": 9.461332373582776e-06, "loss": 0.4232, "step": 10861 }, { "epoch": 0.4984626680739755, "grad_norm": 0.5152777433395386, "learning_rate": 9.461221663628311e-06, "loss": 0.5253, "step": 10862 }, { "epoch": 0.49850855857922993, "grad_norm": 0.4891108572483063, "learning_rate": 9.461110942946014e-06, "loss": 0.4449, "step": 10863 }, { "epoch": 0.49855444908448443, "grad_norm": 0.4732539653778076, "learning_rate": 9.461000211536153e-06, "loss": 0.4368, "step": 10864 }, { "epoch": 0.4986003395897389, "grad_norm": 0.4699980318546295, "learning_rate": 9.460889469398994e-06, "loss": 0.414, "step": 10865 }, { "epoch": 0.49864623009499337, "grad_norm": 0.456589013338089, "learning_rate": 9.460778716534803e-06, "loss": 0.3833, "step": 10866 }, { "epoch": 0.4986921206002478, "grad_norm": 0.48536157608032227, "learning_rate": 9.46066795294385e-06, "loss": 0.4275, "step": 10867 }, { "epoch": 0.49873801110550225, "grad_norm": 0.4452584385871887, "learning_rate": 9.460557178626397e-06, "loss": 0.3514, "step": 10868 }, { "epoch": 0.49878390161075675, "grad_norm": 0.4469469487667084, "learning_rate": 9.460446393582711e-06, "loss": 0.3919, "step": 10869 }, { "epoch": 0.4988297921160112, "grad_norm": 0.5035808682441711, "learning_rate": 9.46033559781306e-06, "loss": 0.4527, "step": 10870 }, { "epoch": 0.49887568262126564, "grad_norm": 0.48639756441116333, "learning_rate": 9.460224791317709e-06, "loss": 0.4663, "step": 10871 }, { "epoch": 0.49892157312652013, "grad_norm": 0.5119649767875671, "learning_rate": 9.460113974096925e-06, "loss": 0.4516, "step": 10872 }, { "epoch": 0.4989674636317746, "grad_norm": 0.48175814747810364, "learning_rate": 9.460003146150975e-06, "loss": 0.3247, "step": 10873 }, { "epoch": 0.4990133541370291, "grad_norm": 0.5030218958854675, "learning_rate": 9.459892307480126e-06, "loss": 0.4418, "step": 10874 }, { "epoch": 0.4990592446422835, "grad_norm": 0.4917619228363037, "learning_rate": 9.459781458084642e-06, "loss": 0.4748, "step": 10875 }, { "epoch": 0.49910513514753796, "grad_norm": 0.44831913709640503, "learning_rate": 9.459670597964793e-06, "loss": 0.3791, "step": 10876 }, { "epoch": 0.49915102565279246, "grad_norm": 0.45432212948799133, "learning_rate": 9.459559727120842e-06, "loss": 0.3857, "step": 10877 }, { "epoch": 0.4991969161580469, "grad_norm": 0.4490716755390167, "learning_rate": 9.459448845553058e-06, "loss": 0.354, "step": 10878 }, { "epoch": 0.49924280666330134, "grad_norm": 0.45428746938705444, "learning_rate": 9.459337953261706e-06, "loss": 0.4084, "step": 10879 }, { "epoch": 0.49928869716855584, "grad_norm": 0.5144004821777344, "learning_rate": 9.459227050247055e-06, "loss": 0.4153, "step": 10880 }, { "epoch": 0.4993345876738103, "grad_norm": 0.4629228711128235, "learning_rate": 9.459116136509369e-06, "loss": 0.4218, "step": 10881 }, { "epoch": 0.4993804781790647, "grad_norm": 0.44414469599723816, "learning_rate": 9.459005212048917e-06, "loss": 0.3562, "step": 10882 }, { "epoch": 0.4994263686843192, "grad_norm": 0.4481891095638275, "learning_rate": 9.458894276865965e-06, "loss": 0.3429, "step": 10883 }, { "epoch": 0.49947225918957366, "grad_norm": 0.4644007086753845, "learning_rate": 9.458783330960779e-06, "loss": 0.3752, "step": 10884 }, { "epoch": 0.49951814969482816, "grad_norm": 0.43031781911849976, "learning_rate": 9.458672374333626e-06, "loss": 0.3614, "step": 10885 }, { "epoch": 0.4995640402000826, "grad_norm": 0.5275810360908508, "learning_rate": 9.458561406984774e-06, "loss": 0.4922, "step": 10886 }, { "epoch": 0.49960993070533705, "grad_norm": 0.4725988507270813, "learning_rate": 9.45845042891449e-06, "loss": 0.4443, "step": 10887 }, { "epoch": 0.49965582121059154, "grad_norm": 0.4356434941291809, "learning_rate": 9.458339440123036e-06, "loss": 0.3544, "step": 10888 }, { "epoch": 0.499701711715846, "grad_norm": 0.466789573431015, "learning_rate": 9.458228440610686e-06, "loss": 0.4302, "step": 10889 }, { "epoch": 0.49974760222110043, "grad_norm": 0.45808231830596924, "learning_rate": 9.458117430377703e-06, "loss": 0.3713, "step": 10890 }, { "epoch": 0.4997934927263549, "grad_norm": 0.5023537278175354, "learning_rate": 9.458006409424354e-06, "loss": 0.491, "step": 10891 }, { "epoch": 0.49983938323160937, "grad_norm": 0.4618149995803833, "learning_rate": 9.457895377750907e-06, "loss": 0.431, "step": 10892 }, { "epoch": 0.49988527373686387, "grad_norm": 0.4783206880092621, "learning_rate": 9.457784335357629e-06, "loss": 0.4146, "step": 10893 }, { "epoch": 0.4999311642421183, "grad_norm": 0.4852701723575592, "learning_rate": 9.457673282244785e-06, "loss": 0.4225, "step": 10894 }, { "epoch": 0.49997705474737275, "grad_norm": 0.4660445749759674, "learning_rate": 9.457562218412646e-06, "loss": 0.3648, "step": 10895 }, { "epoch": 0.5000229452526272, "grad_norm": 0.44535401463508606, "learning_rate": 9.457451143861477e-06, "loss": 0.3388, "step": 10896 }, { "epoch": 0.5000688357578817, "grad_norm": 0.4588528573513031, "learning_rate": 9.457340058591542e-06, "loss": 0.3814, "step": 10897 }, { "epoch": 0.5001147262631361, "grad_norm": 0.4969598948955536, "learning_rate": 9.457228962603112e-06, "loss": 0.4511, "step": 10898 }, { "epoch": 0.5001606167683906, "grad_norm": 0.4603307843208313, "learning_rate": 9.457117855896454e-06, "loss": 0.4057, "step": 10899 }, { "epoch": 0.5002065072736451, "grad_norm": 0.4576098322868347, "learning_rate": 9.457006738471832e-06, "loss": 0.4008, "step": 10900 }, { "epoch": 0.5002523977788995, "grad_norm": 0.45939192175865173, "learning_rate": 9.456895610329518e-06, "loss": 0.3751, "step": 10901 }, { "epoch": 0.500298288284154, "grad_norm": 0.49289727210998535, "learning_rate": 9.456784471469775e-06, "loss": 0.4668, "step": 10902 }, { "epoch": 0.5003441787894085, "grad_norm": 0.4463818371295929, "learning_rate": 9.456673321892872e-06, "loss": 0.3387, "step": 10903 }, { "epoch": 0.5003900692946629, "grad_norm": 0.4723441004753113, "learning_rate": 9.456562161599078e-06, "loss": 0.4563, "step": 10904 }, { "epoch": 0.5004359597999174, "grad_norm": 0.4962775707244873, "learning_rate": 9.456450990588657e-06, "loss": 0.4172, "step": 10905 }, { "epoch": 0.5004818503051719, "grad_norm": 0.46568378806114197, "learning_rate": 9.456339808861878e-06, "loss": 0.3816, "step": 10906 }, { "epoch": 0.5005277408104263, "grad_norm": 0.47389382123947144, "learning_rate": 9.45622861641901e-06, "loss": 0.4341, "step": 10907 }, { "epoch": 0.5005736313156808, "grad_norm": 0.4549764394760132, "learning_rate": 9.456117413260316e-06, "loss": 0.4217, "step": 10908 }, { "epoch": 0.5006195218209353, "grad_norm": 0.44340980052948, "learning_rate": 9.456006199386067e-06, "loss": 0.3703, "step": 10909 }, { "epoch": 0.5006654123261897, "grad_norm": 0.5011385083198547, "learning_rate": 9.45589497479653e-06, "loss": 0.4669, "step": 10910 }, { "epoch": 0.5007113028314442, "grad_norm": 0.49119576811790466, "learning_rate": 9.455783739491971e-06, "loss": 0.4572, "step": 10911 }, { "epoch": 0.5007571933366987, "grad_norm": 0.46003612875938416, "learning_rate": 9.455672493472659e-06, "loss": 0.3605, "step": 10912 }, { "epoch": 0.500803083841953, "grad_norm": 0.4600478410720825, "learning_rate": 9.455561236738861e-06, "loss": 0.3545, "step": 10913 }, { "epoch": 0.5008489743472075, "grad_norm": 0.4378669559955597, "learning_rate": 9.455449969290844e-06, "loss": 0.3451, "step": 10914 }, { "epoch": 0.500894864852462, "grad_norm": 0.4849533438682556, "learning_rate": 9.455338691128878e-06, "loss": 0.4146, "step": 10915 }, { "epoch": 0.5009407553577165, "grad_norm": 0.5508562922477722, "learning_rate": 9.455227402253225e-06, "loss": 0.5187, "step": 10916 }, { "epoch": 0.5009866458629709, "grad_norm": 0.4776171147823334, "learning_rate": 9.45511610266416e-06, "loss": 0.3663, "step": 10917 }, { "epoch": 0.5010325363682254, "grad_norm": 0.474141001701355, "learning_rate": 9.455004792361944e-06, "loss": 0.3803, "step": 10918 }, { "epoch": 0.5010784268734799, "grad_norm": 0.4400337040424347, "learning_rate": 9.454893471346848e-06, "loss": 0.3934, "step": 10919 }, { "epoch": 0.5011243173787343, "grad_norm": 0.46708279848098755, "learning_rate": 9.454782139619138e-06, "loss": 0.4168, "step": 10920 }, { "epoch": 0.5011702078839888, "grad_norm": 0.4862530827522278, "learning_rate": 9.454670797179086e-06, "loss": 0.4147, "step": 10921 }, { "epoch": 0.5012160983892433, "grad_norm": 0.46721550822257996, "learning_rate": 9.454559444026955e-06, "loss": 0.4531, "step": 10922 }, { "epoch": 0.5012619888944977, "grad_norm": 0.44835421442985535, "learning_rate": 9.454448080163015e-06, "loss": 0.3639, "step": 10923 }, { "epoch": 0.5013078793997522, "grad_norm": 0.43290725350379944, "learning_rate": 9.454336705587532e-06, "loss": 0.3455, "step": 10924 }, { "epoch": 0.5013537699050067, "grad_norm": 0.45366838574409485, "learning_rate": 9.454225320300776e-06, "loss": 0.4542, "step": 10925 }, { "epoch": 0.5013996604102611, "grad_norm": 0.4568560719490051, "learning_rate": 9.454113924303015e-06, "loss": 0.4013, "step": 10926 }, { "epoch": 0.5014455509155156, "grad_norm": 0.450371116399765, "learning_rate": 9.454002517594514e-06, "loss": 0.3984, "step": 10927 }, { "epoch": 0.5014914414207701, "grad_norm": 0.6074205040931702, "learning_rate": 9.453891100175542e-06, "loss": 0.3823, "step": 10928 }, { "epoch": 0.5015373319260245, "grad_norm": 0.4859667122364044, "learning_rate": 9.45377967204637e-06, "loss": 0.4463, "step": 10929 }, { "epoch": 0.501583222431279, "grad_norm": 0.4499225914478302, "learning_rate": 9.453668233207262e-06, "loss": 0.3986, "step": 10930 }, { "epoch": 0.5016291129365335, "grad_norm": 0.4626608192920685, "learning_rate": 9.453556783658488e-06, "loss": 0.39, "step": 10931 }, { "epoch": 0.501675003441788, "grad_norm": 0.4317147731781006, "learning_rate": 9.453445323400315e-06, "loss": 0.3243, "step": 10932 }, { "epoch": 0.5017208939470423, "grad_norm": 0.4683949947357178, "learning_rate": 9.453333852433013e-06, "loss": 0.4438, "step": 10933 }, { "epoch": 0.5017667844522968, "grad_norm": 0.45582839846611023, "learning_rate": 9.453222370756848e-06, "loss": 0.3785, "step": 10934 }, { "epoch": 0.5018126749575513, "grad_norm": 0.45699912309646606, "learning_rate": 9.453110878372087e-06, "loss": 0.3962, "step": 10935 }, { "epoch": 0.5018585654628057, "grad_norm": 0.4745451509952545, "learning_rate": 9.452999375279002e-06, "loss": 0.4242, "step": 10936 }, { "epoch": 0.5019044559680602, "grad_norm": 0.4583762288093567, "learning_rate": 9.452887861477858e-06, "loss": 0.4034, "step": 10937 }, { "epoch": 0.5019503464733147, "grad_norm": 0.4873026907444, "learning_rate": 9.452776336968924e-06, "loss": 0.4433, "step": 10938 }, { "epoch": 0.5019962369785691, "grad_norm": 0.46590790152549744, "learning_rate": 9.452664801752467e-06, "loss": 0.373, "step": 10939 }, { "epoch": 0.5020421274838236, "grad_norm": 0.44053980708122253, "learning_rate": 9.452553255828758e-06, "loss": 0.3849, "step": 10940 }, { "epoch": 0.5020880179890781, "grad_norm": 0.4903087317943573, "learning_rate": 9.452441699198063e-06, "loss": 0.4812, "step": 10941 }, { "epoch": 0.5021339084943325, "grad_norm": 0.4747794568538666, "learning_rate": 9.452330131860653e-06, "loss": 0.3791, "step": 10942 }, { "epoch": 0.502179798999587, "grad_norm": 0.43665972352027893, "learning_rate": 9.45221855381679e-06, "loss": 0.3842, "step": 10943 }, { "epoch": 0.5022256895048415, "grad_norm": 0.45434272289276123, "learning_rate": 9.45210696506675e-06, "loss": 0.3805, "step": 10944 }, { "epoch": 0.5022715800100959, "grad_norm": 0.46483394503593445, "learning_rate": 9.451995365610797e-06, "loss": 0.4074, "step": 10945 }, { "epoch": 0.5023174705153504, "grad_norm": 0.44043317437171936, "learning_rate": 9.4518837554492e-06, "loss": 0.3921, "step": 10946 }, { "epoch": 0.5023633610206049, "grad_norm": 0.5069910883903503, "learning_rate": 9.451772134582229e-06, "loss": 0.5197, "step": 10947 }, { "epoch": 0.5024092515258592, "grad_norm": 0.5411222577095032, "learning_rate": 9.45166050301015e-06, "loss": 0.5132, "step": 10948 }, { "epoch": 0.5024551420311137, "grad_norm": 0.4758906364440918, "learning_rate": 9.451548860733231e-06, "loss": 0.379, "step": 10949 }, { "epoch": 0.5025010325363682, "grad_norm": 0.46093040704727173, "learning_rate": 9.451437207751744e-06, "loss": 0.3877, "step": 10950 }, { "epoch": 0.5025469230416227, "grad_norm": 0.4234349727630615, "learning_rate": 9.451325544065954e-06, "loss": 0.3541, "step": 10951 }, { "epoch": 0.5025928135468771, "grad_norm": 0.4991777837276459, "learning_rate": 9.451213869676133e-06, "loss": 0.4991, "step": 10952 }, { "epoch": 0.5026387040521316, "grad_norm": 0.4620932340621948, "learning_rate": 9.451102184582546e-06, "loss": 0.4213, "step": 10953 }, { "epoch": 0.5026845945573861, "grad_norm": 0.4639440178871155, "learning_rate": 9.450990488785464e-06, "loss": 0.4008, "step": 10954 }, { "epoch": 0.5027304850626405, "grad_norm": 0.48175880312919617, "learning_rate": 9.450878782285152e-06, "loss": 0.4694, "step": 10955 }, { "epoch": 0.502776375567895, "grad_norm": 0.46049076318740845, "learning_rate": 9.450767065081882e-06, "loss": 0.3939, "step": 10956 }, { "epoch": 0.5028222660731495, "grad_norm": 0.4564223289489746, "learning_rate": 9.450655337175925e-06, "loss": 0.3992, "step": 10957 }, { "epoch": 0.5028681565784039, "grad_norm": 0.4506520926952362, "learning_rate": 9.450543598567545e-06, "loss": 0.3645, "step": 10958 }, { "epoch": 0.5029140470836584, "grad_norm": 0.45811915397644043, "learning_rate": 9.450431849257012e-06, "loss": 0.4137, "step": 10959 }, { "epoch": 0.5029599375889129, "grad_norm": 0.49642693996429443, "learning_rate": 9.450320089244594e-06, "loss": 0.5011, "step": 10960 }, { "epoch": 0.5030058280941673, "grad_norm": 0.4784826636314392, "learning_rate": 9.450208318530562e-06, "loss": 0.4272, "step": 10961 }, { "epoch": 0.5030517185994218, "grad_norm": 0.4685262441635132, "learning_rate": 9.450096537115183e-06, "loss": 0.4068, "step": 10962 }, { "epoch": 0.5030976091046763, "grad_norm": 0.48505303263664246, "learning_rate": 9.449984744998725e-06, "loss": 0.4724, "step": 10963 }, { "epoch": 0.5031434996099307, "grad_norm": 0.47323817014694214, "learning_rate": 9.44987294218146e-06, "loss": 0.4041, "step": 10964 }, { "epoch": 0.5031893901151852, "grad_norm": 0.446487158536911, "learning_rate": 9.449761128663655e-06, "loss": 0.3674, "step": 10965 }, { "epoch": 0.5032352806204397, "grad_norm": 0.48941147327423096, "learning_rate": 9.449649304445579e-06, "loss": 0.4872, "step": 10966 }, { "epoch": 0.5032811711256941, "grad_norm": 0.41921108961105347, "learning_rate": 9.4495374695275e-06, "loss": 0.2918, "step": 10967 }, { "epoch": 0.5033270616309485, "grad_norm": 0.47301679849624634, "learning_rate": 9.449425623909686e-06, "loss": 0.4287, "step": 10968 }, { "epoch": 0.503372952136203, "grad_norm": 0.4526398181915283, "learning_rate": 9.449313767592412e-06, "loss": 0.3616, "step": 10969 }, { "epoch": 0.5034188426414575, "grad_norm": 0.45543360710144043, "learning_rate": 9.44920190057594e-06, "loss": 0.3561, "step": 10970 }, { "epoch": 0.5034647331467119, "grad_norm": 0.4691105782985687, "learning_rate": 9.44909002286054e-06, "loss": 0.4069, "step": 10971 }, { "epoch": 0.5035106236519664, "grad_norm": 0.4809856116771698, "learning_rate": 9.448978134446486e-06, "loss": 0.4643, "step": 10972 }, { "epoch": 0.5035565141572209, "grad_norm": 0.49662455916404724, "learning_rate": 9.448866235334043e-06, "loss": 0.4942, "step": 10973 }, { "epoch": 0.5036024046624753, "grad_norm": 0.4130520820617676, "learning_rate": 9.448754325523479e-06, "loss": 0.3395, "step": 10974 }, { "epoch": 0.5036482951677298, "grad_norm": 0.5070728659629822, "learning_rate": 9.448642405015066e-06, "loss": 0.5039, "step": 10975 }, { "epoch": 0.5036941856729843, "grad_norm": 0.41832247376441956, "learning_rate": 9.448530473809072e-06, "loss": 0.3049, "step": 10976 }, { "epoch": 0.5037400761782387, "grad_norm": 0.45334652066230774, "learning_rate": 9.448418531905766e-06, "loss": 0.3939, "step": 10977 }, { "epoch": 0.5037859666834932, "grad_norm": 0.4927808940410614, "learning_rate": 9.448306579305418e-06, "loss": 0.4129, "step": 10978 }, { "epoch": 0.5038318571887477, "grad_norm": 0.4754318594932556, "learning_rate": 9.448194616008296e-06, "loss": 0.4267, "step": 10979 }, { "epoch": 0.5038777476940021, "grad_norm": 0.4737919569015503, "learning_rate": 9.44808264201467e-06, "loss": 0.4352, "step": 10980 }, { "epoch": 0.5039236381992566, "grad_norm": 0.4747816026210785, "learning_rate": 9.447970657324809e-06, "loss": 0.4402, "step": 10981 }, { "epoch": 0.5039695287045111, "grad_norm": 0.462495893239975, "learning_rate": 9.447858661938982e-06, "loss": 0.3933, "step": 10982 }, { "epoch": 0.5040154192097654, "grad_norm": 0.44550761580467224, "learning_rate": 9.447746655857459e-06, "loss": 0.3714, "step": 10983 }, { "epoch": 0.50406130971502, "grad_norm": 0.4744863510131836, "learning_rate": 9.44763463908051e-06, "loss": 0.4387, "step": 10984 }, { "epoch": 0.5041072002202744, "grad_norm": 0.49458232522010803, "learning_rate": 9.447522611608404e-06, "loss": 0.4542, "step": 10985 }, { "epoch": 0.5041530907255289, "grad_norm": 0.5333617329597473, "learning_rate": 9.447410573441408e-06, "loss": 0.4722, "step": 10986 }, { "epoch": 0.5041989812307833, "grad_norm": 0.4457358121871948, "learning_rate": 9.447298524579793e-06, "loss": 0.3642, "step": 10987 }, { "epoch": 0.5042448717360378, "grad_norm": 0.4825538694858551, "learning_rate": 9.447186465023829e-06, "loss": 0.4365, "step": 10988 }, { "epoch": 0.5042907622412923, "grad_norm": 0.45974797010421753, "learning_rate": 9.447074394773786e-06, "loss": 0.4263, "step": 10989 }, { "epoch": 0.5043366527465467, "grad_norm": 0.4827609360218048, "learning_rate": 9.446962313829933e-06, "loss": 0.4573, "step": 10990 }, { "epoch": 0.5043825432518012, "grad_norm": 0.44754886627197266, "learning_rate": 9.446850222192537e-06, "loss": 0.3979, "step": 10991 }, { "epoch": 0.5044284337570557, "grad_norm": 0.4962870180606842, "learning_rate": 9.446738119861872e-06, "loss": 0.4441, "step": 10992 }, { "epoch": 0.5044743242623101, "grad_norm": 0.5338955521583557, "learning_rate": 9.446626006838203e-06, "loss": 0.488, "step": 10993 }, { "epoch": 0.5045202147675646, "grad_norm": 0.44123077392578125, "learning_rate": 9.446513883121805e-06, "loss": 0.401, "step": 10994 }, { "epoch": 0.5045661052728191, "grad_norm": 0.42539265751838684, "learning_rate": 9.446401748712942e-06, "loss": 0.3241, "step": 10995 }, { "epoch": 0.5046119957780735, "grad_norm": 0.47869786620140076, "learning_rate": 9.446289603611887e-06, "loss": 0.3962, "step": 10996 }, { "epoch": 0.504657886283328, "grad_norm": 0.4272017478942871, "learning_rate": 9.446177447818908e-06, "loss": 0.3219, "step": 10997 }, { "epoch": 0.5047037767885825, "grad_norm": 0.4733532965183258, "learning_rate": 9.446065281334275e-06, "loss": 0.4628, "step": 10998 }, { "epoch": 0.5047496672938369, "grad_norm": 0.4485187530517578, "learning_rate": 9.445953104158258e-06, "loss": 0.3655, "step": 10999 }, { "epoch": 0.5047955577990914, "grad_norm": 0.4721904695034027, "learning_rate": 9.44584091629113e-06, "loss": 0.4482, "step": 11000 }, { "epoch": 0.5048414483043459, "grad_norm": 0.45469409227371216, "learning_rate": 9.445728717733156e-06, "loss": 0.4143, "step": 11001 }, { "epoch": 0.5048873388096002, "grad_norm": 0.4424894452095032, "learning_rate": 9.445616508484607e-06, "loss": 0.3927, "step": 11002 }, { "epoch": 0.5049332293148547, "grad_norm": 0.4687498211860657, "learning_rate": 9.445504288545753e-06, "loss": 0.4065, "step": 11003 }, { "epoch": 0.5049791198201092, "grad_norm": 0.46504124999046326, "learning_rate": 9.445392057916865e-06, "loss": 0.3621, "step": 11004 }, { "epoch": 0.5050250103253637, "grad_norm": 0.4430292844772339, "learning_rate": 9.445279816598212e-06, "loss": 0.3743, "step": 11005 }, { "epoch": 0.5050709008306181, "grad_norm": 0.4403539299964905, "learning_rate": 9.445167564590065e-06, "loss": 0.3877, "step": 11006 }, { "epoch": 0.5051167913358726, "grad_norm": 0.5480190515518188, "learning_rate": 9.445055301892692e-06, "loss": 0.5021, "step": 11007 }, { "epoch": 0.5051626818411271, "grad_norm": 0.4245948791503906, "learning_rate": 9.444943028506363e-06, "loss": 0.3349, "step": 11008 }, { "epoch": 0.5052085723463815, "grad_norm": 0.5123680233955383, "learning_rate": 9.44483074443135e-06, "loss": 0.5073, "step": 11009 }, { "epoch": 0.505254462851636, "grad_norm": 0.4700130820274353, "learning_rate": 9.444718449667922e-06, "loss": 0.4152, "step": 11010 }, { "epoch": 0.5053003533568905, "grad_norm": 0.4817167818546295, "learning_rate": 9.444606144216349e-06, "loss": 0.4252, "step": 11011 }, { "epoch": 0.5053462438621449, "grad_norm": 0.46350380778312683, "learning_rate": 9.4444938280769e-06, "loss": 0.3156, "step": 11012 }, { "epoch": 0.5053921343673994, "grad_norm": 0.5053478479385376, "learning_rate": 9.444381501249846e-06, "loss": 0.4161, "step": 11013 }, { "epoch": 0.5054380248726539, "grad_norm": 0.47810742259025574, "learning_rate": 9.444269163735459e-06, "loss": 0.4633, "step": 11014 }, { "epoch": 0.5054839153779083, "grad_norm": 0.4782871901988983, "learning_rate": 9.444156815534006e-06, "loss": 0.4407, "step": 11015 }, { "epoch": 0.5055298058831628, "grad_norm": 0.4952266216278076, "learning_rate": 9.444044456645758e-06, "loss": 0.4181, "step": 11016 }, { "epoch": 0.5055756963884173, "grad_norm": 0.41617321968078613, "learning_rate": 9.443932087070988e-06, "loss": 0.3305, "step": 11017 }, { "epoch": 0.5056215868936716, "grad_norm": 0.4508669376373291, "learning_rate": 9.443819706809961e-06, "loss": 0.3839, "step": 11018 }, { "epoch": 0.5056674773989261, "grad_norm": 0.4599497616291046, "learning_rate": 9.443707315862952e-06, "loss": 0.3817, "step": 11019 }, { "epoch": 0.5057133679041806, "grad_norm": 1.0045057535171509, "learning_rate": 9.443594914230228e-06, "loss": 0.4503, "step": 11020 }, { "epoch": 0.5057592584094351, "grad_norm": 0.4739968478679657, "learning_rate": 9.44348250191206e-06, "loss": 0.4313, "step": 11021 }, { "epoch": 0.5058051489146895, "grad_norm": 0.47547319531440735, "learning_rate": 9.443370078908722e-06, "loss": 0.3381, "step": 11022 }, { "epoch": 0.505851039419944, "grad_norm": 0.4720299541950226, "learning_rate": 9.443257645220479e-06, "loss": 0.3905, "step": 11023 }, { "epoch": 0.5058969299251985, "grad_norm": 0.44392523169517517, "learning_rate": 9.443145200847605e-06, "loss": 0.3638, "step": 11024 }, { "epoch": 0.5059428204304529, "grad_norm": 0.5037426948547363, "learning_rate": 9.443032745790369e-06, "loss": 0.4775, "step": 11025 }, { "epoch": 0.5059887109357074, "grad_norm": 0.49098989367485046, "learning_rate": 9.44292028004904e-06, "loss": 0.4033, "step": 11026 }, { "epoch": 0.5060346014409619, "grad_norm": 0.46203309297561646, "learning_rate": 9.44280780362389e-06, "loss": 0.4478, "step": 11027 }, { "epoch": 0.5060804919462163, "grad_norm": 0.4806912839412689, "learning_rate": 9.44269531651519e-06, "loss": 0.4354, "step": 11028 }, { "epoch": 0.5061263824514708, "grad_norm": 0.4335838556289673, "learning_rate": 9.442582818723211e-06, "loss": 0.3283, "step": 11029 }, { "epoch": 0.5061722729567253, "grad_norm": 0.46188610792160034, "learning_rate": 9.442470310248222e-06, "loss": 0.4047, "step": 11030 }, { "epoch": 0.5062181634619797, "grad_norm": 0.42918816208839417, "learning_rate": 9.442357791090494e-06, "loss": 0.3459, "step": 11031 }, { "epoch": 0.5062640539672342, "grad_norm": 0.5138610601425171, "learning_rate": 9.442245261250298e-06, "loss": 0.5251, "step": 11032 }, { "epoch": 0.5063099444724887, "grad_norm": 0.5044010877609253, "learning_rate": 9.442132720727904e-06, "loss": 0.477, "step": 11033 }, { "epoch": 0.5063558349777431, "grad_norm": 0.556893527507782, "learning_rate": 9.44202016952358e-06, "loss": 0.5587, "step": 11034 }, { "epoch": 0.5064017254829976, "grad_norm": 0.5577366948127747, "learning_rate": 9.441907607637602e-06, "loss": 0.5195, "step": 11035 }, { "epoch": 0.506447615988252, "grad_norm": 0.4409179389476776, "learning_rate": 9.441795035070238e-06, "loss": 0.3331, "step": 11036 }, { "epoch": 0.5064935064935064, "grad_norm": 0.49706846475601196, "learning_rate": 9.441682451821759e-06, "loss": 0.4677, "step": 11037 }, { "epoch": 0.5065393969987609, "grad_norm": 0.4645450711250305, "learning_rate": 9.441569857892435e-06, "loss": 0.4131, "step": 11038 }, { "epoch": 0.5065852875040154, "grad_norm": 0.501076340675354, "learning_rate": 9.441457253282538e-06, "loss": 0.4945, "step": 11039 }, { "epoch": 0.5066311780092699, "grad_norm": 0.5052675008773804, "learning_rate": 9.441344637992337e-06, "loss": 0.4626, "step": 11040 }, { "epoch": 0.5066770685145243, "grad_norm": 0.4847705662250519, "learning_rate": 9.441232012022104e-06, "loss": 0.4113, "step": 11041 }, { "epoch": 0.5067229590197788, "grad_norm": 0.4488966464996338, "learning_rate": 9.441119375372109e-06, "loss": 0.3954, "step": 11042 }, { "epoch": 0.5067688495250333, "grad_norm": 0.45276081562042236, "learning_rate": 9.441006728042626e-06, "loss": 0.3949, "step": 11043 }, { "epoch": 0.5068147400302877, "grad_norm": 0.43426671624183655, "learning_rate": 9.44089407003392e-06, "loss": 0.3778, "step": 11044 }, { "epoch": 0.5068606305355422, "grad_norm": 0.5115455985069275, "learning_rate": 9.440781401346268e-06, "loss": 0.4712, "step": 11045 }, { "epoch": 0.5069065210407967, "grad_norm": 0.45821547508239746, "learning_rate": 9.440668721979939e-06, "loss": 0.4115, "step": 11046 }, { "epoch": 0.5069524115460511, "grad_norm": 0.5105917453765869, "learning_rate": 9.440556031935202e-06, "loss": 0.4245, "step": 11047 }, { "epoch": 0.5069983020513056, "grad_norm": 0.4881758987903595, "learning_rate": 9.440443331212328e-06, "loss": 0.4939, "step": 11048 }, { "epoch": 0.5070441925565601, "grad_norm": 0.4476834535598755, "learning_rate": 9.44033061981159e-06, "loss": 0.4147, "step": 11049 }, { "epoch": 0.5070900830618145, "grad_norm": 0.4782133102416992, "learning_rate": 9.44021789773326e-06, "loss": 0.4147, "step": 11050 }, { "epoch": 0.507135973567069, "grad_norm": 0.4676351845264435, "learning_rate": 9.440105164977605e-06, "loss": 0.4056, "step": 11051 }, { "epoch": 0.5071818640723235, "grad_norm": 0.44782114028930664, "learning_rate": 9.439992421544899e-06, "loss": 0.3467, "step": 11052 }, { "epoch": 0.5072277545775778, "grad_norm": 0.4424588978290558, "learning_rate": 9.439879667435415e-06, "loss": 0.4033, "step": 11053 }, { "epoch": 0.5072736450828323, "grad_norm": 0.48231878876686096, "learning_rate": 9.43976690264942e-06, "loss": 0.4728, "step": 11054 }, { "epoch": 0.5073195355880868, "grad_norm": 0.4335916340351105, "learning_rate": 9.439654127187184e-06, "loss": 0.4288, "step": 11055 }, { "epoch": 0.5073654260933413, "grad_norm": 0.4402496814727783, "learning_rate": 9.439541341048984e-06, "loss": 0.3749, "step": 11056 }, { "epoch": 0.5074113165985957, "grad_norm": 0.45048797130584717, "learning_rate": 9.439428544235087e-06, "loss": 0.3642, "step": 11057 }, { "epoch": 0.5074572071038502, "grad_norm": 0.518036425113678, "learning_rate": 9.439315736745767e-06, "loss": 0.4899, "step": 11058 }, { "epoch": 0.5075030976091047, "grad_norm": 0.4576689600944519, "learning_rate": 9.439202918581293e-06, "loss": 0.4694, "step": 11059 }, { "epoch": 0.5075489881143591, "grad_norm": 0.45883145928382874, "learning_rate": 9.439090089741936e-06, "loss": 0.4037, "step": 11060 }, { "epoch": 0.5075948786196136, "grad_norm": 0.4919106662273407, "learning_rate": 9.43897725022797e-06, "loss": 0.5226, "step": 11061 }, { "epoch": 0.5076407691248681, "grad_norm": 0.47049248218536377, "learning_rate": 9.438864400039663e-06, "loss": 0.423, "step": 11062 }, { "epoch": 0.5076866596301225, "grad_norm": 0.43477338552474976, "learning_rate": 9.438751539177289e-06, "loss": 0.3594, "step": 11063 }, { "epoch": 0.507732550135377, "grad_norm": 0.4439249634742737, "learning_rate": 9.438638667641119e-06, "loss": 0.3314, "step": 11064 }, { "epoch": 0.5077784406406315, "grad_norm": 0.4799375832080841, "learning_rate": 9.438525785431422e-06, "loss": 0.4011, "step": 11065 }, { "epoch": 0.5078243311458859, "grad_norm": 0.4653036296367645, "learning_rate": 9.438412892548475e-06, "loss": 0.3585, "step": 11066 }, { "epoch": 0.5078702216511404, "grad_norm": 0.4222617447376251, "learning_rate": 9.438299988992543e-06, "loss": 0.3557, "step": 11067 }, { "epoch": 0.5079161121563949, "grad_norm": 0.48153528571128845, "learning_rate": 9.4381870747639e-06, "loss": 0.4467, "step": 11068 }, { "epoch": 0.5079620026616493, "grad_norm": 0.48363277316093445, "learning_rate": 9.43807414986282e-06, "loss": 0.4085, "step": 11069 }, { "epoch": 0.5080078931669038, "grad_norm": 0.441182941198349, "learning_rate": 9.437961214289571e-06, "loss": 0.3777, "step": 11070 }, { "epoch": 0.5080537836721583, "grad_norm": 0.506866455078125, "learning_rate": 9.437848268044424e-06, "loss": 0.4978, "step": 11071 }, { "epoch": 0.5080996741774126, "grad_norm": 0.4856387972831726, "learning_rate": 9.437735311127655e-06, "loss": 0.4605, "step": 11072 }, { "epoch": 0.5081455646826671, "grad_norm": 0.44156569242477417, "learning_rate": 9.437622343539532e-06, "loss": 0.3731, "step": 11073 }, { "epoch": 0.5081914551879216, "grad_norm": 0.4477945566177368, "learning_rate": 9.437509365280328e-06, "loss": 0.4327, "step": 11074 }, { "epoch": 0.5082373456931761, "grad_norm": 0.4663015305995941, "learning_rate": 9.437396376350316e-06, "loss": 0.4586, "step": 11075 }, { "epoch": 0.5082832361984305, "grad_norm": 0.43222957849502563, "learning_rate": 9.437283376749766e-06, "loss": 0.3475, "step": 11076 }, { "epoch": 0.508329126703685, "grad_norm": 0.4556671977043152, "learning_rate": 9.437170366478948e-06, "loss": 0.4169, "step": 11077 }, { "epoch": 0.5083750172089395, "grad_norm": 0.49400678277015686, "learning_rate": 9.437057345538137e-06, "loss": 0.4747, "step": 11078 }, { "epoch": 0.5084209077141939, "grad_norm": 0.4953756332397461, "learning_rate": 9.436944313927603e-06, "loss": 0.4709, "step": 11079 }, { "epoch": 0.5084667982194484, "grad_norm": 0.4935978353023529, "learning_rate": 9.436831271647619e-06, "loss": 0.4369, "step": 11080 }, { "epoch": 0.5085126887247029, "grad_norm": 0.43096497654914856, "learning_rate": 9.436718218698455e-06, "loss": 0.3543, "step": 11081 }, { "epoch": 0.5085585792299573, "grad_norm": 0.5037040114402771, "learning_rate": 9.436605155080383e-06, "loss": 0.4801, "step": 11082 }, { "epoch": 0.5086044697352118, "grad_norm": 0.47201764583587646, "learning_rate": 9.436492080793677e-06, "loss": 0.4253, "step": 11083 }, { "epoch": 0.5086503602404663, "grad_norm": 0.4751473069190979, "learning_rate": 9.43637899583861e-06, "loss": 0.4651, "step": 11084 }, { "epoch": 0.5086962507457207, "grad_norm": 0.52275550365448, "learning_rate": 9.436265900215448e-06, "loss": 0.5488, "step": 11085 }, { "epoch": 0.5087421412509752, "grad_norm": 0.4285326600074768, "learning_rate": 9.43615279392447e-06, "loss": 0.3608, "step": 11086 }, { "epoch": 0.5087880317562297, "grad_norm": 0.43743857741355896, "learning_rate": 9.436039676965942e-06, "loss": 0.347, "step": 11087 }, { "epoch": 0.508833922261484, "grad_norm": 0.4144723415374756, "learning_rate": 9.43592654934014e-06, "loss": 0.313, "step": 11088 }, { "epoch": 0.5088798127667385, "grad_norm": 0.43682458996772766, "learning_rate": 9.435813411047335e-06, "loss": 0.35, "step": 11089 }, { "epoch": 0.508925703271993, "grad_norm": 0.48336261510849, "learning_rate": 9.435700262087796e-06, "loss": 0.445, "step": 11090 }, { "epoch": 0.5089715937772474, "grad_norm": 0.45538023114204407, "learning_rate": 9.4355871024618e-06, "loss": 0.3784, "step": 11091 }, { "epoch": 0.5090174842825019, "grad_norm": 0.4261086881160736, "learning_rate": 9.435473932169619e-06, "loss": 0.3645, "step": 11092 }, { "epoch": 0.5090633747877564, "grad_norm": 0.47664251923561096, "learning_rate": 9.43536075121152e-06, "loss": 0.4192, "step": 11093 }, { "epoch": 0.5091092652930109, "grad_norm": 0.458617627620697, "learning_rate": 9.43524755958778e-06, "loss": 0.3755, "step": 11094 }, { "epoch": 0.5091551557982653, "grad_norm": 0.45727846026420593, "learning_rate": 9.435134357298667e-06, "loss": 0.4144, "step": 11095 }, { "epoch": 0.5092010463035198, "grad_norm": 0.416154682636261, "learning_rate": 9.435021144344458e-06, "loss": 0.3371, "step": 11096 }, { "epoch": 0.5092469368087743, "grad_norm": 0.4496779441833496, "learning_rate": 9.434907920725421e-06, "loss": 0.3086, "step": 11097 }, { "epoch": 0.5092928273140287, "grad_norm": 0.4601019322872162, "learning_rate": 9.434794686441831e-06, "loss": 0.4058, "step": 11098 }, { "epoch": 0.5093387178192832, "grad_norm": 0.4619458317756653, "learning_rate": 9.43468144149396e-06, "loss": 0.3592, "step": 11099 }, { "epoch": 0.5093846083245377, "grad_norm": 0.4989260733127594, "learning_rate": 9.43456818588208e-06, "loss": 0.4978, "step": 11100 }, { "epoch": 0.5094304988297921, "grad_norm": 0.4481591284275055, "learning_rate": 9.434454919606461e-06, "loss": 0.3988, "step": 11101 }, { "epoch": 0.5094763893350466, "grad_norm": 0.4668935537338257, "learning_rate": 9.43434164266738e-06, "loss": 0.4377, "step": 11102 }, { "epoch": 0.5095222798403011, "grad_norm": 0.4743530750274658, "learning_rate": 9.434228355065105e-06, "loss": 0.4418, "step": 11103 }, { "epoch": 0.5095681703455555, "grad_norm": 0.4634436368942261, "learning_rate": 9.434115056799911e-06, "loss": 0.3932, "step": 11104 }, { "epoch": 0.50961406085081, "grad_norm": 0.4736510217189789, "learning_rate": 9.43400174787207e-06, "loss": 0.4718, "step": 11105 }, { "epoch": 0.5096599513560645, "grad_norm": 0.45432233810424805, "learning_rate": 9.433888428281854e-06, "loss": 0.3718, "step": 11106 }, { "epoch": 0.5097058418613188, "grad_norm": 0.43205001950263977, "learning_rate": 9.433775098029536e-06, "loss": 0.4024, "step": 11107 }, { "epoch": 0.5097517323665733, "grad_norm": 0.4442681670188904, "learning_rate": 9.433661757115388e-06, "loss": 0.3669, "step": 11108 }, { "epoch": 0.5097976228718278, "grad_norm": 0.6076908707618713, "learning_rate": 9.433548405539683e-06, "loss": 0.4564, "step": 11109 }, { "epoch": 0.5098435133770823, "grad_norm": 0.4501707851886749, "learning_rate": 9.433435043302694e-06, "loss": 0.3678, "step": 11110 }, { "epoch": 0.5098894038823367, "grad_norm": 0.4663463234901428, "learning_rate": 9.433321670404691e-06, "loss": 0.4278, "step": 11111 }, { "epoch": 0.5099352943875912, "grad_norm": 0.4603365957736969, "learning_rate": 9.433208286845949e-06, "loss": 0.4038, "step": 11112 }, { "epoch": 0.5099811848928457, "grad_norm": 0.4779384434223175, "learning_rate": 9.433094892626742e-06, "loss": 0.4395, "step": 11113 }, { "epoch": 0.5100270753981001, "grad_norm": 0.47363585233688354, "learning_rate": 9.432981487747338e-06, "loss": 0.4425, "step": 11114 }, { "epoch": 0.5100729659033546, "grad_norm": 0.4191666543483734, "learning_rate": 9.432868072208015e-06, "loss": 0.3398, "step": 11115 }, { "epoch": 0.5101188564086091, "grad_norm": 0.4613591432571411, "learning_rate": 9.432754646009043e-06, "loss": 0.3888, "step": 11116 }, { "epoch": 0.5101647469138635, "grad_norm": 0.4800584018230438, "learning_rate": 9.432641209150694e-06, "loss": 0.4865, "step": 11117 }, { "epoch": 0.510210637419118, "grad_norm": 0.4228001534938812, "learning_rate": 9.432527761633243e-06, "loss": 0.3847, "step": 11118 }, { "epoch": 0.5102565279243725, "grad_norm": 0.5002844333648682, "learning_rate": 9.432414303456963e-06, "loss": 0.4685, "step": 11119 }, { "epoch": 0.5103024184296269, "grad_norm": 0.4854716658592224, "learning_rate": 9.432300834622123e-06, "loss": 0.4367, "step": 11120 }, { "epoch": 0.5103483089348814, "grad_norm": 0.48742276430130005, "learning_rate": 9.432187355128999e-06, "loss": 0.469, "step": 11121 }, { "epoch": 0.5103941994401359, "grad_norm": 0.4191174805164337, "learning_rate": 9.432073864977864e-06, "loss": 0.3568, "step": 11122 }, { "epoch": 0.5104400899453903, "grad_norm": 0.476814329624176, "learning_rate": 9.431960364168991e-06, "loss": 0.4237, "step": 11123 }, { "epoch": 0.5104859804506447, "grad_norm": 0.41378504037857056, "learning_rate": 9.431846852702648e-06, "loss": 0.3246, "step": 11124 }, { "epoch": 0.5105318709558992, "grad_norm": 0.49643921852111816, "learning_rate": 9.431733330579116e-06, "loss": 0.4809, "step": 11125 }, { "epoch": 0.5105777614611536, "grad_norm": 0.4422619938850403, "learning_rate": 9.431619797798664e-06, "loss": 0.3348, "step": 11126 }, { "epoch": 0.5106236519664081, "grad_norm": 0.486173540353775, "learning_rate": 9.431506254361562e-06, "loss": 0.3969, "step": 11127 }, { "epoch": 0.5106695424716626, "grad_norm": 0.49294888973236084, "learning_rate": 9.43139270026809e-06, "loss": 0.4826, "step": 11128 }, { "epoch": 0.5107154329769171, "grad_norm": 0.40683048963546753, "learning_rate": 9.431279135518514e-06, "loss": 0.3064, "step": 11129 }, { "epoch": 0.5107613234821715, "grad_norm": 0.44529032707214355, "learning_rate": 9.431165560113111e-06, "loss": 0.3702, "step": 11130 }, { "epoch": 0.510807213987426, "grad_norm": 0.44968166947364807, "learning_rate": 9.431051974052154e-06, "loss": 0.3575, "step": 11131 }, { "epoch": 0.5108531044926805, "grad_norm": 0.46065571904182434, "learning_rate": 9.430938377335916e-06, "loss": 0.3516, "step": 11132 }, { "epoch": 0.5108989949979349, "grad_norm": 0.4441080093383789, "learning_rate": 9.430824769964669e-06, "loss": 0.3351, "step": 11133 }, { "epoch": 0.5109448855031894, "grad_norm": 0.44278252124786377, "learning_rate": 9.430711151938686e-06, "loss": 0.4041, "step": 11134 }, { "epoch": 0.5109907760084439, "grad_norm": 0.48169058561325073, "learning_rate": 9.43059752325824e-06, "loss": 0.424, "step": 11135 }, { "epoch": 0.5110366665136983, "grad_norm": 0.4215027391910553, "learning_rate": 9.430483883923608e-06, "loss": 0.3204, "step": 11136 }, { "epoch": 0.5110825570189528, "grad_norm": 0.458992600440979, "learning_rate": 9.43037023393506e-06, "loss": 0.4212, "step": 11137 }, { "epoch": 0.5111284475242073, "grad_norm": 0.4420967102050781, "learning_rate": 9.43025657329287e-06, "loss": 0.3699, "step": 11138 }, { "epoch": 0.5111743380294617, "grad_norm": 0.48791202902793884, "learning_rate": 9.43014290199731e-06, "loss": 0.4388, "step": 11139 }, { "epoch": 0.5112202285347162, "grad_norm": 0.4718987047672272, "learning_rate": 9.430029220048655e-06, "loss": 0.3936, "step": 11140 }, { "epoch": 0.5112661190399707, "grad_norm": 0.4414863884449005, "learning_rate": 9.429915527447178e-06, "loss": 0.3878, "step": 11141 }, { "epoch": 0.511312009545225, "grad_norm": 0.4793201684951782, "learning_rate": 9.429801824193153e-06, "loss": 0.4288, "step": 11142 }, { "epoch": 0.5113579000504795, "grad_norm": 0.48404479026794434, "learning_rate": 9.42968811028685e-06, "loss": 0.483, "step": 11143 }, { "epoch": 0.511403790555734, "grad_norm": 0.4797365665435791, "learning_rate": 9.429574385728547e-06, "loss": 0.4312, "step": 11144 }, { "epoch": 0.5114496810609885, "grad_norm": 0.45008593797683716, "learning_rate": 9.429460650518516e-06, "loss": 0.3769, "step": 11145 }, { "epoch": 0.5114955715662429, "grad_norm": 0.44718408584594727, "learning_rate": 9.429346904657031e-06, "loss": 0.392, "step": 11146 }, { "epoch": 0.5115414620714974, "grad_norm": 0.4396439790725708, "learning_rate": 9.429233148144363e-06, "loss": 0.3595, "step": 11147 }, { "epoch": 0.5115873525767519, "grad_norm": 0.4328153431415558, "learning_rate": 9.429119380980788e-06, "loss": 0.3461, "step": 11148 }, { "epoch": 0.5116332430820063, "grad_norm": 0.48482391238212585, "learning_rate": 9.429005603166577e-06, "loss": 0.4111, "step": 11149 }, { "epoch": 0.5116791335872608, "grad_norm": 0.41375404596328735, "learning_rate": 9.428891814702009e-06, "loss": 0.3204, "step": 11150 }, { "epoch": 0.5117250240925153, "grad_norm": 0.47852614521980286, "learning_rate": 9.42877801558735e-06, "loss": 0.4461, "step": 11151 }, { "epoch": 0.5117709145977697, "grad_norm": 0.4818577170372009, "learning_rate": 9.428664205822879e-06, "loss": 0.4554, "step": 11152 }, { "epoch": 0.5118168051030242, "grad_norm": 0.47473928332328796, "learning_rate": 9.42855038540887e-06, "loss": 0.4177, "step": 11153 }, { "epoch": 0.5118626956082787, "grad_norm": 0.4590417146682739, "learning_rate": 9.428436554345592e-06, "loss": 0.4494, "step": 11154 }, { "epoch": 0.5119085861135331, "grad_norm": 0.4606229364871979, "learning_rate": 9.428322712633324e-06, "loss": 0.4102, "step": 11155 }, { "epoch": 0.5119544766187876, "grad_norm": 0.4564058184623718, "learning_rate": 9.428208860272337e-06, "loss": 0.398, "step": 11156 }, { "epoch": 0.5120003671240421, "grad_norm": 0.46197837591171265, "learning_rate": 9.428094997262906e-06, "loss": 0.4134, "step": 11157 }, { "epoch": 0.5120462576292965, "grad_norm": 0.4781615734100342, "learning_rate": 9.427981123605304e-06, "loss": 0.4224, "step": 11158 }, { "epoch": 0.512092148134551, "grad_norm": 0.4690605700016022, "learning_rate": 9.427867239299804e-06, "loss": 0.4236, "step": 11159 }, { "epoch": 0.5121380386398054, "grad_norm": 0.49025338888168335, "learning_rate": 9.42775334434668e-06, "loss": 0.4675, "step": 11160 }, { "epoch": 0.5121839291450598, "grad_norm": 0.45674118399620056, "learning_rate": 9.427639438746208e-06, "loss": 0.384, "step": 11161 }, { "epoch": 0.5122298196503143, "grad_norm": 0.4760828912258148, "learning_rate": 9.42752552249866e-06, "loss": 0.3817, "step": 11162 }, { "epoch": 0.5122757101555688, "grad_norm": 0.4247443377971649, "learning_rate": 9.42741159560431e-06, "loss": 0.3024, "step": 11163 }, { "epoch": 0.5123216006608233, "grad_norm": 0.46078816056251526, "learning_rate": 9.427297658063433e-06, "loss": 0.4108, "step": 11164 }, { "epoch": 0.5123674911660777, "grad_norm": 0.48824793100357056, "learning_rate": 9.427183709876302e-06, "loss": 0.4855, "step": 11165 }, { "epoch": 0.5124133816713322, "grad_norm": 0.40844792127609253, "learning_rate": 9.427069751043193e-06, "loss": 0.335, "step": 11166 }, { "epoch": 0.5124592721765867, "grad_norm": 0.4416067600250244, "learning_rate": 9.426955781564378e-06, "loss": 0.3825, "step": 11167 }, { "epoch": 0.5125051626818411, "grad_norm": 0.4281124770641327, "learning_rate": 9.42684180144013e-06, "loss": 0.3098, "step": 11168 }, { "epoch": 0.5125510531870956, "grad_norm": 0.47789958119392395, "learning_rate": 9.426727810670725e-06, "loss": 0.4523, "step": 11169 }, { "epoch": 0.5125969436923501, "grad_norm": 0.465964138507843, "learning_rate": 9.42661380925644e-06, "loss": 0.4595, "step": 11170 }, { "epoch": 0.5126428341976045, "grad_norm": 0.4484297037124634, "learning_rate": 9.426499797197541e-06, "loss": 0.4043, "step": 11171 }, { "epoch": 0.512688724702859, "grad_norm": 0.4079855978488922, "learning_rate": 9.426385774494308e-06, "loss": 0.3055, "step": 11172 }, { "epoch": 0.5127346152081135, "grad_norm": 0.4222286343574524, "learning_rate": 9.426271741147018e-06, "loss": 0.3721, "step": 11173 }, { "epoch": 0.5127805057133679, "grad_norm": 0.5693210959434509, "learning_rate": 9.426157697155938e-06, "loss": 0.3347, "step": 11174 }, { "epoch": 0.5128263962186224, "grad_norm": 0.438080757856369, "learning_rate": 9.426043642521347e-06, "loss": 0.3475, "step": 11175 }, { "epoch": 0.5128722867238769, "grad_norm": 0.4451926648616791, "learning_rate": 9.425929577243516e-06, "loss": 0.3451, "step": 11176 }, { "epoch": 0.5129181772291312, "grad_norm": 0.4349431097507477, "learning_rate": 9.425815501322724e-06, "loss": 0.3319, "step": 11177 }, { "epoch": 0.5129640677343857, "grad_norm": 0.43615883588790894, "learning_rate": 9.42570141475924e-06, "loss": 0.3544, "step": 11178 }, { "epoch": 0.5130099582396402, "grad_norm": 0.45016226172447205, "learning_rate": 9.425587317553343e-06, "loss": 0.3658, "step": 11179 }, { "epoch": 0.5130558487448946, "grad_norm": 0.4785882830619812, "learning_rate": 9.425473209705304e-06, "loss": 0.443, "step": 11180 }, { "epoch": 0.5131017392501491, "grad_norm": 0.460958868265152, "learning_rate": 9.4253590912154e-06, "loss": 0.4202, "step": 11181 }, { "epoch": 0.5131476297554036, "grad_norm": 0.44740164279937744, "learning_rate": 9.425244962083902e-06, "loss": 0.4001, "step": 11182 }, { "epoch": 0.5131935202606581, "grad_norm": 0.41863784193992615, "learning_rate": 9.425130822311089e-06, "loss": 0.3213, "step": 11183 }, { "epoch": 0.5132394107659125, "grad_norm": 0.4627070426940918, "learning_rate": 9.425016671897231e-06, "loss": 0.3605, "step": 11184 }, { "epoch": 0.513285301271167, "grad_norm": 0.45955419540405273, "learning_rate": 9.424902510842605e-06, "loss": 0.388, "step": 11185 }, { "epoch": 0.5133311917764215, "grad_norm": 0.47954294085502625, "learning_rate": 9.424788339147485e-06, "loss": 0.4674, "step": 11186 }, { "epoch": 0.5133770822816759, "grad_norm": 0.4473467469215393, "learning_rate": 9.424674156812147e-06, "loss": 0.4135, "step": 11187 }, { "epoch": 0.5134229727869304, "grad_norm": 0.46484407782554626, "learning_rate": 9.424559963836862e-06, "loss": 0.4595, "step": 11188 }, { "epoch": 0.5134688632921849, "grad_norm": 0.49906080961227417, "learning_rate": 9.424445760221908e-06, "loss": 0.4543, "step": 11189 }, { "epoch": 0.5135147537974393, "grad_norm": 0.5169631242752075, "learning_rate": 9.424331545967557e-06, "loss": 0.5245, "step": 11190 }, { "epoch": 0.5135606443026938, "grad_norm": 0.4689655005931854, "learning_rate": 9.424217321074085e-06, "loss": 0.4127, "step": 11191 }, { "epoch": 0.5136065348079483, "grad_norm": 0.44165536761283875, "learning_rate": 9.424103085541768e-06, "loss": 0.3201, "step": 11192 }, { "epoch": 0.5136524253132027, "grad_norm": 0.44161972403526306, "learning_rate": 9.423988839370879e-06, "loss": 0.3759, "step": 11193 }, { "epoch": 0.5136983158184572, "grad_norm": 0.42835134267807007, "learning_rate": 9.423874582561692e-06, "loss": 0.3324, "step": 11194 }, { "epoch": 0.5137442063237116, "grad_norm": 0.4730234444141388, "learning_rate": 9.423760315114485e-06, "loss": 0.41, "step": 11195 }, { "epoch": 0.513790096828966, "grad_norm": 0.43346846103668213, "learning_rate": 9.423646037029527e-06, "loss": 0.3518, "step": 11196 }, { "epoch": 0.5138359873342205, "grad_norm": 0.4523134231567383, "learning_rate": 9.4235317483071e-06, "loss": 0.3724, "step": 11197 }, { "epoch": 0.513881877839475, "grad_norm": 0.45653825998306274, "learning_rate": 9.423417448947476e-06, "loss": 0.4106, "step": 11198 }, { "epoch": 0.5139277683447295, "grad_norm": 0.5112180709838867, "learning_rate": 9.423303138950925e-06, "loss": 0.4898, "step": 11199 }, { "epoch": 0.5139736588499839, "grad_norm": 0.4864504337310791, "learning_rate": 9.42318881831773e-06, "loss": 0.443, "step": 11200 }, { "epoch": 0.5140195493552384, "grad_norm": 0.4419673681259155, "learning_rate": 9.42307448704816e-06, "loss": 0.3944, "step": 11201 }, { "epoch": 0.5140654398604929, "grad_norm": 0.47507017850875854, "learning_rate": 9.42296014514249e-06, "loss": 0.4619, "step": 11202 }, { "epoch": 0.5141113303657473, "grad_norm": 0.4882911741733551, "learning_rate": 9.422845792601e-06, "loss": 0.4918, "step": 11203 }, { "epoch": 0.5141572208710018, "grad_norm": 0.45332691073417664, "learning_rate": 9.42273142942396e-06, "loss": 0.3999, "step": 11204 }, { "epoch": 0.5142031113762563, "grad_norm": 0.4811588525772095, "learning_rate": 9.422617055611647e-06, "loss": 0.4413, "step": 11205 }, { "epoch": 0.5142490018815107, "grad_norm": 0.45333409309387207, "learning_rate": 9.422502671164337e-06, "loss": 0.3316, "step": 11206 }, { "epoch": 0.5142948923867652, "grad_norm": 0.4645090103149414, "learning_rate": 9.422388276082302e-06, "loss": 0.3916, "step": 11207 }, { "epoch": 0.5143407828920197, "grad_norm": 0.44360506534576416, "learning_rate": 9.42227387036582e-06, "loss": 0.3455, "step": 11208 }, { "epoch": 0.5143866733972741, "grad_norm": 0.4784619212150574, "learning_rate": 9.422159454015164e-06, "loss": 0.496, "step": 11209 }, { "epoch": 0.5144325639025286, "grad_norm": 0.4477009177207947, "learning_rate": 9.422045027030612e-06, "loss": 0.3971, "step": 11210 }, { "epoch": 0.5144784544077831, "grad_norm": 0.440959632396698, "learning_rate": 9.421930589412436e-06, "loss": 0.3597, "step": 11211 }, { "epoch": 0.5145243449130374, "grad_norm": 0.4549063742160797, "learning_rate": 9.421816141160913e-06, "loss": 0.3818, "step": 11212 }, { "epoch": 0.5145702354182919, "grad_norm": 0.5039348602294922, "learning_rate": 9.421701682276317e-06, "loss": 0.4979, "step": 11213 }, { "epoch": 0.5146161259235464, "grad_norm": 0.47268053889274597, "learning_rate": 9.421587212758924e-06, "loss": 0.3998, "step": 11214 }, { "epoch": 0.5146620164288008, "grad_norm": 0.45106935501098633, "learning_rate": 9.421472732609011e-06, "loss": 0.381, "step": 11215 }, { "epoch": 0.5147079069340553, "grad_norm": 0.44838130474090576, "learning_rate": 9.42135824182685e-06, "loss": 0.4023, "step": 11216 }, { "epoch": 0.5147537974393098, "grad_norm": 0.4636859595775604, "learning_rate": 9.421243740412719e-06, "loss": 0.4065, "step": 11217 }, { "epoch": 0.5147996879445643, "grad_norm": 0.4705791175365448, "learning_rate": 9.421129228366892e-06, "loss": 0.3972, "step": 11218 }, { "epoch": 0.5148455784498187, "grad_norm": 0.4705198109149933, "learning_rate": 9.421014705689644e-06, "loss": 0.3906, "step": 11219 }, { "epoch": 0.5148914689550732, "grad_norm": 0.4702683687210083, "learning_rate": 9.42090017238125e-06, "loss": 0.4814, "step": 11220 }, { "epoch": 0.5149373594603277, "grad_norm": 0.4810901880264282, "learning_rate": 9.420785628441987e-06, "loss": 0.5011, "step": 11221 }, { "epoch": 0.5149832499655821, "grad_norm": 0.4607618749141693, "learning_rate": 9.42067107387213e-06, "loss": 0.4343, "step": 11222 }, { "epoch": 0.5150291404708366, "grad_norm": 0.4946284890174866, "learning_rate": 9.420556508671954e-06, "loss": 0.4498, "step": 11223 }, { "epoch": 0.5150750309760911, "grad_norm": 0.46579962968826294, "learning_rate": 9.420441932841736e-06, "loss": 0.3796, "step": 11224 }, { "epoch": 0.5151209214813455, "grad_norm": 0.4398934245109558, "learning_rate": 9.42032734638175e-06, "loss": 0.3724, "step": 11225 }, { "epoch": 0.5151668119866, "grad_norm": 0.4489741921424866, "learning_rate": 9.42021274929227e-06, "loss": 0.4077, "step": 11226 }, { "epoch": 0.5152127024918545, "grad_norm": 0.44262582063674927, "learning_rate": 9.420098141573574e-06, "loss": 0.3703, "step": 11227 }, { "epoch": 0.5152585929971089, "grad_norm": 0.4338358938694, "learning_rate": 9.41998352322594e-06, "loss": 0.3489, "step": 11228 }, { "epoch": 0.5153044835023634, "grad_norm": 0.4774375557899475, "learning_rate": 9.419868894249637e-06, "loss": 0.4242, "step": 11229 }, { "epoch": 0.5153503740076179, "grad_norm": 0.5442906022071838, "learning_rate": 9.419754254644946e-06, "loss": 0.4632, "step": 11230 }, { "epoch": 0.5153962645128722, "grad_norm": 0.5080773234367371, "learning_rate": 9.41963960441214e-06, "loss": 0.458, "step": 11231 }, { "epoch": 0.5154421550181267, "grad_norm": 0.48662588000297546, "learning_rate": 9.419524943551497e-06, "loss": 0.4054, "step": 11232 }, { "epoch": 0.5154880455233812, "grad_norm": 0.4499012231826782, "learning_rate": 9.41941027206329e-06, "loss": 0.3873, "step": 11233 }, { "epoch": 0.5155339360286356, "grad_norm": 0.4747801721096039, "learning_rate": 9.419295589947797e-06, "loss": 0.4194, "step": 11234 }, { "epoch": 0.5155798265338901, "grad_norm": 0.4485040307044983, "learning_rate": 9.419180897205292e-06, "loss": 0.3695, "step": 11235 }, { "epoch": 0.5156257170391446, "grad_norm": 0.5051221251487732, "learning_rate": 9.419066193836051e-06, "loss": 0.5025, "step": 11236 }, { "epoch": 0.5156716075443991, "grad_norm": 0.45362600684165955, "learning_rate": 9.418951479840353e-06, "loss": 0.4341, "step": 11237 }, { "epoch": 0.5157174980496535, "grad_norm": 0.4738071858882904, "learning_rate": 9.41883675521847e-06, "loss": 0.3016, "step": 11238 }, { "epoch": 0.515763388554908, "grad_norm": 0.4678727388381958, "learning_rate": 9.418722019970679e-06, "loss": 0.3595, "step": 11239 }, { "epoch": 0.5158092790601625, "grad_norm": 0.45817965269088745, "learning_rate": 9.418607274097255e-06, "loss": 0.3979, "step": 11240 }, { "epoch": 0.5158551695654169, "grad_norm": 0.44150492548942566, "learning_rate": 9.418492517598476e-06, "loss": 0.3624, "step": 11241 }, { "epoch": 0.5159010600706714, "grad_norm": 0.42746803164482117, "learning_rate": 9.418377750474618e-06, "loss": 0.3503, "step": 11242 }, { "epoch": 0.5159469505759259, "grad_norm": 0.4773419499397278, "learning_rate": 9.418262972725956e-06, "loss": 0.4107, "step": 11243 }, { "epoch": 0.5159928410811803, "grad_norm": 0.4755682051181793, "learning_rate": 9.418148184352765e-06, "loss": 0.4418, "step": 11244 }, { "epoch": 0.5160387315864348, "grad_norm": 0.5008369088172913, "learning_rate": 9.418033385355322e-06, "loss": 0.4241, "step": 11245 }, { "epoch": 0.5160846220916893, "grad_norm": 0.4306611716747284, "learning_rate": 9.417918575733904e-06, "loss": 0.3425, "step": 11246 }, { "epoch": 0.5161305125969436, "grad_norm": 0.4905848205089569, "learning_rate": 9.417803755488785e-06, "loss": 0.4509, "step": 11247 }, { "epoch": 0.5161764031021981, "grad_norm": 0.4466972053050995, "learning_rate": 9.417688924620244e-06, "loss": 0.3305, "step": 11248 }, { "epoch": 0.5162222936074526, "grad_norm": 0.438510924577713, "learning_rate": 9.417574083128554e-06, "loss": 0.3923, "step": 11249 }, { "epoch": 0.516268184112707, "grad_norm": 0.4085960388183594, "learning_rate": 9.417459231013993e-06, "loss": 0.307, "step": 11250 }, { "epoch": 0.5163140746179615, "grad_norm": 0.5797719955444336, "learning_rate": 9.417344368276838e-06, "loss": 0.4863, "step": 11251 }, { "epoch": 0.516359965123216, "grad_norm": 0.4440360963344574, "learning_rate": 9.417229494917361e-06, "loss": 0.4086, "step": 11252 }, { "epoch": 0.5164058556284705, "grad_norm": 0.455028772354126, "learning_rate": 9.417114610935842e-06, "loss": 0.4288, "step": 11253 }, { "epoch": 0.5164517461337249, "grad_norm": 0.493502140045166, "learning_rate": 9.416999716332557e-06, "loss": 0.479, "step": 11254 }, { "epoch": 0.5164976366389794, "grad_norm": 0.43186095356941223, "learning_rate": 9.416884811107782e-06, "loss": 0.3474, "step": 11255 }, { "epoch": 0.5165435271442339, "grad_norm": 0.4689897894859314, "learning_rate": 9.416769895261793e-06, "loss": 0.4217, "step": 11256 }, { "epoch": 0.5165894176494883, "grad_norm": 0.4879116714000702, "learning_rate": 9.416654968794867e-06, "loss": 0.4469, "step": 11257 }, { "epoch": 0.5166353081547428, "grad_norm": 0.5197396278381348, "learning_rate": 9.416540031707279e-06, "loss": 0.5213, "step": 11258 }, { "epoch": 0.5166811986599973, "grad_norm": 0.42598289251327515, "learning_rate": 9.416425083999307e-06, "loss": 0.3433, "step": 11259 }, { "epoch": 0.5167270891652517, "grad_norm": 0.4675031900405884, "learning_rate": 9.416310125671225e-06, "loss": 0.3736, "step": 11260 }, { "epoch": 0.5167729796705062, "grad_norm": 0.4364950954914093, "learning_rate": 9.41619515672331e-06, "loss": 0.3398, "step": 11261 }, { "epoch": 0.5168188701757607, "grad_norm": 0.43757903575897217, "learning_rate": 9.416080177155843e-06, "loss": 0.3723, "step": 11262 }, { "epoch": 0.5168647606810151, "grad_norm": 0.45544275641441345, "learning_rate": 9.415965186969094e-06, "loss": 0.4007, "step": 11263 }, { "epoch": 0.5169106511862696, "grad_norm": 0.48667579889297485, "learning_rate": 9.415850186163345e-06, "loss": 0.4928, "step": 11264 }, { "epoch": 0.516956541691524, "grad_norm": 0.4592079520225525, "learning_rate": 9.415735174738866e-06, "loss": 0.4204, "step": 11265 }, { "epoch": 0.5170024321967784, "grad_norm": 0.4929644763469696, "learning_rate": 9.41562015269594e-06, "loss": 0.4273, "step": 11266 }, { "epoch": 0.5170483227020329, "grad_norm": 0.42213869094848633, "learning_rate": 9.415505120034842e-06, "loss": 0.3686, "step": 11267 }, { "epoch": 0.5170942132072874, "grad_norm": 0.39224839210510254, "learning_rate": 9.415390076755848e-06, "loss": 0.2905, "step": 11268 }, { "epoch": 0.5171401037125418, "grad_norm": 0.5034732818603516, "learning_rate": 9.415275022859233e-06, "loss": 0.5203, "step": 11269 }, { "epoch": 0.5171859942177963, "grad_norm": 0.5695082545280457, "learning_rate": 9.415159958345276e-06, "loss": 0.4646, "step": 11270 }, { "epoch": 0.5172318847230508, "grad_norm": 0.4319281578063965, "learning_rate": 9.415044883214252e-06, "loss": 0.3618, "step": 11271 }, { "epoch": 0.5172777752283053, "grad_norm": 0.46490177512168884, "learning_rate": 9.414929797466439e-06, "loss": 0.4124, "step": 11272 }, { "epoch": 0.5173236657335597, "grad_norm": 0.46130844950675964, "learning_rate": 9.414814701102113e-06, "loss": 0.4481, "step": 11273 }, { "epoch": 0.5173695562388142, "grad_norm": 0.44523581862449646, "learning_rate": 9.41469959412155e-06, "loss": 0.393, "step": 11274 }, { "epoch": 0.5174154467440687, "grad_norm": 0.44459274411201477, "learning_rate": 9.414584476525028e-06, "loss": 0.357, "step": 11275 }, { "epoch": 0.5174613372493231, "grad_norm": 0.5191458463668823, "learning_rate": 9.414469348312824e-06, "loss": 0.4802, "step": 11276 }, { "epoch": 0.5175072277545776, "grad_norm": 0.4215477705001831, "learning_rate": 9.414354209485215e-06, "loss": 0.3271, "step": 11277 }, { "epoch": 0.5175531182598321, "grad_norm": 0.46454212069511414, "learning_rate": 9.414239060042478e-06, "loss": 0.345, "step": 11278 }, { "epoch": 0.5175990087650865, "grad_norm": 0.47585931420326233, "learning_rate": 9.414123899984888e-06, "loss": 0.4178, "step": 11279 }, { "epoch": 0.517644899270341, "grad_norm": 0.4738839864730835, "learning_rate": 9.414008729312724e-06, "loss": 0.3712, "step": 11280 }, { "epoch": 0.5176907897755955, "grad_norm": 0.483776718378067, "learning_rate": 9.41389354802626e-06, "loss": 0.4149, "step": 11281 }, { "epoch": 0.5177366802808498, "grad_norm": 0.4590279161930084, "learning_rate": 9.413778356125778e-06, "loss": 0.4018, "step": 11282 }, { "epoch": 0.5177825707861043, "grad_norm": 0.5332219004631042, "learning_rate": 9.41366315361155e-06, "loss": 0.4948, "step": 11283 }, { "epoch": 0.5178284612913588, "grad_norm": 0.432334840297699, "learning_rate": 9.413547940483855e-06, "loss": 0.3738, "step": 11284 }, { "epoch": 0.5178743517966132, "grad_norm": 0.4356936514377594, "learning_rate": 9.413432716742972e-06, "loss": 0.3552, "step": 11285 }, { "epoch": 0.5179202423018677, "grad_norm": 0.4573189318180084, "learning_rate": 9.413317482389177e-06, "loss": 0.3378, "step": 11286 }, { "epoch": 0.5179661328071222, "grad_norm": 0.43666619062423706, "learning_rate": 9.413202237422743e-06, "loss": 0.3541, "step": 11287 }, { "epoch": 0.5180120233123767, "grad_norm": 0.4871177673339844, "learning_rate": 9.413086981843953e-06, "loss": 0.4226, "step": 11288 }, { "epoch": 0.5180579138176311, "grad_norm": 0.5462905764579773, "learning_rate": 9.41297171565308e-06, "loss": 0.4618, "step": 11289 }, { "epoch": 0.5181038043228856, "grad_norm": 0.4553225040435791, "learning_rate": 9.412856438850405e-06, "loss": 0.3837, "step": 11290 }, { "epoch": 0.5181496948281401, "grad_norm": 0.49372997879981995, "learning_rate": 9.4127411514362e-06, "loss": 0.4312, "step": 11291 }, { "epoch": 0.5181955853333945, "grad_norm": 0.43431931734085083, "learning_rate": 9.412625853410747e-06, "loss": 0.4, "step": 11292 }, { "epoch": 0.518241475838649, "grad_norm": 0.47358494997024536, "learning_rate": 9.41251054477432e-06, "loss": 0.3717, "step": 11293 }, { "epoch": 0.5182873663439035, "grad_norm": 0.4579281806945801, "learning_rate": 9.4123952255272e-06, "loss": 0.4159, "step": 11294 }, { "epoch": 0.5183332568491579, "grad_norm": 0.4474659562110901, "learning_rate": 9.412279895669661e-06, "loss": 0.4231, "step": 11295 }, { "epoch": 0.5183791473544124, "grad_norm": 0.5042991042137146, "learning_rate": 9.412164555201981e-06, "loss": 0.4906, "step": 11296 }, { "epoch": 0.5184250378596669, "grad_norm": 0.4518040120601654, "learning_rate": 9.412049204124438e-06, "loss": 0.3935, "step": 11297 }, { "epoch": 0.5184709283649213, "grad_norm": 0.48454368114471436, "learning_rate": 9.41193384243731e-06, "loss": 0.3942, "step": 11298 }, { "epoch": 0.5185168188701758, "grad_norm": 0.48193231225013733, "learning_rate": 9.411818470140873e-06, "loss": 0.4169, "step": 11299 }, { "epoch": 0.5185627093754303, "grad_norm": 0.48706933856010437, "learning_rate": 9.411703087235405e-06, "loss": 0.3714, "step": 11300 }, { "epoch": 0.5186085998806846, "grad_norm": 0.4524175226688385, "learning_rate": 9.411587693721182e-06, "loss": 0.4065, "step": 11301 }, { "epoch": 0.5186544903859391, "grad_norm": 0.42802923917770386, "learning_rate": 9.411472289598486e-06, "loss": 0.3655, "step": 11302 }, { "epoch": 0.5187003808911936, "grad_norm": 0.4512304663658142, "learning_rate": 9.41135687486759e-06, "loss": 0.4031, "step": 11303 }, { "epoch": 0.518746271396448, "grad_norm": 0.468533992767334, "learning_rate": 9.411241449528772e-06, "loss": 0.4353, "step": 11304 }, { "epoch": 0.5187921619017025, "grad_norm": 0.4598747491836548, "learning_rate": 9.411126013582312e-06, "loss": 0.4363, "step": 11305 }, { "epoch": 0.518838052406957, "grad_norm": 0.4329906404018402, "learning_rate": 9.411010567028484e-06, "loss": 0.3792, "step": 11306 }, { "epoch": 0.5188839429122115, "grad_norm": 0.47453564405441284, "learning_rate": 9.410895109867571e-06, "loss": 0.4805, "step": 11307 }, { "epoch": 0.5189298334174659, "grad_norm": 0.5077623724937439, "learning_rate": 9.410779642099845e-06, "loss": 0.5025, "step": 11308 }, { "epoch": 0.5189757239227204, "grad_norm": 0.47868186235427856, "learning_rate": 9.410664163725586e-06, "loss": 0.4849, "step": 11309 }, { "epoch": 0.5190216144279749, "grad_norm": 0.4690362811088562, "learning_rate": 9.410548674745072e-06, "loss": 0.4197, "step": 11310 }, { "epoch": 0.5190675049332293, "grad_norm": 0.45661434531211853, "learning_rate": 9.41043317515858e-06, "loss": 0.4265, "step": 11311 }, { "epoch": 0.5191133954384838, "grad_norm": 0.4610253870487213, "learning_rate": 9.41031766496639e-06, "loss": 0.4281, "step": 11312 }, { "epoch": 0.5191592859437383, "grad_norm": 0.4959578216075897, "learning_rate": 9.410202144168776e-06, "loss": 0.4603, "step": 11313 }, { "epoch": 0.5192051764489927, "grad_norm": 0.483648419380188, "learning_rate": 9.410086612766018e-06, "loss": 0.4091, "step": 11314 }, { "epoch": 0.5192510669542472, "grad_norm": 0.4500217139720917, "learning_rate": 9.409971070758395e-06, "loss": 0.3782, "step": 11315 }, { "epoch": 0.5192969574595017, "grad_norm": 0.46670421957969666, "learning_rate": 9.409855518146183e-06, "loss": 0.3608, "step": 11316 }, { "epoch": 0.519342847964756, "grad_norm": 0.4461580216884613, "learning_rate": 9.40973995492966e-06, "loss": 0.3545, "step": 11317 }, { "epoch": 0.5193887384700105, "grad_norm": 0.46640920639038086, "learning_rate": 9.409624381109105e-06, "loss": 0.4148, "step": 11318 }, { "epoch": 0.519434628975265, "grad_norm": 0.47005024552345276, "learning_rate": 9.409508796684793e-06, "loss": 0.4295, "step": 11319 }, { "epoch": 0.5194805194805194, "grad_norm": 0.5063243508338928, "learning_rate": 9.409393201657004e-06, "loss": 0.4065, "step": 11320 }, { "epoch": 0.5195264099857739, "grad_norm": 0.4501003921031952, "learning_rate": 9.409277596026018e-06, "loss": 0.4125, "step": 11321 }, { "epoch": 0.5195723004910284, "grad_norm": 0.4959845542907715, "learning_rate": 9.40916197979211e-06, "loss": 0.4505, "step": 11322 }, { "epoch": 0.5196181909962828, "grad_norm": 0.4186255633831024, "learning_rate": 9.40904635295556e-06, "loss": 0.3528, "step": 11323 }, { "epoch": 0.5196640815015373, "grad_norm": 0.44498705863952637, "learning_rate": 9.408930715516643e-06, "loss": 0.3825, "step": 11324 }, { "epoch": 0.5197099720067918, "grad_norm": 0.5048653483390808, "learning_rate": 9.40881506747564e-06, "loss": 0.4308, "step": 11325 }, { "epoch": 0.5197558625120463, "grad_norm": 0.46696797013282776, "learning_rate": 9.408699408832828e-06, "loss": 0.4046, "step": 11326 }, { "epoch": 0.5198017530173007, "grad_norm": 0.4412844777107239, "learning_rate": 9.408583739588487e-06, "loss": 0.3626, "step": 11327 }, { "epoch": 0.5198476435225552, "grad_norm": 0.428541362285614, "learning_rate": 9.408468059742893e-06, "loss": 0.3192, "step": 11328 }, { "epoch": 0.5198935340278097, "grad_norm": 0.4453152120113373, "learning_rate": 9.408352369296323e-06, "loss": 0.3238, "step": 11329 }, { "epoch": 0.5199394245330641, "grad_norm": 0.5214999914169312, "learning_rate": 9.408236668249059e-06, "loss": 0.5346, "step": 11330 }, { "epoch": 0.5199853150383186, "grad_norm": 0.42444345355033875, "learning_rate": 9.408120956601376e-06, "loss": 0.3515, "step": 11331 }, { "epoch": 0.5200312055435731, "grad_norm": 0.46152263879776, "learning_rate": 9.408005234353553e-06, "loss": 0.404, "step": 11332 }, { "epoch": 0.5200770960488275, "grad_norm": 0.5059174299240112, "learning_rate": 9.407889501505868e-06, "loss": 0.5288, "step": 11333 }, { "epoch": 0.520122986554082, "grad_norm": 0.4300854206085205, "learning_rate": 9.407773758058602e-06, "loss": 0.356, "step": 11334 }, { "epoch": 0.5201688770593365, "grad_norm": 0.4807341396808624, "learning_rate": 9.407658004012029e-06, "loss": 0.438, "step": 11335 }, { "epoch": 0.5202147675645908, "grad_norm": 0.47700512409210205, "learning_rate": 9.407542239366431e-06, "loss": 0.3492, "step": 11336 }, { "epoch": 0.5202606580698453, "grad_norm": 0.5245160460472107, "learning_rate": 9.407426464122085e-06, "loss": 0.4858, "step": 11337 }, { "epoch": 0.5203065485750998, "grad_norm": 0.45137128233909607, "learning_rate": 9.407310678279269e-06, "loss": 0.3663, "step": 11338 }, { "epoch": 0.5203524390803542, "grad_norm": 0.4665279984474182, "learning_rate": 9.407194881838262e-06, "loss": 0.4469, "step": 11339 }, { "epoch": 0.5203983295856087, "grad_norm": 0.4119986891746521, "learning_rate": 9.407079074799341e-06, "loss": 0.2969, "step": 11340 }, { "epoch": 0.5204442200908632, "grad_norm": 0.4704146981239319, "learning_rate": 9.406963257162788e-06, "loss": 0.422, "step": 11341 }, { "epoch": 0.5204901105961177, "grad_norm": 0.4676615297794342, "learning_rate": 9.406847428928878e-06, "loss": 0.4316, "step": 11342 }, { "epoch": 0.5205360011013721, "grad_norm": 0.4753597676753998, "learning_rate": 9.40673159009789e-06, "loss": 0.4498, "step": 11343 }, { "epoch": 0.5205818916066266, "grad_norm": 0.4642932713031769, "learning_rate": 9.406615740670104e-06, "loss": 0.4192, "step": 11344 }, { "epoch": 0.5206277821118811, "grad_norm": 0.4393087923526764, "learning_rate": 9.406499880645799e-06, "loss": 0.3927, "step": 11345 }, { "epoch": 0.5206736726171355, "grad_norm": 0.4112776219844818, "learning_rate": 9.406384010025251e-06, "loss": 0.313, "step": 11346 }, { "epoch": 0.52071956312239, "grad_norm": 0.43229940533638, "learning_rate": 9.406268128808742e-06, "loss": 0.3612, "step": 11347 }, { "epoch": 0.5207654536276445, "grad_norm": 0.4701545834541321, "learning_rate": 9.406152236996546e-06, "loss": 0.4393, "step": 11348 }, { "epoch": 0.5208113441328989, "grad_norm": 0.4098416566848755, "learning_rate": 9.406036334588946e-06, "loss": 0.336, "step": 11349 }, { "epoch": 0.5208572346381534, "grad_norm": 0.41647979617118835, "learning_rate": 9.40592042158622e-06, "loss": 0.322, "step": 11350 }, { "epoch": 0.5209031251434079, "grad_norm": 0.45846766233444214, "learning_rate": 9.405804497988645e-06, "loss": 0.4217, "step": 11351 }, { "epoch": 0.5209490156486623, "grad_norm": 0.5021017789840698, "learning_rate": 9.405688563796503e-06, "loss": 0.4517, "step": 11352 }, { "epoch": 0.5209949061539167, "grad_norm": 0.47045889496803284, "learning_rate": 9.405572619010065e-06, "loss": 0.4074, "step": 11353 }, { "epoch": 0.5210407966591712, "grad_norm": 0.4577224850654602, "learning_rate": 9.405456663629619e-06, "loss": 0.438, "step": 11354 }, { "epoch": 0.5210866871644256, "grad_norm": 0.5118107795715332, "learning_rate": 9.40534069765544e-06, "loss": 0.4618, "step": 11355 }, { "epoch": 0.5211325776696801, "grad_norm": 0.4609542191028595, "learning_rate": 9.405224721087808e-06, "loss": 0.382, "step": 11356 }, { "epoch": 0.5211784681749346, "grad_norm": 0.465101957321167, "learning_rate": 9.405108733926999e-06, "loss": 0.3927, "step": 11357 }, { "epoch": 0.521224358680189, "grad_norm": 0.4497661590576172, "learning_rate": 9.404992736173296e-06, "loss": 0.3517, "step": 11358 }, { "epoch": 0.5212702491854435, "grad_norm": 0.4259025752544403, "learning_rate": 9.404876727826973e-06, "loss": 0.3641, "step": 11359 }, { "epoch": 0.521316139690698, "grad_norm": 0.5044989585876465, "learning_rate": 9.404760708888313e-06, "loss": 0.4948, "step": 11360 }, { "epoch": 0.5213620301959525, "grad_norm": 0.43325573205947876, "learning_rate": 9.404644679357593e-06, "loss": 0.3806, "step": 11361 }, { "epoch": 0.5214079207012069, "grad_norm": 0.46886196732521057, "learning_rate": 9.404528639235094e-06, "loss": 0.4116, "step": 11362 }, { "epoch": 0.5214538112064614, "grad_norm": 0.42928507924079895, "learning_rate": 9.404412588521093e-06, "loss": 0.3297, "step": 11363 }, { "epoch": 0.5214997017117159, "grad_norm": 0.46476927399635315, "learning_rate": 9.404296527215871e-06, "loss": 0.44, "step": 11364 }, { "epoch": 0.5215455922169703, "grad_norm": 0.4753119945526123, "learning_rate": 9.404180455319704e-06, "loss": 0.416, "step": 11365 }, { "epoch": 0.5215914827222248, "grad_norm": 0.4441254436969757, "learning_rate": 9.404064372832873e-06, "loss": 0.3513, "step": 11366 }, { "epoch": 0.5216373732274793, "grad_norm": 0.42617788910865784, "learning_rate": 9.403948279755658e-06, "loss": 0.3654, "step": 11367 }, { "epoch": 0.5216832637327337, "grad_norm": 0.480732798576355, "learning_rate": 9.403832176088337e-06, "loss": 0.4372, "step": 11368 }, { "epoch": 0.5217291542379882, "grad_norm": 0.45252206921577454, "learning_rate": 9.40371606183119e-06, "loss": 0.3618, "step": 11369 }, { "epoch": 0.5217750447432427, "grad_norm": 0.48564642667770386, "learning_rate": 9.403599936984495e-06, "loss": 0.4535, "step": 11370 }, { "epoch": 0.521820935248497, "grad_norm": 0.4580940902233124, "learning_rate": 9.403483801548532e-06, "loss": 0.3796, "step": 11371 }, { "epoch": 0.5218668257537515, "grad_norm": 0.47691118717193604, "learning_rate": 9.40336765552358e-06, "loss": 0.4255, "step": 11372 }, { "epoch": 0.521912716259006, "grad_norm": 0.5334489941596985, "learning_rate": 9.40325149890992e-06, "loss": 0.4755, "step": 11373 }, { "epoch": 0.5219586067642604, "grad_norm": 0.45369982719421387, "learning_rate": 9.403135331707828e-06, "loss": 0.4912, "step": 11374 }, { "epoch": 0.5220044972695149, "grad_norm": 0.4181894063949585, "learning_rate": 9.403019153917587e-06, "loss": 0.3399, "step": 11375 }, { "epoch": 0.5220503877747694, "grad_norm": 0.439365953207016, "learning_rate": 9.402902965539473e-06, "loss": 0.4084, "step": 11376 }, { "epoch": 0.5220962782800239, "grad_norm": 0.4301898181438446, "learning_rate": 9.402786766573768e-06, "loss": 0.3335, "step": 11377 }, { "epoch": 0.5221421687852783, "grad_norm": 0.5846481323242188, "learning_rate": 9.40267055702075e-06, "loss": 0.5041, "step": 11378 }, { "epoch": 0.5221880592905328, "grad_norm": 0.48720747232437134, "learning_rate": 9.4025543368807e-06, "loss": 0.4601, "step": 11379 }, { "epoch": 0.5222339497957873, "grad_norm": 0.4171673655509949, "learning_rate": 9.402438106153895e-06, "loss": 0.3284, "step": 11380 }, { "epoch": 0.5222798403010417, "grad_norm": 0.49452322721481323, "learning_rate": 9.402321864840616e-06, "loss": 0.4907, "step": 11381 }, { "epoch": 0.5223257308062962, "grad_norm": 0.4706476032733917, "learning_rate": 9.402205612941142e-06, "loss": 0.3945, "step": 11382 }, { "epoch": 0.5223716213115507, "grad_norm": 0.442869633436203, "learning_rate": 9.402089350455753e-06, "loss": 0.3734, "step": 11383 }, { "epoch": 0.5224175118168051, "grad_norm": 0.4574196934700012, "learning_rate": 9.401973077384729e-06, "loss": 0.3933, "step": 11384 }, { "epoch": 0.5224634023220596, "grad_norm": 0.4647204875946045, "learning_rate": 9.401856793728348e-06, "loss": 0.4053, "step": 11385 }, { "epoch": 0.5225092928273141, "grad_norm": 0.46060729026794434, "learning_rate": 9.401740499486892e-06, "loss": 0.4149, "step": 11386 }, { "epoch": 0.5225551833325685, "grad_norm": 0.47708842158317566, "learning_rate": 9.401624194660638e-06, "loss": 0.4305, "step": 11387 }, { "epoch": 0.522601073837823, "grad_norm": 0.4464203715324402, "learning_rate": 9.401507879249868e-06, "loss": 0.3981, "step": 11388 }, { "epoch": 0.5226469643430774, "grad_norm": 0.5870711207389832, "learning_rate": 9.40139155325486e-06, "loss": 0.4062, "step": 11389 }, { "epoch": 0.5226928548483318, "grad_norm": 0.46573159098625183, "learning_rate": 9.401275216675896e-06, "loss": 0.4284, "step": 11390 }, { "epoch": 0.5227387453535863, "grad_norm": 0.46834370493888855, "learning_rate": 9.401158869513253e-06, "loss": 0.4096, "step": 11391 }, { "epoch": 0.5227846358588408, "grad_norm": 0.4613339900970459, "learning_rate": 9.40104251176721e-06, "loss": 0.4453, "step": 11392 }, { "epoch": 0.5228305263640952, "grad_norm": 0.4914063811302185, "learning_rate": 9.400926143438053e-06, "loss": 0.4452, "step": 11393 }, { "epoch": 0.5228764168693497, "grad_norm": 0.4860925078392029, "learning_rate": 9.400809764526056e-06, "loss": 0.3974, "step": 11394 }, { "epoch": 0.5229223073746042, "grad_norm": 0.4686044454574585, "learning_rate": 9.400693375031498e-06, "loss": 0.3828, "step": 11395 }, { "epoch": 0.5229681978798587, "grad_norm": 0.4933861494064331, "learning_rate": 9.400576974954664e-06, "loss": 0.3958, "step": 11396 }, { "epoch": 0.5230140883851131, "grad_norm": 0.46955105662345886, "learning_rate": 9.400460564295831e-06, "loss": 0.4582, "step": 11397 }, { "epoch": 0.5230599788903676, "grad_norm": 0.5272611975669861, "learning_rate": 9.400344143055278e-06, "loss": 0.3525, "step": 11398 }, { "epoch": 0.5231058693956221, "grad_norm": 0.4410994052886963, "learning_rate": 9.400227711233287e-06, "loss": 0.381, "step": 11399 }, { "epoch": 0.5231517599008765, "grad_norm": 0.4742319583892822, "learning_rate": 9.400111268830136e-06, "loss": 0.4431, "step": 11400 }, { "epoch": 0.523197650406131, "grad_norm": 0.44880059361457825, "learning_rate": 9.399994815846107e-06, "loss": 0.346, "step": 11401 }, { "epoch": 0.5232435409113855, "grad_norm": 0.48278918862342834, "learning_rate": 9.39987835228148e-06, "loss": 0.3819, "step": 11402 }, { "epoch": 0.5232894314166399, "grad_norm": 0.479733407497406, "learning_rate": 9.399761878136533e-06, "loss": 0.3756, "step": 11403 }, { "epoch": 0.5233353219218944, "grad_norm": 0.4563409984111786, "learning_rate": 9.399645393411547e-06, "loss": 0.4333, "step": 11404 }, { "epoch": 0.5233812124271489, "grad_norm": 0.4592234194278717, "learning_rate": 9.399528898106802e-06, "loss": 0.3642, "step": 11405 }, { "epoch": 0.5234271029324032, "grad_norm": 0.4508698284626007, "learning_rate": 9.39941239222258e-06, "loss": 0.4531, "step": 11406 }, { "epoch": 0.5234729934376577, "grad_norm": 0.4498033821582794, "learning_rate": 9.39929587575916e-06, "loss": 0.4034, "step": 11407 }, { "epoch": 0.5235188839429122, "grad_norm": 0.490349143743515, "learning_rate": 9.399179348716821e-06, "loss": 0.4193, "step": 11408 }, { "epoch": 0.5235647744481666, "grad_norm": 0.4533957839012146, "learning_rate": 9.399062811095844e-06, "loss": 0.4236, "step": 11409 }, { "epoch": 0.5236106649534211, "grad_norm": 0.42886388301849365, "learning_rate": 9.39894626289651e-06, "loss": 0.3874, "step": 11410 }, { "epoch": 0.5236565554586756, "grad_norm": 0.48032861948013306, "learning_rate": 9.398829704119097e-06, "loss": 0.3871, "step": 11411 }, { "epoch": 0.52370244596393, "grad_norm": 0.488559752702713, "learning_rate": 9.398713134763888e-06, "loss": 0.4298, "step": 11412 }, { "epoch": 0.5237483364691845, "grad_norm": 0.5009146928787231, "learning_rate": 9.398596554831164e-06, "loss": 0.4035, "step": 11413 }, { "epoch": 0.523794226974439, "grad_norm": 0.5082897543907166, "learning_rate": 9.3984799643212e-06, "loss": 0.4693, "step": 11414 }, { "epoch": 0.5238401174796935, "grad_norm": 0.47286292910575867, "learning_rate": 9.398363363234282e-06, "loss": 0.4326, "step": 11415 }, { "epoch": 0.5238860079849479, "grad_norm": 0.4564986526966095, "learning_rate": 9.398246751570688e-06, "loss": 0.4197, "step": 11416 }, { "epoch": 0.5239318984902024, "grad_norm": 0.4848954379558563, "learning_rate": 9.398130129330699e-06, "loss": 0.4419, "step": 11417 }, { "epoch": 0.5239777889954569, "grad_norm": 0.49837687611579895, "learning_rate": 9.398013496514592e-06, "loss": 0.4929, "step": 11418 }, { "epoch": 0.5240236795007113, "grad_norm": 0.705758810043335, "learning_rate": 9.397896853122654e-06, "loss": 0.3419, "step": 11419 }, { "epoch": 0.5240695700059658, "grad_norm": 0.4462769031524658, "learning_rate": 9.397780199155163e-06, "loss": 0.3325, "step": 11420 }, { "epoch": 0.5241154605112203, "grad_norm": 0.4101276099681854, "learning_rate": 9.397663534612395e-06, "loss": 0.3415, "step": 11421 }, { "epoch": 0.5241613510164747, "grad_norm": 0.49243876338005066, "learning_rate": 9.397546859494637e-06, "loss": 0.4201, "step": 11422 }, { "epoch": 0.5242072415217292, "grad_norm": 0.4809768497943878, "learning_rate": 9.397430173802165e-06, "loss": 0.4516, "step": 11423 }, { "epoch": 0.5242531320269836, "grad_norm": 0.46915292739868164, "learning_rate": 9.39731347753526e-06, "loss": 0.4209, "step": 11424 }, { "epoch": 0.524299022532238, "grad_norm": 0.5892186164855957, "learning_rate": 9.397196770694207e-06, "loss": 0.4467, "step": 11425 }, { "epoch": 0.5243449130374925, "grad_norm": 0.46034806966781616, "learning_rate": 9.397080053279284e-06, "loss": 0.4012, "step": 11426 }, { "epoch": 0.524390803542747, "grad_norm": 0.698807418346405, "learning_rate": 9.39696332529077e-06, "loss": 0.4257, "step": 11427 }, { "epoch": 0.5244366940480014, "grad_norm": 0.5127604603767395, "learning_rate": 9.396846586728945e-06, "loss": 0.4437, "step": 11428 }, { "epoch": 0.5244825845532559, "grad_norm": 0.4466533958911896, "learning_rate": 9.396729837594094e-06, "loss": 0.3849, "step": 11429 }, { "epoch": 0.5245284750585104, "grad_norm": 0.4703664183616638, "learning_rate": 9.396613077886495e-06, "loss": 0.4689, "step": 11430 }, { "epoch": 0.5245743655637649, "grad_norm": 0.47790801525115967, "learning_rate": 9.396496307606428e-06, "loss": 0.4715, "step": 11431 }, { "epoch": 0.5246202560690193, "grad_norm": 0.49904265999794006, "learning_rate": 9.396379526754177e-06, "loss": 0.4157, "step": 11432 }, { "epoch": 0.5246661465742738, "grad_norm": 0.4207460582256317, "learning_rate": 9.39626273533002e-06, "loss": 0.3297, "step": 11433 }, { "epoch": 0.5247120370795283, "grad_norm": 0.46035128831863403, "learning_rate": 9.396145933334237e-06, "loss": 0.3639, "step": 11434 }, { "epoch": 0.5247579275847827, "grad_norm": 0.45932647585868835, "learning_rate": 9.396029120767112e-06, "loss": 0.4179, "step": 11435 }, { "epoch": 0.5248038180900372, "grad_norm": 0.4972432851791382, "learning_rate": 9.395912297628925e-06, "loss": 0.4649, "step": 11436 }, { "epoch": 0.5248497085952917, "grad_norm": 0.45799997448921204, "learning_rate": 9.395795463919954e-06, "loss": 0.3257, "step": 11437 }, { "epoch": 0.5248955991005461, "grad_norm": 0.4304791986942291, "learning_rate": 9.395678619640485e-06, "loss": 0.3226, "step": 11438 }, { "epoch": 0.5249414896058006, "grad_norm": 0.4275456368923187, "learning_rate": 9.395561764790795e-06, "loss": 0.3379, "step": 11439 }, { "epoch": 0.5249873801110551, "grad_norm": 0.4632872939109802, "learning_rate": 9.395444899371166e-06, "loss": 0.4199, "step": 11440 }, { "epoch": 0.5250332706163094, "grad_norm": 0.47015640139579773, "learning_rate": 9.395328023381878e-06, "loss": 0.3496, "step": 11441 }, { "epoch": 0.5250791611215639, "grad_norm": 0.45110246539115906, "learning_rate": 9.395211136823215e-06, "loss": 0.4339, "step": 11442 }, { "epoch": 0.5251250516268184, "grad_norm": 0.4446107745170593, "learning_rate": 9.395094239695456e-06, "loss": 0.3649, "step": 11443 }, { "epoch": 0.5251709421320728, "grad_norm": 0.45945775508880615, "learning_rate": 9.394977331998883e-06, "loss": 0.3658, "step": 11444 }, { "epoch": 0.5252168326373273, "grad_norm": 0.42512863874435425, "learning_rate": 9.394860413733776e-06, "loss": 0.3323, "step": 11445 }, { "epoch": 0.5252627231425818, "grad_norm": 0.45659297704696655, "learning_rate": 9.394743484900417e-06, "loss": 0.4295, "step": 11446 }, { "epoch": 0.5253086136478362, "grad_norm": 0.42990642786026, "learning_rate": 9.394626545499087e-06, "loss": 0.366, "step": 11447 }, { "epoch": 0.5253545041530907, "grad_norm": 0.47807377576828003, "learning_rate": 9.394509595530067e-06, "loss": 0.4271, "step": 11448 }, { "epoch": 0.5254003946583452, "grad_norm": 0.4524679183959961, "learning_rate": 9.394392634993639e-06, "loss": 0.4002, "step": 11449 }, { "epoch": 0.5254462851635997, "grad_norm": 0.4388274848461151, "learning_rate": 9.394275663890083e-06, "loss": 0.3454, "step": 11450 }, { "epoch": 0.5254921756688541, "grad_norm": 0.4434483051300049, "learning_rate": 9.394158682219681e-06, "loss": 0.3799, "step": 11451 }, { "epoch": 0.5255380661741086, "grad_norm": 0.46120551228523254, "learning_rate": 9.394041689982714e-06, "loss": 0.3764, "step": 11452 }, { "epoch": 0.5255839566793631, "grad_norm": 0.48892879486083984, "learning_rate": 9.393924687179464e-06, "loss": 0.4386, "step": 11453 }, { "epoch": 0.5256298471846175, "grad_norm": 0.4669158160686493, "learning_rate": 9.39380767381021e-06, "loss": 0.382, "step": 11454 }, { "epoch": 0.525675737689872, "grad_norm": 0.5157306790351868, "learning_rate": 9.393690649875238e-06, "loss": 0.4459, "step": 11455 }, { "epoch": 0.5257216281951265, "grad_norm": 0.4693259596824646, "learning_rate": 9.393573615374825e-06, "loss": 0.4466, "step": 11456 }, { "epoch": 0.5257675187003809, "grad_norm": 0.4892548620700836, "learning_rate": 9.393456570309256e-06, "loss": 0.4453, "step": 11457 }, { "epoch": 0.5258134092056354, "grad_norm": 0.4841972887516022, "learning_rate": 9.393339514678808e-06, "loss": 0.4389, "step": 11458 }, { "epoch": 0.5258592997108898, "grad_norm": 0.46456146240234375, "learning_rate": 9.393222448483766e-06, "loss": 0.3919, "step": 11459 }, { "epoch": 0.5259051902161442, "grad_norm": 0.46003010869026184, "learning_rate": 9.393105371724412e-06, "loss": 0.4, "step": 11460 }, { "epoch": 0.5259510807213987, "grad_norm": 0.4810815453529358, "learning_rate": 9.392988284401023e-06, "loss": 0.4253, "step": 11461 }, { "epoch": 0.5259969712266532, "grad_norm": 0.456248939037323, "learning_rate": 9.392871186513885e-06, "loss": 0.3881, "step": 11462 }, { "epoch": 0.5260428617319076, "grad_norm": 0.4862266480922699, "learning_rate": 9.39275407806328e-06, "loss": 0.4147, "step": 11463 }, { "epoch": 0.5260887522371621, "grad_norm": 0.42009127140045166, "learning_rate": 9.392636959049486e-06, "loss": 0.331, "step": 11464 }, { "epoch": 0.5261346427424166, "grad_norm": 0.4570986032485962, "learning_rate": 9.392519829472788e-06, "loss": 0.4042, "step": 11465 }, { "epoch": 0.5261805332476711, "grad_norm": 0.42909303307533264, "learning_rate": 9.392402689333463e-06, "loss": 0.3171, "step": 11466 }, { "epoch": 0.5262264237529255, "grad_norm": 0.4438481628894806, "learning_rate": 9.3922855386318e-06, "loss": 0.3948, "step": 11467 }, { "epoch": 0.52627231425818, "grad_norm": 0.45915961265563965, "learning_rate": 9.392168377368074e-06, "loss": 0.3877, "step": 11468 }, { "epoch": 0.5263182047634345, "grad_norm": 0.42701494693756104, "learning_rate": 9.392051205542569e-06, "loss": 0.3343, "step": 11469 }, { "epoch": 0.5263640952686889, "grad_norm": 0.44611984491348267, "learning_rate": 9.391934023155569e-06, "loss": 0.3746, "step": 11470 }, { "epoch": 0.5264099857739434, "grad_norm": 0.4977155327796936, "learning_rate": 9.391816830207352e-06, "loss": 0.4444, "step": 11471 }, { "epoch": 0.5264558762791979, "grad_norm": 0.44357648491859436, "learning_rate": 9.391699626698203e-06, "loss": 0.3478, "step": 11472 }, { "epoch": 0.5265017667844523, "grad_norm": 0.5211843252182007, "learning_rate": 9.391582412628401e-06, "loss": 0.4295, "step": 11473 }, { "epoch": 0.5265476572897068, "grad_norm": 0.47220176458358765, "learning_rate": 9.39146518799823e-06, "loss": 0.4472, "step": 11474 }, { "epoch": 0.5265935477949613, "grad_norm": 0.4938763678073883, "learning_rate": 9.391347952807972e-06, "loss": 0.5066, "step": 11475 }, { "epoch": 0.5266394383002156, "grad_norm": 0.46761298179626465, "learning_rate": 9.391230707057906e-06, "loss": 0.4883, "step": 11476 }, { "epoch": 0.5266853288054701, "grad_norm": 0.4353277385234833, "learning_rate": 9.391113450748318e-06, "loss": 0.3962, "step": 11477 }, { "epoch": 0.5267312193107246, "grad_norm": 0.4347732365131378, "learning_rate": 9.390996183879488e-06, "loss": 0.3452, "step": 11478 }, { "epoch": 0.526777109815979, "grad_norm": 0.5078023672103882, "learning_rate": 9.390878906451697e-06, "loss": 0.5234, "step": 11479 }, { "epoch": 0.5268230003212335, "grad_norm": 0.4796895384788513, "learning_rate": 9.390761618465228e-06, "loss": 0.4465, "step": 11480 }, { "epoch": 0.526868890826488, "grad_norm": 0.5031535625457764, "learning_rate": 9.390644319920363e-06, "loss": 0.5324, "step": 11481 }, { "epoch": 0.5269147813317424, "grad_norm": 0.432140588760376, "learning_rate": 9.390527010817385e-06, "loss": 0.3336, "step": 11482 }, { "epoch": 0.5269606718369969, "grad_norm": 0.4209963083267212, "learning_rate": 9.390409691156576e-06, "loss": 0.3781, "step": 11483 }, { "epoch": 0.5270065623422514, "grad_norm": 0.4993205964565277, "learning_rate": 9.390292360938215e-06, "loss": 0.4563, "step": 11484 }, { "epoch": 0.5270524528475059, "grad_norm": 0.4465543329715729, "learning_rate": 9.390175020162588e-06, "loss": 0.4116, "step": 11485 }, { "epoch": 0.5270983433527603, "grad_norm": 0.40897610783576965, "learning_rate": 9.390057668829976e-06, "loss": 0.3078, "step": 11486 }, { "epoch": 0.5271442338580148, "grad_norm": 0.4544394314289093, "learning_rate": 9.38994030694066e-06, "loss": 0.463, "step": 11487 }, { "epoch": 0.5271901243632693, "grad_norm": 0.464700847864151, "learning_rate": 9.389822934494923e-06, "loss": 0.4466, "step": 11488 }, { "epoch": 0.5272360148685237, "grad_norm": 0.4608149826526642, "learning_rate": 9.389705551493048e-06, "loss": 0.39, "step": 11489 }, { "epoch": 0.5272819053737782, "grad_norm": 0.48113757371902466, "learning_rate": 9.389588157935317e-06, "loss": 0.4944, "step": 11490 }, { "epoch": 0.5273277958790327, "grad_norm": 0.45270052552223206, "learning_rate": 9.38947075382201e-06, "loss": 0.4081, "step": 11491 }, { "epoch": 0.527373686384287, "grad_norm": 0.4248492419719696, "learning_rate": 9.389353339153413e-06, "loss": 0.3493, "step": 11492 }, { "epoch": 0.5274195768895416, "grad_norm": 0.463619202375412, "learning_rate": 9.389235913929806e-06, "loss": 0.4243, "step": 11493 }, { "epoch": 0.527465467394796, "grad_norm": 0.5470955967903137, "learning_rate": 9.389118478151471e-06, "loss": 0.5999, "step": 11494 }, { "epoch": 0.5275113579000504, "grad_norm": 0.4415670335292816, "learning_rate": 9.389001031818691e-06, "loss": 0.385, "step": 11495 }, { "epoch": 0.5275572484053049, "grad_norm": 0.49321937561035156, "learning_rate": 9.38888357493175e-06, "loss": 0.4911, "step": 11496 }, { "epoch": 0.5276031389105594, "grad_norm": 0.4510864317417145, "learning_rate": 9.38876610749093e-06, "loss": 0.3403, "step": 11497 }, { "epoch": 0.5276490294158138, "grad_norm": 0.45155200362205505, "learning_rate": 9.38864862949651e-06, "loss": 0.3588, "step": 11498 }, { "epoch": 0.5276949199210683, "grad_norm": 0.44299718737602234, "learning_rate": 9.388531140948778e-06, "loss": 0.3903, "step": 11499 }, { "epoch": 0.5277408104263228, "grad_norm": 0.4556327760219574, "learning_rate": 9.388413641848012e-06, "loss": 0.3739, "step": 11500 }, { "epoch": 0.5277867009315772, "grad_norm": 0.4424279034137726, "learning_rate": 9.388296132194496e-06, "loss": 0.33, "step": 11501 }, { "epoch": 0.5278325914368317, "grad_norm": 0.44124263525009155, "learning_rate": 9.388178611988515e-06, "loss": 0.3677, "step": 11502 }, { "epoch": 0.5278784819420862, "grad_norm": 0.471493124961853, "learning_rate": 9.388061081230347e-06, "loss": 0.3666, "step": 11503 }, { "epoch": 0.5279243724473407, "grad_norm": 0.47322705388069153, "learning_rate": 9.387943539920278e-06, "loss": 0.4306, "step": 11504 }, { "epoch": 0.5279702629525951, "grad_norm": 0.4894491136074066, "learning_rate": 9.38782598805859e-06, "loss": 0.383, "step": 11505 }, { "epoch": 0.5280161534578496, "grad_norm": 0.4657978415489197, "learning_rate": 9.387708425645564e-06, "loss": 0.3862, "step": 11506 }, { "epoch": 0.5280620439631041, "grad_norm": 0.48629364371299744, "learning_rate": 9.387590852681485e-06, "loss": 0.4399, "step": 11507 }, { "epoch": 0.5281079344683585, "grad_norm": 0.4670019745826721, "learning_rate": 9.387473269166634e-06, "loss": 0.4486, "step": 11508 }, { "epoch": 0.528153824973613, "grad_norm": 0.4332893192768097, "learning_rate": 9.387355675101294e-06, "loss": 0.3464, "step": 11509 }, { "epoch": 0.5281997154788675, "grad_norm": 0.4641135334968567, "learning_rate": 9.387238070485751e-06, "loss": 0.3874, "step": 11510 }, { "epoch": 0.5282456059841218, "grad_norm": 0.496931254863739, "learning_rate": 9.387120455320284e-06, "loss": 0.472, "step": 11511 }, { "epoch": 0.5282914964893763, "grad_norm": 0.4468570053577423, "learning_rate": 9.387002829605175e-06, "loss": 0.3432, "step": 11512 }, { "epoch": 0.5283373869946308, "grad_norm": 0.4977463483810425, "learning_rate": 9.386885193340712e-06, "loss": 0.4429, "step": 11513 }, { "epoch": 0.5283832774998852, "grad_norm": 0.48747384548187256, "learning_rate": 9.386767546527172e-06, "loss": 0.5108, "step": 11514 }, { "epoch": 0.5284291680051397, "grad_norm": 0.45584872364997864, "learning_rate": 9.38664988916484e-06, "loss": 0.404, "step": 11515 }, { "epoch": 0.5284750585103942, "grad_norm": 0.4881237745285034, "learning_rate": 9.386532221254001e-06, "loss": 0.4138, "step": 11516 }, { "epoch": 0.5285209490156486, "grad_norm": 0.4662555754184723, "learning_rate": 9.386414542794937e-06, "loss": 0.4162, "step": 11517 }, { "epoch": 0.5285668395209031, "grad_norm": 0.4696439206600189, "learning_rate": 9.38629685378793e-06, "loss": 0.4537, "step": 11518 }, { "epoch": 0.5286127300261576, "grad_norm": 0.46920210123062134, "learning_rate": 9.386179154233264e-06, "loss": 0.4406, "step": 11519 }, { "epoch": 0.5286586205314121, "grad_norm": 0.47456490993499756, "learning_rate": 9.386061444131219e-06, "loss": 0.4727, "step": 11520 }, { "epoch": 0.5287045110366665, "grad_norm": 0.4964886009693146, "learning_rate": 9.385943723482083e-06, "loss": 0.4117, "step": 11521 }, { "epoch": 0.528750401541921, "grad_norm": 0.4609547555446625, "learning_rate": 9.385825992286136e-06, "loss": 0.3918, "step": 11522 }, { "epoch": 0.5287962920471755, "grad_norm": 0.4713539779186249, "learning_rate": 9.38570825054366e-06, "loss": 0.3993, "step": 11523 }, { "epoch": 0.5288421825524299, "grad_norm": 0.4443165957927704, "learning_rate": 9.385590498254942e-06, "loss": 0.3728, "step": 11524 }, { "epoch": 0.5288880730576844, "grad_norm": 0.5092284679412842, "learning_rate": 9.385472735420263e-06, "loss": 0.5394, "step": 11525 }, { "epoch": 0.5289339635629389, "grad_norm": 0.4156496524810791, "learning_rate": 9.385354962039904e-06, "loss": 0.3494, "step": 11526 }, { "epoch": 0.5289798540681933, "grad_norm": 0.48917466402053833, "learning_rate": 9.385237178114152e-06, "loss": 0.4481, "step": 11527 }, { "epoch": 0.5290257445734478, "grad_norm": 0.4595395028591156, "learning_rate": 9.385119383643288e-06, "loss": 0.3885, "step": 11528 }, { "epoch": 0.5290716350787023, "grad_norm": 0.4985535144805908, "learning_rate": 9.385001578627597e-06, "loss": 0.3819, "step": 11529 }, { "epoch": 0.5291175255839566, "grad_norm": 0.4437890946865082, "learning_rate": 9.384883763067361e-06, "loss": 0.373, "step": 11530 }, { "epoch": 0.5291634160892111, "grad_norm": 0.44906851649284363, "learning_rate": 9.384765936962862e-06, "loss": 0.3844, "step": 11531 }, { "epoch": 0.5292093065944656, "grad_norm": 0.4874633252620697, "learning_rate": 9.384648100314386e-06, "loss": 0.4687, "step": 11532 }, { "epoch": 0.52925519709972, "grad_norm": 0.4595133364200592, "learning_rate": 9.384530253122213e-06, "loss": 0.4055, "step": 11533 }, { "epoch": 0.5293010876049745, "grad_norm": 0.47851428389549255, "learning_rate": 9.38441239538663e-06, "loss": 0.4401, "step": 11534 }, { "epoch": 0.529346978110229, "grad_norm": 0.495633602142334, "learning_rate": 9.38429452710792e-06, "loss": 0.4195, "step": 11535 }, { "epoch": 0.5293928686154834, "grad_norm": 0.5485066175460815, "learning_rate": 9.384176648286365e-06, "loss": 0.5194, "step": 11536 }, { "epoch": 0.5294387591207379, "grad_norm": 0.4835604727268219, "learning_rate": 9.384058758922248e-06, "loss": 0.4097, "step": 11537 }, { "epoch": 0.5294846496259924, "grad_norm": 0.4388428032398224, "learning_rate": 9.383940859015854e-06, "loss": 0.3257, "step": 11538 }, { "epoch": 0.5295305401312469, "grad_norm": 0.46346622705459595, "learning_rate": 9.383822948567466e-06, "loss": 0.3945, "step": 11539 }, { "epoch": 0.5295764306365013, "grad_norm": 0.4459650218486786, "learning_rate": 9.383705027577368e-06, "loss": 0.3418, "step": 11540 }, { "epoch": 0.5296223211417558, "grad_norm": 0.49368637800216675, "learning_rate": 9.38358709604584e-06, "loss": 0.4209, "step": 11541 }, { "epoch": 0.5296682116470103, "grad_norm": 0.4806511402130127, "learning_rate": 9.38346915397317e-06, "loss": 0.3934, "step": 11542 }, { "epoch": 0.5297141021522647, "grad_norm": 0.4567352831363678, "learning_rate": 9.383351201359644e-06, "loss": 0.3847, "step": 11543 }, { "epoch": 0.5297599926575192, "grad_norm": 0.42276278138160706, "learning_rate": 9.383233238205537e-06, "loss": 0.3381, "step": 11544 }, { "epoch": 0.5298058831627737, "grad_norm": 0.4420825242996216, "learning_rate": 9.383115264511138e-06, "loss": 0.4147, "step": 11545 }, { "epoch": 0.529851773668028, "grad_norm": 0.5010154843330383, "learning_rate": 9.38299728027673e-06, "loss": 0.4365, "step": 11546 }, { "epoch": 0.5298976641732825, "grad_norm": 0.48686492443084717, "learning_rate": 9.3828792855026e-06, "loss": 0.4615, "step": 11547 }, { "epoch": 0.529943554678537, "grad_norm": 0.47311222553253174, "learning_rate": 9.382761280189025e-06, "loss": 0.3837, "step": 11548 }, { "epoch": 0.5299894451837914, "grad_norm": 0.4158177971839905, "learning_rate": 9.382643264336296e-06, "loss": 0.3249, "step": 11549 }, { "epoch": 0.5300353356890459, "grad_norm": 0.4302652180194855, "learning_rate": 9.382525237944691e-06, "loss": 0.3538, "step": 11550 }, { "epoch": 0.5300812261943004, "grad_norm": 0.41319578886032104, "learning_rate": 9.382407201014494e-06, "loss": 0.3415, "step": 11551 }, { "epoch": 0.5301271166995548, "grad_norm": 0.45538896322250366, "learning_rate": 9.382289153545994e-06, "loss": 0.3952, "step": 11552 }, { "epoch": 0.5301730072048093, "grad_norm": 0.4859313368797302, "learning_rate": 9.382171095539469e-06, "loss": 0.3714, "step": 11553 }, { "epoch": 0.5302188977100638, "grad_norm": 0.4542468786239624, "learning_rate": 9.382053026995209e-06, "loss": 0.4064, "step": 11554 }, { "epoch": 0.5302647882153183, "grad_norm": 0.43858861923217773, "learning_rate": 9.381934947913491e-06, "loss": 0.378, "step": 11555 }, { "epoch": 0.5303106787205727, "grad_norm": 0.45160600543022156, "learning_rate": 9.381816858294605e-06, "loss": 0.3273, "step": 11556 }, { "epoch": 0.5303565692258272, "grad_norm": 0.422331839799881, "learning_rate": 9.381698758138832e-06, "loss": 0.3369, "step": 11557 }, { "epoch": 0.5304024597310817, "grad_norm": 0.45455700159072876, "learning_rate": 9.381580647446454e-06, "loss": 0.4062, "step": 11558 }, { "epoch": 0.5304483502363361, "grad_norm": 0.4425019919872284, "learning_rate": 9.381462526217759e-06, "loss": 0.3686, "step": 11559 }, { "epoch": 0.5304942407415906, "grad_norm": 0.4182943105697632, "learning_rate": 9.38134439445303e-06, "loss": 0.3367, "step": 11560 }, { "epoch": 0.5305401312468451, "grad_norm": 0.46657779812812805, "learning_rate": 9.38122625215255e-06, "loss": 0.4064, "step": 11561 }, { "epoch": 0.5305860217520995, "grad_norm": 0.42145976424217224, "learning_rate": 9.381108099316605e-06, "loss": 0.3286, "step": 11562 }, { "epoch": 0.530631912257354, "grad_norm": 0.46870410442352295, "learning_rate": 9.380989935945474e-06, "loss": 0.4069, "step": 11563 }, { "epoch": 0.5306778027626085, "grad_norm": 0.47562527656555176, "learning_rate": 9.380871762039448e-06, "loss": 0.4392, "step": 11564 }, { "epoch": 0.5307236932678628, "grad_norm": 0.4466591477394104, "learning_rate": 9.380753577598807e-06, "loss": 0.3744, "step": 11565 }, { "epoch": 0.5307695837731173, "grad_norm": 0.5316721200942993, "learning_rate": 9.380635382623837e-06, "loss": 0.4571, "step": 11566 }, { "epoch": 0.5308154742783718, "grad_norm": 0.448160320520401, "learning_rate": 9.380517177114822e-06, "loss": 0.359, "step": 11567 }, { "epoch": 0.5308613647836262, "grad_norm": 0.48585203289985657, "learning_rate": 9.380398961072044e-06, "loss": 0.4666, "step": 11568 }, { "epoch": 0.5309072552888807, "grad_norm": 0.4485619068145752, "learning_rate": 9.38028073449579e-06, "loss": 0.3932, "step": 11569 }, { "epoch": 0.5309531457941352, "grad_norm": 0.7244715690612793, "learning_rate": 9.380162497386344e-06, "loss": 0.4252, "step": 11570 }, { "epoch": 0.5309990362993896, "grad_norm": 0.41888168454170227, "learning_rate": 9.380044249743989e-06, "loss": 0.3383, "step": 11571 }, { "epoch": 0.5310449268046441, "grad_norm": 0.6482976078987122, "learning_rate": 9.37992599156901e-06, "loss": 0.368, "step": 11572 }, { "epoch": 0.5310908173098986, "grad_norm": 0.4620508849620819, "learning_rate": 9.37980772286169e-06, "loss": 0.4066, "step": 11573 }, { "epoch": 0.5311367078151531, "grad_norm": 0.42144301533699036, "learning_rate": 9.379689443622316e-06, "loss": 0.3567, "step": 11574 }, { "epoch": 0.5311825983204075, "grad_norm": 0.42783674597740173, "learning_rate": 9.37957115385117e-06, "loss": 0.3331, "step": 11575 }, { "epoch": 0.531228488825662, "grad_norm": 0.44144707918167114, "learning_rate": 9.379452853548539e-06, "loss": 0.3111, "step": 11576 }, { "epoch": 0.5312743793309165, "grad_norm": 0.48687034845352173, "learning_rate": 9.379334542714707e-06, "loss": 0.5027, "step": 11577 }, { "epoch": 0.5313202698361709, "grad_norm": 0.4576660394668579, "learning_rate": 9.379216221349955e-06, "loss": 0.3857, "step": 11578 }, { "epoch": 0.5313661603414254, "grad_norm": 0.4898691773414612, "learning_rate": 9.379097889454571e-06, "loss": 0.473, "step": 11579 }, { "epoch": 0.5314120508466799, "grad_norm": 0.45174071192741394, "learning_rate": 9.37897954702884e-06, "loss": 0.352, "step": 11580 }, { "epoch": 0.5314579413519342, "grad_norm": 0.42047473788261414, "learning_rate": 9.378861194073044e-06, "loss": 0.3278, "step": 11581 }, { "epoch": 0.5315038318571887, "grad_norm": 0.49685177206993103, "learning_rate": 9.378742830587468e-06, "loss": 0.3997, "step": 11582 }, { "epoch": 0.5315497223624432, "grad_norm": 0.43215611577033997, "learning_rate": 9.3786244565724e-06, "loss": 0.3179, "step": 11583 }, { "epoch": 0.5315956128676976, "grad_norm": 0.4613479971885681, "learning_rate": 9.378506072028119e-06, "loss": 0.3661, "step": 11584 }, { "epoch": 0.5316415033729521, "grad_norm": 0.46476954221725464, "learning_rate": 9.378387676954916e-06, "loss": 0.4189, "step": 11585 }, { "epoch": 0.5316873938782066, "grad_norm": 0.48116689920425415, "learning_rate": 9.37826927135307e-06, "loss": 0.4661, "step": 11586 }, { "epoch": 0.531733284383461, "grad_norm": 0.4635394513607025, "learning_rate": 9.37815085522287e-06, "loss": 0.4361, "step": 11587 }, { "epoch": 0.5317791748887155, "grad_norm": 0.4625721275806427, "learning_rate": 9.378032428564597e-06, "loss": 0.44, "step": 11588 }, { "epoch": 0.53182506539397, "grad_norm": 0.4525059759616852, "learning_rate": 9.37791399137854e-06, "loss": 0.4199, "step": 11589 }, { "epoch": 0.5318709558992244, "grad_norm": 0.43967950344085693, "learning_rate": 9.37779554366498e-06, "loss": 0.3436, "step": 11590 }, { "epoch": 0.5319168464044789, "grad_norm": 0.5150948166847229, "learning_rate": 9.377677085424203e-06, "loss": 0.4618, "step": 11591 }, { "epoch": 0.5319627369097334, "grad_norm": 0.47768378257751465, "learning_rate": 9.377558616656494e-06, "loss": 0.4452, "step": 11592 }, { "epoch": 0.5320086274149879, "grad_norm": 0.4843105673789978, "learning_rate": 9.377440137362141e-06, "loss": 0.4901, "step": 11593 }, { "epoch": 0.5320545179202423, "grad_norm": 0.4529247581958771, "learning_rate": 9.377321647541425e-06, "loss": 0.4015, "step": 11594 }, { "epoch": 0.5321004084254968, "grad_norm": 0.45344090461730957, "learning_rate": 9.37720314719463e-06, "loss": 0.4497, "step": 11595 }, { "epoch": 0.5321462989307513, "grad_norm": 0.43733781576156616, "learning_rate": 9.377084636322045e-06, "loss": 0.3634, "step": 11596 }, { "epoch": 0.5321921894360057, "grad_norm": 0.4152829349040985, "learning_rate": 9.376966114923952e-06, "loss": 0.3617, "step": 11597 }, { "epoch": 0.5322380799412602, "grad_norm": 0.4740702211856842, "learning_rate": 9.376847583000636e-06, "loss": 0.4416, "step": 11598 }, { "epoch": 0.5322839704465147, "grad_norm": 0.5115492939949036, "learning_rate": 9.376729040552384e-06, "loss": 0.4452, "step": 11599 }, { "epoch": 0.532329860951769, "grad_norm": 0.4531647861003876, "learning_rate": 9.37661048757948e-06, "loss": 0.4305, "step": 11600 }, { "epoch": 0.5323757514570235, "grad_norm": 0.4669593870639801, "learning_rate": 9.376491924082208e-06, "loss": 0.4468, "step": 11601 }, { "epoch": 0.532421641962278, "grad_norm": 0.4381949305534363, "learning_rate": 9.376373350060855e-06, "loss": 0.3488, "step": 11602 }, { "epoch": 0.5324675324675324, "grad_norm": 0.4604457914829254, "learning_rate": 9.376254765515707e-06, "loss": 0.3846, "step": 11603 }, { "epoch": 0.5325134229727869, "grad_norm": 0.4993179142475128, "learning_rate": 9.376136170447046e-06, "loss": 0.3595, "step": 11604 }, { "epoch": 0.5325593134780414, "grad_norm": 0.4939204454421997, "learning_rate": 9.376017564855157e-06, "loss": 0.4765, "step": 11605 }, { "epoch": 0.5326052039832958, "grad_norm": 0.4435739517211914, "learning_rate": 9.375898948740329e-06, "loss": 0.3855, "step": 11606 }, { "epoch": 0.5326510944885503, "grad_norm": 0.49379995465278625, "learning_rate": 9.375780322102845e-06, "loss": 0.4484, "step": 11607 }, { "epoch": 0.5326969849938048, "grad_norm": 0.4521661698818207, "learning_rate": 9.37566168494299e-06, "loss": 0.3592, "step": 11608 }, { "epoch": 0.5327428754990593, "grad_norm": 0.4651557207107544, "learning_rate": 9.375543037261048e-06, "loss": 0.3696, "step": 11609 }, { "epoch": 0.5327887660043137, "grad_norm": 0.46903112530708313, "learning_rate": 9.375424379057307e-06, "loss": 0.4092, "step": 11610 }, { "epoch": 0.5328346565095682, "grad_norm": 0.4719286561012268, "learning_rate": 9.375305710332052e-06, "loss": 0.4762, "step": 11611 }, { "epoch": 0.5328805470148227, "grad_norm": 0.491604745388031, "learning_rate": 9.375187031085568e-06, "loss": 0.4428, "step": 11612 }, { "epoch": 0.5329264375200771, "grad_norm": 0.4755484163761139, "learning_rate": 9.37506834131814e-06, "loss": 0.4658, "step": 11613 }, { "epoch": 0.5329723280253316, "grad_norm": 0.43989041447639465, "learning_rate": 9.374949641030053e-06, "loss": 0.3405, "step": 11614 }, { "epoch": 0.5330182185305861, "grad_norm": 0.434423565864563, "learning_rate": 9.374830930221592e-06, "loss": 0.3818, "step": 11615 }, { "epoch": 0.5330641090358404, "grad_norm": 0.47206324338912964, "learning_rate": 9.374712208893044e-06, "loss": 0.4204, "step": 11616 }, { "epoch": 0.533109999541095, "grad_norm": 0.4455054700374603, "learning_rate": 9.374593477044694e-06, "loss": 0.3458, "step": 11617 }, { "epoch": 0.5331558900463494, "grad_norm": 0.45316562056541443, "learning_rate": 9.374474734676829e-06, "loss": 0.3834, "step": 11618 }, { "epoch": 0.5332017805516038, "grad_norm": 0.4805208444595337, "learning_rate": 9.37435598178973e-06, "loss": 0.4594, "step": 11619 }, { "epoch": 0.5332476710568583, "grad_norm": 0.44822531938552856, "learning_rate": 9.374237218383687e-06, "loss": 0.4097, "step": 11620 }, { "epoch": 0.5332935615621128, "grad_norm": 0.4362548291683197, "learning_rate": 9.374118444458984e-06, "loss": 0.3443, "step": 11621 }, { "epoch": 0.5333394520673672, "grad_norm": 0.5092342495918274, "learning_rate": 9.373999660015905e-06, "loss": 0.5684, "step": 11622 }, { "epoch": 0.5333853425726217, "grad_norm": 0.42923811078071594, "learning_rate": 9.37388086505474e-06, "loss": 0.3465, "step": 11623 }, { "epoch": 0.5334312330778762, "grad_norm": 0.44357872009277344, "learning_rate": 9.37376205957577e-06, "loss": 0.3662, "step": 11624 }, { "epoch": 0.5334771235831306, "grad_norm": 0.43685269355773926, "learning_rate": 9.373643243579283e-06, "loss": 0.39, "step": 11625 }, { "epoch": 0.5335230140883851, "grad_norm": 0.47921207547187805, "learning_rate": 9.373524417065566e-06, "loss": 0.4298, "step": 11626 }, { "epoch": 0.5335689045936396, "grad_norm": 0.48560789227485657, "learning_rate": 9.3734055800349e-06, "loss": 0.4327, "step": 11627 }, { "epoch": 0.5336147950988941, "grad_norm": 0.4798007011413574, "learning_rate": 9.373286732487576e-06, "loss": 0.3875, "step": 11628 }, { "epoch": 0.5336606856041485, "grad_norm": 0.4492725431919098, "learning_rate": 9.373167874423877e-06, "loss": 0.3537, "step": 11629 }, { "epoch": 0.533706576109403, "grad_norm": 0.4310928285121918, "learning_rate": 9.373049005844091e-06, "loss": 0.3474, "step": 11630 }, { "epoch": 0.5337524666146575, "grad_norm": 0.4532617926597595, "learning_rate": 9.372930126748501e-06, "loss": 0.3925, "step": 11631 }, { "epoch": 0.5337983571199119, "grad_norm": 0.4393414556980133, "learning_rate": 9.372811237137393e-06, "loss": 0.3754, "step": 11632 }, { "epoch": 0.5338442476251664, "grad_norm": 0.4893622398376465, "learning_rate": 9.372692337011056e-06, "loss": 0.4213, "step": 11633 }, { "epoch": 0.5338901381304209, "grad_norm": 0.46111804246902466, "learning_rate": 9.372573426369774e-06, "loss": 0.4179, "step": 11634 }, { "epoch": 0.5339360286356752, "grad_norm": 0.46457934379577637, "learning_rate": 9.372454505213831e-06, "loss": 0.3934, "step": 11635 }, { "epoch": 0.5339819191409297, "grad_norm": 0.46537649631500244, "learning_rate": 9.372335573543516e-06, "loss": 0.4486, "step": 11636 }, { "epoch": 0.5340278096461842, "grad_norm": 0.4430766701698303, "learning_rate": 9.372216631359115e-06, "loss": 0.3266, "step": 11637 }, { "epoch": 0.5340737001514386, "grad_norm": 0.44635188579559326, "learning_rate": 9.37209767866091e-06, "loss": 0.3726, "step": 11638 }, { "epoch": 0.5341195906566931, "grad_norm": 0.48563629388809204, "learning_rate": 9.371978715449192e-06, "loss": 0.4021, "step": 11639 }, { "epoch": 0.5341654811619476, "grad_norm": 0.4986581802368164, "learning_rate": 9.371859741724245e-06, "loss": 0.5052, "step": 11640 }, { "epoch": 0.534211371667202, "grad_norm": 0.4532596468925476, "learning_rate": 9.371740757486357e-06, "loss": 0.3847, "step": 11641 }, { "epoch": 0.5342572621724565, "grad_norm": 0.45764607191085815, "learning_rate": 9.37162176273581e-06, "loss": 0.4517, "step": 11642 }, { "epoch": 0.534303152677711, "grad_norm": 0.4384588301181793, "learning_rate": 9.371502757472891e-06, "loss": 0.3441, "step": 11643 }, { "epoch": 0.5343490431829655, "grad_norm": 0.4472794532775879, "learning_rate": 9.37138374169789e-06, "loss": 0.4206, "step": 11644 }, { "epoch": 0.5343949336882199, "grad_norm": 0.438943475484848, "learning_rate": 9.371264715411091e-06, "loss": 0.3478, "step": 11645 }, { "epoch": 0.5344408241934744, "grad_norm": 0.4645429849624634, "learning_rate": 9.371145678612777e-06, "loss": 0.4307, "step": 11646 }, { "epoch": 0.5344867146987289, "grad_norm": 0.4793485999107361, "learning_rate": 9.371026631303239e-06, "loss": 0.3829, "step": 11647 }, { "epoch": 0.5345326052039833, "grad_norm": 0.44711020588874817, "learning_rate": 9.370907573482762e-06, "loss": 0.4163, "step": 11648 }, { "epoch": 0.5345784957092378, "grad_norm": 0.4245031476020813, "learning_rate": 9.370788505151631e-06, "loss": 0.3568, "step": 11649 }, { "epoch": 0.5346243862144923, "grad_norm": 0.48311084508895874, "learning_rate": 9.370669426310134e-06, "loss": 0.4932, "step": 11650 }, { "epoch": 0.5346702767197467, "grad_norm": 0.46771153807640076, "learning_rate": 9.370550336958556e-06, "loss": 0.4558, "step": 11651 }, { "epoch": 0.5347161672250011, "grad_norm": 0.4367324113845825, "learning_rate": 9.370431237097184e-06, "loss": 0.3342, "step": 11652 }, { "epoch": 0.5347620577302556, "grad_norm": 0.4495399296283722, "learning_rate": 9.370312126726304e-06, "loss": 0.3618, "step": 11653 }, { "epoch": 0.53480794823551, "grad_norm": 0.41776782274246216, "learning_rate": 9.370193005846202e-06, "loss": 0.3108, "step": 11654 }, { "epoch": 0.5348538387407645, "grad_norm": 0.44117724895477295, "learning_rate": 9.370073874457167e-06, "loss": 0.3651, "step": 11655 }, { "epoch": 0.534899729246019, "grad_norm": 0.5484052896499634, "learning_rate": 9.369954732559481e-06, "loss": 0.3278, "step": 11656 }, { "epoch": 0.5349456197512734, "grad_norm": 0.4948674738407135, "learning_rate": 9.369835580153435e-06, "loss": 0.477, "step": 11657 }, { "epoch": 0.5349915102565279, "grad_norm": 0.4251708388328552, "learning_rate": 9.369716417239312e-06, "loss": 0.3038, "step": 11658 }, { "epoch": 0.5350374007617824, "grad_norm": 0.611009955406189, "learning_rate": 9.369597243817402e-06, "loss": 0.4037, "step": 11659 }, { "epoch": 0.5350832912670368, "grad_norm": 0.46289777755737305, "learning_rate": 9.369478059887988e-06, "loss": 0.398, "step": 11660 }, { "epoch": 0.5351291817722913, "grad_norm": 0.4309151768684387, "learning_rate": 9.369358865451359e-06, "loss": 0.3516, "step": 11661 }, { "epoch": 0.5351750722775458, "grad_norm": 0.4830739200115204, "learning_rate": 9.369239660507802e-06, "loss": 0.4298, "step": 11662 }, { "epoch": 0.5352209627828003, "grad_norm": 0.45701953768730164, "learning_rate": 9.369120445057602e-06, "loss": 0.3928, "step": 11663 }, { "epoch": 0.5352668532880547, "grad_norm": 0.501124382019043, "learning_rate": 9.369001219101046e-06, "loss": 0.4657, "step": 11664 }, { "epoch": 0.5353127437933092, "grad_norm": 0.45428067445755005, "learning_rate": 9.36888198263842e-06, "loss": 0.457, "step": 11665 }, { "epoch": 0.5353586342985637, "grad_norm": 0.416500985622406, "learning_rate": 9.368762735670012e-06, "loss": 0.3114, "step": 11666 }, { "epoch": 0.5354045248038181, "grad_norm": 0.48178282380104065, "learning_rate": 9.36864347819611e-06, "loss": 0.44, "step": 11667 }, { "epoch": 0.5354504153090726, "grad_norm": 0.4471374452114105, "learning_rate": 9.368524210216999e-06, "loss": 0.4024, "step": 11668 }, { "epoch": 0.5354963058143271, "grad_norm": 0.46065840125083923, "learning_rate": 9.368404931732963e-06, "loss": 0.4109, "step": 11669 }, { "epoch": 0.5355421963195814, "grad_norm": 0.42254650592803955, "learning_rate": 9.368285642744295e-06, "loss": 0.3375, "step": 11670 }, { "epoch": 0.5355880868248359, "grad_norm": 0.42463284730911255, "learning_rate": 9.368166343251277e-06, "loss": 0.3402, "step": 11671 }, { "epoch": 0.5356339773300904, "grad_norm": 0.4263712167739868, "learning_rate": 9.3680470332542e-06, "loss": 0.3509, "step": 11672 }, { "epoch": 0.5356798678353448, "grad_norm": 0.4777979552745819, "learning_rate": 9.367927712753347e-06, "loss": 0.3815, "step": 11673 }, { "epoch": 0.5357257583405993, "grad_norm": 0.4567091464996338, "learning_rate": 9.367808381749005e-06, "loss": 0.3422, "step": 11674 }, { "epoch": 0.5357716488458538, "grad_norm": 0.45422235131263733, "learning_rate": 9.367689040241465e-06, "loss": 0.4111, "step": 11675 }, { "epoch": 0.5358175393511082, "grad_norm": 0.4887014329433441, "learning_rate": 9.367569688231009e-06, "loss": 0.4326, "step": 11676 }, { "epoch": 0.5358634298563627, "grad_norm": 0.4648697078227997, "learning_rate": 9.367450325717927e-06, "loss": 0.3618, "step": 11677 }, { "epoch": 0.5359093203616172, "grad_norm": 0.42911064624786377, "learning_rate": 9.367330952702508e-06, "loss": 0.3263, "step": 11678 }, { "epoch": 0.5359552108668716, "grad_norm": 0.453212171792984, "learning_rate": 9.367211569185033e-06, "loss": 0.3818, "step": 11679 }, { "epoch": 0.5360011013721261, "grad_norm": 0.4420299828052521, "learning_rate": 9.367092175165792e-06, "loss": 0.3797, "step": 11680 }, { "epoch": 0.5360469918773806, "grad_norm": 0.48142096400260925, "learning_rate": 9.366972770645075e-06, "loss": 0.4953, "step": 11681 }, { "epoch": 0.5360928823826351, "grad_norm": 0.43701648712158203, "learning_rate": 9.366853355623166e-06, "loss": 0.4113, "step": 11682 }, { "epoch": 0.5361387728878895, "grad_norm": 0.46006593108177185, "learning_rate": 9.366733930100354e-06, "loss": 0.3981, "step": 11683 }, { "epoch": 0.536184663393144, "grad_norm": 0.459682822227478, "learning_rate": 9.366614494076923e-06, "loss": 0.3574, "step": 11684 }, { "epoch": 0.5362305538983985, "grad_norm": 0.42785394191741943, "learning_rate": 9.366495047553164e-06, "loss": 0.3232, "step": 11685 }, { "epoch": 0.5362764444036529, "grad_norm": 0.43644100427627563, "learning_rate": 9.36637559052936e-06, "loss": 0.3942, "step": 11686 }, { "epoch": 0.5363223349089073, "grad_norm": 0.45794540643692017, "learning_rate": 9.366256123005803e-06, "loss": 0.3359, "step": 11687 }, { "epoch": 0.5363682254141618, "grad_norm": 0.4445849359035492, "learning_rate": 9.366136644982777e-06, "loss": 0.3921, "step": 11688 }, { "epoch": 0.5364141159194162, "grad_norm": 0.4533580243587494, "learning_rate": 9.366017156460571e-06, "loss": 0.4263, "step": 11689 }, { "epoch": 0.5364600064246707, "grad_norm": 0.4564112424850464, "learning_rate": 9.365897657439474e-06, "loss": 0.3519, "step": 11690 }, { "epoch": 0.5365058969299252, "grad_norm": 0.5009704232215881, "learning_rate": 9.365778147919767e-06, "loss": 0.4842, "step": 11691 }, { "epoch": 0.5365517874351796, "grad_norm": 0.464406281709671, "learning_rate": 9.365658627901744e-06, "loss": 0.3908, "step": 11692 }, { "epoch": 0.5365976779404341, "grad_norm": 0.5183472037315369, "learning_rate": 9.365539097385687e-06, "loss": 0.5698, "step": 11693 }, { "epoch": 0.5366435684456886, "grad_norm": 0.4381266236305237, "learning_rate": 9.36541955637189e-06, "loss": 0.2997, "step": 11694 }, { "epoch": 0.536689458950943, "grad_norm": 0.4728393256664276, "learning_rate": 9.365300004860634e-06, "loss": 0.4102, "step": 11695 }, { "epoch": 0.5367353494561975, "grad_norm": 0.49420151114463806, "learning_rate": 9.365180442852211e-06, "loss": 0.4696, "step": 11696 }, { "epoch": 0.536781239961452, "grad_norm": 0.4743998646736145, "learning_rate": 9.365060870346906e-06, "loss": 0.4561, "step": 11697 }, { "epoch": 0.5368271304667065, "grad_norm": 0.4423024356365204, "learning_rate": 9.364941287345007e-06, "loss": 0.4031, "step": 11698 }, { "epoch": 0.5368730209719609, "grad_norm": 0.42873337864875793, "learning_rate": 9.364821693846801e-06, "loss": 0.3322, "step": 11699 }, { "epoch": 0.5369189114772154, "grad_norm": 0.44599631428718567, "learning_rate": 9.364702089852578e-06, "loss": 0.4245, "step": 11700 }, { "epoch": 0.5369648019824699, "grad_norm": 0.46549248695373535, "learning_rate": 9.364582475362623e-06, "loss": 0.4024, "step": 11701 }, { "epoch": 0.5370106924877243, "grad_norm": 0.4442719519138336, "learning_rate": 9.364462850377226e-06, "loss": 0.3646, "step": 11702 }, { "epoch": 0.5370565829929788, "grad_norm": 0.4664672315120697, "learning_rate": 9.364343214896672e-06, "loss": 0.3519, "step": 11703 }, { "epoch": 0.5371024734982333, "grad_norm": 0.4892199635505676, "learning_rate": 9.36422356892125e-06, "loss": 0.4619, "step": 11704 }, { "epoch": 0.5371483640034876, "grad_norm": 0.48552682995796204, "learning_rate": 9.364103912451247e-06, "loss": 0.4872, "step": 11705 }, { "epoch": 0.5371942545087421, "grad_norm": 0.5217673182487488, "learning_rate": 9.363984245486955e-06, "loss": 0.5127, "step": 11706 }, { "epoch": 0.5372401450139966, "grad_norm": 0.44143733382225037, "learning_rate": 9.363864568028655e-06, "loss": 0.4118, "step": 11707 }, { "epoch": 0.537286035519251, "grad_norm": 0.4720189571380615, "learning_rate": 9.363744880076638e-06, "loss": 0.4746, "step": 11708 }, { "epoch": 0.5373319260245055, "grad_norm": 0.4795798659324646, "learning_rate": 9.363625181631192e-06, "loss": 0.4008, "step": 11709 }, { "epoch": 0.53737781652976, "grad_norm": 0.477272093296051, "learning_rate": 9.363505472692606e-06, "loss": 0.3761, "step": 11710 }, { "epoch": 0.5374237070350144, "grad_norm": 0.47043347358703613, "learning_rate": 9.363385753261165e-06, "loss": 0.3995, "step": 11711 }, { "epoch": 0.5374695975402689, "grad_norm": 0.4559633433818817, "learning_rate": 9.363266023337158e-06, "loss": 0.3946, "step": 11712 }, { "epoch": 0.5375154880455234, "grad_norm": 0.49843499064445496, "learning_rate": 9.363146282920876e-06, "loss": 0.5041, "step": 11713 }, { "epoch": 0.5375613785507778, "grad_norm": 0.4743489921092987, "learning_rate": 9.363026532012603e-06, "loss": 0.436, "step": 11714 }, { "epoch": 0.5376072690560323, "grad_norm": 0.4795120060443878, "learning_rate": 9.362906770612628e-06, "loss": 0.4156, "step": 11715 }, { "epoch": 0.5376531595612868, "grad_norm": 0.4943495988845825, "learning_rate": 9.362786998721238e-06, "loss": 0.459, "step": 11716 }, { "epoch": 0.5376990500665413, "grad_norm": 0.44998136162757874, "learning_rate": 9.362667216338724e-06, "loss": 0.361, "step": 11717 }, { "epoch": 0.5377449405717957, "grad_norm": 0.4819561243057251, "learning_rate": 9.362547423465371e-06, "loss": 0.4879, "step": 11718 }, { "epoch": 0.5377908310770502, "grad_norm": 0.4418932795524597, "learning_rate": 9.362427620101468e-06, "loss": 0.3833, "step": 11719 }, { "epoch": 0.5378367215823047, "grad_norm": 0.46308183670043945, "learning_rate": 9.362307806247304e-06, "loss": 0.4314, "step": 11720 }, { "epoch": 0.537882612087559, "grad_norm": 0.5953032374382019, "learning_rate": 9.362187981903167e-06, "loss": 0.4444, "step": 11721 }, { "epoch": 0.5379285025928136, "grad_norm": 0.42617878317832947, "learning_rate": 9.362068147069343e-06, "loss": 0.3516, "step": 11722 }, { "epoch": 0.537974393098068, "grad_norm": 0.4837894141674042, "learning_rate": 9.361948301746125e-06, "loss": 0.4853, "step": 11723 }, { "epoch": 0.5380202836033224, "grad_norm": 0.47321876883506775, "learning_rate": 9.361828445933796e-06, "loss": 0.4159, "step": 11724 }, { "epoch": 0.5380661741085769, "grad_norm": 0.5161371231079102, "learning_rate": 9.361708579632646e-06, "loss": 0.3919, "step": 11725 }, { "epoch": 0.5381120646138314, "grad_norm": 0.572919487953186, "learning_rate": 9.361588702842963e-06, "loss": 0.53, "step": 11726 }, { "epoch": 0.5381579551190858, "grad_norm": 0.44206735491752625, "learning_rate": 9.361468815565037e-06, "loss": 0.342, "step": 11727 }, { "epoch": 0.5382038456243403, "grad_norm": 0.44542205333709717, "learning_rate": 9.361348917799154e-06, "loss": 0.3824, "step": 11728 }, { "epoch": 0.5382497361295948, "grad_norm": 0.47720059752464294, "learning_rate": 9.361229009545604e-06, "loss": 0.4126, "step": 11729 }, { "epoch": 0.5382956266348492, "grad_norm": 0.45526832342147827, "learning_rate": 9.361109090804673e-06, "loss": 0.3634, "step": 11730 }, { "epoch": 0.5383415171401037, "grad_norm": 0.47212526202201843, "learning_rate": 9.360989161576655e-06, "loss": 0.4119, "step": 11731 }, { "epoch": 0.5383874076453582, "grad_norm": 0.48589789867401123, "learning_rate": 9.360869221861831e-06, "loss": 0.4123, "step": 11732 }, { "epoch": 0.5384332981506127, "grad_norm": 0.4738694131374359, "learning_rate": 9.360749271660494e-06, "loss": 0.473, "step": 11733 }, { "epoch": 0.5384791886558671, "grad_norm": 0.46446332335472107, "learning_rate": 9.360629310972931e-06, "loss": 0.4203, "step": 11734 }, { "epoch": 0.5385250791611216, "grad_norm": 0.46781182289123535, "learning_rate": 9.360509339799431e-06, "loss": 0.4827, "step": 11735 }, { "epoch": 0.5385709696663761, "grad_norm": 0.4557516276836395, "learning_rate": 9.360389358140283e-06, "loss": 0.3776, "step": 11736 }, { "epoch": 0.5386168601716305, "grad_norm": 0.45764628052711487, "learning_rate": 9.360269365995773e-06, "loss": 0.4002, "step": 11737 }, { "epoch": 0.538662750676885, "grad_norm": 0.4630337357521057, "learning_rate": 9.360149363366194e-06, "loss": 0.3938, "step": 11738 }, { "epoch": 0.5387086411821395, "grad_norm": 0.4393448233604431, "learning_rate": 9.36002935025183e-06, "loss": 0.3385, "step": 11739 }, { "epoch": 0.5387545316873938, "grad_norm": 0.4994428753852844, "learning_rate": 9.35990932665297e-06, "loss": 0.4288, "step": 11740 }, { "epoch": 0.5388004221926483, "grad_norm": 0.49707406759262085, "learning_rate": 9.359789292569904e-06, "loss": 0.5072, "step": 11741 }, { "epoch": 0.5388463126979028, "grad_norm": 0.46496033668518066, "learning_rate": 9.359669248002925e-06, "loss": 0.3754, "step": 11742 }, { "epoch": 0.5388922032031572, "grad_norm": 0.4182380139827728, "learning_rate": 9.359549192952315e-06, "loss": 0.3434, "step": 11743 }, { "epoch": 0.5389380937084117, "grad_norm": 0.4334733188152313, "learning_rate": 9.359429127418365e-06, "loss": 0.4045, "step": 11744 }, { "epoch": 0.5389839842136662, "grad_norm": 0.46384796500205994, "learning_rate": 9.359309051401363e-06, "loss": 0.414, "step": 11745 }, { "epoch": 0.5390298747189206, "grad_norm": 0.43957823514938354, "learning_rate": 9.359188964901599e-06, "loss": 0.3752, "step": 11746 }, { "epoch": 0.5390757652241751, "grad_norm": 0.4827621877193451, "learning_rate": 9.359068867919363e-06, "loss": 0.4383, "step": 11747 }, { "epoch": 0.5391216557294296, "grad_norm": 0.49911588430404663, "learning_rate": 9.35894876045494e-06, "loss": 0.485, "step": 11748 }, { "epoch": 0.539167546234684, "grad_norm": 0.4338134229183197, "learning_rate": 9.358828642508622e-06, "loss": 0.3856, "step": 11749 }, { "epoch": 0.5392134367399385, "grad_norm": 0.5002368688583374, "learning_rate": 9.358708514080697e-06, "loss": 0.4655, "step": 11750 }, { "epoch": 0.539259327245193, "grad_norm": 0.4632793962955475, "learning_rate": 9.358588375171453e-06, "loss": 0.4596, "step": 11751 }, { "epoch": 0.5393052177504475, "grad_norm": 0.4487612843513489, "learning_rate": 9.358468225781178e-06, "loss": 0.3793, "step": 11752 }, { "epoch": 0.5393511082557019, "grad_norm": 0.4145508408546448, "learning_rate": 9.358348065910166e-06, "loss": 0.2976, "step": 11753 }, { "epoch": 0.5393969987609564, "grad_norm": 0.5098912119865417, "learning_rate": 9.358227895558701e-06, "loss": 0.4267, "step": 11754 }, { "epoch": 0.5394428892662109, "grad_norm": 0.4779066741466522, "learning_rate": 9.358107714727072e-06, "loss": 0.4517, "step": 11755 }, { "epoch": 0.5394887797714653, "grad_norm": 0.47584226727485657, "learning_rate": 9.35798752341557e-06, "loss": 0.4246, "step": 11756 }, { "epoch": 0.5395346702767198, "grad_norm": 0.7093757390975952, "learning_rate": 9.357867321624485e-06, "loss": 0.5258, "step": 11757 }, { "epoch": 0.5395805607819742, "grad_norm": 0.5028489828109741, "learning_rate": 9.357747109354102e-06, "loss": 0.4544, "step": 11758 }, { "epoch": 0.5396264512872286, "grad_norm": 0.4866946339607239, "learning_rate": 9.357626886604715e-06, "loss": 0.3878, "step": 11759 }, { "epoch": 0.5396723417924831, "grad_norm": 0.47552040219306946, "learning_rate": 9.357506653376609e-06, "loss": 0.439, "step": 11760 }, { "epoch": 0.5397182322977376, "grad_norm": 0.4580599367618561, "learning_rate": 9.357386409670076e-06, "loss": 0.4168, "step": 11761 }, { "epoch": 0.539764122802992, "grad_norm": 0.4799123704433441, "learning_rate": 9.357266155485404e-06, "loss": 0.4385, "step": 11762 }, { "epoch": 0.5398100133082465, "grad_norm": 0.4635271728038788, "learning_rate": 9.35714589082288e-06, "loss": 0.4093, "step": 11763 }, { "epoch": 0.539855903813501, "grad_norm": 0.4889325797557831, "learning_rate": 9.357025615682798e-06, "loss": 0.3931, "step": 11764 }, { "epoch": 0.5399017943187554, "grad_norm": 0.49372759461402893, "learning_rate": 9.356905330065442e-06, "loss": 0.3893, "step": 11765 }, { "epoch": 0.5399476848240099, "grad_norm": 0.468430757522583, "learning_rate": 9.356785033971106e-06, "loss": 0.3458, "step": 11766 }, { "epoch": 0.5399935753292644, "grad_norm": 0.46209850907325745, "learning_rate": 9.356664727400075e-06, "loss": 0.3918, "step": 11767 }, { "epoch": 0.5400394658345188, "grad_norm": 0.4556235373020172, "learning_rate": 9.356544410352641e-06, "loss": 0.3714, "step": 11768 }, { "epoch": 0.5400853563397733, "grad_norm": 0.4316776692867279, "learning_rate": 9.356424082829093e-06, "loss": 0.2993, "step": 11769 }, { "epoch": 0.5401312468450278, "grad_norm": 0.508688747882843, "learning_rate": 9.356303744829722e-06, "loss": 0.5229, "step": 11770 }, { "epoch": 0.5401771373502823, "grad_norm": 0.4347439110279083, "learning_rate": 9.356183396354813e-06, "loss": 0.3734, "step": 11771 }, { "epoch": 0.5402230278555367, "grad_norm": 0.46213340759277344, "learning_rate": 9.356063037404657e-06, "loss": 0.4129, "step": 11772 }, { "epoch": 0.5402689183607912, "grad_norm": 0.4284520447254181, "learning_rate": 9.355942667979547e-06, "loss": 0.3263, "step": 11773 }, { "epoch": 0.5403148088660457, "grad_norm": 0.5183601975440979, "learning_rate": 9.355822288079767e-06, "loss": 0.3736, "step": 11774 }, { "epoch": 0.5403606993713, "grad_norm": 0.45145323872566223, "learning_rate": 9.35570189770561e-06, "loss": 0.3761, "step": 11775 }, { "epoch": 0.5404065898765545, "grad_norm": 0.4858222007751465, "learning_rate": 9.355581496857364e-06, "loss": 0.4552, "step": 11776 }, { "epoch": 0.540452480381809, "grad_norm": 0.47047221660614014, "learning_rate": 9.35546108553532e-06, "loss": 0.4106, "step": 11777 }, { "epoch": 0.5404983708870634, "grad_norm": 0.45213961601257324, "learning_rate": 9.355340663739767e-06, "loss": 0.4236, "step": 11778 }, { "epoch": 0.5405442613923179, "grad_norm": 0.4616587460041046, "learning_rate": 9.355220231470994e-06, "loss": 0.4735, "step": 11779 }, { "epoch": 0.5405901518975724, "grad_norm": 0.47109919786453247, "learning_rate": 9.355099788729291e-06, "loss": 0.3975, "step": 11780 }, { "epoch": 0.5406360424028268, "grad_norm": 0.4649921655654907, "learning_rate": 9.354979335514948e-06, "loss": 0.4103, "step": 11781 }, { "epoch": 0.5406819329080813, "grad_norm": 0.4334821403026581, "learning_rate": 9.354858871828254e-06, "loss": 0.38, "step": 11782 }, { "epoch": 0.5407278234133358, "grad_norm": 0.413735955953598, "learning_rate": 9.354738397669498e-06, "loss": 0.2719, "step": 11783 }, { "epoch": 0.5407737139185902, "grad_norm": 0.4799814224243164, "learning_rate": 9.35461791303897e-06, "loss": 0.447, "step": 11784 }, { "epoch": 0.5408196044238447, "grad_norm": 0.5069194436073303, "learning_rate": 9.354497417936963e-06, "loss": 0.5104, "step": 11785 }, { "epoch": 0.5408654949290992, "grad_norm": 0.44394585490226746, "learning_rate": 9.35437691236376e-06, "loss": 0.3367, "step": 11786 }, { "epoch": 0.5409113854343537, "grad_norm": 0.4505567252635956, "learning_rate": 9.35425639631966e-06, "loss": 0.346, "step": 11787 }, { "epoch": 0.5409572759396081, "grad_norm": 0.4658847451210022, "learning_rate": 9.354135869804943e-06, "loss": 0.3795, "step": 11788 }, { "epoch": 0.5410031664448626, "grad_norm": 0.47817355394363403, "learning_rate": 9.354015332819906e-06, "loss": 0.4299, "step": 11789 }, { "epoch": 0.5410490569501171, "grad_norm": 0.46292224526405334, "learning_rate": 9.353894785364834e-06, "loss": 0.4355, "step": 11790 }, { "epoch": 0.5410949474553715, "grad_norm": 0.495517373085022, "learning_rate": 9.35377422744002e-06, "loss": 0.4362, "step": 11791 }, { "epoch": 0.541140837960626, "grad_norm": 0.48221826553344727, "learning_rate": 9.353653659045755e-06, "loss": 0.4937, "step": 11792 }, { "epoch": 0.5411867284658805, "grad_norm": 0.48616424202919006, "learning_rate": 9.353533080182324e-06, "loss": 0.4281, "step": 11793 }, { "epoch": 0.5412326189711348, "grad_norm": 0.46081969141960144, "learning_rate": 9.353412490850022e-06, "loss": 0.4162, "step": 11794 }, { "epoch": 0.5412785094763893, "grad_norm": 0.4284566640853882, "learning_rate": 9.353291891049137e-06, "loss": 0.3753, "step": 11795 }, { "epoch": 0.5413243999816438, "grad_norm": 0.4643787741661072, "learning_rate": 9.353171280779958e-06, "loss": 0.3701, "step": 11796 }, { "epoch": 0.5413702904868982, "grad_norm": 0.4975559413433075, "learning_rate": 9.353050660042777e-06, "loss": 0.487, "step": 11797 }, { "epoch": 0.5414161809921527, "grad_norm": 0.47529593110084534, "learning_rate": 9.352930028837884e-06, "loss": 0.422, "step": 11798 }, { "epoch": 0.5414620714974072, "grad_norm": 0.4581986367702484, "learning_rate": 9.352809387165564e-06, "loss": 0.4088, "step": 11799 }, { "epoch": 0.5415079620026616, "grad_norm": 0.4705793559551239, "learning_rate": 9.352688735026115e-06, "loss": 0.3395, "step": 11800 }, { "epoch": 0.5415538525079161, "grad_norm": 0.4848674237728119, "learning_rate": 9.352568072419823e-06, "loss": 0.4898, "step": 11801 }, { "epoch": 0.5415997430131706, "grad_norm": 0.4780259132385254, "learning_rate": 9.352447399346977e-06, "loss": 0.3892, "step": 11802 }, { "epoch": 0.541645633518425, "grad_norm": 0.4688941538333893, "learning_rate": 9.352326715807869e-06, "loss": 0.362, "step": 11803 }, { "epoch": 0.5416915240236795, "grad_norm": 0.47002193331718445, "learning_rate": 9.352206021802789e-06, "loss": 0.3776, "step": 11804 }, { "epoch": 0.541737414528934, "grad_norm": 0.5230007767677307, "learning_rate": 9.352085317332027e-06, "loss": 0.494, "step": 11805 }, { "epoch": 0.5417833050341885, "grad_norm": 0.47935017943382263, "learning_rate": 9.351964602395874e-06, "loss": 0.396, "step": 11806 }, { "epoch": 0.5418291955394429, "grad_norm": 0.42556673288345337, "learning_rate": 9.35184387699462e-06, "loss": 0.381, "step": 11807 }, { "epoch": 0.5418750860446974, "grad_norm": 0.4134579300880432, "learning_rate": 9.351723141128554e-06, "loss": 0.3652, "step": 11808 }, { "epoch": 0.5419209765499519, "grad_norm": 0.46036505699157715, "learning_rate": 9.351602394797968e-06, "loss": 0.392, "step": 11809 }, { "epoch": 0.5419668670552062, "grad_norm": 0.440643846988678, "learning_rate": 9.35148163800315e-06, "loss": 0.3571, "step": 11810 }, { "epoch": 0.5420127575604607, "grad_norm": 0.4625470042228699, "learning_rate": 9.351360870744393e-06, "loss": 0.4355, "step": 11811 }, { "epoch": 0.5420586480657152, "grad_norm": 0.46800902485847473, "learning_rate": 9.351240093021987e-06, "loss": 0.3927, "step": 11812 }, { "epoch": 0.5421045385709696, "grad_norm": 0.4468221366405487, "learning_rate": 9.351119304836222e-06, "loss": 0.3952, "step": 11813 }, { "epoch": 0.5421504290762241, "grad_norm": 0.4418472945690155, "learning_rate": 9.350998506187388e-06, "loss": 0.3619, "step": 11814 }, { "epoch": 0.5421963195814786, "grad_norm": 0.43051525950431824, "learning_rate": 9.350877697075775e-06, "loss": 0.3289, "step": 11815 }, { "epoch": 0.542242210086733, "grad_norm": 0.4173886477947235, "learning_rate": 9.350756877501676e-06, "loss": 0.3282, "step": 11816 }, { "epoch": 0.5422881005919875, "grad_norm": 0.46568477153778076, "learning_rate": 9.350636047465378e-06, "loss": 0.438, "step": 11817 }, { "epoch": 0.542333991097242, "grad_norm": 0.47144076228141785, "learning_rate": 9.350515206967175e-06, "loss": 0.3893, "step": 11818 }, { "epoch": 0.5423798816024964, "grad_norm": 0.44918006658554077, "learning_rate": 9.350394356007355e-06, "loss": 0.3491, "step": 11819 }, { "epoch": 0.5424257721077509, "grad_norm": 0.44110172986984253, "learning_rate": 9.35027349458621e-06, "loss": 0.3486, "step": 11820 }, { "epoch": 0.5424716626130054, "grad_norm": 0.4185820519924164, "learning_rate": 9.35015262270403e-06, "loss": 0.2931, "step": 11821 }, { "epoch": 0.5425175531182598, "grad_norm": 0.45126640796661377, "learning_rate": 9.350031740361104e-06, "loss": 0.4063, "step": 11822 }, { "epoch": 0.5425634436235143, "grad_norm": 0.43778812885284424, "learning_rate": 9.349910847557729e-06, "loss": 0.3596, "step": 11823 }, { "epoch": 0.5426093341287688, "grad_norm": 0.4214657247066498, "learning_rate": 9.349789944294188e-06, "loss": 0.3254, "step": 11824 }, { "epoch": 0.5426552246340233, "grad_norm": 0.48458001017570496, "learning_rate": 9.349669030570776e-06, "loss": 0.4779, "step": 11825 }, { "epoch": 0.5427011151392777, "grad_norm": 0.5321649312973022, "learning_rate": 9.349548106387783e-06, "loss": 0.3905, "step": 11826 }, { "epoch": 0.5427470056445322, "grad_norm": 0.4375259280204773, "learning_rate": 9.349427171745498e-06, "loss": 0.4041, "step": 11827 }, { "epoch": 0.5427928961497867, "grad_norm": 0.4763076901435852, "learning_rate": 9.349306226644214e-06, "loss": 0.4435, "step": 11828 }, { "epoch": 0.542838786655041, "grad_norm": 0.44987210631370544, "learning_rate": 9.349185271084222e-06, "loss": 0.386, "step": 11829 }, { "epoch": 0.5428846771602955, "grad_norm": 0.4584258198738098, "learning_rate": 9.349064305065812e-06, "loss": 0.4269, "step": 11830 }, { "epoch": 0.54293056766555, "grad_norm": 0.45892924070358276, "learning_rate": 9.348943328589275e-06, "loss": 0.4416, "step": 11831 }, { "epoch": 0.5429764581708044, "grad_norm": 0.4012521207332611, "learning_rate": 9.348822341654901e-06, "loss": 0.2997, "step": 11832 }, { "epoch": 0.5430223486760589, "grad_norm": 0.43315044045448303, "learning_rate": 9.348701344262982e-06, "loss": 0.3403, "step": 11833 }, { "epoch": 0.5430682391813134, "grad_norm": 0.45989835262298584, "learning_rate": 9.34858033641381e-06, "loss": 0.3515, "step": 11834 }, { "epoch": 0.5431141296865678, "grad_norm": 0.4613991975784302, "learning_rate": 9.348459318107675e-06, "loss": 0.4053, "step": 11835 }, { "epoch": 0.5431600201918223, "grad_norm": 0.4385504126548767, "learning_rate": 9.348338289344865e-06, "loss": 0.3647, "step": 11836 }, { "epoch": 0.5432059106970768, "grad_norm": 0.4765866994857788, "learning_rate": 9.348217250125676e-06, "loss": 0.473, "step": 11837 }, { "epoch": 0.5432518012023312, "grad_norm": 0.47670695185661316, "learning_rate": 9.348096200450396e-06, "loss": 0.4057, "step": 11838 }, { "epoch": 0.5432976917075857, "grad_norm": 0.43690064549446106, "learning_rate": 9.347975140319318e-06, "loss": 0.3628, "step": 11839 }, { "epoch": 0.5433435822128402, "grad_norm": 0.5245376229286194, "learning_rate": 9.347854069732731e-06, "loss": 0.5441, "step": 11840 }, { "epoch": 0.5433894727180947, "grad_norm": 0.457271933555603, "learning_rate": 9.347732988690927e-06, "loss": 0.3883, "step": 11841 }, { "epoch": 0.5434353632233491, "grad_norm": 0.4336269497871399, "learning_rate": 9.347611897194198e-06, "loss": 0.4003, "step": 11842 }, { "epoch": 0.5434812537286036, "grad_norm": 0.4405486285686493, "learning_rate": 9.347490795242835e-06, "loss": 0.3878, "step": 11843 }, { "epoch": 0.5435271442338581, "grad_norm": 0.4559611976146698, "learning_rate": 9.347369682837128e-06, "loss": 0.3894, "step": 11844 }, { "epoch": 0.5435730347391124, "grad_norm": 0.46457624435424805, "learning_rate": 9.347248559977368e-06, "loss": 0.4254, "step": 11845 }, { "epoch": 0.543618925244367, "grad_norm": 0.4827214479446411, "learning_rate": 9.34712742666385e-06, "loss": 0.3872, "step": 11846 }, { "epoch": 0.5436648157496214, "grad_norm": 0.4457905888557434, "learning_rate": 9.34700628289686e-06, "loss": 0.3979, "step": 11847 }, { "epoch": 0.5437107062548758, "grad_norm": 0.4217289686203003, "learning_rate": 9.346885128676693e-06, "loss": 0.3097, "step": 11848 }, { "epoch": 0.5437565967601303, "grad_norm": 0.45775216817855835, "learning_rate": 9.346763964003637e-06, "loss": 0.3797, "step": 11849 }, { "epoch": 0.5438024872653848, "grad_norm": 0.44266411662101746, "learning_rate": 9.346642788877989e-06, "loss": 0.3768, "step": 11850 }, { "epoch": 0.5438483777706392, "grad_norm": 0.4988372027873993, "learning_rate": 9.346521603300035e-06, "loss": 0.4875, "step": 11851 }, { "epoch": 0.5438942682758937, "grad_norm": 0.4533219337463379, "learning_rate": 9.346400407270069e-06, "loss": 0.3812, "step": 11852 }, { "epoch": 0.5439401587811482, "grad_norm": 0.43136364221572876, "learning_rate": 9.346279200788382e-06, "loss": 0.3439, "step": 11853 }, { "epoch": 0.5439860492864026, "grad_norm": 0.46980252861976624, "learning_rate": 9.346157983855265e-06, "loss": 0.4665, "step": 11854 }, { "epoch": 0.5440319397916571, "grad_norm": 0.4426124691963196, "learning_rate": 9.346036756471009e-06, "loss": 0.4032, "step": 11855 }, { "epoch": 0.5440778302969116, "grad_norm": 0.4331699013710022, "learning_rate": 9.345915518635906e-06, "loss": 0.339, "step": 11856 }, { "epoch": 0.544123720802166, "grad_norm": 0.5154035687446594, "learning_rate": 9.345794270350248e-06, "loss": 0.4104, "step": 11857 }, { "epoch": 0.5441696113074205, "grad_norm": 0.4645753502845764, "learning_rate": 9.345673011614327e-06, "loss": 0.3731, "step": 11858 }, { "epoch": 0.544215501812675, "grad_norm": 0.49769800901412964, "learning_rate": 9.345551742428434e-06, "loss": 0.4557, "step": 11859 }, { "epoch": 0.5442613923179295, "grad_norm": 0.44506654143333435, "learning_rate": 9.34543046279286e-06, "loss": 0.3917, "step": 11860 }, { "epoch": 0.5443072828231839, "grad_norm": 0.537725031375885, "learning_rate": 9.345309172707898e-06, "loss": 0.4401, "step": 11861 }, { "epoch": 0.5443531733284384, "grad_norm": 0.4936114251613617, "learning_rate": 9.345187872173838e-06, "loss": 0.4316, "step": 11862 }, { "epoch": 0.5443990638336929, "grad_norm": 0.47287797927856445, "learning_rate": 9.345066561190974e-06, "loss": 0.3715, "step": 11863 }, { "epoch": 0.5444449543389472, "grad_norm": 0.4486594796180725, "learning_rate": 9.344945239759595e-06, "loss": 0.397, "step": 11864 }, { "epoch": 0.5444908448442017, "grad_norm": 0.4418001174926758, "learning_rate": 9.344823907879995e-06, "loss": 0.3803, "step": 11865 }, { "epoch": 0.5445367353494562, "grad_norm": 0.4633978009223938, "learning_rate": 9.344702565552463e-06, "loss": 0.3903, "step": 11866 }, { "epoch": 0.5445826258547106, "grad_norm": 0.4715845286846161, "learning_rate": 9.344581212777292e-06, "loss": 0.4259, "step": 11867 }, { "epoch": 0.5446285163599651, "grad_norm": 0.4580090641975403, "learning_rate": 9.344459849554777e-06, "loss": 0.4025, "step": 11868 }, { "epoch": 0.5446744068652196, "grad_norm": 0.45308545231819153, "learning_rate": 9.344338475885206e-06, "loss": 0.4072, "step": 11869 }, { "epoch": 0.544720297370474, "grad_norm": 0.44486701488494873, "learning_rate": 9.344217091768872e-06, "loss": 0.3671, "step": 11870 }, { "epoch": 0.5447661878757285, "grad_norm": 0.4847283661365509, "learning_rate": 9.344095697206069e-06, "loss": 0.4654, "step": 11871 }, { "epoch": 0.544812078380983, "grad_norm": 0.5664860010147095, "learning_rate": 9.343974292197083e-06, "loss": 0.4344, "step": 11872 }, { "epoch": 0.5448579688862374, "grad_norm": 0.42176544666290283, "learning_rate": 9.343852876742212e-06, "loss": 0.3323, "step": 11873 }, { "epoch": 0.5449038593914919, "grad_norm": 0.4359741508960724, "learning_rate": 9.343731450841746e-06, "loss": 0.333, "step": 11874 }, { "epoch": 0.5449497498967464, "grad_norm": 0.4587786793708801, "learning_rate": 9.343610014495977e-06, "loss": 0.3576, "step": 11875 }, { "epoch": 0.5449956404020009, "grad_norm": 0.4939517080783844, "learning_rate": 9.343488567705197e-06, "loss": 0.4374, "step": 11876 }, { "epoch": 0.5450415309072553, "grad_norm": 0.5242145657539368, "learning_rate": 9.343367110469697e-06, "loss": 0.3855, "step": 11877 }, { "epoch": 0.5450874214125098, "grad_norm": 0.4620373249053955, "learning_rate": 9.343245642789768e-06, "loss": 0.4518, "step": 11878 }, { "epoch": 0.5451333119177643, "grad_norm": 0.49062758684158325, "learning_rate": 9.343124164665706e-06, "loss": 0.5207, "step": 11879 }, { "epoch": 0.5451792024230186, "grad_norm": 0.4475047290325165, "learning_rate": 9.343002676097803e-06, "loss": 0.3944, "step": 11880 }, { "epoch": 0.5452250929282731, "grad_norm": 0.49329304695129395, "learning_rate": 9.342881177086347e-06, "loss": 0.4507, "step": 11881 }, { "epoch": 0.5452709834335276, "grad_norm": 0.4308706820011139, "learning_rate": 9.342759667631633e-06, "loss": 0.3393, "step": 11882 }, { "epoch": 0.545316873938782, "grad_norm": 0.44557979702949524, "learning_rate": 9.34263814773395e-06, "loss": 0.4097, "step": 11883 }, { "epoch": 0.5453627644440365, "grad_norm": 0.43194103240966797, "learning_rate": 9.342516617393597e-06, "loss": 0.3929, "step": 11884 }, { "epoch": 0.545408654949291, "grad_norm": 0.4289681017398834, "learning_rate": 9.342395076610859e-06, "loss": 0.3572, "step": 11885 }, { "epoch": 0.5454545454545454, "grad_norm": 0.43574059009552, "learning_rate": 9.342273525386034e-06, "loss": 0.3628, "step": 11886 }, { "epoch": 0.5455004359597999, "grad_norm": 0.47124069929122925, "learning_rate": 9.342151963719411e-06, "loss": 0.3799, "step": 11887 }, { "epoch": 0.5455463264650544, "grad_norm": 0.4588020443916321, "learning_rate": 9.34203039161128e-06, "loss": 0.417, "step": 11888 }, { "epoch": 0.5455922169703088, "grad_norm": 0.44188037514686584, "learning_rate": 9.341908809061939e-06, "loss": 0.3678, "step": 11889 }, { "epoch": 0.5456381074755633, "grad_norm": 0.4796905815601349, "learning_rate": 9.341787216071678e-06, "loss": 0.4204, "step": 11890 }, { "epoch": 0.5456839979808178, "grad_norm": 0.46321603655815125, "learning_rate": 9.341665612640789e-06, "loss": 0.426, "step": 11891 }, { "epoch": 0.5457298884860722, "grad_norm": 0.4615887999534607, "learning_rate": 9.341543998769563e-06, "loss": 0.439, "step": 11892 }, { "epoch": 0.5457757789913267, "grad_norm": 0.436480313539505, "learning_rate": 9.341422374458294e-06, "loss": 0.3936, "step": 11893 }, { "epoch": 0.5458216694965812, "grad_norm": 0.44779476523399353, "learning_rate": 9.341300739707276e-06, "loss": 0.4189, "step": 11894 }, { "epoch": 0.5458675600018357, "grad_norm": 0.4934377372264862, "learning_rate": 9.341179094516799e-06, "loss": 0.4308, "step": 11895 }, { "epoch": 0.5459134505070901, "grad_norm": 0.43862295150756836, "learning_rate": 9.341057438887155e-06, "loss": 0.3299, "step": 11896 }, { "epoch": 0.5459593410123446, "grad_norm": 0.44630101323127747, "learning_rate": 9.34093577281864e-06, "loss": 0.3454, "step": 11897 }, { "epoch": 0.546005231517599, "grad_norm": 0.547847330570221, "learning_rate": 9.340814096311543e-06, "loss": 0.4226, "step": 11898 }, { "epoch": 0.5460511220228534, "grad_norm": 0.454950749874115, "learning_rate": 9.340692409366158e-06, "loss": 0.3915, "step": 11899 }, { "epoch": 0.5460970125281079, "grad_norm": 0.4577164053916931, "learning_rate": 9.34057071198278e-06, "loss": 0.3861, "step": 11900 }, { "epoch": 0.5461429030333624, "grad_norm": 0.48604515194892883, "learning_rate": 9.340449004161698e-06, "loss": 0.5, "step": 11901 }, { "epoch": 0.5461887935386168, "grad_norm": 0.47552889585494995, "learning_rate": 9.340327285903206e-06, "loss": 0.3318, "step": 11902 }, { "epoch": 0.5462346840438713, "grad_norm": 0.4860277771949768, "learning_rate": 9.340205557207595e-06, "loss": 0.4303, "step": 11903 }, { "epoch": 0.5462805745491258, "grad_norm": 0.435284823179245, "learning_rate": 9.340083818075163e-06, "loss": 0.3703, "step": 11904 }, { "epoch": 0.5463264650543802, "grad_norm": 0.4470125436782837, "learning_rate": 9.339962068506197e-06, "loss": 0.406, "step": 11905 }, { "epoch": 0.5463723555596347, "grad_norm": 0.5407623052597046, "learning_rate": 9.339840308500991e-06, "loss": 0.4444, "step": 11906 }, { "epoch": 0.5464182460648892, "grad_norm": 0.4669787287712097, "learning_rate": 9.33971853805984e-06, "loss": 0.4159, "step": 11907 }, { "epoch": 0.5464641365701436, "grad_norm": 0.39115869998931885, "learning_rate": 9.339596757183037e-06, "loss": 0.2823, "step": 11908 }, { "epoch": 0.5465100270753981, "grad_norm": 0.48859718441963196, "learning_rate": 9.339474965870872e-06, "loss": 0.4575, "step": 11909 }, { "epoch": 0.5465559175806526, "grad_norm": 0.4885725975036621, "learning_rate": 9.339353164123639e-06, "loss": 0.4367, "step": 11910 }, { "epoch": 0.546601808085907, "grad_norm": 0.5023432374000549, "learning_rate": 9.33923135194163e-06, "loss": 0.4342, "step": 11911 }, { "epoch": 0.5466476985911615, "grad_norm": 0.5017714500427246, "learning_rate": 9.33910952932514e-06, "loss": 0.5021, "step": 11912 }, { "epoch": 0.546693589096416, "grad_norm": 0.45476987957954407, "learning_rate": 9.338987696274463e-06, "loss": 0.3801, "step": 11913 }, { "epoch": 0.5467394796016705, "grad_norm": 0.44223013520240784, "learning_rate": 9.338865852789889e-06, "loss": 0.3816, "step": 11914 }, { "epoch": 0.5467853701069249, "grad_norm": 0.4331381022930145, "learning_rate": 9.338743998871711e-06, "loss": 0.3938, "step": 11915 }, { "epoch": 0.5468312606121793, "grad_norm": 0.4733550250530243, "learning_rate": 9.338622134520224e-06, "loss": 0.3526, "step": 11916 }, { "epoch": 0.5468771511174338, "grad_norm": 0.5198267102241516, "learning_rate": 9.338500259735718e-06, "loss": 0.4789, "step": 11917 }, { "epoch": 0.5469230416226882, "grad_norm": 0.47378867864608765, "learning_rate": 9.33837837451849e-06, "loss": 0.4302, "step": 11918 }, { "epoch": 0.5469689321279427, "grad_norm": 0.47132954001426697, "learning_rate": 9.338256478868833e-06, "loss": 0.388, "step": 11919 }, { "epoch": 0.5470148226331972, "grad_norm": 0.4417761564254761, "learning_rate": 9.338134572787036e-06, "loss": 0.331, "step": 11920 }, { "epoch": 0.5470607131384516, "grad_norm": 0.4476693272590637, "learning_rate": 9.338012656273395e-06, "loss": 0.3778, "step": 11921 }, { "epoch": 0.5471066036437061, "grad_norm": 0.4371757209300995, "learning_rate": 9.337890729328202e-06, "loss": 0.3213, "step": 11922 }, { "epoch": 0.5471524941489606, "grad_norm": 0.49356135725975037, "learning_rate": 9.337768791951751e-06, "loss": 0.4583, "step": 11923 }, { "epoch": 0.547198384654215, "grad_norm": 0.4758743941783905, "learning_rate": 9.337646844144337e-06, "loss": 0.4605, "step": 11924 }, { "epoch": 0.5472442751594695, "grad_norm": 0.45748040080070496, "learning_rate": 9.33752488590625e-06, "loss": 0.4023, "step": 11925 }, { "epoch": 0.547290165664724, "grad_norm": 0.4380097985267639, "learning_rate": 9.337402917237783e-06, "loss": 0.3501, "step": 11926 }, { "epoch": 0.5473360561699784, "grad_norm": 0.48652786016464233, "learning_rate": 9.337280938139233e-06, "loss": 0.4767, "step": 11927 }, { "epoch": 0.5473819466752329, "grad_norm": 0.476633220911026, "learning_rate": 9.337158948610891e-06, "loss": 0.4006, "step": 11928 }, { "epoch": 0.5474278371804874, "grad_norm": 0.4548337459564209, "learning_rate": 9.337036948653051e-06, "loss": 0.3516, "step": 11929 }, { "epoch": 0.5474737276857419, "grad_norm": 0.48502957820892334, "learning_rate": 9.336914938266004e-06, "loss": 0.4405, "step": 11930 }, { "epoch": 0.5475196181909963, "grad_norm": 0.4599230885505676, "learning_rate": 9.336792917450047e-06, "loss": 0.4292, "step": 11931 }, { "epoch": 0.5475655086962508, "grad_norm": 0.46210649609565735, "learning_rate": 9.336670886205471e-06, "loss": 0.4366, "step": 11932 }, { "epoch": 0.5476113992015053, "grad_norm": 0.4912729859352112, "learning_rate": 9.336548844532571e-06, "loss": 0.4015, "step": 11933 }, { "epoch": 0.5476572897067596, "grad_norm": 0.4451366364955902, "learning_rate": 9.336426792431639e-06, "loss": 0.3736, "step": 11934 }, { "epoch": 0.5477031802120141, "grad_norm": 0.4855445921421051, "learning_rate": 9.33630472990297e-06, "loss": 0.3833, "step": 11935 }, { "epoch": 0.5477490707172686, "grad_norm": 0.445516437292099, "learning_rate": 9.336182656946854e-06, "loss": 0.4041, "step": 11936 }, { "epoch": 0.547794961222523, "grad_norm": 0.523945152759552, "learning_rate": 9.33606057356359e-06, "loss": 0.4052, "step": 11937 }, { "epoch": 0.5478408517277775, "grad_norm": 0.4580520689487457, "learning_rate": 9.33593847975347e-06, "loss": 0.3829, "step": 11938 }, { "epoch": 0.547886742233032, "grad_norm": 0.41742679476737976, "learning_rate": 9.335816375516784e-06, "loss": 0.3285, "step": 11939 }, { "epoch": 0.5479326327382864, "grad_norm": 0.4721064269542694, "learning_rate": 9.335694260853828e-06, "loss": 0.394, "step": 11940 }, { "epoch": 0.5479785232435409, "grad_norm": 0.4114242494106293, "learning_rate": 9.335572135764896e-06, "loss": 0.3232, "step": 11941 }, { "epoch": 0.5480244137487954, "grad_norm": 0.4183559715747833, "learning_rate": 9.335450000250282e-06, "loss": 0.3183, "step": 11942 }, { "epoch": 0.5480703042540498, "grad_norm": 0.45973044633865356, "learning_rate": 9.335327854310279e-06, "loss": 0.3912, "step": 11943 }, { "epoch": 0.5481161947593043, "grad_norm": 0.4514411687850952, "learning_rate": 9.33520569794518e-06, "loss": 0.4181, "step": 11944 }, { "epoch": 0.5481620852645588, "grad_norm": 0.4932914078235626, "learning_rate": 9.335083531155281e-06, "loss": 0.4803, "step": 11945 }, { "epoch": 0.5482079757698132, "grad_norm": 0.4695689380168915, "learning_rate": 9.334961353940872e-06, "loss": 0.4479, "step": 11946 }, { "epoch": 0.5482538662750677, "grad_norm": 0.4880853295326233, "learning_rate": 9.334839166302252e-06, "loss": 0.4232, "step": 11947 }, { "epoch": 0.5482997567803222, "grad_norm": 0.45557525753974915, "learning_rate": 9.33471696823971e-06, "loss": 0.372, "step": 11948 }, { "epoch": 0.5483456472855767, "grad_norm": 0.46407681703567505, "learning_rate": 9.334594759753541e-06, "loss": 0.3999, "step": 11949 }, { "epoch": 0.548391537790831, "grad_norm": 0.48239511251449585, "learning_rate": 9.334472540844042e-06, "loss": 0.4207, "step": 11950 }, { "epoch": 0.5484374282960855, "grad_norm": 0.42568421363830566, "learning_rate": 9.334350311511503e-06, "loss": 0.3556, "step": 11951 }, { "epoch": 0.54848331880134, "grad_norm": 0.47539007663726807, "learning_rate": 9.33422807175622e-06, "loss": 0.4405, "step": 11952 }, { "epoch": 0.5485292093065944, "grad_norm": 0.47793328762054443, "learning_rate": 9.334105821578485e-06, "loss": 0.444, "step": 11953 }, { "epoch": 0.5485750998118489, "grad_norm": 0.4893568754196167, "learning_rate": 9.333983560978594e-06, "loss": 0.4586, "step": 11954 }, { "epoch": 0.5486209903171034, "grad_norm": 0.45939329266548157, "learning_rate": 9.333861289956842e-06, "loss": 0.3775, "step": 11955 }, { "epoch": 0.5486668808223578, "grad_norm": 0.4701369106769562, "learning_rate": 9.33373900851352e-06, "loss": 0.4121, "step": 11956 }, { "epoch": 0.5487127713276123, "grad_norm": 0.4366547465324402, "learning_rate": 9.333616716648924e-06, "loss": 0.3636, "step": 11957 }, { "epoch": 0.5487586618328668, "grad_norm": 0.4269086718559265, "learning_rate": 9.333494414363346e-06, "loss": 0.2984, "step": 11958 }, { "epoch": 0.5488045523381212, "grad_norm": 0.45478543639183044, "learning_rate": 9.333372101657084e-06, "loss": 0.4023, "step": 11959 }, { "epoch": 0.5488504428433757, "grad_norm": 0.46015894412994385, "learning_rate": 9.33324977853043e-06, "loss": 0.3679, "step": 11960 }, { "epoch": 0.5488963333486302, "grad_norm": 0.42885449528694153, "learning_rate": 9.333127444983676e-06, "loss": 0.375, "step": 11961 }, { "epoch": 0.5489422238538846, "grad_norm": 0.5015444159507751, "learning_rate": 9.333005101017118e-06, "loss": 0.4642, "step": 11962 }, { "epoch": 0.5489881143591391, "grad_norm": 0.4491008222103119, "learning_rate": 9.332882746631051e-06, "loss": 0.3625, "step": 11963 }, { "epoch": 0.5490340048643936, "grad_norm": 0.4310057759284973, "learning_rate": 9.332760381825768e-06, "loss": 0.4002, "step": 11964 }, { "epoch": 0.5490798953696481, "grad_norm": 0.46215924620628357, "learning_rate": 9.332638006601565e-06, "loss": 0.4374, "step": 11965 }, { "epoch": 0.5491257858749025, "grad_norm": 0.4195605218410492, "learning_rate": 9.332515620958732e-06, "loss": 0.3289, "step": 11966 }, { "epoch": 0.549171676380157, "grad_norm": 0.4543750286102295, "learning_rate": 9.332393224897571e-06, "loss": 0.3647, "step": 11967 }, { "epoch": 0.5492175668854115, "grad_norm": 0.4636421501636505, "learning_rate": 9.332270818418368e-06, "loss": 0.3641, "step": 11968 }, { "epoch": 0.5492634573906658, "grad_norm": 0.4815918207168579, "learning_rate": 9.332148401521423e-06, "loss": 0.4052, "step": 11969 }, { "epoch": 0.5493093478959203, "grad_norm": 0.4500230550765991, "learning_rate": 9.332025974207026e-06, "loss": 0.4557, "step": 11970 }, { "epoch": 0.5493552384011748, "grad_norm": 0.4289769232273102, "learning_rate": 9.331903536475476e-06, "loss": 0.342, "step": 11971 }, { "epoch": 0.5494011289064292, "grad_norm": 0.48944559693336487, "learning_rate": 9.331781088327065e-06, "loss": 0.4711, "step": 11972 }, { "epoch": 0.5494470194116837, "grad_norm": 0.6866310238838196, "learning_rate": 9.331658629762085e-06, "loss": 0.5128, "step": 11973 }, { "epoch": 0.5494929099169382, "grad_norm": 0.525269627571106, "learning_rate": 9.331536160780837e-06, "loss": 0.4636, "step": 11974 }, { "epoch": 0.5495388004221926, "grad_norm": 0.47854605317115784, "learning_rate": 9.33141368138361e-06, "loss": 0.4853, "step": 11975 }, { "epoch": 0.5495846909274471, "grad_norm": 0.47168368101119995, "learning_rate": 9.3312911915707e-06, "loss": 0.4211, "step": 11976 }, { "epoch": 0.5496305814327016, "grad_norm": 0.45862630009651184, "learning_rate": 9.3311686913424e-06, "loss": 0.4234, "step": 11977 }, { "epoch": 0.549676471937956, "grad_norm": 0.44495606422424316, "learning_rate": 9.331046180699008e-06, "loss": 0.3773, "step": 11978 }, { "epoch": 0.5497223624432105, "grad_norm": 0.4484730064868927, "learning_rate": 9.330923659640816e-06, "loss": 0.4324, "step": 11979 }, { "epoch": 0.549768252948465, "grad_norm": 0.4571169912815094, "learning_rate": 9.330801128168121e-06, "loss": 0.407, "step": 11980 }, { "epoch": 0.5498141434537194, "grad_norm": 0.4378121495246887, "learning_rate": 9.330678586281215e-06, "loss": 0.3449, "step": 11981 }, { "epoch": 0.5498600339589739, "grad_norm": 0.4404963254928589, "learning_rate": 9.330556033980395e-06, "loss": 0.367, "step": 11982 }, { "epoch": 0.5499059244642284, "grad_norm": 0.47941526770591736, "learning_rate": 9.330433471265954e-06, "loss": 0.4032, "step": 11983 }, { "epoch": 0.5499518149694829, "grad_norm": 0.45340219140052795, "learning_rate": 9.330310898138186e-06, "loss": 0.3782, "step": 11984 }, { "epoch": 0.5499977054747373, "grad_norm": 0.4625984728336334, "learning_rate": 9.330188314597388e-06, "loss": 0.3934, "step": 11985 }, { "epoch": 0.5500435959799918, "grad_norm": 0.42984768748283386, "learning_rate": 9.330065720643853e-06, "loss": 0.3826, "step": 11986 }, { "epoch": 0.5500894864852462, "grad_norm": 0.4522968530654907, "learning_rate": 9.329943116277877e-06, "loss": 0.4063, "step": 11987 }, { "epoch": 0.5501353769905006, "grad_norm": 0.46702197194099426, "learning_rate": 9.329820501499754e-06, "loss": 0.3816, "step": 11988 }, { "epoch": 0.5501812674957551, "grad_norm": 0.5477336049079895, "learning_rate": 9.32969787630978e-06, "loss": 0.466, "step": 11989 }, { "epoch": 0.5502271580010096, "grad_norm": 0.45845189690589905, "learning_rate": 9.329575240708249e-06, "loss": 0.4122, "step": 11990 }, { "epoch": 0.550273048506264, "grad_norm": 0.484897255897522, "learning_rate": 9.329452594695456e-06, "loss": 0.4681, "step": 11991 }, { "epoch": 0.5503189390115185, "grad_norm": 0.44475236535072327, "learning_rate": 9.329329938271696e-06, "loss": 0.3602, "step": 11992 }, { "epoch": 0.550364829516773, "grad_norm": 0.4254019856452942, "learning_rate": 9.329207271437264e-06, "loss": 0.3114, "step": 11993 }, { "epoch": 0.5504107200220274, "grad_norm": 0.41875210404396057, "learning_rate": 9.329084594192453e-06, "loss": 0.3442, "step": 11994 }, { "epoch": 0.5504566105272819, "grad_norm": 0.4428924024105072, "learning_rate": 9.32896190653756e-06, "loss": 0.3963, "step": 11995 }, { "epoch": 0.5505025010325364, "grad_norm": 0.43282920122146606, "learning_rate": 9.328839208472884e-06, "loss": 0.3964, "step": 11996 }, { "epoch": 0.5505483915377908, "grad_norm": 0.4443638026714325, "learning_rate": 9.328716499998712e-06, "loss": 0.4067, "step": 11997 }, { "epoch": 0.5505942820430453, "grad_norm": 0.46203410625457764, "learning_rate": 9.328593781115344e-06, "loss": 0.3466, "step": 11998 }, { "epoch": 0.5506401725482998, "grad_norm": 0.4323679208755493, "learning_rate": 9.328471051823074e-06, "loss": 0.3247, "step": 11999 }, { "epoch": 0.5506860630535542, "grad_norm": 0.4740195572376251, "learning_rate": 9.328348312122198e-06, "loss": 0.4242, "step": 12000 }, { "epoch": 0.5507319535588087, "grad_norm": 0.4352451264858246, "learning_rate": 9.32822556201301e-06, "loss": 0.4, "step": 12001 }, { "epoch": 0.5507778440640632, "grad_norm": 0.49147024750709534, "learning_rate": 9.328102801495804e-06, "loss": 0.4212, "step": 12002 }, { "epoch": 0.5508237345693177, "grad_norm": 0.4650729298591614, "learning_rate": 9.327980030570877e-06, "loss": 0.4151, "step": 12003 }, { "epoch": 0.550869625074572, "grad_norm": 0.45757368206977844, "learning_rate": 9.327857249238527e-06, "loss": 0.3818, "step": 12004 }, { "epoch": 0.5509155155798265, "grad_norm": 0.4687870740890503, "learning_rate": 9.327734457499044e-06, "loss": 0.3814, "step": 12005 }, { "epoch": 0.550961406085081, "grad_norm": 0.4887888729572296, "learning_rate": 9.327611655352726e-06, "loss": 0.4632, "step": 12006 }, { "epoch": 0.5510072965903354, "grad_norm": 0.49543240666389465, "learning_rate": 9.327488842799868e-06, "loss": 0.5111, "step": 12007 }, { "epoch": 0.5510531870955899, "grad_norm": 0.45357561111450195, "learning_rate": 9.327366019840764e-06, "loss": 0.4253, "step": 12008 }, { "epoch": 0.5510990776008444, "grad_norm": 0.46783414483070374, "learning_rate": 9.327243186475711e-06, "loss": 0.4283, "step": 12009 }, { "epoch": 0.5511449681060988, "grad_norm": 0.5238674283027649, "learning_rate": 9.327120342705004e-06, "loss": 0.4899, "step": 12010 }, { "epoch": 0.5511908586113533, "grad_norm": 0.45498740673065186, "learning_rate": 9.326997488528938e-06, "loss": 0.3767, "step": 12011 }, { "epoch": 0.5512367491166078, "grad_norm": 0.4747585654258728, "learning_rate": 9.326874623947809e-06, "loss": 0.466, "step": 12012 }, { "epoch": 0.5512826396218622, "grad_norm": 0.5111625790596008, "learning_rate": 9.326751748961914e-06, "loss": 0.4781, "step": 12013 }, { "epoch": 0.5513285301271167, "grad_norm": 0.4384748935699463, "learning_rate": 9.326628863571543e-06, "loss": 0.3585, "step": 12014 }, { "epoch": 0.5513744206323712, "grad_norm": 0.4508642554283142, "learning_rate": 9.326505967776997e-06, "loss": 0.4009, "step": 12015 }, { "epoch": 0.5514203111376256, "grad_norm": 0.4847089648246765, "learning_rate": 9.32638306157857e-06, "loss": 0.3967, "step": 12016 }, { "epoch": 0.5514662016428801, "grad_norm": 0.45215851068496704, "learning_rate": 9.326260144976555e-06, "loss": 0.4036, "step": 12017 }, { "epoch": 0.5515120921481346, "grad_norm": 0.49371060729026794, "learning_rate": 9.326137217971252e-06, "loss": 0.4417, "step": 12018 }, { "epoch": 0.5515579826533891, "grad_norm": 0.44972464442253113, "learning_rate": 9.326014280562954e-06, "loss": 0.389, "step": 12019 }, { "epoch": 0.5516038731586435, "grad_norm": 0.43199482560157776, "learning_rate": 9.325891332751957e-06, "loss": 0.36, "step": 12020 }, { "epoch": 0.551649763663898, "grad_norm": 0.4447985887527466, "learning_rate": 9.325768374538556e-06, "loss": 0.4016, "step": 12021 }, { "epoch": 0.5516956541691524, "grad_norm": 0.4255668520927429, "learning_rate": 9.325645405923048e-06, "loss": 0.3343, "step": 12022 }, { "epoch": 0.5517415446744068, "grad_norm": 0.43600738048553467, "learning_rate": 9.325522426905726e-06, "loss": 0.3156, "step": 12023 }, { "epoch": 0.5517874351796613, "grad_norm": 0.4471251368522644, "learning_rate": 9.32539943748689e-06, "loss": 0.4111, "step": 12024 }, { "epoch": 0.5518333256849158, "grad_norm": 0.44182848930358887, "learning_rate": 9.325276437666833e-06, "loss": 0.3506, "step": 12025 }, { "epoch": 0.5518792161901702, "grad_norm": 0.5259996652603149, "learning_rate": 9.325153427445853e-06, "loss": 0.4574, "step": 12026 }, { "epoch": 0.5519251066954247, "grad_norm": 0.42675042152404785, "learning_rate": 9.325030406824241e-06, "loss": 0.3743, "step": 12027 }, { "epoch": 0.5519709972006792, "grad_norm": 0.4457780718803406, "learning_rate": 9.324907375802297e-06, "loss": 0.37, "step": 12028 }, { "epoch": 0.5520168877059336, "grad_norm": 0.5031201243400574, "learning_rate": 9.324784334380315e-06, "loss": 0.432, "step": 12029 }, { "epoch": 0.5520627782111881, "grad_norm": 0.47958654165267944, "learning_rate": 9.324661282558594e-06, "loss": 0.4218, "step": 12030 }, { "epoch": 0.5521086687164426, "grad_norm": 0.45022791624069214, "learning_rate": 9.324538220337426e-06, "loss": 0.3839, "step": 12031 }, { "epoch": 0.552154559221697, "grad_norm": 0.4702366590499878, "learning_rate": 9.324415147717107e-06, "loss": 0.4302, "step": 12032 }, { "epoch": 0.5522004497269515, "grad_norm": 0.4438423216342926, "learning_rate": 9.324292064697938e-06, "loss": 0.3575, "step": 12033 }, { "epoch": 0.552246340232206, "grad_norm": 0.43346479535102844, "learning_rate": 9.324168971280208e-06, "loss": 0.3334, "step": 12034 }, { "epoch": 0.5522922307374604, "grad_norm": 0.4527474641799927, "learning_rate": 9.324045867464218e-06, "loss": 0.3864, "step": 12035 }, { "epoch": 0.5523381212427149, "grad_norm": 0.44111475348472595, "learning_rate": 9.323922753250261e-06, "loss": 0.3838, "step": 12036 }, { "epoch": 0.5523840117479694, "grad_norm": 0.45739734172821045, "learning_rate": 9.323799628638635e-06, "loss": 0.3895, "step": 12037 }, { "epoch": 0.5524299022532239, "grad_norm": 0.46910691261291504, "learning_rate": 9.323676493629637e-06, "loss": 0.405, "step": 12038 }, { "epoch": 0.5524757927584782, "grad_norm": 0.43242698907852173, "learning_rate": 9.32355334822356e-06, "loss": 0.3851, "step": 12039 }, { "epoch": 0.5525216832637327, "grad_norm": 0.4352954924106598, "learning_rate": 9.323430192420703e-06, "loss": 0.3579, "step": 12040 }, { "epoch": 0.5525675737689872, "grad_norm": 0.4331340789794922, "learning_rate": 9.32330702622136e-06, "loss": 0.3548, "step": 12041 }, { "epoch": 0.5526134642742416, "grad_norm": 0.44352322816848755, "learning_rate": 9.323183849625828e-06, "loss": 0.3477, "step": 12042 }, { "epoch": 0.5526593547794961, "grad_norm": 0.4381205439567566, "learning_rate": 9.323060662634403e-06, "loss": 0.392, "step": 12043 }, { "epoch": 0.5527052452847506, "grad_norm": 0.45821982622146606, "learning_rate": 9.322937465247384e-06, "loss": 0.445, "step": 12044 }, { "epoch": 0.552751135790005, "grad_norm": 0.43494147062301636, "learning_rate": 9.322814257465061e-06, "loss": 0.3214, "step": 12045 }, { "epoch": 0.5527970262952595, "grad_norm": 0.4614538848400116, "learning_rate": 9.322691039287736e-06, "loss": 0.4518, "step": 12046 }, { "epoch": 0.552842916800514, "grad_norm": 0.4773239493370056, "learning_rate": 9.322567810715704e-06, "loss": 0.4237, "step": 12047 }, { "epoch": 0.5528888073057684, "grad_norm": 0.44227465987205505, "learning_rate": 9.32244457174926e-06, "loss": 0.3585, "step": 12048 }, { "epoch": 0.5529346978110229, "grad_norm": 0.46889862418174744, "learning_rate": 9.322321322388701e-06, "loss": 0.4117, "step": 12049 }, { "epoch": 0.5529805883162774, "grad_norm": 0.4665222465991974, "learning_rate": 9.322198062634322e-06, "loss": 0.3821, "step": 12050 }, { "epoch": 0.5530264788215318, "grad_norm": 0.4897397458553314, "learning_rate": 9.322074792486421e-06, "loss": 0.4116, "step": 12051 }, { "epoch": 0.5530723693267863, "grad_norm": 0.4382251501083374, "learning_rate": 9.321951511945298e-06, "loss": 0.4295, "step": 12052 }, { "epoch": 0.5531182598320408, "grad_norm": 0.5055105090141296, "learning_rate": 9.321828221011243e-06, "loss": 0.469, "step": 12053 }, { "epoch": 0.5531641503372953, "grad_norm": 0.45462626218795776, "learning_rate": 9.321704919684554e-06, "loss": 0.3656, "step": 12054 }, { "epoch": 0.5532100408425497, "grad_norm": 0.4514736831188202, "learning_rate": 9.32158160796553e-06, "loss": 0.3895, "step": 12055 }, { "epoch": 0.5532559313478042, "grad_norm": 0.47061610221862793, "learning_rate": 9.321458285854465e-06, "loss": 0.4257, "step": 12056 }, { "epoch": 0.5533018218530587, "grad_norm": 0.4941685199737549, "learning_rate": 9.321334953351657e-06, "loss": 0.5324, "step": 12057 }, { "epoch": 0.553347712358313, "grad_norm": 0.4527307450771332, "learning_rate": 9.321211610457404e-06, "loss": 0.374, "step": 12058 }, { "epoch": 0.5533936028635675, "grad_norm": 0.4510420262813568, "learning_rate": 9.321088257172e-06, "loss": 0.3715, "step": 12059 }, { "epoch": 0.553439493368822, "grad_norm": 0.5284619927406311, "learning_rate": 9.320964893495741e-06, "loss": 0.4635, "step": 12060 }, { "epoch": 0.5534853838740764, "grad_norm": 0.4498647153377533, "learning_rate": 9.320841519428927e-06, "loss": 0.3746, "step": 12061 }, { "epoch": 0.5535312743793309, "grad_norm": 0.45350927114486694, "learning_rate": 9.320718134971852e-06, "loss": 0.4396, "step": 12062 }, { "epoch": 0.5535771648845854, "grad_norm": 0.44705232977867126, "learning_rate": 9.320594740124815e-06, "loss": 0.3685, "step": 12063 }, { "epoch": 0.5536230553898398, "grad_norm": 0.44742101430892944, "learning_rate": 9.320471334888108e-06, "loss": 0.3825, "step": 12064 }, { "epoch": 0.5536689458950943, "grad_norm": 0.47853606939315796, "learning_rate": 9.320347919262033e-06, "loss": 0.4122, "step": 12065 }, { "epoch": 0.5537148364003488, "grad_norm": 0.4651338458061218, "learning_rate": 9.320224493246885e-06, "loss": 0.3889, "step": 12066 }, { "epoch": 0.5537607269056032, "grad_norm": 0.4843061864376068, "learning_rate": 9.32010105684296e-06, "loss": 0.4366, "step": 12067 }, { "epoch": 0.5538066174108577, "grad_norm": 0.46956324577331543, "learning_rate": 9.319977610050556e-06, "loss": 0.3797, "step": 12068 }, { "epoch": 0.5538525079161122, "grad_norm": 0.4895978271961212, "learning_rate": 9.31985415286997e-06, "loss": 0.4515, "step": 12069 }, { "epoch": 0.5538983984213666, "grad_norm": 0.4545944929122925, "learning_rate": 9.319730685301498e-06, "loss": 0.3712, "step": 12070 }, { "epoch": 0.5539442889266211, "grad_norm": 0.4647114872932434, "learning_rate": 9.319607207345436e-06, "loss": 0.4646, "step": 12071 }, { "epoch": 0.5539901794318756, "grad_norm": 0.4699457883834839, "learning_rate": 9.319483719002082e-06, "loss": 0.4306, "step": 12072 }, { "epoch": 0.5540360699371301, "grad_norm": 0.48452994227409363, "learning_rate": 9.319360220271734e-06, "loss": 0.4109, "step": 12073 }, { "epoch": 0.5540819604423844, "grad_norm": 0.48863694071769714, "learning_rate": 9.319236711154687e-06, "loss": 0.4247, "step": 12074 }, { "epoch": 0.554127850947639, "grad_norm": 0.4846033751964569, "learning_rate": 9.319113191651239e-06, "loss": 0.4456, "step": 12075 }, { "epoch": 0.5541737414528934, "grad_norm": 0.46660470962524414, "learning_rate": 9.318989661761686e-06, "loss": 0.4015, "step": 12076 }, { "epoch": 0.5542196319581478, "grad_norm": 0.4736575186252594, "learning_rate": 9.318866121486327e-06, "loss": 0.3727, "step": 12077 }, { "epoch": 0.5542655224634023, "grad_norm": 0.43606826663017273, "learning_rate": 9.318742570825458e-06, "loss": 0.3333, "step": 12078 }, { "epoch": 0.5543114129686568, "grad_norm": 0.48082706332206726, "learning_rate": 9.318619009779378e-06, "loss": 0.4525, "step": 12079 }, { "epoch": 0.5543573034739112, "grad_norm": 0.5324326157569885, "learning_rate": 9.31849543834838e-06, "loss": 0.5368, "step": 12080 }, { "epoch": 0.5544031939791657, "grad_norm": 0.4841443598270416, "learning_rate": 9.318371856532763e-06, "loss": 0.4529, "step": 12081 }, { "epoch": 0.5544490844844202, "grad_norm": 0.48112794756889343, "learning_rate": 9.318248264332826e-06, "loss": 0.4459, "step": 12082 }, { "epoch": 0.5544949749896746, "grad_norm": 0.4488029181957245, "learning_rate": 9.318124661748865e-06, "loss": 0.361, "step": 12083 }, { "epoch": 0.5545408654949291, "grad_norm": 0.48441484570503235, "learning_rate": 9.318001048781175e-06, "loss": 0.4346, "step": 12084 }, { "epoch": 0.5545867560001836, "grad_norm": 0.492082804441452, "learning_rate": 9.317877425430058e-06, "loss": 0.4038, "step": 12085 }, { "epoch": 0.554632646505438, "grad_norm": 0.4680447280406952, "learning_rate": 9.317753791695806e-06, "loss": 0.4387, "step": 12086 }, { "epoch": 0.5546785370106925, "grad_norm": 0.46267813444137573, "learning_rate": 9.31763014757872e-06, "loss": 0.4013, "step": 12087 }, { "epoch": 0.554724427515947, "grad_norm": 0.4445572793483734, "learning_rate": 9.317506493079097e-06, "loss": 0.3491, "step": 12088 }, { "epoch": 0.5547703180212014, "grad_norm": 0.4425163269042969, "learning_rate": 9.317382828197232e-06, "loss": 0.3568, "step": 12089 }, { "epoch": 0.5548162085264559, "grad_norm": 1.0561143159866333, "learning_rate": 9.317259152933424e-06, "loss": 0.4236, "step": 12090 }, { "epoch": 0.5548620990317104, "grad_norm": 0.5127905607223511, "learning_rate": 9.317135467287972e-06, "loss": 0.5058, "step": 12091 }, { "epoch": 0.5549079895369649, "grad_norm": 0.49167007207870483, "learning_rate": 9.31701177126117e-06, "loss": 0.4713, "step": 12092 }, { "epoch": 0.5549538800422192, "grad_norm": 0.461435467004776, "learning_rate": 9.316888064853318e-06, "loss": 0.3655, "step": 12093 }, { "epoch": 0.5549997705474737, "grad_norm": 0.4498245418071747, "learning_rate": 9.316764348064713e-06, "loss": 0.393, "step": 12094 }, { "epoch": 0.5550456610527282, "grad_norm": 0.44873034954071045, "learning_rate": 9.316640620895651e-06, "loss": 0.3561, "step": 12095 }, { "epoch": 0.5550915515579826, "grad_norm": 0.43564528226852417, "learning_rate": 9.316516883346432e-06, "loss": 0.3666, "step": 12096 }, { "epoch": 0.5551374420632371, "grad_norm": 0.4924660623073578, "learning_rate": 9.316393135417351e-06, "loss": 0.4461, "step": 12097 }, { "epoch": 0.5551833325684916, "grad_norm": 0.9832836389541626, "learning_rate": 9.316269377108707e-06, "loss": 0.4736, "step": 12098 }, { "epoch": 0.555229223073746, "grad_norm": 0.5583959817886353, "learning_rate": 9.316145608420798e-06, "loss": 0.4646, "step": 12099 }, { "epoch": 0.5552751135790005, "grad_norm": 0.4562219977378845, "learning_rate": 9.31602182935392e-06, "loss": 0.388, "step": 12100 }, { "epoch": 0.555321004084255, "grad_norm": 0.5044378042221069, "learning_rate": 9.315898039908374e-06, "loss": 0.4816, "step": 12101 }, { "epoch": 0.5553668945895094, "grad_norm": 0.44881659746170044, "learning_rate": 9.315774240084452e-06, "loss": 0.376, "step": 12102 }, { "epoch": 0.5554127850947639, "grad_norm": 0.5203543901443481, "learning_rate": 9.315650429882458e-06, "loss": 0.4987, "step": 12103 }, { "epoch": 0.5554586756000184, "grad_norm": 0.4803381860256195, "learning_rate": 9.315526609302685e-06, "loss": 0.4262, "step": 12104 }, { "epoch": 0.5555045661052728, "grad_norm": 0.44015568494796753, "learning_rate": 9.315402778345432e-06, "loss": 0.3828, "step": 12105 }, { "epoch": 0.5555504566105273, "grad_norm": 0.5461061000823975, "learning_rate": 9.315278937011e-06, "loss": 0.4579, "step": 12106 }, { "epoch": 0.5555963471157818, "grad_norm": 0.48139676451683044, "learning_rate": 9.315155085299683e-06, "loss": 0.4675, "step": 12107 }, { "epoch": 0.5556422376210363, "grad_norm": 0.4233253300189972, "learning_rate": 9.315031223211779e-06, "loss": 0.3417, "step": 12108 }, { "epoch": 0.5556881281262906, "grad_norm": 0.4718100428581238, "learning_rate": 9.314907350747585e-06, "loss": 0.4456, "step": 12109 }, { "epoch": 0.5557340186315451, "grad_norm": 0.45583781599998474, "learning_rate": 9.314783467907405e-06, "loss": 0.3532, "step": 12110 }, { "epoch": 0.5557799091367996, "grad_norm": 0.45792990922927856, "learning_rate": 9.31465957469153e-06, "loss": 0.3881, "step": 12111 }, { "epoch": 0.555825799642054, "grad_norm": 0.4594878554344177, "learning_rate": 9.314535671100261e-06, "loss": 0.4063, "step": 12112 }, { "epoch": 0.5558716901473085, "grad_norm": 0.466871440410614, "learning_rate": 9.314411757133895e-06, "loss": 0.3855, "step": 12113 }, { "epoch": 0.555917580652563, "grad_norm": 0.4323359727859497, "learning_rate": 9.31428783279273e-06, "loss": 0.3189, "step": 12114 }, { "epoch": 0.5559634711578174, "grad_norm": 0.4370236098766327, "learning_rate": 9.314163898077067e-06, "loss": 0.3538, "step": 12115 }, { "epoch": 0.5560093616630719, "grad_norm": 0.4464944303035736, "learning_rate": 9.314039952987198e-06, "loss": 0.4118, "step": 12116 }, { "epoch": 0.5560552521683264, "grad_norm": 0.45803695917129517, "learning_rate": 9.313915997523426e-06, "loss": 0.3951, "step": 12117 }, { "epoch": 0.5561011426735808, "grad_norm": 0.8699121475219727, "learning_rate": 9.31379203168605e-06, "loss": 0.4238, "step": 12118 }, { "epoch": 0.5561470331788353, "grad_norm": 0.4901309609413147, "learning_rate": 9.313668055475362e-06, "loss": 0.4327, "step": 12119 }, { "epoch": 0.5561929236840898, "grad_norm": 0.46953085064888, "learning_rate": 9.313544068891665e-06, "loss": 0.4424, "step": 12120 }, { "epoch": 0.5562388141893442, "grad_norm": 0.4544709622859955, "learning_rate": 9.313420071935255e-06, "loss": 0.3977, "step": 12121 }, { "epoch": 0.5562847046945987, "grad_norm": 0.4466981291770935, "learning_rate": 9.313296064606432e-06, "loss": 0.4434, "step": 12122 }, { "epoch": 0.5563305951998532, "grad_norm": 0.5235185623168945, "learning_rate": 9.313172046905494e-06, "loss": 0.4996, "step": 12123 }, { "epoch": 0.5563764857051076, "grad_norm": 0.44973328709602356, "learning_rate": 9.313048018832736e-06, "loss": 0.3798, "step": 12124 }, { "epoch": 0.5564223762103621, "grad_norm": 0.4175124764442444, "learning_rate": 9.31292398038846e-06, "loss": 0.3282, "step": 12125 }, { "epoch": 0.5564682667156166, "grad_norm": 0.47038909792900085, "learning_rate": 9.312799931572965e-06, "loss": 0.414, "step": 12126 }, { "epoch": 0.556514157220871, "grad_norm": 0.45200538635253906, "learning_rate": 9.312675872386546e-06, "loss": 0.4343, "step": 12127 }, { "epoch": 0.5565600477261254, "grad_norm": 0.4696628451347351, "learning_rate": 9.312551802829502e-06, "loss": 0.4272, "step": 12128 }, { "epoch": 0.5566059382313799, "grad_norm": 0.4801579415798187, "learning_rate": 9.312427722902132e-06, "loss": 0.4942, "step": 12129 }, { "epoch": 0.5566518287366344, "grad_norm": 0.4296312928199768, "learning_rate": 9.312303632604734e-06, "loss": 0.3275, "step": 12130 }, { "epoch": 0.5566977192418888, "grad_norm": 0.48296621441841125, "learning_rate": 9.312179531937609e-06, "loss": 0.4251, "step": 12131 }, { "epoch": 0.5567436097471433, "grad_norm": 0.49556785821914673, "learning_rate": 9.31205542090105e-06, "loss": 0.3913, "step": 12132 }, { "epoch": 0.5567895002523978, "grad_norm": 0.4593547284603119, "learning_rate": 9.31193129949536e-06, "loss": 0.3791, "step": 12133 }, { "epoch": 0.5568353907576522, "grad_norm": 0.4720228910446167, "learning_rate": 9.311807167720838e-06, "loss": 0.4064, "step": 12134 }, { "epoch": 0.5568812812629067, "grad_norm": 0.4836314022541046, "learning_rate": 9.311683025577777e-06, "loss": 0.4393, "step": 12135 }, { "epoch": 0.5569271717681612, "grad_norm": 0.43410229682922363, "learning_rate": 9.311558873066482e-06, "loss": 0.3565, "step": 12136 }, { "epoch": 0.5569730622734156, "grad_norm": 0.46988412737846375, "learning_rate": 9.311434710187248e-06, "loss": 0.3803, "step": 12137 }, { "epoch": 0.5570189527786701, "grad_norm": 0.485321968793869, "learning_rate": 9.311310536940374e-06, "loss": 0.4171, "step": 12138 }, { "epoch": 0.5570648432839246, "grad_norm": 0.45082175731658936, "learning_rate": 9.31118635332616e-06, "loss": 0.3735, "step": 12139 }, { "epoch": 0.557110733789179, "grad_norm": 0.4475308060646057, "learning_rate": 9.311062159344903e-06, "loss": 0.3761, "step": 12140 }, { "epoch": 0.5571566242944335, "grad_norm": 0.4941766560077667, "learning_rate": 9.310937954996902e-06, "loss": 0.4191, "step": 12141 }, { "epoch": 0.557202514799688, "grad_norm": 0.4788995087146759, "learning_rate": 9.310813740282453e-06, "loss": 0.4459, "step": 12142 }, { "epoch": 0.5572484053049425, "grad_norm": 0.4992770552635193, "learning_rate": 9.310689515201861e-06, "loss": 0.4888, "step": 12143 }, { "epoch": 0.5572942958101968, "grad_norm": 0.4674097001552582, "learning_rate": 9.31056527975542e-06, "loss": 0.3727, "step": 12144 }, { "epoch": 0.5573401863154513, "grad_norm": 0.48826098442077637, "learning_rate": 9.310441033943431e-06, "loss": 0.4298, "step": 12145 }, { "epoch": 0.5573860768207058, "grad_norm": 0.5561777949333191, "learning_rate": 9.31031677776619e-06, "loss": 0.5093, "step": 12146 }, { "epoch": 0.5574319673259602, "grad_norm": 0.46843668818473816, "learning_rate": 9.310192511223997e-06, "loss": 0.4028, "step": 12147 }, { "epoch": 0.5574778578312147, "grad_norm": 0.4768430292606354, "learning_rate": 9.310068234317153e-06, "loss": 0.3792, "step": 12148 }, { "epoch": 0.5575237483364692, "grad_norm": 0.47033795714378357, "learning_rate": 9.309943947045956e-06, "loss": 0.3974, "step": 12149 }, { "epoch": 0.5575696388417236, "grad_norm": 0.4621320068836212, "learning_rate": 9.309819649410703e-06, "loss": 0.3748, "step": 12150 }, { "epoch": 0.5576155293469781, "grad_norm": 0.4729558527469635, "learning_rate": 9.309695341411693e-06, "loss": 0.353, "step": 12151 }, { "epoch": 0.5576614198522326, "grad_norm": 0.4460930824279785, "learning_rate": 9.309571023049227e-06, "loss": 0.3797, "step": 12152 }, { "epoch": 0.557707310357487, "grad_norm": 0.4192441403865814, "learning_rate": 9.309446694323603e-06, "loss": 0.3148, "step": 12153 }, { "epoch": 0.5577532008627415, "grad_norm": 0.4007530212402344, "learning_rate": 9.30932235523512e-06, "loss": 0.2906, "step": 12154 }, { "epoch": 0.557799091367996, "grad_norm": 0.479134202003479, "learning_rate": 9.309198005784075e-06, "loss": 0.4242, "step": 12155 }, { "epoch": 0.5578449818732504, "grad_norm": 0.4199838936328888, "learning_rate": 9.30907364597077e-06, "loss": 0.3325, "step": 12156 }, { "epoch": 0.5578908723785049, "grad_norm": 0.45139381289482117, "learning_rate": 9.308949275795502e-06, "loss": 0.3935, "step": 12157 }, { "epoch": 0.5579367628837594, "grad_norm": 0.4335850179195404, "learning_rate": 9.308824895258572e-06, "loss": 0.3441, "step": 12158 }, { "epoch": 0.5579826533890138, "grad_norm": 0.4658150374889374, "learning_rate": 9.308700504360277e-06, "loss": 0.3731, "step": 12159 }, { "epoch": 0.5580285438942683, "grad_norm": 0.4783141314983368, "learning_rate": 9.308576103100918e-06, "loss": 0.4304, "step": 12160 }, { "epoch": 0.5580744343995228, "grad_norm": 0.46721163392066956, "learning_rate": 9.308451691480795e-06, "loss": 0.4088, "step": 12161 }, { "epoch": 0.5581203249047773, "grad_norm": 0.43630751967430115, "learning_rate": 9.308327269500204e-06, "loss": 0.3263, "step": 12162 }, { "epoch": 0.5581662154100316, "grad_norm": 0.4469703435897827, "learning_rate": 9.308202837159444e-06, "loss": 0.3913, "step": 12163 }, { "epoch": 0.5582121059152861, "grad_norm": 0.5071561932563782, "learning_rate": 9.308078394458817e-06, "loss": 0.4468, "step": 12164 }, { "epoch": 0.5582579964205406, "grad_norm": 0.4541645646095276, "learning_rate": 9.307953941398622e-06, "loss": 0.3974, "step": 12165 }, { "epoch": 0.558303886925795, "grad_norm": 0.5984510183334351, "learning_rate": 9.307829477979158e-06, "loss": 0.4534, "step": 12166 }, { "epoch": 0.5583497774310495, "grad_norm": 0.5287955403327942, "learning_rate": 9.30770500420072e-06, "loss": 0.322, "step": 12167 }, { "epoch": 0.558395667936304, "grad_norm": 0.45505374670028687, "learning_rate": 9.307580520063616e-06, "loss": 0.4231, "step": 12168 }, { "epoch": 0.5584415584415584, "grad_norm": 0.4570363163948059, "learning_rate": 9.307456025568135e-06, "loss": 0.408, "step": 12169 }, { "epoch": 0.5584874489468129, "grad_norm": 0.4292166829109192, "learning_rate": 9.307331520714586e-06, "loss": 0.3278, "step": 12170 }, { "epoch": 0.5585333394520674, "grad_norm": 0.464189350605011, "learning_rate": 9.307207005503263e-06, "loss": 0.4442, "step": 12171 }, { "epoch": 0.5585792299573218, "grad_norm": 0.42920994758605957, "learning_rate": 9.307082479934467e-06, "loss": 0.3387, "step": 12172 }, { "epoch": 0.5586251204625763, "grad_norm": 0.47448137402534485, "learning_rate": 9.306957944008494e-06, "loss": 0.4054, "step": 12173 }, { "epoch": 0.5586710109678308, "grad_norm": 0.49340492486953735, "learning_rate": 9.30683339772565e-06, "loss": 0.4315, "step": 12174 }, { "epoch": 0.5587169014730852, "grad_norm": 0.5011741518974304, "learning_rate": 9.30670884108623e-06, "loss": 0.5097, "step": 12175 }, { "epoch": 0.5587627919783397, "grad_norm": 0.45290327072143555, "learning_rate": 9.306584274090534e-06, "loss": 0.3587, "step": 12176 }, { "epoch": 0.5588086824835942, "grad_norm": 0.4752029478549957, "learning_rate": 9.306459696738861e-06, "loss": 0.4603, "step": 12177 }, { "epoch": 0.5588545729888486, "grad_norm": 0.5530535578727722, "learning_rate": 9.306335109031514e-06, "loss": 0.322, "step": 12178 }, { "epoch": 0.558900463494103, "grad_norm": 0.5102943778038025, "learning_rate": 9.306210510968788e-06, "loss": 0.4541, "step": 12179 }, { "epoch": 0.5589463539993575, "grad_norm": 0.48672640323638916, "learning_rate": 9.306085902550986e-06, "loss": 0.4229, "step": 12180 }, { "epoch": 0.558992244504612, "grad_norm": 0.4599076509475708, "learning_rate": 9.305961283778406e-06, "loss": 0.4195, "step": 12181 }, { "epoch": 0.5590381350098664, "grad_norm": 0.48828449845314026, "learning_rate": 9.305836654651348e-06, "loss": 0.4713, "step": 12182 }, { "epoch": 0.5590840255151209, "grad_norm": 0.4966793656349182, "learning_rate": 9.305712015170112e-06, "loss": 0.4515, "step": 12183 }, { "epoch": 0.5591299160203754, "grad_norm": 0.46091508865356445, "learning_rate": 9.305587365334997e-06, "loss": 0.4148, "step": 12184 }, { "epoch": 0.5591758065256298, "grad_norm": 0.4421173930168152, "learning_rate": 9.305462705146306e-06, "loss": 0.3374, "step": 12185 }, { "epoch": 0.5592216970308843, "grad_norm": 0.44091668725013733, "learning_rate": 9.305338034604333e-06, "loss": 0.3312, "step": 12186 }, { "epoch": 0.5592675875361388, "grad_norm": 0.481839656829834, "learning_rate": 9.30521335370938e-06, "loss": 0.3931, "step": 12187 }, { "epoch": 0.5593134780413932, "grad_norm": 0.4522998034954071, "learning_rate": 9.305088662461751e-06, "loss": 0.3946, "step": 12188 }, { "epoch": 0.5593593685466477, "grad_norm": 0.4550017714500427, "learning_rate": 9.304963960861741e-06, "loss": 0.3488, "step": 12189 }, { "epoch": 0.5594052590519022, "grad_norm": 0.45765042304992676, "learning_rate": 9.304839248909652e-06, "loss": 0.3939, "step": 12190 }, { "epoch": 0.5594511495571566, "grad_norm": 0.46712562441825867, "learning_rate": 9.304714526605783e-06, "loss": 0.451, "step": 12191 }, { "epoch": 0.5594970400624111, "grad_norm": 0.46571141481399536, "learning_rate": 9.304589793950433e-06, "loss": 0.4242, "step": 12192 }, { "epoch": 0.5595429305676656, "grad_norm": 0.45479685068130493, "learning_rate": 9.304465050943904e-06, "loss": 0.3531, "step": 12193 }, { "epoch": 0.55958882107292, "grad_norm": 0.4918900728225708, "learning_rate": 9.304340297586494e-06, "loss": 0.4248, "step": 12194 }, { "epoch": 0.5596347115781745, "grad_norm": 0.4176943302154541, "learning_rate": 9.304215533878505e-06, "loss": 0.3563, "step": 12195 }, { "epoch": 0.559680602083429, "grad_norm": 0.44797900319099426, "learning_rate": 9.304090759820237e-06, "loss": 0.4209, "step": 12196 }, { "epoch": 0.5597264925886835, "grad_norm": 0.4970931112766266, "learning_rate": 9.303965975411987e-06, "loss": 0.5208, "step": 12197 }, { "epoch": 0.5597723830939378, "grad_norm": 0.43813297152519226, "learning_rate": 9.303841180654058e-06, "loss": 0.3997, "step": 12198 }, { "epoch": 0.5598182735991923, "grad_norm": 0.4533596634864807, "learning_rate": 9.303716375546749e-06, "loss": 0.4079, "step": 12199 }, { "epoch": 0.5598641641044468, "grad_norm": 0.4552463889122009, "learning_rate": 9.30359156009036e-06, "loss": 0.4388, "step": 12200 }, { "epoch": 0.5599100546097012, "grad_norm": 0.42153000831604004, "learning_rate": 9.303466734285195e-06, "loss": 0.3277, "step": 12201 }, { "epoch": 0.5599559451149557, "grad_norm": 0.4840945601463318, "learning_rate": 9.303341898131548e-06, "loss": 0.3911, "step": 12202 }, { "epoch": 0.5600018356202102, "grad_norm": 0.44774964451789856, "learning_rate": 9.303217051629722e-06, "loss": 0.3833, "step": 12203 }, { "epoch": 0.5600477261254646, "grad_norm": 0.4361903965473175, "learning_rate": 9.303092194780018e-06, "loss": 0.383, "step": 12204 }, { "epoch": 0.5600936166307191, "grad_norm": 0.44973692297935486, "learning_rate": 9.302967327582735e-06, "loss": 0.3897, "step": 12205 }, { "epoch": 0.5601395071359736, "grad_norm": 0.48008444905281067, "learning_rate": 9.302842450038173e-06, "loss": 0.4185, "step": 12206 }, { "epoch": 0.560185397641228, "grad_norm": 0.4797430634498596, "learning_rate": 9.302717562146632e-06, "loss": 0.4304, "step": 12207 }, { "epoch": 0.5602312881464825, "grad_norm": 0.43292778730392456, "learning_rate": 9.302592663908416e-06, "loss": 0.3752, "step": 12208 }, { "epoch": 0.560277178651737, "grad_norm": 0.4630085229873657, "learning_rate": 9.30246775532382e-06, "loss": 0.3733, "step": 12209 }, { "epoch": 0.5603230691569914, "grad_norm": 0.4516647160053253, "learning_rate": 9.302342836393148e-06, "loss": 0.3871, "step": 12210 }, { "epoch": 0.5603689596622459, "grad_norm": 0.4639822542667389, "learning_rate": 9.3022179071167e-06, "loss": 0.3882, "step": 12211 }, { "epoch": 0.5604148501675004, "grad_norm": 0.4666505753993988, "learning_rate": 9.302092967494776e-06, "loss": 0.444, "step": 12212 }, { "epoch": 0.5604607406727548, "grad_norm": 0.4380147457122803, "learning_rate": 9.301968017527676e-06, "loss": 0.3707, "step": 12213 }, { "epoch": 0.5605066311780093, "grad_norm": 0.43364036083221436, "learning_rate": 9.301843057215699e-06, "loss": 0.3832, "step": 12214 }, { "epoch": 0.5605525216832637, "grad_norm": 0.4586067497730255, "learning_rate": 9.301718086559148e-06, "loss": 0.3907, "step": 12215 }, { "epoch": 0.5605984121885182, "grad_norm": 0.4557298421859741, "learning_rate": 9.301593105558322e-06, "loss": 0.3968, "step": 12216 }, { "epoch": 0.5606443026937726, "grad_norm": 0.4413805603981018, "learning_rate": 9.301468114213523e-06, "loss": 0.3451, "step": 12217 }, { "epoch": 0.5606901931990271, "grad_norm": 0.47997286915779114, "learning_rate": 9.301343112525052e-06, "loss": 0.3989, "step": 12218 }, { "epoch": 0.5607360837042816, "grad_norm": 0.43419668078422546, "learning_rate": 9.301218100493206e-06, "loss": 0.3255, "step": 12219 }, { "epoch": 0.560781974209536, "grad_norm": 0.4807821214199066, "learning_rate": 9.30109307811829e-06, "loss": 0.4354, "step": 12220 }, { "epoch": 0.5608278647147905, "grad_norm": 0.6913928985595703, "learning_rate": 9.300968045400602e-06, "loss": 0.4538, "step": 12221 }, { "epoch": 0.560873755220045, "grad_norm": 0.4109992980957031, "learning_rate": 9.300843002340444e-06, "loss": 0.2966, "step": 12222 }, { "epoch": 0.5609196457252994, "grad_norm": 0.4806285798549652, "learning_rate": 9.300717948938114e-06, "loss": 0.4498, "step": 12223 }, { "epoch": 0.5609655362305539, "grad_norm": 0.4772484600543976, "learning_rate": 9.300592885193915e-06, "loss": 0.3953, "step": 12224 }, { "epoch": 0.5610114267358084, "grad_norm": 0.6011549830436707, "learning_rate": 9.300467811108149e-06, "loss": 0.3362, "step": 12225 }, { "epoch": 0.5610573172410628, "grad_norm": 0.4709915518760681, "learning_rate": 9.300342726681113e-06, "loss": 0.4224, "step": 12226 }, { "epoch": 0.5611032077463173, "grad_norm": 0.4223760962486267, "learning_rate": 9.30021763191311e-06, "loss": 0.3136, "step": 12227 }, { "epoch": 0.5611490982515718, "grad_norm": 0.4856641888618469, "learning_rate": 9.300092526804443e-06, "loss": 0.4319, "step": 12228 }, { "epoch": 0.5611949887568262, "grad_norm": 0.4588739573955536, "learning_rate": 9.29996741135541e-06, "loss": 0.4092, "step": 12229 }, { "epoch": 0.5612408792620807, "grad_norm": 0.4486434757709503, "learning_rate": 9.299842285566311e-06, "loss": 0.3413, "step": 12230 }, { "epoch": 0.5612867697673352, "grad_norm": 0.46499741077423096, "learning_rate": 9.29971714943745e-06, "loss": 0.4061, "step": 12231 }, { "epoch": 0.5613326602725897, "grad_norm": 0.47625085711479187, "learning_rate": 9.299592002969126e-06, "loss": 0.4565, "step": 12232 }, { "epoch": 0.561378550777844, "grad_norm": 0.45495492219924927, "learning_rate": 9.29946684616164e-06, "loss": 0.4314, "step": 12233 }, { "epoch": 0.5614244412830985, "grad_norm": 0.4289122223854065, "learning_rate": 9.299341679015293e-06, "loss": 0.3139, "step": 12234 }, { "epoch": 0.561470331788353, "grad_norm": 0.4708121120929718, "learning_rate": 9.299216501530386e-06, "loss": 0.355, "step": 12235 }, { "epoch": 0.5615162222936074, "grad_norm": 0.49518176913261414, "learning_rate": 9.29909131370722e-06, "loss": 0.5086, "step": 12236 }, { "epoch": 0.5615621127988619, "grad_norm": 0.45498061180114746, "learning_rate": 9.298966115546098e-06, "loss": 0.4002, "step": 12237 }, { "epoch": 0.5616080033041164, "grad_norm": 0.5243152379989624, "learning_rate": 9.298840907047318e-06, "loss": 0.5179, "step": 12238 }, { "epoch": 0.5616538938093708, "grad_norm": 0.44022777676582336, "learning_rate": 9.298715688211182e-06, "loss": 0.3185, "step": 12239 }, { "epoch": 0.5616997843146253, "grad_norm": 0.4458248019218445, "learning_rate": 9.298590459037993e-06, "loss": 0.3831, "step": 12240 }, { "epoch": 0.5617456748198798, "grad_norm": 0.4435526132583618, "learning_rate": 9.298465219528048e-06, "loss": 0.3782, "step": 12241 }, { "epoch": 0.5617915653251342, "grad_norm": 0.4742283225059509, "learning_rate": 9.298339969681653e-06, "loss": 0.4295, "step": 12242 }, { "epoch": 0.5618374558303887, "grad_norm": 0.4798366129398346, "learning_rate": 9.298214709499107e-06, "loss": 0.4263, "step": 12243 }, { "epoch": 0.5618833463356432, "grad_norm": 0.4598684310913086, "learning_rate": 9.29808943898071e-06, "loss": 0.425, "step": 12244 }, { "epoch": 0.5619292368408976, "grad_norm": 0.42091506719589233, "learning_rate": 9.297964158126765e-06, "loss": 0.3519, "step": 12245 }, { "epoch": 0.5619751273461521, "grad_norm": 0.45386961102485657, "learning_rate": 9.297838866937573e-06, "loss": 0.4419, "step": 12246 }, { "epoch": 0.5620210178514066, "grad_norm": 0.49147486686706543, "learning_rate": 9.297713565413433e-06, "loss": 0.4432, "step": 12247 }, { "epoch": 0.562066908356661, "grad_norm": 0.5244831442832947, "learning_rate": 9.29758825355465e-06, "loss": 0.4525, "step": 12248 }, { "epoch": 0.5621127988619155, "grad_norm": 0.4447228014469147, "learning_rate": 9.297462931361523e-06, "loss": 0.368, "step": 12249 }, { "epoch": 0.56215868936717, "grad_norm": 0.459503710269928, "learning_rate": 9.297337598834355e-06, "loss": 0.4391, "step": 12250 }, { "epoch": 0.5622045798724244, "grad_norm": 0.4367789924144745, "learning_rate": 9.297212255973446e-06, "loss": 0.3537, "step": 12251 }, { "epoch": 0.5622504703776788, "grad_norm": 0.47100570797920227, "learning_rate": 9.297086902779097e-06, "loss": 0.4811, "step": 12252 }, { "epoch": 0.5622963608829333, "grad_norm": 0.4529896378517151, "learning_rate": 9.29696153925161e-06, "loss": 0.3592, "step": 12253 }, { "epoch": 0.5623422513881878, "grad_norm": 0.4349445700645447, "learning_rate": 9.296836165391288e-06, "loss": 0.3587, "step": 12254 }, { "epoch": 0.5623881418934422, "grad_norm": 0.4545430839061737, "learning_rate": 9.29671078119843e-06, "loss": 0.414, "step": 12255 }, { "epoch": 0.5624340323986967, "grad_norm": 0.45265355706214905, "learning_rate": 9.29658538667334e-06, "loss": 0.3859, "step": 12256 }, { "epoch": 0.5624799229039512, "grad_norm": 0.47099509835243225, "learning_rate": 9.296459981816317e-06, "loss": 0.403, "step": 12257 }, { "epoch": 0.5625258134092056, "grad_norm": 0.4402739703655243, "learning_rate": 9.296334566627663e-06, "loss": 0.3782, "step": 12258 }, { "epoch": 0.5625717039144601, "grad_norm": 0.42116811871528625, "learning_rate": 9.296209141107682e-06, "loss": 0.3204, "step": 12259 }, { "epoch": 0.5626175944197146, "grad_norm": 0.4551568031311035, "learning_rate": 9.296083705256673e-06, "loss": 0.3607, "step": 12260 }, { "epoch": 0.562663484924969, "grad_norm": 0.4693467617034912, "learning_rate": 9.29595825907494e-06, "loss": 0.4515, "step": 12261 }, { "epoch": 0.5627093754302235, "grad_norm": 0.44797995686531067, "learning_rate": 9.295832802562782e-06, "loss": 0.3867, "step": 12262 }, { "epoch": 0.562755265935478, "grad_norm": 0.4529840052127838, "learning_rate": 9.295707335720501e-06, "loss": 0.421, "step": 12263 }, { "epoch": 0.5628011564407324, "grad_norm": 0.45345208048820496, "learning_rate": 9.295581858548402e-06, "loss": 0.3959, "step": 12264 }, { "epoch": 0.5628470469459869, "grad_norm": 0.43922895193099976, "learning_rate": 9.295456371046783e-06, "loss": 0.3661, "step": 12265 }, { "epoch": 0.5628929374512414, "grad_norm": 0.44701889157295227, "learning_rate": 9.295330873215946e-06, "loss": 0.3533, "step": 12266 }, { "epoch": 0.5629388279564957, "grad_norm": 0.45694538950920105, "learning_rate": 9.295205365056195e-06, "loss": 0.4229, "step": 12267 }, { "epoch": 0.5629847184617502, "grad_norm": 0.47272512316703796, "learning_rate": 9.295079846567832e-06, "loss": 0.4317, "step": 12268 }, { "epoch": 0.5630306089670047, "grad_norm": 0.47979989647865295, "learning_rate": 9.294954317751155e-06, "loss": 0.3983, "step": 12269 }, { "epoch": 0.5630764994722592, "grad_norm": 0.48282569646835327, "learning_rate": 9.29482877860647e-06, "loss": 0.4311, "step": 12270 }, { "epoch": 0.5631223899775136, "grad_norm": 0.5249196887016296, "learning_rate": 9.294703229134077e-06, "loss": 0.4931, "step": 12271 }, { "epoch": 0.5631682804827681, "grad_norm": 0.4999580681324005, "learning_rate": 9.294577669334277e-06, "loss": 0.4858, "step": 12272 }, { "epoch": 0.5632141709880226, "grad_norm": 0.4488961696624756, "learning_rate": 9.294452099207375e-06, "loss": 0.371, "step": 12273 }, { "epoch": 0.563260061493277, "grad_norm": 0.4934143126010895, "learning_rate": 9.29432651875367e-06, "loss": 0.4765, "step": 12274 }, { "epoch": 0.5633059519985315, "grad_norm": 0.44268858432769775, "learning_rate": 9.294200927973464e-06, "loss": 0.3678, "step": 12275 }, { "epoch": 0.563351842503786, "grad_norm": 0.4099865257740021, "learning_rate": 9.294075326867063e-06, "loss": 0.3204, "step": 12276 }, { "epoch": 0.5633977330090404, "grad_norm": 0.6284464001655579, "learning_rate": 9.293949715434764e-06, "loss": 0.4633, "step": 12277 }, { "epoch": 0.5634436235142949, "grad_norm": 0.47400614619255066, "learning_rate": 9.293824093676872e-06, "loss": 0.432, "step": 12278 }, { "epoch": 0.5634895140195494, "grad_norm": 0.4270019233226776, "learning_rate": 9.293698461593688e-06, "loss": 0.3407, "step": 12279 }, { "epoch": 0.5635354045248038, "grad_norm": 0.44250649213790894, "learning_rate": 9.293572819185513e-06, "loss": 0.3854, "step": 12280 }, { "epoch": 0.5635812950300583, "grad_norm": 0.42304226756095886, "learning_rate": 9.293447166452652e-06, "loss": 0.3455, "step": 12281 }, { "epoch": 0.5636271855353128, "grad_norm": 0.43560224771499634, "learning_rate": 9.293321503395405e-06, "loss": 0.338, "step": 12282 }, { "epoch": 0.5636730760405672, "grad_norm": 0.45362693071365356, "learning_rate": 9.293195830014076e-06, "loss": 0.3709, "step": 12283 }, { "epoch": 0.5637189665458217, "grad_norm": 0.41024479269981384, "learning_rate": 9.293070146308965e-06, "loss": 0.3211, "step": 12284 }, { "epoch": 0.5637648570510762, "grad_norm": 0.4536677300930023, "learning_rate": 9.292944452280375e-06, "loss": 0.4002, "step": 12285 }, { "epoch": 0.5638107475563306, "grad_norm": 0.44525158405303955, "learning_rate": 9.29281874792861e-06, "loss": 0.3607, "step": 12286 }, { "epoch": 0.563856638061585, "grad_norm": 0.6359711289405823, "learning_rate": 9.292693033253967e-06, "loss": 0.4739, "step": 12287 }, { "epoch": 0.5639025285668395, "grad_norm": 0.4852937161922455, "learning_rate": 9.292567308256756e-06, "loss": 0.4493, "step": 12288 }, { "epoch": 0.563948419072094, "grad_norm": 0.4483056962490082, "learning_rate": 9.292441572937275e-06, "loss": 0.3922, "step": 12289 }, { "epoch": 0.5639943095773484, "grad_norm": 0.47928833961486816, "learning_rate": 9.292315827295824e-06, "loss": 0.496, "step": 12290 }, { "epoch": 0.5640402000826029, "grad_norm": 0.442153662443161, "learning_rate": 9.29219007133271e-06, "loss": 0.3498, "step": 12291 }, { "epoch": 0.5640860905878574, "grad_norm": 0.47824329137802124, "learning_rate": 9.292064305048233e-06, "loss": 0.469, "step": 12292 }, { "epoch": 0.5641319810931118, "grad_norm": 0.47097247838974, "learning_rate": 9.291938528442697e-06, "loss": 0.4551, "step": 12293 }, { "epoch": 0.5641778715983663, "grad_norm": 0.49916717410087585, "learning_rate": 9.291812741516402e-06, "loss": 0.4219, "step": 12294 }, { "epoch": 0.5642237621036208, "grad_norm": 0.44558364152908325, "learning_rate": 9.291686944269652e-06, "loss": 0.355, "step": 12295 }, { "epoch": 0.5642696526088752, "grad_norm": 0.4427212178707123, "learning_rate": 9.291561136702752e-06, "loss": 0.3565, "step": 12296 }, { "epoch": 0.5643155431141297, "grad_norm": 0.45036619901657104, "learning_rate": 9.291435318816e-06, "loss": 0.3584, "step": 12297 }, { "epoch": 0.5643614336193842, "grad_norm": 0.4831230342388153, "learning_rate": 9.291309490609701e-06, "loss": 0.4038, "step": 12298 }, { "epoch": 0.5644073241246386, "grad_norm": 0.42990365624427795, "learning_rate": 9.291183652084154e-06, "loss": 0.3265, "step": 12299 }, { "epoch": 0.5644532146298931, "grad_norm": 0.4638040065765381, "learning_rate": 9.291057803239669e-06, "loss": 0.3719, "step": 12300 }, { "epoch": 0.5644991051351476, "grad_norm": 0.46280795335769653, "learning_rate": 9.290931944076542e-06, "loss": 0.387, "step": 12301 }, { "epoch": 0.564544995640402, "grad_norm": 0.464979350566864, "learning_rate": 9.290806074595079e-06, "loss": 0.4265, "step": 12302 }, { "epoch": 0.5645908861456564, "grad_norm": 0.4330803453922272, "learning_rate": 9.29068019479558e-06, "loss": 0.337, "step": 12303 }, { "epoch": 0.5646367766509109, "grad_norm": 0.45030567049980164, "learning_rate": 9.29055430467835e-06, "loss": 0.3736, "step": 12304 }, { "epoch": 0.5646826671561654, "grad_norm": 0.47264572978019714, "learning_rate": 9.290428404243692e-06, "loss": 0.4167, "step": 12305 }, { "epoch": 0.5647285576614198, "grad_norm": 0.5115957260131836, "learning_rate": 9.290302493491907e-06, "loss": 0.4501, "step": 12306 }, { "epoch": 0.5647744481666743, "grad_norm": 0.4238923490047455, "learning_rate": 9.290176572423298e-06, "loss": 0.3377, "step": 12307 }, { "epoch": 0.5648203386719288, "grad_norm": 0.49365487694740295, "learning_rate": 9.29005064103817e-06, "loss": 0.4826, "step": 12308 }, { "epoch": 0.5648662291771832, "grad_norm": 0.4579617977142334, "learning_rate": 9.289924699336823e-06, "loss": 0.3847, "step": 12309 }, { "epoch": 0.5649121196824377, "grad_norm": 0.47378009557724, "learning_rate": 9.289798747319561e-06, "loss": 0.4545, "step": 12310 }, { "epoch": 0.5649580101876922, "grad_norm": 0.4739331901073456, "learning_rate": 9.289672784986688e-06, "loss": 0.4167, "step": 12311 }, { "epoch": 0.5650039006929466, "grad_norm": 0.4547980725765228, "learning_rate": 9.289546812338504e-06, "loss": 0.3684, "step": 12312 }, { "epoch": 0.5650497911982011, "grad_norm": 0.45787227153778076, "learning_rate": 9.289420829375315e-06, "loss": 0.3574, "step": 12313 }, { "epoch": 0.5650956817034556, "grad_norm": 0.4667074382305145, "learning_rate": 9.289294836097423e-06, "loss": 0.4499, "step": 12314 }, { "epoch": 0.56514157220871, "grad_norm": 0.40683817863464355, "learning_rate": 9.28916883250513e-06, "loss": 0.3049, "step": 12315 }, { "epoch": 0.5651874627139645, "grad_norm": 0.43430161476135254, "learning_rate": 9.28904281859874e-06, "loss": 0.3436, "step": 12316 }, { "epoch": 0.565233353219219, "grad_norm": 0.7006710767745972, "learning_rate": 9.288916794378556e-06, "loss": 0.3966, "step": 12317 }, { "epoch": 0.5652792437244734, "grad_norm": 0.47191306948661804, "learning_rate": 9.288790759844879e-06, "loss": 0.3642, "step": 12318 }, { "epoch": 0.5653251342297279, "grad_norm": 0.44052189588546753, "learning_rate": 9.288664714998015e-06, "loss": 0.3523, "step": 12319 }, { "epoch": 0.5653710247349824, "grad_norm": 0.457026869058609, "learning_rate": 9.288538659838266e-06, "loss": 0.3781, "step": 12320 }, { "epoch": 0.5654169152402369, "grad_norm": 0.5153876543045044, "learning_rate": 9.288412594365935e-06, "loss": 0.5315, "step": 12321 }, { "epoch": 0.5654628057454912, "grad_norm": 0.4891815185546875, "learning_rate": 9.288286518581325e-06, "loss": 0.4521, "step": 12322 }, { "epoch": 0.5655086962507457, "grad_norm": 0.4297462999820709, "learning_rate": 9.288160432484739e-06, "loss": 0.3453, "step": 12323 }, { "epoch": 0.5655545867560002, "grad_norm": 0.46707066893577576, "learning_rate": 9.288034336076481e-06, "loss": 0.3803, "step": 12324 }, { "epoch": 0.5656004772612546, "grad_norm": 0.57640141248703, "learning_rate": 9.287908229356854e-06, "loss": 0.3316, "step": 12325 }, { "epoch": 0.5656463677665091, "grad_norm": 0.5167283415794373, "learning_rate": 9.28778211232616e-06, "loss": 0.4538, "step": 12326 }, { "epoch": 0.5656922582717636, "grad_norm": 0.4570002555847168, "learning_rate": 9.287655984984703e-06, "loss": 0.427, "step": 12327 }, { "epoch": 0.565738148777018, "grad_norm": 0.4519749879837036, "learning_rate": 9.287529847332788e-06, "loss": 0.3555, "step": 12328 }, { "epoch": 0.5657840392822725, "grad_norm": 0.43872126936912537, "learning_rate": 9.287403699370717e-06, "loss": 0.3468, "step": 12329 }, { "epoch": 0.565829929787527, "grad_norm": 0.4331133961677551, "learning_rate": 9.287277541098793e-06, "loss": 0.3627, "step": 12330 }, { "epoch": 0.5658758202927814, "grad_norm": 0.44898131489753723, "learning_rate": 9.287151372517319e-06, "loss": 0.3659, "step": 12331 }, { "epoch": 0.5659217107980359, "grad_norm": 0.4406052827835083, "learning_rate": 9.2870251936266e-06, "loss": 0.3557, "step": 12332 }, { "epoch": 0.5659676013032904, "grad_norm": 0.4835537374019623, "learning_rate": 9.286899004426936e-06, "loss": 0.4299, "step": 12333 }, { "epoch": 0.5660134918085448, "grad_norm": 0.43501392006874084, "learning_rate": 9.286772804918636e-06, "loss": 0.321, "step": 12334 }, { "epoch": 0.5660593823137993, "grad_norm": 0.4455309510231018, "learning_rate": 9.286646595101999e-06, "loss": 0.3968, "step": 12335 }, { "epoch": 0.5661052728190538, "grad_norm": 0.4751589596271515, "learning_rate": 9.28652037497733e-06, "loss": 0.3687, "step": 12336 }, { "epoch": 0.5661511633243081, "grad_norm": 0.4375549256801605, "learning_rate": 9.28639414454493e-06, "loss": 0.3732, "step": 12337 }, { "epoch": 0.5661970538295626, "grad_norm": 0.48499125242233276, "learning_rate": 9.286267903805108e-06, "loss": 0.4234, "step": 12338 }, { "epoch": 0.5662429443348171, "grad_norm": 0.4654761254787445, "learning_rate": 9.286141652758165e-06, "loss": 0.3718, "step": 12339 }, { "epoch": 0.5662888348400716, "grad_norm": 0.46959471702575684, "learning_rate": 9.2860153914044e-06, "loss": 0.4556, "step": 12340 }, { "epoch": 0.566334725345326, "grad_norm": 0.4355045258998871, "learning_rate": 9.285889119744125e-06, "loss": 0.3064, "step": 12341 }, { "epoch": 0.5663806158505805, "grad_norm": 0.4347067177295685, "learning_rate": 9.285762837777638e-06, "loss": 0.366, "step": 12342 }, { "epoch": 0.566426506355835, "grad_norm": 0.44626331329345703, "learning_rate": 9.285636545505244e-06, "loss": 0.3823, "step": 12343 }, { "epoch": 0.5664723968610894, "grad_norm": 0.5105581879615784, "learning_rate": 9.285510242927245e-06, "loss": 0.4178, "step": 12344 }, { "epoch": 0.5665182873663439, "grad_norm": 0.4818257987499237, "learning_rate": 9.28538393004395e-06, "loss": 0.4351, "step": 12345 }, { "epoch": 0.5665641778715984, "grad_norm": 0.4729830026626587, "learning_rate": 9.285257606855655e-06, "loss": 0.4205, "step": 12346 }, { "epoch": 0.5666100683768528, "grad_norm": 0.452766478061676, "learning_rate": 9.28513127336267e-06, "loss": 0.4502, "step": 12347 }, { "epoch": 0.5666559588821073, "grad_norm": 0.45837682485580444, "learning_rate": 9.285004929565296e-06, "loss": 0.3804, "step": 12348 }, { "epoch": 0.5667018493873618, "grad_norm": 0.4430794417858124, "learning_rate": 9.28487857546384e-06, "loss": 0.389, "step": 12349 }, { "epoch": 0.5667477398926162, "grad_norm": 0.4108556807041168, "learning_rate": 9.284752211058601e-06, "loss": 0.3445, "step": 12350 }, { "epoch": 0.5667936303978707, "grad_norm": 0.46997344493865967, "learning_rate": 9.284625836349886e-06, "loss": 0.3615, "step": 12351 }, { "epoch": 0.5668395209031252, "grad_norm": 0.4661126434803009, "learning_rate": 9.284499451337997e-06, "loss": 0.4088, "step": 12352 }, { "epoch": 0.5668854114083796, "grad_norm": 0.47118115425109863, "learning_rate": 9.284373056023238e-06, "loss": 0.4273, "step": 12353 }, { "epoch": 0.5669313019136341, "grad_norm": 0.42398905754089355, "learning_rate": 9.284246650405918e-06, "loss": 0.3474, "step": 12354 }, { "epoch": 0.5669771924188886, "grad_norm": 0.4324185252189636, "learning_rate": 9.284120234486334e-06, "loss": 0.3298, "step": 12355 }, { "epoch": 0.5670230829241429, "grad_norm": 0.47996610403060913, "learning_rate": 9.283993808264795e-06, "loss": 0.4485, "step": 12356 }, { "epoch": 0.5670689734293974, "grad_norm": 0.4737033545970917, "learning_rate": 9.283867371741602e-06, "loss": 0.3987, "step": 12357 }, { "epoch": 0.5671148639346519, "grad_norm": 0.4798785150051117, "learning_rate": 9.28374092491706e-06, "loss": 0.4083, "step": 12358 }, { "epoch": 0.5671607544399064, "grad_norm": 0.4558336138725281, "learning_rate": 9.283614467791472e-06, "loss": 0.3991, "step": 12359 }, { "epoch": 0.5672066449451608, "grad_norm": 0.44801995158195496, "learning_rate": 9.283488000365145e-06, "loss": 0.4273, "step": 12360 }, { "epoch": 0.5672525354504153, "grad_norm": 0.46096011996269226, "learning_rate": 9.283361522638378e-06, "loss": 0.3992, "step": 12361 }, { "epoch": 0.5672984259556698, "grad_norm": 0.42707687616348267, "learning_rate": 9.283235034611483e-06, "loss": 0.3199, "step": 12362 }, { "epoch": 0.5673443164609242, "grad_norm": 0.4474504292011261, "learning_rate": 9.283108536284754e-06, "loss": 0.4063, "step": 12363 }, { "epoch": 0.5673902069661787, "grad_norm": 0.43131572008132935, "learning_rate": 9.282982027658505e-06, "loss": 0.355, "step": 12364 }, { "epoch": 0.5674360974714332, "grad_norm": 0.4835481643676758, "learning_rate": 9.282855508733036e-06, "loss": 0.4254, "step": 12365 }, { "epoch": 0.5674819879766876, "grad_norm": 0.4040113389492035, "learning_rate": 9.28272897950865e-06, "loss": 0.3239, "step": 12366 }, { "epoch": 0.5675278784819421, "grad_norm": 0.45294812321662903, "learning_rate": 9.282602439985652e-06, "loss": 0.4037, "step": 12367 }, { "epoch": 0.5675737689871966, "grad_norm": 0.4249016344547272, "learning_rate": 9.282475890164347e-06, "loss": 0.3281, "step": 12368 }, { "epoch": 0.567619659492451, "grad_norm": 0.5042969584465027, "learning_rate": 9.28234933004504e-06, "loss": 0.4873, "step": 12369 }, { "epoch": 0.5676655499977055, "grad_norm": 0.42759576439857483, "learning_rate": 9.282222759628035e-06, "loss": 0.3611, "step": 12370 }, { "epoch": 0.56771144050296, "grad_norm": 0.450784832239151, "learning_rate": 9.282096178913633e-06, "loss": 0.3498, "step": 12371 }, { "epoch": 0.5677573310082143, "grad_norm": 0.5053176879882812, "learning_rate": 9.281969587902144e-06, "loss": 0.4637, "step": 12372 }, { "epoch": 0.5678032215134688, "grad_norm": 0.4744170010089874, "learning_rate": 9.281842986593868e-06, "loss": 0.4533, "step": 12373 }, { "epoch": 0.5678491120187233, "grad_norm": 0.463698148727417, "learning_rate": 9.28171637498911e-06, "loss": 0.3886, "step": 12374 }, { "epoch": 0.5678950025239778, "grad_norm": 0.4810331165790558, "learning_rate": 9.281589753088178e-06, "loss": 0.4165, "step": 12375 }, { "epoch": 0.5679408930292322, "grad_norm": 0.44187161326408386, "learning_rate": 9.281463120891373e-06, "loss": 0.3754, "step": 12376 }, { "epoch": 0.5679867835344867, "grad_norm": 0.4601793885231018, "learning_rate": 9.281336478398998e-06, "loss": 0.4035, "step": 12377 }, { "epoch": 0.5680326740397412, "grad_norm": 0.47379082441329956, "learning_rate": 9.281209825611364e-06, "loss": 0.4469, "step": 12378 }, { "epoch": 0.5680785645449956, "grad_norm": 0.46356967091560364, "learning_rate": 9.28108316252877e-06, "loss": 0.457, "step": 12379 }, { "epoch": 0.5681244550502501, "grad_norm": 0.4848254323005676, "learning_rate": 9.280956489151523e-06, "loss": 0.4114, "step": 12380 }, { "epoch": 0.5681703455555046, "grad_norm": 0.44569462537765503, "learning_rate": 9.280829805479925e-06, "loss": 0.3692, "step": 12381 }, { "epoch": 0.568216236060759, "grad_norm": 0.520349383354187, "learning_rate": 9.280703111514282e-06, "loss": 0.4715, "step": 12382 }, { "epoch": 0.5682621265660135, "grad_norm": 0.4963352382183075, "learning_rate": 9.2805764072549e-06, "loss": 0.4059, "step": 12383 }, { "epoch": 0.568308017071268, "grad_norm": 0.46492689847946167, "learning_rate": 9.280449692702083e-06, "loss": 0.4105, "step": 12384 }, { "epoch": 0.5683539075765224, "grad_norm": 0.4455277919769287, "learning_rate": 9.280322967856136e-06, "loss": 0.3325, "step": 12385 }, { "epoch": 0.5683997980817769, "grad_norm": 0.7298843860626221, "learning_rate": 9.280196232717363e-06, "loss": 0.4832, "step": 12386 }, { "epoch": 0.5684456885870314, "grad_norm": 0.451158732175827, "learning_rate": 9.280069487286067e-06, "loss": 0.3472, "step": 12387 }, { "epoch": 0.5684915790922858, "grad_norm": 0.4695546627044678, "learning_rate": 9.279942731562557e-06, "loss": 0.4215, "step": 12388 }, { "epoch": 0.5685374695975403, "grad_norm": 0.48479726910591125, "learning_rate": 9.279815965547134e-06, "loss": 0.4319, "step": 12389 }, { "epoch": 0.5685833601027948, "grad_norm": 0.510964035987854, "learning_rate": 9.279689189240105e-06, "loss": 0.4641, "step": 12390 }, { "epoch": 0.5686292506080491, "grad_norm": 0.4566155970096588, "learning_rate": 9.279562402641775e-06, "loss": 0.3817, "step": 12391 }, { "epoch": 0.5686751411133036, "grad_norm": 0.44954410195350647, "learning_rate": 9.279435605752446e-06, "loss": 0.3824, "step": 12392 }, { "epoch": 0.5687210316185581, "grad_norm": 0.45285600423812866, "learning_rate": 9.279308798572427e-06, "loss": 0.3757, "step": 12393 }, { "epoch": 0.5687669221238126, "grad_norm": 0.44981521368026733, "learning_rate": 9.27918198110202e-06, "loss": 0.3592, "step": 12394 }, { "epoch": 0.568812812629067, "grad_norm": 0.48487240076065063, "learning_rate": 9.279055153341531e-06, "loss": 0.5208, "step": 12395 }, { "epoch": 0.5688587031343215, "grad_norm": 0.45981818437576294, "learning_rate": 9.278928315291264e-06, "loss": 0.4691, "step": 12396 }, { "epoch": 0.568904593639576, "grad_norm": 0.5117197632789612, "learning_rate": 9.278801466951526e-06, "loss": 0.528, "step": 12397 }, { "epoch": 0.5689504841448304, "grad_norm": 0.4701516330242157, "learning_rate": 9.278674608322622e-06, "loss": 0.4579, "step": 12398 }, { "epoch": 0.5689963746500849, "grad_norm": 0.4414425194263458, "learning_rate": 9.278547739404854e-06, "loss": 0.3324, "step": 12399 }, { "epoch": 0.5690422651553394, "grad_norm": 0.4252696633338928, "learning_rate": 9.27842086019853e-06, "loss": 0.2948, "step": 12400 }, { "epoch": 0.5690881556605938, "grad_norm": 0.5155567526817322, "learning_rate": 9.278293970703954e-06, "loss": 0.5607, "step": 12401 }, { "epoch": 0.5691340461658483, "grad_norm": 0.47085556387901306, "learning_rate": 9.278167070921432e-06, "loss": 0.4165, "step": 12402 }, { "epoch": 0.5691799366711028, "grad_norm": 0.46698322892189026, "learning_rate": 9.278040160851266e-06, "loss": 0.4486, "step": 12403 }, { "epoch": 0.5692258271763572, "grad_norm": 0.4098964333534241, "learning_rate": 9.277913240493765e-06, "loss": 0.3355, "step": 12404 }, { "epoch": 0.5692717176816117, "grad_norm": 0.4780869781970978, "learning_rate": 9.277786309849232e-06, "loss": 0.4979, "step": 12405 }, { "epoch": 0.5693176081868662, "grad_norm": 0.4583963453769684, "learning_rate": 9.277659368917975e-06, "loss": 0.349, "step": 12406 }, { "epoch": 0.5693634986921206, "grad_norm": 0.4263805150985718, "learning_rate": 9.277532417700296e-06, "loss": 0.3427, "step": 12407 }, { "epoch": 0.569409389197375, "grad_norm": 0.6302890777587891, "learning_rate": 9.277405456196503e-06, "loss": 0.4112, "step": 12408 }, { "epoch": 0.5694552797026295, "grad_norm": 0.5228577256202698, "learning_rate": 9.277278484406897e-06, "loss": 0.5185, "step": 12409 }, { "epoch": 0.5695011702078839, "grad_norm": 0.4519951045513153, "learning_rate": 9.277151502331789e-06, "loss": 0.3903, "step": 12410 }, { "epoch": 0.5695470607131384, "grad_norm": 0.5260011553764343, "learning_rate": 9.27702450997148e-06, "loss": 0.5269, "step": 12411 }, { "epoch": 0.5695929512183929, "grad_norm": 0.43695566058158875, "learning_rate": 9.276897507326277e-06, "loss": 0.3653, "step": 12412 }, { "epoch": 0.5696388417236474, "grad_norm": 0.4952985644340515, "learning_rate": 9.276770494396486e-06, "loss": 0.4958, "step": 12413 }, { "epoch": 0.5696847322289018, "grad_norm": 0.4860233962535858, "learning_rate": 9.276643471182412e-06, "loss": 0.4614, "step": 12414 }, { "epoch": 0.5697306227341563, "grad_norm": 0.4407031536102295, "learning_rate": 9.27651643768436e-06, "loss": 0.4207, "step": 12415 }, { "epoch": 0.5697765132394108, "grad_norm": 0.5016089081764221, "learning_rate": 9.276389393902635e-06, "loss": 0.509, "step": 12416 }, { "epoch": 0.5698224037446652, "grad_norm": 0.4353282153606415, "learning_rate": 9.276262339837542e-06, "loss": 0.3723, "step": 12417 }, { "epoch": 0.5698682942499197, "grad_norm": 0.40254485607147217, "learning_rate": 9.27613527548939e-06, "loss": 0.2888, "step": 12418 }, { "epoch": 0.5699141847551742, "grad_norm": 0.42926833033561707, "learning_rate": 9.276008200858482e-06, "loss": 0.3797, "step": 12419 }, { "epoch": 0.5699600752604286, "grad_norm": 0.46133339405059814, "learning_rate": 9.275881115945123e-06, "loss": 0.3847, "step": 12420 }, { "epoch": 0.5700059657656831, "grad_norm": 0.42997005581855774, "learning_rate": 9.275754020749618e-06, "loss": 0.3794, "step": 12421 }, { "epoch": 0.5700518562709376, "grad_norm": 0.4597589373588562, "learning_rate": 9.275626915272277e-06, "loss": 0.4197, "step": 12422 }, { "epoch": 0.570097746776192, "grad_norm": 0.48366090655326843, "learning_rate": 9.275499799513401e-06, "loss": 0.4959, "step": 12423 }, { "epoch": 0.5701436372814465, "grad_norm": 0.47189608216285706, "learning_rate": 9.275372673473299e-06, "loss": 0.4332, "step": 12424 }, { "epoch": 0.570189527786701, "grad_norm": 0.4361773729324341, "learning_rate": 9.275245537152273e-06, "loss": 0.3878, "step": 12425 }, { "epoch": 0.5702354182919553, "grad_norm": 0.4524957537651062, "learning_rate": 9.275118390550632e-06, "loss": 0.4094, "step": 12426 }, { "epoch": 0.5702813087972098, "grad_norm": 0.4407818615436554, "learning_rate": 9.274991233668679e-06, "loss": 0.38, "step": 12427 }, { "epoch": 0.5703271993024643, "grad_norm": 0.4623456597328186, "learning_rate": 9.274864066506723e-06, "loss": 0.462, "step": 12428 }, { "epoch": 0.5703730898077188, "grad_norm": 0.42813539505004883, "learning_rate": 9.274736889065068e-06, "loss": 0.333, "step": 12429 }, { "epoch": 0.5704189803129732, "grad_norm": 0.45001348853111267, "learning_rate": 9.27460970134402e-06, "loss": 0.3637, "step": 12430 }, { "epoch": 0.5704648708182277, "grad_norm": 0.46588921546936035, "learning_rate": 9.274482503343884e-06, "loss": 0.3506, "step": 12431 }, { "epoch": 0.5705107613234822, "grad_norm": 0.4617343842983246, "learning_rate": 9.274355295064968e-06, "loss": 0.4483, "step": 12432 }, { "epoch": 0.5705566518287366, "grad_norm": 0.5144270658493042, "learning_rate": 9.274228076507575e-06, "loss": 0.434, "step": 12433 }, { "epoch": 0.5706025423339911, "grad_norm": 0.4424397051334381, "learning_rate": 9.274100847672011e-06, "loss": 0.3402, "step": 12434 }, { "epoch": 0.5706484328392456, "grad_norm": 0.4536787271499634, "learning_rate": 9.273973608558586e-06, "loss": 0.3404, "step": 12435 }, { "epoch": 0.5706943233445, "grad_norm": 0.5042724013328552, "learning_rate": 9.273846359167603e-06, "loss": 0.4661, "step": 12436 }, { "epoch": 0.5707402138497545, "grad_norm": 0.4644010365009308, "learning_rate": 9.273719099499367e-06, "loss": 0.4422, "step": 12437 }, { "epoch": 0.570786104355009, "grad_norm": 0.495913565158844, "learning_rate": 9.273591829554188e-06, "loss": 0.4226, "step": 12438 }, { "epoch": 0.5708319948602634, "grad_norm": 0.4370101988315582, "learning_rate": 9.273464549332368e-06, "loss": 0.3364, "step": 12439 }, { "epoch": 0.5708778853655179, "grad_norm": 0.46859753131866455, "learning_rate": 9.273337258834215e-06, "loss": 0.5125, "step": 12440 }, { "epoch": 0.5709237758707724, "grad_norm": 0.4720595180988312, "learning_rate": 9.273209958060033e-06, "loss": 0.4716, "step": 12441 }, { "epoch": 0.5709696663760268, "grad_norm": 0.473365843296051, "learning_rate": 9.27308264701013e-06, "loss": 0.4183, "step": 12442 }, { "epoch": 0.5710155568812812, "grad_norm": 0.4700353443622589, "learning_rate": 9.272955325684813e-06, "loss": 0.4227, "step": 12443 }, { "epoch": 0.5710614473865357, "grad_norm": 0.44280537962913513, "learning_rate": 9.272827994084387e-06, "loss": 0.3604, "step": 12444 }, { "epoch": 0.5711073378917901, "grad_norm": 0.4543342590332031, "learning_rate": 9.272700652209157e-06, "loss": 0.4131, "step": 12445 }, { "epoch": 0.5711532283970446, "grad_norm": 0.46248307824134827, "learning_rate": 9.272573300059432e-06, "loss": 0.44, "step": 12446 }, { "epoch": 0.5711991189022991, "grad_norm": 0.494863361120224, "learning_rate": 9.272445937635516e-06, "loss": 0.4419, "step": 12447 }, { "epoch": 0.5712450094075536, "grad_norm": 0.4952270984649658, "learning_rate": 9.272318564937716e-06, "loss": 0.5217, "step": 12448 }, { "epoch": 0.571290899912808, "grad_norm": 0.474663108587265, "learning_rate": 9.272191181966338e-06, "loss": 0.4384, "step": 12449 }, { "epoch": 0.5713367904180625, "grad_norm": 0.4027078449726105, "learning_rate": 9.272063788721687e-06, "loss": 0.3125, "step": 12450 }, { "epoch": 0.571382680923317, "grad_norm": 0.4733777940273285, "learning_rate": 9.271936385204074e-06, "loss": 0.4677, "step": 12451 }, { "epoch": 0.5714285714285714, "grad_norm": 0.4812890589237213, "learning_rate": 9.2718089714138e-06, "loss": 0.4389, "step": 12452 }, { "epoch": 0.5714744619338259, "grad_norm": 0.4387906491756439, "learning_rate": 9.271681547351175e-06, "loss": 0.3193, "step": 12453 }, { "epoch": 0.5715203524390804, "grad_norm": 0.4270033538341522, "learning_rate": 9.271554113016503e-06, "loss": 0.3529, "step": 12454 }, { "epoch": 0.5715662429443348, "grad_norm": 0.45658600330352783, "learning_rate": 9.271426668410092e-06, "loss": 0.3739, "step": 12455 }, { "epoch": 0.5716121334495893, "grad_norm": 0.45512616634368896, "learning_rate": 9.271299213532248e-06, "loss": 0.4447, "step": 12456 }, { "epoch": 0.5716580239548438, "grad_norm": 0.44947123527526855, "learning_rate": 9.271171748383278e-06, "loss": 0.4156, "step": 12457 }, { "epoch": 0.5717039144600982, "grad_norm": 0.4518815875053406, "learning_rate": 9.271044272963488e-06, "loss": 0.3759, "step": 12458 }, { "epoch": 0.5717498049653527, "grad_norm": 0.47350963950157166, "learning_rate": 9.270916787273184e-06, "loss": 0.4153, "step": 12459 }, { "epoch": 0.5717956954706072, "grad_norm": 0.455748975276947, "learning_rate": 9.270789291312672e-06, "loss": 0.3801, "step": 12460 }, { "epoch": 0.5718415859758615, "grad_norm": 0.4458254873752594, "learning_rate": 9.270661785082261e-06, "loss": 0.348, "step": 12461 }, { "epoch": 0.571887476481116, "grad_norm": 0.48263588547706604, "learning_rate": 9.270534268582257e-06, "loss": 0.4613, "step": 12462 }, { "epoch": 0.5719333669863705, "grad_norm": 0.43069809675216675, "learning_rate": 9.270406741812964e-06, "loss": 0.3351, "step": 12463 }, { "epoch": 0.571979257491625, "grad_norm": 0.4114397168159485, "learning_rate": 9.27027920477469e-06, "loss": 0.3238, "step": 12464 }, { "epoch": 0.5720251479968794, "grad_norm": 0.4413287043571472, "learning_rate": 9.270151657467745e-06, "loss": 0.3959, "step": 12465 }, { "epoch": 0.5720710385021339, "grad_norm": 0.45655664801597595, "learning_rate": 9.270024099892432e-06, "loss": 0.441, "step": 12466 }, { "epoch": 0.5721169290073884, "grad_norm": 0.46156468987464905, "learning_rate": 9.269896532049058e-06, "loss": 0.4258, "step": 12467 }, { "epoch": 0.5721628195126428, "grad_norm": 0.4382556676864624, "learning_rate": 9.26976895393793e-06, "loss": 0.4042, "step": 12468 }, { "epoch": 0.5722087100178973, "grad_norm": 0.456933856010437, "learning_rate": 9.269641365559356e-06, "loss": 0.3622, "step": 12469 }, { "epoch": 0.5722546005231518, "grad_norm": 0.5138749480247498, "learning_rate": 9.269513766913643e-06, "loss": 0.4866, "step": 12470 }, { "epoch": 0.5723004910284062, "grad_norm": 0.4266880750656128, "learning_rate": 9.269386158001094e-06, "loss": 0.3173, "step": 12471 }, { "epoch": 0.5723463815336607, "grad_norm": 0.41814228892326355, "learning_rate": 9.269258538822021e-06, "loss": 0.309, "step": 12472 }, { "epoch": 0.5723922720389152, "grad_norm": 0.4905936121940613, "learning_rate": 9.269130909376728e-06, "loss": 0.444, "step": 12473 }, { "epoch": 0.5724381625441696, "grad_norm": 0.4651744067668915, "learning_rate": 9.269003269665524e-06, "loss": 0.4218, "step": 12474 }, { "epoch": 0.5724840530494241, "grad_norm": 0.47762641310691833, "learning_rate": 9.268875619688713e-06, "loss": 0.416, "step": 12475 }, { "epoch": 0.5725299435546786, "grad_norm": 0.46365851163864136, "learning_rate": 9.268747959446605e-06, "loss": 0.3537, "step": 12476 }, { "epoch": 0.572575834059933, "grad_norm": 0.43689680099487305, "learning_rate": 9.268620288939502e-06, "loss": 0.3265, "step": 12477 }, { "epoch": 0.5726217245651875, "grad_norm": 0.492890328168869, "learning_rate": 9.268492608167717e-06, "loss": 0.4517, "step": 12478 }, { "epoch": 0.572667615070442, "grad_norm": 0.4098397195339203, "learning_rate": 9.268364917131555e-06, "loss": 0.3216, "step": 12479 }, { "epoch": 0.5727135055756963, "grad_norm": 0.43102848529815674, "learning_rate": 9.268237215831322e-06, "loss": 0.3347, "step": 12480 }, { "epoch": 0.5727593960809508, "grad_norm": 0.5144219994544983, "learning_rate": 9.268109504267324e-06, "loss": 0.438, "step": 12481 }, { "epoch": 0.5728052865862053, "grad_norm": 0.4524027705192566, "learning_rate": 9.267981782439871e-06, "loss": 0.3992, "step": 12482 }, { "epoch": 0.5728511770914598, "grad_norm": 0.43138790130615234, "learning_rate": 9.267854050349269e-06, "loss": 0.3514, "step": 12483 }, { "epoch": 0.5728970675967142, "grad_norm": 0.43252289295196533, "learning_rate": 9.267726307995824e-06, "loss": 0.3518, "step": 12484 }, { "epoch": 0.5729429581019687, "grad_norm": 0.4815121293067932, "learning_rate": 9.267598555379846e-06, "loss": 0.44, "step": 12485 }, { "epoch": 0.5729888486072232, "grad_norm": 0.45449697971343994, "learning_rate": 9.267470792501639e-06, "loss": 0.4081, "step": 12486 }, { "epoch": 0.5730347391124776, "grad_norm": 0.4681018888950348, "learning_rate": 9.26734301936151e-06, "loss": 0.4436, "step": 12487 }, { "epoch": 0.5730806296177321, "grad_norm": 0.45586466789245605, "learning_rate": 9.26721523595977e-06, "loss": 0.4667, "step": 12488 }, { "epoch": 0.5731265201229866, "grad_norm": 0.48727309703826904, "learning_rate": 9.267087442296725e-06, "loss": 0.5274, "step": 12489 }, { "epoch": 0.573172410628241, "grad_norm": 0.46886223554611206, "learning_rate": 9.266959638372678e-06, "loss": 0.4183, "step": 12490 }, { "epoch": 0.5732183011334955, "grad_norm": 0.4830036163330078, "learning_rate": 9.266831824187942e-06, "loss": 0.4417, "step": 12491 }, { "epoch": 0.57326419163875, "grad_norm": 0.43994027376174927, "learning_rate": 9.266703999742824e-06, "loss": 0.3357, "step": 12492 }, { "epoch": 0.5733100821440044, "grad_norm": 0.45810234546661377, "learning_rate": 9.266576165037627e-06, "loss": 0.3888, "step": 12493 }, { "epoch": 0.5733559726492589, "grad_norm": 0.5088083744049072, "learning_rate": 9.266448320072661e-06, "loss": 0.5074, "step": 12494 }, { "epoch": 0.5734018631545134, "grad_norm": 0.4527965486049652, "learning_rate": 9.266320464848233e-06, "loss": 0.3634, "step": 12495 }, { "epoch": 0.5734477536597677, "grad_norm": 0.4559270739555359, "learning_rate": 9.266192599364653e-06, "loss": 0.4186, "step": 12496 }, { "epoch": 0.5734936441650222, "grad_norm": 0.4693770706653595, "learning_rate": 9.266064723622225e-06, "loss": 0.4186, "step": 12497 }, { "epoch": 0.5735395346702767, "grad_norm": 0.4442390501499176, "learning_rate": 9.265936837621257e-06, "loss": 0.343, "step": 12498 }, { "epoch": 0.5735854251755311, "grad_norm": 0.4576526880264282, "learning_rate": 9.265808941362057e-06, "loss": 0.4029, "step": 12499 }, { "epoch": 0.5736313156807856, "grad_norm": 0.4814698398113251, "learning_rate": 9.265681034844935e-06, "loss": 0.3962, "step": 12500 }, { "epoch": 0.5736772061860401, "grad_norm": 0.45719659328460693, "learning_rate": 9.265553118070194e-06, "loss": 0.4066, "step": 12501 }, { "epoch": 0.5737230966912946, "grad_norm": 0.42948174476623535, "learning_rate": 9.265425191038145e-06, "loss": 0.332, "step": 12502 }, { "epoch": 0.573768987196549, "grad_norm": 0.5138431787490845, "learning_rate": 9.265297253749095e-06, "loss": 0.3634, "step": 12503 }, { "epoch": 0.5738148777018035, "grad_norm": 0.4744110107421875, "learning_rate": 9.26516930620335e-06, "loss": 0.3674, "step": 12504 }, { "epoch": 0.573860768207058, "grad_norm": 0.44775137305259705, "learning_rate": 9.26504134840122e-06, "loss": 0.3854, "step": 12505 }, { "epoch": 0.5739066587123124, "grad_norm": 0.4446086883544922, "learning_rate": 9.264913380343012e-06, "loss": 0.3962, "step": 12506 }, { "epoch": 0.5739525492175669, "grad_norm": 0.486645370721817, "learning_rate": 9.264785402029032e-06, "loss": 0.4352, "step": 12507 }, { "epoch": 0.5739984397228214, "grad_norm": 0.45591336488723755, "learning_rate": 9.26465741345959e-06, "loss": 0.3715, "step": 12508 }, { "epoch": 0.5740443302280758, "grad_norm": 0.43360835313796997, "learning_rate": 9.264529414634993e-06, "loss": 0.2985, "step": 12509 }, { "epoch": 0.5740902207333303, "grad_norm": 0.4525274634361267, "learning_rate": 9.26440140555555e-06, "loss": 0.3746, "step": 12510 }, { "epoch": 0.5741361112385848, "grad_norm": 0.4483006000518799, "learning_rate": 9.264273386221565e-06, "loss": 0.3833, "step": 12511 }, { "epoch": 0.5741820017438392, "grad_norm": 0.5403188467025757, "learning_rate": 9.264145356633349e-06, "loss": 0.444, "step": 12512 }, { "epoch": 0.5742278922490937, "grad_norm": 0.4745635390281677, "learning_rate": 9.26401731679121e-06, "loss": 0.4284, "step": 12513 }, { "epoch": 0.5742737827543481, "grad_norm": 0.4384280741214752, "learning_rate": 9.263889266695455e-06, "loss": 0.3585, "step": 12514 }, { "epoch": 0.5743196732596025, "grad_norm": 0.477106511592865, "learning_rate": 9.263761206346392e-06, "loss": 0.4046, "step": 12515 }, { "epoch": 0.574365563764857, "grad_norm": 0.4570589065551758, "learning_rate": 9.263633135744327e-06, "loss": 0.4352, "step": 12516 }, { "epoch": 0.5744114542701115, "grad_norm": 0.47551780939102173, "learning_rate": 9.263505054889572e-06, "loss": 0.3888, "step": 12517 }, { "epoch": 0.574457344775366, "grad_norm": 0.4811258316040039, "learning_rate": 9.263376963782434e-06, "loss": 0.4461, "step": 12518 }, { "epoch": 0.5745032352806204, "grad_norm": 0.42558398842811584, "learning_rate": 9.263248862423217e-06, "loss": 0.3786, "step": 12519 }, { "epoch": 0.5745491257858749, "grad_norm": 0.44696101546287537, "learning_rate": 9.263120750812236e-06, "loss": 0.426, "step": 12520 }, { "epoch": 0.5745950162911294, "grad_norm": 0.46655941009521484, "learning_rate": 9.262992628949792e-06, "loss": 0.437, "step": 12521 }, { "epoch": 0.5746409067963838, "grad_norm": 0.43725064396858215, "learning_rate": 9.262864496836197e-06, "loss": 0.3533, "step": 12522 }, { "epoch": 0.5746867973016383, "grad_norm": 0.42130446434020996, "learning_rate": 9.262736354471757e-06, "loss": 0.3299, "step": 12523 }, { "epoch": 0.5747326878068928, "grad_norm": 0.4209478497505188, "learning_rate": 9.262608201856783e-06, "loss": 0.3498, "step": 12524 }, { "epoch": 0.5747785783121472, "grad_norm": 0.4715080261230469, "learning_rate": 9.262480038991581e-06, "loss": 0.4646, "step": 12525 }, { "epoch": 0.5748244688174017, "grad_norm": 0.5360161662101746, "learning_rate": 9.26235186587646e-06, "loss": 0.3933, "step": 12526 }, { "epoch": 0.5748703593226562, "grad_norm": 0.42833325266838074, "learning_rate": 9.26222368251173e-06, "loss": 0.3197, "step": 12527 }, { "epoch": 0.5749162498279106, "grad_norm": 0.4850447475910187, "learning_rate": 9.262095488897694e-06, "loss": 0.4705, "step": 12528 }, { "epoch": 0.5749621403331651, "grad_norm": 0.4339088201522827, "learning_rate": 9.261967285034663e-06, "loss": 0.3984, "step": 12529 }, { "epoch": 0.5750080308384196, "grad_norm": 0.47556325793266296, "learning_rate": 9.26183907092295e-06, "loss": 0.4502, "step": 12530 }, { "epoch": 0.575053921343674, "grad_norm": 0.42540550231933594, "learning_rate": 9.261710846562855e-06, "loss": 0.3495, "step": 12531 }, { "epoch": 0.5750998118489284, "grad_norm": 0.4327041506767273, "learning_rate": 9.261582611954691e-06, "loss": 0.3361, "step": 12532 }, { "epoch": 0.5751457023541829, "grad_norm": 0.4750451147556305, "learning_rate": 9.261454367098767e-06, "loss": 0.4151, "step": 12533 }, { "epoch": 0.5751915928594373, "grad_norm": 0.47268441319465637, "learning_rate": 9.261326111995391e-06, "loss": 0.44, "step": 12534 }, { "epoch": 0.5752374833646918, "grad_norm": 0.44058722257614136, "learning_rate": 9.26119784664487e-06, "loss": 0.3601, "step": 12535 }, { "epoch": 0.5752833738699463, "grad_norm": 0.45171108841896057, "learning_rate": 9.261069571047513e-06, "loss": 0.3832, "step": 12536 }, { "epoch": 0.5753292643752008, "grad_norm": 0.47940173745155334, "learning_rate": 9.260941285203627e-06, "loss": 0.407, "step": 12537 }, { "epoch": 0.5753751548804552, "grad_norm": 0.42961037158966064, "learning_rate": 9.260812989113523e-06, "loss": 0.3125, "step": 12538 }, { "epoch": 0.5754210453857097, "grad_norm": 0.46799832582473755, "learning_rate": 9.26068468277751e-06, "loss": 0.4577, "step": 12539 }, { "epoch": 0.5754669358909642, "grad_norm": 0.4417872130870819, "learning_rate": 9.260556366195893e-06, "loss": 0.3986, "step": 12540 }, { "epoch": 0.5755128263962186, "grad_norm": 0.44156181812286377, "learning_rate": 9.260428039368982e-06, "loss": 0.4196, "step": 12541 }, { "epoch": 0.5755587169014731, "grad_norm": 0.4396057426929474, "learning_rate": 9.26029970229709e-06, "loss": 0.3349, "step": 12542 }, { "epoch": 0.5756046074067276, "grad_norm": 0.45526188611984253, "learning_rate": 9.260171354980518e-06, "loss": 0.3734, "step": 12543 }, { "epoch": 0.575650497911982, "grad_norm": 0.44120582938194275, "learning_rate": 9.26004299741958e-06, "loss": 0.341, "step": 12544 }, { "epoch": 0.5756963884172365, "grad_norm": 0.4317726194858551, "learning_rate": 9.259914629614583e-06, "loss": 0.335, "step": 12545 }, { "epoch": 0.575742278922491, "grad_norm": 0.49047619104385376, "learning_rate": 9.259786251565835e-06, "loss": 0.4674, "step": 12546 }, { "epoch": 0.5757881694277454, "grad_norm": 0.42871949076652527, "learning_rate": 9.259657863273648e-06, "loss": 0.3294, "step": 12547 }, { "epoch": 0.5758340599329999, "grad_norm": 0.4375298321247101, "learning_rate": 9.259529464738326e-06, "loss": 0.3966, "step": 12548 }, { "epoch": 0.5758799504382544, "grad_norm": 0.4428410232067108, "learning_rate": 9.25940105596018e-06, "loss": 0.3188, "step": 12549 }, { "epoch": 0.5759258409435087, "grad_norm": 0.46383482217788696, "learning_rate": 9.25927263693952e-06, "loss": 0.4116, "step": 12550 }, { "epoch": 0.5759717314487632, "grad_norm": 0.49060237407684326, "learning_rate": 9.25914420767665e-06, "loss": 0.4981, "step": 12551 }, { "epoch": 0.5760176219540177, "grad_norm": 0.4574492871761322, "learning_rate": 9.259015768171886e-06, "loss": 0.39, "step": 12552 }, { "epoch": 0.5760635124592722, "grad_norm": 0.40163856744766235, "learning_rate": 9.258887318425533e-06, "loss": 0.2927, "step": 12553 }, { "epoch": 0.5761094029645266, "grad_norm": 0.4383412301540375, "learning_rate": 9.258758858437899e-06, "loss": 0.378, "step": 12554 }, { "epoch": 0.5761552934697811, "grad_norm": 0.44665706157684326, "learning_rate": 9.258630388209296e-06, "loss": 0.3741, "step": 12555 }, { "epoch": 0.5762011839750356, "grad_norm": 0.4523334205150604, "learning_rate": 9.25850190774003e-06, "loss": 0.3845, "step": 12556 }, { "epoch": 0.57624707448029, "grad_norm": 0.5325130820274353, "learning_rate": 9.258373417030409e-06, "loss": 0.5737, "step": 12557 }, { "epoch": 0.5762929649855445, "grad_norm": 0.4551815390586853, "learning_rate": 9.258244916080745e-06, "loss": 0.4092, "step": 12558 }, { "epoch": 0.576338855490799, "grad_norm": 0.442103773355484, "learning_rate": 9.258116404891346e-06, "loss": 0.3817, "step": 12559 }, { "epoch": 0.5763847459960534, "grad_norm": 0.5180836915969849, "learning_rate": 9.257987883462523e-06, "loss": 0.3605, "step": 12560 }, { "epoch": 0.5764306365013079, "grad_norm": 0.4852864146232605, "learning_rate": 9.25785935179458e-06, "loss": 0.5063, "step": 12561 }, { "epoch": 0.5764765270065624, "grad_norm": 0.4747324287891388, "learning_rate": 9.25773080988783e-06, "loss": 0.4429, "step": 12562 }, { "epoch": 0.5765224175118168, "grad_norm": 0.44937577843666077, "learning_rate": 9.257602257742581e-06, "loss": 0.4234, "step": 12563 }, { "epoch": 0.5765683080170713, "grad_norm": 0.4900214672088623, "learning_rate": 9.257473695359145e-06, "loss": 0.4454, "step": 12564 }, { "epoch": 0.5766141985223258, "grad_norm": 0.455264151096344, "learning_rate": 9.257345122737827e-06, "loss": 0.3906, "step": 12565 }, { "epoch": 0.5766600890275801, "grad_norm": 0.43089550733566284, "learning_rate": 9.257216539878936e-06, "loss": 0.3673, "step": 12566 }, { "epoch": 0.5767059795328346, "grad_norm": 0.41970348358154297, "learning_rate": 9.257087946782784e-06, "loss": 0.3512, "step": 12567 }, { "epoch": 0.5767518700380891, "grad_norm": 0.5235967040061951, "learning_rate": 9.256959343449679e-06, "loss": 0.5105, "step": 12568 }, { "epoch": 0.5767977605433435, "grad_norm": 0.460002064704895, "learning_rate": 9.25683072987993e-06, "loss": 0.4157, "step": 12569 }, { "epoch": 0.576843651048598, "grad_norm": 0.4280596077442169, "learning_rate": 9.256702106073847e-06, "loss": 0.3994, "step": 12570 }, { "epoch": 0.5768895415538525, "grad_norm": 0.5014805197715759, "learning_rate": 9.256573472031737e-06, "loss": 0.4735, "step": 12571 }, { "epoch": 0.576935432059107, "grad_norm": 0.4504304528236389, "learning_rate": 9.256444827753914e-06, "loss": 0.376, "step": 12572 }, { "epoch": 0.5769813225643614, "grad_norm": 0.47284138202667236, "learning_rate": 9.256316173240683e-06, "loss": 0.4107, "step": 12573 }, { "epoch": 0.5770272130696159, "grad_norm": 0.4578664302825928, "learning_rate": 9.256187508492356e-06, "loss": 0.3668, "step": 12574 }, { "epoch": 0.5770731035748704, "grad_norm": 0.4515513777732849, "learning_rate": 9.25605883350924e-06, "loss": 0.3592, "step": 12575 }, { "epoch": 0.5771189940801248, "grad_norm": 0.48279306292533875, "learning_rate": 9.255930148291646e-06, "loss": 0.4758, "step": 12576 }, { "epoch": 0.5771648845853793, "grad_norm": 0.4338676929473877, "learning_rate": 9.255801452839885e-06, "loss": 0.3521, "step": 12577 }, { "epoch": 0.5772107750906338, "grad_norm": 0.43797609210014343, "learning_rate": 9.255672747154263e-06, "loss": 0.3461, "step": 12578 }, { "epoch": 0.5772566655958882, "grad_norm": 0.4690699577331543, "learning_rate": 9.25554403123509e-06, "loss": 0.4216, "step": 12579 }, { "epoch": 0.5773025561011427, "grad_norm": 0.447279155254364, "learning_rate": 9.255415305082681e-06, "loss": 0.4089, "step": 12580 }, { "epoch": 0.5773484466063972, "grad_norm": 0.45294392108917236, "learning_rate": 9.255286568697337e-06, "loss": 0.4177, "step": 12581 }, { "epoch": 0.5773943371116516, "grad_norm": 0.4197905361652374, "learning_rate": 9.255157822079373e-06, "loss": 0.3395, "step": 12582 }, { "epoch": 0.577440227616906, "grad_norm": 0.45428162813186646, "learning_rate": 9.255029065229097e-06, "loss": 0.4472, "step": 12583 }, { "epoch": 0.5774861181221606, "grad_norm": 0.44238606095314026, "learning_rate": 9.25490029814682e-06, "loss": 0.3714, "step": 12584 }, { "epoch": 0.5775320086274149, "grad_norm": 0.4275534451007843, "learning_rate": 9.25477152083285e-06, "loss": 0.3461, "step": 12585 }, { "epoch": 0.5775778991326694, "grad_norm": 0.45596247911453247, "learning_rate": 9.254642733287498e-06, "loss": 0.4239, "step": 12586 }, { "epoch": 0.5776237896379239, "grad_norm": 0.4034506678581238, "learning_rate": 9.254513935511073e-06, "loss": 0.2921, "step": 12587 }, { "epoch": 0.5776696801431783, "grad_norm": 0.46780630946159363, "learning_rate": 9.254385127503884e-06, "loss": 0.4069, "step": 12588 }, { "epoch": 0.5777155706484328, "grad_norm": 0.432293176651001, "learning_rate": 9.254256309266242e-06, "loss": 0.3323, "step": 12589 }, { "epoch": 0.5777614611536873, "grad_norm": 0.43386217951774597, "learning_rate": 9.254127480798456e-06, "loss": 0.3912, "step": 12590 }, { "epoch": 0.5778073516589418, "grad_norm": 0.4603815972805023, "learning_rate": 9.253998642100837e-06, "loss": 0.4116, "step": 12591 }, { "epoch": 0.5778532421641962, "grad_norm": 0.43377038836479187, "learning_rate": 9.253869793173693e-06, "loss": 0.3266, "step": 12592 }, { "epoch": 0.5778991326694507, "grad_norm": 0.42841583490371704, "learning_rate": 9.253740934017334e-06, "loss": 0.3766, "step": 12593 }, { "epoch": 0.5779450231747052, "grad_norm": 0.4495267868041992, "learning_rate": 9.253612064632073e-06, "loss": 0.3488, "step": 12594 }, { "epoch": 0.5779909136799596, "grad_norm": 0.4960354268550873, "learning_rate": 9.253483185018215e-06, "loss": 0.4806, "step": 12595 }, { "epoch": 0.5780368041852141, "grad_norm": 0.43426960706710815, "learning_rate": 9.253354295176074e-06, "loss": 0.3523, "step": 12596 }, { "epoch": 0.5780826946904686, "grad_norm": 0.47681307792663574, "learning_rate": 9.253225395105957e-06, "loss": 0.4644, "step": 12597 }, { "epoch": 0.578128585195723, "grad_norm": 0.47388502955436707, "learning_rate": 9.253096484808174e-06, "loss": 0.4434, "step": 12598 }, { "epoch": 0.5781744757009775, "grad_norm": 0.4419380724430084, "learning_rate": 9.25296756428304e-06, "loss": 0.4233, "step": 12599 }, { "epoch": 0.578220366206232, "grad_norm": 0.42498525977134705, "learning_rate": 9.252838633530859e-06, "loss": 0.364, "step": 12600 }, { "epoch": 0.5782662567114863, "grad_norm": 0.4250074028968811, "learning_rate": 9.252709692551944e-06, "loss": 0.3456, "step": 12601 }, { "epoch": 0.5783121472167408, "grad_norm": 0.44671469926834106, "learning_rate": 9.252580741346604e-06, "loss": 0.3926, "step": 12602 }, { "epoch": 0.5783580377219953, "grad_norm": 1.0606437921524048, "learning_rate": 9.25245177991515e-06, "loss": 0.416, "step": 12603 }, { "epoch": 0.5784039282272497, "grad_norm": 1.2155812978744507, "learning_rate": 9.252322808257891e-06, "loss": 0.4173, "step": 12604 }, { "epoch": 0.5784498187325042, "grad_norm": 0.45152273774147034, "learning_rate": 9.252193826375137e-06, "loss": 0.3432, "step": 12605 }, { "epoch": 0.5784957092377587, "grad_norm": 0.4285723567008972, "learning_rate": 9.2520648342672e-06, "loss": 0.3253, "step": 12606 }, { "epoch": 0.5785415997430132, "grad_norm": 0.47818103432655334, "learning_rate": 9.25193583193439e-06, "loss": 0.42, "step": 12607 }, { "epoch": 0.5785874902482676, "grad_norm": 0.47493234276771545, "learning_rate": 9.251806819377013e-06, "loss": 0.4245, "step": 12608 }, { "epoch": 0.5786333807535221, "grad_norm": 0.45272698998451233, "learning_rate": 9.251677796595386e-06, "loss": 0.3802, "step": 12609 }, { "epoch": 0.5786792712587766, "grad_norm": 0.48176708817481995, "learning_rate": 9.251548763589815e-06, "loss": 0.4999, "step": 12610 }, { "epoch": 0.578725161764031, "grad_norm": 0.44258081912994385, "learning_rate": 9.251419720360611e-06, "loss": 0.3405, "step": 12611 }, { "epoch": 0.5787710522692855, "grad_norm": 0.5007138252258301, "learning_rate": 9.251290666908082e-06, "loss": 0.5337, "step": 12612 }, { "epoch": 0.57881694277454, "grad_norm": 0.44774025678634644, "learning_rate": 9.251161603232544e-06, "loss": 0.4167, "step": 12613 }, { "epoch": 0.5788628332797944, "grad_norm": 0.4723808169364929, "learning_rate": 9.2510325293343e-06, "loss": 0.4288, "step": 12614 }, { "epoch": 0.5789087237850489, "grad_norm": 0.5099866986274719, "learning_rate": 9.250903445213668e-06, "loss": 0.394, "step": 12615 }, { "epoch": 0.5789546142903034, "grad_norm": 0.4534531235694885, "learning_rate": 9.250774350870954e-06, "loss": 0.3379, "step": 12616 }, { "epoch": 0.5790005047955578, "grad_norm": 0.4622512459754944, "learning_rate": 9.250645246306468e-06, "loss": 0.4141, "step": 12617 }, { "epoch": 0.5790463953008123, "grad_norm": 0.4934578537940979, "learning_rate": 9.250516131520525e-06, "loss": 0.4415, "step": 12618 }, { "epoch": 0.5790922858060668, "grad_norm": 0.4687318801879883, "learning_rate": 9.250387006513429e-06, "loss": 0.4715, "step": 12619 }, { "epoch": 0.5791381763113211, "grad_norm": 0.45984745025634766, "learning_rate": 9.250257871285495e-06, "loss": 0.4028, "step": 12620 }, { "epoch": 0.5791840668165756, "grad_norm": 0.4395900368690491, "learning_rate": 9.250128725837032e-06, "loss": 0.3503, "step": 12621 }, { "epoch": 0.5792299573218301, "grad_norm": 0.553170919418335, "learning_rate": 9.249999570168351e-06, "loss": 0.523, "step": 12622 }, { "epoch": 0.5792758478270845, "grad_norm": 0.44351068139076233, "learning_rate": 9.249870404279761e-06, "loss": 0.3423, "step": 12623 }, { "epoch": 0.579321738332339, "grad_norm": 0.4908098578453064, "learning_rate": 9.249741228171576e-06, "loss": 0.4551, "step": 12624 }, { "epoch": 0.5793676288375935, "grad_norm": 0.44533997774124146, "learning_rate": 9.249612041844103e-06, "loss": 0.3868, "step": 12625 }, { "epoch": 0.579413519342848, "grad_norm": 0.4501284956932068, "learning_rate": 9.249482845297655e-06, "loss": 0.4242, "step": 12626 }, { "epoch": 0.5794594098481024, "grad_norm": 0.48862409591674805, "learning_rate": 9.249353638532541e-06, "loss": 0.4189, "step": 12627 }, { "epoch": 0.5795053003533569, "grad_norm": 0.4677802324295044, "learning_rate": 9.249224421549074e-06, "loss": 0.4245, "step": 12628 }, { "epoch": 0.5795511908586114, "grad_norm": 0.4659879207611084, "learning_rate": 9.249095194347563e-06, "loss": 0.377, "step": 12629 }, { "epoch": 0.5795970813638658, "grad_norm": 0.434348464012146, "learning_rate": 9.24896595692832e-06, "loss": 0.3269, "step": 12630 }, { "epoch": 0.5796429718691203, "grad_norm": 0.47228026390075684, "learning_rate": 9.248836709291653e-06, "loss": 0.4207, "step": 12631 }, { "epoch": 0.5796888623743748, "grad_norm": 0.4434499144554138, "learning_rate": 9.248707451437875e-06, "loss": 0.3502, "step": 12632 }, { "epoch": 0.5797347528796292, "grad_norm": 0.41379526257514954, "learning_rate": 9.248578183367298e-06, "loss": 0.3284, "step": 12633 }, { "epoch": 0.5797806433848837, "grad_norm": 0.4771484434604645, "learning_rate": 9.24844890508023e-06, "loss": 0.4753, "step": 12634 }, { "epoch": 0.5798265338901382, "grad_norm": 0.4715487062931061, "learning_rate": 9.248319616576984e-06, "loss": 0.4184, "step": 12635 }, { "epoch": 0.5798724243953925, "grad_norm": 0.4768238067626953, "learning_rate": 9.24819031785787e-06, "loss": 0.4328, "step": 12636 }, { "epoch": 0.579918314900647, "grad_norm": 0.4236830472946167, "learning_rate": 9.2480610089232e-06, "loss": 0.371, "step": 12637 }, { "epoch": 0.5799642054059015, "grad_norm": 0.4577537775039673, "learning_rate": 9.247931689773281e-06, "loss": 0.3863, "step": 12638 }, { "epoch": 0.5800100959111559, "grad_norm": 0.43757155537605286, "learning_rate": 9.247802360408428e-06, "loss": 0.341, "step": 12639 }, { "epoch": 0.5800559864164104, "grad_norm": 0.42257827520370483, "learning_rate": 9.247673020828953e-06, "loss": 0.3469, "step": 12640 }, { "epoch": 0.5801018769216649, "grad_norm": 0.4678580164909363, "learning_rate": 9.247543671035164e-06, "loss": 0.3983, "step": 12641 }, { "epoch": 0.5801477674269194, "grad_norm": 0.49686145782470703, "learning_rate": 9.247414311027372e-06, "loss": 0.4309, "step": 12642 }, { "epoch": 0.5801936579321738, "grad_norm": 0.4756784737110138, "learning_rate": 9.24728494080589e-06, "loss": 0.3999, "step": 12643 }, { "epoch": 0.5802395484374283, "grad_norm": 0.4505109488964081, "learning_rate": 9.247155560371028e-06, "loss": 0.3575, "step": 12644 }, { "epoch": 0.5802854389426828, "grad_norm": 0.4537818729877472, "learning_rate": 9.247026169723098e-06, "loss": 0.3964, "step": 12645 }, { "epoch": 0.5803313294479372, "grad_norm": 0.4446287155151367, "learning_rate": 9.24689676886241e-06, "loss": 0.3825, "step": 12646 }, { "epoch": 0.5803772199531917, "grad_norm": 0.4738503396511078, "learning_rate": 9.246767357789274e-06, "loss": 0.3932, "step": 12647 }, { "epoch": 0.5804231104584462, "grad_norm": 0.4947744905948639, "learning_rate": 9.246637936504006e-06, "loss": 0.4513, "step": 12648 }, { "epoch": 0.5804690009637006, "grad_norm": 0.4701147973537445, "learning_rate": 9.246508505006912e-06, "loss": 0.3979, "step": 12649 }, { "epoch": 0.5805148914689551, "grad_norm": 0.5339137315750122, "learning_rate": 9.246379063298306e-06, "loss": 0.5207, "step": 12650 }, { "epoch": 0.5805607819742096, "grad_norm": 0.48089513182640076, "learning_rate": 9.246249611378497e-06, "loss": 0.4652, "step": 12651 }, { "epoch": 0.580606672479464, "grad_norm": 0.42181867361068726, "learning_rate": 9.246120149247799e-06, "loss": 0.3223, "step": 12652 }, { "epoch": 0.5806525629847185, "grad_norm": 0.5170144438743591, "learning_rate": 9.245990676906522e-06, "loss": 0.5261, "step": 12653 }, { "epoch": 0.580698453489973, "grad_norm": 0.5422033071517944, "learning_rate": 9.24586119435498e-06, "loss": 0.5302, "step": 12654 }, { "epoch": 0.5807443439952273, "grad_norm": 0.4499092996120453, "learning_rate": 9.245731701593477e-06, "loss": 0.4105, "step": 12655 }, { "epoch": 0.5807902345004818, "grad_norm": 0.43707072734832764, "learning_rate": 9.245602198622332e-06, "loss": 0.3927, "step": 12656 }, { "epoch": 0.5808361250057363, "grad_norm": 0.46806856989860535, "learning_rate": 9.245472685441854e-06, "loss": 0.4405, "step": 12657 }, { "epoch": 0.5808820155109907, "grad_norm": 0.47955837845802307, "learning_rate": 9.245343162052353e-06, "loss": 0.4913, "step": 12658 }, { "epoch": 0.5809279060162452, "grad_norm": 0.4481831192970276, "learning_rate": 9.245213628454143e-06, "loss": 0.4079, "step": 12659 }, { "epoch": 0.5809737965214997, "grad_norm": 0.500677764415741, "learning_rate": 9.245084084647533e-06, "loss": 0.4098, "step": 12660 }, { "epoch": 0.5810196870267542, "grad_norm": 0.6144744753837585, "learning_rate": 9.244954530632836e-06, "loss": 0.4519, "step": 12661 }, { "epoch": 0.5810655775320086, "grad_norm": 0.44593825936317444, "learning_rate": 9.244824966410363e-06, "loss": 0.3581, "step": 12662 }, { "epoch": 0.5811114680372631, "grad_norm": 0.4719686210155487, "learning_rate": 9.244695391980425e-06, "loss": 0.4126, "step": 12663 }, { "epoch": 0.5811573585425176, "grad_norm": 0.487368106842041, "learning_rate": 9.244565807343335e-06, "loss": 0.4748, "step": 12664 }, { "epoch": 0.581203249047772, "grad_norm": 0.4862326383590698, "learning_rate": 9.244436212499403e-06, "loss": 0.4145, "step": 12665 }, { "epoch": 0.5812491395530265, "grad_norm": 0.4453006982803345, "learning_rate": 9.244306607448943e-06, "loss": 0.3778, "step": 12666 }, { "epoch": 0.581295030058281, "grad_norm": 0.48194485902786255, "learning_rate": 9.244176992192265e-06, "loss": 0.4246, "step": 12667 }, { "epoch": 0.5813409205635354, "grad_norm": 0.4460377097129822, "learning_rate": 9.244047366729677e-06, "loss": 0.3984, "step": 12668 }, { "epoch": 0.5813868110687899, "grad_norm": 0.4344809353351593, "learning_rate": 9.243917731061499e-06, "loss": 0.3413, "step": 12669 }, { "epoch": 0.5814327015740444, "grad_norm": 0.4771958589553833, "learning_rate": 9.243788085188036e-06, "loss": 0.4356, "step": 12670 }, { "epoch": 0.5814785920792988, "grad_norm": 0.4335649013519287, "learning_rate": 9.243658429109604e-06, "loss": 0.4024, "step": 12671 }, { "epoch": 0.5815244825845532, "grad_norm": 0.4528760313987732, "learning_rate": 9.243528762826512e-06, "loss": 0.3225, "step": 12672 }, { "epoch": 0.5815703730898077, "grad_norm": 0.48238903284072876, "learning_rate": 9.243399086339074e-06, "loss": 0.4182, "step": 12673 }, { "epoch": 0.5816162635950621, "grad_norm": 0.46099919080734253, "learning_rate": 9.243269399647597e-06, "loss": 0.4123, "step": 12674 }, { "epoch": 0.5816621541003166, "grad_norm": 0.5078523755073547, "learning_rate": 9.243139702752399e-06, "loss": 0.5051, "step": 12675 }, { "epoch": 0.5817080446055711, "grad_norm": 0.47520917654037476, "learning_rate": 9.243009995653788e-06, "loss": 0.4133, "step": 12676 }, { "epoch": 0.5817539351108255, "grad_norm": 0.46814873814582825, "learning_rate": 9.242880278352078e-06, "loss": 0.4237, "step": 12677 }, { "epoch": 0.58179982561608, "grad_norm": 0.42546555399894714, "learning_rate": 9.24275055084758e-06, "loss": 0.3612, "step": 12678 }, { "epoch": 0.5818457161213345, "grad_norm": 0.4584414064884186, "learning_rate": 9.242620813140604e-06, "loss": 0.3781, "step": 12679 }, { "epoch": 0.581891606626589, "grad_norm": 0.477372407913208, "learning_rate": 9.242491065231465e-06, "loss": 0.4519, "step": 12680 }, { "epoch": 0.5819374971318434, "grad_norm": 0.46654728055000305, "learning_rate": 9.242361307120475e-06, "loss": 0.4435, "step": 12681 }, { "epoch": 0.5819833876370979, "grad_norm": 0.4543529152870178, "learning_rate": 9.242231538807945e-06, "loss": 0.3819, "step": 12682 }, { "epoch": 0.5820292781423524, "grad_norm": 0.47681164741516113, "learning_rate": 9.242101760294185e-06, "loss": 0.4223, "step": 12683 }, { "epoch": 0.5820751686476068, "grad_norm": 0.4439181387424469, "learning_rate": 9.241971971579511e-06, "loss": 0.3605, "step": 12684 }, { "epoch": 0.5821210591528613, "grad_norm": 0.446836918592453, "learning_rate": 9.241842172664232e-06, "loss": 0.3426, "step": 12685 }, { "epoch": 0.5821669496581158, "grad_norm": 1.3848943710327148, "learning_rate": 9.241712363548663e-06, "loss": 0.5195, "step": 12686 }, { "epoch": 0.5822128401633702, "grad_norm": 0.5124891400337219, "learning_rate": 9.241582544233113e-06, "loss": 0.4932, "step": 12687 }, { "epoch": 0.5822587306686247, "grad_norm": 0.430666983127594, "learning_rate": 9.241452714717895e-06, "loss": 0.3759, "step": 12688 }, { "epoch": 0.5823046211738792, "grad_norm": 0.49588513374328613, "learning_rate": 9.241322875003325e-06, "loss": 0.4262, "step": 12689 }, { "epoch": 0.5823505116791335, "grad_norm": 0.43820464611053467, "learning_rate": 9.24119302508971e-06, "loss": 0.359, "step": 12690 }, { "epoch": 0.582396402184388, "grad_norm": 0.4562884569168091, "learning_rate": 9.241063164977363e-06, "loss": 0.4009, "step": 12691 }, { "epoch": 0.5824422926896425, "grad_norm": 0.47813984751701355, "learning_rate": 9.2409332946666e-06, "loss": 0.434, "step": 12692 }, { "epoch": 0.5824881831948969, "grad_norm": 1.996543288230896, "learning_rate": 9.24080341415773e-06, "loss": 0.5269, "step": 12693 }, { "epoch": 0.5825340737001514, "grad_norm": 0.5161787867546082, "learning_rate": 9.240673523451066e-06, "loss": 0.4896, "step": 12694 }, { "epoch": 0.5825799642054059, "grad_norm": 0.4490312933921814, "learning_rate": 9.24054362254692e-06, "loss": 0.3964, "step": 12695 }, { "epoch": 0.5826258547106604, "grad_norm": 0.492544949054718, "learning_rate": 9.240413711445606e-06, "loss": 0.4414, "step": 12696 }, { "epoch": 0.5826717452159148, "grad_norm": 0.46340063214302063, "learning_rate": 9.240283790147435e-06, "loss": 0.3439, "step": 12697 }, { "epoch": 0.5827176357211693, "grad_norm": 0.5094013214111328, "learning_rate": 9.240153858652721e-06, "loss": 0.4496, "step": 12698 }, { "epoch": 0.5827635262264238, "grad_norm": 0.5043747425079346, "learning_rate": 9.240023916961774e-06, "loss": 0.3981, "step": 12699 }, { "epoch": 0.5828094167316782, "grad_norm": 0.4240081012248993, "learning_rate": 9.239893965074907e-06, "loss": 0.3532, "step": 12700 }, { "epoch": 0.5828553072369327, "grad_norm": 0.637977659702301, "learning_rate": 9.239764002992433e-06, "loss": 0.5507, "step": 12701 }, { "epoch": 0.5829011977421872, "grad_norm": 0.4460325241088867, "learning_rate": 9.239634030714667e-06, "loss": 0.3631, "step": 12702 }, { "epoch": 0.5829470882474416, "grad_norm": 0.5296462178230286, "learning_rate": 9.239504048241916e-06, "loss": 0.4795, "step": 12703 }, { "epoch": 0.5829929787526961, "grad_norm": 0.511194109916687, "learning_rate": 9.239374055574498e-06, "loss": 0.3773, "step": 12704 }, { "epoch": 0.5830388692579506, "grad_norm": 0.4857388436794281, "learning_rate": 9.239244052712724e-06, "loss": 0.3669, "step": 12705 }, { "epoch": 0.583084759763205, "grad_norm": 0.4807306230068207, "learning_rate": 9.239114039656903e-06, "loss": 0.4215, "step": 12706 }, { "epoch": 0.5831306502684594, "grad_norm": 0.4654237627983093, "learning_rate": 9.238984016407354e-06, "loss": 0.3478, "step": 12707 }, { "epoch": 0.583176540773714, "grad_norm": 0.4653848111629486, "learning_rate": 9.238853982964385e-06, "loss": 0.3929, "step": 12708 }, { "epoch": 0.5832224312789683, "grad_norm": 0.5456488728523254, "learning_rate": 9.23872393932831e-06, "loss": 0.3775, "step": 12709 }, { "epoch": 0.5832683217842228, "grad_norm": 0.5224929451942444, "learning_rate": 9.23859388549944e-06, "loss": 0.4646, "step": 12710 }, { "epoch": 0.5833142122894773, "grad_norm": 0.4654673933982849, "learning_rate": 9.238463821478092e-06, "loss": 0.4087, "step": 12711 }, { "epoch": 0.5833601027947317, "grad_norm": 0.4765751361846924, "learning_rate": 9.238333747264575e-06, "loss": 0.4423, "step": 12712 }, { "epoch": 0.5834059932999862, "grad_norm": 0.4661179184913635, "learning_rate": 9.238203662859201e-06, "loss": 0.3779, "step": 12713 }, { "epoch": 0.5834518838052407, "grad_norm": 0.5229474902153015, "learning_rate": 9.238073568262288e-06, "loss": 0.5761, "step": 12714 }, { "epoch": 0.5834977743104952, "grad_norm": 0.4295141398906708, "learning_rate": 9.237943463474146e-06, "loss": 0.3191, "step": 12715 }, { "epoch": 0.5835436648157496, "grad_norm": 0.44346287846565247, "learning_rate": 9.237813348495086e-06, "loss": 0.44, "step": 12716 }, { "epoch": 0.5835895553210041, "grad_norm": 0.5208945870399475, "learning_rate": 9.237683223325422e-06, "loss": 0.4486, "step": 12717 }, { "epoch": 0.5836354458262586, "grad_norm": 0.4688635468482971, "learning_rate": 9.237553087965467e-06, "loss": 0.3783, "step": 12718 }, { "epoch": 0.583681336331513, "grad_norm": 0.44729936122894287, "learning_rate": 9.237422942415536e-06, "loss": 0.3568, "step": 12719 }, { "epoch": 0.5837272268367675, "grad_norm": 0.4615308940410614, "learning_rate": 9.23729278667594e-06, "loss": 0.428, "step": 12720 }, { "epoch": 0.583773117342022, "grad_norm": 0.41692161560058594, "learning_rate": 9.23716262074699e-06, "loss": 0.3272, "step": 12721 }, { "epoch": 0.5838190078472764, "grad_norm": 0.47400984168052673, "learning_rate": 9.237032444629004e-06, "loss": 0.4147, "step": 12722 }, { "epoch": 0.5838648983525309, "grad_norm": 0.4694210886955261, "learning_rate": 9.23690225832229e-06, "loss": 0.4517, "step": 12723 }, { "epoch": 0.5839107888577854, "grad_norm": 0.4737335741519928, "learning_rate": 9.236772061827164e-06, "loss": 0.4174, "step": 12724 }, { "epoch": 0.5839566793630397, "grad_norm": 0.44994741678237915, "learning_rate": 9.236641855143938e-06, "loss": 0.3417, "step": 12725 }, { "epoch": 0.5840025698682942, "grad_norm": 0.42765989899635315, "learning_rate": 9.236511638272926e-06, "loss": 0.4105, "step": 12726 }, { "epoch": 0.5840484603735487, "grad_norm": 0.43511876463890076, "learning_rate": 9.236381411214441e-06, "loss": 0.3566, "step": 12727 }, { "epoch": 0.5840943508788031, "grad_norm": 0.5826748013496399, "learning_rate": 9.236251173968796e-06, "loss": 0.5251, "step": 12728 }, { "epoch": 0.5841402413840576, "grad_norm": 0.4832215905189514, "learning_rate": 9.236120926536303e-06, "loss": 0.4301, "step": 12729 }, { "epoch": 0.5841861318893121, "grad_norm": 0.4788094758987427, "learning_rate": 9.235990668917275e-06, "loss": 0.4141, "step": 12730 }, { "epoch": 0.5842320223945666, "grad_norm": 0.47492659091949463, "learning_rate": 9.235860401112028e-06, "loss": 0.3743, "step": 12731 }, { "epoch": 0.584277912899821, "grad_norm": 0.46850553154945374, "learning_rate": 9.235730123120875e-06, "loss": 0.4369, "step": 12732 }, { "epoch": 0.5843238034050755, "grad_norm": 0.49449992179870605, "learning_rate": 9.235599834944124e-06, "loss": 0.4657, "step": 12733 }, { "epoch": 0.58436969391033, "grad_norm": 0.46150949597358704, "learning_rate": 9.235469536582094e-06, "loss": 0.4304, "step": 12734 }, { "epoch": 0.5844155844155844, "grad_norm": 0.44881072640419006, "learning_rate": 9.2353392280351e-06, "loss": 0.3408, "step": 12735 }, { "epoch": 0.5844614749208389, "grad_norm": 0.4457770586013794, "learning_rate": 9.235208909303446e-06, "loss": 0.3854, "step": 12736 }, { "epoch": 0.5845073654260934, "grad_norm": 0.47274813055992126, "learning_rate": 9.235078580387454e-06, "loss": 0.3924, "step": 12737 }, { "epoch": 0.5845532559313478, "grad_norm": 0.4498476982116699, "learning_rate": 9.234948241287437e-06, "loss": 0.3077, "step": 12738 }, { "epoch": 0.5845991464366023, "grad_norm": 0.4387124478816986, "learning_rate": 9.234817892003702e-06, "loss": 0.3632, "step": 12739 }, { "epoch": 0.5846450369418568, "grad_norm": 0.44873833656311035, "learning_rate": 9.234687532536567e-06, "loss": 0.3143, "step": 12740 }, { "epoch": 0.5846909274471112, "grad_norm": 0.46687573194503784, "learning_rate": 9.234557162886347e-06, "loss": 0.3719, "step": 12741 }, { "epoch": 0.5847368179523657, "grad_norm": 0.47468191385269165, "learning_rate": 9.234426783053352e-06, "loss": 0.3728, "step": 12742 }, { "epoch": 0.5847827084576201, "grad_norm": 0.43557316064834595, "learning_rate": 9.234296393037899e-06, "loss": 0.3754, "step": 12743 }, { "epoch": 0.5848285989628745, "grad_norm": 0.4859871566295624, "learning_rate": 9.234165992840299e-06, "loss": 0.4656, "step": 12744 }, { "epoch": 0.584874489468129, "grad_norm": 0.45823734998703003, "learning_rate": 9.234035582460864e-06, "loss": 0.4359, "step": 12745 }, { "epoch": 0.5849203799733835, "grad_norm": 0.44153136014938354, "learning_rate": 9.233905161899913e-06, "loss": 0.363, "step": 12746 }, { "epoch": 0.5849662704786379, "grad_norm": 0.4379129409790039, "learning_rate": 9.233774731157753e-06, "loss": 0.3659, "step": 12747 }, { "epoch": 0.5850121609838924, "grad_norm": 0.4464224576950073, "learning_rate": 9.233644290234702e-06, "loss": 0.3756, "step": 12748 }, { "epoch": 0.5850580514891469, "grad_norm": 0.4254448115825653, "learning_rate": 9.233513839131075e-06, "loss": 0.3488, "step": 12749 }, { "epoch": 0.5851039419944014, "grad_norm": 0.4926731586456299, "learning_rate": 9.23338337784718e-06, "loss": 0.4671, "step": 12750 }, { "epoch": 0.5851498324996558, "grad_norm": 0.4746816158294678, "learning_rate": 9.233252906383335e-06, "loss": 0.4359, "step": 12751 }, { "epoch": 0.5851957230049103, "grad_norm": 0.46839094161987305, "learning_rate": 9.233122424739853e-06, "loss": 0.4016, "step": 12752 }, { "epoch": 0.5852416135101648, "grad_norm": 0.46539875864982605, "learning_rate": 9.232991932917048e-06, "loss": 0.4629, "step": 12753 }, { "epoch": 0.5852875040154192, "grad_norm": 0.4820875823497772, "learning_rate": 9.232861430915235e-06, "loss": 0.5143, "step": 12754 }, { "epoch": 0.5853333945206737, "grad_norm": 0.480930358171463, "learning_rate": 9.232730918734723e-06, "loss": 0.4125, "step": 12755 }, { "epoch": 0.5853792850259282, "grad_norm": 0.5174856781959534, "learning_rate": 9.23260039637583e-06, "loss": 0.4734, "step": 12756 }, { "epoch": 0.5854251755311826, "grad_norm": 0.464162677526474, "learning_rate": 9.23246986383887e-06, "loss": 0.3744, "step": 12757 }, { "epoch": 0.5854710660364371, "grad_norm": 0.454582542181015, "learning_rate": 9.232339321124156e-06, "loss": 0.3676, "step": 12758 }, { "epoch": 0.5855169565416916, "grad_norm": 0.4333985149860382, "learning_rate": 9.232208768232e-06, "loss": 0.385, "step": 12759 }, { "epoch": 0.585562847046946, "grad_norm": 0.421978622674942, "learning_rate": 9.232078205162719e-06, "loss": 0.3437, "step": 12760 }, { "epoch": 0.5856087375522004, "grad_norm": 0.4656358063220978, "learning_rate": 9.231947631916625e-06, "loss": 0.3763, "step": 12761 }, { "epoch": 0.5856546280574549, "grad_norm": 0.40220463275909424, "learning_rate": 9.231817048494032e-06, "loss": 0.3562, "step": 12762 }, { "epoch": 0.5857005185627093, "grad_norm": 0.46473538875579834, "learning_rate": 9.231686454895255e-06, "loss": 0.4125, "step": 12763 }, { "epoch": 0.5857464090679638, "grad_norm": 0.44042453169822693, "learning_rate": 9.231555851120608e-06, "loss": 0.3392, "step": 12764 }, { "epoch": 0.5857922995732183, "grad_norm": 0.4504871070384979, "learning_rate": 9.231425237170405e-06, "loss": 0.3142, "step": 12765 }, { "epoch": 0.5858381900784727, "grad_norm": 0.43226027488708496, "learning_rate": 9.23129461304496e-06, "loss": 0.3209, "step": 12766 }, { "epoch": 0.5858840805837272, "grad_norm": 0.4528903067111969, "learning_rate": 9.231163978744586e-06, "loss": 0.3629, "step": 12767 }, { "epoch": 0.5859299710889817, "grad_norm": 0.4520516097545624, "learning_rate": 9.231033334269598e-06, "loss": 0.4293, "step": 12768 }, { "epoch": 0.5859758615942362, "grad_norm": 0.5013335943222046, "learning_rate": 9.230902679620311e-06, "loss": 0.5089, "step": 12769 }, { "epoch": 0.5860217520994906, "grad_norm": 0.4365525245666504, "learning_rate": 9.230772014797039e-06, "loss": 0.409, "step": 12770 }, { "epoch": 0.5860676426047451, "grad_norm": 0.42906951904296875, "learning_rate": 9.230641339800092e-06, "loss": 0.3806, "step": 12771 }, { "epoch": 0.5861135331099996, "grad_norm": 0.43968191742897034, "learning_rate": 9.230510654629793e-06, "loss": 0.3212, "step": 12772 }, { "epoch": 0.586159423615254, "grad_norm": 0.4220793843269348, "learning_rate": 9.230379959286446e-06, "loss": 0.3821, "step": 12773 }, { "epoch": 0.5862053141205085, "grad_norm": 0.48120054602622986, "learning_rate": 9.230249253770374e-06, "loss": 0.4803, "step": 12774 }, { "epoch": 0.586251204625763, "grad_norm": 0.4396708607673645, "learning_rate": 9.230118538081885e-06, "loss": 0.3354, "step": 12775 }, { "epoch": 0.5862970951310174, "grad_norm": 0.4658530056476593, "learning_rate": 9.229987812221297e-06, "loss": 0.4721, "step": 12776 }, { "epoch": 0.5863429856362719, "grad_norm": 0.4603067636489868, "learning_rate": 9.229857076188923e-06, "loss": 0.4111, "step": 12777 }, { "epoch": 0.5863888761415263, "grad_norm": 0.4657119810581207, "learning_rate": 9.22972632998508e-06, "loss": 0.441, "step": 12778 }, { "epoch": 0.5864347666467807, "grad_norm": 0.4032839834690094, "learning_rate": 9.229595573610078e-06, "loss": 0.3053, "step": 12779 }, { "epoch": 0.5864806571520352, "grad_norm": 0.46165990829467773, "learning_rate": 9.229464807064234e-06, "loss": 0.3889, "step": 12780 }, { "epoch": 0.5865265476572897, "grad_norm": 0.45121240615844727, "learning_rate": 9.229334030347864e-06, "loss": 0.3873, "step": 12781 }, { "epoch": 0.5865724381625441, "grad_norm": 0.49502989649772644, "learning_rate": 9.229203243461277e-06, "loss": 0.4821, "step": 12782 }, { "epoch": 0.5866183286677986, "grad_norm": 0.4461996257305145, "learning_rate": 9.229072446404792e-06, "loss": 0.4068, "step": 12783 }, { "epoch": 0.5866642191730531, "grad_norm": 0.47412627935409546, "learning_rate": 9.228941639178724e-06, "loss": 0.4274, "step": 12784 }, { "epoch": 0.5867101096783076, "grad_norm": 0.443317174911499, "learning_rate": 9.228810821783384e-06, "loss": 0.379, "step": 12785 }, { "epoch": 0.586756000183562, "grad_norm": 0.4475384056568146, "learning_rate": 9.22867999421909e-06, "loss": 0.3795, "step": 12786 }, { "epoch": 0.5868018906888165, "grad_norm": 0.4661663770675659, "learning_rate": 9.228549156486154e-06, "loss": 0.3886, "step": 12787 }, { "epoch": 0.586847781194071, "grad_norm": 0.4729175269603729, "learning_rate": 9.228418308584893e-06, "loss": 0.4065, "step": 12788 }, { "epoch": 0.5868936716993254, "grad_norm": 0.44993671774864197, "learning_rate": 9.22828745051562e-06, "loss": 0.3758, "step": 12789 }, { "epoch": 0.5869395622045799, "grad_norm": 0.4828965663909912, "learning_rate": 9.228156582278651e-06, "loss": 0.4563, "step": 12790 }, { "epoch": 0.5869854527098344, "grad_norm": 0.42854830622673035, "learning_rate": 9.2280257038743e-06, "loss": 0.3696, "step": 12791 }, { "epoch": 0.5870313432150888, "grad_norm": 0.4355331361293793, "learning_rate": 9.22789481530288e-06, "loss": 0.3502, "step": 12792 }, { "epoch": 0.5870772337203433, "grad_norm": 0.40995606780052185, "learning_rate": 9.227763916564707e-06, "loss": 0.3095, "step": 12793 }, { "epoch": 0.5871231242255978, "grad_norm": 0.5143141746520996, "learning_rate": 9.227633007660097e-06, "loss": 0.4462, "step": 12794 }, { "epoch": 0.5871690147308521, "grad_norm": 0.4802948832511902, "learning_rate": 9.227502088589365e-06, "loss": 0.398, "step": 12795 }, { "epoch": 0.5872149052361066, "grad_norm": 0.48264527320861816, "learning_rate": 9.227371159352823e-06, "loss": 0.354, "step": 12796 }, { "epoch": 0.5872607957413611, "grad_norm": 0.5186548233032227, "learning_rate": 9.22724021995079e-06, "loss": 0.4432, "step": 12797 }, { "epoch": 0.5873066862466155, "grad_norm": 0.44044744968414307, "learning_rate": 9.227109270383576e-06, "loss": 0.3492, "step": 12798 }, { "epoch": 0.58735257675187, "grad_norm": 0.44781428575515747, "learning_rate": 9.2269783106515e-06, "loss": 0.3649, "step": 12799 }, { "epoch": 0.5873984672571245, "grad_norm": 0.44656169414520264, "learning_rate": 9.226847340754875e-06, "loss": 0.3589, "step": 12800 }, { "epoch": 0.5874443577623789, "grad_norm": 0.4392984211444855, "learning_rate": 9.226716360694015e-06, "loss": 0.3829, "step": 12801 }, { "epoch": 0.5874902482676334, "grad_norm": 0.4719136357307434, "learning_rate": 9.226585370469236e-06, "loss": 0.4101, "step": 12802 }, { "epoch": 0.5875361387728879, "grad_norm": 0.5066999793052673, "learning_rate": 9.226454370080854e-06, "loss": 0.518, "step": 12803 }, { "epoch": 0.5875820292781424, "grad_norm": 0.48161086440086365, "learning_rate": 9.226323359529184e-06, "loss": 0.4043, "step": 12804 }, { "epoch": 0.5876279197833968, "grad_norm": 0.45203524827957153, "learning_rate": 9.226192338814537e-06, "loss": 0.3777, "step": 12805 }, { "epoch": 0.5876738102886513, "grad_norm": 0.4613286852836609, "learning_rate": 9.226061307937235e-06, "loss": 0.4458, "step": 12806 }, { "epoch": 0.5877197007939058, "grad_norm": 0.4398617148399353, "learning_rate": 9.225930266897588e-06, "loss": 0.3449, "step": 12807 }, { "epoch": 0.5877655912991602, "grad_norm": 0.4350770115852356, "learning_rate": 9.225799215695911e-06, "loss": 0.4141, "step": 12808 }, { "epoch": 0.5878114818044147, "grad_norm": 0.4399188756942749, "learning_rate": 9.225668154332521e-06, "loss": 0.356, "step": 12809 }, { "epoch": 0.5878573723096692, "grad_norm": 0.4428427815437317, "learning_rate": 9.225537082807734e-06, "loss": 0.3676, "step": 12810 }, { "epoch": 0.5879032628149236, "grad_norm": 0.43981775641441345, "learning_rate": 9.225406001121864e-06, "loss": 0.3148, "step": 12811 }, { "epoch": 0.587949153320178, "grad_norm": 0.43860548734664917, "learning_rate": 9.225274909275226e-06, "loss": 0.4029, "step": 12812 }, { "epoch": 0.5879950438254326, "grad_norm": 0.4538743495941162, "learning_rate": 9.225143807268135e-06, "loss": 0.4003, "step": 12813 }, { "epoch": 0.5880409343306869, "grad_norm": 0.40248215198516846, "learning_rate": 9.225012695100907e-06, "loss": 0.3212, "step": 12814 }, { "epoch": 0.5880868248359414, "grad_norm": 0.4172745645046234, "learning_rate": 9.224881572773855e-06, "loss": 0.3243, "step": 12815 }, { "epoch": 0.5881327153411959, "grad_norm": 0.4479377567768097, "learning_rate": 9.224750440287299e-06, "loss": 0.3307, "step": 12816 }, { "epoch": 0.5881786058464503, "grad_norm": 0.4261455237865448, "learning_rate": 9.22461929764155e-06, "loss": 0.332, "step": 12817 }, { "epoch": 0.5882244963517048, "grad_norm": 0.461000919342041, "learning_rate": 9.224488144836925e-06, "loss": 0.4454, "step": 12818 }, { "epoch": 0.5882703868569593, "grad_norm": 0.4471290707588196, "learning_rate": 9.22435698187374e-06, "loss": 0.3871, "step": 12819 }, { "epoch": 0.5883162773622138, "grad_norm": 0.4553200304508209, "learning_rate": 9.22422580875231e-06, "loss": 0.3958, "step": 12820 }, { "epoch": 0.5883621678674682, "grad_norm": 0.4374959170818329, "learning_rate": 9.224094625472947e-06, "loss": 0.3654, "step": 12821 }, { "epoch": 0.5884080583727227, "grad_norm": 0.4218861758708954, "learning_rate": 9.223963432035974e-06, "loss": 0.3473, "step": 12822 }, { "epoch": 0.5884539488779772, "grad_norm": 0.47813570499420166, "learning_rate": 9.2238322284417e-06, "loss": 0.4665, "step": 12823 }, { "epoch": 0.5884998393832316, "grad_norm": 0.4410776197910309, "learning_rate": 9.223701014690442e-06, "loss": 0.3872, "step": 12824 }, { "epoch": 0.5885457298884861, "grad_norm": 0.45331352949142456, "learning_rate": 9.223569790782517e-06, "loss": 0.3533, "step": 12825 }, { "epoch": 0.5885916203937406, "grad_norm": 0.43798357248306274, "learning_rate": 9.223438556718238e-06, "loss": 0.3463, "step": 12826 }, { "epoch": 0.588637510898995, "grad_norm": 0.48944082856178284, "learning_rate": 9.223307312497925e-06, "loss": 0.5037, "step": 12827 }, { "epoch": 0.5886834014042495, "grad_norm": 0.48846033215522766, "learning_rate": 9.223176058121889e-06, "loss": 0.3967, "step": 12828 }, { "epoch": 0.588729291909504, "grad_norm": 0.4521346390247345, "learning_rate": 9.223044793590447e-06, "loss": 0.3699, "step": 12829 }, { "epoch": 0.5887751824147583, "grad_norm": 0.5086361169815063, "learning_rate": 9.222913518903918e-06, "loss": 0.4536, "step": 12830 }, { "epoch": 0.5888210729200128, "grad_norm": 0.4563990533351898, "learning_rate": 9.222782234062613e-06, "loss": 0.4157, "step": 12831 }, { "epoch": 0.5888669634252673, "grad_norm": 0.45894595980644226, "learning_rate": 9.222650939066849e-06, "loss": 0.4276, "step": 12832 }, { "epoch": 0.5889128539305217, "grad_norm": 0.483471155166626, "learning_rate": 9.222519633916943e-06, "loss": 0.5066, "step": 12833 }, { "epoch": 0.5889587444357762, "grad_norm": 0.43864163756370544, "learning_rate": 9.222388318613207e-06, "loss": 0.3811, "step": 12834 }, { "epoch": 0.5890046349410307, "grad_norm": 0.4597693681716919, "learning_rate": 9.222256993155962e-06, "loss": 0.4092, "step": 12835 }, { "epoch": 0.5890505254462851, "grad_norm": 0.45927566289901733, "learning_rate": 9.222125657545522e-06, "loss": 0.4452, "step": 12836 }, { "epoch": 0.5890964159515396, "grad_norm": 0.46506625413894653, "learning_rate": 9.221994311782204e-06, "loss": 0.4494, "step": 12837 }, { "epoch": 0.5891423064567941, "grad_norm": 0.4535025954246521, "learning_rate": 9.221862955866318e-06, "loss": 0.4237, "step": 12838 }, { "epoch": 0.5891881969620486, "grad_norm": 0.4407668113708496, "learning_rate": 9.221731589798186e-06, "loss": 0.4133, "step": 12839 }, { "epoch": 0.589234087467303, "grad_norm": 0.4473594129085541, "learning_rate": 9.221600213578124e-06, "loss": 0.3746, "step": 12840 }, { "epoch": 0.5892799779725575, "grad_norm": 0.4277879595756531, "learning_rate": 9.221468827206444e-06, "loss": 0.3623, "step": 12841 }, { "epoch": 0.589325868477812, "grad_norm": 0.47602543234825134, "learning_rate": 9.221337430683464e-06, "loss": 0.3443, "step": 12842 }, { "epoch": 0.5893717589830664, "grad_norm": 0.4712906777858734, "learning_rate": 9.221206024009498e-06, "loss": 0.4503, "step": 12843 }, { "epoch": 0.5894176494883209, "grad_norm": 0.4947783648967743, "learning_rate": 9.221074607184866e-06, "loss": 0.5037, "step": 12844 }, { "epoch": 0.5894635399935754, "grad_norm": 0.46926939487457275, "learning_rate": 9.22094318020988e-06, "loss": 0.4021, "step": 12845 }, { "epoch": 0.5895094304988298, "grad_norm": 0.44295534491539, "learning_rate": 9.22081174308486e-06, "loss": 0.3268, "step": 12846 }, { "epoch": 0.5895553210040843, "grad_norm": 0.46860864758491516, "learning_rate": 9.220680295810117e-06, "loss": 0.4382, "step": 12847 }, { "epoch": 0.5896012115093388, "grad_norm": 0.47284558415412903, "learning_rate": 9.220548838385973e-06, "loss": 0.4296, "step": 12848 }, { "epoch": 0.5896471020145931, "grad_norm": 0.5287516117095947, "learning_rate": 9.220417370812739e-06, "loss": 0.5469, "step": 12849 }, { "epoch": 0.5896929925198476, "grad_norm": 0.4415300190448761, "learning_rate": 9.220285893090734e-06, "loss": 0.3924, "step": 12850 }, { "epoch": 0.5897388830251021, "grad_norm": 0.4936564564704895, "learning_rate": 9.220154405220273e-06, "loss": 0.445, "step": 12851 }, { "epoch": 0.5897847735303565, "grad_norm": 0.4838492274284363, "learning_rate": 9.220022907201675e-06, "loss": 0.4188, "step": 12852 }, { "epoch": 0.589830664035611, "grad_norm": 0.44980093836784363, "learning_rate": 9.219891399035251e-06, "loss": 0.3771, "step": 12853 }, { "epoch": 0.5898765545408655, "grad_norm": 0.44502314925193787, "learning_rate": 9.219759880721322e-06, "loss": 0.3532, "step": 12854 }, { "epoch": 0.5899224450461199, "grad_norm": 0.4306795001029968, "learning_rate": 9.2196283522602e-06, "loss": 0.3516, "step": 12855 }, { "epoch": 0.5899683355513744, "grad_norm": 0.46965932846069336, "learning_rate": 9.219496813652205e-06, "loss": 0.42, "step": 12856 }, { "epoch": 0.5900142260566289, "grad_norm": 0.44421058893203735, "learning_rate": 9.219365264897651e-06, "loss": 0.3903, "step": 12857 }, { "epoch": 0.5900601165618834, "grad_norm": 0.4487733542919159, "learning_rate": 9.219233705996857e-06, "loss": 0.4001, "step": 12858 }, { "epoch": 0.5901060070671378, "grad_norm": 0.5017364621162415, "learning_rate": 9.219102136950135e-06, "loss": 0.5189, "step": 12859 }, { "epoch": 0.5901518975723923, "grad_norm": 0.44568100571632385, "learning_rate": 9.218970557757806e-06, "loss": 0.368, "step": 12860 }, { "epoch": 0.5901977880776468, "grad_norm": 0.48367759585380554, "learning_rate": 9.218838968420185e-06, "loss": 0.4322, "step": 12861 }, { "epoch": 0.5902436785829012, "grad_norm": 0.4829626679420471, "learning_rate": 9.218707368937585e-06, "loss": 0.4512, "step": 12862 }, { "epoch": 0.5902895690881557, "grad_norm": 0.44088661670684814, "learning_rate": 9.218575759310326e-06, "loss": 0.3828, "step": 12863 }, { "epoch": 0.5903354595934102, "grad_norm": 0.45712119340896606, "learning_rate": 9.218444139538725e-06, "loss": 0.3479, "step": 12864 }, { "epoch": 0.5903813500986645, "grad_norm": 0.41344553232192993, "learning_rate": 9.218312509623098e-06, "loss": 0.3004, "step": 12865 }, { "epoch": 0.590427240603919, "grad_norm": 0.4537729024887085, "learning_rate": 9.218180869563759e-06, "loss": 0.39, "step": 12866 }, { "epoch": 0.5904731311091735, "grad_norm": 0.44736534357070923, "learning_rate": 9.218049219361026e-06, "loss": 0.3991, "step": 12867 }, { "epoch": 0.5905190216144279, "grad_norm": 0.4564002752304077, "learning_rate": 9.217917559015216e-06, "loss": 0.378, "step": 12868 }, { "epoch": 0.5905649121196824, "grad_norm": 0.41807860136032104, "learning_rate": 9.217785888526648e-06, "loss": 0.3215, "step": 12869 }, { "epoch": 0.5906108026249369, "grad_norm": 0.4513213634490967, "learning_rate": 9.217654207895632e-06, "loss": 0.367, "step": 12870 }, { "epoch": 0.5906566931301913, "grad_norm": 0.49027618765830994, "learning_rate": 9.217522517122492e-06, "loss": 0.4537, "step": 12871 }, { "epoch": 0.5907025836354458, "grad_norm": 0.49609375, "learning_rate": 9.21739081620754e-06, "loss": 0.4781, "step": 12872 }, { "epoch": 0.5907484741407003, "grad_norm": 0.5928043723106384, "learning_rate": 9.217259105151095e-06, "loss": 0.4749, "step": 12873 }, { "epoch": 0.5907943646459548, "grad_norm": 0.4627956748008728, "learning_rate": 9.217127383953473e-06, "loss": 0.4436, "step": 12874 }, { "epoch": 0.5908402551512092, "grad_norm": 0.4158020317554474, "learning_rate": 9.216995652614989e-06, "loss": 0.3003, "step": 12875 }, { "epoch": 0.5908861456564637, "grad_norm": 0.46536651253700256, "learning_rate": 9.216863911135962e-06, "loss": 0.3831, "step": 12876 }, { "epoch": 0.5909320361617182, "grad_norm": 0.4562129080295563, "learning_rate": 9.216732159516708e-06, "loss": 0.4188, "step": 12877 }, { "epoch": 0.5909779266669726, "grad_norm": 0.4690381586551666, "learning_rate": 9.216600397757545e-06, "loss": 0.3794, "step": 12878 }, { "epoch": 0.5910238171722271, "grad_norm": 0.43046078085899353, "learning_rate": 9.216468625858789e-06, "loss": 0.3889, "step": 12879 }, { "epoch": 0.5910697076774816, "grad_norm": 0.45390385389328003, "learning_rate": 9.216336843820755e-06, "loss": 0.3209, "step": 12880 }, { "epoch": 0.591115598182736, "grad_norm": 0.4318632483482361, "learning_rate": 9.216205051643763e-06, "loss": 0.3556, "step": 12881 }, { "epoch": 0.5911614886879905, "grad_norm": 0.4421613812446594, "learning_rate": 9.216073249328128e-06, "loss": 0.435, "step": 12882 }, { "epoch": 0.591207379193245, "grad_norm": 0.493385910987854, "learning_rate": 9.215941436874168e-06, "loss": 0.4748, "step": 12883 }, { "epoch": 0.5912532696984993, "grad_norm": 0.49507957696914673, "learning_rate": 9.215809614282199e-06, "loss": 0.4049, "step": 12884 }, { "epoch": 0.5912991602037538, "grad_norm": 0.48176833987236023, "learning_rate": 9.21567778155254e-06, "loss": 0.4062, "step": 12885 }, { "epoch": 0.5913450507090083, "grad_norm": 0.44845959544181824, "learning_rate": 9.215545938685503e-06, "loss": 0.3786, "step": 12886 }, { "epoch": 0.5913909412142627, "grad_norm": 0.44788259267807007, "learning_rate": 9.215414085681411e-06, "loss": 0.3686, "step": 12887 }, { "epoch": 0.5914368317195172, "grad_norm": 0.4293821156024933, "learning_rate": 9.215282222540579e-06, "loss": 0.3231, "step": 12888 }, { "epoch": 0.5914827222247717, "grad_norm": 0.46858271956443787, "learning_rate": 9.215150349263322e-06, "loss": 0.3739, "step": 12889 }, { "epoch": 0.5915286127300261, "grad_norm": 0.4479474723339081, "learning_rate": 9.215018465849959e-06, "loss": 0.3709, "step": 12890 }, { "epoch": 0.5915745032352806, "grad_norm": 0.4279611110687256, "learning_rate": 9.214886572300808e-06, "loss": 0.3779, "step": 12891 }, { "epoch": 0.5916203937405351, "grad_norm": 0.4540843367576599, "learning_rate": 9.214754668616183e-06, "loss": 0.3693, "step": 12892 }, { "epoch": 0.5916662842457896, "grad_norm": 0.493689626455307, "learning_rate": 9.214622754796406e-06, "loss": 0.4196, "step": 12893 }, { "epoch": 0.591712174751044, "grad_norm": 0.4432911276817322, "learning_rate": 9.21449083084179e-06, "loss": 0.3919, "step": 12894 }, { "epoch": 0.5917580652562985, "grad_norm": 0.43998655676841736, "learning_rate": 9.214358896752653e-06, "loss": 0.367, "step": 12895 }, { "epoch": 0.591803955761553, "grad_norm": 0.4773479104042053, "learning_rate": 9.214226952529312e-06, "loss": 0.3971, "step": 12896 }, { "epoch": 0.5918498462668074, "grad_norm": 0.46761736273765564, "learning_rate": 9.214094998172088e-06, "loss": 0.3769, "step": 12897 }, { "epoch": 0.5918957367720619, "grad_norm": 0.4792700409889221, "learning_rate": 9.213963033681295e-06, "loss": 0.4446, "step": 12898 }, { "epoch": 0.5919416272773164, "grad_norm": 0.4793221056461334, "learning_rate": 9.21383105905725e-06, "loss": 0.4612, "step": 12899 }, { "epoch": 0.5919875177825707, "grad_norm": 0.4984211027622223, "learning_rate": 9.21369907430027e-06, "loss": 0.4945, "step": 12900 }, { "epoch": 0.5920334082878252, "grad_norm": 0.49336037039756775, "learning_rate": 9.213567079410676e-06, "loss": 0.4446, "step": 12901 }, { "epoch": 0.5920792987930797, "grad_norm": 0.47008034586906433, "learning_rate": 9.213435074388781e-06, "loss": 0.3767, "step": 12902 }, { "epoch": 0.5921251892983341, "grad_norm": 0.4513151943683624, "learning_rate": 9.213303059234907e-06, "loss": 0.4123, "step": 12903 }, { "epoch": 0.5921710798035886, "grad_norm": 0.5015353560447693, "learning_rate": 9.213171033949366e-06, "loss": 0.5016, "step": 12904 }, { "epoch": 0.5922169703088431, "grad_norm": 0.4718548357486725, "learning_rate": 9.213038998532481e-06, "loss": 0.4332, "step": 12905 }, { "epoch": 0.5922628608140975, "grad_norm": 0.40910062193870544, "learning_rate": 9.212906952984566e-06, "loss": 0.305, "step": 12906 }, { "epoch": 0.592308751319352, "grad_norm": 0.4839059114456177, "learning_rate": 9.212774897305938e-06, "loss": 0.4001, "step": 12907 }, { "epoch": 0.5923546418246065, "grad_norm": 0.47646403312683105, "learning_rate": 9.212642831496917e-06, "loss": 0.4662, "step": 12908 }, { "epoch": 0.592400532329861, "grad_norm": 0.46075674891471863, "learning_rate": 9.21251075555782e-06, "loss": 0.3963, "step": 12909 }, { "epoch": 0.5924464228351154, "grad_norm": 0.4128977656364441, "learning_rate": 9.212378669488965e-06, "loss": 0.3409, "step": 12910 }, { "epoch": 0.5924923133403699, "grad_norm": 0.43849748373031616, "learning_rate": 9.212246573290666e-06, "loss": 0.3695, "step": 12911 }, { "epoch": 0.5925382038456244, "grad_norm": 0.45872604846954346, "learning_rate": 9.212114466963246e-06, "loss": 0.4347, "step": 12912 }, { "epoch": 0.5925840943508788, "grad_norm": 0.50279301404953, "learning_rate": 9.211982350507018e-06, "loss": 0.5026, "step": 12913 }, { "epoch": 0.5926299848561333, "grad_norm": 0.44524189829826355, "learning_rate": 9.211850223922303e-06, "loss": 0.3756, "step": 12914 }, { "epoch": 0.5926758753613878, "grad_norm": 0.6861488223075867, "learning_rate": 9.211718087209417e-06, "loss": 0.4129, "step": 12915 }, { "epoch": 0.5927217658666422, "grad_norm": 0.5497481226921082, "learning_rate": 9.211585940368679e-06, "loss": 0.5359, "step": 12916 }, { "epoch": 0.5927676563718967, "grad_norm": 0.44153815507888794, "learning_rate": 9.211453783400406e-06, "loss": 0.3926, "step": 12917 }, { "epoch": 0.5928135468771512, "grad_norm": 0.4850890636444092, "learning_rate": 9.211321616304916e-06, "loss": 0.4052, "step": 12918 }, { "epoch": 0.5928594373824055, "grad_norm": 0.4923182725906372, "learning_rate": 9.211189439082526e-06, "loss": 0.4226, "step": 12919 }, { "epoch": 0.59290532788766, "grad_norm": 0.4720557928085327, "learning_rate": 9.211057251733553e-06, "loss": 0.4075, "step": 12920 }, { "epoch": 0.5929512183929145, "grad_norm": 0.5026051998138428, "learning_rate": 9.210925054258319e-06, "loss": 0.4457, "step": 12921 }, { "epoch": 0.5929971088981689, "grad_norm": 0.4773178696632385, "learning_rate": 9.210792846657137e-06, "loss": 0.4231, "step": 12922 }, { "epoch": 0.5930429994034234, "grad_norm": 0.4374334514141083, "learning_rate": 9.210660628930329e-06, "loss": 0.4285, "step": 12923 }, { "epoch": 0.5930888899086779, "grad_norm": 0.4798688590526581, "learning_rate": 9.210528401078211e-06, "loss": 0.452, "step": 12924 }, { "epoch": 0.5931347804139323, "grad_norm": 0.46923092007637024, "learning_rate": 9.210396163101102e-06, "loss": 0.4267, "step": 12925 }, { "epoch": 0.5931806709191868, "grad_norm": 0.4547783434391022, "learning_rate": 9.210263914999316e-06, "loss": 0.4137, "step": 12926 }, { "epoch": 0.5932265614244413, "grad_norm": 0.38375768065452576, "learning_rate": 9.210131656773176e-06, "loss": 0.3198, "step": 12927 }, { "epoch": 0.5932724519296958, "grad_norm": 0.4026845395565033, "learning_rate": 9.209999388422996e-06, "loss": 0.2886, "step": 12928 }, { "epoch": 0.5933183424349502, "grad_norm": 0.43749377131462097, "learning_rate": 9.2098671099491e-06, "loss": 0.3612, "step": 12929 }, { "epoch": 0.5933642329402047, "grad_norm": 0.4625486433506012, "learning_rate": 9.209734821351798e-06, "loss": 0.4003, "step": 12930 }, { "epoch": 0.5934101234454592, "grad_norm": 0.4336709976196289, "learning_rate": 9.209602522631415e-06, "loss": 0.3352, "step": 12931 }, { "epoch": 0.5934560139507136, "grad_norm": 0.4388015866279602, "learning_rate": 9.209470213788268e-06, "loss": 0.3827, "step": 12932 }, { "epoch": 0.5935019044559681, "grad_norm": 0.4855652451515198, "learning_rate": 9.209337894822671e-06, "loss": 0.432, "step": 12933 }, { "epoch": 0.5935477949612226, "grad_norm": 0.4404178559780121, "learning_rate": 9.209205565734946e-06, "loss": 0.3702, "step": 12934 }, { "epoch": 0.593593685466477, "grad_norm": 0.4284880459308624, "learning_rate": 9.209073226525408e-06, "loss": 0.3864, "step": 12935 }, { "epoch": 0.5936395759717314, "grad_norm": 0.441139280796051, "learning_rate": 9.208940877194381e-06, "loss": 0.3281, "step": 12936 }, { "epoch": 0.593685466476986, "grad_norm": 0.4923996329307556, "learning_rate": 9.208808517742176e-06, "loss": 0.5051, "step": 12937 }, { "epoch": 0.5937313569822403, "grad_norm": 0.44874125719070435, "learning_rate": 9.208676148169118e-06, "loss": 0.4281, "step": 12938 }, { "epoch": 0.5937772474874948, "grad_norm": 0.4644026756286621, "learning_rate": 9.20854376847552e-06, "loss": 0.4334, "step": 12939 }, { "epoch": 0.5938231379927493, "grad_norm": 0.4614405035972595, "learning_rate": 9.208411378661703e-06, "loss": 0.4126, "step": 12940 }, { "epoch": 0.5938690284980037, "grad_norm": 0.44731923937797546, "learning_rate": 9.208278978727984e-06, "loss": 0.3638, "step": 12941 }, { "epoch": 0.5939149190032582, "grad_norm": 0.47226008772850037, "learning_rate": 9.208146568674685e-06, "loss": 0.386, "step": 12942 }, { "epoch": 0.5939608095085127, "grad_norm": 0.41131842136383057, "learning_rate": 9.20801414850212e-06, "loss": 0.3158, "step": 12943 }, { "epoch": 0.5940067000137671, "grad_norm": 0.45567071437835693, "learning_rate": 9.207881718210608e-06, "loss": 0.414, "step": 12944 }, { "epoch": 0.5940525905190216, "grad_norm": 0.43952903151512146, "learning_rate": 9.20774927780047e-06, "loss": 0.4043, "step": 12945 }, { "epoch": 0.5940984810242761, "grad_norm": 0.4675942063331604, "learning_rate": 9.207616827272023e-06, "loss": 0.4278, "step": 12946 }, { "epoch": 0.5941443715295306, "grad_norm": 0.4753773808479309, "learning_rate": 9.207484366625584e-06, "loss": 0.4173, "step": 12947 }, { "epoch": 0.594190262034785, "grad_norm": 0.4498985707759857, "learning_rate": 9.207351895861473e-06, "loss": 0.3729, "step": 12948 }, { "epoch": 0.5942361525400395, "grad_norm": 0.43134593963623047, "learning_rate": 9.20721941498001e-06, "loss": 0.3277, "step": 12949 }, { "epoch": 0.594282043045294, "grad_norm": 0.46087270975112915, "learning_rate": 9.207086923981512e-06, "loss": 0.4323, "step": 12950 }, { "epoch": 0.5943279335505484, "grad_norm": 0.4703740179538727, "learning_rate": 9.206954422866297e-06, "loss": 0.421, "step": 12951 }, { "epoch": 0.5943738240558029, "grad_norm": 0.4733833968639374, "learning_rate": 9.206821911634685e-06, "loss": 0.4598, "step": 12952 }, { "epoch": 0.5944197145610574, "grad_norm": 0.46428442001342773, "learning_rate": 9.206689390286994e-06, "loss": 0.429, "step": 12953 }, { "epoch": 0.5944656050663117, "grad_norm": 0.46772095561027527, "learning_rate": 9.206556858823543e-06, "loss": 0.4016, "step": 12954 }, { "epoch": 0.5945114955715662, "grad_norm": 0.4766250252723694, "learning_rate": 9.20642431724465e-06, "loss": 0.4514, "step": 12955 }, { "epoch": 0.5945573860768207, "grad_norm": 0.43740561604499817, "learning_rate": 9.206291765550633e-06, "loss": 0.331, "step": 12956 }, { "epoch": 0.5946032765820751, "grad_norm": 0.44999006390571594, "learning_rate": 9.206159203741813e-06, "loss": 0.3664, "step": 12957 }, { "epoch": 0.5946491670873296, "grad_norm": 0.42002201080322266, "learning_rate": 9.206026631818508e-06, "loss": 0.3327, "step": 12958 }, { "epoch": 0.5946950575925841, "grad_norm": 0.5026551485061646, "learning_rate": 9.205894049781034e-06, "loss": 0.4749, "step": 12959 }, { "epoch": 0.5947409480978385, "grad_norm": 0.500328540802002, "learning_rate": 9.205761457629717e-06, "loss": 0.4272, "step": 12960 }, { "epoch": 0.594786838603093, "grad_norm": 0.48113298416137695, "learning_rate": 9.205628855364867e-06, "loss": 0.4308, "step": 12961 }, { "epoch": 0.5948327291083475, "grad_norm": 0.4434349536895752, "learning_rate": 9.205496242986806e-06, "loss": 0.3412, "step": 12962 }, { "epoch": 0.594878619613602, "grad_norm": 0.47223326563835144, "learning_rate": 9.205363620495858e-06, "loss": 0.3953, "step": 12963 }, { "epoch": 0.5949245101188564, "grad_norm": 0.4478762447834015, "learning_rate": 9.205230987892336e-06, "loss": 0.3718, "step": 12964 }, { "epoch": 0.5949704006241109, "grad_norm": 0.49147820472717285, "learning_rate": 9.20509834517656e-06, "loss": 0.4259, "step": 12965 }, { "epoch": 0.5950162911293654, "grad_norm": 0.4482985734939575, "learning_rate": 9.20496569234885e-06, "loss": 0.4404, "step": 12966 }, { "epoch": 0.5950621816346198, "grad_norm": 0.4371623992919922, "learning_rate": 9.204833029409523e-06, "loss": 0.3938, "step": 12967 }, { "epoch": 0.5951080721398743, "grad_norm": 0.45835113525390625, "learning_rate": 9.204700356358902e-06, "loss": 0.4275, "step": 12968 }, { "epoch": 0.5951539626451288, "grad_norm": 0.43346869945526123, "learning_rate": 9.204567673197303e-06, "loss": 0.3334, "step": 12969 }, { "epoch": 0.5951998531503832, "grad_norm": 0.4674853980541229, "learning_rate": 9.204434979925045e-06, "loss": 0.4065, "step": 12970 }, { "epoch": 0.5952457436556376, "grad_norm": 0.46480056643486023, "learning_rate": 9.204302276542449e-06, "loss": 0.4211, "step": 12971 }, { "epoch": 0.5952916341608921, "grad_norm": 0.444549560546875, "learning_rate": 9.204169563049833e-06, "loss": 0.3695, "step": 12972 }, { "epoch": 0.5953375246661465, "grad_norm": 0.4327852427959442, "learning_rate": 9.204036839447514e-06, "loss": 0.3494, "step": 12973 }, { "epoch": 0.595383415171401, "grad_norm": 0.49107569456100464, "learning_rate": 9.203904105735817e-06, "loss": 0.4681, "step": 12974 }, { "epoch": 0.5954293056766555, "grad_norm": 0.49489814043045044, "learning_rate": 9.203771361915054e-06, "loss": 0.5006, "step": 12975 }, { "epoch": 0.5954751961819099, "grad_norm": 0.4802511930465698, "learning_rate": 9.203638607985548e-06, "loss": 0.4732, "step": 12976 }, { "epoch": 0.5955210866871644, "grad_norm": 0.4593989849090576, "learning_rate": 9.203505843947619e-06, "loss": 0.3759, "step": 12977 }, { "epoch": 0.5955669771924189, "grad_norm": 0.5229728817939758, "learning_rate": 9.203373069801585e-06, "loss": 0.501, "step": 12978 }, { "epoch": 0.5956128676976733, "grad_norm": 0.49846121668815613, "learning_rate": 9.203240285547766e-06, "loss": 0.4222, "step": 12979 }, { "epoch": 0.5956587582029278, "grad_norm": 0.4297657012939453, "learning_rate": 9.203107491186479e-06, "loss": 0.3556, "step": 12980 }, { "epoch": 0.5957046487081823, "grad_norm": 0.49471333622932434, "learning_rate": 9.202974686718047e-06, "loss": 0.462, "step": 12981 }, { "epoch": 0.5957505392134368, "grad_norm": 0.4635196328163147, "learning_rate": 9.202841872142787e-06, "loss": 0.4241, "step": 12982 }, { "epoch": 0.5957964297186912, "grad_norm": 0.44151097536087036, "learning_rate": 9.202709047461017e-06, "loss": 0.3949, "step": 12983 }, { "epoch": 0.5958423202239457, "grad_norm": 0.45605647563934326, "learning_rate": 9.202576212673058e-06, "loss": 0.4089, "step": 12984 }, { "epoch": 0.5958882107292002, "grad_norm": 0.5137408971786499, "learning_rate": 9.202443367779232e-06, "loss": 0.5196, "step": 12985 }, { "epoch": 0.5959341012344546, "grad_norm": 0.4613582193851471, "learning_rate": 9.202310512779854e-06, "loss": 0.444, "step": 12986 }, { "epoch": 0.5959799917397091, "grad_norm": 0.43255722522735596, "learning_rate": 9.202177647675246e-06, "loss": 0.308, "step": 12987 }, { "epoch": 0.5960258822449636, "grad_norm": 0.4602659344673157, "learning_rate": 9.202044772465728e-06, "loss": 0.4659, "step": 12988 }, { "epoch": 0.5960717727502179, "grad_norm": 0.4723518490791321, "learning_rate": 9.201911887151618e-06, "loss": 0.3968, "step": 12989 }, { "epoch": 0.5961176632554724, "grad_norm": 0.49993953108787537, "learning_rate": 9.201778991733236e-06, "loss": 0.4746, "step": 12990 }, { "epoch": 0.5961635537607269, "grad_norm": 0.4536040127277374, "learning_rate": 9.2016460862109e-06, "loss": 0.3766, "step": 12991 }, { "epoch": 0.5962094442659813, "grad_norm": 0.45119616389274597, "learning_rate": 9.201513170584933e-06, "loss": 0.4346, "step": 12992 }, { "epoch": 0.5962553347712358, "grad_norm": 0.4271732270717621, "learning_rate": 9.201380244855653e-06, "loss": 0.3435, "step": 12993 }, { "epoch": 0.5963012252764903, "grad_norm": 0.5502074956893921, "learning_rate": 9.20124730902338e-06, "loss": 0.41, "step": 12994 }, { "epoch": 0.5963471157817447, "grad_norm": 0.4600518047809601, "learning_rate": 9.20111436308843e-06, "loss": 0.4339, "step": 12995 }, { "epoch": 0.5963930062869992, "grad_norm": 0.5103567242622375, "learning_rate": 9.20098140705113e-06, "loss": 0.4785, "step": 12996 }, { "epoch": 0.5964388967922537, "grad_norm": 0.49002790451049805, "learning_rate": 9.200848440911794e-06, "loss": 0.4409, "step": 12997 }, { "epoch": 0.5964847872975082, "grad_norm": 0.4779488742351532, "learning_rate": 9.200715464670743e-06, "loss": 0.4432, "step": 12998 }, { "epoch": 0.5965306778027626, "grad_norm": 0.4579794108867645, "learning_rate": 9.200582478328298e-06, "loss": 0.4017, "step": 12999 }, { "epoch": 0.5965765683080171, "grad_norm": 0.4982486367225647, "learning_rate": 9.200449481884775e-06, "loss": 0.4772, "step": 13000 }, { "epoch": 0.5966224588132716, "grad_norm": 0.47153058648109436, "learning_rate": 9.2003164753405e-06, "loss": 0.4126, "step": 13001 }, { "epoch": 0.596668349318526, "grad_norm": 0.4579368531703949, "learning_rate": 9.200183458695789e-06, "loss": 0.3991, "step": 13002 }, { "epoch": 0.5967142398237805, "grad_norm": 0.43567442893981934, "learning_rate": 9.20005043195096e-06, "loss": 0.3833, "step": 13003 }, { "epoch": 0.596760130329035, "grad_norm": 0.47357070446014404, "learning_rate": 9.199917395106338e-06, "loss": 0.4708, "step": 13004 }, { "epoch": 0.5968060208342894, "grad_norm": 0.4473586082458496, "learning_rate": 9.199784348162238e-06, "loss": 0.3603, "step": 13005 }, { "epoch": 0.5968519113395439, "grad_norm": 0.46122750639915466, "learning_rate": 9.199651291118983e-06, "loss": 0.4032, "step": 13006 }, { "epoch": 0.5968978018447983, "grad_norm": 0.48835256695747375, "learning_rate": 9.199518223976894e-06, "loss": 0.3967, "step": 13007 }, { "epoch": 0.5969436923500527, "grad_norm": 0.45220568776130676, "learning_rate": 9.199385146736286e-06, "loss": 0.4204, "step": 13008 }, { "epoch": 0.5969895828553072, "grad_norm": 0.42608579993247986, "learning_rate": 9.199252059397483e-06, "loss": 0.3677, "step": 13009 }, { "epoch": 0.5970354733605617, "grad_norm": 0.4402492046356201, "learning_rate": 9.199118961960806e-06, "loss": 0.3741, "step": 13010 }, { "epoch": 0.5970813638658161, "grad_norm": 0.44287845492362976, "learning_rate": 9.198985854426573e-06, "loss": 0.3666, "step": 13011 }, { "epoch": 0.5971272543710706, "grad_norm": 0.46072304248809814, "learning_rate": 9.198852736795102e-06, "loss": 0.4031, "step": 13012 }, { "epoch": 0.5971731448763251, "grad_norm": 0.45226195454597473, "learning_rate": 9.198719609066716e-06, "loss": 0.4338, "step": 13013 }, { "epoch": 0.5972190353815795, "grad_norm": 0.4359162747859955, "learning_rate": 9.198586471241736e-06, "loss": 0.44, "step": 13014 }, { "epoch": 0.597264925886834, "grad_norm": 0.4613572955131531, "learning_rate": 9.198453323320479e-06, "loss": 0.4341, "step": 13015 }, { "epoch": 0.5973108163920885, "grad_norm": 0.4477507174015045, "learning_rate": 9.198320165303266e-06, "loss": 0.4075, "step": 13016 }, { "epoch": 0.597356706897343, "grad_norm": 0.46776801347732544, "learning_rate": 9.198186997190421e-06, "loss": 0.4212, "step": 13017 }, { "epoch": 0.5974025974025974, "grad_norm": 0.4757107198238373, "learning_rate": 9.19805381898226e-06, "loss": 0.5101, "step": 13018 }, { "epoch": 0.5974484879078519, "grad_norm": 0.40856650471687317, "learning_rate": 9.197920630679103e-06, "loss": 0.3366, "step": 13019 }, { "epoch": 0.5974943784131064, "grad_norm": 0.4342365264892578, "learning_rate": 9.197787432281274e-06, "loss": 0.3665, "step": 13020 }, { "epoch": 0.5975402689183608, "grad_norm": 0.43029963970184326, "learning_rate": 9.19765422378909e-06, "loss": 0.3734, "step": 13021 }, { "epoch": 0.5975861594236153, "grad_norm": 0.4541354775428772, "learning_rate": 9.197521005202871e-06, "loss": 0.4506, "step": 13022 }, { "epoch": 0.5976320499288698, "grad_norm": 0.426876962184906, "learning_rate": 9.197387776522942e-06, "loss": 0.3396, "step": 13023 }, { "epoch": 0.5976779404341241, "grad_norm": 0.44111695885658264, "learning_rate": 9.197254537749619e-06, "loss": 0.3989, "step": 13024 }, { "epoch": 0.5977238309393786, "grad_norm": 0.5065500736236572, "learning_rate": 9.197121288883222e-06, "loss": 0.4454, "step": 13025 }, { "epoch": 0.5977697214446331, "grad_norm": 0.44995230436325073, "learning_rate": 9.196988029924074e-06, "loss": 0.4107, "step": 13026 }, { "epoch": 0.5978156119498875, "grad_norm": 0.5156726241111755, "learning_rate": 9.196854760872494e-06, "loss": 0.4785, "step": 13027 }, { "epoch": 0.597861502455142, "grad_norm": 0.5044330358505249, "learning_rate": 9.196721481728803e-06, "loss": 0.477, "step": 13028 }, { "epoch": 0.5979073929603965, "grad_norm": 0.4946177005767822, "learning_rate": 9.196588192493321e-06, "loss": 0.4441, "step": 13029 }, { "epoch": 0.5979532834656509, "grad_norm": 0.5054530501365662, "learning_rate": 9.19645489316637e-06, "loss": 0.5216, "step": 13030 }, { "epoch": 0.5979991739709054, "grad_norm": 0.4467655122280121, "learning_rate": 9.19632158374827e-06, "loss": 0.3715, "step": 13031 }, { "epoch": 0.5980450644761599, "grad_norm": 0.44318538904190063, "learning_rate": 9.196188264239338e-06, "loss": 0.324, "step": 13032 }, { "epoch": 0.5980909549814143, "grad_norm": 0.48898983001708984, "learning_rate": 9.1960549346399e-06, "loss": 0.395, "step": 13033 }, { "epoch": 0.5981368454866688, "grad_norm": 0.45287731289863586, "learning_rate": 9.195921594950274e-06, "loss": 0.3545, "step": 13034 }, { "epoch": 0.5981827359919233, "grad_norm": 0.460599809885025, "learning_rate": 9.195788245170781e-06, "loss": 0.4213, "step": 13035 }, { "epoch": 0.5982286264971778, "grad_norm": 0.5110242962837219, "learning_rate": 9.19565488530174e-06, "loss": 0.4765, "step": 13036 }, { "epoch": 0.5982745170024322, "grad_norm": 0.4277670085430145, "learning_rate": 9.195521515343472e-06, "loss": 0.3109, "step": 13037 }, { "epoch": 0.5983204075076867, "grad_norm": 0.46109262108802795, "learning_rate": 9.195388135296302e-06, "loss": 0.3857, "step": 13038 }, { "epoch": 0.5983662980129412, "grad_norm": 0.768392026424408, "learning_rate": 9.195254745160546e-06, "loss": 0.4387, "step": 13039 }, { "epoch": 0.5984121885181956, "grad_norm": 0.5008258819580078, "learning_rate": 9.195121344936528e-06, "loss": 0.5084, "step": 13040 }, { "epoch": 0.59845807902345, "grad_norm": 0.45953384041786194, "learning_rate": 9.194987934624567e-06, "loss": 0.418, "step": 13041 }, { "epoch": 0.5985039695287045, "grad_norm": 0.477068156003952, "learning_rate": 9.194854514224983e-06, "loss": 0.3709, "step": 13042 }, { "epoch": 0.5985498600339589, "grad_norm": 0.5379118323326111, "learning_rate": 9.194721083738097e-06, "loss": 0.3925, "step": 13043 }, { "epoch": 0.5985957505392134, "grad_norm": 0.45522406697273254, "learning_rate": 9.194587643164232e-06, "loss": 0.3671, "step": 13044 }, { "epoch": 0.5986416410444679, "grad_norm": 0.46020111441612244, "learning_rate": 9.194454192503708e-06, "loss": 0.433, "step": 13045 }, { "epoch": 0.5986875315497223, "grad_norm": 0.4651593863964081, "learning_rate": 9.194320731756844e-06, "loss": 0.4034, "step": 13046 }, { "epoch": 0.5987334220549768, "grad_norm": 0.532458484172821, "learning_rate": 9.194187260923963e-06, "loss": 0.4954, "step": 13047 }, { "epoch": 0.5987793125602313, "grad_norm": 0.4655955135822296, "learning_rate": 9.194053780005386e-06, "loss": 0.3916, "step": 13048 }, { "epoch": 0.5988252030654857, "grad_norm": 0.46037203073501587, "learning_rate": 9.193920289001433e-06, "loss": 0.3749, "step": 13049 }, { "epoch": 0.5988710935707402, "grad_norm": 0.42698484659194946, "learning_rate": 9.193786787912426e-06, "loss": 0.3004, "step": 13050 }, { "epoch": 0.5989169840759947, "grad_norm": 0.479028582572937, "learning_rate": 9.193653276738684e-06, "loss": 0.4179, "step": 13051 }, { "epoch": 0.5989628745812492, "grad_norm": 0.42838889360427856, "learning_rate": 9.193519755480528e-06, "loss": 0.3277, "step": 13052 }, { "epoch": 0.5990087650865036, "grad_norm": 0.4751914143562317, "learning_rate": 9.193386224138285e-06, "loss": 0.4125, "step": 13053 }, { "epoch": 0.5990546555917581, "grad_norm": 0.4815027117729187, "learning_rate": 9.193252682712268e-06, "loss": 0.4112, "step": 13054 }, { "epoch": 0.5991005460970126, "grad_norm": 0.458638072013855, "learning_rate": 9.193119131202804e-06, "loss": 0.3683, "step": 13055 }, { "epoch": 0.599146436602267, "grad_norm": 0.4553982615470886, "learning_rate": 9.19298556961021e-06, "loss": 0.3615, "step": 13056 }, { "epoch": 0.5991923271075215, "grad_norm": 0.48458924889564514, "learning_rate": 9.19285199793481e-06, "loss": 0.4219, "step": 13057 }, { "epoch": 0.599238217612776, "grad_norm": 0.4356871545314789, "learning_rate": 9.192718416176923e-06, "loss": 0.3365, "step": 13058 }, { "epoch": 0.5992841081180303, "grad_norm": 0.4829314351081848, "learning_rate": 9.192584824336873e-06, "loss": 0.4822, "step": 13059 }, { "epoch": 0.5993299986232848, "grad_norm": 0.44935545325279236, "learning_rate": 9.19245122241498e-06, "loss": 0.4036, "step": 13060 }, { "epoch": 0.5993758891285393, "grad_norm": 0.4538896083831787, "learning_rate": 9.192317610411563e-06, "loss": 0.4322, "step": 13061 }, { "epoch": 0.5994217796337937, "grad_norm": 0.4422077536582947, "learning_rate": 9.192183988326945e-06, "loss": 0.3865, "step": 13062 }, { "epoch": 0.5994676701390482, "grad_norm": 0.4704171121120453, "learning_rate": 9.19205035616145e-06, "loss": 0.4327, "step": 13063 }, { "epoch": 0.5995135606443027, "grad_norm": 8.319628715515137, "learning_rate": 9.191916713915397e-06, "loss": 0.3921, "step": 13064 }, { "epoch": 0.5995594511495571, "grad_norm": 0.4551405906677246, "learning_rate": 9.191783061589106e-06, "loss": 0.3639, "step": 13065 }, { "epoch": 0.5996053416548116, "grad_norm": 0.45395177602767944, "learning_rate": 9.1916493991829e-06, "loss": 0.3551, "step": 13066 }, { "epoch": 0.5996512321600661, "grad_norm": 0.5292371511459351, "learning_rate": 9.1915157266971e-06, "loss": 0.3898, "step": 13067 }, { "epoch": 0.5996971226653205, "grad_norm": 0.49894002079963684, "learning_rate": 9.191382044132026e-06, "loss": 0.4119, "step": 13068 }, { "epoch": 0.599743013170575, "grad_norm": 0.44736912846565247, "learning_rate": 9.191248351488004e-06, "loss": 0.3723, "step": 13069 }, { "epoch": 0.5997889036758295, "grad_norm": 0.4497484862804413, "learning_rate": 9.19111464876535e-06, "loss": 0.3748, "step": 13070 }, { "epoch": 0.599834794181084, "grad_norm": 0.4900120794773102, "learning_rate": 9.19098093596439e-06, "loss": 0.3982, "step": 13071 }, { "epoch": 0.5998806846863384, "grad_norm": 0.44614365696907043, "learning_rate": 9.190847213085443e-06, "loss": 0.3762, "step": 13072 }, { "epoch": 0.5999265751915929, "grad_norm": 0.48492252826690674, "learning_rate": 9.190713480128833e-06, "loss": 0.3949, "step": 13073 }, { "epoch": 0.5999724656968474, "grad_norm": 0.42985138297080994, "learning_rate": 9.190579737094876e-06, "loss": 0.3741, "step": 13074 }, { "epoch": 0.6000183562021018, "grad_norm": 0.5253840088844299, "learning_rate": 9.1904459839839e-06, "loss": 0.5328, "step": 13075 }, { "epoch": 0.6000642467073563, "grad_norm": 0.5018855929374695, "learning_rate": 9.190312220796223e-06, "loss": 0.4682, "step": 13076 }, { "epoch": 0.6001101372126108, "grad_norm": 0.4584819972515106, "learning_rate": 9.190178447532167e-06, "loss": 0.3859, "step": 13077 }, { "epoch": 0.6001560277178651, "grad_norm": 0.4401921331882477, "learning_rate": 9.190044664192055e-06, "loss": 0.3887, "step": 13078 }, { "epoch": 0.6002019182231196, "grad_norm": 0.44682690501213074, "learning_rate": 9.189910870776206e-06, "loss": 0.3979, "step": 13079 }, { "epoch": 0.6002478087283741, "grad_norm": 0.46294525265693665, "learning_rate": 9.189777067284947e-06, "loss": 0.4099, "step": 13080 }, { "epoch": 0.6002936992336285, "grad_norm": 0.46135830879211426, "learning_rate": 9.189643253718594e-06, "loss": 0.4875, "step": 13081 }, { "epoch": 0.600339589738883, "grad_norm": 0.45244288444519043, "learning_rate": 9.189509430077473e-06, "loss": 0.4327, "step": 13082 }, { "epoch": 0.6003854802441375, "grad_norm": 0.4156610071659088, "learning_rate": 9.189375596361902e-06, "loss": 0.3175, "step": 13083 }, { "epoch": 0.6004313707493919, "grad_norm": 0.44481128454208374, "learning_rate": 9.189241752572206e-06, "loss": 0.3866, "step": 13084 }, { "epoch": 0.6004772612546464, "grad_norm": 0.516258716583252, "learning_rate": 9.189107898708707e-06, "loss": 0.3719, "step": 13085 }, { "epoch": 0.6005231517599009, "grad_norm": 0.49316585063934326, "learning_rate": 9.188974034771724e-06, "loss": 0.4779, "step": 13086 }, { "epoch": 0.6005690422651553, "grad_norm": 0.4243500530719757, "learning_rate": 9.188840160761583e-06, "loss": 0.4062, "step": 13087 }, { "epoch": 0.6006149327704098, "grad_norm": 0.46520867943763733, "learning_rate": 9.188706276678601e-06, "loss": 0.3863, "step": 13088 }, { "epoch": 0.6006608232756643, "grad_norm": 0.4622286260128021, "learning_rate": 9.188572382523102e-06, "loss": 0.405, "step": 13089 }, { "epoch": 0.6007067137809188, "grad_norm": 0.5205509662628174, "learning_rate": 9.18843847829541e-06, "loss": 0.4794, "step": 13090 }, { "epoch": 0.6007526042861732, "grad_norm": 0.4583280086517334, "learning_rate": 9.188304563995845e-06, "loss": 0.433, "step": 13091 }, { "epoch": 0.6007984947914277, "grad_norm": 0.43881797790527344, "learning_rate": 9.188170639624729e-06, "loss": 0.3604, "step": 13092 }, { "epoch": 0.6008443852966822, "grad_norm": 0.45393168926239014, "learning_rate": 9.188036705182384e-06, "loss": 0.382, "step": 13093 }, { "epoch": 0.6008902758019365, "grad_norm": 0.4754018485546112, "learning_rate": 9.187902760669134e-06, "loss": 0.4136, "step": 13094 }, { "epoch": 0.600936166307191, "grad_norm": 0.4563855826854706, "learning_rate": 9.187768806085297e-06, "loss": 0.4001, "step": 13095 }, { "epoch": 0.6009820568124455, "grad_norm": 0.5106279850006104, "learning_rate": 9.187634841431201e-06, "loss": 0.4664, "step": 13096 }, { "epoch": 0.6010279473176999, "grad_norm": 0.4618285596370697, "learning_rate": 9.187500866707162e-06, "loss": 0.4364, "step": 13097 }, { "epoch": 0.6010738378229544, "grad_norm": 0.4490296244621277, "learning_rate": 9.187366881913506e-06, "loss": 0.3727, "step": 13098 }, { "epoch": 0.6011197283282089, "grad_norm": 0.4416576027870178, "learning_rate": 9.187232887050556e-06, "loss": 0.371, "step": 13099 }, { "epoch": 0.6011656188334633, "grad_norm": 0.4476524889469147, "learning_rate": 9.18709888211863e-06, "loss": 0.4184, "step": 13100 }, { "epoch": 0.6012115093387178, "grad_norm": 0.4171536862850189, "learning_rate": 9.186964867118054e-06, "loss": 0.3235, "step": 13101 }, { "epoch": 0.6012573998439723, "grad_norm": 0.45746901631355286, "learning_rate": 9.186830842049148e-06, "loss": 0.3861, "step": 13102 }, { "epoch": 0.6013032903492267, "grad_norm": 0.46083250641822815, "learning_rate": 9.186696806912236e-06, "loss": 0.3978, "step": 13103 }, { "epoch": 0.6013491808544812, "grad_norm": 0.5189059972763062, "learning_rate": 9.18656276170764e-06, "loss": 0.47, "step": 13104 }, { "epoch": 0.6013950713597357, "grad_norm": 0.44035518169403076, "learning_rate": 9.186428706435682e-06, "loss": 0.364, "step": 13105 }, { "epoch": 0.6014409618649902, "grad_norm": 0.43447956442832947, "learning_rate": 9.186294641096682e-06, "loss": 0.4012, "step": 13106 }, { "epoch": 0.6014868523702446, "grad_norm": 0.47762805223464966, "learning_rate": 9.186160565690967e-06, "loss": 0.4402, "step": 13107 }, { "epoch": 0.6015327428754991, "grad_norm": 0.4448474645614624, "learning_rate": 9.186026480218854e-06, "loss": 0.4035, "step": 13108 }, { "epoch": 0.6015786333807536, "grad_norm": 0.46709585189819336, "learning_rate": 9.185892384680671e-06, "loss": 0.4616, "step": 13109 }, { "epoch": 0.601624523886008, "grad_norm": 0.3941563367843628, "learning_rate": 9.185758279076737e-06, "loss": 0.3021, "step": 13110 }, { "epoch": 0.6016704143912625, "grad_norm": 0.48306068778038025, "learning_rate": 9.185624163407377e-06, "loss": 0.4861, "step": 13111 }, { "epoch": 0.601716304896517, "grad_norm": 0.42975151538848877, "learning_rate": 9.18549003767291e-06, "loss": 0.3632, "step": 13112 }, { "epoch": 0.6017621954017713, "grad_norm": 0.44125816226005554, "learning_rate": 9.185355901873662e-06, "loss": 0.3278, "step": 13113 }, { "epoch": 0.6018080859070258, "grad_norm": 0.5092422962188721, "learning_rate": 9.185221756009954e-06, "loss": 0.527, "step": 13114 }, { "epoch": 0.6018539764122803, "grad_norm": 0.4492291808128357, "learning_rate": 9.185087600082107e-06, "loss": 0.3573, "step": 13115 }, { "epoch": 0.6018998669175347, "grad_norm": 0.4542471766471863, "learning_rate": 9.184953434090446e-06, "loss": 0.3562, "step": 13116 }, { "epoch": 0.6019457574227892, "grad_norm": 0.4844908118247986, "learning_rate": 9.184819258035293e-06, "loss": 0.4132, "step": 13117 }, { "epoch": 0.6019916479280437, "grad_norm": 0.44149231910705566, "learning_rate": 9.18468507191697e-06, "loss": 0.3448, "step": 13118 }, { "epoch": 0.6020375384332981, "grad_norm": 0.46837496757507324, "learning_rate": 9.1845508757358e-06, "loss": 0.4653, "step": 13119 }, { "epoch": 0.6020834289385526, "grad_norm": 0.4606911838054657, "learning_rate": 9.184416669492107e-06, "loss": 0.3354, "step": 13120 }, { "epoch": 0.6021293194438071, "grad_norm": 0.43521472811698914, "learning_rate": 9.18428245318621e-06, "loss": 0.3792, "step": 13121 }, { "epoch": 0.6021752099490615, "grad_norm": 0.4705839157104492, "learning_rate": 9.184148226818437e-06, "loss": 0.4403, "step": 13122 }, { "epoch": 0.602221100454316, "grad_norm": 0.4833974838256836, "learning_rate": 9.184013990389107e-06, "loss": 0.3946, "step": 13123 }, { "epoch": 0.6022669909595705, "grad_norm": 0.45704784989356995, "learning_rate": 9.183879743898544e-06, "loss": 0.411, "step": 13124 }, { "epoch": 0.602312881464825, "grad_norm": 0.4900606870651245, "learning_rate": 9.18374548734707e-06, "loss": 0.4437, "step": 13125 }, { "epoch": 0.6023587719700794, "grad_norm": 0.3757486045360565, "learning_rate": 9.183611220735009e-06, "loss": 0.2631, "step": 13126 }, { "epoch": 0.6024046624753339, "grad_norm": 0.4267931878566742, "learning_rate": 9.183476944062684e-06, "loss": 0.3302, "step": 13127 }, { "epoch": 0.6024505529805884, "grad_norm": 0.46986663341522217, "learning_rate": 9.183342657330416e-06, "loss": 0.471, "step": 13128 }, { "epoch": 0.6024964434858427, "grad_norm": 0.4305763244628906, "learning_rate": 9.183208360538531e-06, "loss": 0.3432, "step": 13129 }, { "epoch": 0.6025423339910972, "grad_norm": 0.9164122939109802, "learning_rate": 9.183074053687348e-06, "loss": 0.441, "step": 13130 }, { "epoch": 0.6025882244963517, "grad_norm": 0.4516807794570923, "learning_rate": 9.182939736777194e-06, "loss": 0.3938, "step": 13131 }, { "epoch": 0.6026341150016061, "grad_norm": 0.45371124148368835, "learning_rate": 9.18280540980839e-06, "loss": 0.4188, "step": 13132 }, { "epoch": 0.6026800055068606, "grad_norm": 0.4521670341491699, "learning_rate": 9.182671072781257e-06, "loss": 0.4084, "step": 13133 }, { "epoch": 0.6027258960121151, "grad_norm": 0.4752746820449829, "learning_rate": 9.182536725696122e-06, "loss": 0.4118, "step": 13134 }, { "epoch": 0.6027717865173695, "grad_norm": 0.4611586332321167, "learning_rate": 9.182402368553308e-06, "loss": 0.4334, "step": 13135 }, { "epoch": 0.602817677022624, "grad_norm": 0.49664604663848877, "learning_rate": 9.182268001353133e-06, "loss": 0.5023, "step": 13136 }, { "epoch": 0.6028635675278785, "grad_norm": 0.5093265771865845, "learning_rate": 9.182133624095925e-06, "loss": 0.4809, "step": 13137 }, { "epoch": 0.6029094580331329, "grad_norm": 0.4916189908981323, "learning_rate": 9.181999236782004e-06, "loss": 0.4513, "step": 13138 }, { "epoch": 0.6029553485383874, "grad_norm": 0.4603444039821625, "learning_rate": 9.181864839411696e-06, "loss": 0.4259, "step": 13139 }, { "epoch": 0.6030012390436419, "grad_norm": 0.4792085587978363, "learning_rate": 9.181730431985323e-06, "loss": 0.4014, "step": 13140 }, { "epoch": 0.6030471295488964, "grad_norm": 0.47428810596466064, "learning_rate": 9.18159601450321e-06, "loss": 0.3854, "step": 13141 }, { "epoch": 0.6030930200541508, "grad_norm": 0.4688205420970917, "learning_rate": 9.181461586965674e-06, "loss": 0.3986, "step": 13142 }, { "epoch": 0.6031389105594053, "grad_norm": 0.4682396352291107, "learning_rate": 9.181327149373046e-06, "loss": 0.3962, "step": 13143 }, { "epoch": 0.6031848010646598, "grad_norm": 0.46324437856674194, "learning_rate": 9.181192701725644e-06, "loss": 0.3916, "step": 13144 }, { "epoch": 0.6032306915699142, "grad_norm": 0.45583561062812805, "learning_rate": 9.181058244023795e-06, "loss": 0.3465, "step": 13145 }, { "epoch": 0.6032765820751687, "grad_norm": 0.4615073502063751, "learning_rate": 9.180923776267819e-06, "loss": 0.3766, "step": 13146 }, { "epoch": 0.6033224725804232, "grad_norm": 0.422166645526886, "learning_rate": 9.180789298458041e-06, "loss": 0.3244, "step": 13147 }, { "epoch": 0.6033683630856775, "grad_norm": 0.49139878153800964, "learning_rate": 9.180654810594785e-06, "loss": 0.3963, "step": 13148 }, { "epoch": 0.603414253590932, "grad_norm": 0.47618353366851807, "learning_rate": 9.180520312678373e-06, "loss": 0.4462, "step": 13149 }, { "epoch": 0.6034601440961865, "grad_norm": 0.4901646375656128, "learning_rate": 9.180385804709128e-06, "loss": 0.4548, "step": 13150 }, { "epoch": 0.6035060346014409, "grad_norm": 0.448930025100708, "learning_rate": 9.180251286687376e-06, "loss": 0.3536, "step": 13151 }, { "epoch": 0.6035519251066954, "grad_norm": 0.48926153779029846, "learning_rate": 9.180116758613439e-06, "loss": 0.3892, "step": 13152 }, { "epoch": 0.6035978156119499, "grad_norm": 0.4527420103549957, "learning_rate": 9.179982220487639e-06, "loss": 0.4013, "step": 13153 }, { "epoch": 0.6036437061172043, "grad_norm": 0.4529235064983368, "learning_rate": 9.179847672310303e-06, "loss": 0.3397, "step": 13154 }, { "epoch": 0.6036895966224588, "grad_norm": 0.49350103735923767, "learning_rate": 9.179713114081752e-06, "loss": 0.4067, "step": 13155 }, { "epoch": 0.6037354871277133, "grad_norm": 0.44399493932724, "learning_rate": 9.17957854580231e-06, "loss": 0.3685, "step": 13156 }, { "epoch": 0.6037813776329677, "grad_norm": 0.8354093432426453, "learning_rate": 9.1794439674723e-06, "loss": 0.4493, "step": 13157 }, { "epoch": 0.6038272681382222, "grad_norm": 0.501746416091919, "learning_rate": 9.179309379092047e-06, "loss": 0.4754, "step": 13158 }, { "epoch": 0.6038731586434767, "grad_norm": 0.443144291639328, "learning_rate": 9.179174780661874e-06, "loss": 0.3863, "step": 13159 }, { "epoch": 0.6039190491487312, "grad_norm": 0.5173042416572571, "learning_rate": 9.179040172182104e-06, "loss": 0.4403, "step": 13160 }, { "epoch": 0.6039649396539856, "grad_norm": 0.455334335565567, "learning_rate": 9.178905553653062e-06, "loss": 0.3632, "step": 13161 }, { "epoch": 0.6040108301592401, "grad_norm": 0.4414371848106384, "learning_rate": 9.17877092507507e-06, "loss": 0.344, "step": 13162 }, { "epoch": 0.6040567206644946, "grad_norm": 0.4432876706123352, "learning_rate": 9.178636286448454e-06, "loss": 0.3586, "step": 13163 }, { "epoch": 0.604102611169749, "grad_norm": 0.4676642119884491, "learning_rate": 9.178501637773535e-06, "loss": 0.4242, "step": 13164 }, { "epoch": 0.6041485016750034, "grad_norm": 0.4625203311443329, "learning_rate": 9.17836697905064e-06, "loss": 0.4118, "step": 13165 }, { "epoch": 0.6041943921802579, "grad_norm": 0.430313378572464, "learning_rate": 9.17823231028009e-06, "loss": 0.3686, "step": 13166 }, { "epoch": 0.6042402826855123, "grad_norm": 0.46006646752357483, "learning_rate": 9.178097631462213e-06, "loss": 0.3957, "step": 13167 }, { "epoch": 0.6042861731907668, "grad_norm": 0.4284728765487671, "learning_rate": 9.177962942597327e-06, "loss": 0.3454, "step": 13168 }, { "epoch": 0.6043320636960213, "grad_norm": 0.4706713855266571, "learning_rate": 9.177828243685758e-06, "loss": 0.4013, "step": 13169 }, { "epoch": 0.6043779542012757, "grad_norm": 0.4403558671474457, "learning_rate": 9.177693534727832e-06, "loss": 0.3451, "step": 13170 }, { "epoch": 0.6044238447065302, "grad_norm": 0.48263901472091675, "learning_rate": 9.177558815723872e-06, "loss": 0.47, "step": 13171 }, { "epoch": 0.6044697352117847, "grad_norm": 0.45973673462867737, "learning_rate": 9.1774240866742e-06, "loss": 0.4174, "step": 13172 }, { "epoch": 0.6045156257170391, "grad_norm": 0.4744017720222473, "learning_rate": 9.177289347579144e-06, "loss": 0.3564, "step": 13173 }, { "epoch": 0.6045615162222936, "grad_norm": 0.45266127586364746, "learning_rate": 9.177154598439021e-06, "loss": 0.358, "step": 13174 }, { "epoch": 0.6046074067275481, "grad_norm": 0.4903585910797119, "learning_rate": 9.177019839254165e-06, "loss": 0.4423, "step": 13175 }, { "epoch": 0.6046532972328025, "grad_norm": 0.45294323563575745, "learning_rate": 9.17688507002489e-06, "loss": 0.3936, "step": 13176 }, { "epoch": 0.604699187738057, "grad_norm": 0.49111440777778625, "learning_rate": 9.176750290751528e-06, "loss": 0.4366, "step": 13177 }, { "epoch": 0.6047450782433115, "grad_norm": 0.5553954243659973, "learning_rate": 9.176615501434398e-06, "loss": 0.4649, "step": 13178 }, { "epoch": 0.604790968748566, "grad_norm": 0.4900134801864624, "learning_rate": 9.176480702073827e-06, "loss": 0.5209, "step": 13179 }, { "epoch": 0.6048368592538204, "grad_norm": 0.4586549401283264, "learning_rate": 9.176345892670137e-06, "loss": 0.4335, "step": 13180 }, { "epoch": 0.6048827497590749, "grad_norm": 0.44659167528152466, "learning_rate": 9.176211073223652e-06, "loss": 0.3732, "step": 13181 }, { "epoch": 0.6049286402643294, "grad_norm": 0.45426857471466064, "learning_rate": 9.176076243734702e-06, "loss": 0.3362, "step": 13182 }, { "epoch": 0.6049745307695837, "grad_norm": 0.44608205556869507, "learning_rate": 9.175941404203603e-06, "loss": 0.3709, "step": 13183 }, { "epoch": 0.6050204212748382, "grad_norm": 0.5027086138725281, "learning_rate": 9.175806554630685e-06, "loss": 0.4305, "step": 13184 }, { "epoch": 0.6050663117800927, "grad_norm": 0.47224509716033936, "learning_rate": 9.175671695016268e-06, "loss": 0.4154, "step": 13185 }, { "epoch": 0.6051122022853471, "grad_norm": 0.49410581588745117, "learning_rate": 9.175536825360678e-06, "loss": 0.5186, "step": 13186 }, { "epoch": 0.6051580927906016, "grad_norm": 0.46911725401878357, "learning_rate": 9.175401945664242e-06, "loss": 0.3999, "step": 13187 }, { "epoch": 0.6052039832958561, "grad_norm": 0.4600519835948944, "learning_rate": 9.17526705592728e-06, "loss": 0.3712, "step": 13188 }, { "epoch": 0.6052498738011105, "grad_norm": 0.46647652983665466, "learning_rate": 9.17513215615012e-06, "loss": 0.4224, "step": 13189 }, { "epoch": 0.605295764306365, "grad_norm": 0.44988003373146057, "learning_rate": 9.174997246333084e-06, "loss": 0.3621, "step": 13190 }, { "epoch": 0.6053416548116195, "grad_norm": 0.44794827699661255, "learning_rate": 9.174862326476498e-06, "loss": 0.442, "step": 13191 }, { "epoch": 0.6053875453168739, "grad_norm": 0.5060514807701111, "learning_rate": 9.174727396580685e-06, "loss": 0.5335, "step": 13192 }, { "epoch": 0.6054334358221284, "grad_norm": 0.43283554911613464, "learning_rate": 9.17459245664597e-06, "loss": 0.3723, "step": 13193 }, { "epoch": 0.6054793263273829, "grad_norm": 0.45293816924095154, "learning_rate": 9.174457506672679e-06, "loss": 0.4109, "step": 13194 }, { "epoch": 0.6055252168326374, "grad_norm": 0.4975714683532715, "learning_rate": 9.174322546661135e-06, "loss": 0.5223, "step": 13195 }, { "epoch": 0.6055711073378918, "grad_norm": 0.4433005750179291, "learning_rate": 9.174187576611662e-06, "loss": 0.3349, "step": 13196 }, { "epoch": 0.6056169978431463, "grad_norm": 0.4572184383869171, "learning_rate": 9.174052596524586e-06, "loss": 0.4109, "step": 13197 }, { "epoch": 0.6056628883484008, "grad_norm": 0.4908665120601654, "learning_rate": 9.17391760640023e-06, "loss": 0.423, "step": 13198 }, { "epoch": 0.6057087788536551, "grad_norm": 0.5083826184272766, "learning_rate": 9.17378260623892e-06, "loss": 0.4312, "step": 13199 }, { "epoch": 0.6057546693589096, "grad_norm": 0.46048974990844727, "learning_rate": 9.17364759604098e-06, "loss": 0.413, "step": 13200 }, { "epoch": 0.6058005598641641, "grad_norm": 0.45665237307548523, "learning_rate": 9.173512575806735e-06, "loss": 0.3945, "step": 13201 }, { "epoch": 0.6058464503694185, "grad_norm": 0.4536088705062866, "learning_rate": 9.17337754553651e-06, "loss": 0.4003, "step": 13202 }, { "epoch": 0.605892340874673, "grad_norm": 0.4466998279094696, "learning_rate": 9.173242505230628e-06, "loss": 0.3578, "step": 13203 }, { "epoch": 0.6059382313799275, "grad_norm": 0.4961372911930084, "learning_rate": 9.173107454889415e-06, "loss": 0.423, "step": 13204 }, { "epoch": 0.6059841218851819, "grad_norm": 0.5069820284843445, "learning_rate": 9.172972394513195e-06, "loss": 0.3985, "step": 13205 }, { "epoch": 0.6060300123904364, "grad_norm": 0.4713674783706665, "learning_rate": 9.172837324102294e-06, "loss": 0.3787, "step": 13206 }, { "epoch": 0.6060759028956909, "grad_norm": 0.4359363913536072, "learning_rate": 9.172702243657038e-06, "loss": 0.3892, "step": 13207 }, { "epoch": 0.6061217934009453, "grad_norm": 0.44410932064056396, "learning_rate": 9.172567153177749e-06, "loss": 0.3682, "step": 13208 }, { "epoch": 0.6061676839061998, "grad_norm": 0.4700978696346283, "learning_rate": 9.172432052664753e-06, "loss": 0.4116, "step": 13209 }, { "epoch": 0.6062135744114543, "grad_norm": 0.5533850789070129, "learning_rate": 9.172296942118373e-06, "loss": 0.4899, "step": 13210 }, { "epoch": 0.6062594649167087, "grad_norm": 0.543442964553833, "learning_rate": 9.172161821538938e-06, "loss": 0.4912, "step": 13211 }, { "epoch": 0.6063053554219632, "grad_norm": 0.43508365750312805, "learning_rate": 9.172026690926768e-06, "loss": 0.3445, "step": 13212 }, { "epoch": 0.6063512459272177, "grad_norm": 0.4867077171802521, "learning_rate": 9.171891550282193e-06, "loss": 0.452, "step": 13213 }, { "epoch": 0.6063971364324722, "grad_norm": 0.4436841607093811, "learning_rate": 9.171756399605535e-06, "loss": 0.3598, "step": 13214 }, { "epoch": 0.6064430269377266, "grad_norm": 0.45380640029907227, "learning_rate": 9.171621238897118e-06, "loss": 0.3598, "step": 13215 }, { "epoch": 0.6064889174429811, "grad_norm": 0.46638739109039307, "learning_rate": 9.17148606815727e-06, "loss": 0.4026, "step": 13216 }, { "epoch": 0.6065348079482356, "grad_norm": 0.45554307103157043, "learning_rate": 9.171350887386315e-06, "loss": 0.4218, "step": 13217 }, { "epoch": 0.6065806984534899, "grad_norm": 0.4145069122314453, "learning_rate": 9.171215696584577e-06, "loss": 0.3313, "step": 13218 }, { "epoch": 0.6066265889587444, "grad_norm": 0.48824459314346313, "learning_rate": 9.17108049575238e-06, "loss": 0.4952, "step": 13219 }, { "epoch": 0.6066724794639989, "grad_norm": 0.4915045201778412, "learning_rate": 9.170945284890054e-06, "loss": 0.4724, "step": 13220 }, { "epoch": 0.6067183699692533, "grad_norm": 0.4418303668498993, "learning_rate": 9.17081006399792e-06, "loss": 0.3506, "step": 13221 }, { "epoch": 0.6067642604745078, "grad_norm": 0.6353857517242432, "learning_rate": 9.170674833076301e-06, "loss": 0.325, "step": 13222 }, { "epoch": 0.6068101509797623, "grad_norm": 0.4405895471572876, "learning_rate": 9.170539592125529e-06, "loss": 0.3936, "step": 13223 }, { "epoch": 0.6068560414850167, "grad_norm": 0.47357064485549927, "learning_rate": 9.170404341145925e-06, "loss": 0.4479, "step": 13224 }, { "epoch": 0.6069019319902712, "grad_norm": 0.4581132233142853, "learning_rate": 9.170269080137814e-06, "loss": 0.367, "step": 13225 }, { "epoch": 0.6069478224955257, "grad_norm": 0.5103758573532104, "learning_rate": 9.170133809101522e-06, "loss": 0.3908, "step": 13226 }, { "epoch": 0.6069937130007801, "grad_norm": 0.48814964294433594, "learning_rate": 9.169998528037375e-06, "loss": 0.3826, "step": 13227 }, { "epoch": 0.6070396035060346, "grad_norm": 0.45264437794685364, "learning_rate": 9.169863236945697e-06, "loss": 0.3538, "step": 13228 }, { "epoch": 0.6070854940112891, "grad_norm": 0.47573983669281006, "learning_rate": 9.169727935826813e-06, "loss": 0.3834, "step": 13229 }, { "epoch": 0.6071313845165436, "grad_norm": 0.4457363486289978, "learning_rate": 9.169592624681052e-06, "loss": 0.4592, "step": 13230 }, { "epoch": 0.607177275021798, "grad_norm": 0.44521382451057434, "learning_rate": 9.169457303508734e-06, "loss": 0.4084, "step": 13231 }, { "epoch": 0.6072231655270525, "grad_norm": 0.44730156660079956, "learning_rate": 9.169321972310189e-06, "loss": 0.3678, "step": 13232 }, { "epoch": 0.607269056032307, "grad_norm": 0.4619680643081665, "learning_rate": 9.169186631085738e-06, "loss": 0.4317, "step": 13233 }, { "epoch": 0.6073149465375614, "grad_norm": 0.47980552911758423, "learning_rate": 9.169051279835712e-06, "loss": 0.4293, "step": 13234 }, { "epoch": 0.6073608370428158, "grad_norm": 0.4269371032714844, "learning_rate": 9.168915918560433e-06, "loss": 0.3021, "step": 13235 }, { "epoch": 0.6074067275480703, "grad_norm": 0.44181379675865173, "learning_rate": 9.168780547260225e-06, "loss": 0.3778, "step": 13236 }, { "epoch": 0.6074526180533247, "grad_norm": 0.4818260967731476, "learning_rate": 9.168645165935417e-06, "loss": 0.387, "step": 13237 }, { "epoch": 0.6074985085585792, "grad_norm": 0.45843955874443054, "learning_rate": 9.168509774586331e-06, "loss": 0.4406, "step": 13238 }, { "epoch": 0.6075443990638337, "grad_norm": 0.4779668152332306, "learning_rate": 9.168374373213298e-06, "loss": 0.4188, "step": 13239 }, { "epoch": 0.6075902895690881, "grad_norm": 0.45344918966293335, "learning_rate": 9.168238961816637e-06, "loss": 0.3955, "step": 13240 }, { "epoch": 0.6076361800743426, "grad_norm": 0.43827101588249207, "learning_rate": 9.168103540396681e-06, "loss": 0.3971, "step": 13241 }, { "epoch": 0.6076820705795971, "grad_norm": 0.464828759431839, "learning_rate": 9.167968108953748e-06, "loss": 0.3808, "step": 13242 }, { "epoch": 0.6077279610848515, "grad_norm": 0.4554233253002167, "learning_rate": 9.167832667488168e-06, "loss": 0.3729, "step": 13243 }, { "epoch": 0.607773851590106, "grad_norm": 0.453374445438385, "learning_rate": 9.167697216000265e-06, "loss": 0.3919, "step": 13244 }, { "epoch": 0.6078197420953605, "grad_norm": 0.46702778339385986, "learning_rate": 9.167561754490366e-06, "loss": 0.4137, "step": 13245 }, { "epoch": 0.6078656326006149, "grad_norm": 0.4790914058685303, "learning_rate": 9.167426282958797e-06, "loss": 0.4441, "step": 13246 }, { "epoch": 0.6079115231058694, "grad_norm": 0.4316692650318146, "learning_rate": 9.167290801405883e-06, "loss": 0.3765, "step": 13247 }, { "epoch": 0.6079574136111239, "grad_norm": 0.45797622203826904, "learning_rate": 9.16715530983195e-06, "loss": 0.4203, "step": 13248 }, { "epoch": 0.6080033041163784, "grad_norm": 0.442089319229126, "learning_rate": 9.167019808237323e-06, "loss": 0.361, "step": 13249 }, { "epoch": 0.6080491946216328, "grad_norm": 0.4688459634780884, "learning_rate": 9.166884296622329e-06, "loss": 0.3473, "step": 13250 }, { "epoch": 0.6080950851268873, "grad_norm": 0.4200957119464874, "learning_rate": 9.166748774987292e-06, "loss": 0.3012, "step": 13251 }, { "epoch": 0.6081409756321418, "grad_norm": 0.43397995829582214, "learning_rate": 9.16661324333254e-06, "loss": 0.3807, "step": 13252 }, { "epoch": 0.6081868661373961, "grad_norm": 0.5297274589538574, "learning_rate": 9.1664777016584e-06, "loss": 0.5295, "step": 13253 }, { "epoch": 0.6082327566426506, "grad_norm": 0.4836972653865814, "learning_rate": 9.166342149965195e-06, "loss": 0.3667, "step": 13254 }, { "epoch": 0.6082786471479051, "grad_norm": 0.4567781388759613, "learning_rate": 9.16620658825325e-06, "loss": 0.3751, "step": 13255 }, { "epoch": 0.6083245376531595, "grad_norm": 0.4763796329498291, "learning_rate": 9.166071016522895e-06, "loss": 0.4774, "step": 13256 }, { "epoch": 0.608370428158414, "grad_norm": 0.4118577241897583, "learning_rate": 9.165935434774455e-06, "loss": 0.3197, "step": 13257 }, { "epoch": 0.6084163186636685, "grad_norm": 0.43617308139801025, "learning_rate": 9.165799843008253e-06, "loss": 0.3347, "step": 13258 }, { "epoch": 0.6084622091689229, "grad_norm": 0.44588226079940796, "learning_rate": 9.165664241224618e-06, "loss": 0.4007, "step": 13259 }, { "epoch": 0.6085080996741774, "grad_norm": 0.5056838393211365, "learning_rate": 9.165528629423877e-06, "loss": 0.5304, "step": 13260 }, { "epoch": 0.6085539901794319, "grad_norm": 0.47182509303092957, "learning_rate": 9.165393007606352e-06, "loss": 0.4269, "step": 13261 }, { "epoch": 0.6085998806846863, "grad_norm": 0.5026556849479675, "learning_rate": 9.165257375772371e-06, "loss": 0.5562, "step": 13262 }, { "epoch": 0.6086457711899408, "grad_norm": 0.47351646423339844, "learning_rate": 9.165121733922262e-06, "loss": 0.4186, "step": 13263 }, { "epoch": 0.6086916616951953, "grad_norm": 0.4371229112148285, "learning_rate": 9.16498608205635e-06, "loss": 0.3282, "step": 13264 }, { "epoch": 0.6087375522004497, "grad_norm": 0.5177859663963318, "learning_rate": 9.16485042017496e-06, "loss": 0.5101, "step": 13265 }, { "epoch": 0.6087834427057042, "grad_norm": 0.46313774585723877, "learning_rate": 9.16471474827842e-06, "loss": 0.4103, "step": 13266 }, { "epoch": 0.6088293332109587, "grad_norm": 0.46497783064842224, "learning_rate": 9.164579066367054e-06, "loss": 0.4165, "step": 13267 }, { "epoch": 0.6088752237162132, "grad_norm": 0.4599396884441376, "learning_rate": 9.164443374441191e-06, "loss": 0.4143, "step": 13268 }, { "epoch": 0.6089211142214676, "grad_norm": 0.45004889369010925, "learning_rate": 9.164307672501157e-06, "loss": 0.3463, "step": 13269 }, { "epoch": 0.608967004726722, "grad_norm": 0.4713543951511383, "learning_rate": 9.164171960547275e-06, "loss": 0.4281, "step": 13270 }, { "epoch": 0.6090128952319765, "grad_norm": 0.511316180229187, "learning_rate": 9.164036238579876e-06, "loss": 0.4624, "step": 13271 }, { "epoch": 0.6090587857372309, "grad_norm": 0.4661910831928253, "learning_rate": 9.163900506599283e-06, "loss": 0.4194, "step": 13272 }, { "epoch": 0.6091046762424854, "grad_norm": 0.4375600218772888, "learning_rate": 9.163764764605823e-06, "loss": 0.3624, "step": 13273 }, { "epoch": 0.6091505667477399, "grad_norm": 0.4491141438484192, "learning_rate": 9.163629012599824e-06, "loss": 0.3493, "step": 13274 }, { "epoch": 0.6091964572529943, "grad_norm": 0.4904569983482361, "learning_rate": 9.16349325058161e-06, "loss": 0.4841, "step": 13275 }, { "epoch": 0.6092423477582488, "grad_norm": 0.46743354201316833, "learning_rate": 9.16335747855151e-06, "loss": 0.3889, "step": 13276 }, { "epoch": 0.6092882382635033, "grad_norm": 0.41804802417755127, "learning_rate": 9.163221696509847e-06, "loss": 0.3099, "step": 13277 }, { "epoch": 0.6093341287687577, "grad_norm": 0.43893468379974365, "learning_rate": 9.163085904456952e-06, "loss": 0.3489, "step": 13278 }, { "epoch": 0.6093800192740122, "grad_norm": 0.46362921595573425, "learning_rate": 9.16295010239315e-06, "loss": 0.4929, "step": 13279 }, { "epoch": 0.6094259097792667, "grad_norm": 0.44740214943885803, "learning_rate": 9.162814290318764e-06, "loss": 0.389, "step": 13280 }, { "epoch": 0.6094718002845211, "grad_norm": 0.45767709612846375, "learning_rate": 9.162678468234126e-06, "loss": 0.4196, "step": 13281 }, { "epoch": 0.6095176907897756, "grad_norm": 0.46190154552459717, "learning_rate": 9.162542636139557e-06, "loss": 0.3874, "step": 13282 }, { "epoch": 0.6095635812950301, "grad_norm": 0.4538164734840393, "learning_rate": 9.16240679403539e-06, "loss": 0.3929, "step": 13283 }, { "epoch": 0.6096094718002846, "grad_norm": 0.45440441370010376, "learning_rate": 9.162270941921947e-06, "loss": 0.421, "step": 13284 }, { "epoch": 0.609655362305539, "grad_norm": 0.4806460738182068, "learning_rate": 9.162135079799557e-06, "loss": 0.4569, "step": 13285 }, { "epoch": 0.6097012528107935, "grad_norm": 0.4554196894168854, "learning_rate": 9.161999207668544e-06, "loss": 0.4114, "step": 13286 }, { "epoch": 0.609747143316048, "grad_norm": 0.47221019864082336, "learning_rate": 9.161863325529237e-06, "loss": 0.4237, "step": 13287 }, { "epoch": 0.6097930338213023, "grad_norm": 0.44815418124198914, "learning_rate": 9.161727433381962e-06, "loss": 0.3815, "step": 13288 }, { "epoch": 0.6098389243265568, "grad_norm": 0.472644180059433, "learning_rate": 9.161591531227048e-06, "loss": 0.4549, "step": 13289 }, { "epoch": 0.6098848148318113, "grad_norm": 0.45040163397789, "learning_rate": 9.161455619064818e-06, "loss": 0.3823, "step": 13290 }, { "epoch": 0.6099307053370657, "grad_norm": 0.45927244424819946, "learning_rate": 9.1613196968956e-06, "loss": 0.3916, "step": 13291 }, { "epoch": 0.6099765958423202, "grad_norm": 0.45920559763908386, "learning_rate": 9.161183764719724e-06, "loss": 0.366, "step": 13292 }, { "epoch": 0.6100224863475747, "grad_norm": 0.4457528591156006, "learning_rate": 9.161047822537513e-06, "loss": 0.376, "step": 13293 }, { "epoch": 0.6100683768528291, "grad_norm": 0.43101900815963745, "learning_rate": 9.160911870349293e-06, "loss": 0.3429, "step": 13294 }, { "epoch": 0.6101142673580836, "grad_norm": 0.5050458908081055, "learning_rate": 9.160775908155397e-06, "loss": 0.4963, "step": 13295 }, { "epoch": 0.6101601578633381, "grad_norm": 0.5203952193260193, "learning_rate": 9.160639935956147e-06, "loss": 0.4884, "step": 13296 }, { "epoch": 0.6102060483685925, "grad_norm": 0.4877369999885559, "learning_rate": 9.16050395375187e-06, "loss": 0.4454, "step": 13297 }, { "epoch": 0.610251938873847, "grad_norm": 0.4694271683692932, "learning_rate": 9.160367961542894e-06, "loss": 0.4539, "step": 13298 }, { "epoch": 0.6102978293791015, "grad_norm": 0.4723474383354187, "learning_rate": 9.160231959329547e-06, "loss": 0.4457, "step": 13299 }, { "epoch": 0.6103437198843559, "grad_norm": 0.49191516637802124, "learning_rate": 9.160095947112154e-06, "loss": 0.4472, "step": 13300 }, { "epoch": 0.6103896103896104, "grad_norm": 0.46903061866760254, "learning_rate": 9.159959924891045e-06, "loss": 0.4144, "step": 13301 }, { "epoch": 0.6104355008948649, "grad_norm": 0.4640098512172699, "learning_rate": 9.159823892666544e-06, "loss": 0.4087, "step": 13302 }, { "epoch": 0.6104813914001194, "grad_norm": 0.4533601999282837, "learning_rate": 9.15968785043898e-06, "loss": 0.3645, "step": 13303 }, { "epoch": 0.6105272819053738, "grad_norm": 0.47778424620628357, "learning_rate": 9.159551798208678e-06, "loss": 0.4073, "step": 13304 }, { "epoch": 0.6105731724106283, "grad_norm": 0.4762365520000458, "learning_rate": 9.159415735975967e-06, "loss": 0.4052, "step": 13305 }, { "epoch": 0.6106190629158827, "grad_norm": 0.4551175534725189, "learning_rate": 9.159279663741177e-06, "loss": 0.3907, "step": 13306 }, { "epoch": 0.6106649534211371, "grad_norm": 0.4675368666648865, "learning_rate": 9.15914358150463e-06, "loss": 0.3595, "step": 13307 }, { "epoch": 0.6107108439263916, "grad_norm": 0.409780889749527, "learning_rate": 9.159007489266655e-06, "loss": 0.3363, "step": 13308 }, { "epoch": 0.6107567344316461, "grad_norm": 0.4369770884513855, "learning_rate": 9.158871387027579e-06, "loss": 0.3007, "step": 13309 }, { "epoch": 0.6108026249369005, "grad_norm": 0.4592125415802002, "learning_rate": 9.15873527478773e-06, "loss": 0.3907, "step": 13310 }, { "epoch": 0.610848515442155, "grad_norm": 0.44635066390037537, "learning_rate": 9.158599152547437e-06, "loss": 0.4421, "step": 13311 }, { "epoch": 0.6108944059474095, "grad_norm": 0.4271751940250397, "learning_rate": 9.158463020307025e-06, "loss": 0.3018, "step": 13312 }, { "epoch": 0.6109402964526639, "grad_norm": 0.4735080599784851, "learning_rate": 9.158326878066821e-06, "loss": 0.4225, "step": 13313 }, { "epoch": 0.6109861869579184, "grad_norm": 0.4223187267780304, "learning_rate": 9.158190725827155e-06, "loss": 0.3714, "step": 13314 }, { "epoch": 0.6110320774631729, "grad_norm": 0.45918720960617065, "learning_rate": 9.158054563588349e-06, "loss": 0.3681, "step": 13315 }, { "epoch": 0.6110779679684273, "grad_norm": 0.459389865398407, "learning_rate": 9.157918391350738e-06, "loss": 0.3977, "step": 13316 }, { "epoch": 0.6111238584736818, "grad_norm": 0.4682282507419586, "learning_rate": 9.157782209114643e-06, "loss": 0.4298, "step": 13317 }, { "epoch": 0.6111697489789363, "grad_norm": 0.5041065812110901, "learning_rate": 9.157646016880395e-06, "loss": 0.4551, "step": 13318 }, { "epoch": 0.6112156394841908, "grad_norm": 0.47502198815345764, "learning_rate": 9.157509814648323e-06, "loss": 0.3647, "step": 13319 }, { "epoch": 0.6112615299894452, "grad_norm": 0.47289445996284485, "learning_rate": 9.157373602418749e-06, "loss": 0.4238, "step": 13320 }, { "epoch": 0.6113074204946997, "grad_norm": 0.42670324444770813, "learning_rate": 9.157237380192003e-06, "loss": 0.3684, "step": 13321 }, { "epoch": 0.6113533109999542, "grad_norm": 0.46294015645980835, "learning_rate": 9.157101147968414e-06, "loss": 0.4177, "step": 13322 }, { "epoch": 0.6113992015052085, "grad_norm": 0.47807419300079346, "learning_rate": 9.15696490574831e-06, "loss": 0.3844, "step": 13323 }, { "epoch": 0.611445092010463, "grad_norm": 0.4557643532752991, "learning_rate": 9.156828653532018e-06, "loss": 0.3651, "step": 13324 }, { "epoch": 0.6114909825157175, "grad_norm": 0.4858211278915405, "learning_rate": 9.156692391319862e-06, "loss": 0.3605, "step": 13325 }, { "epoch": 0.6115368730209719, "grad_norm": 0.47030797600746155, "learning_rate": 9.156556119112175e-06, "loss": 0.447, "step": 13326 }, { "epoch": 0.6115827635262264, "grad_norm": 0.47900688648223877, "learning_rate": 9.15641983690928e-06, "loss": 0.4221, "step": 13327 }, { "epoch": 0.6116286540314809, "grad_norm": 0.43420517444610596, "learning_rate": 9.156283544711509e-06, "loss": 0.3492, "step": 13328 }, { "epoch": 0.6116745445367353, "grad_norm": 0.4537833333015442, "learning_rate": 9.156147242519188e-06, "loss": 0.3679, "step": 13329 }, { "epoch": 0.6117204350419898, "grad_norm": 0.44130414724349976, "learning_rate": 9.156010930332643e-06, "loss": 0.3723, "step": 13330 }, { "epoch": 0.6117663255472443, "grad_norm": 0.47028690576553345, "learning_rate": 9.155874608152206e-06, "loss": 0.4246, "step": 13331 }, { "epoch": 0.6118122160524987, "grad_norm": 0.423086553812027, "learning_rate": 9.155738275978201e-06, "loss": 0.3828, "step": 13332 }, { "epoch": 0.6118581065577532, "grad_norm": 0.4757053554058075, "learning_rate": 9.155601933810956e-06, "loss": 0.3893, "step": 13333 }, { "epoch": 0.6119039970630077, "grad_norm": 0.49114707112312317, "learning_rate": 9.155465581650801e-06, "loss": 0.3914, "step": 13334 }, { "epoch": 0.6119498875682621, "grad_norm": 0.46154144406318665, "learning_rate": 9.155329219498063e-06, "loss": 0.3805, "step": 13335 }, { "epoch": 0.6119957780735166, "grad_norm": 0.48199644684791565, "learning_rate": 9.155192847353068e-06, "loss": 0.4478, "step": 13336 }, { "epoch": 0.6120416685787711, "grad_norm": 0.4245755672454834, "learning_rate": 9.155056465216147e-06, "loss": 0.301, "step": 13337 }, { "epoch": 0.6120875590840256, "grad_norm": 0.4448850750923157, "learning_rate": 9.154920073087626e-06, "loss": 0.438, "step": 13338 }, { "epoch": 0.61213344958928, "grad_norm": 0.44625014066696167, "learning_rate": 9.154783670967835e-06, "loss": 0.4024, "step": 13339 }, { "epoch": 0.6121793400945345, "grad_norm": 0.4629194140434265, "learning_rate": 9.1546472588571e-06, "loss": 0.4416, "step": 13340 }, { "epoch": 0.612225230599789, "grad_norm": 0.46314817667007446, "learning_rate": 9.154510836755748e-06, "loss": 0.3864, "step": 13341 }, { "epoch": 0.6122711211050433, "grad_norm": 0.49812954664230347, "learning_rate": 9.15437440466411e-06, "loss": 0.4162, "step": 13342 }, { "epoch": 0.6123170116102978, "grad_norm": 0.44824784994125366, "learning_rate": 9.154237962582511e-06, "loss": 0.378, "step": 13343 }, { "epoch": 0.6123629021155523, "grad_norm": 0.4450624883174896, "learning_rate": 9.154101510511283e-06, "loss": 0.3625, "step": 13344 }, { "epoch": 0.6124087926208067, "grad_norm": 0.4278448224067688, "learning_rate": 9.15396504845075e-06, "loss": 0.3262, "step": 13345 }, { "epoch": 0.6124546831260612, "grad_norm": 0.47586536407470703, "learning_rate": 9.153828576401245e-06, "loss": 0.469, "step": 13346 }, { "epoch": 0.6125005736313157, "grad_norm": 0.4475921094417572, "learning_rate": 9.153692094363092e-06, "loss": 0.3417, "step": 13347 }, { "epoch": 0.6125464641365701, "grad_norm": 0.4293804168701172, "learning_rate": 9.15355560233662e-06, "loss": 0.3636, "step": 13348 }, { "epoch": 0.6125923546418246, "grad_norm": 0.4287230372428894, "learning_rate": 9.153419100322157e-06, "loss": 0.3868, "step": 13349 }, { "epoch": 0.6126382451470791, "grad_norm": 0.4459531903266907, "learning_rate": 9.153282588320032e-06, "loss": 0.3788, "step": 13350 }, { "epoch": 0.6126841356523335, "grad_norm": 0.4465651512145996, "learning_rate": 9.153146066330574e-06, "loss": 0.3785, "step": 13351 }, { "epoch": 0.612730026157588, "grad_norm": 0.43893948197364807, "learning_rate": 9.15300953435411e-06, "loss": 0.3819, "step": 13352 }, { "epoch": 0.6127759166628425, "grad_norm": 0.4302557408809662, "learning_rate": 9.152872992390969e-06, "loss": 0.389, "step": 13353 }, { "epoch": 0.6128218071680969, "grad_norm": 0.466801255941391, "learning_rate": 9.15273644044148e-06, "loss": 0.44, "step": 13354 }, { "epoch": 0.6128676976733514, "grad_norm": 0.45544758439064026, "learning_rate": 9.152599878505968e-06, "loss": 0.3866, "step": 13355 }, { "epoch": 0.6129135881786059, "grad_norm": 0.4682729244232178, "learning_rate": 9.152463306584766e-06, "loss": 0.3874, "step": 13356 }, { "epoch": 0.6129594786838604, "grad_norm": 0.4749312102794647, "learning_rate": 9.1523267246782e-06, "loss": 0.4494, "step": 13357 }, { "epoch": 0.6130053691891147, "grad_norm": 0.5078727602958679, "learning_rate": 9.152190132786598e-06, "loss": 0.5024, "step": 13358 }, { "epoch": 0.6130512596943692, "grad_norm": 0.4542948305606842, "learning_rate": 9.15205353091029e-06, "loss": 0.3657, "step": 13359 }, { "epoch": 0.6130971501996237, "grad_norm": 0.44105008244514465, "learning_rate": 9.151916919049604e-06, "loss": 0.3682, "step": 13360 }, { "epoch": 0.6131430407048781, "grad_norm": 0.4381992220878601, "learning_rate": 9.151780297204867e-06, "loss": 0.3544, "step": 13361 }, { "epoch": 0.6131889312101326, "grad_norm": 0.4764685332775116, "learning_rate": 9.151643665376408e-06, "loss": 0.4242, "step": 13362 }, { "epoch": 0.6132348217153871, "grad_norm": 0.4655154049396515, "learning_rate": 9.15150702356456e-06, "loss": 0.3931, "step": 13363 }, { "epoch": 0.6132807122206415, "grad_norm": 0.49365729093551636, "learning_rate": 9.151370371769645e-06, "loss": 0.4438, "step": 13364 }, { "epoch": 0.613326602725896, "grad_norm": 0.4606526494026184, "learning_rate": 9.151233709991995e-06, "loss": 0.4109, "step": 13365 }, { "epoch": 0.6133724932311505, "grad_norm": 0.4721617102622986, "learning_rate": 9.151097038231938e-06, "loss": 0.4438, "step": 13366 }, { "epoch": 0.6134183837364049, "grad_norm": 0.4713938236236572, "learning_rate": 9.150960356489803e-06, "loss": 0.3668, "step": 13367 }, { "epoch": 0.6134642742416594, "grad_norm": 0.4764348268508911, "learning_rate": 9.150823664765918e-06, "loss": 0.3837, "step": 13368 }, { "epoch": 0.6135101647469139, "grad_norm": 0.4997861683368683, "learning_rate": 9.150686963060612e-06, "loss": 0.4279, "step": 13369 }, { "epoch": 0.6135560552521683, "grad_norm": 0.49250558018684387, "learning_rate": 9.150550251374213e-06, "loss": 0.4223, "step": 13370 }, { "epoch": 0.6136019457574228, "grad_norm": 0.4279305636882782, "learning_rate": 9.150413529707053e-06, "loss": 0.3603, "step": 13371 }, { "epoch": 0.6136478362626773, "grad_norm": 0.49269410967826843, "learning_rate": 9.150276798059456e-06, "loss": 0.4214, "step": 13372 }, { "epoch": 0.6136937267679318, "grad_norm": 0.44942909479141235, "learning_rate": 9.150140056431756e-06, "loss": 0.3561, "step": 13373 }, { "epoch": 0.6137396172731862, "grad_norm": 0.44858014583587646, "learning_rate": 9.150003304824278e-06, "loss": 0.3724, "step": 13374 }, { "epoch": 0.6137855077784407, "grad_norm": 0.4494982063770294, "learning_rate": 9.14986654323735e-06, "loss": 0.4124, "step": 13375 }, { "epoch": 0.6138313982836952, "grad_norm": 0.47216880321502686, "learning_rate": 9.149729771671304e-06, "loss": 0.3985, "step": 13376 }, { "epoch": 0.6138772887889495, "grad_norm": 0.4310190677642822, "learning_rate": 9.149592990126468e-06, "loss": 0.3818, "step": 13377 }, { "epoch": 0.613923179294204, "grad_norm": 0.446488618850708, "learning_rate": 9.149456198603169e-06, "loss": 0.3549, "step": 13378 }, { "epoch": 0.6139690697994585, "grad_norm": 0.4528619050979614, "learning_rate": 9.149319397101739e-06, "loss": 0.3739, "step": 13379 }, { "epoch": 0.6140149603047129, "grad_norm": 0.4326205253601074, "learning_rate": 9.149182585622504e-06, "loss": 0.3846, "step": 13380 }, { "epoch": 0.6140608508099674, "grad_norm": 0.4209747314453125, "learning_rate": 9.149045764165796e-06, "loss": 0.3718, "step": 13381 }, { "epoch": 0.6141067413152219, "grad_norm": 0.43557941913604736, "learning_rate": 9.148908932731941e-06, "loss": 0.3653, "step": 13382 }, { "epoch": 0.6141526318204763, "grad_norm": 0.5107849836349487, "learning_rate": 9.14877209132127e-06, "loss": 0.4576, "step": 13383 }, { "epoch": 0.6141985223257308, "grad_norm": 0.5057132840156555, "learning_rate": 9.148635239934112e-06, "loss": 0.4478, "step": 13384 }, { "epoch": 0.6142444128309853, "grad_norm": 0.4581402838230133, "learning_rate": 9.148498378570796e-06, "loss": 0.4345, "step": 13385 }, { "epoch": 0.6142903033362397, "grad_norm": 0.4547920227050781, "learning_rate": 9.148361507231649e-06, "loss": 0.3676, "step": 13386 }, { "epoch": 0.6143361938414942, "grad_norm": 0.4942670166492462, "learning_rate": 9.148224625917003e-06, "loss": 0.5, "step": 13387 }, { "epoch": 0.6143820843467487, "grad_norm": 0.4619365930557251, "learning_rate": 9.148087734627185e-06, "loss": 0.4184, "step": 13388 }, { "epoch": 0.6144279748520031, "grad_norm": 0.45077359676361084, "learning_rate": 9.147950833362526e-06, "loss": 0.4329, "step": 13389 }, { "epoch": 0.6144738653572576, "grad_norm": 0.46725034713745117, "learning_rate": 9.147813922123354e-06, "loss": 0.4685, "step": 13390 }, { "epoch": 0.6145197558625121, "grad_norm": 0.4426793158054352, "learning_rate": 9.14767700091e-06, "loss": 0.4055, "step": 13391 }, { "epoch": 0.6145656463677666, "grad_norm": 0.477081835269928, "learning_rate": 9.14754006972279e-06, "loss": 0.4882, "step": 13392 }, { "epoch": 0.614611536873021, "grad_norm": 0.44980233907699585, "learning_rate": 9.147403128562056e-06, "loss": 0.3851, "step": 13393 }, { "epoch": 0.6146574273782754, "grad_norm": 0.45756906270980835, "learning_rate": 9.147266177428127e-06, "loss": 0.4074, "step": 13394 }, { "epoch": 0.6147033178835299, "grad_norm": 0.45969247817993164, "learning_rate": 9.14712921632133e-06, "loss": 0.4428, "step": 13395 }, { "epoch": 0.6147492083887843, "grad_norm": 0.4489195644855499, "learning_rate": 9.146992245241999e-06, "loss": 0.411, "step": 13396 }, { "epoch": 0.6147950988940388, "grad_norm": 0.41751763224601746, "learning_rate": 9.146855264190458e-06, "loss": 0.3479, "step": 13397 }, { "epoch": 0.6148409893992933, "grad_norm": 0.49162569642066956, "learning_rate": 9.146718273167039e-06, "loss": 0.4783, "step": 13398 }, { "epoch": 0.6148868799045477, "grad_norm": 0.5050580501556396, "learning_rate": 9.146581272172072e-06, "loss": 0.4207, "step": 13399 }, { "epoch": 0.6149327704098022, "grad_norm": 0.47160983085632324, "learning_rate": 9.146444261205884e-06, "loss": 0.4591, "step": 13400 }, { "epoch": 0.6149786609150567, "grad_norm": 0.45013126730918884, "learning_rate": 9.146307240268808e-06, "loss": 0.3408, "step": 13401 }, { "epoch": 0.6150245514203111, "grad_norm": 0.46886682510375977, "learning_rate": 9.146170209361171e-06, "loss": 0.3841, "step": 13402 }, { "epoch": 0.6150704419255656, "grad_norm": 0.4279814064502716, "learning_rate": 9.146033168483305e-06, "loss": 0.339, "step": 13403 }, { "epoch": 0.6151163324308201, "grad_norm": 0.46263134479522705, "learning_rate": 9.145896117635537e-06, "loss": 0.4046, "step": 13404 }, { "epoch": 0.6151622229360745, "grad_norm": 0.43564674258232117, "learning_rate": 9.145759056818195e-06, "loss": 0.3551, "step": 13405 }, { "epoch": 0.615208113441329, "grad_norm": 0.48240020871162415, "learning_rate": 9.145621986031615e-06, "loss": 0.3635, "step": 13406 }, { "epoch": 0.6152540039465835, "grad_norm": 0.445943146944046, "learning_rate": 9.145484905276119e-06, "loss": 0.3919, "step": 13407 }, { "epoch": 0.615299894451838, "grad_norm": 0.4761308431625366, "learning_rate": 9.145347814552041e-06, "loss": 0.4406, "step": 13408 }, { "epoch": 0.6153457849570924, "grad_norm": 0.4697396755218506, "learning_rate": 9.14521071385971e-06, "loss": 0.4571, "step": 13409 }, { "epoch": 0.6153916754623469, "grad_norm": 0.4474799931049347, "learning_rate": 9.145073603199456e-06, "loss": 0.3616, "step": 13410 }, { "epoch": 0.6154375659676014, "grad_norm": 0.4395371973514557, "learning_rate": 9.144936482571607e-06, "loss": 0.3648, "step": 13411 }, { "epoch": 0.6154834564728557, "grad_norm": 0.432668000459671, "learning_rate": 9.144799351976496e-06, "loss": 0.3309, "step": 13412 }, { "epoch": 0.6155293469781102, "grad_norm": 0.5442088842391968, "learning_rate": 9.144662211414448e-06, "loss": 0.519, "step": 13413 }, { "epoch": 0.6155752374833647, "grad_norm": 0.4398118257522583, "learning_rate": 9.144525060885798e-06, "loss": 0.3592, "step": 13414 }, { "epoch": 0.6156211279886191, "grad_norm": 0.48946818709373474, "learning_rate": 9.144387900390872e-06, "loss": 0.4304, "step": 13415 }, { "epoch": 0.6156670184938736, "grad_norm": 0.4541175067424774, "learning_rate": 9.144250729930002e-06, "loss": 0.3945, "step": 13416 }, { "epoch": 0.6157129089991281, "grad_norm": 0.505452573299408, "learning_rate": 9.144113549503516e-06, "loss": 0.4431, "step": 13417 }, { "epoch": 0.6157587995043825, "grad_norm": 0.4383891522884369, "learning_rate": 9.143976359111744e-06, "loss": 0.4418, "step": 13418 }, { "epoch": 0.615804690009637, "grad_norm": 0.4019473195075989, "learning_rate": 9.143839158755018e-06, "loss": 0.2872, "step": 13419 }, { "epoch": 0.6158505805148915, "grad_norm": 0.47392404079437256, "learning_rate": 9.143701948433668e-06, "loss": 0.4154, "step": 13420 }, { "epoch": 0.6158964710201459, "grad_norm": 0.49308133125305176, "learning_rate": 9.14356472814802e-06, "loss": 0.4972, "step": 13421 }, { "epoch": 0.6159423615254004, "grad_norm": 0.5038841366767883, "learning_rate": 9.143427497898408e-06, "loss": 0.4542, "step": 13422 }, { "epoch": 0.6159882520306549, "grad_norm": 0.4757876694202423, "learning_rate": 9.14329025768516e-06, "loss": 0.433, "step": 13423 }, { "epoch": 0.6160341425359093, "grad_norm": 0.44435879588127136, "learning_rate": 9.143153007508607e-06, "loss": 0.35, "step": 13424 }, { "epoch": 0.6160800330411638, "grad_norm": 0.4811384677886963, "learning_rate": 9.14301574736908e-06, "loss": 0.4689, "step": 13425 }, { "epoch": 0.6161259235464183, "grad_norm": 0.4709923267364502, "learning_rate": 9.142878477266906e-06, "loss": 0.4225, "step": 13426 }, { "epoch": 0.6161718140516728, "grad_norm": 0.4372086524963379, "learning_rate": 9.142741197202416e-06, "loss": 0.3423, "step": 13427 }, { "epoch": 0.6162177045569271, "grad_norm": 0.4705607295036316, "learning_rate": 9.142603907175942e-06, "loss": 0.3895, "step": 13428 }, { "epoch": 0.6162635950621816, "grad_norm": 0.41223546862602234, "learning_rate": 9.142466607187812e-06, "loss": 0.3212, "step": 13429 }, { "epoch": 0.6163094855674361, "grad_norm": 0.4744665324687958, "learning_rate": 9.142329297238359e-06, "loss": 0.4495, "step": 13430 }, { "epoch": 0.6163553760726905, "grad_norm": 0.45777949690818787, "learning_rate": 9.14219197732791e-06, "loss": 0.4456, "step": 13431 }, { "epoch": 0.616401266577945, "grad_norm": 0.4812173843383789, "learning_rate": 9.142054647456797e-06, "loss": 0.4251, "step": 13432 }, { "epoch": 0.6164471570831995, "grad_norm": 0.46591857075691223, "learning_rate": 9.14191730762535e-06, "loss": 0.4481, "step": 13433 }, { "epoch": 0.6164930475884539, "grad_norm": 0.40645402669906616, "learning_rate": 9.1417799578339e-06, "loss": 0.3316, "step": 13434 }, { "epoch": 0.6165389380937084, "grad_norm": 0.4581991136074066, "learning_rate": 9.141642598082775e-06, "loss": 0.3956, "step": 13435 }, { "epoch": 0.6165848285989629, "grad_norm": 0.46937936544418335, "learning_rate": 9.141505228372308e-06, "loss": 0.4868, "step": 13436 }, { "epoch": 0.6166307191042173, "grad_norm": 0.44141504168510437, "learning_rate": 9.141367848702827e-06, "loss": 0.3926, "step": 13437 }, { "epoch": 0.6166766096094718, "grad_norm": 0.46020522713661194, "learning_rate": 9.141230459074663e-06, "loss": 0.4207, "step": 13438 }, { "epoch": 0.6167225001147263, "grad_norm": 0.46896132826805115, "learning_rate": 9.141093059488148e-06, "loss": 0.4405, "step": 13439 }, { "epoch": 0.6167683906199807, "grad_norm": 0.46983203291893005, "learning_rate": 9.14095564994361e-06, "loss": 0.4496, "step": 13440 }, { "epoch": 0.6168142811252352, "grad_norm": 0.480518639087677, "learning_rate": 9.140818230441381e-06, "loss": 0.4331, "step": 13441 }, { "epoch": 0.6168601716304897, "grad_norm": 0.49814900755882263, "learning_rate": 9.140680800981791e-06, "loss": 0.4712, "step": 13442 }, { "epoch": 0.6169060621357441, "grad_norm": 0.44378945231437683, "learning_rate": 9.140543361565171e-06, "loss": 0.3667, "step": 13443 }, { "epoch": 0.6169519526409986, "grad_norm": 0.41307932138442993, "learning_rate": 9.14040591219185e-06, "loss": 0.3258, "step": 13444 }, { "epoch": 0.616997843146253, "grad_norm": 0.44544902443885803, "learning_rate": 9.140268452862159e-06, "loss": 0.371, "step": 13445 }, { "epoch": 0.6170437336515076, "grad_norm": 0.4458356201648712, "learning_rate": 9.140130983576432e-06, "loss": 0.3614, "step": 13446 }, { "epoch": 0.6170896241567619, "grad_norm": 0.5368770360946655, "learning_rate": 9.139993504334995e-06, "loss": 0.3311, "step": 13447 }, { "epoch": 0.6171355146620164, "grad_norm": 0.4459013044834137, "learning_rate": 9.13985601513818e-06, "loss": 0.3881, "step": 13448 }, { "epoch": 0.6171814051672709, "grad_norm": 0.4980817139148712, "learning_rate": 9.139718515986318e-06, "loss": 0.451, "step": 13449 }, { "epoch": 0.6172272956725253, "grad_norm": 0.44086116552352905, "learning_rate": 9.13958100687974e-06, "loss": 0.3691, "step": 13450 }, { "epoch": 0.6172731861777798, "grad_norm": 0.4244758188724518, "learning_rate": 9.139443487818775e-06, "loss": 0.328, "step": 13451 }, { "epoch": 0.6173190766830343, "grad_norm": 0.4575919508934021, "learning_rate": 9.139305958803755e-06, "loss": 0.4326, "step": 13452 }, { "epoch": 0.6173649671882887, "grad_norm": 0.4396938979625702, "learning_rate": 9.139168419835012e-06, "loss": 0.3601, "step": 13453 }, { "epoch": 0.6174108576935432, "grad_norm": 0.4686066508293152, "learning_rate": 9.139030870912875e-06, "loss": 0.4289, "step": 13454 }, { "epoch": 0.6174567481987977, "grad_norm": 0.4795283377170563, "learning_rate": 9.138893312037674e-06, "loss": 0.4225, "step": 13455 }, { "epoch": 0.6175026387040521, "grad_norm": 0.4413226842880249, "learning_rate": 9.13875574320974e-06, "loss": 0.3551, "step": 13456 }, { "epoch": 0.6175485292093066, "grad_norm": 0.43298423290252686, "learning_rate": 9.138618164429407e-06, "loss": 0.3673, "step": 13457 }, { "epoch": 0.6175944197145611, "grad_norm": 0.4431157112121582, "learning_rate": 9.138480575697002e-06, "loss": 0.2969, "step": 13458 }, { "epoch": 0.6176403102198155, "grad_norm": 0.44631248712539673, "learning_rate": 9.138342977012859e-06, "loss": 0.3797, "step": 13459 }, { "epoch": 0.61768620072507, "grad_norm": 0.39273110032081604, "learning_rate": 9.138205368377306e-06, "loss": 0.2929, "step": 13460 }, { "epoch": 0.6177320912303245, "grad_norm": 0.46222198009490967, "learning_rate": 9.138067749790674e-06, "loss": 0.372, "step": 13461 }, { "epoch": 0.617777981735579, "grad_norm": 0.4541102945804596, "learning_rate": 9.137930121253296e-06, "loss": 0.376, "step": 13462 }, { "epoch": 0.6178238722408333, "grad_norm": 0.49308663606643677, "learning_rate": 9.137792482765504e-06, "loss": 0.4419, "step": 13463 }, { "epoch": 0.6178697627460878, "grad_norm": 0.4454323351383209, "learning_rate": 9.137654834327626e-06, "loss": 0.416, "step": 13464 }, { "epoch": 0.6179156532513423, "grad_norm": 0.48245516419410706, "learning_rate": 9.137517175939992e-06, "loss": 0.4506, "step": 13465 }, { "epoch": 0.6179615437565967, "grad_norm": 0.44801098108291626, "learning_rate": 9.137379507602938e-06, "loss": 0.4178, "step": 13466 }, { "epoch": 0.6180074342618512, "grad_norm": 0.4346368610858917, "learning_rate": 9.137241829316789e-06, "loss": 0.3134, "step": 13467 }, { "epoch": 0.6180533247671057, "grad_norm": 0.44964346289634705, "learning_rate": 9.13710414108188e-06, "loss": 0.3548, "step": 13468 }, { "epoch": 0.6180992152723601, "grad_norm": 0.44593289494514465, "learning_rate": 9.136966442898544e-06, "loss": 0.3955, "step": 13469 }, { "epoch": 0.6181451057776146, "grad_norm": 0.45217379927635193, "learning_rate": 9.136828734767104e-06, "loss": 0.3361, "step": 13470 }, { "epoch": 0.6181909962828691, "grad_norm": 0.4572787880897522, "learning_rate": 9.136691016687901e-06, "loss": 0.3448, "step": 13471 }, { "epoch": 0.6182368867881235, "grad_norm": 0.4399280846118927, "learning_rate": 9.13655328866126e-06, "loss": 0.3578, "step": 13472 }, { "epoch": 0.618282777293378, "grad_norm": 0.41310587525367737, "learning_rate": 9.136415550687514e-06, "loss": 0.3396, "step": 13473 }, { "epoch": 0.6183286677986325, "grad_norm": 0.4077087640762329, "learning_rate": 9.136277802766995e-06, "loss": 0.3127, "step": 13474 }, { "epoch": 0.6183745583038869, "grad_norm": 0.51629638671875, "learning_rate": 9.136140044900032e-06, "loss": 0.4296, "step": 13475 }, { "epoch": 0.6184204488091414, "grad_norm": 0.45166507363319397, "learning_rate": 9.13600227708696e-06, "loss": 0.3942, "step": 13476 }, { "epoch": 0.6184663393143959, "grad_norm": 0.45083045959472656, "learning_rate": 9.135864499328106e-06, "loss": 0.394, "step": 13477 }, { "epoch": 0.6185122298196503, "grad_norm": 0.4511651396751404, "learning_rate": 9.135726711623803e-06, "loss": 0.3556, "step": 13478 }, { "epoch": 0.6185581203249048, "grad_norm": 0.4588184356689453, "learning_rate": 9.135588913974383e-06, "loss": 0.4425, "step": 13479 }, { "epoch": 0.6186040108301593, "grad_norm": 0.5027110576629639, "learning_rate": 9.135451106380176e-06, "loss": 0.4816, "step": 13480 }, { "epoch": 0.6186499013354138, "grad_norm": 0.4566195011138916, "learning_rate": 9.135313288841516e-06, "loss": 0.4289, "step": 13481 }, { "epoch": 0.6186957918406681, "grad_norm": 0.4354590177536011, "learning_rate": 9.135175461358731e-06, "loss": 0.3499, "step": 13482 }, { "epoch": 0.6187416823459226, "grad_norm": 0.485817551612854, "learning_rate": 9.135037623932154e-06, "loss": 0.4534, "step": 13483 }, { "epoch": 0.6187875728511771, "grad_norm": 0.4408957064151764, "learning_rate": 9.134899776562119e-06, "loss": 0.3638, "step": 13484 }, { "epoch": 0.6188334633564315, "grad_norm": 0.45121341943740845, "learning_rate": 9.134761919248953e-06, "loss": 0.3676, "step": 13485 }, { "epoch": 0.618879353861686, "grad_norm": 0.5158239006996155, "learning_rate": 9.134624051992991e-06, "loss": 0.5162, "step": 13486 }, { "epoch": 0.6189252443669405, "grad_norm": 0.48907721042633057, "learning_rate": 9.134486174794561e-06, "loss": 0.4788, "step": 13487 }, { "epoch": 0.6189711348721949, "grad_norm": 0.456527441740036, "learning_rate": 9.134348287653998e-06, "loss": 0.317, "step": 13488 }, { "epoch": 0.6190170253774494, "grad_norm": 0.5023139715194702, "learning_rate": 9.134210390571632e-06, "loss": 0.4736, "step": 13489 }, { "epoch": 0.6190629158827039, "grad_norm": 0.48126810789108276, "learning_rate": 9.134072483547794e-06, "loss": 0.4275, "step": 13490 }, { "epoch": 0.6191088063879583, "grad_norm": 0.49952611327171326, "learning_rate": 9.133934566582819e-06, "loss": 0.4731, "step": 13491 }, { "epoch": 0.6191546968932128, "grad_norm": 0.44755539298057556, "learning_rate": 9.133796639677033e-06, "loss": 0.3962, "step": 13492 }, { "epoch": 0.6192005873984673, "grad_norm": 0.46768897771835327, "learning_rate": 9.133658702830772e-06, "loss": 0.4181, "step": 13493 }, { "epoch": 0.6192464779037217, "grad_norm": 0.48543643951416016, "learning_rate": 9.133520756044368e-06, "loss": 0.4425, "step": 13494 }, { "epoch": 0.6192923684089762, "grad_norm": 0.44651708006858826, "learning_rate": 9.133382799318149e-06, "loss": 0.3988, "step": 13495 }, { "epoch": 0.6193382589142307, "grad_norm": 0.46467939019203186, "learning_rate": 9.133244832652448e-06, "loss": 0.4005, "step": 13496 }, { "epoch": 0.6193841494194852, "grad_norm": 0.4582372307777405, "learning_rate": 9.1331068560476e-06, "loss": 0.369, "step": 13497 }, { "epoch": 0.6194300399247396, "grad_norm": 0.4708572030067444, "learning_rate": 9.132968869503933e-06, "loss": 0.4574, "step": 13498 }, { "epoch": 0.619475930429994, "grad_norm": 0.43059664964675903, "learning_rate": 9.132830873021782e-06, "loss": 0.3366, "step": 13499 }, { "epoch": 0.6195218209352485, "grad_norm": 0.4699653387069702, "learning_rate": 9.132692866601476e-06, "loss": 0.4237, "step": 13500 }, { "epoch": 0.6195677114405029, "grad_norm": 0.4363558888435364, "learning_rate": 9.132554850243347e-06, "loss": 0.3363, "step": 13501 }, { "epoch": 0.6196136019457574, "grad_norm": 0.4825230836868286, "learning_rate": 9.13241682394773e-06, "loss": 0.3787, "step": 13502 }, { "epoch": 0.6196594924510119, "grad_norm": 0.48228612542152405, "learning_rate": 9.132278787714953e-06, "loss": 0.4631, "step": 13503 }, { "epoch": 0.6197053829562663, "grad_norm": 0.4433227777481079, "learning_rate": 9.13214074154535e-06, "loss": 0.3874, "step": 13504 }, { "epoch": 0.6197512734615208, "grad_norm": 0.45059722661972046, "learning_rate": 9.132002685439255e-06, "loss": 0.3824, "step": 13505 }, { "epoch": 0.6197971639667753, "grad_norm": 0.4203972816467285, "learning_rate": 9.131864619396994e-06, "loss": 0.3441, "step": 13506 }, { "epoch": 0.6198430544720297, "grad_norm": 0.45209014415740967, "learning_rate": 9.131726543418904e-06, "loss": 0.368, "step": 13507 }, { "epoch": 0.6198889449772842, "grad_norm": 0.47339892387390137, "learning_rate": 9.131588457505315e-06, "loss": 0.4602, "step": 13508 }, { "epoch": 0.6199348354825387, "grad_norm": 0.440996378660202, "learning_rate": 9.131450361656562e-06, "loss": 0.3785, "step": 13509 }, { "epoch": 0.6199807259877931, "grad_norm": 0.47609198093414307, "learning_rate": 9.131312255872973e-06, "loss": 0.4478, "step": 13510 }, { "epoch": 0.6200266164930476, "grad_norm": 0.47027817368507385, "learning_rate": 9.131174140154883e-06, "loss": 0.4089, "step": 13511 }, { "epoch": 0.6200725069983021, "grad_norm": 0.47962456941604614, "learning_rate": 9.131036014502622e-06, "loss": 0.477, "step": 13512 }, { "epoch": 0.6201183975035565, "grad_norm": 0.45942842960357666, "learning_rate": 9.130897878916524e-06, "loss": 0.3986, "step": 13513 }, { "epoch": 0.620164288008811, "grad_norm": 0.4236631989479065, "learning_rate": 9.130759733396918e-06, "loss": 0.3908, "step": 13514 }, { "epoch": 0.6202101785140655, "grad_norm": 0.4336707293987274, "learning_rate": 9.130621577944144e-06, "loss": 0.3623, "step": 13515 }, { "epoch": 0.62025606901932, "grad_norm": 0.43616944551467896, "learning_rate": 9.130483412558524e-06, "loss": 0.3456, "step": 13516 }, { "epoch": 0.6203019595245743, "grad_norm": 0.44043534994125366, "learning_rate": 9.130345237240396e-06, "loss": 0.4436, "step": 13517 }, { "epoch": 0.6203478500298288, "grad_norm": 0.43045178055763245, "learning_rate": 9.130207051990091e-06, "loss": 0.3828, "step": 13518 }, { "epoch": 0.6203937405350833, "grad_norm": 0.4150160253047943, "learning_rate": 9.130068856807943e-06, "loss": 0.3233, "step": 13519 }, { "epoch": 0.6204396310403377, "grad_norm": 0.4437844753265381, "learning_rate": 9.12993065169428e-06, "loss": 0.3986, "step": 13520 }, { "epoch": 0.6204855215455922, "grad_norm": 0.4456350803375244, "learning_rate": 9.12979243664944e-06, "loss": 0.4179, "step": 13521 }, { "epoch": 0.6205314120508467, "grad_norm": 0.4221799969673157, "learning_rate": 9.129654211673753e-06, "loss": 0.3547, "step": 13522 }, { "epoch": 0.6205773025561011, "grad_norm": 0.39722132682800293, "learning_rate": 9.129515976767548e-06, "loss": 0.2889, "step": 13523 }, { "epoch": 0.6206231930613556, "grad_norm": 0.46047040820121765, "learning_rate": 9.129377731931164e-06, "loss": 0.3442, "step": 13524 }, { "epoch": 0.6206690835666101, "grad_norm": 0.4540072977542877, "learning_rate": 9.129239477164927e-06, "loss": 0.3938, "step": 13525 }, { "epoch": 0.6207149740718645, "grad_norm": 0.4674703776836395, "learning_rate": 9.129101212469173e-06, "loss": 0.4126, "step": 13526 }, { "epoch": 0.620760864577119, "grad_norm": 0.4764498174190521, "learning_rate": 9.128962937844235e-06, "loss": 0.4466, "step": 13527 }, { "epoch": 0.6208067550823735, "grad_norm": 0.4278314411640167, "learning_rate": 9.128824653290443e-06, "loss": 0.3777, "step": 13528 }, { "epoch": 0.6208526455876279, "grad_norm": 0.48098185658454895, "learning_rate": 9.128686358808131e-06, "loss": 0.4257, "step": 13529 }, { "epoch": 0.6208985360928824, "grad_norm": 0.4629579484462738, "learning_rate": 9.128548054397632e-06, "loss": 0.3991, "step": 13530 }, { "epoch": 0.6209444265981369, "grad_norm": 0.4307628571987152, "learning_rate": 9.128409740059276e-06, "loss": 0.3492, "step": 13531 }, { "epoch": 0.6209903171033913, "grad_norm": 0.4825531244277954, "learning_rate": 9.1282714157934e-06, "loss": 0.4488, "step": 13532 }, { "epoch": 0.6210362076086458, "grad_norm": 0.42392441630363464, "learning_rate": 9.128133081600332e-06, "loss": 0.3785, "step": 13533 }, { "epoch": 0.6210820981139002, "grad_norm": 0.48532333970069885, "learning_rate": 9.12799473748041e-06, "loss": 0.4709, "step": 13534 }, { "epoch": 0.6211279886191547, "grad_norm": 0.4688650667667389, "learning_rate": 9.12785638343396e-06, "loss": 0.4506, "step": 13535 }, { "epoch": 0.6211738791244091, "grad_norm": 0.4706922471523285, "learning_rate": 9.12771801946132e-06, "loss": 0.46, "step": 13536 }, { "epoch": 0.6212197696296636, "grad_norm": 0.4879792332649231, "learning_rate": 9.127579645562821e-06, "loss": 0.5071, "step": 13537 }, { "epoch": 0.6212656601349181, "grad_norm": 0.4436523914337158, "learning_rate": 9.127441261738794e-06, "loss": 0.3738, "step": 13538 }, { "epoch": 0.6213115506401725, "grad_norm": 0.47871869802474976, "learning_rate": 9.127302867989576e-06, "loss": 0.4666, "step": 13539 }, { "epoch": 0.621357441145427, "grad_norm": 0.4736569821834564, "learning_rate": 9.127164464315494e-06, "loss": 0.4248, "step": 13540 }, { "epoch": 0.6214033316506815, "grad_norm": 0.4707769751548767, "learning_rate": 9.127026050716887e-06, "loss": 0.4659, "step": 13541 }, { "epoch": 0.6214492221559359, "grad_norm": 0.49210190773010254, "learning_rate": 9.126887627194084e-06, "loss": 0.443, "step": 13542 }, { "epoch": 0.6214951126611904, "grad_norm": 0.47874125838279724, "learning_rate": 9.126749193747419e-06, "loss": 0.4074, "step": 13543 }, { "epoch": 0.6215410031664449, "grad_norm": 0.4750124514102936, "learning_rate": 9.126610750377222e-06, "loss": 0.4295, "step": 13544 }, { "epoch": 0.6215868936716993, "grad_norm": 0.48303088545799255, "learning_rate": 9.126472297083833e-06, "loss": 0.4154, "step": 13545 }, { "epoch": 0.6216327841769538, "grad_norm": 0.48089855909347534, "learning_rate": 9.126333833867578e-06, "loss": 0.4946, "step": 13546 }, { "epoch": 0.6216786746822083, "grad_norm": 0.44583579897880554, "learning_rate": 9.126195360728791e-06, "loss": 0.3699, "step": 13547 }, { "epoch": 0.6217245651874627, "grad_norm": 0.46115127205848694, "learning_rate": 9.126056877667808e-06, "loss": 0.37, "step": 13548 }, { "epoch": 0.6217704556927172, "grad_norm": 0.4346621036529541, "learning_rate": 9.12591838468496e-06, "loss": 0.3509, "step": 13549 }, { "epoch": 0.6218163461979717, "grad_norm": 0.43880918622016907, "learning_rate": 9.12577988178058e-06, "loss": 0.3731, "step": 13550 }, { "epoch": 0.6218622367032262, "grad_norm": 0.4512544870376587, "learning_rate": 9.125641368955004e-06, "loss": 0.3658, "step": 13551 }, { "epoch": 0.6219081272084805, "grad_norm": 0.4697294533252716, "learning_rate": 9.12550284620856e-06, "loss": 0.4098, "step": 13552 }, { "epoch": 0.621954017713735, "grad_norm": 0.4583834111690521, "learning_rate": 9.125364313541586e-06, "loss": 0.4131, "step": 13553 }, { "epoch": 0.6219999082189895, "grad_norm": 0.43589186668395996, "learning_rate": 9.125225770954412e-06, "loss": 0.3181, "step": 13554 }, { "epoch": 0.6220457987242439, "grad_norm": 0.4776705801486969, "learning_rate": 9.12508721844737e-06, "loss": 0.4709, "step": 13555 }, { "epoch": 0.6220916892294984, "grad_norm": 0.44285672903060913, "learning_rate": 9.124948656020798e-06, "loss": 0.3752, "step": 13556 }, { "epoch": 0.6221375797347529, "grad_norm": 0.48146137595176697, "learning_rate": 9.124810083675024e-06, "loss": 0.3763, "step": 13557 }, { "epoch": 0.6221834702400073, "grad_norm": 0.42901986837387085, "learning_rate": 9.124671501410384e-06, "loss": 0.3293, "step": 13558 }, { "epoch": 0.6222293607452618, "grad_norm": 0.4728184640407562, "learning_rate": 9.124532909227214e-06, "loss": 0.3793, "step": 13559 }, { "epoch": 0.6222752512505163, "grad_norm": 0.4332300126552582, "learning_rate": 9.12439430712584e-06, "loss": 0.324, "step": 13560 }, { "epoch": 0.6223211417557707, "grad_norm": 0.4822970926761627, "learning_rate": 9.1242556951066e-06, "loss": 0.466, "step": 13561 }, { "epoch": 0.6223670322610252, "grad_norm": 0.4643024206161499, "learning_rate": 9.124117073169828e-06, "loss": 0.3782, "step": 13562 }, { "epoch": 0.6224129227662797, "grad_norm": 0.4630125164985657, "learning_rate": 9.123978441315855e-06, "loss": 0.3873, "step": 13563 }, { "epoch": 0.6224588132715341, "grad_norm": 0.4361162781715393, "learning_rate": 9.123839799545016e-06, "loss": 0.3889, "step": 13564 }, { "epoch": 0.6225047037767886, "grad_norm": 0.45569661259651184, "learning_rate": 9.123701147857644e-06, "loss": 0.3633, "step": 13565 }, { "epoch": 0.6225505942820431, "grad_norm": 0.48102834820747375, "learning_rate": 9.123562486254072e-06, "loss": 0.464, "step": 13566 }, { "epoch": 0.6225964847872975, "grad_norm": 0.4715704917907715, "learning_rate": 9.123423814734635e-06, "loss": 0.473, "step": 13567 }, { "epoch": 0.622642375292552, "grad_norm": 0.47909998893737793, "learning_rate": 9.123285133299664e-06, "loss": 0.4112, "step": 13568 }, { "epoch": 0.6226882657978065, "grad_norm": 0.4800023138523102, "learning_rate": 9.123146441949493e-06, "loss": 0.3939, "step": 13569 }, { "epoch": 0.622734156303061, "grad_norm": 0.4740374982357025, "learning_rate": 9.123007740684457e-06, "loss": 0.4785, "step": 13570 }, { "epoch": 0.6227800468083153, "grad_norm": 0.4531978964805603, "learning_rate": 9.122869029504887e-06, "loss": 0.4513, "step": 13571 }, { "epoch": 0.6228259373135698, "grad_norm": 0.4631929099559784, "learning_rate": 9.12273030841112e-06, "loss": 0.4951, "step": 13572 }, { "epoch": 0.6228718278188243, "grad_norm": 0.4883246123790741, "learning_rate": 9.122591577403488e-06, "loss": 0.4653, "step": 13573 }, { "epoch": 0.6229177183240787, "grad_norm": 0.4810955226421356, "learning_rate": 9.122452836482323e-06, "loss": 0.4904, "step": 13574 }, { "epoch": 0.6229636088293332, "grad_norm": 0.4530024826526642, "learning_rate": 9.12231408564796e-06, "loss": 0.4192, "step": 13575 }, { "epoch": 0.6230094993345877, "grad_norm": 0.4662466049194336, "learning_rate": 9.122175324900733e-06, "loss": 0.4455, "step": 13576 }, { "epoch": 0.6230553898398421, "grad_norm": 0.4660511016845703, "learning_rate": 9.122036554240976e-06, "loss": 0.4019, "step": 13577 }, { "epoch": 0.6231012803450966, "grad_norm": 0.5148074626922607, "learning_rate": 9.121897773669021e-06, "loss": 0.5202, "step": 13578 }, { "epoch": 0.6231471708503511, "grad_norm": 0.47557902336120605, "learning_rate": 9.121758983185204e-06, "loss": 0.3713, "step": 13579 }, { "epoch": 0.6231930613556055, "grad_norm": 0.5172755122184753, "learning_rate": 9.121620182789856e-06, "loss": 0.5078, "step": 13580 }, { "epoch": 0.62323895186086, "grad_norm": 0.4523959159851074, "learning_rate": 9.121481372483313e-06, "loss": 0.3695, "step": 13581 }, { "epoch": 0.6232848423661145, "grad_norm": 0.47998732328414917, "learning_rate": 9.121342552265909e-06, "loss": 0.467, "step": 13582 }, { "epoch": 0.6233307328713689, "grad_norm": 0.4421985447406769, "learning_rate": 9.121203722137976e-06, "loss": 0.3577, "step": 13583 }, { "epoch": 0.6233766233766234, "grad_norm": 0.48794248700141907, "learning_rate": 9.12106488209985e-06, "loss": 0.4482, "step": 13584 }, { "epoch": 0.6234225138818779, "grad_norm": 0.4165756106376648, "learning_rate": 9.120926032151862e-06, "loss": 0.3055, "step": 13585 }, { "epoch": 0.6234684043871324, "grad_norm": 0.4510299861431122, "learning_rate": 9.120787172294347e-06, "loss": 0.3997, "step": 13586 }, { "epoch": 0.6235142948923867, "grad_norm": 0.4689398407936096, "learning_rate": 9.120648302527641e-06, "loss": 0.423, "step": 13587 }, { "epoch": 0.6235601853976412, "grad_norm": 0.4426478445529938, "learning_rate": 9.120509422852076e-06, "loss": 0.366, "step": 13588 }, { "epoch": 0.6236060759028957, "grad_norm": 0.443268746137619, "learning_rate": 9.120370533267986e-06, "loss": 0.361, "step": 13589 }, { "epoch": 0.6236519664081501, "grad_norm": 0.42122969031333923, "learning_rate": 9.120231633775705e-06, "loss": 0.3699, "step": 13590 }, { "epoch": 0.6236978569134046, "grad_norm": 0.4692622423171997, "learning_rate": 9.120092724375567e-06, "loss": 0.3662, "step": 13591 }, { "epoch": 0.6237437474186591, "grad_norm": 0.440532386302948, "learning_rate": 9.119953805067907e-06, "loss": 0.3686, "step": 13592 }, { "epoch": 0.6237896379239135, "grad_norm": 0.45228350162506104, "learning_rate": 9.119814875853059e-06, "loss": 0.3998, "step": 13593 }, { "epoch": 0.623835528429168, "grad_norm": 0.4641745686531067, "learning_rate": 9.119675936731356e-06, "loss": 0.4433, "step": 13594 }, { "epoch": 0.6238814189344225, "grad_norm": 0.42702949047088623, "learning_rate": 9.119536987703132e-06, "loss": 0.3702, "step": 13595 }, { "epoch": 0.6239273094396769, "grad_norm": 0.4658576250076294, "learning_rate": 9.119398028768723e-06, "loss": 0.3977, "step": 13596 }, { "epoch": 0.6239731999449314, "grad_norm": 0.4700034558773041, "learning_rate": 9.11925905992846e-06, "loss": 0.4631, "step": 13597 }, { "epoch": 0.6240190904501859, "grad_norm": 0.4417860209941864, "learning_rate": 9.11912008118268e-06, "loss": 0.3801, "step": 13598 }, { "epoch": 0.6240649809554403, "grad_norm": 0.4522622227668762, "learning_rate": 9.118981092531717e-06, "loss": 0.4095, "step": 13599 }, { "epoch": 0.6241108714606948, "grad_norm": 0.4747748374938965, "learning_rate": 9.118842093975903e-06, "loss": 0.4609, "step": 13600 }, { "epoch": 0.6241567619659493, "grad_norm": 0.46969231963157654, "learning_rate": 9.118703085515575e-06, "loss": 0.4466, "step": 13601 }, { "epoch": 0.6242026524712037, "grad_norm": 0.4503541886806488, "learning_rate": 9.118564067151065e-06, "loss": 0.3771, "step": 13602 }, { "epoch": 0.6242485429764582, "grad_norm": 0.4588680565357208, "learning_rate": 9.11842503888271e-06, "loss": 0.4259, "step": 13603 }, { "epoch": 0.6242944334817127, "grad_norm": 0.45701706409454346, "learning_rate": 9.118286000710839e-06, "loss": 0.4672, "step": 13604 }, { "epoch": 0.6243403239869671, "grad_norm": 0.4905075430870056, "learning_rate": 9.118146952635793e-06, "loss": 0.4112, "step": 13605 }, { "epoch": 0.6243862144922215, "grad_norm": 0.4455539584159851, "learning_rate": 9.118007894657904e-06, "loss": 0.3967, "step": 13606 }, { "epoch": 0.624432104997476, "grad_norm": 0.4949384033679962, "learning_rate": 9.117868826777503e-06, "loss": 0.5338, "step": 13607 }, { "epoch": 0.6244779955027305, "grad_norm": 0.49051153659820557, "learning_rate": 9.117729748994928e-06, "loss": 0.4619, "step": 13608 }, { "epoch": 0.6245238860079849, "grad_norm": 0.46464529633522034, "learning_rate": 9.117590661310514e-06, "loss": 0.382, "step": 13609 }, { "epoch": 0.6245697765132394, "grad_norm": 0.4725222885608673, "learning_rate": 9.117451563724592e-06, "loss": 0.4114, "step": 13610 }, { "epoch": 0.6246156670184939, "grad_norm": 0.5443709492683411, "learning_rate": 9.1173124562375e-06, "loss": 0.5746, "step": 13611 }, { "epoch": 0.6246615575237483, "grad_norm": 0.42529138922691345, "learning_rate": 9.11717333884957e-06, "loss": 0.3011, "step": 13612 }, { "epoch": 0.6247074480290028, "grad_norm": 0.44743481278419495, "learning_rate": 9.117034211561138e-06, "loss": 0.3896, "step": 13613 }, { "epoch": 0.6247533385342573, "grad_norm": 0.43031972646713257, "learning_rate": 9.116895074372537e-06, "loss": 0.3718, "step": 13614 }, { "epoch": 0.6247992290395117, "grad_norm": 0.47878044843673706, "learning_rate": 9.116755927284104e-06, "loss": 0.4014, "step": 13615 }, { "epoch": 0.6248451195447662, "grad_norm": 0.4515795111656189, "learning_rate": 9.116616770296172e-06, "loss": 0.3964, "step": 13616 }, { "epoch": 0.6248910100500207, "grad_norm": 0.4825457036495209, "learning_rate": 9.116477603409075e-06, "loss": 0.4308, "step": 13617 }, { "epoch": 0.6249369005552751, "grad_norm": 0.4621758759021759, "learning_rate": 9.11633842662315e-06, "loss": 0.3712, "step": 13618 }, { "epoch": 0.6249827910605296, "grad_norm": 0.4357132315635681, "learning_rate": 9.11619923993873e-06, "loss": 0.3843, "step": 13619 }, { "epoch": 0.6250286815657841, "grad_norm": 0.4798409938812256, "learning_rate": 9.11606004335615e-06, "loss": 0.4475, "step": 13620 }, { "epoch": 0.6250745720710384, "grad_norm": 0.47069260478019714, "learning_rate": 9.115920836875745e-06, "loss": 0.4638, "step": 13621 }, { "epoch": 0.625120462576293, "grad_norm": 0.45995283126831055, "learning_rate": 9.115781620497848e-06, "loss": 0.4307, "step": 13622 }, { "epoch": 0.6251663530815474, "grad_norm": 0.4853038787841797, "learning_rate": 9.115642394222794e-06, "loss": 0.325, "step": 13623 }, { "epoch": 0.6252122435868019, "grad_norm": 0.45901229977607727, "learning_rate": 9.115503158050923e-06, "loss": 0.4347, "step": 13624 }, { "epoch": 0.6252581340920563, "grad_norm": 0.45606330037117004, "learning_rate": 9.115363911982563e-06, "loss": 0.3845, "step": 13625 }, { "epoch": 0.6253040245973108, "grad_norm": 0.492565393447876, "learning_rate": 9.115224656018052e-06, "loss": 0.4871, "step": 13626 }, { "epoch": 0.6253499151025653, "grad_norm": 0.4361816942691803, "learning_rate": 9.115085390157725e-06, "loss": 0.3388, "step": 13627 }, { "epoch": 0.6253958056078197, "grad_norm": 0.4343664348125458, "learning_rate": 9.114946114401915e-06, "loss": 0.31, "step": 13628 }, { "epoch": 0.6254416961130742, "grad_norm": 0.44521623849868774, "learning_rate": 9.11480682875096e-06, "loss": 0.3835, "step": 13629 }, { "epoch": 0.6254875866183287, "grad_norm": 0.489960253238678, "learning_rate": 9.114667533205193e-06, "loss": 0.477, "step": 13630 }, { "epoch": 0.6255334771235831, "grad_norm": 0.4271267354488373, "learning_rate": 9.11452822776495e-06, "loss": 0.3262, "step": 13631 }, { "epoch": 0.6255793676288376, "grad_norm": 0.44223764538764954, "learning_rate": 9.114388912430563e-06, "loss": 0.3759, "step": 13632 }, { "epoch": 0.6256252581340921, "grad_norm": 0.42774978280067444, "learning_rate": 9.114249587202371e-06, "loss": 0.3884, "step": 13633 }, { "epoch": 0.6256711486393465, "grad_norm": 0.44865235686302185, "learning_rate": 9.114110252080705e-06, "loss": 0.423, "step": 13634 }, { "epoch": 0.625717039144601, "grad_norm": 0.4593746066093445, "learning_rate": 9.113970907065905e-06, "loss": 0.4284, "step": 13635 }, { "epoch": 0.6257629296498555, "grad_norm": 0.48485514521598816, "learning_rate": 9.113831552158302e-06, "loss": 0.4491, "step": 13636 }, { "epoch": 0.6258088201551099, "grad_norm": 0.46986663341522217, "learning_rate": 9.113692187358233e-06, "loss": 0.3846, "step": 13637 }, { "epoch": 0.6258547106603644, "grad_norm": 0.4344359040260315, "learning_rate": 9.113552812666033e-06, "loss": 0.3783, "step": 13638 }, { "epoch": 0.6259006011656189, "grad_norm": 0.4404149651527405, "learning_rate": 9.113413428082036e-06, "loss": 0.3892, "step": 13639 }, { "epoch": 0.6259464916708734, "grad_norm": 0.47666043043136597, "learning_rate": 9.113274033606578e-06, "loss": 0.3968, "step": 13640 }, { "epoch": 0.6259923821761277, "grad_norm": 0.46210432052612305, "learning_rate": 9.113134629239994e-06, "loss": 0.4457, "step": 13641 }, { "epoch": 0.6260382726813822, "grad_norm": 0.4827441871166229, "learning_rate": 9.112995214982619e-06, "loss": 0.3991, "step": 13642 }, { "epoch": 0.6260841631866367, "grad_norm": 0.4562370479106903, "learning_rate": 9.112855790834789e-06, "loss": 0.3974, "step": 13643 }, { "epoch": 0.6261300536918911, "grad_norm": 0.5029146075248718, "learning_rate": 9.112716356796839e-06, "loss": 0.4656, "step": 13644 }, { "epoch": 0.6261759441971456, "grad_norm": 0.47383129596710205, "learning_rate": 9.112576912869104e-06, "loss": 0.3909, "step": 13645 }, { "epoch": 0.6262218347024001, "grad_norm": 0.4470995366573334, "learning_rate": 9.112437459051919e-06, "loss": 0.4201, "step": 13646 }, { "epoch": 0.6262677252076545, "grad_norm": 0.42845162749290466, "learning_rate": 9.11229799534562e-06, "loss": 0.3235, "step": 13647 }, { "epoch": 0.626313615712909, "grad_norm": 0.42569148540496826, "learning_rate": 9.112158521750544e-06, "loss": 0.3395, "step": 13648 }, { "epoch": 0.6263595062181635, "grad_norm": 0.4630505442619324, "learning_rate": 9.112019038267023e-06, "loss": 0.3906, "step": 13649 }, { "epoch": 0.6264053967234179, "grad_norm": 0.4603320062160492, "learning_rate": 9.111879544895395e-06, "loss": 0.386, "step": 13650 }, { "epoch": 0.6264512872286724, "grad_norm": 0.4445750117301941, "learning_rate": 9.111740041635993e-06, "loss": 0.3796, "step": 13651 }, { "epoch": 0.6264971777339269, "grad_norm": 0.470172256231308, "learning_rate": 9.111600528489154e-06, "loss": 0.4074, "step": 13652 }, { "epoch": 0.6265430682391813, "grad_norm": 0.4349626898765564, "learning_rate": 9.111461005455215e-06, "loss": 0.3776, "step": 13653 }, { "epoch": 0.6265889587444358, "grad_norm": 0.4975203275680542, "learning_rate": 9.111321472534509e-06, "loss": 0.4588, "step": 13654 }, { "epoch": 0.6266348492496903, "grad_norm": 0.4368135631084442, "learning_rate": 9.111181929727371e-06, "loss": 0.318, "step": 13655 }, { "epoch": 0.6266807397549446, "grad_norm": 0.4769822061061859, "learning_rate": 9.111042377034141e-06, "loss": 0.4914, "step": 13656 }, { "epoch": 0.6267266302601991, "grad_norm": 0.4669618010520935, "learning_rate": 9.110902814455149e-06, "loss": 0.4441, "step": 13657 }, { "epoch": 0.6267725207654536, "grad_norm": 0.4871855676174164, "learning_rate": 9.110763241990735e-06, "loss": 0.4918, "step": 13658 }, { "epoch": 0.6268184112707081, "grad_norm": 0.4395117461681366, "learning_rate": 9.110623659641231e-06, "loss": 0.4073, "step": 13659 }, { "epoch": 0.6268643017759625, "grad_norm": 0.46548977494239807, "learning_rate": 9.110484067406975e-06, "loss": 0.4305, "step": 13660 }, { "epoch": 0.626910192281217, "grad_norm": 0.523999810218811, "learning_rate": 9.110344465288303e-06, "loss": 0.5316, "step": 13661 }, { "epoch": 0.6269560827864715, "grad_norm": 0.4806918501853943, "learning_rate": 9.110204853285549e-06, "loss": 0.3853, "step": 13662 }, { "epoch": 0.6270019732917259, "grad_norm": 0.44492775201797485, "learning_rate": 9.11006523139905e-06, "loss": 0.3527, "step": 13663 }, { "epoch": 0.6270478637969804, "grad_norm": 0.462098091840744, "learning_rate": 9.10992559962914e-06, "loss": 0.3896, "step": 13664 }, { "epoch": 0.6270937543022349, "grad_norm": 0.4390881657600403, "learning_rate": 9.109785957976158e-06, "loss": 0.3566, "step": 13665 }, { "epoch": 0.6271396448074893, "grad_norm": 0.4432145655155182, "learning_rate": 9.109646306440438e-06, "loss": 0.3377, "step": 13666 }, { "epoch": 0.6271855353127438, "grad_norm": 0.4953557550907135, "learning_rate": 9.109506645022314e-06, "loss": 0.4393, "step": 13667 }, { "epoch": 0.6272314258179983, "grad_norm": 0.5088863968849182, "learning_rate": 9.109366973722125e-06, "loss": 0.4847, "step": 13668 }, { "epoch": 0.6272773163232527, "grad_norm": 0.5103203654289246, "learning_rate": 9.109227292540204e-06, "loss": 0.4544, "step": 13669 }, { "epoch": 0.6273232068285072, "grad_norm": 0.46356844902038574, "learning_rate": 9.10908760147689e-06, "loss": 0.4299, "step": 13670 }, { "epoch": 0.6273690973337617, "grad_norm": 0.4585416615009308, "learning_rate": 9.108947900532514e-06, "loss": 0.3872, "step": 13671 }, { "epoch": 0.6274149878390161, "grad_norm": 0.42603009939193726, "learning_rate": 9.108808189707418e-06, "loss": 0.3522, "step": 13672 }, { "epoch": 0.6274608783442706, "grad_norm": 0.45882779359817505, "learning_rate": 9.108668469001933e-06, "loss": 0.4076, "step": 13673 }, { "epoch": 0.627506768849525, "grad_norm": 0.46660116314888, "learning_rate": 9.108528738416399e-06, "loss": 0.3818, "step": 13674 }, { "epoch": 0.6275526593547794, "grad_norm": 0.4421790838241577, "learning_rate": 9.10838899795115e-06, "loss": 0.3738, "step": 13675 }, { "epoch": 0.6275985498600339, "grad_norm": 0.4388272762298584, "learning_rate": 9.10824924760652e-06, "loss": 0.3667, "step": 13676 }, { "epoch": 0.6276444403652884, "grad_norm": 0.4716217815876007, "learning_rate": 9.10810948738285e-06, "loss": 0.4028, "step": 13677 }, { "epoch": 0.6276903308705429, "grad_norm": 0.4507223963737488, "learning_rate": 9.107969717280471e-06, "loss": 0.391, "step": 13678 }, { "epoch": 0.6277362213757973, "grad_norm": 0.47650957107543945, "learning_rate": 9.107829937299724e-06, "loss": 0.3869, "step": 13679 }, { "epoch": 0.6277821118810518, "grad_norm": 0.45618265867233276, "learning_rate": 9.10769014744094e-06, "loss": 0.4075, "step": 13680 }, { "epoch": 0.6278280023863063, "grad_norm": 0.5932179093360901, "learning_rate": 9.107550347704457e-06, "loss": 0.4486, "step": 13681 }, { "epoch": 0.6278738928915607, "grad_norm": 0.4464779496192932, "learning_rate": 9.107410538090615e-06, "loss": 0.4153, "step": 13682 }, { "epoch": 0.6279197833968152, "grad_norm": 0.4385484457015991, "learning_rate": 9.107270718599743e-06, "loss": 0.3811, "step": 13683 }, { "epoch": 0.6279656739020697, "grad_norm": 0.47455912828445435, "learning_rate": 9.107130889232185e-06, "loss": 0.415, "step": 13684 }, { "epoch": 0.6280115644073241, "grad_norm": 0.4071928858757019, "learning_rate": 9.106991049988271e-06, "loss": 0.3222, "step": 13685 }, { "epoch": 0.6280574549125786, "grad_norm": 0.46810925006866455, "learning_rate": 9.106851200868341e-06, "loss": 0.3986, "step": 13686 }, { "epoch": 0.6281033454178331, "grad_norm": 0.45160913467407227, "learning_rate": 9.106711341872728e-06, "loss": 0.4065, "step": 13687 }, { "epoch": 0.6281492359230875, "grad_norm": 0.46360260248184204, "learning_rate": 9.106571473001772e-06, "loss": 0.3917, "step": 13688 }, { "epoch": 0.628195126428342, "grad_norm": 0.4638102650642395, "learning_rate": 9.10643159425581e-06, "loss": 0.4669, "step": 13689 }, { "epoch": 0.6282410169335965, "grad_norm": 0.43756335973739624, "learning_rate": 9.106291705635172e-06, "loss": 0.3737, "step": 13690 }, { "epoch": 0.6282869074388509, "grad_norm": 0.4349897503852844, "learning_rate": 9.1061518071402e-06, "loss": 0.3504, "step": 13691 }, { "epoch": 0.6283327979441053, "grad_norm": 0.46866628527641296, "learning_rate": 9.10601189877123e-06, "loss": 0.4293, "step": 13692 }, { "epoch": 0.6283786884493598, "grad_norm": 0.4611052870750427, "learning_rate": 9.105871980528595e-06, "loss": 0.397, "step": 13693 }, { "epoch": 0.6284245789546143, "grad_norm": 0.4438941478729248, "learning_rate": 9.105732052412634e-06, "loss": 0.3831, "step": 13694 }, { "epoch": 0.6284704694598687, "grad_norm": 0.4622237980365753, "learning_rate": 9.105592114423685e-06, "loss": 0.4501, "step": 13695 }, { "epoch": 0.6285163599651232, "grad_norm": 0.4574432373046875, "learning_rate": 9.105452166562082e-06, "loss": 0.4012, "step": 13696 }, { "epoch": 0.6285622504703777, "grad_norm": 0.45375698804855347, "learning_rate": 9.105312208828162e-06, "loss": 0.3427, "step": 13697 }, { "epoch": 0.6286081409756321, "grad_norm": 0.41117915511131287, "learning_rate": 9.105172241222262e-06, "loss": 0.3411, "step": 13698 }, { "epoch": 0.6286540314808866, "grad_norm": 0.4615662097930908, "learning_rate": 9.105032263744717e-06, "loss": 0.4107, "step": 13699 }, { "epoch": 0.6286999219861411, "grad_norm": 0.42452019453048706, "learning_rate": 9.104892276395866e-06, "loss": 0.363, "step": 13700 }, { "epoch": 0.6287458124913955, "grad_norm": 0.418771892786026, "learning_rate": 9.104752279176045e-06, "loss": 0.3387, "step": 13701 }, { "epoch": 0.62879170299665, "grad_norm": 0.47144344449043274, "learning_rate": 9.10461227208559e-06, "loss": 0.4406, "step": 13702 }, { "epoch": 0.6288375935019045, "grad_norm": 0.499202162027359, "learning_rate": 9.104472255124839e-06, "loss": 0.3083, "step": 13703 }, { "epoch": 0.6288834840071589, "grad_norm": 0.40753117203712463, "learning_rate": 9.104332228294127e-06, "loss": 0.3427, "step": 13704 }, { "epoch": 0.6289293745124134, "grad_norm": 0.4543326795101166, "learning_rate": 9.10419219159379e-06, "loss": 0.3791, "step": 13705 }, { "epoch": 0.6289752650176679, "grad_norm": 0.4498511552810669, "learning_rate": 9.104052145024169e-06, "loss": 0.3764, "step": 13706 }, { "epoch": 0.6290211555229223, "grad_norm": 0.4355706572532654, "learning_rate": 9.103912088585596e-06, "loss": 0.3275, "step": 13707 }, { "epoch": 0.6290670460281768, "grad_norm": 0.4331609904766083, "learning_rate": 9.103772022278409e-06, "loss": 0.3975, "step": 13708 }, { "epoch": 0.6291129365334313, "grad_norm": 0.4652096629142761, "learning_rate": 9.103631946102946e-06, "loss": 0.4657, "step": 13709 }, { "epoch": 0.6291588270386856, "grad_norm": 0.4701501727104187, "learning_rate": 9.103491860059544e-06, "loss": 0.3938, "step": 13710 }, { "epoch": 0.6292047175439401, "grad_norm": 0.49782222509384155, "learning_rate": 9.103351764148538e-06, "loss": 0.4854, "step": 13711 }, { "epoch": 0.6292506080491946, "grad_norm": 0.4514663517475128, "learning_rate": 9.103211658370268e-06, "loss": 0.3872, "step": 13712 }, { "epoch": 0.6292964985544491, "grad_norm": 0.44818544387817383, "learning_rate": 9.103071542725067e-06, "loss": 0.3653, "step": 13713 }, { "epoch": 0.6293423890597035, "grad_norm": 0.46137815713882446, "learning_rate": 9.102931417213275e-06, "loss": 0.3352, "step": 13714 }, { "epoch": 0.629388279564958, "grad_norm": 0.5046994686126709, "learning_rate": 9.102791281835227e-06, "loss": 0.4767, "step": 13715 }, { "epoch": 0.6294341700702125, "grad_norm": 0.45495080947875977, "learning_rate": 9.10265113659126e-06, "loss": 0.3999, "step": 13716 }, { "epoch": 0.6294800605754669, "grad_norm": 0.4386732876300812, "learning_rate": 9.102510981481715e-06, "loss": 0.3868, "step": 13717 }, { "epoch": 0.6295259510807214, "grad_norm": 0.4759403467178345, "learning_rate": 9.102370816506922e-06, "loss": 0.3955, "step": 13718 }, { "epoch": 0.6295718415859759, "grad_norm": 0.436422735452652, "learning_rate": 9.102230641667225e-06, "loss": 0.3701, "step": 13719 }, { "epoch": 0.6296177320912303, "grad_norm": 0.4523036777973175, "learning_rate": 9.102090456962957e-06, "loss": 0.439, "step": 13720 }, { "epoch": 0.6296636225964848, "grad_norm": 0.4791155457496643, "learning_rate": 9.101950262394456e-06, "loss": 0.4185, "step": 13721 }, { "epoch": 0.6297095131017393, "grad_norm": 0.43001556396484375, "learning_rate": 9.10181005796206e-06, "loss": 0.3468, "step": 13722 }, { "epoch": 0.6297554036069937, "grad_norm": 0.4744299054145813, "learning_rate": 9.101669843666105e-06, "loss": 0.4486, "step": 13723 }, { "epoch": 0.6298012941122482, "grad_norm": 0.4610910415649414, "learning_rate": 9.101529619506928e-06, "loss": 0.3687, "step": 13724 }, { "epoch": 0.6298471846175027, "grad_norm": 0.43942463397979736, "learning_rate": 9.101389385484866e-06, "loss": 0.3915, "step": 13725 }, { "epoch": 0.629893075122757, "grad_norm": 0.470424622297287, "learning_rate": 9.101249141600259e-06, "loss": 0.4098, "step": 13726 }, { "epoch": 0.6299389656280115, "grad_norm": 0.4710494875907898, "learning_rate": 9.10110888785344e-06, "loss": 0.4275, "step": 13727 }, { "epoch": 0.629984856133266, "grad_norm": 0.4294699430465698, "learning_rate": 9.10096862424475e-06, "loss": 0.3216, "step": 13728 }, { "epoch": 0.6300307466385205, "grad_norm": 0.44323498010635376, "learning_rate": 9.100828350774525e-06, "loss": 0.3515, "step": 13729 }, { "epoch": 0.6300766371437749, "grad_norm": 0.4185742139816284, "learning_rate": 9.100688067443102e-06, "loss": 0.3879, "step": 13730 }, { "epoch": 0.6301225276490294, "grad_norm": 0.4734041094779968, "learning_rate": 9.100547774250817e-06, "loss": 0.4252, "step": 13731 }, { "epoch": 0.6301684181542839, "grad_norm": 0.45422691106796265, "learning_rate": 9.100407471198009e-06, "loss": 0.4046, "step": 13732 }, { "epoch": 0.6302143086595383, "grad_norm": 0.44218188524246216, "learning_rate": 9.100267158285016e-06, "loss": 0.3592, "step": 13733 }, { "epoch": 0.6302601991647928, "grad_norm": 0.42819520831108093, "learning_rate": 9.100126835512175e-06, "loss": 0.3182, "step": 13734 }, { "epoch": 0.6303060896700473, "grad_norm": 0.4627501368522644, "learning_rate": 9.099986502879823e-06, "loss": 0.3996, "step": 13735 }, { "epoch": 0.6303519801753017, "grad_norm": 0.4762738347053528, "learning_rate": 9.099846160388297e-06, "loss": 0.4549, "step": 13736 }, { "epoch": 0.6303978706805562, "grad_norm": 0.5006212592124939, "learning_rate": 9.099705808037935e-06, "loss": 0.4958, "step": 13737 }, { "epoch": 0.6304437611858107, "grad_norm": 0.4639914929866791, "learning_rate": 9.099565445829072e-06, "loss": 0.4508, "step": 13738 }, { "epoch": 0.6304896516910651, "grad_norm": 0.48809918761253357, "learning_rate": 9.099425073762051e-06, "loss": 0.4518, "step": 13739 }, { "epoch": 0.6305355421963196, "grad_norm": 0.44706717133522034, "learning_rate": 9.099284691837206e-06, "loss": 0.3364, "step": 13740 }, { "epoch": 0.6305814327015741, "grad_norm": 0.4834180176258087, "learning_rate": 9.099144300054875e-06, "loss": 0.4764, "step": 13741 }, { "epoch": 0.6306273232068285, "grad_norm": 0.4418417513370514, "learning_rate": 9.099003898415394e-06, "loss": 0.3603, "step": 13742 }, { "epoch": 0.630673213712083, "grad_norm": 0.4699194133281708, "learning_rate": 9.098863486919105e-06, "loss": 0.4584, "step": 13743 }, { "epoch": 0.6307191042173375, "grad_norm": 0.4323606789112091, "learning_rate": 9.098723065566341e-06, "loss": 0.3384, "step": 13744 }, { "epoch": 0.6307649947225918, "grad_norm": 0.4393993616104126, "learning_rate": 9.098582634357442e-06, "loss": 0.368, "step": 13745 }, { "epoch": 0.6308108852278463, "grad_norm": 0.5486024618148804, "learning_rate": 9.098442193292747e-06, "loss": 0.476, "step": 13746 }, { "epoch": 0.6308567757331008, "grad_norm": 0.44537246227264404, "learning_rate": 9.09830174237259e-06, "loss": 0.4006, "step": 13747 }, { "epoch": 0.6309026662383553, "grad_norm": 0.4700680077075958, "learning_rate": 9.098161281597312e-06, "loss": 0.4597, "step": 13748 }, { "epoch": 0.6309485567436097, "grad_norm": 0.44967445731163025, "learning_rate": 9.09802081096725e-06, "loss": 0.3649, "step": 13749 }, { "epoch": 0.6309944472488642, "grad_norm": 0.45598292350769043, "learning_rate": 9.09788033048274e-06, "loss": 0.405, "step": 13750 }, { "epoch": 0.6310403377541187, "grad_norm": 0.48644453287124634, "learning_rate": 9.09773984014412e-06, "loss": 0.4496, "step": 13751 }, { "epoch": 0.6310862282593731, "grad_norm": 0.47058364748954773, "learning_rate": 9.09759933995173e-06, "loss": 0.4834, "step": 13752 }, { "epoch": 0.6311321187646276, "grad_norm": 0.4332495629787445, "learning_rate": 9.097458829905908e-06, "loss": 0.343, "step": 13753 }, { "epoch": 0.6311780092698821, "grad_norm": 0.4840836226940155, "learning_rate": 9.09731831000699e-06, "loss": 0.4337, "step": 13754 }, { "epoch": 0.6312238997751365, "grad_norm": 0.5089073181152344, "learning_rate": 9.097177780255315e-06, "loss": 0.4504, "step": 13755 }, { "epoch": 0.631269790280391, "grad_norm": 0.4627860188484192, "learning_rate": 9.097037240651219e-06, "loss": 0.3985, "step": 13756 }, { "epoch": 0.6313156807856455, "grad_norm": 0.4629143476486206, "learning_rate": 9.096896691195045e-06, "loss": 0.3851, "step": 13757 }, { "epoch": 0.6313615712908999, "grad_norm": 0.44181910157203674, "learning_rate": 9.096756131887123e-06, "loss": 0.3344, "step": 13758 }, { "epoch": 0.6314074617961544, "grad_norm": 0.5098085403442383, "learning_rate": 9.096615562727797e-06, "loss": 0.4219, "step": 13759 }, { "epoch": 0.6314533523014089, "grad_norm": 0.49601680040359497, "learning_rate": 9.096474983717404e-06, "loss": 0.4059, "step": 13760 }, { "epoch": 0.6314992428066633, "grad_norm": 0.44838351011276245, "learning_rate": 9.096334394856283e-06, "loss": 0.396, "step": 13761 }, { "epoch": 0.6315451333119178, "grad_norm": 0.4441055655479431, "learning_rate": 9.09619379614477e-06, "loss": 0.3732, "step": 13762 }, { "epoch": 0.6315910238171722, "grad_norm": 0.4844288229942322, "learning_rate": 9.096053187583202e-06, "loss": 0.4626, "step": 13763 }, { "epoch": 0.6316369143224266, "grad_norm": 0.4508432447910309, "learning_rate": 9.09591256917192e-06, "loss": 0.3457, "step": 13764 }, { "epoch": 0.6316828048276811, "grad_norm": 0.44080203771591187, "learning_rate": 9.095771940911261e-06, "loss": 0.3562, "step": 13765 }, { "epoch": 0.6317286953329356, "grad_norm": 0.4298355281352997, "learning_rate": 9.095631302801563e-06, "loss": 0.3687, "step": 13766 }, { "epoch": 0.6317745858381901, "grad_norm": 0.4905388057231903, "learning_rate": 9.095490654843164e-06, "loss": 0.4773, "step": 13767 }, { "epoch": 0.6318204763434445, "grad_norm": 0.4200710952281952, "learning_rate": 9.095349997036402e-06, "loss": 0.3635, "step": 13768 }, { "epoch": 0.631866366848699, "grad_norm": 0.4724201560020447, "learning_rate": 9.095209329381616e-06, "loss": 0.3926, "step": 13769 }, { "epoch": 0.6319122573539535, "grad_norm": 0.4468879699707031, "learning_rate": 9.095068651879146e-06, "loss": 0.3713, "step": 13770 }, { "epoch": 0.6319581478592079, "grad_norm": 0.4829399883747101, "learning_rate": 9.094927964529328e-06, "loss": 0.3525, "step": 13771 }, { "epoch": 0.6320040383644624, "grad_norm": 0.46195679903030396, "learning_rate": 9.0947872673325e-06, "loss": 0.3963, "step": 13772 }, { "epoch": 0.6320499288697169, "grad_norm": 0.4676263928413391, "learning_rate": 9.094646560289e-06, "loss": 0.4127, "step": 13773 }, { "epoch": 0.6320958193749713, "grad_norm": 0.45218780636787415, "learning_rate": 9.094505843399167e-06, "loss": 0.4086, "step": 13774 }, { "epoch": 0.6321417098802258, "grad_norm": 0.4736347794532776, "learning_rate": 9.09436511666334e-06, "loss": 0.3823, "step": 13775 }, { "epoch": 0.6321876003854803, "grad_norm": 0.4676770567893982, "learning_rate": 9.094224380081858e-06, "loss": 0.3827, "step": 13776 }, { "epoch": 0.6322334908907347, "grad_norm": 0.454408198595047, "learning_rate": 9.094083633655058e-06, "loss": 0.337, "step": 13777 }, { "epoch": 0.6322793813959892, "grad_norm": 0.45019590854644775, "learning_rate": 9.093942877383279e-06, "loss": 0.393, "step": 13778 }, { "epoch": 0.6323252719012437, "grad_norm": 0.4695059359073639, "learning_rate": 9.093802111266861e-06, "loss": 0.4311, "step": 13779 }, { "epoch": 0.632371162406498, "grad_norm": 0.46594831347465515, "learning_rate": 9.093661335306138e-06, "loss": 0.392, "step": 13780 }, { "epoch": 0.6324170529117525, "grad_norm": 0.390536367893219, "learning_rate": 9.093520549501454e-06, "loss": 0.2994, "step": 13781 }, { "epoch": 0.632462943417007, "grad_norm": 0.49860137701034546, "learning_rate": 9.093379753853144e-06, "loss": 0.5128, "step": 13782 }, { "epoch": 0.6325088339222615, "grad_norm": 0.4420652687549591, "learning_rate": 9.093238948361547e-06, "loss": 0.3341, "step": 13783 }, { "epoch": 0.6325547244275159, "grad_norm": 0.4485955834388733, "learning_rate": 9.093098133027004e-06, "loss": 0.3357, "step": 13784 }, { "epoch": 0.6326006149327704, "grad_norm": 0.46887052059173584, "learning_rate": 9.09295730784985e-06, "loss": 0.4137, "step": 13785 }, { "epoch": 0.6326465054380249, "grad_norm": 0.4433824419975281, "learning_rate": 9.092816472830426e-06, "loss": 0.3636, "step": 13786 }, { "epoch": 0.6326923959432793, "grad_norm": 0.4383932650089264, "learning_rate": 9.092675627969072e-06, "loss": 0.3971, "step": 13787 }, { "epoch": 0.6327382864485338, "grad_norm": 0.4448809027671814, "learning_rate": 9.092534773266123e-06, "loss": 0.3344, "step": 13788 }, { "epoch": 0.6327841769537883, "grad_norm": 0.4861891269683838, "learning_rate": 9.092393908721918e-06, "loss": 0.4417, "step": 13789 }, { "epoch": 0.6328300674590427, "grad_norm": 0.5005356669425964, "learning_rate": 9.0922530343368e-06, "loss": 0.4339, "step": 13790 }, { "epoch": 0.6328759579642972, "grad_norm": 0.46084117889404297, "learning_rate": 9.092112150111104e-06, "loss": 0.3971, "step": 13791 }, { "epoch": 0.6329218484695517, "grad_norm": 0.4766274392604828, "learning_rate": 9.091971256045171e-06, "loss": 0.4974, "step": 13792 }, { "epoch": 0.6329677389748061, "grad_norm": 0.4248136579990387, "learning_rate": 9.091830352139337e-06, "loss": 0.3507, "step": 13793 }, { "epoch": 0.6330136294800606, "grad_norm": 0.4457894563674927, "learning_rate": 9.091689438393943e-06, "loss": 0.3404, "step": 13794 }, { "epoch": 0.6330595199853151, "grad_norm": 0.46522021293640137, "learning_rate": 9.091548514809326e-06, "loss": 0.3995, "step": 13795 }, { "epoch": 0.6331054104905695, "grad_norm": 0.4518631398677826, "learning_rate": 9.091407581385827e-06, "loss": 0.3947, "step": 13796 }, { "epoch": 0.633151300995824, "grad_norm": 0.4420549273490906, "learning_rate": 9.091266638123783e-06, "loss": 0.354, "step": 13797 }, { "epoch": 0.6331971915010784, "grad_norm": 0.49655985832214355, "learning_rate": 9.091125685023536e-06, "loss": 0.4533, "step": 13798 }, { "epoch": 0.6332430820063328, "grad_norm": 0.45235541462898254, "learning_rate": 9.090984722085423e-06, "loss": 0.4258, "step": 13799 }, { "epoch": 0.6332889725115873, "grad_norm": 0.5090921521186829, "learning_rate": 9.09084374930978e-06, "loss": 0.5052, "step": 13800 }, { "epoch": 0.6333348630168418, "grad_norm": 0.38741999864578247, "learning_rate": 9.090702766696952e-06, "loss": 0.2875, "step": 13801 }, { "epoch": 0.6333807535220963, "grad_norm": 0.482356458902359, "learning_rate": 9.090561774247275e-06, "loss": 0.4361, "step": 13802 }, { "epoch": 0.6334266440273507, "grad_norm": 0.4722704291343689, "learning_rate": 9.090420771961086e-06, "loss": 0.4196, "step": 13803 }, { "epoch": 0.6334725345326052, "grad_norm": 0.47964537143707275, "learning_rate": 9.090279759838728e-06, "loss": 0.4708, "step": 13804 }, { "epoch": 0.6335184250378597, "grad_norm": 0.4740954637527466, "learning_rate": 9.090138737880536e-06, "loss": 0.4126, "step": 13805 }, { "epoch": 0.6335643155431141, "grad_norm": 0.533951997756958, "learning_rate": 9.089997706086852e-06, "loss": 0.4267, "step": 13806 }, { "epoch": 0.6336102060483686, "grad_norm": 0.4479615092277527, "learning_rate": 9.089856664458015e-06, "loss": 0.4248, "step": 13807 }, { "epoch": 0.6336560965536231, "grad_norm": 0.42520639300346375, "learning_rate": 9.089715612994362e-06, "loss": 0.3249, "step": 13808 }, { "epoch": 0.6337019870588775, "grad_norm": 0.47059741616249084, "learning_rate": 9.089574551696236e-06, "loss": 0.4684, "step": 13809 }, { "epoch": 0.633747877564132, "grad_norm": 0.44987934827804565, "learning_rate": 9.089433480563972e-06, "loss": 0.3834, "step": 13810 }, { "epoch": 0.6337937680693865, "grad_norm": 0.49606969952583313, "learning_rate": 9.08929239959791e-06, "loss": 0.4808, "step": 13811 }, { "epoch": 0.6338396585746409, "grad_norm": 0.40498825907707214, "learning_rate": 9.089151308798394e-06, "loss": 0.3397, "step": 13812 }, { "epoch": 0.6338855490798954, "grad_norm": 0.4510362446308136, "learning_rate": 9.089010208165758e-06, "loss": 0.3977, "step": 13813 }, { "epoch": 0.6339314395851499, "grad_norm": 0.4799993336200714, "learning_rate": 9.088869097700342e-06, "loss": 0.4164, "step": 13814 }, { "epoch": 0.6339773300904042, "grad_norm": 0.46112826466560364, "learning_rate": 9.088727977402487e-06, "loss": 0.3934, "step": 13815 }, { "epoch": 0.6340232205956587, "grad_norm": 0.44037288427352905, "learning_rate": 9.088586847272532e-06, "loss": 0.398, "step": 13816 }, { "epoch": 0.6340691111009132, "grad_norm": 0.45116597414016724, "learning_rate": 9.088445707310816e-06, "loss": 0.4015, "step": 13817 }, { "epoch": 0.6341150016061677, "grad_norm": 0.44511812925338745, "learning_rate": 9.088304557517677e-06, "loss": 0.337, "step": 13818 }, { "epoch": 0.6341608921114221, "grad_norm": 0.4653298258781433, "learning_rate": 9.088163397893459e-06, "loss": 0.4567, "step": 13819 }, { "epoch": 0.6342067826166766, "grad_norm": 0.44936805963516235, "learning_rate": 9.088022228438496e-06, "loss": 0.3789, "step": 13820 }, { "epoch": 0.6342526731219311, "grad_norm": 0.45591941475868225, "learning_rate": 9.087881049153128e-06, "loss": 0.4454, "step": 13821 }, { "epoch": 0.6342985636271855, "grad_norm": 0.45965129137039185, "learning_rate": 9.0877398600377e-06, "loss": 0.3848, "step": 13822 }, { "epoch": 0.63434445413244, "grad_norm": 0.42764368653297424, "learning_rate": 9.087598661092546e-06, "loss": 0.3248, "step": 13823 }, { "epoch": 0.6343903446376945, "grad_norm": 0.43337202072143555, "learning_rate": 9.087457452318008e-06, "loss": 0.3662, "step": 13824 }, { "epoch": 0.6344362351429489, "grad_norm": 0.42204010486602783, "learning_rate": 9.087316233714422e-06, "loss": 0.3336, "step": 13825 }, { "epoch": 0.6344821256482034, "grad_norm": 0.46364161372184753, "learning_rate": 9.087175005282135e-06, "loss": 0.4186, "step": 13826 }, { "epoch": 0.6345280161534579, "grad_norm": 0.5562419891357422, "learning_rate": 9.08703376702148e-06, "loss": 0.4086, "step": 13827 }, { "epoch": 0.6345739066587123, "grad_norm": 0.4431101977825165, "learning_rate": 9.086892518932798e-06, "loss": 0.3519, "step": 13828 }, { "epoch": 0.6346197971639668, "grad_norm": 0.42469343543052673, "learning_rate": 9.086751261016428e-06, "loss": 0.3766, "step": 13829 }, { "epoch": 0.6346656876692213, "grad_norm": 0.47859928011894226, "learning_rate": 9.086609993272715e-06, "loss": 0.4104, "step": 13830 }, { "epoch": 0.6347115781744757, "grad_norm": 0.47052422165870667, "learning_rate": 9.08646871570199e-06, "loss": 0.4489, "step": 13831 }, { "epoch": 0.6347574686797302, "grad_norm": 0.4955495297908783, "learning_rate": 9.086327428304602e-06, "loss": 0.5019, "step": 13832 }, { "epoch": 0.6348033591849847, "grad_norm": 0.46023106575012207, "learning_rate": 9.086186131080884e-06, "loss": 0.4016, "step": 13833 }, { "epoch": 0.634849249690239, "grad_norm": 0.4287262260913849, "learning_rate": 9.086044824031177e-06, "loss": 0.3529, "step": 13834 }, { "epoch": 0.6348951401954935, "grad_norm": 0.45648735761642456, "learning_rate": 9.085903507155824e-06, "loss": 0.4002, "step": 13835 }, { "epoch": 0.634941030700748, "grad_norm": 0.45051008462905884, "learning_rate": 9.085762180455162e-06, "loss": 0.3551, "step": 13836 }, { "epoch": 0.6349869212060025, "grad_norm": 0.42927584052085876, "learning_rate": 9.08562084392953e-06, "loss": 0.4034, "step": 13837 }, { "epoch": 0.6350328117112569, "grad_norm": 0.49887391924858093, "learning_rate": 9.085479497579272e-06, "loss": 0.4655, "step": 13838 }, { "epoch": 0.6350787022165114, "grad_norm": 0.444959431886673, "learning_rate": 9.085338141404722e-06, "loss": 0.3696, "step": 13839 }, { "epoch": 0.6351245927217659, "grad_norm": 0.4477539658546448, "learning_rate": 9.085196775406227e-06, "loss": 0.4385, "step": 13840 }, { "epoch": 0.6351704832270203, "grad_norm": 0.4772639572620392, "learning_rate": 9.08505539958412e-06, "loss": 0.4742, "step": 13841 }, { "epoch": 0.6352163737322748, "grad_norm": 0.4190462529659271, "learning_rate": 9.084914013938745e-06, "loss": 0.3382, "step": 13842 }, { "epoch": 0.6352622642375293, "grad_norm": 0.44994276762008667, "learning_rate": 9.08477261847044e-06, "loss": 0.3774, "step": 13843 }, { "epoch": 0.6353081547427837, "grad_norm": 0.45749905705451965, "learning_rate": 9.084631213179547e-06, "loss": 0.3602, "step": 13844 }, { "epoch": 0.6353540452480382, "grad_norm": 0.4623963236808777, "learning_rate": 9.084489798066405e-06, "loss": 0.3818, "step": 13845 }, { "epoch": 0.6353999357532927, "grad_norm": 0.4434499740600586, "learning_rate": 9.084348373131356e-06, "loss": 0.3486, "step": 13846 }, { "epoch": 0.6354458262585471, "grad_norm": 0.4792173504829407, "learning_rate": 9.084206938374736e-06, "loss": 0.4209, "step": 13847 }, { "epoch": 0.6354917167638016, "grad_norm": 0.47200843691825867, "learning_rate": 9.084065493796887e-06, "loss": 0.3741, "step": 13848 }, { "epoch": 0.6355376072690561, "grad_norm": 0.5126557350158691, "learning_rate": 9.08392403939815e-06, "loss": 0.4596, "step": 13849 }, { "epoch": 0.6355834977743104, "grad_norm": 0.5037282705307007, "learning_rate": 9.083782575178865e-06, "loss": 0.4655, "step": 13850 }, { "epoch": 0.6356293882795649, "grad_norm": 0.46655210852622986, "learning_rate": 9.083641101139372e-06, "loss": 0.4056, "step": 13851 }, { "epoch": 0.6356752787848194, "grad_norm": 0.4462193548679352, "learning_rate": 9.083499617280011e-06, "loss": 0.3545, "step": 13852 }, { "epoch": 0.6357211692900738, "grad_norm": 0.43894606828689575, "learning_rate": 9.083358123601123e-06, "loss": 0.4147, "step": 13853 }, { "epoch": 0.6357670597953283, "grad_norm": 0.44434648752212524, "learning_rate": 9.083216620103046e-06, "loss": 0.3847, "step": 13854 }, { "epoch": 0.6358129503005828, "grad_norm": 0.4428238868713379, "learning_rate": 9.08307510678612e-06, "loss": 0.3579, "step": 13855 }, { "epoch": 0.6358588408058373, "grad_norm": 0.441191703081131, "learning_rate": 9.082933583650691e-06, "loss": 0.3968, "step": 13856 }, { "epoch": 0.6359047313110917, "grad_norm": 0.4150821566581726, "learning_rate": 9.082792050697092e-06, "loss": 0.3311, "step": 13857 }, { "epoch": 0.6359506218163462, "grad_norm": 0.45145383477211, "learning_rate": 9.082650507925669e-06, "loss": 0.3701, "step": 13858 }, { "epoch": 0.6359965123216007, "grad_norm": 0.47092947363853455, "learning_rate": 9.082508955336759e-06, "loss": 0.3701, "step": 13859 }, { "epoch": 0.6360424028268551, "grad_norm": 0.5156068205833435, "learning_rate": 9.082367392930704e-06, "loss": 0.4711, "step": 13860 }, { "epoch": 0.6360882933321096, "grad_norm": 0.47675833106040955, "learning_rate": 9.082225820707844e-06, "loss": 0.4824, "step": 13861 }, { "epoch": 0.6361341838373641, "grad_norm": 0.42623695731163025, "learning_rate": 9.082084238668518e-06, "loss": 0.338, "step": 13862 }, { "epoch": 0.6361800743426185, "grad_norm": 0.5204612016677856, "learning_rate": 9.081942646813068e-06, "loss": 0.518, "step": 13863 }, { "epoch": 0.636225964847873, "grad_norm": 0.48516273498535156, "learning_rate": 9.081801045141834e-06, "loss": 0.4623, "step": 13864 }, { "epoch": 0.6362718553531275, "grad_norm": 0.4353639483451843, "learning_rate": 9.081659433655157e-06, "loss": 0.3804, "step": 13865 }, { "epoch": 0.6363177458583819, "grad_norm": 0.4441174566745758, "learning_rate": 9.081517812353378e-06, "loss": 0.4093, "step": 13866 }, { "epoch": 0.6363636363636364, "grad_norm": 0.4391174912452698, "learning_rate": 9.081376181236838e-06, "loss": 0.355, "step": 13867 }, { "epoch": 0.6364095268688909, "grad_norm": 0.4572235643863678, "learning_rate": 9.081234540305875e-06, "loss": 0.3853, "step": 13868 }, { "epoch": 0.6364554173741452, "grad_norm": 0.501128077507019, "learning_rate": 9.08109288956083e-06, "loss": 0.5306, "step": 13869 }, { "epoch": 0.6365013078793997, "grad_norm": 0.4364604949951172, "learning_rate": 9.080951229002046e-06, "loss": 0.3854, "step": 13870 }, { "epoch": 0.6365471983846542, "grad_norm": 0.4568452835083008, "learning_rate": 9.080809558629861e-06, "loss": 0.4229, "step": 13871 }, { "epoch": 0.6365930888899087, "grad_norm": 0.4498665928840637, "learning_rate": 9.080667878444618e-06, "loss": 0.401, "step": 13872 }, { "epoch": 0.6366389793951631, "grad_norm": 0.46098843216896057, "learning_rate": 9.080526188446657e-06, "loss": 0.4135, "step": 13873 }, { "epoch": 0.6366848699004176, "grad_norm": 0.44554442167282104, "learning_rate": 9.080384488636317e-06, "loss": 0.3927, "step": 13874 }, { "epoch": 0.6367307604056721, "grad_norm": 0.45249930024147034, "learning_rate": 9.080242779013941e-06, "loss": 0.37, "step": 13875 }, { "epoch": 0.6367766509109265, "grad_norm": 0.4642948806285858, "learning_rate": 9.08010105957987e-06, "loss": 0.3913, "step": 13876 }, { "epoch": 0.636822541416181, "grad_norm": 0.46328550577163696, "learning_rate": 9.079959330334442e-06, "loss": 0.3425, "step": 13877 }, { "epoch": 0.6368684319214355, "grad_norm": 0.4690941572189331, "learning_rate": 9.079817591278e-06, "loss": 0.4086, "step": 13878 }, { "epoch": 0.6369143224266899, "grad_norm": 0.46301230788230896, "learning_rate": 9.079675842410884e-06, "loss": 0.4015, "step": 13879 }, { "epoch": 0.6369602129319444, "grad_norm": 0.4607619345188141, "learning_rate": 9.079534083733435e-06, "loss": 0.4079, "step": 13880 }, { "epoch": 0.6370061034371989, "grad_norm": 0.4334520995616913, "learning_rate": 9.079392315245994e-06, "loss": 0.4022, "step": 13881 }, { "epoch": 0.6370519939424533, "grad_norm": 0.45250433683395386, "learning_rate": 9.079250536948905e-06, "loss": 0.3221, "step": 13882 }, { "epoch": 0.6370978844477078, "grad_norm": 0.4378756880760193, "learning_rate": 9.079108748842502e-06, "loss": 0.3635, "step": 13883 }, { "epoch": 0.6371437749529623, "grad_norm": 0.43111056089401245, "learning_rate": 9.078966950927131e-06, "loss": 0.3332, "step": 13884 }, { "epoch": 0.6371896654582166, "grad_norm": 0.46981289982795715, "learning_rate": 9.07882514320313e-06, "loss": 0.4495, "step": 13885 }, { "epoch": 0.6372355559634711, "grad_norm": 0.45617085695266724, "learning_rate": 9.078683325670846e-06, "loss": 0.3945, "step": 13886 }, { "epoch": 0.6372814464687256, "grad_norm": 0.435811847448349, "learning_rate": 9.078541498330613e-06, "loss": 0.3651, "step": 13887 }, { "epoch": 0.63732733697398, "grad_norm": 0.4522098898887634, "learning_rate": 9.078399661182777e-06, "loss": 0.3725, "step": 13888 }, { "epoch": 0.6373732274792345, "grad_norm": 0.44129475951194763, "learning_rate": 9.078257814227676e-06, "loss": 0.3686, "step": 13889 }, { "epoch": 0.637419117984489, "grad_norm": 0.44493645429611206, "learning_rate": 9.078115957465652e-06, "loss": 0.3638, "step": 13890 }, { "epoch": 0.6374650084897435, "grad_norm": 0.4450368583202362, "learning_rate": 9.077974090897046e-06, "loss": 0.3894, "step": 13891 }, { "epoch": 0.6375108989949979, "grad_norm": 0.4202248752117157, "learning_rate": 9.077832214522199e-06, "loss": 0.3365, "step": 13892 }, { "epoch": 0.6375567895002524, "grad_norm": 0.4527968764305115, "learning_rate": 9.077690328341453e-06, "loss": 0.3608, "step": 13893 }, { "epoch": 0.6376026800055069, "grad_norm": 0.45292168855667114, "learning_rate": 9.077548432355148e-06, "loss": 0.4014, "step": 13894 }, { "epoch": 0.6376485705107613, "grad_norm": 0.42058977484703064, "learning_rate": 9.077406526563627e-06, "loss": 0.3322, "step": 13895 }, { "epoch": 0.6376944610160158, "grad_norm": 0.41648879647254944, "learning_rate": 9.07726461096723e-06, "loss": 0.3272, "step": 13896 }, { "epoch": 0.6377403515212703, "grad_norm": 0.49815255403518677, "learning_rate": 9.077122685566297e-06, "loss": 0.4135, "step": 13897 }, { "epoch": 0.6377862420265247, "grad_norm": 0.49946486949920654, "learning_rate": 9.076980750361174e-06, "loss": 0.4479, "step": 13898 }, { "epoch": 0.6378321325317792, "grad_norm": 0.4121783375740051, "learning_rate": 9.076838805352196e-06, "loss": 0.3214, "step": 13899 }, { "epoch": 0.6378780230370337, "grad_norm": 0.45106053352355957, "learning_rate": 9.076696850539707e-06, "loss": 0.393, "step": 13900 }, { "epoch": 0.6379239135422881, "grad_norm": 0.5474677681922913, "learning_rate": 9.076554885924051e-06, "loss": 0.5361, "step": 13901 }, { "epoch": 0.6379698040475426, "grad_norm": 0.5133782029151917, "learning_rate": 9.076412911505566e-06, "loss": 0.4563, "step": 13902 }, { "epoch": 0.638015694552797, "grad_norm": 0.43608003854751587, "learning_rate": 9.076270927284594e-06, "loss": 0.3838, "step": 13903 }, { "epoch": 0.6380615850580514, "grad_norm": 0.4868790805339813, "learning_rate": 9.076128933261478e-06, "loss": 0.4459, "step": 13904 }, { "epoch": 0.6381074755633059, "grad_norm": 0.4699779152870178, "learning_rate": 9.075986929436557e-06, "loss": 0.3611, "step": 13905 }, { "epoch": 0.6381533660685604, "grad_norm": 0.4602355360984802, "learning_rate": 9.075844915810175e-06, "loss": 0.3695, "step": 13906 }, { "epoch": 0.6381992565738149, "grad_norm": 0.46012914180755615, "learning_rate": 9.075702892382673e-06, "loss": 0.4332, "step": 13907 }, { "epoch": 0.6382451470790693, "grad_norm": 0.4306870698928833, "learning_rate": 9.07556085915439e-06, "loss": 0.3671, "step": 13908 }, { "epoch": 0.6382910375843238, "grad_norm": 0.4583587348461151, "learning_rate": 9.075418816125673e-06, "loss": 0.394, "step": 13909 }, { "epoch": 0.6383369280895783, "grad_norm": 0.4546503722667694, "learning_rate": 9.075276763296856e-06, "loss": 0.4076, "step": 13910 }, { "epoch": 0.6383828185948327, "grad_norm": 0.4136744439601898, "learning_rate": 9.075134700668285e-06, "loss": 0.3242, "step": 13911 }, { "epoch": 0.6384287091000872, "grad_norm": 0.45430830121040344, "learning_rate": 9.074992628240302e-06, "loss": 0.4446, "step": 13912 }, { "epoch": 0.6384745996053417, "grad_norm": 0.4768562614917755, "learning_rate": 9.074850546013249e-06, "loss": 0.4077, "step": 13913 }, { "epoch": 0.6385204901105961, "grad_norm": 0.43960222601890564, "learning_rate": 9.074708453987466e-06, "loss": 0.3533, "step": 13914 }, { "epoch": 0.6385663806158506, "grad_norm": 0.4887329339981079, "learning_rate": 9.074566352163294e-06, "loss": 0.4536, "step": 13915 }, { "epoch": 0.6386122711211051, "grad_norm": 0.4777091145515442, "learning_rate": 9.074424240541077e-06, "loss": 0.3993, "step": 13916 }, { "epoch": 0.6386581616263595, "grad_norm": 0.44843190908432007, "learning_rate": 9.074282119121155e-06, "loss": 0.4163, "step": 13917 }, { "epoch": 0.638704052131614, "grad_norm": 0.4648585617542267, "learning_rate": 9.074139987903871e-06, "loss": 0.4279, "step": 13918 }, { "epoch": 0.6387499426368685, "grad_norm": 0.4809342622756958, "learning_rate": 9.073997846889563e-06, "loss": 0.4374, "step": 13919 }, { "epoch": 0.6387958331421228, "grad_norm": 0.42647239565849304, "learning_rate": 9.07385569607858e-06, "loss": 0.3586, "step": 13920 }, { "epoch": 0.6388417236473773, "grad_norm": 0.43390122056007385, "learning_rate": 9.07371353547126e-06, "loss": 0.3712, "step": 13921 }, { "epoch": 0.6388876141526318, "grad_norm": 0.4528542459011078, "learning_rate": 9.073571365067942e-06, "loss": 0.4075, "step": 13922 }, { "epoch": 0.6389335046578862, "grad_norm": 0.4658913016319275, "learning_rate": 9.073429184868972e-06, "loss": 0.4395, "step": 13923 }, { "epoch": 0.6389793951631407, "grad_norm": 0.4689754247665405, "learning_rate": 9.073286994874691e-06, "loss": 0.367, "step": 13924 }, { "epoch": 0.6390252856683952, "grad_norm": 0.4604182243347168, "learning_rate": 9.073144795085438e-06, "loss": 0.4074, "step": 13925 }, { "epoch": 0.6390711761736497, "grad_norm": 0.4281062185764313, "learning_rate": 9.073002585501558e-06, "loss": 0.3907, "step": 13926 }, { "epoch": 0.6391170666789041, "grad_norm": 0.4650721251964569, "learning_rate": 9.072860366123394e-06, "loss": 0.3575, "step": 13927 }, { "epoch": 0.6391629571841586, "grad_norm": 0.42986929416656494, "learning_rate": 9.072718136951287e-06, "loss": 0.3582, "step": 13928 }, { "epoch": 0.6392088476894131, "grad_norm": 0.4436732530593872, "learning_rate": 9.072575897985576e-06, "loss": 0.331, "step": 13929 }, { "epoch": 0.6392547381946675, "grad_norm": 0.4522177278995514, "learning_rate": 9.072433649226603e-06, "loss": 0.3716, "step": 13930 }, { "epoch": 0.639300628699922, "grad_norm": 0.45224958658218384, "learning_rate": 9.072291390674717e-06, "loss": 0.4476, "step": 13931 }, { "epoch": 0.6393465192051765, "grad_norm": 0.4480723738670349, "learning_rate": 9.072149122330253e-06, "loss": 0.369, "step": 13932 }, { "epoch": 0.6393924097104309, "grad_norm": 0.48439711332321167, "learning_rate": 9.072006844193558e-06, "loss": 0.3625, "step": 13933 }, { "epoch": 0.6394383002156854, "grad_norm": 0.45903655886650085, "learning_rate": 9.071864556264968e-06, "loss": 0.376, "step": 13934 }, { "epoch": 0.6394841907209399, "grad_norm": 0.4892946183681488, "learning_rate": 9.071722258544832e-06, "loss": 0.3771, "step": 13935 }, { "epoch": 0.6395300812261943, "grad_norm": 0.5190251469612122, "learning_rate": 9.071579951033486e-06, "loss": 0.5222, "step": 13936 }, { "epoch": 0.6395759717314488, "grad_norm": 0.48577019572257996, "learning_rate": 9.071437633731278e-06, "loss": 0.3985, "step": 13937 }, { "epoch": 0.6396218622367033, "grad_norm": 0.5002421140670776, "learning_rate": 9.071295306638546e-06, "loss": 0.3798, "step": 13938 }, { "epoch": 0.6396677527419576, "grad_norm": 0.488899827003479, "learning_rate": 9.071152969755635e-06, "loss": 0.4324, "step": 13939 }, { "epoch": 0.6397136432472121, "grad_norm": 0.43541520833969116, "learning_rate": 9.071010623082885e-06, "loss": 0.3299, "step": 13940 }, { "epoch": 0.6397595337524666, "grad_norm": 0.4609094560146332, "learning_rate": 9.070868266620641e-06, "loss": 0.4445, "step": 13941 }, { "epoch": 0.639805424257721, "grad_norm": 0.48501887917518616, "learning_rate": 9.070725900369241e-06, "loss": 0.4542, "step": 13942 }, { "epoch": 0.6398513147629755, "grad_norm": 0.42530471086502075, "learning_rate": 9.070583524329032e-06, "loss": 0.3468, "step": 13943 }, { "epoch": 0.63989720526823, "grad_norm": 0.4728492498397827, "learning_rate": 9.070441138500354e-06, "loss": 0.4661, "step": 13944 }, { "epoch": 0.6399430957734845, "grad_norm": 0.4799095690250397, "learning_rate": 9.070298742883548e-06, "loss": 0.4524, "step": 13945 }, { "epoch": 0.6399889862787389, "grad_norm": 0.47106826305389404, "learning_rate": 9.07015633747896e-06, "loss": 0.3504, "step": 13946 }, { "epoch": 0.6400348767839934, "grad_norm": 0.43228691816329956, "learning_rate": 9.070013922286929e-06, "loss": 0.3619, "step": 13947 }, { "epoch": 0.6400807672892479, "grad_norm": 0.47381365299224854, "learning_rate": 9.0698714973078e-06, "loss": 0.4266, "step": 13948 }, { "epoch": 0.6401266577945023, "grad_norm": 0.49155116081237793, "learning_rate": 9.069729062541914e-06, "loss": 0.4674, "step": 13949 }, { "epoch": 0.6401725482997568, "grad_norm": 0.45604097843170166, "learning_rate": 9.069586617989614e-06, "loss": 0.3445, "step": 13950 }, { "epoch": 0.6402184388050113, "grad_norm": 0.4782208502292633, "learning_rate": 9.069444163651245e-06, "loss": 0.4567, "step": 13951 }, { "epoch": 0.6402643293102657, "grad_norm": 0.426633358001709, "learning_rate": 9.069301699527144e-06, "loss": 0.3235, "step": 13952 }, { "epoch": 0.6403102198155202, "grad_norm": 0.463579922914505, "learning_rate": 9.069159225617658e-06, "loss": 0.4393, "step": 13953 }, { "epoch": 0.6403561103207747, "grad_norm": 0.4582008123397827, "learning_rate": 9.069016741923129e-06, "loss": 0.3854, "step": 13954 }, { "epoch": 0.640402000826029, "grad_norm": 0.4605084955692291, "learning_rate": 9.068874248443899e-06, "loss": 0.3833, "step": 13955 }, { "epoch": 0.6404478913312835, "grad_norm": 0.48323988914489746, "learning_rate": 9.06873174518031e-06, "loss": 0.4414, "step": 13956 }, { "epoch": 0.640493781836538, "grad_norm": 0.4243892729282379, "learning_rate": 9.068589232132704e-06, "loss": 0.3501, "step": 13957 }, { "epoch": 0.6405396723417924, "grad_norm": 0.44702792167663574, "learning_rate": 9.068446709301428e-06, "loss": 0.4225, "step": 13958 }, { "epoch": 0.6405855628470469, "grad_norm": 0.426170289516449, "learning_rate": 9.06830417668682e-06, "loss": 0.3419, "step": 13959 }, { "epoch": 0.6406314533523014, "grad_norm": 0.53554368019104, "learning_rate": 9.068161634289224e-06, "loss": 0.5226, "step": 13960 }, { "epoch": 0.6406773438575559, "grad_norm": 0.49567604064941406, "learning_rate": 9.068019082108982e-06, "loss": 0.4261, "step": 13961 }, { "epoch": 0.6407232343628103, "grad_norm": 0.4362735152244568, "learning_rate": 9.06787652014644e-06, "loss": 0.3433, "step": 13962 }, { "epoch": 0.6407691248680648, "grad_norm": 0.473021000623703, "learning_rate": 9.067733948401939e-06, "loss": 0.4611, "step": 13963 }, { "epoch": 0.6408150153733193, "grad_norm": 0.4598488211631775, "learning_rate": 9.06759136687582e-06, "loss": 0.3457, "step": 13964 }, { "epoch": 0.6408609058785737, "grad_norm": 0.4515002965927124, "learning_rate": 9.067448775568429e-06, "loss": 0.322, "step": 13965 }, { "epoch": 0.6409067963838282, "grad_norm": 0.4563615322113037, "learning_rate": 9.067306174480109e-06, "loss": 0.4141, "step": 13966 }, { "epoch": 0.6409526868890827, "grad_norm": 0.48475298285484314, "learning_rate": 9.067163563611197e-06, "loss": 0.4492, "step": 13967 }, { "epoch": 0.6409985773943371, "grad_norm": 0.4568878710269928, "learning_rate": 9.067020942962044e-06, "loss": 0.4575, "step": 13968 }, { "epoch": 0.6410444678995916, "grad_norm": 0.48156845569610596, "learning_rate": 9.066878312532987e-06, "loss": 0.4488, "step": 13969 }, { "epoch": 0.6410903584048461, "grad_norm": 0.44092920422554016, "learning_rate": 9.066735672324372e-06, "loss": 0.3715, "step": 13970 }, { "epoch": 0.6411362489101005, "grad_norm": 0.5198221206665039, "learning_rate": 9.06659302233654e-06, "loss": 0.4106, "step": 13971 }, { "epoch": 0.641182139415355, "grad_norm": 0.46925362944602966, "learning_rate": 9.06645036256984e-06, "loss": 0.3715, "step": 13972 }, { "epoch": 0.6412280299206095, "grad_norm": 0.45931297540664673, "learning_rate": 9.066307693024606e-06, "loss": 0.3282, "step": 13973 }, { "epoch": 0.6412739204258638, "grad_norm": 0.4393952786922455, "learning_rate": 9.066165013701185e-06, "loss": 0.322, "step": 13974 }, { "epoch": 0.6413198109311183, "grad_norm": 0.4711958169937134, "learning_rate": 9.066022324599921e-06, "loss": 0.3803, "step": 13975 }, { "epoch": 0.6413657014363728, "grad_norm": 0.4949358105659485, "learning_rate": 9.065879625721156e-06, "loss": 0.4167, "step": 13976 }, { "epoch": 0.6414115919416272, "grad_norm": 0.46667972207069397, "learning_rate": 9.065736917065237e-06, "loss": 0.4286, "step": 13977 }, { "epoch": 0.6414574824468817, "grad_norm": 0.4384036064147949, "learning_rate": 9.065594198632501e-06, "loss": 0.3634, "step": 13978 }, { "epoch": 0.6415033729521362, "grad_norm": 0.4735531508922577, "learning_rate": 9.065451470423294e-06, "loss": 0.4101, "step": 13979 }, { "epoch": 0.6415492634573907, "grad_norm": 0.48377203941345215, "learning_rate": 9.065308732437961e-06, "loss": 0.3827, "step": 13980 }, { "epoch": 0.6415951539626451, "grad_norm": 0.44137996435165405, "learning_rate": 9.065165984676843e-06, "loss": 0.3439, "step": 13981 }, { "epoch": 0.6416410444678996, "grad_norm": 0.4500725269317627, "learning_rate": 9.065023227140284e-06, "loss": 0.4202, "step": 13982 }, { "epoch": 0.6416869349731541, "grad_norm": 0.47660550475120544, "learning_rate": 9.064880459828626e-06, "loss": 0.3925, "step": 13983 }, { "epoch": 0.6417328254784085, "grad_norm": 0.43130001425743103, "learning_rate": 9.064737682742214e-06, "loss": 0.3211, "step": 13984 }, { "epoch": 0.641778715983663, "grad_norm": 0.46992093324661255, "learning_rate": 9.064594895881391e-06, "loss": 0.417, "step": 13985 }, { "epoch": 0.6418246064889175, "grad_norm": 0.4720174968242645, "learning_rate": 9.0644520992465e-06, "loss": 0.5006, "step": 13986 }, { "epoch": 0.6418704969941719, "grad_norm": 0.4534390866756439, "learning_rate": 9.064309292837884e-06, "loss": 0.3778, "step": 13987 }, { "epoch": 0.6419163874994264, "grad_norm": 0.4395870566368103, "learning_rate": 9.064166476655888e-06, "loss": 0.3524, "step": 13988 }, { "epoch": 0.6419622780046809, "grad_norm": 0.47124695777893066, "learning_rate": 9.064023650700852e-06, "loss": 0.4288, "step": 13989 }, { "epoch": 0.6420081685099353, "grad_norm": 0.43165233731269836, "learning_rate": 9.063880814973125e-06, "loss": 0.3405, "step": 13990 }, { "epoch": 0.6420540590151897, "grad_norm": 0.47706490755081177, "learning_rate": 9.063737969473046e-06, "loss": 0.3841, "step": 13991 }, { "epoch": 0.6420999495204442, "grad_norm": 0.4723082184791565, "learning_rate": 9.063595114200958e-06, "loss": 0.383, "step": 13992 }, { "epoch": 0.6421458400256986, "grad_norm": 0.5112971663475037, "learning_rate": 9.063452249157207e-06, "loss": 0.5565, "step": 13993 }, { "epoch": 0.6421917305309531, "grad_norm": 0.43778523802757263, "learning_rate": 9.063309374342138e-06, "loss": 0.3497, "step": 13994 }, { "epoch": 0.6422376210362076, "grad_norm": 0.41746553778648376, "learning_rate": 9.06316648975609e-06, "loss": 0.3253, "step": 13995 }, { "epoch": 0.6422835115414621, "grad_norm": 0.4504404067993164, "learning_rate": 9.063023595399409e-06, "loss": 0.3874, "step": 13996 }, { "epoch": 0.6423294020467165, "grad_norm": 0.454401433467865, "learning_rate": 9.06288069127244e-06, "loss": 0.3804, "step": 13997 }, { "epoch": 0.642375292551971, "grad_norm": 0.44376546144485474, "learning_rate": 9.062737777375523e-06, "loss": 0.3351, "step": 13998 }, { "epoch": 0.6424211830572255, "grad_norm": 0.4739118218421936, "learning_rate": 9.062594853709006e-06, "loss": 0.4415, "step": 13999 }, { "epoch": 0.6424670735624799, "grad_norm": 0.4341551959514618, "learning_rate": 9.06245192027323e-06, "loss": 0.3624, "step": 14000 }, { "epoch": 0.6425129640677344, "grad_norm": 0.45949438214302063, "learning_rate": 9.062308977068539e-06, "loss": 0.4062, "step": 14001 }, { "epoch": 0.6425588545729889, "grad_norm": 0.4945415258407593, "learning_rate": 9.062166024095278e-06, "loss": 0.4361, "step": 14002 }, { "epoch": 0.6426047450782433, "grad_norm": 0.46946659684181213, "learning_rate": 9.062023061353788e-06, "loss": 0.428, "step": 14003 }, { "epoch": 0.6426506355834978, "grad_norm": 0.47292548418045044, "learning_rate": 9.061880088844418e-06, "loss": 0.4204, "step": 14004 }, { "epoch": 0.6426965260887523, "grad_norm": 0.4691673815250397, "learning_rate": 9.061737106567505e-06, "loss": 0.4077, "step": 14005 }, { "epoch": 0.6427424165940067, "grad_norm": 0.4639378488063812, "learning_rate": 9.061594114523395e-06, "loss": 0.3308, "step": 14006 }, { "epoch": 0.6427883070992612, "grad_norm": 0.5107695460319519, "learning_rate": 9.061451112712435e-06, "loss": 0.47, "step": 14007 }, { "epoch": 0.6428341976045157, "grad_norm": 0.4573849141597748, "learning_rate": 9.06130810113497e-06, "loss": 0.3584, "step": 14008 }, { "epoch": 0.64288008810977, "grad_norm": 0.415199339389801, "learning_rate": 9.061165079791337e-06, "loss": 0.3335, "step": 14009 }, { "epoch": 0.6429259786150245, "grad_norm": 0.45390504598617554, "learning_rate": 9.061022048681884e-06, "loss": 0.4357, "step": 14010 }, { "epoch": 0.642971869120279, "grad_norm": 0.45877087116241455, "learning_rate": 9.060879007806956e-06, "loss": 0.3778, "step": 14011 }, { "epoch": 0.6430177596255334, "grad_norm": 0.5054169297218323, "learning_rate": 9.060735957166894e-06, "loss": 0.433, "step": 14012 }, { "epoch": 0.6430636501307879, "grad_norm": 0.4727879762649536, "learning_rate": 9.060592896762044e-06, "loss": 0.4213, "step": 14013 }, { "epoch": 0.6431095406360424, "grad_norm": 0.43782058358192444, "learning_rate": 9.06044982659275e-06, "loss": 0.331, "step": 14014 }, { "epoch": 0.6431554311412969, "grad_norm": 0.4261011481285095, "learning_rate": 9.060306746659357e-06, "loss": 0.347, "step": 14015 }, { "epoch": 0.6432013216465513, "grad_norm": 0.4498082995414734, "learning_rate": 9.060163656962207e-06, "loss": 0.3799, "step": 14016 }, { "epoch": 0.6432472121518058, "grad_norm": 0.49998435378074646, "learning_rate": 9.060020557501644e-06, "loss": 0.4703, "step": 14017 }, { "epoch": 0.6432931026570603, "grad_norm": 0.43560680747032166, "learning_rate": 9.059877448278013e-06, "loss": 0.342, "step": 14018 }, { "epoch": 0.6433389931623147, "grad_norm": 0.49742043018341064, "learning_rate": 9.059734329291658e-06, "loss": 0.4222, "step": 14019 }, { "epoch": 0.6433848836675692, "grad_norm": 0.5405114889144897, "learning_rate": 9.059591200542923e-06, "loss": 0.523, "step": 14020 }, { "epoch": 0.6434307741728237, "grad_norm": 0.5079758167266846, "learning_rate": 9.059448062032153e-06, "loss": 0.4544, "step": 14021 }, { "epoch": 0.6434766646780781, "grad_norm": 0.4347251057624817, "learning_rate": 9.059304913759693e-06, "loss": 0.3731, "step": 14022 }, { "epoch": 0.6435225551833326, "grad_norm": 0.45435693860054016, "learning_rate": 9.059161755725883e-06, "loss": 0.4245, "step": 14023 }, { "epoch": 0.6435684456885871, "grad_norm": 0.49252989888191223, "learning_rate": 9.059018587931073e-06, "loss": 0.4438, "step": 14024 }, { "epoch": 0.6436143361938415, "grad_norm": 0.5004079341888428, "learning_rate": 9.058875410375602e-06, "loss": 0.4749, "step": 14025 }, { "epoch": 0.643660226699096, "grad_norm": 0.47762390971183777, "learning_rate": 9.058732223059817e-06, "loss": 0.4307, "step": 14026 }, { "epoch": 0.6437061172043504, "grad_norm": 0.47857779264450073, "learning_rate": 9.058589025984064e-06, "loss": 0.3567, "step": 14027 }, { "epoch": 0.6437520077096048, "grad_norm": 0.43675971031188965, "learning_rate": 9.058445819148683e-06, "loss": 0.3597, "step": 14028 }, { "epoch": 0.6437978982148593, "grad_norm": 0.4836585819721222, "learning_rate": 9.05830260255402e-06, "loss": 0.5072, "step": 14029 }, { "epoch": 0.6438437887201138, "grad_norm": 0.47460660338401794, "learning_rate": 9.058159376200424e-06, "loss": 0.4011, "step": 14030 }, { "epoch": 0.6438896792253682, "grad_norm": 0.4512506127357483, "learning_rate": 9.058016140088232e-06, "loss": 0.4144, "step": 14031 }, { "epoch": 0.6439355697306227, "grad_norm": 0.4772023558616638, "learning_rate": 9.057872894217794e-06, "loss": 0.429, "step": 14032 }, { "epoch": 0.6439814602358772, "grad_norm": 0.4694622755050659, "learning_rate": 9.057729638589451e-06, "loss": 0.3801, "step": 14033 }, { "epoch": 0.6440273507411317, "grad_norm": 0.45099690556526184, "learning_rate": 9.057586373203548e-06, "loss": 0.4225, "step": 14034 }, { "epoch": 0.6440732412463861, "grad_norm": 0.460796058177948, "learning_rate": 9.057443098060432e-06, "loss": 0.3942, "step": 14035 }, { "epoch": 0.6441191317516406, "grad_norm": 0.45044317841529846, "learning_rate": 9.057299813160446e-06, "loss": 0.3983, "step": 14036 }, { "epoch": 0.6441650222568951, "grad_norm": 0.44518229365348816, "learning_rate": 9.057156518503934e-06, "loss": 0.3815, "step": 14037 }, { "epoch": 0.6442109127621495, "grad_norm": 0.45808717608451843, "learning_rate": 9.057013214091242e-06, "loss": 0.4432, "step": 14038 }, { "epoch": 0.644256803267404, "grad_norm": 0.4451320469379425, "learning_rate": 9.056869899922713e-06, "loss": 0.3604, "step": 14039 }, { "epoch": 0.6443026937726585, "grad_norm": 0.4737377464771271, "learning_rate": 9.056726575998691e-06, "loss": 0.3932, "step": 14040 }, { "epoch": 0.6443485842779129, "grad_norm": 0.4431821405887604, "learning_rate": 9.056583242319524e-06, "loss": 0.3491, "step": 14041 }, { "epoch": 0.6443944747831674, "grad_norm": 0.48313918709754944, "learning_rate": 9.056439898885553e-06, "loss": 0.43, "step": 14042 }, { "epoch": 0.6444403652884219, "grad_norm": 0.4375656843185425, "learning_rate": 9.056296545697125e-06, "loss": 0.4175, "step": 14043 }, { "epoch": 0.6444862557936762, "grad_norm": 0.4607250690460205, "learning_rate": 9.056153182754583e-06, "loss": 0.3792, "step": 14044 }, { "epoch": 0.6445321462989307, "grad_norm": 0.5055986642837524, "learning_rate": 9.056009810058274e-06, "loss": 0.4739, "step": 14045 }, { "epoch": 0.6445780368041852, "grad_norm": 0.43453145027160645, "learning_rate": 9.055866427608542e-06, "loss": 0.3315, "step": 14046 }, { "epoch": 0.6446239273094396, "grad_norm": 0.46851763129234314, "learning_rate": 9.05572303540573e-06, "loss": 0.3929, "step": 14047 }, { "epoch": 0.6446698178146941, "grad_norm": 0.5469868779182434, "learning_rate": 9.055579633450184e-06, "loss": 0.4833, "step": 14048 }, { "epoch": 0.6447157083199486, "grad_norm": 0.40964213013648987, "learning_rate": 9.05543622174225e-06, "loss": 0.2965, "step": 14049 }, { "epoch": 0.6447615988252031, "grad_norm": 0.4448835849761963, "learning_rate": 9.05529280028227e-06, "loss": 0.3719, "step": 14050 }, { "epoch": 0.6448074893304575, "grad_norm": 0.4760794937610626, "learning_rate": 9.055149369070593e-06, "loss": 0.3891, "step": 14051 }, { "epoch": 0.644853379835712, "grad_norm": 0.44954636693000793, "learning_rate": 9.05500592810756e-06, "loss": 0.3798, "step": 14052 }, { "epoch": 0.6448992703409665, "grad_norm": 0.44240760803222656, "learning_rate": 9.05486247739352e-06, "loss": 0.3628, "step": 14053 }, { "epoch": 0.6449451608462209, "grad_norm": 0.4264172315597534, "learning_rate": 9.054719016928813e-06, "loss": 0.378, "step": 14054 }, { "epoch": 0.6449910513514754, "grad_norm": 0.4289540946483612, "learning_rate": 9.054575546713787e-06, "loss": 0.361, "step": 14055 }, { "epoch": 0.6450369418567299, "grad_norm": 0.4617115557193756, "learning_rate": 9.054432066748787e-06, "loss": 0.4149, "step": 14056 }, { "epoch": 0.6450828323619843, "grad_norm": 0.4650711119174957, "learning_rate": 9.054288577034158e-06, "loss": 0.4174, "step": 14057 }, { "epoch": 0.6451287228672388, "grad_norm": 0.4586659371852875, "learning_rate": 9.054145077570242e-06, "loss": 0.3998, "step": 14058 }, { "epoch": 0.6451746133724933, "grad_norm": 0.47312062978744507, "learning_rate": 9.05400156835739e-06, "loss": 0.4246, "step": 14059 }, { "epoch": 0.6452205038777477, "grad_norm": 0.43735408782958984, "learning_rate": 9.053858049395941e-06, "loss": 0.3348, "step": 14060 }, { "epoch": 0.6452663943830022, "grad_norm": 0.46725115180015564, "learning_rate": 9.053714520686244e-06, "loss": 0.493, "step": 14061 }, { "epoch": 0.6453122848882566, "grad_norm": 0.43562230467796326, "learning_rate": 9.053570982228644e-06, "loss": 0.3694, "step": 14062 }, { "epoch": 0.645358175393511, "grad_norm": 0.48411309719085693, "learning_rate": 9.053427434023483e-06, "loss": 0.4414, "step": 14063 }, { "epoch": 0.6454040658987655, "grad_norm": 0.42066919803619385, "learning_rate": 9.053283876071109e-06, "loss": 0.3306, "step": 14064 }, { "epoch": 0.64544995640402, "grad_norm": 0.43161800503730774, "learning_rate": 9.053140308371866e-06, "loss": 0.3478, "step": 14065 }, { "epoch": 0.6454958469092744, "grad_norm": 0.49468934535980225, "learning_rate": 9.0529967309261e-06, "loss": 0.5155, "step": 14066 }, { "epoch": 0.6455417374145289, "grad_norm": 0.4748426079750061, "learning_rate": 9.052853143734157e-06, "loss": 0.429, "step": 14067 }, { "epoch": 0.6455876279197834, "grad_norm": 0.4822480380535126, "learning_rate": 9.052709546796381e-06, "loss": 0.4432, "step": 14068 }, { "epoch": 0.6456335184250379, "grad_norm": 0.47248879075050354, "learning_rate": 9.052565940113118e-06, "loss": 0.4557, "step": 14069 }, { "epoch": 0.6456794089302923, "grad_norm": 0.45363280177116394, "learning_rate": 9.052422323684713e-06, "loss": 0.4107, "step": 14070 }, { "epoch": 0.6457252994355468, "grad_norm": 0.48576921224594116, "learning_rate": 9.05227869751151e-06, "loss": 0.4458, "step": 14071 }, { "epoch": 0.6457711899408013, "grad_norm": 0.5302210450172424, "learning_rate": 9.052135061593856e-06, "loss": 0.5498, "step": 14072 }, { "epoch": 0.6458170804460557, "grad_norm": 0.41996899247169495, "learning_rate": 9.051991415932098e-06, "loss": 0.321, "step": 14073 }, { "epoch": 0.6458629709513102, "grad_norm": 0.45224234461784363, "learning_rate": 9.051847760526577e-06, "loss": 0.4136, "step": 14074 }, { "epoch": 0.6459088614565647, "grad_norm": 0.4406568109989166, "learning_rate": 9.051704095377642e-06, "loss": 0.3732, "step": 14075 }, { "epoch": 0.6459547519618191, "grad_norm": 0.45294687151908875, "learning_rate": 9.051560420485638e-06, "loss": 0.4073, "step": 14076 }, { "epoch": 0.6460006424670736, "grad_norm": 0.44497764110565186, "learning_rate": 9.051416735850908e-06, "loss": 0.3907, "step": 14077 }, { "epoch": 0.6460465329723281, "grad_norm": 0.4505990445613861, "learning_rate": 9.051273041473801e-06, "loss": 0.3679, "step": 14078 }, { "epoch": 0.6460924234775824, "grad_norm": 0.4884462356567383, "learning_rate": 9.05112933735466e-06, "loss": 0.4363, "step": 14079 }, { "epoch": 0.6461383139828369, "grad_norm": 0.42937272787094116, "learning_rate": 9.050985623493833e-06, "loss": 0.3557, "step": 14080 }, { "epoch": 0.6461842044880914, "grad_norm": 0.4646994173526764, "learning_rate": 9.050841899891663e-06, "loss": 0.4127, "step": 14081 }, { "epoch": 0.6462300949933458, "grad_norm": 0.4247244596481323, "learning_rate": 9.050698166548498e-06, "loss": 0.3251, "step": 14082 }, { "epoch": 0.6462759854986003, "grad_norm": 0.5226595401763916, "learning_rate": 9.05055442346468e-06, "loss": 0.4812, "step": 14083 }, { "epoch": 0.6463218760038548, "grad_norm": 0.519533097743988, "learning_rate": 9.050410670640558e-06, "loss": 0.5737, "step": 14084 }, { "epoch": 0.6463677665091093, "grad_norm": 0.466232031583786, "learning_rate": 9.050266908076476e-06, "loss": 0.3895, "step": 14085 }, { "epoch": 0.6464136570143637, "grad_norm": 0.4415220618247986, "learning_rate": 9.050123135772784e-06, "loss": 0.37, "step": 14086 }, { "epoch": 0.6464595475196182, "grad_norm": 0.4179003834724426, "learning_rate": 9.04997935372982e-06, "loss": 0.3231, "step": 14087 }, { "epoch": 0.6465054380248727, "grad_norm": 0.4091072380542755, "learning_rate": 9.049835561947936e-06, "loss": 0.3461, "step": 14088 }, { "epoch": 0.6465513285301271, "grad_norm": 0.4746333360671997, "learning_rate": 9.049691760427475e-06, "loss": 0.4101, "step": 14089 }, { "epoch": 0.6465972190353816, "grad_norm": 0.507542073726654, "learning_rate": 9.049547949168782e-06, "loss": 0.5176, "step": 14090 }, { "epoch": 0.6466431095406361, "grad_norm": 0.46834030747413635, "learning_rate": 9.049404128172205e-06, "loss": 0.4444, "step": 14091 }, { "epoch": 0.6466890000458905, "grad_norm": 0.43442246317863464, "learning_rate": 9.049260297438092e-06, "loss": 0.3612, "step": 14092 }, { "epoch": 0.646734890551145, "grad_norm": 0.44404488801956177, "learning_rate": 9.049116456966782e-06, "loss": 0.3586, "step": 14093 }, { "epoch": 0.6467807810563995, "grad_norm": 0.46931061148643494, "learning_rate": 9.048972606758627e-06, "loss": 0.4024, "step": 14094 }, { "epoch": 0.6468266715616539, "grad_norm": 0.46310216188430786, "learning_rate": 9.048828746813971e-06, "loss": 0.3673, "step": 14095 }, { "epoch": 0.6468725620669084, "grad_norm": 0.4568098187446594, "learning_rate": 9.048684877133157e-06, "loss": 0.4299, "step": 14096 }, { "epoch": 0.6469184525721628, "grad_norm": 1.5224976539611816, "learning_rate": 9.048540997716537e-06, "loss": 0.3141, "step": 14097 }, { "epoch": 0.6469643430774172, "grad_norm": 0.44508805871009827, "learning_rate": 9.048397108564453e-06, "loss": 0.3744, "step": 14098 }, { "epoch": 0.6470102335826717, "grad_norm": 0.482736736536026, "learning_rate": 9.04825320967725e-06, "loss": 0.3953, "step": 14099 }, { "epoch": 0.6470561240879262, "grad_norm": 0.4863525927066803, "learning_rate": 9.048109301055278e-06, "loss": 0.4456, "step": 14100 }, { "epoch": 0.6471020145931806, "grad_norm": 0.43306997418403625, "learning_rate": 9.047965382698879e-06, "loss": 0.3105, "step": 14101 }, { "epoch": 0.6471479050984351, "grad_norm": 0.4912335276603699, "learning_rate": 9.047821454608401e-06, "loss": 0.4687, "step": 14102 }, { "epoch": 0.6471937956036896, "grad_norm": 0.41020840406417847, "learning_rate": 9.04767751678419e-06, "loss": 0.2855, "step": 14103 }, { "epoch": 0.6472396861089441, "grad_norm": 0.4550251364707947, "learning_rate": 9.047533569226593e-06, "loss": 0.3624, "step": 14104 }, { "epoch": 0.6472855766141985, "grad_norm": 0.42754465341567993, "learning_rate": 9.047389611935955e-06, "loss": 0.3495, "step": 14105 }, { "epoch": 0.647331467119453, "grad_norm": 0.4354294538497925, "learning_rate": 9.04724564491262e-06, "loss": 0.3514, "step": 14106 }, { "epoch": 0.6473773576247075, "grad_norm": 0.4663691520690918, "learning_rate": 9.047101668156939e-06, "loss": 0.4118, "step": 14107 }, { "epoch": 0.6474232481299619, "grad_norm": 0.4161934554576874, "learning_rate": 9.046957681669257e-06, "loss": 0.2873, "step": 14108 }, { "epoch": 0.6474691386352164, "grad_norm": 0.4455099403858185, "learning_rate": 9.046813685449915e-06, "loss": 0.3925, "step": 14109 }, { "epoch": 0.6475150291404709, "grad_norm": 0.44092604517936707, "learning_rate": 9.046669679499267e-06, "loss": 0.406, "step": 14110 }, { "epoch": 0.6475609196457253, "grad_norm": 0.432089626789093, "learning_rate": 9.046525663817653e-06, "loss": 0.4165, "step": 14111 }, { "epoch": 0.6476068101509798, "grad_norm": 0.4164719879627228, "learning_rate": 9.046381638405424e-06, "loss": 0.3049, "step": 14112 }, { "epoch": 0.6476527006562343, "grad_norm": 0.5399343371391296, "learning_rate": 9.046237603262924e-06, "loss": 0.5593, "step": 14113 }, { "epoch": 0.6476985911614886, "grad_norm": 0.542177677154541, "learning_rate": 9.0460935583905e-06, "loss": 0.4102, "step": 14114 }, { "epoch": 0.6477444816667431, "grad_norm": 0.4234831631183624, "learning_rate": 9.045949503788495e-06, "loss": 0.3363, "step": 14115 }, { "epoch": 0.6477903721719976, "grad_norm": 0.441610723733902, "learning_rate": 9.04580543945726e-06, "loss": 0.375, "step": 14116 }, { "epoch": 0.647836262677252, "grad_norm": 0.47733840346336365, "learning_rate": 9.04566136539714e-06, "loss": 0.4484, "step": 14117 }, { "epoch": 0.6478821531825065, "grad_norm": 0.44860878586769104, "learning_rate": 9.045517281608482e-06, "loss": 0.4018, "step": 14118 }, { "epoch": 0.647928043687761, "grad_norm": 0.46530213952064514, "learning_rate": 9.04537318809163e-06, "loss": 0.396, "step": 14119 }, { "epoch": 0.6479739341930154, "grad_norm": 0.5208595991134644, "learning_rate": 9.045229084846933e-06, "loss": 0.4631, "step": 14120 }, { "epoch": 0.6480198246982699, "grad_norm": 0.49520808458328247, "learning_rate": 9.045084971874738e-06, "loss": 0.4434, "step": 14121 }, { "epoch": 0.6480657152035244, "grad_norm": 0.4780859351158142, "learning_rate": 9.04494084917539e-06, "loss": 0.4364, "step": 14122 }, { "epoch": 0.6481116057087789, "grad_norm": 0.4597601294517517, "learning_rate": 9.044796716749235e-06, "loss": 0.3539, "step": 14123 }, { "epoch": 0.6481574962140333, "grad_norm": 0.4641176462173462, "learning_rate": 9.044652574596621e-06, "loss": 0.457, "step": 14124 }, { "epoch": 0.6482033867192878, "grad_norm": 0.48095694184303284, "learning_rate": 9.044508422717892e-06, "loss": 0.3798, "step": 14125 }, { "epoch": 0.6482492772245423, "grad_norm": 0.4546697735786438, "learning_rate": 9.044364261113399e-06, "loss": 0.3612, "step": 14126 }, { "epoch": 0.6482951677297967, "grad_norm": 0.4892033636569977, "learning_rate": 9.044220089783487e-06, "loss": 0.4157, "step": 14127 }, { "epoch": 0.6483410582350512, "grad_norm": 0.46192535758018494, "learning_rate": 9.0440759087285e-06, "loss": 0.3593, "step": 14128 }, { "epoch": 0.6483869487403057, "grad_norm": 0.44785141944885254, "learning_rate": 9.043931717948788e-06, "loss": 0.3933, "step": 14129 }, { "epoch": 0.64843283924556, "grad_norm": 0.4695050120353699, "learning_rate": 9.043787517444697e-06, "loss": 0.431, "step": 14130 }, { "epoch": 0.6484787297508146, "grad_norm": 0.4779984652996063, "learning_rate": 9.043643307216572e-06, "loss": 0.4186, "step": 14131 }, { "epoch": 0.648524620256069, "grad_norm": 0.4555525779724121, "learning_rate": 9.043499087264763e-06, "loss": 0.3876, "step": 14132 }, { "epoch": 0.6485705107613234, "grad_norm": 0.4493536651134491, "learning_rate": 9.043354857589612e-06, "loss": 0.4263, "step": 14133 }, { "epoch": 0.6486164012665779, "grad_norm": 0.44953417778015137, "learning_rate": 9.04321061819147e-06, "loss": 0.419, "step": 14134 }, { "epoch": 0.6486622917718324, "grad_norm": 0.4317088723182678, "learning_rate": 9.043066369070683e-06, "loss": 0.3206, "step": 14135 }, { "epoch": 0.6487081822770868, "grad_norm": 0.4712981581687927, "learning_rate": 9.042922110227597e-06, "loss": 0.4451, "step": 14136 }, { "epoch": 0.6487540727823413, "grad_norm": 0.45407041907310486, "learning_rate": 9.04277784166256e-06, "loss": 0.4773, "step": 14137 }, { "epoch": 0.6487999632875958, "grad_norm": 0.4809816777706146, "learning_rate": 9.042633563375917e-06, "loss": 0.4239, "step": 14138 }, { "epoch": 0.6488458537928503, "grad_norm": 0.49848106503486633, "learning_rate": 9.042489275368018e-06, "loss": 0.5347, "step": 14139 }, { "epoch": 0.6488917442981047, "grad_norm": 0.4500460922718048, "learning_rate": 9.042344977639206e-06, "loss": 0.3855, "step": 14140 }, { "epoch": 0.6489376348033592, "grad_norm": 0.47755295038223267, "learning_rate": 9.042200670189832e-06, "loss": 0.4204, "step": 14141 }, { "epoch": 0.6489835253086137, "grad_norm": 0.469647616147995, "learning_rate": 9.04205635302024e-06, "loss": 0.4155, "step": 14142 }, { "epoch": 0.6490294158138681, "grad_norm": 0.4649830162525177, "learning_rate": 9.041912026130778e-06, "loss": 0.4485, "step": 14143 }, { "epoch": 0.6490753063191226, "grad_norm": 0.4809988737106323, "learning_rate": 9.041767689521794e-06, "loss": 0.4988, "step": 14144 }, { "epoch": 0.6491211968243771, "grad_norm": 0.4577782154083252, "learning_rate": 9.041623343193633e-06, "loss": 0.3877, "step": 14145 }, { "epoch": 0.6491670873296315, "grad_norm": 0.4883314371109009, "learning_rate": 9.041478987146645e-06, "loss": 0.4486, "step": 14146 }, { "epoch": 0.649212977834886, "grad_norm": 0.4496147334575653, "learning_rate": 9.041334621381175e-06, "loss": 0.4687, "step": 14147 }, { "epoch": 0.6492588683401405, "grad_norm": 0.45364218950271606, "learning_rate": 9.041190245897571e-06, "loss": 0.4014, "step": 14148 }, { "epoch": 0.6493047588453948, "grad_norm": 0.4536188244819641, "learning_rate": 9.04104586069618e-06, "loss": 0.4487, "step": 14149 }, { "epoch": 0.6493506493506493, "grad_norm": 0.4537698030471802, "learning_rate": 9.040901465777347e-06, "loss": 0.393, "step": 14150 }, { "epoch": 0.6493965398559038, "grad_norm": 0.47897160053253174, "learning_rate": 9.040757061141424e-06, "loss": 0.4634, "step": 14151 }, { "epoch": 0.6494424303611582, "grad_norm": 0.46400317549705505, "learning_rate": 9.040612646788755e-06, "loss": 0.4032, "step": 14152 }, { "epoch": 0.6494883208664127, "grad_norm": 0.4907035231590271, "learning_rate": 9.040468222719687e-06, "loss": 0.4523, "step": 14153 }, { "epoch": 0.6495342113716672, "grad_norm": 0.4830639958381653, "learning_rate": 9.040323788934568e-06, "loss": 0.4138, "step": 14154 }, { "epoch": 0.6495801018769216, "grad_norm": 0.4401586055755615, "learning_rate": 9.040179345433745e-06, "loss": 0.361, "step": 14155 }, { "epoch": 0.6496259923821761, "grad_norm": 0.4571276605129242, "learning_rate": 9.040034892217569e-06, "loss": 0.3553, "step": 14156 }, { "epoch": 0.6496718828874306, "grad_norm": 0.494339257478714, "learning_rate": 9.039890429286381e-06, "loss": 0.4193, "step": 14157 }, { "epoch": 0.6497177733926851, "grad_norm": 0.4599703550338745, "learning_rate": 9.039745956640533e-06, "loss": 0.4102, "step": 14158 }, { "epoch": 0.6497636638979395, "grad_norm": 0.48626193404197693, "learning_rate": 9.039601474280371e-06, "loss": 0.4348, "step": 14159 }, { "epoch": 0.649809554403194, "grad_norm": 0.45206359028816223, "learning_rate": 9.039456982206241e-06, "loss": 0.4062, "step": 14160 }, { "epoch": 0.6498554449084485, "grad_norm": 0.44994422793388367, "learning_rate": 9.039312480418493e-06, "loss": 0.3898, "step": 14161 }, { "epoch": 0.6499013354137029, "grad_norm": 0.41774889826774597, "learning_rate": 9.039167968917473e-06, "loss": 0.3291, "step": 14162 }, { "epoch": 0.6499472259189574, "grad_norm": 0.4634527266025543, "learning_rate": 9.039023447703529e-06, "loss": 0.3544, "step": 14163 }, { "epoch": 0.6499931164242119, "grad_norm": 0.49590712785720825, "learning_rate": 9.038878916777007e-06, "loss": 0.3944, "step": 14164 }, { "epoch": 0.6500390069294663, "grad_norm": 0.4279087781906128, "learning_rate": 9.038734376138258e-06, "loss": 0.2985, "step": 14165 }, { "epoch": 0.6500848974347208, "grad_norm": 0.464444637298584, "learning_rate": 9.038589825787626e-06, "loss": 0.41, "step": 14166 }, { "epoch": 0.6501307879399753, "grad_norm": 0.4504774510860443, "learning_rate": 9.038445265725462e-06, "loss": 0.3904, "step": 14167 }, { "epoch": 0.6501766784452296, "grad_norm": 0.4301224946975708, "learning_rate": 9.03830069595211e-06, "loss": 0.332, "step": 14168 }, { "epoch": 0.6502225689504841, "grad_norm": 0.4455925226211548, "learning_rate": 9.03815611646792e-06, "loss": 0.3554, "step": 14169 }, { "epoch": 0.6502684594557386, "grad_norm": 0.4768845736980438, "learning_rate": 9.038011527273238e-06, "loss": 0.4724, "step": 14170 }, { "epoch": 0.650314349960993, "grad_norm": 0.4593009054660797, "learning_rate": 9.037866928368414e-06, "loss": 0.3981, "step": 14171 }, { "epoch": 0.6503602404662475, "grad_norm": 0.4494643211364746, "learning_rate": 9.037722319753794e-06, "loss": 0.4357, "step": 14172 }, { "epoch": 0.650406130971502, "grad_norm": 0.42832937836647034, "learning_rate": 9.037577701429725e-06, "loss": 0.3538, "step": 14173 }, { "epoch": 0.6504520214767565, "grad_norm": 0.430900514125824, "learning_rate": 9.037433073396558e-06, "loss": 0.3572, "step": 14174 }, { "epoch": 0.6504979119820109, "grad_norm": 0.4725225269794464, "learning_rate": 9.037288435654638e-06, "loss": 0.456, "step": 14175 }, { "epoch": 0.6505438024872654, "grad_norm": 0.4682311713695526, "learning_rate": 9.037143788204313e-06, "loss": 0.3594, "step": 14176 }, { "epoch": 0.6505896929925199, "grad_norm": 0.4947977662086487, "learning_rate": 9.036999131045932e-06, "loss": 0.506, "step": 14177 }, { "epoch": 0.6506355834977743, "grad_norm": 0.5047365427017212, "learning_rate": 9.036854464179841e-06, "loss": 0.4631, "step": 14178 }, { "epoch": 0.6506814740030288, "grad_norm": 0.47522905468940735, "learning_rate": 9.03670978760639e-06, "loss": 0.3945, "step": 14179 }, { "epoch": 0.6507273645082833, "grad_norm": 0.4783630669116974, "learning_rate": 9.036565101325925e-06, "loss": 0.4521, "step": 14180 }, { "epoch": 0.6507732550135377, "grad_norm": 0.45084160566329956, "learning_rate": 9.036420405338798e-06, "loss": 0.4154, "step": 14181 }, { "epoch": 0.6508191455187922, "grad_norm": 0.45110151171684265, "learning_rate": 9.036275699645349e-06, "loss": 0.3603, "step": 14182 }, { "epoch": 0.6508650360240467, "grad_norm": 0.4372854232788086, "learning_rate": 9.036130984245934e-06, "loss": 0.3491, "step": 14183 }, { "epoch": 0.650910926529301, "grad_norm": 0.4539983570575714, "learning_rate": 9.035986259140897e-06, "loss": 0.3706, "step": 14184 }, { "epoch": 0.6509568170345555, "grad_norm": 0.45694440603256226, "learning_rate": 9.035841524330589e-06, "loss": 0.3792, "step": 14185 }, { "epoch": 0.65100270753981, "grad_norm": 0.41765594482421875, "learning_rate": 9.035696779815353e-06, "loss": 0.3209, "step": 14186 }, { "epoch": 0.6510485980450644, "grad_norm": 0.41461434960365295, "learning_rate": 9.03555202559554e-06, "loss": 0.3301, "step": 14187 }, { "epoch": 0.6510944885503189, "grad_norm": 0.5036013126373291, "learning_rate": 9.035407261671499e-06, "loss": 0.465, "step": 14188 }, { "epoch": 0.6511403790555734, "grad_norm": 0.46942099928855896, "learning_rate": 9.035262488043578e-06, "loss": 0.4167, "step": 14189 }, { "epoch": 0.6511862695608278, "grad_norm": 0.44426435232162476, "learning_rate": 9.035117704712123e-06, "loss": 0.393, "step": 14190 }, { "epoch": 0.6512321600660823, "grad_norm": 0.44255638122558594, "learning_rate": 9.034972911677484e-06, "loss": 0.3944, "step": 14191 }, { "epoch": 0.6512780505713368, "grad_norm": 0.44644495844841003, "learning_rate": 9.034828108940008e-06, "loss": 0.3591, "step": 14192 }, { "epoch": 0.6513239410765913, "grad_norm": 0.4471990168094635, "learning_rate": 9.034683296500045e-06, "loss": 0.4238, "step": 14193 }, { "epoch": 0.6513698315818457, "grad_norm": 0.4770403504371643, "learning_rate": 9.034538474357941e-06, "loss": 0.4435, "step": 14194 }, { "epoch": 0.6514157220871002, "grad_norm": 0.49885910749435425, "learning_rate": 9.034393642514046e-06, "loss": 0.4334, "step": 14195 }, { "epoch": 0.6514616125923547, "grad_norm": 0.4737358093261719, "learning_rate": 9.034248800968707e-06, "loss": 0.396, "step": 14196 }, { "epoch": 0.6515075030976091, "grad_norm": 0.4722176194190979, "learning_rate": 9.034103949722272e-06, "loss": 0.4733, "step": 14197 }, { "epoch": 0.6515533936028636, "grad_norm": 0.4813441336154938, "learning_rate": 9.033959088775092e-06, "loss": 0.4093, "step": 14198 }, { "epoch": 0.6515992841081181, "grad_norm": 0.4499693512916565, "learning_rate": 9.033814218127513e-06, "loss": 0.3775, "step": 14199 }, { "epoch": 0.6516451746133725, "grad_norm": 0.45358219742774963, "learning_rate": 9.033669337779885e-06, "loss": 0.3782, "step": 14200 }, { "epoch": 0.651691065118627, "grad_norm": 0.45872077345848083, "learning_rate": 9.033524447732554e-06, "loss": 0.3659, "step": 14201 }, { "epoch": 0.6517369556238815, "grad_norm": 0.4243559241294861, "learning_rate": 9.03337954798587e-06, "loss": 0.3089, "step": 14202 }, { "epoch": 0.6517828461291358, "grad_norm": 0.4494980573654175, "learning_rate": 9.03323463854018e-06, "loss": 0.3517, "step": 14203 }, { "epoch": 0.6518287366343903, "grad_norm": 0.42859968543052673, "learning_rate": 9.033089719395833e-06, "loss": 0.3393, "step": 14204 }, { "epoch": 0.6518746271396448, "grad_norm": 0.4387156367301941, "learning_rate": 9.032944790553182e-06, "loss": 0.4012, "step": 14205 }, { "epoch": 0.6519205176448992, "grad_norm": 0.4646153151988983, "learning_rate": 9.032799852012567e-06, "loss": 0.3992, "step": 14206 }, { "epoch": 0.6519664081501537, "grad_norm": 0.4445086419582367, "learning_rate": 9.032654903774344e-06, "loss": 0.4378, "step": 14207 }, { "epoch": 0.6520122986554082, "grad_norm": 0.4946781396865845, "learning_rate": 9.03250994583886e-06, "loss": 0.4068, "step": 14208 }, { "epoch": 0.6520581891606626, "grad_norm": 0.440120130777359, "learning_rate": 9.03236497820646e-06, "loss": 0.3387, "step": 14209 }, { "epoch": 0.6521040796659171, "grad_norm": 0.49724826216697693, "learning_rate": 9.032220000877493e-06, "loss": 0.4319, "step": 14210 }, { "epoch": 0.6521499701711716, "grad_norm": 0.4096190929412842, "learning_rate": 9.032075013852314e-06, "loss": 0.3478, "step": 14211 }, { "epoch": 0.6521958606764261, "grad_norm": 0.4368962347507477, "learning_rate": 9.031930017131265e-06, "loss": 0.3788, "step": 14212 }, { "epoch": 0.6522417511816805, "grad_norm": 0.462880939245224, "learning_rate": 9.031785010714695e-06, "loss": 0.4816, "step": 14213 }, { "epoch": 0.652287641686935, "grad_norm": 0.42750978469848633, "learning_rate": 9.031639994602956e-06, "loss": 0.3207, "step": 14214 }, { "epoch": 0.6523335321921895, "grad_norm": 0.49696362018585205, "learning_rate": 9.031494968796397e-06, "loss": 0.4518, "step": 14215 }, { "epoch": 0.6523794226974439, "grad_norm": 0.40314921736717224, "learning_rate": 9.031349933295364e-06, "loss": 0.2979, "step": 14216 }, { "epoch": 0.6524253132026984, "grad_norm": 0.49822306632995605, "learning_rate": 9.031204888100205e-06, "loss": 0.3275, "step": 14217 }, { "epoch": 0.6524712037079529, "grad_norm": 0.4009248614311218, "learning_rate": 9.031059833211272e-06, "loss": 0.3344, "step": 14218 }, { "epoch": 0.6525170942132072, "grad_norm": 0.43659937381744385, "learning_rate": 9.030914768628913e-06, "loss": 0.3628, "step": 14219 }, { "epoch": 0.6525629847184617, "grad_norm": 0.4257620573043823, "learning_rate": 9.030769694353473e-06, "loss": 0.3328, "step": 14220 }, { "epoch": 0.6526088752237162, "grad_norm": 0.4416256844997406, "learning_rate": 9.030624610385308e-06, "loss": 0.3649, "step": 14221 }, { "epoch": 0.6526547657289706, "grad_norm": 0.4330078661441803, "learning_rate": 9.030479516724762e-06, "loss": 0.3821, "step": 14222 }, { "epoch": 0.6527006562342251, "grad_norm": 0.4581303298473358, "learning_rate": 9.030334413372184e-06, "loss": 0.3833, "step": 14223 }, { "epoch": 0.6527465467394796, "grad_norm": 0.4504668116569519, "learning_rate": 9.030189300327922e-06, "loss": 0.4051, "step": 14224 }, { "epoch": 0.652792437244734, "grad_norm": 0.42233186960220337, "learning_rate": 9.03004417759233e-06, "loss": 0.339, "step": 14225 }, { "epoch": 0.6528383277499885, "grad_norm": 0.4453276991844177, "learning_rate": 9.02989904516575e-06, "loss": 0.3743, "step": 14226 }, { "epoch": 0.652884218255243, "grad_norm": 0.4239007830619812, "learning_rate": 9.029753903048538e-06, "loss": 0.2985, "step": 14227 }, { "epoch": 0.6529301087604975, "grad_norm": 0.4869670569896698, "learning_rate": 9.029608751241038e-06, "loss": 0.3967, "step": 14228 }, { "epoch": 0.6529759992657519, "grad_norm": 0.46322375535964966, "learning_rate": 9.0294635897436e-06, "loss": 0.3948, "step": 14229 }, { "epoch": 0.6530218897710064, "grad_norm": 0.4836317300796509, "learning_rate": 9.029318418556577e-06, "loss": 0.376, "step": 14230 }, { "epoch": 0.6530677802762609, "grad_norm": 0.4076985716819763, "learning_rate": 9.029173237680312e-06, "loss": 0.3342, "step": 14231 }, { "epoch": 0.6531136707815153, "grad_norm": 0.4360843002796173, "learning_rate": 9.029028047115158e-06, "loss": 0.3499, "step": 14232 }, { "epoch": 0.6531595612867698, "grad_norm": 0.5220640301704407, "learning_rate": 9.028882846861464e-06, "loss": 0.4185, "step": 14233 }, { "epoch": 0.6532054517920243, "grad_norm": 0.4931797683238983, "learning_rate": 9.028737636919577e-06, "loss": 0.4511, "step": 14234 }, { "epoch": 0.6532513422972787, "grad_norm": 0.4669604003429413, "learning_rate": 9.028592417289848e-06, "loss": 0.3986, "step": 14235 }, { "epoch": 0.6532972328025332, "grad_norm": 0.4486613869667053, "learning_rate": 9.028447187972627e-06, "loss": 0.3956, "step": 14236 }, { "epoch": 0.6533431233077877, "grad_norm": 0.4503069519996643, "learning_rate": 9.028301948968259e-06, "loss": 0.3652, "step": 14237 }, { "epoch": 0.653389013813042, "grad_norm": 0.43136584758758545, "learning_rate": 9.028156700277097e-06, "loss": 0.3808, "step": 14238 }, { "epoch": 0.6534349043182965, "grad_norm": 0.4658714234828949, "learning_rate": 9.028011441899491e-06, "loss": 0.4422, "step": 14239 }, { "epoch": 0.653480794823551, "grad_norm": 0.45481759309768677, "learning_rate": 9.027866173835788e-06, "loss": 0.4494, "step": 14240 }, { "epoch": 0.6535266853288054, "grad_norm": 0.4836934506893158, "learning_rate": 9.027720896086338e-06, "loss": 0.3856, "step": 14241 }, { "epoch": 0.6535725758340599, "grad_norm": 0.430143803358078, "learning_rate": 9.02757560865149e-06, "loss": 0.3788, "step": 14242 }, { "epoch": 0.6536184663393144, "grad_norm": 0.47186678647994995, "learning_rate": 9.027430311531594e-06, "loss": 0.4509, "step": 14243 }, { "epoch": 0.6536643568445688, "grad_norm": 0.4348425567150116, "learning_rate": 9.027285004727e-06, "loss": 0.3482, "step": 14244 }, { "epoch": 0.6537102473498233, "grad_norm": 0.4389311671257019, "learning_rate": 9.027139688238056e-06, "loss": 0.3724, "step": 14245 }, { "epoch": 0.6537561378550778, "grad_norm": 0.43273451924324036, "learning_rate": 9.026994362065113e-06, "loss": 0.3212, "step": 14246 }, { "epoch": 0.6538020283603323, "grad_norm": 0.41638949513435364, "learning_rate": 9.026849026208518e-06, "loss": 0.346, "step": 14247 }, { "epoch": 0.6538479188655867, "grad_norm": 0.4424901306629181, "learning_rate": 9.026703680668623e-06, "loss": 0.3425, "step": 14248 }, { "epoch": 0.6538938093708412, "grad_norm": 0.4880313277244568, "learning_rate": 9.026558325445775e-06, "loss": 0.3837, "step": 14249 }, { "epoch": 0.6539396998760957, "grad_norm": 0.4541361927986145, "learning_rate": 9.026412960540327e-06, "loss": 0.4208, "step": 14250 }, { "epoch": 0.6539855903813501, "grad_norm": 0.45776498317718506, "learning_rate": 9.026267585952626e-06, "loss": 0.3715, "step": 14251 }, { "epoch": 0.6540314808866046, "grad_norm": 0.43914008140563965, "learning_rate": 9.02612220168302e-06, "loss": 0.3789, "step": 14252 }, { "epoch": 0.6540773713918591, "grad_norm": 0.4991929531097412, "learning_rate": 9.025976807731864e-06, "loss": 0.4508, "step": 14253 }, { "epoch": 0.6541232618971135, "grad_norm": 0.49503421783447266, "learning_rate": 9.025831404099503e-06, "loss": 0.408, "step": 14254 }, { "epoch": 0.654169152402368, "grad_norm": 0.4243558347225189, "learning_rate": 9.025685990786288e-06, "loss": 0.3195, "step": 14255 }, { "epoch": 0.6542150429076224, "grad_norm": 0.49006035923957825, "learning_rate": 9.02554056779257e-06, "loss": 0.4205, "step": 14256 }, { "epoch": 0.6542609334128768, "grad_norm": 0.4685148000717163, "learning_rate": 9.025395135118695e-06, "loss": 0.4653, "step": 14257 }, { "epoch": 0.6543068239181313, "grad_norm": 0.4466828405857086, "learning_rate": 9.025249692765017e-06, "loss": 0.353, "step": 14258 }, { "epoch": 0.6543527144233858, "grad_norm": 0.5397123098373413, "learning_rate": 9.025104240731883e-06, "loss": 0.5319, "step": 14259 }, { "epoch": 0.6543986049286402, "grad_norm": 0.45067355036735535, "learning_rate": 9.024958779019645e-06, "loss": 0.3727, "step": 14260 }, { "epoch": 0.6544444954338947, "grad_norm": 0.4255097508430481, "learning_rate": 9.02481330762865e-06, "loss": 0.3411, "step": 14261 }, { "epoch": 0.6544903859391492, "grad_norm": 0.4505007565021515, "learning_rate": 9.02466782655925e-06, "loss": 0.4574, "step": 14262 }, { "epoch": 0.6545362764444036, "grad_norm": 0.4589614272117615, "learning_rate": 9.024522335811794e-06, "loss": 0.4258, "step": 14263 }, { "epoch": 0.6545821669496581, "grad_norm": 0.4743839502334595, "learning_rate": 9.024376835386631e-06, "loss": 0.4652, "step": 14264 }, { "epoch": 0.6546280574549126, "grad_norm": 0.45333167910575867, "learning_rate": 9.024231325284113e-06, "loss": 0.3775, "step": 14265 }, { "epoch": 0.6546739479601671, "grad_norm": 0.4392283260822296, "learning_rate": 9.024085805504588e-06, "loss": 0.3831, "step": 14266 }, { "epoch": 0.6547198384654215, "grad_norm": 0.44775786995887756, "learning_rate": 9.023940276048408e-06, "loss": 0.3266, "step": 14267 }, { "epoch": 0.654765728970676, "grad_norm": 0.4305972456932068, "learning_rate": 9.02379473691592e-06, "loss": 0.3965, "step": 14268 }, { "epoch": 0.6548116194759305, "grad_norm": 0.436907559633255, "learning_rate": 9.023649188107478e-06, "loss": 0.3977, "step": 14269 }, { "epoch": 0.6548575099811849, "grad_norm": 0.4303225874900818, "learning_rate": 9.023503629623426e-06, "loss": 0.3797, "step": 14270 }, { "epoch": 0.6549034004864394, "grad_norm": 0.42467400431632996, "learning_rate": 9.02335806146412e-06, "loss": 0.3632, "step": 14271 }, { "epoch": 0.6549492909916939, "grad_norm": 0.6963770389556885, "learning_rate": 9.023212483629907e-06, "loss": 0.402, "step": 14272 }, { "epoch": 0.6549951814969482, "grad_norm": 0.45656120777130127, "learning_rate": 9.02306689612114e-06, "loss": 0.3755, "step": 14273 }, { "epoch": 0.6550410720022027, "grad_norm": 0.44843944907188416, "learning_rate": 9.022921298938163e-06, "loss": 0.4065, "step": 14274 }, { "epoch": 0.6550869625074572, "grad_norm": 0.45828837156295776, "learning_rate": 9.022775692081332e-06, "loss": 0.38, "step": 14275 }, { "epoch": 0.6551328530127116, "grad_norm": 0.45324090123176575, "learning_rate": 9.022630075550995e-06, "loss": 0.4004, "step": 14276 }, { "epoch": 0.6551787435179661, "grad_norm": 0.45647162199020386, "learning_rate": 9.022484449347502e-06, "loss": 0.4304, "step": 14277 }, { "epoch": 0.6552246340232206, "grad_norm": 0.45964911580085754, "learning_rate": 9.022338813471204e-06, "loss": 0.3928, "step": 14278 }, { "epoch": 0.655270524528475, "grad_norm": 0.46865180134773254, "learning_rate": 9.02219316792245e-06, "loss": 0.4365, "step": 14279 }, { "epoch": 0.6553164150337295, "grad_norm": 0.459452360868454, "learning_rate": 9.02204751270159e-06, "loss": 0.3792, "step": 14280 }, { "epoch": 0.655362305538984, "grad_norm": 0.5320595502853394, "learning_rate": 9.021901847808975e-06, "loss": 0.4703, "step": 14281 }, { "epoch": 0.6554081960442385, "grad_norm": 0.4537582993507385, "learning_rate": 9.021756173244958e-06, "loss": 0.3559, "step": 14282 }, { "epoch": 0.6554540865494929, "grad_norm": 0.45568475127220154, "learning_rate": 9.021610489009885e-06, "loss": 0.3764, "step": 14283 }, { "epoch": 0.6554999770547474, "grad_norm": 0.42102038860321045, "learning_rate": 9.021464795104108e-06, "loss": 0.3606, "step": 14284 }, { "epoch": 0.6555458675600019, "grad_norm": 0.45229196548461914, "learning_rate": 9.021319091527978e-06, "loss": 0.3937, "step": 14285 }, { "epoch": 0.6555917580652563, "grad_norm": 0.45309579372406006, "learning_rate": 9.021173378281845e-06, "loss": 0.3704, "step": 14286 }, { "epoch": 0.6556376485705108, "grad_norm": 0.47985339164733887, "learning_rate": 9.021027655366059e-06, "loss": 0.4844, "step": 14287 }, { "epoch": 0.6556835390757653, "grad_norm": 0.510222852230072, "learning_rate": 9.02088192278097e-06, "loss": 0.5047, "step": 14288 }, { "epoch": 0.6557294295810197, "grad_norm": 0.4962892532348633, "learning_rate": 9.020736180526928e-06, "loss": 0.4492, "step": 14289 }, { "epoch": 0.6557753200862741, "grad_norm": 0.4696367084980011, "learning_rate": 9.020590428604286e-06, "loss": 0.4638, "step": 14290 }, { "epoch": 0.6558212105915286, "grad_norm": 0.45266345143318176, "learning_rate": 9.020444667013392e-06, "loss": 0.3963, "step": 14291 }, { "epoch": 0.655867101096783, "grad_norm": 0.4869053363800049, "learning_rate": 9.0202988957546e-06, "loss": 0.4932, "step": 14292 }, { "epoch": 0.6559129916020375, "grad_norm": 0.44247767329216003, "learning_rate": 9.020153114828254e-06, "loss": 0.3328, "step": 14293 }, { "epoch": 0.655958882107292, "grad_norm": 0.4148915708065033, "learning_rate": 9.020007324234713e-06, "loss": 0.3048, "step": 14294 }, { "epoch": 0.6560047726125464, "grad_norm": 0.4607759416103363, "learning_rate": 9.01986152397432e-06, "loss": 0.4011, "step": 14295 }, { "epoch": 0.6560506631178009, "grad_norm": 0.4817531704902649, "learning_rate": 9.019715714047428e-06, "loss": 0.4129, "step": 14296 }, { "epoch": 0.6560965536230554, "grad_norm": 0.4889727234840393, "learning_rate": 9.01956989445439e-06, "loss": 0.4222, "step": 14297 }, { "epoch": 0.6561424441283098, "grad_norm": 0.4200547933578491, "learning_rate": 9.019424065195556e-06, "loss": 0.3608, "step": 14298 }, { "epoch": 0.6561883346335643, "grad_norm": 0.44878315925598145, "learning_rate": 9.019278226271272e-06, "loss": 0.3678, "step": 14299 }, { "epoch": 0.6562342251388188, "grad_norm": 0.48445549607276917, "learning_rate": 9.019132377681897e-06, "loss": 0.4833, "step": 14300 }, { "epoch": 0.6562801156440733, "grad_norm": 0.4433364272117615, "learning_rate": 9.018986519427776e-06, "loss": 0.3626, "step": 14301 }, { "epoch": 0.6563260061493277, "grad_norm": 0.4375712275505066, "learning_rate": 9.018840651509258e-06, "loss": 0.3851, "step": 14302 }, { "epoch": 0.6563718966545822, "grad_norm": 0.4861016273498535, "learning_rate": 9.0186947739267e-06, "loss": 0.416, "step": 14303 }, { "epoch": 0.6564177871598367, "grad_norm": 0.42424270510673523, "learning_rate": 9.018548886680447e-06, "loss": 0.3436, "step": 14304 }, { "epoch": 0.6564636776650911, "grad_norm": 0.4334239363670349, "learning_rate": 9.018402989770854e-06, "loss": 0.3579, "step": 14305 }, { "epoch": 0.6565095681703456, "grad_norm": 0.43622320890426636, "learning_rate": 9.018257083198267e-06, "loss": 0.3446, "step": 14306 }, { "epoch": 0.6565554586756001, "grad_norm": 0.4864494204521179, "learning_rate": 9.018111166963044e-06, "loss": 0.4219, "step": 14307 }, { "epoch": 0.6566013491808544, "grad_norm": 0.43667083978652954, "learning_rate": 9.01796524106553e-06, "loss": 0.3525, "step": 14308 }, { "epoch": 0.6566472396861089, "grad_norm": 0.45527803897857666, "learning_rate": 9.017819305506077e-06, "loss": 0.4047, "step": 14309 }, { "epoch": 0.6566931301913634, "grad_norm": 0.47500160336494446, "learning_rate": 9.017673360285037e-06, "loss": 0.4044, "step": 14310 }, { "epoch": 0.6567390206966178, "grad_norm": 0.45390433073043823, "learning_rate": 9.01752740540276e-06, "loss": 0.4165, "step": 14311 }, { "epoch": 0.6567849112018723, "grad_norm": 0.46602702140808105, "learning_rate": 9.017381440859598e-06, "loss": 0.4384, "step": 14312 }, { "epoch": 0.6568308017071268, "grad_norm": 0.4664005637168884, "learning_rate": 9.017235466655902e-06, "loss": 0.4154, "step": 14313 }, { "epoch": 0.6568766922123812, "grad_norm": 0.47409966588020325, "learning_rate": 9.017089482792023e-06, "loss": 0.4471, "step": 14314 }, { "epoch": 0.6569225827176357, "grad_norm": 0.45713138580322266, "learning_rate": 9.016943489268312e-06, "loss": 0.4411, "step": 14315 }, { "epoch": 0.6569684732228902, "grad_norm": 0.4602508544921875, "learning_rate": 9.016797486085119e-06, "loss": 0.4133, "step": 14316 }, { "epoch": 0.6570143637281447, "grad_norm": 0.46990537643432617, "learning_rate": 9.016651473242795e-06, "loss": 0.4098, "step": 14317 }, { "epoch": 0.6570602542333991, "grad_norm": 0.4914743900299072, "learning_rate": 9.016505450741693e-06, "loss": 0.449, "step": 14318 }, { "epoch": 0.6571061447386536, "grad_norm": 0.44506338238716125, "learning_rate": 9.016359418582161e-06, "loss": 0.378, "step": 14319 }, { "epoch": 0.6571520352439081, "grad_norm": 0.4237197935581207, "learning_rate": 9.016213376764554e-06, "loss": 0.3439, "step": 14320 }, { "epoch": 0.6571979257491625, "grad_norm": 0.4724003076553345, "learning_rate": 9.016067325289222e-06, "loss": 0.4165, "step": 14321 }, { "epoch": 0.657243816254417, "grad_norm": 0.47174176573753357, "learning_rate": 9.015921264156513e-06, "loss": 0.4175, "step": 14322 }, { "epoch": 0.6572897067596715, "grad_norm": 0.45713236927986145, "learning_rate": 9.015775193366784e-06, "loss": 0.3784, "step": 14323 }, { "epoch": 0.6573355972649259, "grad_norm": 0.43167969584465027, "learning_rate": 9.015629112920381e-06, "loss": 0.349, "step": 14324 }, { "epoch": 0.6573814877701804, "grad_norm": 0.4784649908542633, "learning_rate": 9.015483022817659e-06, "loss": 0.3854, "step": 14325 }, { "epoch": 0.6574273782754348, "grad_norm": 0.4564891755580902, "learning_rate": 9.015336923058965e-06, "loss": 0.406, "step": 14326 }, { "epoch": 0.6574732687806892, "grad_norm": 0.4788930118083954, "learning_rate": 9.015190813644654e-06, "loss": 0.4721, "step": 14327 }, { "epoch": 0.6575191592859437, "grad_norm": 0.4814184308052063, "learning_rate": 9.015044694575078e-06, "loss": 0.3603, "step": 14328 }, { "epoch": 0.6575650497911982, "grad_norm": 0.45181649923324585, "learning_rate": 9.014898565850584e-06, "loss": 0.3708, "step": 14329 }, { "epoch": 0.6576109402964526, "grad_norm": 0.497953861951828, "learning_rate": 9.014752427471529e-06, "loss": 0.4345, "step": 14330 }, { "epoch": 0.6576568308017071, "grad_norm": 0.47690749168395996, "learning_rate": 9.014606279438261e-06, "loss": 0.4239, "step": 14331 }, { "epoch": 0.6577027213069616, "grad_norm": 0.4662839472293854, "learning_rate": 9.01446012175113e-06, "loss": 0.3978, "step": 14332 }, { "epoch": 0.657748611812216, "grad_norm": 0.42566877603530884, "learning_rate": 9.014313954410492e-06, "loss": 0.3447, "step": 14333 }, { "epoch": 0.6577945023174705, "grad_norm": 0.44314879179000854, "learning_rate": 9.014167777416694e-06, "loss": 0.3501, "step": 14334 }, { "epoch": 0.657840392822725, "grad_norm": 0.4898779094219208, "learning_rate": 9.01402159077009e-06, "loss": 0.4714, "step": 14335 }, { "epoch": 0.6578862833279795, "grad_norm": 0.45057713985443115, "learning_rate": 9.01387539447103e-06, "loss": 0.3914, "step": 14336 }, { "epoch": 0.6579321738332339, "grad_norm": 0.41558974981307983, "learning_rate": 9.013729188519867e-06, "loss": 0.3315, "step": 14337 }, { "epoch": 0.6579780643384884, "grad_norm": 0.4637351632118225, "learning_rate": 9.013582972916951e-06, "loss": 0.4349, "step": 14338 }, { "epoch": 0.6580239548437429, "grad_norm": 0.4306354522705078, "learning_rate": 9.013436747662638e-06, "loss": 0.3611, "step": 14339 }, { "epoch": 0.6580698453489973, "grad_norm": 0.4567810297012329, "learning_rate": 9.013290512757273e-06, "loss": 0.4682, "step": 14340 }, { "epoch": 0.6581157358542518, "grad_norm": 0.45350244641304016, "learning_rate": 9.013144268201213e-06, "loss": 0.3851, "step": 14341 }, { "epoch": 0.6581616263595063, "grad_norm": 0.47463229298591614, "learning_rate": 9.012998013994807e-06, "loss": 0.4284, "step": 14342 }, { "epoch": 0.6582075168647606, "grad_norm": 0.44029948115348816, "learning_rate": 9.012851750138408e-06, "loss": 0.3701, "step": 14343 }, { "epoch": 0.6582534073700151, "grad_norm": 0.4572148323059082, "learning_rate": 9.012705476632364e-06, "loss": 0.3879, "step": 14344 }, { "epoch": 0.6582992978752696, "grad_norm": 0.43075481057167053, "learning_rate": 9.012559193477033e-06, "loss": 0.3397, "step": 14345 }, { "epoch": 0.658345188380524, "grad_norm": 0.47975191473960876, "learning_rate": 9.012412900672762e-06, "loss": 0.4115, "step": 14346 }, { "epoch": 0.6583910788857785, "grad_norm": 0.4551202356815338, "learning_rate": 9.012266598219906e-06, "loss": 0.3805, "step": 14347 }, { "epoch": 0.658436969391033, "grad_norm": 0.48552870750427246, "learning_rate": 9.012120286118813e-06, "loss": 0.4382, "step": 14348 }, { "epoch": 0.6584828598962874, "grad_norm": 0.4749126136302948, "learning_rate": 9.011973964369839e-06, "loss": 0.4695, "step": 14349 }, { "epoch": 0.6585287504015419, "grad_norm": 0.4661877453327179, "learning_rate": 9.011827632973332e-06, "loss": 0.3642, "step": 14350 }, { "epoch": 0.6585746409067964, "grad_norm": 0.4630640745162964, "learning_rate": 9.011681291929646e-06, "loss": 0.4108, "step": 14351 }, { "epoch": 0.6586205314120508, "grad_norm": 0.4235508441925049, "learning_rate": 9.011534941239135e-06, "loss": 0.3507, "step": 14352 }, { "epoch": 0.6586664219173053, "grad_norm": 0.43487074971199036, "learning_rate": 9.011388580902145e-06, "loss": 0.3318, "step": 14353 }, { "epoch": 0.6587123124225598, "grad_norm": 0.4923548400402069, "learning_rate": 9.011242210919033e-06, "loss": 0.5123, "step": 14354 }, { "epoch": 0.6587582029278143, "grad_norm": 0.4181765615940094, "learning_rate": 9.011095831290151e-06, "loss": 0.3352, "step": 14355 }, { "epoch": 0.6588040934330687, "grad_norm": 0.4527325928211212, "learning_rate": 9.010949442015849e-06, "loss": 0.4278, "step": 14356 }, { "epoch": 0.6588499839383232, "grad_norm": 0.47471654415130615, "learning_rate": 9.010803043096477e-06, "loss": 0.411, "step": 14357 }, { "epoch": 0.6588958744435777, "grad_norm": 0.47008365392684937, "learning_rate": 9.01065663453239e-06, "loss": 0.4118, "step": 14358 }, { "epoch": 0.658941764948832, "grad_norm": 0.4287841320037842, "learning_rate": 9.010510216323943e-06, "loss": 0.3199, "step": 14359 }, { "epoch": 0.6589876554540866, "grad_norm": 0.47300583124160767, "learning_rate": 9.010363788471481e-06, "loss": 0.4267, "step": 14360 }, { "epoch": 0.659033545959341, "grad_norm": 0.45789220929145813, "learning_rate": 9.010217350975361e-06, "loss": 0.373, "step": 14361 }, { "epoch": 0.6590794364645954, "grad_norm": 0.450854629278183, "learning_rate": 9.010070903835934e-06, "loss": 0.4102, "step": 14362 }, { "epoch": 0.6591253269698499, "grad_norm": 0.43615835905075073, "learning_rate": 9.009924447053553e-06, "loss": 0.3417, "step": 14363 }, { "epoch": 0.6591712174751044, "grad_norm": 0.4559480845928192, "learning_rate": 9.009777980628568e-06, "loss": 0.4037, "step": 14364 }, { "epoch": 0.6592171079803588, "grad_norm": 0.45068272948265076, "learning_rate": 9.009631504561334e-06, "loss": 0.3763, "step": 14365 }, { "epoch": 0.6592629984856133, "grad_norm": 0.4611908793449402, "learning_rate": 9.0094850188522e-06, "loss": 0.4163, "step": 14366 }, { "epoch": 0.6593088889908678, "grad_norm": 0.45859256386756897, "learning_rate": 9.009338523501521e-06, "loss": 0.3631, "step": 14367 }, { "epoch": 0.6593547794961222, "grad_norm": 0.47314950823783875, "learning_rate": 9.009192018509647e-06, "loss": 0.4006, "step": 14368 }, { "epoch": 0.6594006700013767, "grad_norm": 0.48570716381073, "learning_rate": 9.009045503876932e-06, "loss": 0.4335, "step": 14369 }, { "epoch": 0.6594465605066312, "grad_norm": 0.49148470163345337, "learning_rate": 9.008898979603728e-06, "loss": 0.4471, "step": 14370 }, { "epoch": 0.6594924510118857, "grad_norm": 0.46235740184783936, "learning_rate": 9.008752445690388e-06, "loss": 0.3753, "step": 14371 }, { "epoch": 0.6595383415171401, "grad_norm": 0.49642324447631836, "learning_rate": 9.008605902137263e-06, "loss": 0.511, "step": 14372 }, { "epoch": 0.6595842320223946, "grad_norm": 0.4772595167160034, "learning_rate": 9.008459348944706e-06, "loss": 0.4478, "step": 14373 }, { "epoch": 0.6596301225276491, "grad_norm": 0.447195440530777, "learning_rate": 9.00831278611307e-06, "loss": 0.3771, "step": 14374 }, { "epoch": 0.6596760130329035, "grad_norm": 0.48210033774375916, "learning_rate": 9.008166213642705e-06, "loss": 0.3939, "step": 14375 }, { "epoch": 0.659721903538158, "grad_norm": 0.49801814556121826, "learning_rate": 9.008019631533967e-06, "loss": 0.4349, "step": 14376 }, { "epoch": 0.6597677940434125, "grad_norm": 0.513995885848999, "learning_rate": 9.007873039787206e-06, "loss": 0.4538, "step": 14377 }, { "epoch": 0.6598136845486668, "grad_norm": 0.46197929978370667, "learning_rate": 9.007726438402776e-06, "loss": 0.3815, "step": 14378 }, { "epoch": 0.6598595750539213, "grad_norm": 0.47664257884025574, "learning_rate": 9.007579827381028e-06, "loss": 0.4159, "step": 14379 }, { "epoch": 0.6599054655591758, "grad_norm": 0.4619807004928589, "learning_rate": 9.007433206722316e-06, "loss": 0.3946, "step": 14380 }, { "epoch": 0.6599513560644302, "grad_norm": 0.4921075403690338, "learning_rate": 9.00728657642699e-06, "loss": 0.473, "step": 14381 }, { "epoch": 0.6599972465696847, "grad_norm": 0.4531581997871399, "learning_rate": 9.007139936495406e-06, "loss": 0.3653, "step": 14382 }, { "epoch": 0.6600431370749392, "grad_norm": 0.5287548899650574, "learning_rate": 9.006993286927916e-06, "loss": 0.4458, "step": 14383 }, { "epoch": 0.6600890275801936, "grad_norm": 0.4318279027938843, "learning_rate": 9.00684662772487e-06, "loss": 0.3566, "step": 14384 }, { "epoch": 0.6601349180854481, "grad_norm": 0.5398879647254944, "learning_rate": 9.006699958886625e-06, "loss": 0.3729, "step": 14385 }, { "epoch": 0.6601808085907026, "grad_norm": 0.4714904725551605, "learning_rate": 9.006553280413528e-06, "loss": 0.488, "step": 14386 }, { "epoch": 0.660226699095957, "grad_norm": 0.4445558190345764, "learning_rate": 9.006406592305936e-06, "loss": 0.3829, "step": 14387 }, { "epoch": 0.6602725896012115, "grad_norm": 0.5022744536399841, "learning_rate": 9.006259894564203e-06, "loss": 0.4582, "step": 14388 }, { "epoch": 0.660318480106466, "grad_norm": 0.4796127676963806, "learning_rate": 9.006113187188676e-06, "loss": 0.3501, "step": 14389 }, { "epoch": 0.6603643706117205, "grad_norm": 0.4823330044746399, "learning_rate": 9.005966470179712e-06, "loss": 0.4156, "step": 14390 }, { "epoch": 0.6604102611169749, "grad_norm": 0.4508378803730011, "learning_rate": 9.005819743537666e-06, "loss": 0.3661, "step": 14391 }, { "epoch": 0.6604561516222294, "grad_norm": 0.44636479020118713, "learning_rate": 9.005673007262883e-06, "loss": 0.3417, "step": 14392 }, { "epoch": 0.6605020421274839, "grad_norm": 0.4510963559150696, "learning_rate": 9.005526261355724e-06, "loss": 0.3894, "step": 14393 }, { "epoch": 0.6605479326327383, "grad_norm": 0.4635971188545227, "learning_rate": 9.005379505816537e-06, "loss": 0.4203, "step": 14394 }, { "epoch": 0.6605938231379928, "grad_norm": 0.4718793034553528, "learning_rate": 9.005232740645677e-06, "loss": 0.361, "step": 14395 }, { "epoch": 0.6606397136432473, "grad_norm": 0.46480438113212585, "learning_rate": 9.005085965843496e-06, "loss": 0.4049, "step": 14396 }, { "epoch": 0.6606856041485016, "grad_norm": 0.41840851306915283, "learning_rate": 9.004939181410348e-06, "loss": 0.3314, "step": 14397 }, { "epoch": 0.6607314946537561, "grad_norm": 0.4678163528442383, "learning_rate": 9.004792387346583e-06, "loss": 0.4545, "step": 14398 }, { "epoch": 0.6607773851590106, "grad_norm": 0.4513968229293823, "learning_rate": 9.004645583652558e-06, "loss": 0.3961, "step": 14399 }, { "epoch": 0.660823275664265, "grad_norm": 0.47312647104263306, "learning_rate": 9.004498770328626e-06, "loss": 0.4601, "step": 14400 }, { "epoch": 0.6608691661695195, "grad_norm": 0.4664294719696045, "learning_rate": 9.004351947375136e-06, "loss": 0.4182, "step": 14401 }, { "epoch": 0.660915056674774, "grad_norm": 0.46030306816101074, "learning_rate": 9.004205114792444e-06, "loss": 0.4505, "step": 14402 }, { "epoch": 0.6609609471800284, "grad_norm": 0.45685482025146484, "learning_rate": 9.004058272580902e-06, "loss": 0.3583, "step": 14403 }, { "epoch": 0.6610068376852829, "grad_norm": 0.49850285053253174, "learning_rate": 9.003911420740866e-06, "loss": 0.4697, "step": 14404 }, { "epoch": 0.6610527281905374, "grad_norm": 0.4729253351688385, "learning_rate": 9.003764559272684e-06, "loss": 0.4314, "step": 14405 }, { "epoch": 0.6610986186957919, "grad_norm": 0.4743448793888092, "learning_rate": 9.003617688176712e-06, "loss": 0.4117, "step": 14406 }, { "epoch": 0.6611445092010463, "grad_norm": 0.44755828380584717, "learning_rate": 9.003470807453304e-06, "loss": 0.3448, "step": 14407 }, { "epoch": 0.6611903997063008, "grad_norm": 0.42651161551475525, "learning_rate": 9.003323917102811e-06, "loss": 0.3361, "step": 14408 }, { "epoch": 0.6612362902115553, "grad_norm": 0.41352325677871704, "learning_rate": 9.003177017125589e-06, "loss": 0.3552, "step": 14409 }, { "epoch": 0.6612821807168097, "grad_norm": 0.5120527744293213, "learning_rate": 9.003030107521988e-06, "loss": 0.4965, "step": 14410 }, { "epoch": 0.6613280712220642, "grad_norm": 0.5948606729507446, "learning_rate": 9.002883188292365e-06, "loss": 0.4096, "step": 14411 }, { "epoch": 0.6613739617273187, "grad_norm": 0.5069320797920227, "learning_rate": 9.00273625943707e-06, "loss": 0.4442, "step": 14412 }, { "epoch": 0.661419852232573, "grad_norm": 0.44752973318099976, "learning_rate": 9.002589320956458e-06, "loss": 0.3978, "step": 14413 }, { "epoch": 0.6614657427378275, "grad_norm": 0.4588022530078888, "learning_rate": 9.002442372850881e-06, "loss": 0.4436, "step": 14414 }, { "epoch": 0.661511633243082, "grad_norm": 0.5019850134849548, "learning_rate": 9.002295415120695e-06, "loss": 0.4936, "step": 14415 }, { "epoch": 0.6615575237483364, "grad_norm": 0.49573180079460144, "learning_rate": 9.00214844776625e-06, "loss": 0.5295, "step": 14416 }, { "epoch": 0.6616034142535909, "grad_norm": 0.5096704959869385, "learning_rate": 9.002001470787902e-06, "loss": 0.5112, "step": 14417 }, { "epoch": 0.6616493047588454, "grad_norm": 0.4680679142475128, "learning_rate": 9.001854484186005e-06, "loss": 0.3913, "step": 14418 }, { "epoch": 0.6616951952640998, "grad_norm": 0.4610459506511688, "learning_rate": 9.001707487960908e-06, "loss": 0.3951, "step": 14419 }, { "epoch": 0.6617410857693543, "grad_norm": 0.4382835626602173, "learning_rate": 9.00156048211297e-06, "loss": 0.3489, "step": 14420 }, { "epoch": 0.6617869762746088, "grad_norm": 0.4403751492500305, "learning_rate": 9.00141346664254e-06, "loss": 0.3629, "step": 14421 }, { "epoch": 0.6618328667798632, "grad_norm": 0.48926788568496704, "learning_rate": 9.001266441549975e-06, "loss": 0.474, "step": 14422 }, { "epoch": 0.6618787572851177, "grad_norm": 0.49508532881736755, "learning_rate": 9.001119406835626e-06, "loss": 0.4787, "step": 14423 }, { "epoch": 0.6619246477903722, "grad_norm": 0.4106306731700897, "learning_rate": 9.000972362499848e-06, "loss": 0.3086, "step": 14424 }, { "epoch": 0.6619705382956267, "grad_norm": 0.43695786595344543, "learning_rate": 9.000825308542995e-06, "loss": 0.3671, "step": 14425 }, { "epoch": 0.6620164288008811, "grad_norm": 0.46513378620147705, "learning_rate": 9.000678244965418e-06, "loss": 0.441, "step": 14426 }, { "epoch": 0.6620623193061356, "grad_norm": 0.47764840722084045, "learning_rate": 9.000531171767473e-06, "loss": 0.4182, "step": 14427 }, { "epoch": 0.6621082098113901, "grad_norm": 0.4981585144996643, "learning_rate": 9.000384088949514e-06, "loss": 0.4649, "step": 14428 }, { "epoch": 0.6621541003166445, "grad_norm": 0.4998481869697571, "learning_rate": 9.000236996511895e-06, "loss": 0.4607, "step": 14429 }, { "epoch": 0.662199990821899, "grad_norm": 0.41319283843040466, "learning_rate": 9.000089894454966e-06, "loss": 0.3318, "step": 14430 }, { "epoch": 0.6622458813271535, "grad_norm": 0.449195921421051, "learning_rate": 8.999942782779086e-06, "loss": 0.3836, "step": 14431 }, { "epoch": 0.6622917718324078, "grad_norm": 0.4686277508735657, "learning_rate": 8.999795661484603e-06, "loss": 0.4201, "step": 14432 }, { "epoch": 0.6623376623376623, "grad_norm": 0.5053922533988953, "learning_rate": 8.999648530571874e-06, "loss": 0.422, "step": 14433 }, { "epoch": 0.6623835528429168, "grad_norm": 0.4889240264892578, "learning_rate": 8.999501390041254e-06, "loss": 0.4721, "step": 14434 }, { "epoch": 0.6624294433481712, "grad_norm": 0.461119145154953, "learning_rate": 8.999354239893095e-06, "loss": 0.3945, "step": 14435 }, { "epoch": 0.6624753338534257, "grad_norm": 0.46428972482681274, "learning_rate": 8.999207080127752e-06, "loss": 0.456, "step": 14436 }, { "epoch": 0.6625212243586802, "grad_norm": 0.4733748137950897, "learning_rate": 8.999059910745577e-06, "loss": 0.4364, "step": 14437 }, { "epoch": 0.6625671148639346, "grad_norm": 0.45619314908981323, "learning_rate": 8.998912731746927e-06, "loss": 0.3941, "step": 14438 }, { "epoch": 0.6626130053691891, "grad_norm": 0.4507034718990326, "learning_rate": 8.998765543132153e-06, "loss": 0.4018, "step": 14439 }, { "epoch": 0.6626588958744436, "grad_norm": 0.4502747654914856, "learning_rate": 8.998618344901609e-06, "loss": 0.4111, "step": 14440 }, { "epoch": 0.662704786379698, "grad_norm": 0.4471191167831421, "learning_rate": 8.99847113705565e-06, "loss": 0.3817, "step": 14441 }, { "epoch": 0.6627506768849525, "grad_norm": 0.45347607135772705, "learning_rate": 8.99832391959463e-06, "loss": 0.3988, "step": 14442 }, { "epoch": 0.662796567390207, "grad_norm": 0.4385649561882019, "learning_rate": 8.998176692518904e-06, "loss": 0.3471, "step": 14443 }, { "epoch": 0.6628424578954615, "grad_norm": 0.43591105937957764, "learning_rate": 8.998029455828822e-06, "loss": 0.3484, "step": 14444 }, { "epoch": 0.6628883484007159, "grad_norm": 0.46470215916633606, "learning_rate": 8.997882209524745e-06, "loss": 0.447, "step": 14445 }, { "epoch": 0.6629342389059704, "grad_norm": 0.44905996322631836, "learning_rate": 8.99773495360702e-06, "loss": 0.4161, "step": 14446 }, { "epoch": 0.6629801294112249, "grad_norm": 0.4456089735031128, "learning_rate": 8.997587688076006e-06, "loss": 0.3882, "step": 14447 }, { "epoch": 0.6630260199164792, "grad_norm": 0.4242366850376129, "learning_rate": 8.997440412932054e-06, "loss": 0.3331, "step": 14448 }, { "epoch": 0.6630719104217337, "grad_norm": 0.44686028361320496, "learning_rate": 8.997293128175521e-06, "loss": 0.4227, "step": 14449 }, { "epoch": 0.6631178009269882, "grad_norm": 0.45099058747291565, "learning_rate": 8.997145833806759e-06, "loss": 0.3418, "step": 14450 }, { "epoch": 0.6631636914322426, "grad_norm": 0.4072268307209015, "learning_rate": 8.996998529826122e-06, "loss": 0.2773, "step": 14451 }, { "epoch": 0.6632095819374971, "grad_norm": 0.44425255060195923, "learning_rate": 8.996851216233967e-06, "loss": 0.3559, "step": 14452 }, { "epoch": 0.6632554724427516, "grad_norm": 0.46948766708374023, "learning_rate": 8.996703893030642e-06, "loss": 0.4116, "step": 14453 }, { "epoch": 0.663301362948006, "grad_norm": 0.9936320185661316, "learning_rate": 8.99655656021651e-06, "loss": 0.4005, "step": 14454 }, { "epoch": 0.6633472534532605, "grad_norm": 0.45868706703186035, "learning_rate": 8.996409217791919e-06, "loss": 0.412, "step": 14455 }, { "epoch": 0.663393143958515, "grad_norm": 0.4456869661808014, "learning_rate": 8.996261865757224e-06, "loss": 0.3545, "step": 14456 }, { "epoch": 0.6634390344637694, "grad_norm": 0.4670179784297943, "learning_rate": 8.996114504112784e-06, "loss": 0.3862, "step": 14457 }, { "epoch": 0.6634849249690239, "grad_norm": 0.47607988119125366, "learning_rate": 8.995967132858945e-06, "loss": 0.45, "step": 14458 }, { "epoch": 0.6635308154742784, "grad_norm": 0.4479120373725891, "learning_rate": 8.995819751996069e-06, "loss": 0.3858, "step": 14459 }, { "epoch": 0.6635767059795329, "grad_norm": 0.5265428423881531, "learning_rate": 8.995672361524508e-06, "loss": 0.5329, "step": 14460 }, { "epoch": 0.6636225964847873, "grad_norm": 0.4702661335468292, "learning_rate": 8.995524961444614e-06, "loss": 0.4157, "step": 14461 }, { "epoch": 0.6636684869900418, "grad_norm": 0.4336968958377838, "learning_rate": 8.995377551756746e-06, "loss": 0.3322, "step": 14462 }, { "epoch": 0.6637143774952963, "grad_norm": 0.4466639459133148, "learning_rate": 8.995230132461254e-06, "loss": 0.3634, "step": 14463 }, { "epoch": 0.6637602680005507, "grad_norm": 0.4759974479675293, "learning_rate": 8.995082703558496e-06, "loss": 0.4997, "step": 14464 }, { "epoch": 0.6638061585058052, "grad_norm": 0.44881126284599304, "learning_rate": 8.994935265048823e-06, "loss": 0.4193, "step": 14465 }, { "epoch": 0.6638520490110597, "grad_norm": 0.45830437541007996, "learning_rate": 8.994787816932595e-06, "loss": 0.4268, "step": 14466 }, { "epoch": 0.663897939516314, "grad_norm": 0.4368213415145874, "learning_rate": 8.99464035921016e-06, "loss": 0.4081, "step": 14467 }, { "epoch": 0.6639438300215685, "grad_norm": 0.5081778764724731, "learning_rate": 8.994492891881876e-06, "loss": 0.3771, "step": 14468 }, { "epoch": 0.663989720526823, "grad_norm": 0.46657243371009827, "learning_rate": 8.994345414948098e-06, "loss": 0.3276, "step": 14469 }, { "epoch": 0.6640356110320774, "grad_norm": 0.433438777923584, "learning_rate": 8.99419792840918e-06, "loss": 0.3954, "step": 14470 }, { "epoch": 0.6640815015373319, "grad_norm": 0.42769020795822144, "learning_rate": 8.994050432265476e-06, "loss": 0.3323, "step": 14471 }, { "epoch": 0.6641273920425864, "grad_norm": 0.49134954810142517, "learning_rate": 8.99390292651734e-06, "loss": 0.5142, "step": 14472 }, { "epoch": 0.6641732825478408, "grad_norm": 0.458547979593277, "learning_rate": 8.993755411165131e-06, "loss": 0.4146, "step": 14473 }, { "epoch": 0.6642191730530953, "grad_norm": 0.5217342376708984, "learning_rate": 8.9936078862092e-06, "loss": 0.406, "step": 14474 }, { "epoch": 0.6642650635583498, "grad_norm": 0.4476647675037384, "learning_rate": 8.993460351649902e-06, "loss": 0.385, "step": 14475 }, { "epoch": 0.6643109540636042, "grad_norm": 0.4613754153251648, "learning_rate": 8.993312807487591e-06, "loss": 0.3942, "step": 14476 }, { "epoch": 0.6643568445688587, "grad_norm": 0.46307024359703064, "learning_rate": 8.993165253722624e-06, "loss": 0.3989, "step": 14477 }, { "epoch": 0.6644027350741132, "grad_norm": 0.4667181968688965, "learning_rate": 8.993017690355355e-06, "loss": 0.3628, "step": 14478 }, { "epoch": 0.6644486255793677, "grad_norm": 0.4860840439796448, "learning_rate": 8.992870117386138e-06, "loss": 0.3664, "step": 14479 }, { "epoch": 0.6644945160846221, "grad_norm": 0.42400819063186646, "learning_rate": 8.99272253481533e-06, "loss": 0.3631, "step": 14480 }, { "epoch": 0.6645404065898766, "grad_norm": 0.4680634140968323, "learning_rate": 8.992574942643283e-06, "loss": 0.3683, "step": 14481 }, { "epoch": 0.6645862970951311, "grad_norm": 0.46209123730659485, "learning_rate": 8.992427340870355e-06, "loss": 0.428, "step": 14482 }, { "epoch": 0.6646321876003854, "grad_norm": 0.4881763458251953, "learning_rate": 8.992279729496898e-06, "loss": 0.4533, "step": 14483 }, { "epoch": 0.66467807810564, "grad_norm": 0.43561673164367676, "learning_rate": 8.992132108523268e-06, "loss": 0.3243, "step": 14484 }, { "epoch": 0.6647239686108944, "grad_norm": 0.474380761384964, "learning_rate": 8.99198447794982e-06, "loss": 0.4367, "step": 14485 }, { "epoch": 0.6647698591161488, "grad_norm": 0.44446250796318054, "learning_rate": 8.991836837776911e-06, "loss": 0.4074, "step": 14486 }, { "epoch": 0.6648157496214033, "grad_norm": 0.4481843113899231, "learning_rate": 8.991689188004892e-06, "loss": 0.3712, "step": 14487 }, { "epoch": 0.6648616401266578, "grad_norm": 0.44498389959335327, "learning_rate": 8.991541528634122e-06, "loss": 0.4235, "step": 14488 }, { "epoch": 0.6649075306319122, "grad_norm": 0.48705291748046875, "learning_rate": 8.991393859664954e-06, "loss": 0.4217, "step": 14489 }, { "epoch": 0.6649534211371667, "grad_norm": 0.46249619126319885, "learning_rate": 8.991246181097743e-06, "loss": 0.3389, "step": 14490 }, { "epoch": 0.6649993116424212, "grad_norm": 0.4388614296913147, "learning_rate": 8.991098492932845e-06, "loss": 0.4142, "step": 14491 }, { "epoch": 0.6650452021476756, "grad_norm": 0.41345667839050293, "learning_rate": 8.990950795170616e-06, "loss": 0.3253, "step": 14492 }, { "epoch": 0.6650910926529301, "grad_norm": 0.4584944546222687, "learning_rate": 8.990803087811408e-06, "loss": 0.3939, "step": 14493 }, { "epoch": 0.6651369831581846, "grad_norm": 0.5011733174324036, "learning_rate": 8.990655370855579e-06, "loss": 0.4773, "step": 14494 }, { "epoch": 0.6651828736634391, "grad_norm": 0.45549070835113525, "learning_rate": 8.990507644303483e-06, "loss": 0.4169, "step": 14495 }, { "epoch": 0.6652287641686935, "grad_norm": 0.4474542438983917, "learning_rate": 8.990359908155476e-06, "loss": 0.3782, "step": 14496 }, { "epoch": 0.665274654673948, "grad_norm": 0.4395194351673126, "learning_rate": 8.990212162411914e-06, "loss": 0.3451, "step": 14497 }, { "epoch": 0.6653205451792025, "grad_norm": 0.4599972367286682, "learning_rate": 8.990064407073149e-06, "loss": 0.3913, "step": 14498 }, { "epoch": 0.6653664356844569, "grad_norm": 0.4522850811481476, "learning_rate": 8.989916642139539e-06, "loss": 0.3441, "step": 14499 }, { "epoch": 0.6654123261897114, "grad_norm": 0.4209553897380829, "learning_rate": 8.989768867611439e-06, "loss": 0.3208, "step": 14500 }, { "epoch": 0.6654582166949659, "grad_norm": 0.42573878169059753, "learning_rate": 8.989621083489202e-06, "loss": 0.3388, "step": 14501 }, { "epoch": 0.6655041072002202, "grad_norm": 0.469412237405777, "learning_rate": 8.989473289773187e-06, "loss": 0.4739, "step": 14502 }, { "epoch": 0.6655499977054747, "grad_norm": 0.4056329131126404, "learning_rate": 8.98932548646375e-06, "loss": 0.2768, "step": 14503 }, { "epoch": 0.6655958882107292, "grad_norm": 0.47525283694267273, "learning_rate": 8.98917767356124e-06, "loss": 0.4509, "step": 14504 }, { "epoch": 0.6656417787159836, "grad_norm": 0.4655442237854004, "learning_rate": 8.989029851066021e-06, "loss": 0.4029, "step": 14505 }, { "epoch": 0.6656876692212381, "grad_norm": 0.4710235595703125, "learning_rate": 8.988882018978443e-06, "loss": 0.3992, "step": 14506 }, { "epoch": 0.6657335597264926, "grad_norm": 0.518703818321228, "learning_rate": 8.98873417729886e-06, "loss": 0.4532, "step": 14507 }, { "epoch": 0.665779450231747, "grad_norm": 0.47108733654022217, "learning_rate": 8.988586326027632e-06, "loss": 0.4316, "step": 14508 }, { "epoch": 0.6658253407370015, "grad_norm": 0.4415122866630554, "learning_rate": 8.988438465165113e-06, "loss": 0.4086, "step": 14509 }, { "epoch": 0.665871231242256, "grad_norm": 0.4599289000034332, "learning_rate": 8.988290594711659e-06, "loss": 0.3222, "step": 14510 }, { "epoch": 0.6659171217475104, "grad_norm": 0.4541757106781006, "learning_rate": 8.988142714667623e-06, "loss": 0.4082, "step": 14511 }, { "epoch": 0.6659630122527649, "grad_norm": 0.44427844882011414, "learning_rate": 8.987994825033363e-06, "loss": 0.361, "step": 14512 }, { "epoch": 0.6660089027580194, "grad_norm": 0.42946553230285645, "learning_rate": 8.987846925809235e-06, "loss": 0.3422, "step": 14513 }, { "epoch": 0.6660547932632739, "grad_norm": 0.56458979845047, "learning_rate": 8.987699016995592e-06, "loss": 0.5755, "step": 14514 }, { "epoch": 0.6661006837685283, "grad_norm": 0.4725033640861511, "learning_rate": 8.987551098592795e-06, "loss": 0.4118, "step": 14515 }, { "epoch": 0.6661465742737828, "grad_norm": 0.46170443296432495, "learning_rate": 8.987403170601193e-06, "loss": 0.3907, "step": 14516 }, { "epoch": 0.6661924647790373, "grad_norm": 0.4450153410434723, "learning_rate": 8.987255233021145e-06, "loss": 0.3392, "step": 14517 }, { "epoch": 0.6662383552842917, "grad_norm": 0.4926857650279999, "learning_rate": 8.987107285853008e-06, "loss": 0.5061, "step": 14518 }, { "epoch": 0.6662842457895461, "grad_norm": 0.44132015109062195, "learning_rate": 8.986959329097135e-06, "loss": 0.369, "step": 14519 }, { "epoch": 0.6663301362948006, "grad_norm": 0.46381857991218567, "learning_rate": 8.986811362753884e-06, "loss": 0.4105, "step": 14520 }, { "epoch": 0.666376026800055, "grad_norm": 0.450851708650589, "learning_rate": 8.98666338682361e-06, "loss": 0.3662, "step": 14521 }, { "epoch": 0.6664219173053095, "grad_norm": 0.9633417725563049, "learning_rate": 8.986515401306669e-06, "loss": 0.4472, "step": 14522 }, { "epoch": 0.666467807810564, "grad_norm": 0.4649474620819092, "learning_rate": 8.986367406203416e-06, "loss": 0.3965, "step": 14523 }, { "epoch": 0.6665136983158184, "grad_norm": 0.4647963345050812, "learning_rate": 8.986219401514207e-06, "loss": 0.4213, "step": 14524 }, { "epoch": 0.6665595888210729, "grad_norm": 0.46133482456207275, "learning_rate": 8.986071387239399e-06, "loss": 0.3496, "step": 14525 }, { "epoch": 0.6666054793263274, "grad_norm": 0.44894808530807495, "learning_rate": 8.985923363379348e-06, "loss": 0.3457, "step": 14526 }, { "epoch": 0.6666513698315818, "grad_norm": 0.46966975927352905, "learning_rate": 8.985775329934408e-06, "loss": 0.4401, "step": 14527 }, { "epoch": 0.6666972603368363, "grad_norm": 0.4736202657222748, "learning_rate": 8.985627286904936e-06, "loss": 0.4353, "step": 14528 }, { "epoch": 0.6667431508420908, "grad_norm": 0.48000848293304443, "learning_rate": 8.98547923429129e-06, "loss": 0.4629, "step": 14529 }, { "epoch": 0.6667890413473452, "grad_norm": 0.39614972472190857, "learning_rate": 8.985331172093823e-06, "loss": 0.2836, "step": 14530 }, { "epoch": 0.6668349318525997, "grad_norm": 0.5184483528137207, "learning_rate": 8.985183100312892e-06, "loss": 0.4443, "step": 14531 }, { "epoch": 0.6668808223578542, "grad_norm": 0.502870500087738, "learning_rate": 8.985035018948856e-06, "loss": 0.4438, "step": 14532 }, { "epoch": 0.6669267128631087, "grad_norm": 0.45267996191978455, "learning_rate": 8.984886928002066e-06, "loss": 0.4024, "step": 14533 }, { "epoch": 0.6669726033683631, "grad_norm": 0.4268811345100403, "learning_rate": 8.98473882747288e-06, "loss": 0.3448, "step": 14534 }, { "epoch": 0.6670184938736176, "grad_norm": 0.4367494583129883, "learning_rate": 8.984590717361657e-06, "loss": 0.3023, "step": 14535 }, { "epoch": 0.667064384378872, "grad_norm": 0.5010473728179932, "learning_rate": 8.98444259766875e-06, "loss": 0.4966, "step": 14536 }, { "epoch": 0.6671102748841264, "grad_norm": 0.4206502437591553, "learning_rate": 8.984294468394517e-06, "loss": 0.3492, "step": 14537 }, { "epoch": 0.6671561653893809, "grad_norm": 0.4631282389163971, "learning_rate": 8.984146329539312e-06, "loss": 0.4074, "step": 14538 }, { "epoch": 0.6672020558946354, "grad_norm": 0.4614452123641968, "learning_rate": 8.983998181103493e-06, "loss": 0.4561, "step": 14539 }, { "epoch": 0.6672479463998898, "grad_norm": 0.4451823830604553, "learning_rate": 8.983850023087416e-06, "loss": 0.4328, "step": 14540 }, { "epoch": 0.6672938369051443, "grad_norm": 0.4404507875442505, "learning_rate": 8.983701855491436e-06, "loss": 0.3616, "step": 14541 }, { "epoch": 0.6673397274103988, "grad_norm": 0.4492647349834442, "learning_rate": 8.98355367831591e-06, "loss": 0.3417, "step": 14542 }, { "epoch": 0.6673856179156532, "grad_norm": 0.4315362870693207, "learning_rate": 8.983405491561197e-06, "loss": 0.3581, "step": 14543 }, { "epoch": 0.6674315084209077, "grad_norm": 0.4772603511810303, "learning_rate": 8.98325729522765e-06, "loss": 0.3932, "step": 14544 }, { "epoch": 0.6674773989261622, "grad_norm": 0.5019184350967407, "learning_rate": 8.983109089315625e-06, "loss": 0.4983, "step": 14545 }, { "epoch": 0.6675232894314166, "grad_norm": 0.49283310770988464, "learning_rate": 8.982960873825481e-06, "loss": 0.4789, "step": 14546 }, { "epoch": 0.6675691799366711, "grad_norm": 0.4469640851020813, "learning_rate": 8.982812648757573e-06, "loss": 0.365, "step": 14547 }, { "epoch": 0.6676150704419256, "grad_norm": 0.5307384729385376, "learning_rate": 8.982664414112258e-06, "loss": 0.5309, "step": 14548 }, { "epoch": 0.6676609609471801, "grad_norm": 0.4557315707206726, "learning_rate": 8.98251616988989e-06, "loss": 0.3406, "step": 14549 }, { "epoch": 0.6677068514524345, "grad_norm": 0.42902955412864685, "learning_rate": 8.98236791609083e-06, "loss": 0.3635, "step": 14550 }, { "epoch": 0.667752741957689, "grad_norm": 0.45770910382270813, "learning_rate": 8.982219652715431e-06, "loss": 0.3486, "step": 14551 }, { "epoch": 0.6677986324629435, "grad_norm": 0.3997988998889923, "learning_rate": 8.98207137976405e-06, "loss": 0.2828, "step": 14552 }, { "epoch": 0.6678445229681979, "grad_norm": 0.39941275119781494, "learning_rate": 8.981923097237047e-06, "loss": 0.2977, "step": 14553 }, { "epoch": 0.6678904134734523, "grad_norm": 0.4932500422000885, "learning_rate": 8.981774805134772e-06, "loss": 0.4292, "step": 14554 }, { "epoch": 0.6679363039787068, "grad_norm": 0.4331819415092468, "learning_rate": 8.981626503457586e-06, "loss": 0.3473, "step": 14555 }, { "epoch": 0.6679821944839612, "grad_norm": 0.49689358472824097, "learning_rate": 8.981478192205845e-06, "loss": 0.4922, "step": 14556 }, { "epoch": 0.6680280849892157, "grad_norm": 0.5731780529022217, "learning_rate": 8.981329871379907e-06, "loss": 0.4214, "step": 14557 }, { "epoch": 0.6680739754944702, "grad_norm": 0.443897545337677, "learning_rate": 8.981181540980126e-06, "loss": 0.4044, "step": 14558 }, { "epoch": 0.6681198659997246, "grad_norm": 0.4272734820842743, "learning_rate": 8.98103320100686e-06, "loss": 0.3732, "step": 14559 }, { "epoch": 0.6681657565049791, "grad_norm": 0.4343867301940918, "learning_rate": 8.980884851460467e-06, "loss": 0.3385, "step": 14560 }, { "epoch": 0.6682116470102336, "grad_norm": 0.46007993817329407, "learning_rate": 8.980736492341301e-06, "loss": 0.4155, "step": 14561 }, { "epoch": 0.668257537515488, "grad_norm": 0.4299311935901642, "learning_rate": 8.980588123649721e-06, "loss": 0.3426, "step": 14562 }, { "epoch": 0.6683034280207425, "grad_norm": 0.4787943661212921, "learning_rate": 8.980439745386081e-06, "loss": 0.4455, "step": 14563 }, { "epoch": 0.668349318525997, "grad_norm": 0.46967437863349915, "learning_rate": 8.980291357550742e-06, "loss": 0.4884, "step": 14564 }, { "epoch": 0.6683952090312514, "grad_norm": 0.44886350631713867, "learning_rate": 8.980142960144057e-06, "loss": 0.3801, "step": 14565 }, { "epoch": 0.6684410995365059, "grad_norm": 0.4726877808570862, "learning_rate": 8.979994553166385e-06, "loss": 0.3976, "step": 14566 }, { "epoch": 0.6684869900417604, "grad_norm": 0.4741431474685669, "learning_rate": 8.97984613661808e-06, "loss": 0.4569, "step": 14567 }, { "epoch": 0.6685328805470149, "grad_norm": 0.45089778304100037, "learning_rate": 8.979697710499505e-06, "loss": 0.409, "step": 14568 }, { "epoch": 0.6685787710522693, "grad_norm": 0.4743574857711792, "learning_rate": 8.97954927481101e-06, "loss": 0.4622, "step": 14569 }, { "epoch": 0.6686246615575238, "grad_norm": 0.48954150080680847, "learning_rate": 8.979400829552956e-06, "loss": 0.4292, "step": 14570 }, { "epoch": 0.6686705520627783, "grad_norm": 0.44962358474731445, "learning_rate": 8.9792523747257e-06, "loss": 0.4257, "step": 14571 }, { "epoch": 0.6687164425680326, "grad_norm": 0.4855929911136627, "learning_rate": 8.979103910329597e-06, "loss": 0.4557, "step": 14572 }, { "epoch": 0.6687623330732871, "grad_norm": 0.4931475520133972, "learning_rate": 8.978955436365005e-06, "loss": 0.4891, "step": 14573 }, { "epoch": 0.6688082235785416, "grad_norm": 0.4573855996131897, "learning_rate": 8.97880695283228e-06, "loss": 0.3818, "step": 14574 }, { "epoch": 0.668854114083796, "grad_norm": 0.45578575134277344, "learning_rate": 8.978658459731782e-06, "loss": 0.3665, "step": 14575 }, { "epoch": 0.6689000045890505, "grad_norm": 0.4740530252456665, "learning_rate": 8.978509957063864e-06, "loss": 0.4676, "step": 14576 }, { "epoch": 0.668945895094305, "grad_norm": 0.44990482926368713, "learning_rate": 8.978361444828885e-06, "loss": 0.4267, "step": 14577 }, { "epoch": 0.6689917855995594, "grad_norm": 0.46761006116867065, "learning_rate": 8.978212923027204e-06, "loss": 0.48, "step": 14578 }, { "epoch": 0.6690376761048139, "grad_norm": 0.4664131999015808, "learning_rate": 8.978064391659176e-06, "loss": 0.3967, "step": 14579 }, { "epoch": 0.6690835666100684, "grad_norm": 0.5295001268386841, "learning_rate": 8.977915850725159e-06, "loss": 0.5084, "step": 14580 }, { "epoch": 0.6691294571153228, "grad_norm": 0.46649545431137085, "learning_rate": 8.97776730022551e-06, "loss": 0.4218, "step": 14581 }, { "epoch": 0.6691753476205773, "grad_norm": 0.46385177969932556, "learning_rate": 8.977618740160586e-06, "loss": 0.3492, "step": 14582 }, { "epoch": 0.6692212381258318, "grad_norm": 0.4378233551979065, "learning_rate": 8.977470170530742e-06, "loss": 0.3492, "step": 14583 }, { "epoch": 0.6692671286310863, "grad_norm": 0.45596447587013245, "learning_rate": 8.97732159133634e-06, "loss": 0.3564, "step": 14584 }, { "epoch": 0.6693130191363407, "grad_norm": 0.46202313899993896, "learning_rate": 8.977173002577735e-06, "loss": 0.4165, "step": 14585 }, { "epoch": 0.6693589096415952, "grad_norm": 0.4557839632034302, "learning_rate": 8.977024404255283e-06, "loss": 0.4258, "step": 14586 }, { "epoch": 0.6694048001468497, "grad_norm": 0.49688103795051575, "learning_rate": 8.976875796369342e-06, "loss": 0.3961, "step": 14587 }, { "epoch": 0.669450690652104, "grad_norm": 0.455834299325943, "learning_rate": 8.976727178920271e-06, "loss": 0.3753, "step": 14588 }, { "epoch": 0.6694965811573586, "grad_norm": 0.5413268804550171, "learning_rate": 8.976578551908426e-06, "loss": 0.4244, "step": 14589 }, { "epoch": 0.669542471662613, "grad_norm": 0.43210649490356445, "learning_rate": 8.976429915334164e-06, "loss": 0.3519, "step": 14590 }, { "epoch": 0.6695883621678674, "grad_norm": 0.43613702058792114, "learning_rate": 8.976281269197844e-06, "loss": 0.3621, "step": 14591 }, { "epoch": 0.6696342526731219, "grad_norm": 0.44548624753952026, "learning_rate": 8.976132613499822e-06, "loss": 0.3904, "step": 14592 }, { "epoch": 0.6696801431783764, "grad_norm": 0.5107896327972412, "learning_rate": 8.975983948240457e-06, "loss": 0.5, "step": 14593 }, { "epoch": 0.6697260336836308, "grad_norm": 0.44990479946136475, "learning_rate": 8.975835273420103e-06, "loss": 0.4299, "step": 14594 }, { "epoch": 0.6697719241888853, "grad_norm": 0.44652318954467773, "learning_rate": 8.975686589039123e-06, "loss": 0.4021, "step": 14595 }, { "epoch": 0.6698178146941398, "grad_norm": 0.40969118475914, "learning_rate": 8.97553789509787e-06, "loss": 0.2973, "step": 14596 }, { "epoch": 0.6698637051993942, "grad_norm": 0.4366760849952698, "learning_rate": 8.975389191596703e-06, "loss": 0.3393, "step": 14597 }, { "epoch": 0.6699095957046487, "grad_norm": 0.49897250533103943, "learning_rate": 8.975240478535981e-06, "loss": 0.5323, "step": 14598 }, { "epoch": 0.6699554862099032, "grad_norm": 0.428006112575531, "learning_rate": 8.975091755916058e-06, "loss": 0.3204, "step": 14599 }, { "epoch": 0.6700013767151576, "grad_norm": 0.48574578762054443, "learning_rate": 8.974943023737294e-06, "loss": 0.4228, "step": 14600 }, { "epoch": 0.6700472672204121, "grad_norm": 0.4550846517086029, "learning_rate": 8.974794282000048e-06, "loss": 0.3444, "step": 14601 }, { "epoch": 0.6700931577256666, "grad_norm": 0.49023714661598206, "learning_rate": 8.974645530704675e-06, "loss": 0.4368, "step": 14602 }, { "epoch": 0.6701390482309211, "grad_norm": 0.512380838394165, "learning_rate": 8.974496769851535e-06, "loss": 0.475, "step": 14603 }, { "epoch": 0.6701849387361755, "grad_norm": 0.4899825155735016, "learning_rate": 8.974347999440985e-06, "loss": 0.4847, "step": 14604 }, { "epoch": 0.67023082924143, "grad_norm": 0.45175325870513916, "learning_rate": 8.974199219473382e-06, "loss": 0.4281, "step": 14605 }, { "epoch": 0.6702767197466845, "grad_norm": 0.48382988572120667, "learning_rate": 8.974050429949083e-06, "loss": 0.4152, "step": 14606 }, { "epoch": 0.6703226102519388, "grad_norm": 0.4746871292591095, "learning_rate": 8.973901630868448e-06, "loss": 0.4255, "step": 14607 }, { "epoch": 0.6703685007571933, "grad_norm": 0.46630939841270447, "learning_rate": 8.973752822231833e-06, "loss": 0.3901, "step": 14608 }, { "epoch": 0.6704143912624478, "grad_norm": 0.46052226424217224, "learning_rate": 8.973604004039598e-06, "loss": 0.4168, "step": 14609 }, { "epoch": 0.6704602817677022, "grad_norm": 0.5103359222412109, "learning_rate": 8.973455176292098e-06, "loss": 0.473, "step": 14610 }, { "epoch": 0.6705061722729567, "grad_norm": 0.47642409801483154, "learning_rate": 8.973306338989693e-06, "loss": 0.4881, "step": 14611 }, { "epoch": 0.6705520627782112, "grad_norm": 0.4452926218509674, "learning_rate": 8.973157492132742e-06, "loss": 0.394, "step": 14612 }, { "epoch": 0.6705979532834656, "grad_norm": 0.47272583842277527, "learning_rate": 8.973008635721599e-06, "loss": 0.4618, "step": 14613 }, { "epoch": 0.6706438437887201, "grad_norm": 0.46520477533340454, "learning_rate": 8.972859769756625e-06, "loss": 0.3628, "step": 14614 }, { "epoch": 0.6706897342939746, "grad_norm": 0.450047105550766, "learning_rate": 8.972710894238177e-06, "loss": 0.383, "step": 14615 }, { "epoch": 0.670735624799229, "grad_norm": 0.4566286504268646, "learning_rate": 8.972562009166613e-06, "loss": 0.435, "step": 14616 }, { "epoch": 0.6707815153044835, "grad_norm": 0.4192664325237274, "learning_rate": 8.972413114542291e-06, "loss": 0.3369, "step": 14617 }, { "epoch": 0.670827405809738, "grad_norm": 0.44099557399749756, "learning_rate": 8.97226421036557e-06, "loss": 0.3721, "step": 14618 }, { "epoch": 0.6708732963149924, "grad_norm": 0.43638014793395996, "learning_rate": 8.972115296636806e-06, "loss": 0.3469, "step": 14619 }, { "epoch": 0.6709191868202469, "grad_norm": 0.43593382835388184, "learning_rate": 8.971966373356358e-06, "loss": 0.3774, "step": 14620 }, { "epoch": 0.6709650773255014, "grad_norm": 0.44241613149642944, "learning_rate": 8.971817440524586e-06, "loss": 0.3465, "step": 14621 }, { "epoch": 0.6710109678307559, "grad_norm": 0.43346571922302246, "learning_rate": 8.971668498141845e-06, "loss": 0.3822, "step": 14622 }, { "epoch": 0.6710568583360103, "grad_norm": 0.4866408407688141, "learning_rate": 8.971519546208497e-06, "loss": 0.4261, "step": 14623 }, { "epoch": 0.6711027488412648, "grad_norm": 0.46682170033454895, "learning_rate": 8.971370584724897e-06, "loss": 0.3573, "step": 14624 }, { "epoch": 0.6711486393465192, "grad_norm": 0.4426473379135132, "learning_rate": 8.971221613691403e-06, "loss": 0.3665, "step": 14625 }, { "epoch": 0.6711945298517736, "grad_norm": 0.46930718421936035, "learning_rate": 8.971072633108376e-06, "loss": 0.4059, "step": 14626 }, { "epoch": 0.6712404203570281, "grad_norm": 0.4628923237323761, "learning_rate": 8.97092364297617e-06, "loss": 0.3733, "step": 14627 }, { "epoch": 0.6712863108622826, "grad_norm": 0.516466498374939, "learning_rate": 8.970774643295148e-06, "loss": 0.4747, "step": 14628 }, { "epoch": 0.671332201367537, "grad_norm": 0.4853317141532898, "learning_rate": 8.970625634065666e-06, "loss": 0.4156, "step": 14629 }, { "epoch": 0.6713780918727915, "grad_norm": 0.502302348613739, "learning_rate": 8.970476615288083e-06, "loss": 0.4347, "step": 14630 }, { "epoch": 0.671423982378046, "grad_norm": 0.4493391513824463, "learning_rate": 8.970327586962757e-06, "loss": 0.3599, "step": 14631 }, { "epoch": 0.6714698728833004, "grad_norm": 0.420227974653244, "learning_rate": 8.970178549090046e-06, "loss": 0.326, "step": 14632 }, { "epoch": 0.6715157633885549, "grad_norm": 0.41016077995300293, "learning_rate": 8.970029501670308e-06, "loss": 0.313, "step": 14633 }, { "epoch": 0.6715616538938094, "grad_norm": 0.4221818447113037, "learning_rate": 8.9698804447039e-06, "loss": 0.3294, "step": 14634 }, { "epoch": 0.6716075443990638, "grad_norm": 0.48555177450180054, "learning_rate": 8.969731378191186e-06, "loss": 0.4609, "step": 14635 }, { "epoch": 0.6716534349043183, "grad_norm": 0.4398956894874573, "learning_rate": 8.969582302132521e-06, "loss": 0.3247, "step": 14636 }, { "epoch": 0.6716993254095728, "grad_norm": 0.47701141238212585, "learning_rate": 8.96943321652826e-06, "loss": 0.3832, "step": 14637 }, { "epoch": 0.6717452159148273, "grad_norm": 0.4711799919605255, "learning_rate": 8.969284121378768e-06, "loss": 0.3713, "step": 14638 }, { "epoch": 0.6717911064200817, "grad_norm": 0.4529390335083008, "learning_rate": 8.969135016684399e-06, "loss": 0.3703, "step": 14639 }, { "epoch": 0.6718369969253362, "grad_norm": 0.4878658652305603, "learning_rate": 8.968985902445513e-06, "loss": 0.4743, "step": 14640 }, { "epoch": 0.6718828874305907, "grad_norm": 0.533019483089447, "learning_rate": 8.968836778662468e-06, "loss": 0.4382, "step": 14641 }, { "epoch": 0.671928777935845, "grad_norm": 0.4211060404777527, "learning_rate": 8.968687645335624e-06, "loss": 0.3163, "step": 14642 }, { "epoch": 0.6719746684410995, "grad_norm": 0.46758297085762024, "learning_rate": 8.96853850246534e-06, "loss": 0.4251, "step": 14643 }, { "epoch": 0.672020558946354, "grad_norm": 0.42853522300720215, "learning_rate": 8.968389350051971e-06, "loss": 0.3129, "step": 14644 }, { "epoch": 0.6720664494516084, "grad_norm": 0.45601895451545715, "learning_rate": 8.96824018809588e-06, "loss": 0.3979, "step": 14645 }, { "epoch": 0.6721123399568629, "grad_norm": 0.5159990787506104, "learning_rate": 8.968091016597423e-06, "loss": 0.4433, "step": 14646 }, { "epoch": 0.6721582304621174, "grad_norm": 0.46587973833084106, "learning_rate": 8.967941835556958e-06, "loss": 0.3804, "step": 14647 }, { "epoch": 0.6722041209673718, "grad_norm": 0.44248902797698975, "learning_rate": 8.967792644974846e-06, "loss": 0.3919, "step": 14648 }, { "epoch": 0.6722500114726263, "grad_norm": 0.44894465804100037, "learning_rate": 8.967643444851446e-06, "loss": 0.3343, "step": 14649 }, { "epoch": 0.6722959019778808, "grad_norm": 0.47158700227737427, "learning_rate": 8.967494235187116e-06, "loss": 0.4089, "step": 14650 }, { "epoch": 0.6723417924831352, "grad_norm": 0.5258386135101318, "learning_rate": 8.967345015982213e-06, "loss": 0.5154, "step": 14651 }, { "epoch": 0.6723876829883897, "grad_norm": 0.4748987555503845, "learning_rate": 8.9671957872371e-06, "loss": 0.4823, "step": 14652 }, { "epoch": 0.6724335734936442, "grad_norm": 0.5245237350463867, "learning_rate": 8.967046548952129e-06, "loss": 0.4588, "step": 14653 }, { "epoch": 0.6724794639988986, "grad_norm": 0.46021100878715515, "learning_rate": 8.966897301127666e-06, "loss": 0.3796, "step": 14654 }, { "epoch": 0.6725253545041531, "grad_norm": 0.43166399002075195, "learning_rate": 8.966748043764068e-06, "loss": 0.3554, "step": 14655 }, { "epoch": 0.6725712450094076, "grad_norm": 0.471636563539505, "learning_rate": 8.96659877686169e-06, "loss": 0.4221, "step": 14656 }, { "epoch": 0.6726171355146621, "grad_norm": 0.4382438659667969, "learning_rate": 8.966449500420896e-06, "loss": 0.3705, "step": 14657 }, { "epoch": 0.6726630260199165, "grad_norm": 0.46581512689590454, "learning_rate": 8.966300214442041e-06, "loss": 0.405, "step": 14658 }, { "epoch": 0.672708916525171, "grad_norm": 0.43589815497398376, "learning_rate": 8.966150918925488e-06, "loss": 0.3347, "step": 14659 }, { "epoch": 0.6727548070304255, "grad_norm": 0.4498421549797058, "learning_rate": 8.966001613871594e-06, "loss": 0.3869, "step": 14660 }, { "epoch": 0.6728006975356798, "grad_norm": 0.47100743651390076, "learning_rate": 8.965852299280715e-06, "loss": 0.4378, "step": 14661 }, { "epoch": 0.6728465880409343, "grad_norm": 0.4656829833984375, "learning_rate": 8.965702975153215e-06, "loss": 0.379, "step": 14662 }, { "epoch": 0.6728924785461888, "grad_norm": 0.4608096182346344, "learning_rate": 8.96555364148945e-06, "loss": 0.4239, "step": 14663 }, { "epoch": 0.6729383690514432, "grad_norm": 0.4436311721801758, "learning_rate": 8.96540429828978e-06, "loss": 0.3442, "step": 14664 }, { "epoch": 0.6729842595566977, "grad_norm": 0.4427100419998169, "learning_rate": 8.965254945554564e-06, "loss": 0.3455, "step": 14665 }, { "epoch": 0.6730301500619522, "grad_norm": 0.5839512348175049, "learning_rate": 8.965105583284162e-06, "loss": 0.5596, "step": 14666 }, { "epoch": 0.6730760405672066, "grad_norm": 0.4420351982116699, "learning_rate": 8.964956211478932e-06, "loss": 0.3753, "step": 14667 }, { "epoch": 0.6731219310724611, "grad_norm": 0.4297736585140228, "learning_rate": 8.964806830139235e-06, "loss": 0.3771, "step": 14668 }, { "epoch": 0.6731678215777156, "grad_norm": 0.42432901263237, "learning_rate": 8.964657439265428e-06, "loss": 0.3356, "step": 14669 }, { "epoch": 0.67321371208297, "grad_norm": 0.42087340354919434, "learning_rate": 8.96450803885787e-06, "loss": 0.3067, "step": 14670 }, { "epoch": 0.6732596025882245, "grad_norm": 0.4212096333503723, "learning_rate": 8.964358628916922e-06, "loss": 0.316, "step": 14671 }, { "epoch": 0.673305493093479, "grad_norm": 0.4495421051979065, "learning_rate": 8.964209209442944e-06, "loss": 0.3763, "step": 14672 }, { "epoch": 0.6733513835987335, "grad_norm": 0.47295546531677246, "learning_rate": 8.964059780436294e-06, "loss": 0.4679, "step": 14673 }, { "epoch": 0.6733972741039879, "grad_norm": 0.4856405556201935, "learning_rate": 8.96391034189733e-06, "loss": 0.4599, "step": 14674 }, { "epoch": 0.6734431646092424, "grad_norm": 0.46058133244514465, "learning_rate": 8.963760893826413e-06, "loss": 0.385, "step": 14675 }, { "epoch": 0.6734890551144969, "grad_norm": 0.43373391032218933, "learning_rate": 8.963611436223903e-06, "loss": 0.3625, "step": 14676 }, { "epoch": 0.6735349456197512, "grad_norm": 0.487519234418869, "learning_rate": 8.963461969090158e-06, "loss": 0.413, "step": 14677 }, { "epoch": 0.6735808361250057, "grad_norm": 0.5002069473266602, "learning_rate": 8.963312492425535e-06, "loss": 0.4941, "step": 14678 }, { "epoch": 0.6736267266302602, "grad_norm": 0.4721301794052124, "learning_rate": 8.9631630062304e-06, "loss": 0.4304, "step": 14679 }, { "epoch": 0.6736726171355146, "grad_norm": 0.5168606638908386, "learning_rate": 8.963013510505108e-06, "loss": 0.4571, "step": 14680 }, { "epoch": 0.6737185076407691, "grad_norm": 0.46825718879699707, "learning_rate": 8.962864005250019e-06, "loss": 0.4, "step": 14681 }, { "epoch": 0.6737643981460236, "grad_norm": 0.46966734528541565, "learning_rate": 8.962714490465492e-06, "loss": 0.4101, "step": 14682 }, { "epoch": 0.673810288651278, "grad_norm": 0.4863331913948059, "learning_rate": 8.962564966151888e-06, "loss": 0.3914, "step": 14683 }, { "epoch": 0.6738561791565325, "grad_norm": 0.4328271746635437, "learning_rate": 8.962415432309565e-06, "loss": 0.3375, "step": 14684 }, { "epoch": 0.673902069661787, "grad_norm": 0.4547343850135803, "learning_rate": 8.962265888938886e-06, "loss": 0.3539, "step": 14685 }, { "epoch": 0.6739479601670414, "grad_norm": 0.47346803545951843, "learning_rate": 8.962116336040205e-06, "loss": 0.3967, "step": 14686 }, { "epoch": 0.6739938506722959, "grad_norm": 0.42227375507354736, "learning_rate": 8.961966773613887e-06, "loss": 0.3423, "step": 14687 }, { "epoch": 0.6740397411775504, "grad_norm": 0.4545140564441681, "learning_rate": 8.961817201660287e-06, "loss": 0.383, "step": 14688 }, { "epoch": 0.6740856316828048, "grad_norm": 0.5202023386955261, "learning_rate": 8.961667620179769e-06, "loss": 0.5039, "step": 14689 }, { "epoch": 0.6741315221880593, "grad_norm": 0.4110974371433258, "learning_rate": 8.96151802917269e-06, "loss": 0.3307, "step": 14690 }, { "epoch": 0.6741774126933138, "grad_norm": 0.4705328047275543, "learning_rate": 8.961368428639411e-06, "loss": 0.4119, "step": 14691 }, { "epoch": 0.6742233031985683, "grad_norm": 0.6943110227584839, "learning_rate": 8.961218818580291e-06, "loss": 0.4051, "step": 14692 }, { "epoch": 0.6742691937038227, "grad_norm": 0.4538700580596924, "learning_rate": 8.961069198995688e-06, "loss": 0.3729, "step": 14693 }, { "epoch": 0.6743150842090772, "grad_norm": 0.4574923515319824, "learning_rate": 8.960919569885967e-06, "loss": 0.3912, "step": 14694 }, { "epoch": 0.6743609747143317, "grad_norm": 0.44742801785469055, "learning_rate": 8.960769931251483e-06, "loss": 0.4014, "step": 14695 }, { "epoch": 0.674406865219586, "grad_norm": 0.42899659276008606, "learning_rate": 8.960620283092597e-06, "loss": 0.3326, "step": 14696 }, { "epoch": 0.6744527557248405, "grad_norm": 0.4652882516384125, "learning_rate": 8.96047062540967e-06, "loss": 0.4231, "step": 14697 }, { "epoch": 0.674498646230095, "grad_norm": 0.4359254539012909, "learning_rate": 8.96032095820306e-06, "loss": 0.3176, "step": 14698 }, { "epoch": 0.6745445367353494, "grad_norm": 0.4278014600276947, "learning_rate": 8.96017128147313e-06, "loss": 0.3306, "step": 14699 }, { "epoch": 0.6745904272406039, "grad_norm": 0.4722307622432709, "learning_rate": 8.960021595220236e-06, "loss": 0.4185, "step": 14700 }, { "epoch": 0.6746363177458584, "grad_norm": 0.4911159574985504, "learning_rate": 8.95987189944474e-06, "loss": 0.4744, "step": 14701 }, { "epoch": 0.6746822082511128, "grad_norm": 0.4785071909427643, "learning_rate": 8.959722194147002e-06, "loss": 0.4276, "step": 14702 }, { "epoch": 0.6747280987563673, "grad_norm": 0.43101930618286133, "learning_rate": 8.959572479327383e-06, "loss": 0.3434, "step": 14703 }, { "epoch": 0.6747739892616218, "grad_norm": 0.4710412323474884, "learning_rate": 8.95942275498624e-06, "loss": 0.4143, "step": 14704 }, { "epoch": 0.6748198797668762, "grad_norm": 0.45896440744400024, "learning_rate": 8.959273021123936e-06, "loss": 0.4157, "step": 14705 }, { "epoch": 0.6748657702721307, "grad_norm": 0.4247373044490814, "learning_rate": 8.959123277740831e-06, "loss": 0.3343, "step": 14706 }, { "epoch": 0.6749116607773852, "grad_norm": 0.4538171887397766, "learning_rate": 8.958973524837282e-06, "loss": 0.4136, "step": 14707 }, { "epoch": 0.6749575512826396, "grad_norm": 0.46192261576652527, "learning_rate": 8.958823762413652e-06, "loss": 0.3514, "step": 14708 }, { "epoch": 0.6750034417878941, "grad_norm": 0.4493251442909241, "learning_rate": 8.958673990470302e-06, "loss": 0.3473, "step": 14709 }, { "epoch": 0.6750493322931486, "grad_norm": 0.441125750541687, "learning_rate": 8.958524209007587e-06, "loss": 0.3468, "step": 14710 }, { "epoch": 0.6750952227984031, "grad_norm": 0.43881431221961975, "learning_rate": 8.958374418025873e-06, "loss": 0.3499, "step": 14711 }, { "epoch": 0.6751411133036574, "grad_norm": 0.42891258001327515, "learning_rate": 8.958224617525517e-06, "loss": 0.3406, "step": 14712 }, { "epoch": 0.675187003808912, "grad_norm": 0.48588812351226807, "learning_rate": 8.95807480750688e-06, "loss": 0.4883, "step": 14713 }, { "epoch": 0.6752328943141664, "grad_norm": 0.525397777557373, "learning_rate": 8.957924987970324e-06, "loss": 0.4651, "step": 14714 }, { "epoch": 0.6752787848194208, "grad_norm": 0.4215129613876343, "learning_rate": 8.957775158916206e-06, "loss": 0.3095, "step": 14715 }, { "epoch": 0.6753246753246753, "grad_norm": 0.45480912923812866, "learning_rate": 8.957625320344886e-06, "loss": 0.416, "step": 14716 }, { "epoch": 0.6753705658299298, "grad_norm": 0.43501749634742737, "learning_rate": 8.95747547225673e-06, "loss": 0.3321, "step": 14717 }, { "epoch": 0.6754164563351842, "grad_norm": 0.43271464109420776, "learning_rate": 8.957325614652092e-06, "loss": 0.2907, "step": 14718 }, { "epoch": 0.6754623468404387, "grad_norm": 0.46629834175109863, "learning_rate": 8.957175747531336e-06, "loss": 0.3797, "step": 14719 }, { "epoch": 0.6755082373456932, "grad_norm": 0.4651343524456024, "learning_rate": 8.957025870894822e-06, "loss": 0.4487, "step": 14720 }, { "epoch": 0.6755541278509476, "grad_norm": 0.48341336846351624, "learning_rate": 8.956875984742907e-06, "loss": 0.4806, "step": 14721 }, { "epoch": 0.6756000183562021, "grad_norm": 0.726714551448822, "learning_rate": 8.956726089075956e-06, "loss": 0.5004, "step": 14722 }, { "epoch": 0.6756459088614566, "grad_norm": 0.46652036905288696, "learning_rate": 8.956576183894328e-06, "loss": 0.4476, "step": 14723 }, { "epoch": 0.675691799366711, "grad_norm": 0.42625826597213745, "learning_rate": 8.956426269198381e-06, "loss": 0.3535, "step": 14724 }, { "epoch": 0.6757376898719655, "grad_norm": 0.4924218952655792, "learning_rate": 8.95627634498848e-06, "loss": 0.484, "step": 14725 }, { "epoch": 0.67578358037722, "grad_norm": 0.40904316306114197, "learning_rate": 8.95612641126498e-06, "loss": 0.3107, "step": 14726 }, { "epoch": 0.6758294708824745, "grad_norm": 0.4393841624259949, "learning_rate": 8.955976468028246e-06, "loss": 0.373, "step": 14727 }, { "epoch": 0.6758753613877289, "grad_norm": 0.46549513936042786, "learning_rate": 8.955826515278639e-06, "loss": 0.4033, "step": 14728 }, { "epoch": 0.6759212518929834, "grad_norm": 0.43072983622550964, "learning_rate": 8.955676553016514e-06, "loss": 0.3792, "step": 14729 }, { "epoch": 0.6759671423982379, "grad_norm": 0.5095604658126831, "learning_rate": 8.955526581242239e-06, "loss": 0.4312, "step": 14730 }, { "epoch": 0.6760130329034922, "grad_norm": 0.5377375483512878, "learning_rate": 8.955376599956169e-06, "loss": 0.5254, "step": 14731 }, { "epoch": 0.6760589234087467, "grad_norm": 0.48529115319252014, "learning_rate": 8.955226609158667e-06, "loss": 0.4435, "step": 14732 }, { "epoch": 0.6761048139140012, "grad_norm": 0.4673723578453064, "learning_rate": 8.955076608850092e-06, "loss": 0.418, "step": 14733 }, { "epoch": 0.6761507044192556, "grad_norm": 0.46938684582710266, "learning_rate": 8.954926599030808e-06, "loss": 0.4093, "step": 14734 }, { "epoch": 0.6761965949245101, "grad_norm": 0.45263391733169556, "learning_rate": 8.954776579701174e-06, "loss": 0.3938, "step": 14735 }, { "epoch": 0.6762424854297646, "grad_norm": 0.49526724219322205, "learning_rate": 8.95462655086155e-06, "loss": 0.3636, "step": 14736 }, { "epoch": 0.676288375935019, "grad_norm": 0.5028539299964905, "learning_rate": 8.954476512512296e-06, "loss": 0.2912, "step": 14737 }, { "epoch": 0.6763342664402735, "grad_norm": 0.4028368294239044, "learning_rate": 8.954326464653775e-06, "loss": 0.2976, "step": 14738 }, { "epoch": 0.676380156945528, "grad_norm": 0.4950895309448242, "learning_rate": 8.954176407286347e-06, "loss": 0.451, "step": 14739 }, { "epoch": 0.6764260474507824, "grad_norm": 0.46924006938934326, "learning_rate": 8.954026340410372e-06, "loss": 0.4481, "step": 14740 }, { "epoch": 0.6764719379560369, "grad_norm": 0.4596254229545593, "learning_rate": 8.953876264026213e-06, "loss": 0.3429, "step": 14741 }, { "epoch": 0.6765178284612914, "grad_norm": 0.4995757043361664, "learning_rate": 8.953726178134227e-06, "loss": 0.4562, "step": 14742 }, { "epoch": 0.6765637189665458, "grad_norm": 0.45612114667892456, "learning_rate": 8.95357608273478e-06, "loss": 0.4297, "step": 14743 }, { "epoch": 0.6766096094718003, "grad_norm": 0.6454023718833923, "learning_rate": 8.953425977828229e-06, "loss": 0.5747, "step": 14744 }, { "epoch": 0.6766554999770548, "grad_norm": 0.5305061936378479, "learning_rate": 8.953275863414937e-06, "loss": 0.5749, "step": 14745 }, { "epoch": 0.6767013904823093, "grad_norm": 0.4578821659088135, "learning_rate": 8.953125739495263e-06, "loss": 0.3847, "step": 14746 }, { "epoch": 0.6767472809875636, "grad_norm": 0.4212392270565033, "learning_rate": 8.952975606069569e-06, "loss": 0.3283, "step": 14747 }, { "epoch": 0.6767931714928181, "grad_norm": 0.47139298915863037, "learning_rate": 8.952825463138217e-06, "loss": 0.3851, "step": 14748 }, { "epoch": 0.6768390619980726, "grad_norm": 0.42642828822135925, "learning_rate": 8.952675310701568e-06, "loss": 0.3367, "step": 14749 }, { "epoch": 0.676884952503327, "grad_norm": 0.3850325345993042, "learning_rate": 8.952525148759983e-06, "loss": 0.2918, "step": 14750 }, { "epoch": 0.6769308430085815, "grad_norm": 0.4391419589519501, "learning_rate": 8.95237497731382e-06, "loss": 0.332, "step": 14751 }, { "epoch": 0.676976733513836, "grad_norm": 0.4256323575973511, "learning_rate": 8.952224796363444e-06, "loss": 0.3299, "step": 14752 }, { "epoch": 0.6770226240190904, "grad_norm": 0.39353954792022705, "learning_rate": 8.952074605909216e-06, "loss": 0.2775, "step": 14753 }, { "epoch": 0.6770685145243449, "grad_norm": 0.44493716955184937, "learning_rate": 8.951924405951494e-06, "loss": 0.4082, "step": 14754 }, { "epoch": 0.6771144050295994, "grad_norm": 0.4622228741645813, "learning_rate": 8.951774196490641e-06, "loss": 0.3933, "step": 14755 }, { "epoch": 0.6771602955348538, "grad_norm": 0.4285729229450226, "learning_rate": 8.95162397752702e-06, "loss": 0.3694, "step": 14756 }, { "epoch": 0.6772061860401083, "grad_norm": 0.4257205128669739, "learning_rate": 8.951473749060988e-06, "loss": 0.3408, "step": 14757 }, { "epoch": 0.6772520765453628, "grad_norm": 0.4245329797267914, "learning_rate": 8.95132351109291e-06, "loss": 0.3418, "step": 14758 }, { "epoch": 0.6772979670506172, "grad_norm": 0.413326621055603, "learning_rate": 8.951173263623148e-06, "loss": 0.3548, "step": 14759 }, { "epoch": 0.6773438575558717, "grad_norm": 0.4849330186843872, "learning_rate": 8.951023006652059e-06, "loss": 0.4878, "step": 14760 }, { "epoch": 0.6773897480611262, "grad_norm": 0.48579591512680054, "learning_rate": 8.950872740180006e-06, "loss": 0.4245, "step": 14761 }, { "epoch": 0.6774356385663807, "grad_norm": 0.43916749954223633, "learning_rate": 8.950722464207353e-06, "loss": 0.3923, "step": 14762 }, { "epoch": 0.6774815290716351, "grad_norm": 0.48171040415763855, "learning_rate": 8.950572178734457e-06, "loss": 0.3637, "step": 14763 }, { "epoch": 0.6775274195768896, "grad_norm": 0.461260050535202, "learning_rate": 8.950421883761684e-06, "loss": 0.4024, "step": 14764 }, { "epoch": 0.677573310082144, "grad_norm": 0.48985669016838074, "learning_rate": 8.950271579289394e-06, "loss": 0.4547, "step": 14765 }, { "epoch": 0.6776192005873984, "grad_norm": 0.4428180158138275, "learning_rate": 8.950121265317946e-06, "loss": 0.3599, "step": 14766 }, { "epoch": 0.6776650910926529, "grad_norm": 0.49926432967185974, "learning_rate": 8.949970941847701e-06, "loss": 0.5125, "step": 14767 }, { "epoch": 0.6777109815979074, "grad_norm": 0.4786827564239502, "learning_rate": 8.949820608879026e-06, "loss": 0.4975, "step": 14768 }, { "epoch": 0.6777568721031618, "grad_norm": 0.47190189361572266, "learning_rate": 8.949670266412278e-06, "loss": 0.4733, "step": 14769 }, { "epoch": 0.6778027626084163, "grad_norm": 0.46229228377342224, "learning_rate": 8.949519914447818e-06, "loss": 0.4407, "step": 14770 }, { "epoch": 0.6778486531136708, "grad_norm": 0.4745892882347107, "learning_rate": 8.949369552986011e-06, "loss": 0.4293, "step": 14771 }, { "epoch": 0.6778945436189252, "grad_norm": 0.4559294581413269, "learning_rate": 8.949219182027215e-06, "loss": 0.3471, "step": 14772 }, { "epoch": 0.6779404341241797, "grad_norm": 0.5512930154800415, "learning_rate": 8.949068801571794e-06, "loss": 0.332, "step": 14773 }, { "epoch": 0.6779863246294342, "grad_norm": 0.4335198998451233, "learning_rate": 8.94891841162011e-06, "loss": 0.3852, "step": 14774 }, { "epoch": 0.6780322151346886, "grad_norm": 0.4324401319026947, "learning_rate": 8.948768012172523e-06, "loss": 0.3789, "step": 14775 }, { "epoch": 0.6780781056399431, "grad_norm": 0.4959743916988373, "learning_rate": 8.948617603229395e-06, "loss": 0.4826, "step": 14776 }, { "epoch": 0.6781239961451976, "grad_norm": 0.45083341002464294, "learning_rate": 8.948467184791087e-06, "loss": 0.4073, "step": 14777 }, { "epoch": 0.678169886650452, "grad_norm": 0.4678332805633545, "learning_rate": 8.948316756857963e-06, "loss": 0.3927, "step": 14778 }, { "epoch": 0.6782157771557065, "grad_norm": 0.4929785430431366, "learning_rate": 8.948166319430382e-06, "loss": 0.439, "step": 14779 }, { "epoch": 0.678261667660961, "grad_norm": 0.40612107515335083, "learning_rate": 8.948015872508709e-06, "loss": 0.329, "step": 14780 }, { "epoch": 0.6783075581662155, "grad_norm": 0.48002389073371887, "learning_rate": 8.947865416093303e-06, "loss": 0.4819, "step": 14781 }, { "epoch": 0.6783534486714698, "grad_norm": 0.44353726506233215, "learning_rate": 8.947714950184525e-06, "loss": 0.3833, "step": 14782 }, { "epoch": 0.6783993391767243, "grad_norm": 0.4811346232891083, "learning_rate": 8.94756447478274e-06, "loss": 0.3768, "step": 14783 }, { "epoch": 0.6784452296819788, "grad_norm": 0.4238256812095642, "learning_rate": 8.947413989888307e-06, "loss": 0.3652, "step": 14784 }, { "epoch": 0.6784911201872332, "grad_norm": 0.4736774265766144, "learning_rate": 8.947263495501591e-06, "loss": 0.3939, "step": 14785 }, { "epoch": 0.6785370106924877, "grad_norm": 0.4651627838611603, "learning_rate": 8.947112991622951e-06, "loss": 0.3982, "step": 14786 }, { "epoch": 0.6785829011977422, "grad_norm": 0.4665450155735016, "learning_rate": 8.946962478252751e-06, "loss": 0.4067, "step": 14787 }, { "epoch": 0.6786287917029966, "grad_norm": 0.44031134247779846, "learning_rate": 8.94681195539135e-06, "loss": 0.3311, "step": 14788 }, { "epoch": 0.6786746822082511, "grad_norm": 0.4378812611103058, "learning_rate": 8.946661423039113e-06, "loss": 0.3972, "step": 14789 }, { "epoch": 0.6787205727135056, "grad_norm": 0.44837936758995056, "learning_rate": 8.946510881196402e-06, "loss": 0.3429, "step": 14790 }, { "epoch": 0.67876646321876, "grad_norm": 0.4941911995410919, "learning_rate": 8.946360329863576e-06, "loss": 0.4892, "step": 14791 }, { "epoch": 0.6788123537240145, "grad_norm": 0.4322866201400757, "learning_rate": 8.946209769041002e-06, "loss": 0.3849, "step": 14792 }, { "epoch": 0.678858244229269, "grad_norm": 0.44300970435142517, "learning_rate": 8.946059198729036e-06, "loss": 0.3974, "step": 14793 }, { "epoch": 0.6789041347345234, "grad_norm": 0.44444549083709717, "learning_rate": 8.945908618928044e-06, "loss": 0.374, "step": 14794 }, { "epoch": 0.6789500252397779, "grad_norm": 0.45047274231910706, "learning_rate": 8.945758029638388e-06, "loss": 0.3879, "step": 14795 }, { "epoch": 0.6789959157450324, "grad_norm": 0.4511168301105499, "learning_rate": 8.945607430860428e-06, "loss": 0.3884, "step": 14796 }, { "epoch": 0.6790418062502868, "grad_norm": 0.4733699858188629, "learning_rate": 8.945456822594528e-06, "loss": 0.4326, "step": 14797 }, { "epoch": 0.6790876967555413, "grad_norm": 0.45085573196411133, "learning_rate": 8.945306204841048e-06, "loss": 0.3735, "step": 14798 }, { "epoch": 0.6791335872607958, "grad_norm": 0.4270944595336914, "learning_rate": 8.945155577600353e-06, "loss": 0.3446, "step": 14799 }, { "epoch": 0.6791794777660503, "grad_norm": 0.4654461741447449, "learning_rate": 8.945004940872803e-06, "loss": 0.4252, "step": 14800 }, { "epoch": 0.6792253682713046, "grad_norm": 0.479457288980484, "learning_rate": 8.944854294658763e-06, "loss": 0.4698, "step": 14801 }, { "epoch": 0.6792712587765591, "grad_norm": 0.4618765413761139, "learning_rate": 8.944703638958591e-06, "loss": 0.392, "step": 14802 }, { "epoch": 0.6793171492818136, "grad_norm": 0.45287376642227173, "learning_rate": 8.944552973772654e-06, "loss": 0.4296, "step": 14803 }, { "epoch": 0.679363039787068, "grad_norm": 0.47194233536720276, "learning_rate": 8.944402299101312e-06, "loss": 0.4388, "step": 14804 }, { "epoch": 0.6794089302923225, "grad_norm": 0.44376716017723083, "learning_rate": 8.944251614944926e-06, "loss": 0.3598, "step": 14805 }, { "epoch": 0.679454820797577, "grad_norm": 0.45693904161453247, "learning_rate": 8.944100921303862e-06, "loss": 0.3911, "step": 14806 }, { "epoch": 0.6795007113028314, "grad_norm": 0.4648887813091278, "learning_rate": 8.943950218178478e-06, "loss": 0.435, "step": 14807 }, { "epoch": 0.6795466018080859, "grad_norm": 0.46242383122444153, "learning_rate": 8.94379950556914e-06, "loss": 0.409, "step": 14808 }, { "epoch": 0.6795924923133404, "grad_norm": 0.46837151050567627, "learning_rate": 8.943648783476206e-06, "loss": 0.395, "step": 14809 }, { "epoch": 0.6796383828185948, "grad_norm": 0.4884977340698242, "learning_rate": 8.943498051900044e-06, "loss": 0.4566, "step": 14810 }, { "epoch": 0.6796842733238493, "grad_norm": 0.431994765996933, "learning_rate": 8.943347310841013e-06, "loss": 0.3216, "step": 14811 }, { "epoch": 0.6797301638291038, "grad_norm": 0.45315325260162354, "learning_rate": 8.943196560299476e-06, "loss": 0.3843, "step": 14812 }, { "epoch": 0.6797760543343582, "grad_norm": 0.4566023647785187, "learning_rate": 8.943045800275795e-06, "loss": 0.3861, "step": 14813 }, { "epoch": 0.6798219448396127, "grad_norm": 0.44639286398887634, "learning_rate": 8.942895030770335e-06, "loss": 0.4079, "step": 14814 }, { "epoch": 0.6798678353448672, "grad_norm": 0.4505566954612732, "learning_rate": 8.942744251783456e-06, "loss": 0.367, "step": 14815 }, { "epoch": 0.6799137258501217, "grad_norm": 0.4297548830509186, "learning_rate": 8.942593463315522e-06, "loss": 0.3101, "step": 14816 }, { "epoch": 0.679959616355376, "grad_norm": 0.44771164655685425, "learning_rate": 8.942442665366895e-06, "loss": 0.4195, "step": 14817 }, { "epoch": 0.6800055068606305, "grad_norm": 0.48178502917289734, "learning_rate": 8.942291857937937e-06, "loss": 0.4598, "step": 14818 }, { "epoch": 0.680051397365885, "grad_norm": 0.468656450510025, "learning_rate": 8.942141041029011e-06, "loss": 0.3862, "step": 14819 }, { "epoch": 0.6800972878711394, "grad_norm": 0.47881656885147095, "learning_rate": 8.941990214640481e-06, "loss": 0.4548, "step": 14820 }, { "epoch": 0.6801431783763939, "grad_norm": 0.5204012393951416, "learning_rate": 8.941839378772708e-06, "loss": 0.4471, "step": 14821 }, { "epoch": 0.6801890688816484, "grad_norm": 0.43512874841690063, "learning_rate": 8.941688533426056e-06, "loss": 0.3805, "step": 14822 }, { "epoch": 0.6802349593869028, "grad_norm": 0.44788533449172974, "learning_rate": 8.941537678600886e-06, "loss": 0.3776, "step": 14823 }, { "epoch": 0.6802808498921573, "grad_norm": 0.4891255497932434, "learning_rate": 8.941386814297564e-06, "loss": 0.3898, "step": 14824 }, { "epoch": 0.6803267403974118, "grad_norm": 0.44314494729042053, "learning_rate": 8.941235940516449e-06, "loss": 0.344, "step": 14825 }, { "epoch": 0.6803726309026662, "grad_norm": 0.4424200654029846, "learning_rate": 8.941085057257905e-06, "loss": 0.3804, "step": 14826 }, { "epoch": 0.6804185214079207, "grad_norm": 0.4754483997821808, "learning_rate": 8.940934164522297e-06, "loss": 0.4385, "step": 14827 }, { "epoch": 0.6804644119131752, "grad_norm": 0.47143200039863586, "learning_rate": 8.940783262309984e-06, "loss": 0.4318, "step": 14828 }, { "epoch": 0.6805103024184296, "grad_norm": 0.414588987827301, "learning_rate": 8.940632350621333e-06, "loss": 0.3723, "step": 14829 }, { "epoch": 0.6805561929236841, "grad_norm": 0.46653369069099426, "learning_rate": 8.940481429456704e-06, "loss": 0.4689, "step": 14830 }, { "epoch": 0.6806020834289386, "grad_norm": 0.4801202714443207, "learning_rate": 8.940330498816461e-06, "loss": 0.3994, "step": 14831 }, { "epoch": 0.680647973934193, "grad_norm": 0.45494574308395386, "learning_rate": 8.940179558700967e-06, "loss": 0.4564, "step": 14832 }, { "epoch": 0.6806938644394475, "grad_norm": 0.4337141811847687, "learning_rate": 8.940028609110585e-06, "loss": 0.3746, "step": 14833 }, { "epoch": 0.680739754944702, "grad_norm": 0.461557000875473, "learning_rate": 8.939877650045677e-06, "loss": 0.4024, "step": 14834 }, { "epoch": 0.6807856454499565, "grad_norm": 0.4299245774745941, "learning_rate": 8.939726681506607e-06, "loss": 0.3367, "step": 14835 }, { "epoch": 0.6808315359552108, "grad_norm": 0.4379175007343292, "learning_rate": 8.939575703493738e-06, "loss": 0.4301, "step": 14836 }, { "epoch": 0.6808774264604653, "grad_norm": 0.4237874448299408, "learning_rate": 8.93942471600743e-06, "loss": 0.3268, "step": 14837 }, { "epoch": 0.6809233169657198, "grad_norm": 0.460771769285202, "learning_rate": 8.939273719048052e-06, "loss": 0.3951, "step": 14838 }, { "epoch": 0.6809692074709742, "grad_norm": 0.4490431547164917, "learning_rate": 8.939122712615961e-06, "loss": 0.4312, "step": 14839 }, { "epoch": 0.6810150979762287, "grad_norm": 0.4115942716598511, "learning_rate": 8.938971696711527e-06, "loss": 0.3175, "step": 14840 }, { "epoch": 0.6810609884814832, "grad_norm": 0.4233342111110687, "learning_rate": 8.938820671335107e-06, "loss": 0.3126, "step": 14841 }, { "epoch": 0.6811068789867376, "grad_norm": 0.493661105632782, "learning_rate": 8.938669636487067e-06, "loss": 0.4859, "step": 14842 }, { "epoch": 0.6811527694919921, "grad_norm": 0.4322417378425598, "learning_rate": 8.938518592167769e-06, "loss": 0.38, "step": 14843 }, { "epoch": 0.6811986599972466, "grad_norm": 0.4188777506351471, "learning_rate": 8.938367538377577e-06, "loss": 0.3445, "step": 14844 }, { "epoch": 0.681244550502501, "grad_norm": 0.43958789110183716, "learning_rate": 8.938216475116856e-06, "loss": 0.3813, "step": 14845 }, { "epoch": 0.6812904410077555, "grad_norm": 0.4574650228023529, "learning_rate": 8.938065402385965e-06, "loss": 0.374, "step": 14846 }, { "epoch": 0.68133633151301, "grad_norm": 0.4355171322822571, "learning_rate": 8.93791432018527e-06, "loss": 0.3578, "step": 14847 }, { "epoch": 0.6813822220182644, "grad_norm": 0.45174604654312134, "learning_rate": 8.937763228515135e-06, "loss": 0.4306, "step": 14848 }, { "epoch": 0.6814281125235189, "grad_norm": 0.42888110876083374, "learning_rate": 8.93761212737592e-06, "loss": 0.3575, "step": 14849 }, { "epoch": 0.6814740030287734, "grad_norm": 0.4579628109931946, "learning_rate": 8.937461016767993e-06, "loss": 0.4177, "step": 14850 }, { "epoch": 0.6815198935340278, "grad_norm": 0.4721222519874573, "learning_rate": 8.937309896691714e-06, "loss": 0.3787, "step": 14851 }, { "epoch": 0.6815657840392823, "grad_norm": 0.4567517638206482, "learning_rate": 8.937158767147447e-06, "loss": 0.4229, "step": 14852 }, { "epoch": 0.6816116745445367, "grad_norm": 0.4372602105140686, "learning_rate": 8.937007628135556e-06, "loss": 0.4225, "step": 14853 }, { "epoch": 0.6816575650497912, "grad_norm": 0.47924935817718506, "learning_rate": 8.936856479656403e-06, "loss": 0.417, "step": 14854 }, { "epoch": 0.6817034555550456, "grad_norm": 0.4316025376319885, "learning_rate": 8.936705321710356e-06, "loss": 0.3549, "step": 14855 }, { "epoch": 0.6817493460603001, "grad_norm": 0.44067510962486267, "learning_rate": 8.936554154297772e-06, "loss": 0.4328, "step": 14856 }, { "epoch": 0.6817952365655546, "grad_norm": 0.41196808218955994, "learning_rate": 8.93640297741902e-06, "loss": 0.3613, "step": 14857 }, { "epoch": 0.681841127070809, "grad_norm": 0.4127485454082489, "learning_rate": 8.93625179107446e-06, "loss": 0.3671, "step": 14858 }, { "epoch": 0.6818870175760635, "grad_norm": 0.469026654958725, "learning_rate": 8.936100595264456e-06, "loss": 0.4294, "step": 14859 }, { "epoch": 0.681932908081318, "grad_norm": 0.46003252267837524, "learning_rate": 8.935949389989374e-06, "loss": 0.3821, "step": 14860 }, { "epoch": 0.6819787985865724, "grad_norm": 0.699417769908905, "learning_rate": 8.935798175249575e-06, "loss": 0.3418, "step": 14861 }, { "epoch": 0.6820246890918269, "grad_norm": 0.45537805557250977, "learning_rate": 8.935646951045422e-06, "loss": 0.4173, "step": 14862 }, { "epoch": 0.6820705795970814, "grad_norm": 0.4469729959964752, "learning_rate": 8.935495717377283e-06, "loss": 0.3634, "step": 14863 }, { "epoch": 0.6821164701023358, "grad_norm": 0.48699885606765747, "learning_rate": 8.935344474245517e-06, "loss": 0.4698, "step": 14864 }, { "epoch": 0.6821623606075903, "grad_norm": 0.45305201411247253, "learning_rate": 8.93519322165049e-06, "loss": 0.377, "step": 14865 }, { "epoch": 0.6822082511128448, "grad_norm": 0.40661904215812683, "learning_rate": 8.935041959592566e-06, "loss": 0.2866, "step": 14866 }, { "epoch": 0.6822541416180992, "grad_norm": 0.460922509431839, "learning_rate": 8.934890688072108e-06, "loss": 0.4332, "step": 14867 }, { "epoch": 0.6823000321233537, "grad_norm": 0.4418080449104309, "learning_rate": 8.934739407089479e-06, "loss": 0.3841, "step": 14868 }, { "epoch": 0.6823459226286082, "grad_norm": 0.4592224657535553, "learning_rate": 8.934588116645044e-06, "loss": 0.3989, "step": 14869 }, { "epoch": 0.6823918131338627, "grad_norm": 0.46631962060928345, "learning_rate": 8.934436816739167e-06, "loss": 0.4323, "step": 14870 }, { "epoch": 0.682437703639117, "grad_norm": 0.44841068983078003, "learning_rate": 8.93428550737221e-06, "loss": 0.3376, "step": 14871 }, { "epoch": 0.6824835941443715, "grad_norm": 0.5000798106193542, "learning_rate": 8.93413418854454e-06, "loss": 0.5115, "step": 14872 }, { "epoch": 0.682529484649626, "grad_norm": 0.4701462388038635, "learning_rate": 8.933982860256516e-06, "loss": 0.4426, "step": 14873 }, { "epoch": 0.6825753751548804, "grad_norm": 0.4299839735031128, "learning_rate": 8.933831522508507e-06, "loss": 0.3602, "step": 14874 }, { "epoch": 0.6826212656601349, "grad_norm": 0.41019177436828613, "learning_rate": 8.933680175300872e-06, "loss": 0.2949, "step": 14875 }, { "epoch": 0.6826671561653894, "grad_norm": 0.4400378465652466, "learning_rate": 8.93352881863398e-06, "loss": 0.4081, "step": 14876 }, { "epoch": 0.6827130466706438, "grad_norm": 0.42967677116394043, "learning_rate": 8.933377452508192e-06, "loss": 0.3443, "step": 14877 }, { "epoch": 0.6827589371758983, "grad_norm": 0.44078895449638367, "learning_rate": 8.933226076923874e-06, "loss": 0.3476, "step": 14878 }, { "epoch": 0.6828048276811528, "grad_norm": 0.461739182472229, "learning_rate": 8.933074691881387e-06, "loss": 0.4514, "step": 14879 }, { "epoch": 0.6828507181864072, "grad_norm": 0.4512290954589844, "learning_rate": 8.932923297381098e-06, "loss": 0.3866, "step": 14880 }, { "epoch": 0.6828966086916617, "grad_norm": 0.4742060601711273, "learning_rate": 8.932771893423369e-06, "loss": 0.4279, "step": 14881 }, { "epoch": 0.6829424991969162, "grad_norm": 0.45089131593704224, "learning_rate": 8.932620480008565e-06, "loss": 0.386, "step": 14882 }, { "epoch": 0.6829883897021706, "grad_norm": 0.4825097322463989, "learning_rate": 8.93246905713705e-06, "loss": 0.4175, "step": 14883 }, { "epoch": 0.6830342802074251, "grad_norm": 0.4531537890434265, "learning_rate": 8.932317624809187e-06, "loss": 0.3994, "step": 14884 }, { "epoch": 0.6830801707126796, "grad_norm": 0.4647573232650757, "learning_rate": 8.932166183025342e-06, "loss": 0.3952, "step": 14885 }, { "epoch": 0.683126061217934, "grad_norm": 0.47387707233428955, "learning_rate": 8.932014731785878e-06, "loss": 0.4392, "step": 14886 }, { "epoch": 0.6831719517231885, "grad_norm": 0.47839128971099854, "learning_rate": 8.93186327109116e-06, "loss": 0.4101, "step": 14887 }, { "epoch": 0.683217842228443, "grad_norm": 0.4589564800262451, "learning_rate": 8.93171180094155e-06, "loss": 0.4254, "step": 14888 }, { "epoch": 0.6832637327336974, "grad_norm": 0.47041061520576477, "learning_rate": 8.931560321337419e-06, "loss": 0.4403, "step": 14889 }, { "epoch": 0.6833096232389518, "grad_norm": 0.4205566942691803, "learning_rate": 8.931408832279122e-06, "loss": 0.3572, "step": 14890 }, { "epoch": 0.6833555137442063, "grad_norm": 0.44884923100471497, "learning_rate": 8.931257333767029e-06, "loss": 0.4118, "step": 14891 }, { "epoch": 0.6834014042494608, "grad_norm": 0.468823105096817, "learning_rate": 8.931105825801501e-06, "loss": 0.4288, "step": 14892 }, { "epoch": 0.6834472947547152, "grad_norm": 0.5058838725090027, "learning_rate": 8.930954308382906e-06, "loss": 0.3961, "step": 14893 }, { "epoch": 0.6834931852599697, "grad_norm": 0.4750557243824005, "learning_rate": 8.930802781511606e-06, "loss": 0.4471, "step": 14894 }, { "epoch": 0.6835390757652242, "grad_norm": 0.44421112537384033, "learning_rate": 8.930651245187966e-06, "loss": 0.3701, "step": 14895 }, { "epoch": 0.6835849662704786, "grad_norm": 0.44765132665634155, "learning_rate": 8.930499699412351e-06, "loss": 0.3522, "step": 14896 }, { "epoch": 0.6836308567757331, "grad_norm": 0.4314253628253937, "learning_rate": 8.930348144185124e-06, "loss": 0.3348, "step": 14897 }, { "epoch": 0.6836767472809876, "grad_norm": 0.46028751134872437, "learning_rate": 8.93019657950665e-06, "loss": 0.4128, "step": 14898 }, { "epoch": 0.683722637786242, "grad_norm": 0.4367113411426544, "learning_rate": 8.930045005377293e-06, "loss": 0.3779, "step": 14899 }, { "epoch": 0.6837685282914965, "grad_norm": 0.39101332426071167, "learning_rate": 8.929893421797418e-06, "loss": 0.2956, "step": 14900 }, { "epoch": 0.683814418796751, "grad_norm": 0.457587867975235, "learning_rate": 8.92974182876739e-06, "loss": 0.3595, "step": 14901 }, { "epoch": 0.6838603093020054, "grad_norm": 0.43467581272125244, "learning_rate": 8.929590226287575e-06, "loss": 0.3868, "step": 14902 }, { "epoch": 0.6839061998072599, "grad_norm": 0.458671510219574, "learning_rate": 8.929438614358335e-06, "loss": 0.4591, "step": 14903 }, { "epoch": 0.6839520903125144, "grad_norm": 0.43546923995018005, "learning_rate": 8.929286992980035e-06, "loss": 0.3952, "step": 14904 }, { "epoch": 0.6839979808177689, "grad_norm": 0.44588080048561096, "learning_rate": 8.92913536215304e-06, "loss": 0.4154, "step": 14905 }, { "epoch": 0.6840438713230232, "grad_norm": 0.44860562682151794, "learning_rate": 8.928983721877713e-06, "loss": 0.3873, "step": 14906 }, { "epoch": 0.6840897618282777, "grad_norm": 0.44021889567375183, "learning_rate": 8.928832072154421e-06, "loss": 0.3916, "step": 14907 }, { "epoch": 0.6841356523335322, "grad_norm": 0.448893278837204, "learning_rate": 8.928680412983527e-06, "loss": 0.4228, "step": 14908 }, { "epoch": 0.6841815428387866, "grad_norm": 0.4396386444568634, "learning_rate": 8.9285287443654e-06, "loss": 0.3549, "step": 14909 }, { "epoch": 0.6842274333440411, "grad_norm": 0.4707038998603821, "learning_rate": 8.928377066300397e-06, "loss": 0.4569, "step": 14910 }, { "epoch": 0.6842733238492956, "grad_norm": 0.43428173661231995, "learning_rate": 8.92822537878889e-06, "loss": 0.3333, "step": 14911 }, { "epoch": 0.68431921435455, "grad_norm": 0.46840330958366394, "learning_rate": 8.928073681831239e-06, "loss": 0.4366, "step": 14912 }, { "epoch": 0.6843651048598045, "grad_norm": 0.47723808884620667, "learning_rate": 8.927921975427813e-06, "loss": 0.4582, "step": 14913 }, { "epoch": 0.684410995365059, "grad_norm": 0.44206565618515015, "learning_rate": 8.927770259578972e-06, "loss": 0.3392, "step": 14914 }, { "epoch": 0.6844568858703134, "grad_norm": 0.40152665972709656, "learning_rate": 8.927618534285083e-06, "loss": 0.3133, "step": 14915 }, { "epoch": 0.6845027763755679, "grad_norm": 0.44860610365867615, "learning_rate": 8.92746679954651e-06, "loss": 0.4498, "step": 14916 }, { "epoch": 0.6845486668808224, "grad_norm": 0.4252551794052124, "learning_rate": 8.927315055363623e-06, "loss": 0.3075, "step": 14917 }, { "epoch": 0.6845945573860768, "grad_norm": 0.4503668546676636, "learning_rate": 8.92716330173678e-06, "loss": 0.3748, "step": 14918 }, { "epoch": 0.6846404478913313, "grad_norm": 0.4311141073703766, "learning_rate": 8.927011538666349e-06, "loss": 0.391, "step": 14919 }, { "epoch": 0.6846863383965858, "grad_norm": 0.4734487533569336, "learning_rate": 8.926859766152694e-06, "loss": 0.386, "step": 14920 }, { "epoch": 0.6847322289018402, "grad_norm": 0.4565974473953247, "learning_rate": 8.926707984196182e-06, "loss": 0.4107, "step": 14921 }, { "epoch": 0.6847781194070947, "grad_norm": 0.47143927216529846, "learning_rate": 8.926556192797175e-06, "loss": 0.4595, "step": 14922 }, { "epoch": 0.6848240099123492, "grad_norm": 0.45793333649635315, "learning_rate": 8.926404391956042e-06, "loss": 0.3985, "step": 14923 }, { "epoch": 0.6848699004176036, "grad_norm": 0.4369822144508362, "learning_rate": 8.926252581673144e-06, "loss": 0.3685, "step": 14924 }, { "epoch": 0.684915790922858, "grad_norm": 0.4429752230644226, "learning_rate": 8.926100761948848e-06, "loss": 0.385, "step": 14925 }, { "epoch": 0.6849616814281125, "grad_norm": 0.4601396918296814, "learning_rate": 8.925948932783519e-06, "loss": 0.4679, "step": 14926 }, { "epoch": 0.685007571933367, "grad_norm": 0.4582616984844208, "learning_rate": 8.925797094177521e-06, "loss": 0.4112, "step": 14927 }, { "epoch": 0.6850534624386214, "grad_norm": 0.4293476641178131, "learning_rate": 8.925645246131221e-06, "loss": 0.3497, "step": 14928 }, { "epoch": 0.6850993529438759, "grad_norm": 0.5002524852752686, "learning_rate": 8.925493388644983e-06, "loss": 0.5079, "step": 14929 }, { "epoch": 0.6851452434491304, "grad_norm": 0.4156542718410492, "learning_rate": 8.925341521719173e-06, "loss": 0.3523, "step": 14930 }, { "epoch": 0.6851911339543848, "grad_norm": 0.4862723648548126, "learning_rate": 8.925189645354154e-06, "loss": 0.3732, "step": 14931 }, { "epoch": 0.6852370244596393, "grad_norm": 0.447369247674942, "learning_rate": 8.925037759550293e-06, "loss": 0.3512, "step": 14932 }, { "epoch": 0.6852829149648938, "grad_norm": 0.41253650188446045, "learning_rate": 8.924885864307954e-06, "loss": 0.3021, "step": 14933 }, { "epoch": 0.6853288054701482, "grad_norm": 0.5070491433143616, "learning_rate": 8.924733959627505e-06, "loss": 0.5105, "step": 14934 }, { "epoch": 0.6853746959754027, "grad_norm": 0.5056631565093994, "learning_rate": 8.924582045509308e-06, "loss": 0.4951, "step": 14935 }, { "epoch": 0.6854205864806572, "grad_norm": 0.47183579206466675, "learning_rate": 8.924430121953733e-06, "loss": 0.4581, "step": 14936 }, { "epoch": 0.6854664769859116, "grad_norm": 0.4818021357059479, "learning_rate": 8.92427818896114e-06, "loss": 0.4623, "step": 14937 }, { "epoch": 0.6855123674911661, "grad_norm": 0.4583207368850708, "learning_rate": 8.924126246531896e-06, "loss": 0.4314, "step": 14938 }, { "epoch": 0.6855582579964206, "grad_norm": 0.5014256238937378, "learning_rate": 8.923974294666367e-06, "loss": 0.4633, "step": 14939 }, { "epoch": 0.685604148501675, "grad_norm": 0.4391101002693176, "learning_rate": 8.923822333364918e-06, "loss": 0.3682, "step": 14940 }, { "epoch": 0.6856500390069294, "grad_norm": 0.44086316227912903, "learning_rate": 8.923670362627917e-06, "loss": 0.3728, "step": 14941 }, { "epoch": 0.6856959295121839, "grad_norm": 0.4357971251010895, "learning_rate": 8.923518382455724e-06, "loss": 0.384, "step": 14942 }, { "epoch": 0.6857418200174384, "grad_norm": 0.45085909962654114, "learning_rate": 8.92336639284871e-06, "loss": 0.38, "step": 14943 }, { "epoch": 0.6857877105226928, "grad_norm": 0.464751660823822, "learning_rate": 8.923214393807236e-06, "loss": 0.4683, "step": 14944 }, { "epoch": 0.6858336010279473, "grad_norm": 0.4326508045196533, "learning_rate": 8.923062385331671e-06, "loss": 0.4027, "step": 14945 }, { "epoch": 0.6858794915332018, "grad_norm": 0.44074225425720215, "learning_rate": 8.922910367422379e-06, "loss": 0.3505, "step": 14946 }, { "epoch": 0.6859253820384562, "grad_norm": 0.4517102539539337, "learning_rate": 8.922758340079724e-06, "loss": 0.391, "step": 14947 }, { "epoch": 0.6859712725437107, "grad_norm": 0.4954013228416443, "learning_rate": 8.922606303304075e-06, "loss": 0.4792, "step": 14948 }, { "epoch": 0.6860171630489652, "grad_norm": 0.4592500925064087, "learning_rate": 8.922454257095796e-06, "loss": 0.4164, "step": 14949 }, { "epoch": 0.6860630535542196, "grad_norm": 0.4353947639465332, "learning_rate": 8.922302201455253e-06, "loss": 0.3573, "step": 14950 }, { "epoch": 0.6861089440594741, "grad_norm": 0.44320446252822876, "learning_rate": 8.92215013638281e-06, "loss": 0.3776, "step": 14951 }, { "epoch": 0.6861548345647286, "grad_norm": 0.4774834215641022, "learning_rate": 8.921998061878833e-06, "loss": 0.364, "step": 14952 }, { "epoch": 0.686200725069983, "grad_norm": 0.46389034390449524, "learning_rate": 8.921845977943691e-06, "loss": 0.4105, "step": 14953 }, { "epoch": 0.6862466155752375, "grad_norm": 0.4633306860923767, "learning_rate": 8.921693884577744e-06, "loss": 0.3347, "step": 14954 }, { "epoch": 0.686292506080492, "grad_norm": 0.4615379273891449, "learning_rate": 8.921541781781362e-06, "loss": 0.3844, "step": 14955 }, { "epoch": 0.6863383965857464, "grad_norm": 0.4374758303165436, "learning_rate": 8.921389669554912e-06, "loss": 0.4007, "step": 14956 }, { "epoch": 0.6863842870910009, "grad_norm": 0.44353294372558594, "learning_rate": 8.921237547898755e-06, "loss": 0.3565, "step": 14957 }, { "epoch": 0.6864301775962554, "grad_norm": 0.4245252311229706, "learning_rate": 8.92108541681326e-06, "loss": 0.3584, "step": 14958 }, { "epoch": 0.6864760681015099, "grad_norm": 0.43870529532432556, "learning_rate": 8.920933276298791e-06, "loss": 0.3811, "step": 14959 }, { "epoch": 0.6865219586067642, "grad_norm": 0.4415648579597473, "learning_rate": 8.920781126355717e-06, "loss": 0.3679, "step": 14960 }, { "epoch": 0.6865678491120187, "grad_norm": 0.4761502146720886, "learning_rate": 8.920628966984402e-06, "loss": 0.4011, "step": 14961 }, { "epoch": 0.6866137396172732, "grad_norm": 0.4760671854019165, "learning_rate": 8.920476798185209e-06, "loss": 0.4554, "step": 14962 }, { "epoch": 0.6866596301225276, "grad_norm": 0.4473685622215271, "learning_rate": 8.920324619958509e-06, "loss": 0.4023, "step": 14963 }, { "epoch": 0.6867055206277821, "grad_norm": 0.4060588479042053, "learning_rate": 8.920172432304664e-06, "loss": 0.2982, "step": 14964 }, { "epoch": 0.6867514111330366, "grad_norm": 0.4596617519855499, "learning_rate": 8.920020235224044e-06, "loss": 0.3659, "step": 14965 }, { "epoch": 0.686797301638291, "grad_norm": 0.45741286873817444, "learning_rate": 8.919868028717009e-06, "loss": 0.4374, "step": 14966 }, { "epoch": 0.6868431921435455, "grad_norm": 0.41568976640701294, "learning_rate": 8.91971581278393e-06, "loss": 0.2884, "step": 14967 }, { "epoch": 0.6868890826488, "grad_norm": 0.484148770570755, "learning_rate": 8.919563587425173e-06, "loss": 0.4862, "step": 14968 }, { "epoch": 0.6869349731540544, "grad_norm": 0.5416460633277893, "learning_rate": 8.919411352641102e-06, "loss": 0.442, "step": 14969 }, { "epoch": 0.6869808636593089, "grad_norm": 0.4361247718334198, "learning_rate": 8.919259108432083e-06, "loss": 0.3608, "step": 14970 }, { "epoch": 0.6870267541645634, "grad_norm": 0.4695515036582947, "learning_rate": 8.919106854798483e-06, "loss": 0.4105, "step": 14971 }, { "epoch": 0.6870726446698178, "grad_norm": 0.42950406670570374, "learning_rate": 8.918954591740667e-06, "loss": 0.3842, "step": 14972 }, { "epoch": 0.6871185351750723, "grad_norm": 0.48853814601898193, "learning_rate": 8.918802319259004e-06, "loss": 0.4699, "step": 14973 }, { "epoch": 0.6871644256803268, "grad_norm": 0.46227094531059265, "learning_rate": 8.918650037353858e-06, "loss": 0.4438, "step": 14974 }, { "epoch": 0.6872103161855811, "grad_norm": 0.42016327381134033, "learning_rate": 8.918497746025594e-06, "loss": 0.3694, "step": 14975 }, { "epoch": 0.6872562066908356, "grad_norm": 0.45480069518089294, "learning_rate": 8.91834544527458e-06, "loss": 0.3921, "step": 14976 }, { "epoch": 0.6873020971960901, "grad_norm": 0.47756290435791016, "learning_rate": 8.918193135101184e-06, "loss": 0.4539, "step": 14977 }, { "epoch": 0.6873479877013446, "grad_norm": 0.4812740087509155, "learning_rate": 8.918040815505767e-06, "loss": 0.4032, "step": 14978 }, { "epoch": 0.687393878206599, "grad_norm": 0.46387481689453125, "learning_rate": 8.9178884864887e-06, "loss": 0.4336, "step": 14979 }, { "epoch": 0.6874397687118535, "grad_norm": 0.4229836165904999, "learning_rate": 8.917736148050347e-06, "loss": 0.3388, "step": 14980 }, { "epoch": 0.687485659217108, "grad_norm": 0.42387086153030396, "learning_rate": 8.917583800191076e-06, "loss": 0.3531, "step": 14981 }, { "epoch": 0.6875315497223624, "grad_norm": 0.46882203221321106, "learning_rate": 8.917431442911251e-06, "loss": 0.4454, "step": 14982 }, { "epoch": 0.6875774402276169, "grad_norm": 0.49281471967697144, "learning_rate": 8.917279076211242e-06, "loss": 0.4341, "step": 14983 }, { "epoch": 0.6876233307328714, "grad_norm": 0.46577614545822144, "learning_rate": 8.917126700091412e-06, "loss": 0.3798, "step": 14984 }, { "epoch": 0.6876692212381258, "grad_norm": 0.4871450960636139, "learning_rate": 8.916974314552128e-06, "loss": 0.4174, "step": 14985 }, { "epoch": 0.6877151117433803, "grad_norm": 0.501464307308197, "learning_rate": 8.916821919593757e-06, "loss": 0.5031, "step": 14986 }, { "epoch": 0.6877610022486348, "grad_norm": 0.5052502751350403, "learning_rate": 8.916669515216664e-06, "loss": 0.385, "step": 14987 }, { "epoch": 0.6878068927538892, "grad_norm": 0.423660010099411, "learning_rate": 8.916517101421221e-06, "loss": 0.3372, "step": 14988 }, { "epoch": 0.6878527832591437, "grad_norm": 0.4601968824863434, "learning_rate": 8.916364678207788e-06, "loss": 0.3893, "step": 14989 }, { "epoch": 0.6878986737643982, "grad_norm": 0.4666600823402405, "learning_rate": 8.916212245576734e-06, "loss": 0.3893, "step": 14990 }, { "epoch": 0.6879445642696526, "grad_norm": 0.4620806872844696, "learning_rate": 8.916059803528426e-06, "loss": 0.4488, "step": 14991 }, { "epoch": 0.6879904547749071, "grad_norm": 0.42871516942977905, "learning_rate": 8.91590735206323e-06, "loss": 0.3543, "step": 14992 }, { "epoch": 0.6880363452801616, "grad_norm": 0.43291613459587097, "learning_rate": 8.915754891181512e-06, "loss": 0.3422, "step": 14993 }, { "epoch": 0.688082235785416, "grad_norm": 0.46761688590049744, "learning_rate": 8.91560242088364e-06, "loss": 0.3907, "step": 14994 }, { "epoch": 0.6881281262906704, "grad_norm": 0.513192892074585, "learning_rate": 8.91544994116998e-06, "loss": 0.4788, "step": 14995 }, { "epoch": 0.6881740167959249, "grad_norm": 0.4781024158000946, "learning_rate": 8.915297452040899e-06, "loss": 0.4213, "step": 14996 }, { "epoch": 0.6882199073011794, "grad_norm": 0.4775366485118866, "learning_rate": 8.915144953496763e-06, "loss": 0.4836, "step": 14997 }, { "epoch": 0.6882657978064338, "grad_norm": 0.45348578691482544, "learning_rate": 8.91499244553794e-06, "loss": 0.3806, "step": 14998 }, { "epoch": 0.6883116883116883, "grad_norm": 0.4858158230781555, "learning_rate": 8.914839928164793e-06, "loss": 0.391, "step": 14999 }, { "epoch": 0.6883575788169428, "grad_norm": 0.4520188868045807, "learning_rate": 8.914687401377693e-06, "loss": 0.3567, "step": 15000 }, { "epoch": 0.6884034693221972, "grad_norm": 0.4315658211708069, "learning_rate": 8.914534865177005e-06, "loss": 0.3568, "step": 15001 }, { "epoch": 0.6884493598274517, "grad_norm": 0.4763556718826294, "learning_rate": 8.914382319563098e-06, "loss": 0.4319, "step": 15002 }, { "epoch": 0.6884952503327062, "grad_norm": 0.4684775173664093, "learning_rate": 8.914229764536335e-06, "loss": 0.4401, "step": 15003 }, { "epoch": 0.6885411408379606, "grad_norm": 0.4878898561000824, "learning_rate": 8.914077200097085e-06, "loss": 0.478, "step": 15004 }, { "epoch": 0.6885870313432151, "grad_norm": 0.4497095048427582, "learning_rate": 8.913924626245715e-06, "loss": 0.3742, "step": 15005 }, { "epoch": 0.6886329218484696, "grad_norm": 0.4629368782043457, "learning_rate": 8.913772042982593e-06, "loss": 0.4273, "step": 15006 }, { "epoch": 0.688678812353724, "grad_norm": 0.45213782787323, "learning_rate": 8.913619450308083e-06, "loss": 0.3808, "step": 15007 }, { "epoch": 0.6887247028589785, "grad_norm": 0.468328595161438, "learning_rate": 8.913466848222553e-06, "loss": 0.3621, "step": 15008 }, { "epoch": 0.688770593364233, "grad_norm": 0.48691269755363464, "learning_rate": 8.91331423672637e-06, "loss": 0.4772, "step": 15009 }, { "epoch": 0.6888164838694874, "grad_norm": 0.4540717601776123, "learning_rate": 8.913161615819903e-06, "loss": 0.3953, "step": 15010 }, { "epoch": 0.6888623743747418, "grad_norm": 0.507427453994751, "learning_rate": 8.913008985503516e-06, "loss": 0.4699, "step": 15011 }, { "epoch": 0.6889082648799963, "grad_norm": 0.515504777431488, "learning_rate": 8.912856345777579e-06, "loss": 0.5051, "step": 15012 }, { "epoch": 0.6889541553852508, "grad_norm": 0.4142671525478363, "learning_rate": 8.912703696642456e-06, "loss": 0.317, "step": 15013 }, { "epoch": 0.6890000458905052, "grad_norm": 0.43825942277908325, "learning_rate": 8.912551038098516e-06, "loss": 0.3791, "step": 15014 }, { "epoch": 0.6890459363957597, "grad_norm": 0.48192837834358215, "learning_rate": 8.912398370146124e-06, "loss": 0.3631, "step": 15015 }, { "epoch": 0.6890918269010142, "grad_norm": 0.4621369242668152, "learning_rate": 8.912245692785651e-06, "loss": 0.4348, "step": 15016 }, { "epoch": 0.6891377174062686, "grad_norm": 0.4731169641017914, "learning_rate": 8.91209300601746e-06, "loss": 0.4126, "step": 15017 }, { "epoch": 0.6891836079115231, "grad_norm": 0.4565713405609131, "learning_rate": 8.911940309841923e-06, "loss": 0.3899, "step": 15018 }, { "epoch": 0.6892294984167776, "grad_norm": 0.45031362771987915, "learning_rate": 8.9117876042594e-06, "loss": 0.3908, "step": 15019 }, { "epoch": 0.689275388922032, "grad_norm": 0.46312087774276733, "learning_rate": 8.911634889270265e-06, "loss": 0.403, "step": 15020 }, { "epoch": 0.6893212794272865, "grad_norm": 0.4490703046321869, "learning_rate": 8.911482164874884e-06, "loss": 0.3989, "step": 15021 }, { "epoch": 0.689367169932541, "grad_norm": 0.4504191279411316, "learning_rate": 8.911329431073618e-06, "loss": 0.4131, "step": 15022 }, { "epoch": 0.6894130604377954, "grad_norm": 0.48080718517303467, "learning_rate": 8.911176687866844e-06, "loss": 0.418, "step": 15023 }, { "epoch": 0.6894589509430499, "grad_norm": 0.4744383692741394, "learning_rate": 8.911023935254921e-06, "loss": 0.4024, "step": 15024 }, { "epoch": 0.6895048414483044, "grad_norm": 0.42912599444389343, "learning_rate": 8.910871173238223e-06, "loss": 0.3842, "step": 15025 }, { "epoch": 0.6895507319535588, "grad_norm": 0.4116972088813782, "learning_rate": 8.910718401817113e-06, "loss": 0.3377, "step": 15026 }, { "epoch": 0.6895966224588133, "grad_norm": 0.4719600975513458, "learning_rate": 8.910565620991958e-06, "loss": 0.4582, "step": 15027 }, { "epoch": 0.6896425129640678, "grad_norm": 0.45434215664863586, "learning_rate": 8.910412830763128e-06, "loss": 0.4046, "step": 15028 }, { "epoch": 0.6896884034693221, "grad_norm": 0.4496857523918152, "learning_rate": 8.91026003113099e-06, "loss": 0.3962, "step": 15029 }, { "epoch": 0.6897342939745766, "grad_norm": 0.4435851275920868, "learning_rate": 8.91010722209591e-06, "loss": 0.4155, "step": 15030 }, { "epoch": 0.6897801844798311, "grad_norm": 0.44755688309669495, "learning_rate": 8.909954403658256e-06, "loss": 0.3505, "step": 15031 }, { "epoch": 0.6898260749850856, "grad_norm": 0.5074527859687805, "learning_rate": 8.909801575818397e-06, "loss": 0.5101, "step": 15032 }, { "epoch": 0.68987196549034, "grad_norm": 0.47744691371917725, "learning_rate": 8.909648738576697e-06, "loss": 0.4434, "step": 15033 }, { "epoch": 0.6899178559955945, "grad_norm": 0.6416549682617188, "learning_rate": 8.909495891933526e-06, "loss": 0.3874, "step": 15034 }, { "epoch": 0.689963746500849, "grad_norm": 0.43551865220069885, "learning_rate": 8.909343035889252e-06, "loss": 0.4028, "step": 15035 }, { "epoch": 0.6900096370061034, "grad_norm": 0.4496057331562042, "learning_rate": 8.909190170444242e-06, "loss": 0.3852, "step": 15036 }, { "epoch": 0.6900555275113579, "grad_norm": 0.4212131202220917, "learning_rate": 8.909037295598864e-06, "loss": 0.3642, "step": 15037 }, { "epoch": 0.6901014180166124, "grad_norm": 0.4716685116291046, "learning_rate": 8.908884411353485e-06, "loss": 0.4415, "step": 15038 }, { "epoch": 0.6901473085218668, "grad_norm": 0.4818684160709381, "learning_rate": 8.908731517708472e-06, "loss": 0.4558, "step": 15039 }, { "epoch": 0.6901931990271213, "grad_norm": 0.4738227427005768, "learning_rate": 8.908578614664194e-06, "loss": 0.4122, "step": 15040 }, { "epoch": 0.6902390895323758, "grad_norm": 0.47754526138305664, "learning_rate": 8.908425702221016e-06, "loss": 0.4358, "step": 15041 }, { "epoch": 0.6902849800376302, "grad_norm": 0.4478150010108948, "learning_rate": 8.90827278037931e-06, "loss": 0.4292, "step": 15042 }, { "epoch": 0.6903308705428847, "grad_norm": 0.4927135407924652, "learning_rate": 8.908119849139441e-06, "loss": 0.4963, "step": 15043 }, { "epoch": 0.6903767610481392, "grad_norm": 0.469346284866333, "learning_rate": 8.907966908501775e-06, "loss": 0.4111, "step": 15044 }, { "epoch": 0.6904226515533936, "grad_norm": 0.45989370346069336, "learning_rate": 8.907813958466684e-06, "loss": 0.4432, "step": 15045 }, { "epoch": 0.690468542058648, "grad_norm": 0.44060951471328735, "learning_rate": 8.907660999034534e-06, "loss": 0.343, "step": 15046 }, { "epoch": 0.6905144325639025, "grad_norm": 0.45614075660705566, "learning_rate": 8.907508030205692e-06, "loss": 0.4079, "step": 15047 }, { "epoch": 0.690560323069157, "grad_norm": 0.42114952206611633, "learning_rate": 8.907355051980525e-06, "loss": 0.341, "step": 15048 }, { "epoch": 0.6906062135744114, "grad_norm": 0.42861485481262207, "learning_rate": 8.907202064359405e-06, "loss": 0.3322, "step": 15049 }, { "epoch": 0.6906521040796659, "grad_norm": 0.43844321370124817, "learning_rate": 8.907049067342697e-06, "loss": 0.3592, "step": 15050 }, { "epoch": 0.6906979945849204, "grad_norm": 0.40976181626319885, "learning_rate": 8.906896060930768e-06, "loss": 0.3518, "step": 15051 }, { "epoch": 0.6907438850901748, "grad_norm": 0.4560471177101135, "learning_rate": 8.906743045123986e-06, "loss": 0.4374, "step": 15052 }, { "epoch": 0.6907897755954293, "grad_norm": 0.4475359320640564, "learning_rate": 8.906590019922721e-06, "loss": 0.3923, "step": 15053 }, { "epoch": 0.6908356661006838, "grad_norm": 0.47006359696388245, "learning_rate": 8.90643698532734e-06, "loss": 0.3878, "step": 15054 }, { "epoch": 0.6908815566059382, "grad_norm": 0.4970708191394806, "learning_rate": 8.906283941338212e-06, "loss": 0.4285, "step": 15055 }, { "epoch": 0.6909274471111927, "grad_norm": 0.42144298553466797, "learning_rate": 8.906130887955702e-06, "loss": 0.3216, "step": 15056 }, { "epoch": 0.6909733376164472, "grad_norm": 0.4323214590549469, "learning_rate": 8.905977825180181e-06, "loss": 0.3316, "step": 15057 }, { "epoch": 0.6910192281217016, "grad_norm": 0.4509257674217224, "learning_rate": 8.905824753012015e-06, "loss": 0.3824, "step": 15058 }, { "epoch": 0.6910651186269561, "grad_norm": 0.4705160856246948, "learning_rate": 8.905671671451577e-06, "loss": 0.425, "step": 15059 }, { "epoch": 0.6911110091322106, "grad_norm": 0.45417124032974243, "learning_rate": 8.905518580499228e-06, "loss": 0.3772, "step": 15060 }, { "epoch": 0.691156899637465, "grad_norm": 0.4337405264377594, "learning_rate": 8.90536548015534e-06, "loss": 0.3679, "step": 15061 }, { "epoch": 0.6912027901427195, "grad_norm": 0.47078076004981995, "learning_rate": 8.90521237042028e-06, "loss": 0.4144, "step": 15062 }, { "epoch": 0.691248680647974, "grad_norm": 0.47007036209106445, "learning_rate": 8.905059251294416e-06, "loss": 0.3477, "step": 15063 }, { "epoch": 0.6912945711532283, "grad_norm": 0.4606531858444214, "learning_rate": 8.90490612277812e-06, "loss": 0.3665, "step": 15064 }, { "epoch": 0.6913404616584828, "grad_norm": 0.4860847294330597, "learning_rate": 8.904752984871756e-06, "loss": 0.4713, "step": 15065 }, { "epoch": 0.6913863521637373, "grad_norm": 0.44397976994514465, "learning_rate": 8.904599837575692e-06, "loss": 0.3735, "step": 15066 }, { "epoch": 0.6914322426689918, "grad_norm": 0.42582622170448303, "learning_rate": 8.904446680890299e-06, "loss": 0.3328, "step": 15067 }, { "epoch": 0.6914781331742462, "grad_norm": 0.41647234559059143, "learning_rate": 8.904293514815944e-06, "loss": 0.3264, "step": 15068 }, { "epoch": 0.6915240236795007, "grad_norm": 0.4586029052734375, "learning_rate": 8.904140339352994e-06, "loss": 0.4058, "step": 15069 }, { "epoch": 0.6915699141847552, "grad_norm": 0.43757638335227966, "learning_rate": 8.903987154501821e-06, "loss": 0.3785, "step": 15070 }, { "epoch": 0.6916158046900096, "grad_norm": 0.44139403104782104, "learning_rate": 8.903833960262788e-06, "loss": 0.3863, "step": 15071 }, { "epoch": 0.6916616951952641, "grad_norm": 0.430138498544693, "learning_rate": 8.90368075663627e-06, "loss": 0.3448, "step": 15072 }, { "epoch": 0.6917075857005186, "grad_norm": 0.46243876218795776, "learning_rate": 8.90352754362263e-06, "loss": 0.4137, "step": 15073 }, { "epoch": 0.691753476205773, "grad_norm": 0.451144278049469, "learning_rate": 8.903374321222238e-06, "loss": 0.3687, "step": 15074 }, { "epoch": 0.6917993667110275, "grad_norm": 0.44124680757522583, "learning_rate": 8.903221089435463e-06, "loss": 0.4067, "step": 15075 }, { "epoch": 0.691845257216282, "grad_norm": 0.5448135137557983, "learning_rate": 8.903067848262672e-06, "loss": 0.4468, "step": 15076 }, { "epoch": 0.6918911477215364, "grad_norm": 0.4734453856945038, "learning_rate": 8.902914597704236e-06, "loss": 0.4131, "step": 15077 }, { "epoch": 0.6919370382267909, "grad_norm": 0.43498197197914124, "learning_rate": 8.90276133776052e-06, "loss": 0.3443, "step": 15078 }, { "epoch": 0.6919829287320454, "grad_norm": 0.48123863339424133, "learning_rate": 8.902608068431899e-06, "loss": 0.4618, "step": 15079 }, { "epoch": 0.6920288192372998, "grad_norm": 0.5311402082443237, "learning_rate": 8.902454789718735e-06, "loss": 0.6245, "step": 15080 }, { "epoch": 0.6920747097425543, "grad_norm": 0.47681382298469543, "learning_rate": 8.9023015016214e-06, "loss": 0.4363, "step": 15081 }, { "epoch": 0.6921206002478087, "grad_norm": 0.4866882264614105, "learning_rate": 8.902148204140258e-06, "loss": 0.4313, "step": 15082 }, { "epoch": 0.6921664907530632, "grad_norm": 0.4718891382217407, "learning_rate": 8.901994897275685e-06, "loss": 0.4181, "step": 15083 }, { "epoch": 0.6922123812583176, "grad_norm": 0.4618433117866516, "learning_rate": 8.901841581028043e-06, "loss": 0.4029, "step": 15084 }, { "epoch": 0.6922582717635721, "grad_norm": 0.44824185967445374, "learning_rate": 8.901688255397705e-06, "loss": 0.3769, "step": 15085 }, { "epoch": 0.6923041622688266, "grad_norm": 0.4765690565109253, "learning_rate": 8.901534920385037e-06, "loss": 0.4352, "step": 15086 }, { "epoch": 0.692350052774081, "grad_norm": 0.4359038472175598, "learning_rate": 8.90138157599041e-06, "loss": 0.3808, "step": 15087 }, { "epoch": 0.6923959432793355, "grad_norm": 0.43580570816993713, "learning_rate": 8.901228222214191e-06, "loss": 0.3701, "step": 15088 }, { "epoch": 0.69244183378459, "grad_norm": 0.4450821876525879, "learning_rate": 8.90107485905675e-06, "loss": 0.3581, "step": 15089 }, { "epoch": 0.6924877242898444, "grad_norm": 0.46013516187667847, "learning_rate": 8.900921486518456e-06, "loss": 0.3828, "step": 15090 }, { "epoch": 0.6925336147950989, "grad_norm": 0.4831649661064148, "learning_rate": 8.900768104599676e-06, "loss": 0.4514, "step": 15091 }, { "epoch": 0.6925795053003534, "grad_norm": 0.46518245339393616, "learning_rate": 8.900614713300779e-06, "loss": 0.4314, "step": 15092 }, { "epoch": 0.6926253958056078, "grad_norm": 0.4164680242538452, "learning_rate": 8.900461312622135e-06, "loss": 0.3519, "step": 15093 }, { "epoch": 0.6926712863108623, "grad_norm": 0.4460037052631378, "learning_rate": 8.900307902564113e-06, "loss": 0.3734, "step": 15094 }, { "epoch": 0.6927171768161168, "grad_norm": 0.4843788743019104, "learning_rate": 8.900154483127082e-06, "loss": 0.4899, "step": 15095 }, { "epoch": 0.6927630673213712, "grad_norm": 0.48087796568870544, "learning_rate": 8.90000105431141e-06, "loss": 0.454, "step": 15096 }, { "epoch": 0.6928089578266257, "grad_norm": 0.49372223019599915, "learning_rate": 8.899847616117466e-06, "loss": 0.4397, "step": 15097 }, { "epoch": 0.6928548483318802, "grad_norm": 0.4934097230434418, "learning_rate": 8.89969416854562e-06, "loss": 0.4248, "step": 15098 }, { "epoch": 0.6929007388371345, "grad_norm": 0.45487314462661743, "learning_rate": 8.89954071159624e-06, "loss": 0.3673, "step": 15099 }, { "epoch": 0.692946629342389, "grad_norm": 0.5339693427085876, "learning_rate": 8.899387245269695e-06, "loss": 0.4942, "step": 15100 }, { "epoch": 0.6929925198476435, "grad_norm": 0.4568933844566345, "learning_rate": 8.899233769566353e-06, "loss": 0.4216, "step": 15101 }, { "epoch": 0.693038410352898, "grad_norm": 0.45888862013816833, "learning_rate": 8.899080284486586e-06, "loss": 0.3464, "step": 15102 }, { "epoch": 0.6930843008581524, "grad_norm": 0.4929792582988739, "learning_rate": 8.898926790030762e-06, "loss": 0.4545, "step": 15103 }, { "epoch": 0.6931301913634069, "grad_norm": 0.4200250506401062, "learning_rate": 8.89877328619925e-06, "loss": 0.3314, "step": 15104 }, { "epoch": 0.6931760818686614, "grad_norm": 0.49718424677848816, "learning_rate": 8.898619772992417e-06, "loss": 0.4436, "step": 15105 }, { "epoch": 0.6932219723739158, "grad_norm": 0.4373233914375305, "learning_rate": 8.898466250410634e-06, "loss": 0.3507, "step": 15106 }, { "epoch": 0.6932678628791703, "grad_norm": 0.4857986569404602, "learning_rate": 8.898312718454271e-06, "loss": 0.3894, "step": 15107 }, { "epoch": 0.6933137533844248, "grad_norm": 0.45529282093048096, "learning_rate": 8.898159177123697e-06, "loss": 0.3844, "step": 15108 }, { "epoch": 0.6933596438896792, "grad_norm": 0.46354246139526367, "learning_rate": 8.898005626419278e-06, "loss": 0.3848, "step": 15109 }, { "epoch": 0.6934055343949337, "grad_norm": 0.4467262029647827, "learning_rate": 8.897852066341388e-06, "loss": 0.3708, "step": 15110 }, { "epoch": 0.6934514249001882, "grad_norm": 0.47388315200805664, "learning_rate": 8.897698496890392e-06, "loss": 0.46, "step": 15111 }, { "epoch": 0.6934973154054426, "grad_norm": 0.43150198459625244, "learning_rate": 8.897544918066663e-06, "loss": 0.3362, "step": 15112 }, { "epoch": 0.6935432059106971, "grad_norm": 0.4536038935184479, "learning_rate": 8.897391329870569e-06, "loss": 0.368, "step": 15113 }, { "epoch": 0.6935890964159516, "grad_norm": 0.4295598864555359, "learning_rate": 8.897237732302477e-06, "loss": 0.3591, "step": 15114 }, { "epoch": 0.693634986921206, "grad_norm": 0.47307121753692627, "learning_rate": 8.897084125362758e-06, "loss": 0.4564, "step": 15115 }, { "epoch": 0.6936808774264605, "grad_norm": 0.44129276275634766, "learning_rate": 8.896930509051783e-06, "loss": 0.3645, "step": 15116 }, { "epoch": 0.693726767931715, "grad_norm": 0.48488351702690125, "learning_rate": 8.896776883369919e-06, "loss": 0.4329, "step": 15117 }, { "epoch": 0.6937726584369693, "grad_norm": 0.4221949279308319, "learning_rate": 8.896623248317538e-06, "loss": 0.3324, "step": 15118 }, { "epoch": 0.6938185489422238, "grad_norm": 0.4085926115512848, "learning_rate": 8.896469603895007e-06, "loss": 0.3358, "step": 15119 }, { "epoch": 0.6938644394474783, "grad_norm": 0.4322345554828644, "learning_rate": 8.896315950102697e-06, "loss": 0.3938, "step": 15120 }, { "epoch": 0.6939103299527328, "grad_norm": 0.4339832663536072, "learning_rate": 8.896162286940975e-06, "loss": 0.3829, "step": 15121 }, { "epoch": 0.6939562204579872, "grad_norm": 0.4838652014732361, "learning_rate": 8.896008614410215e-06, "loss": 0.4948, "step": 15122 }, { "epoch": 0.6940021109632417, "grad_norm": 0.4479416608810425, "learning_rate": 8.895854932510782e-06, "loss": 0.3512, "step": 15123 }, { "epoch": 0.6940480014684962, "grad_norm": 0.46026772260665894, "learning_rate": 8.895701241243048e-06, "loss": 0.4037, "step": 15124 }, { "epoch": 0.6940938919737506, "grad_norm": 0.4251273572444916, "learning_rate": 8.895547540607383e-06, "loss": 0.333, "step": 15125 }, { "epoch": 0.6941397824790051, "grad_norm": 0.4681026041507721, "learning_rate": 8.895393830604152e-06, "loss": 0.4454, "step": 15126 }, { "epoch": 0.6941856729842596, "grad_norm": 0.4373570382595062, "learning_rate": 8.895240111233734e-06, "loss": 0.4031, "step": 15127 }, { "epoch": 0.694231563489514, "grad_norm": 0.4509614408016205, "learning_rate": 8.89508638249649e-06, "loss": 0.4005, "step": 15128 }, { "epoch": 0.6942774539947685, "grad_norm": 0.4581938683986664, "learning_rate": 8.89493264439279e-06, "loss": 0.4054, "step": 15129 }, { "epoch": 0.694323344500023, "grad_norm": 0.4731428921222687, "learning_rate": 8.894778896923011e-06, "loss": 0.4397, "step": 15130 }, { "epoch": 0.6943692350052774, "grad_norm": 0.4233674108982086, "learning_rate": 8.894625140087514e-06, "loss": 0.3535, "step": 15131 }, { "epoch": 0.6944151255105319, "grad_norm": 0.4344710409641266, "learning_rate": 8.894471373886675e-06, "loss": 0.3771, "step": 15132 }, { "epoch": 0.6944610160157864, "grad_norm": 0.45964351296424866, "learning_rate": 8.89431759832086e-06, "loss": 0.4419, "step": 15133 }, { "epoch": 0.6945069065210407, "grad_norm": 0.4150189459323883, "learning_rate": 8.894163813390441e-06, "loss": 0.335, "step": 15134 }, { "epoch": 0.6945527970262952, "grad_norm": 0.4308984577655792, "learning_rate": 8.89401001909579e-06, "loss": 0.3139, "step": 15135 }, { "epoch": 0.6945986875315497, "grad_norm": 0.48209941387176514, "learning_rate": 8.89385621543727e-06, "loss": 0.4233, "step": 15136 }, { "epoch": 0.6946445780368042, "grad_norm": 0.4352678954601288, "learning_rate": 8.893702402415255e-06, "loss": 0.3792, "step": 15137 }, { "epoch": 0.6946904685420586, "grad_norm": 0.4760005474090576, "learning_rate": 8.893548580030116e-06, "loss": 0.4458, "step": 15138 }, { "epoch": 0.6947363590473131, "grad_norm": 0.46707314252853394, "learning_rate": 8.89339474828222e-06, "loss": 0.4116, "step": 15139 }, { "epoch": 0.6947822495525676, "grad_norm": 0.44792258739471436, "learning_rate": 8.89324090717194e-06, "loss": 0.4397, "step": 15140 }, { "epoch": 0.694828140057822, "grad_norm": 0.4552158713340759, "learning_rate": 8.893087056699643e-06, "loss": 0.403, "step": 15141 }, { "epoch": 0.6948740305630765, "grad_norm": 0.4578076899051666, "learning_rate": 8.892933196865701e-06, "loss": 0.3688, "step": 15142 }, { "epoch": 0.694919921068331, "grad_norm": 0.45841383934020996, "learning_rate": 8.892779327670484e-06, "loss": 0.3899, "step": 15143 }, { "epoch": 0.6949658115735854, "grad_norm": 0.43301504850387573, "learning_rate": 8.89262544911436e-06, "loss": 0.3611, "step": 15144 }, { "epoch": 0.6950117020788399, "grad_norm": 0.4400251805782318, "learning_rate": 8.892471561197702e-06, "loss": 0.3719, "step": 15145 }, { "epoch": 0.6950575925840944, "grad_norm": 0.4428766965866089, "learning_rate": 8.892317663920878e-06, "loss": 0.4023, "step": 15146 }, { "epoch": 0.6951034830893488, "grad_norm": 0.4433557689189911, "learning_rate": 8.892163757284257e-06, "loss": 0.4037, "step": 15147 }, { "epoch": 0.6951493735946033, "grad_norm": 0.44974005222320557, "learning_rate": 8.892009841288212e-06, "loss": 0.359, "step": 15148 }, { "epoch": 0.6951952640998578, "grad_norm": 0.45068299770355225, "learning_rate": 8.891855915933112e-06, "loss": 0.4228, "step": 15149 }, { "epoch": 0.6952411546051122, "grad_norm": 0.4278578758239746, "learning_rate": 8.891701981219325e-06, "loss": 0.3954, "step": 15150 }, { "epoch": 0.6952870451103667, "grad_norm": 0.4306012690067291, "learning_rate": 8.891548037147224e-06, "loss": 0.3567, "step": 15151 }, { "epoch": 0.6953329356156212, "grad_norm": 0.4072541892528534, "learning_rate": 8.891394083717177e-06, "loss": 0.3252, "step": 15152 }, { "epoch": 0.6953788261208755, "grad_norm": 0.44118452072143555, "learning_rate": 8.891240120929556e-06, "loss": 0.3963, "step": 15153 }, { "epoch": 0.69542471662613, "grad_norm": 0.4333735704421997, "learning_rate": 8.891086148784732e-06, "loss": 0.3523, "step": 15154 }, { "epoch": 0.6954706071313845, "grad_norm": 0.4640464782714844, "learning_rate": 8.890932167283074e-06, "loss": 0.4167, "step": 15155 }, { "epoch": 0.695516497636639, "grad_norm": 0.46337980031967163, "learning_rate": 8.89077817642495e-06, "loss": 0.436, "step": 15156 }, { "epoch": 0.6955623881418934, "grad_norm": 0.47425082325935364, "learning_rate": 8.890624176210734e-06, "loss": 0.436, "step": 15157 }, { "epoch": 0.6956082786471479, "grad_norm": 0.46500587463378906, "learning_rate": 8.890470166640795e-06, "loss": 0.4316, "step": 15158 }, { "epoch": 0.6956541691524024, "grad_norm": 0.48668068647384644, "learning_rate": 8.890316147715501e-06, "loss": 0.4771, "step": 15159 }, { "epoch": 0.6957000596576568, "grad_norm": 0.4826662838459015, "learning_rate": 8.890162119435226e-06, "loss": 0.4572, "step": 15160 }, { "epoch": 0.6957459501629113, "grad_norm": 0.4761209487915039, "learning_rate": 8.890008081800338e-06, "loss": 0.4245, "step": 15161 }, { "epoch": 0.6957918406681658, "grad_norm": 0.5205764770507812, "learning_rate": 8.889854034811211e-06, "loss": 0.533, "step": 15162 }, { "epoch": 0.6958377311734202, "grad_norm": 0.436120867729187, "learning_rate": 8.889699978468211e-06, "loss": 0.3431, "step": 15163 }, { "epoch": 0.6958836216786747, "grad_norm": 0.4808082580566406, "learning_rate": 8.88954591277171e-06, "loss": 0.4243, "step": 15164 }, { "epoch": 0.6959295121839292, "grad_norm": 0.41901248693466187, "learning_rate": 8.889391837722079e-06, "loss": 0.3097, "step": 15165 }, { "epoch": 0.6959754026891836, "grad_norm": 0.4621783494949341, "learning_rate": 8.889237753319688e-06, "loss": 0.4003, "step": 15166 }, { "epoch": 0.6960212931944381, "grad_norm": 0.4483255445957184, "learning_rate": 8.889083659564906e-06, "loss": 0.3621, "step": 15167 }, { "epoch": 0.6960671836996926, "grad_norm": 0.4626440703868866, "learning_rate": 8.888929556458108e-06, "loss": 0.3907, "step": 15168 }, { "epoch": 0.696113074204947, "grad_norm": 0.4225757420063019, "learning_rate": 8.88877544399966e-06, "loss": 0.3679, "step": 15169 }, { "epoch": 0.6961589647102014, "grad_norm": 0.4677918553352356, "learning_rate": 8.888621322189934e-06, "loss": 0.4134, "step": 15170 }, { "epoch": 0.6962048552154559, "grad_norm": 0.409432977437973, "learning_rate": 8.888467191029304e-06, "loss": 0.3283, "step": 15171 }, { "epoch": 0.6962507457207104, "grad_norm": 0.4502343237400055, "learning_rate": 8.888313050518134e-06, "loss": 0.4071, "step": 15172 }, { "epoch": 0.6962966362259648, "grad_norm": 0.45514896512031555, "learning_rate": 8.8881589006568e-06, "loss": 0.3861, "step": 15173 }, { "epoch": 0.6963425267312193, "grad_norm": 0.40824124217033386, "learning_rate": 8.88800474144567e-06, "loss": 0.3174, "step": 15174 }, { "epoch": 0.6963884172364738, "grad_norm": 0.4617963135242462, "learning_rate": 8.887850572885118e-06, "loss": 0.4366, "step": 15175 }, { "epoch": 0.6964343077417282, "grad_norm": 0.4800441861152649, "learning_rate": 8.88769639497551e-06, "loss": 0.4868, "step": 15176 }, { "epoch": 0.6964801982469827, "grad_norm": 0.4768453538417816, "learning_rate": 8.887542207717221e-06, "loss": 0.5377, "step": 15177 }, { "epoch": 0.6965260887522372, "grad_norm": 0.4700711965560913, "learning_rate": 8.887388011110619e-06, "loss": 0.3582, "step": 15178 }, { "epoch": 0.6965719792574916, "grad_norm": 0.45769718289375305, "learning_rate": 8.887233805156076e-06, "loss": 0.4358, "step": 15179 }, { "epoch": 0.6966178697627461, "grad_norm": 0.39754197001457214, "learning_rate": 8.887079589853962e-06, "loss": 0.2722, "step": 15180 }, { "epoch": 0.6966637602680006, "grad_norm": 0.4777667224407196, "learning_rate": 8.886925365204648e-06, "loss": 0.3749, "step": 15181 }, { "epoch": 0.696709650773255, "grad_norm": 0.49719342589378357, "learning_rate": 8.886771131208507e-06, "loss": 0.4603, "step": 15182 }, { "epoch": 0.6967555412785095, "grad_norm": 0.4033268988132477, "learning_rate": 8.886616887865906e-06, "loss": 0.271, "step": 15183 }, { "epoch": 0.696801431783764, "grad_norm": 0.4623813033103943, "learning_rate": 8.88646263517722e-06, "loss": 0.4315, "step": 15184 }, { "epoch": 0.6968473222890184, "grad_norm": 0.4295229911804199, "learning_rate": 8.886308373142816e-06, "loss": 0.3273, "step": 15185 }, { "epoch": 0.6968932127942729, "grad_norm": 0.45323947072029114, "learning_rate": 8.886154101763069e-06, "loss": 0.3642, "step": 15186 }, { "epoch": 0.6969391032995274, "grad_norm": 0.46795228123664856, "learning_rate": 8.885999821038346e-06, "loss": 0.4505, "step": 15187 }, { "epoch": 0.6969849938047817, "grad_norm": 0.4585321247577667, "learning_rate": 8.885845530969022e-06, "loss": 0.3842, "step": 15188 }, { "epoch": 0.6970308843100362, "grad_norm": 0.45521658658981323, "learning_rate": 8.885691231555464e-06, "loss": 0.3854, "step": 15189 }, { "epoch": 0.6970767748152907, "grad_norm": 0.4429137110710144, "learning_rate": 8.885536922798045e-06, "loss": 0.4124, "step": 15190 }, { "epoch": 0.6971226653205452, "grad_norm": 0.4948519468307495, "learning_rate": 8.885382604697137e-06, "loss": 0.5098, "step": 15191 }, { "epoch": 0.6971685558257996, "grad_norm": 0.42685559391975403, "learning_rate": 8.88522827725311e-06, "loss": 0.3336, "step": 15192 }, { "epoch": 0.6972144463310541, "grad_norm": 0.44985103607177734, "learning_rate": 8.885073940466336e-06, "loss": 0.327, "step": 15193 }, { "epoch": 0.6972603368363086, "grad_norm": 0.47658848762512207, "learning_rate": 8.884919594337183e-06, "loss": 0.4124, "step": 15194 }, { "epoch": 0.697306227341563, "grad_norm": 0.4722121059894562, "learning_rate": 8.884765238866026e-06, "loss": 0.4603, "step": 15195 }, { "epoch": 0.6973521178468175, "grad_norm": 0.5297197103500366, "learning_rate": 8.884610874053234e-06, "loss": 0.4366, "step": 15196 }, { "epoch": 0.697398008352072, "grad_norm": 0.4262726604938507, "learning_rate": 8.884456499899181e-06, "loss": 0.3539, "step": 15197 }, { "epoch": 0.6974438988573264, "grad_norm": 0.4734591841697693, "learning_rate": 8.884302116404232e-06, "loss": 0.3998, "step": 15198 }, { "epoch": 0.6974897893625809, "grad_norm": 0.4541209936141968, "learning_rate": 8.884147723568766e-06, "loss": 0.3887, "step": 15199 }, { "epoch": 0.6975356798678354, "grad_norm": 0.4840013384819031, "learning_rate": 8.88399332139315e-06, "loss": 0.4349, "step": 15200 }, { "epoch": 0.6975815703730898, "grad_norm": 0.4816952049732208, "learning_rate": 8.883838909877756e-06, "loss": 0.4119, "step": 15201 }, { "epoch": 0.6976274608783443, "grad_norm": 0.40447670221328735, "learning_rate": 8.883684489022953e-06, "loss": 0.3008, "step": 15202 }, { "epoch": 0.6976733513835988, "grad_norm": 0.43527907133102417, "learning_rate": 8.883530058829117e-06, "loss": 0.3787, "step": 15203 }, { "epoch": 0.6977192418888531, "grad_norm": 0.4083555340766907, "learning_rate": 8.883375619296615e-06, "loss": 0.3129, "step": 15204 }, { "epoch": 0.6977651323941076, "grad_norm": 0.43514540791511536, "learning_rate": 8.883221170425822e-06, "loss": 0.3388, "step": 15205 }, { "epoch": 0.6978110228993621, "grad_norm": 0.5058029890060425, "learning_rate": 8.883066712217105e-06, "loss": 0.4727, "step": 15206 }, { "epoch": 0.6978569134046165, "grad_norm": 0.4844582974910736, "learning_rate": 8.88291224467084e-06, "loss": 0.4164, "step": 15207 }, { "epoch": 0.697902803909871, "grad_norm": 0.44629886746406555, "learning_rate": 8.882757767787397e-06, "loss": 0.3801, "step": 15208 }, { "epoch": 0.6979486944151255, "grad_norm": 0.44696423411369324, "learning_rate": 8.882603281567147e-06, "loss": 0.3628, "step": 15209 }, { "epoch": 0.69799458492038, "grad_norm": 0.4221518337726593, "learning_rate": 8.88244878601046e-06, "loss": 0.3442, "step": 15210 }, { "epoch": 0.6980404754256344, "grad_norm": 0.454863041639328, "learning_rate": 8.882294281117711e-06, "loss": 0.432, "step": 15211 }, { "epoch": 0.6980863659308889, "grad_norm": 0.4666585326194763, "learning_rate": 8.882139766889269e-06, "loss": 0.4061, "step": 15212 }, { "epoch": 0.6981322564361434, "grad_norm": 0.5056252479553223, "learning_rate": 8.881985243325504e-06, "loss": 0.4975, "step": 15213 }, { "epoch": 0.6981781469413978, "grad_norm": 0.46398425102233887, "learning_rate": 8.881830710426793e-06, "loss": 0.4198, "step": 15214 }, { "epoch": 0.6982240374466523, "grad_norm": 0.45258086919784546, "learning_rate": 8.8816761681935e-06, "loss": 0.3783, "step": 15215 }, { "epoch": 0.6982699279519068, "grad_norm": 0.46092015504837036, "learning_rate": 8.881521616626005e-06, "loss": 0.3925, "step": 15216 }, { "epoch": 0.6983158184571612, "grad_norm": 0.4350115954875946, "learning_rate": 8.881367055724674e-06, "loss": 0.3644, "step": 15217 }, { "epoch": 0.6983617089624157, "grad_norm": 0.4020456075668335, "learning_rate": 8.88121248548988e-06, "loss": 0.3162, "step": 15218 }, { "epoch": 0.6984075994676702, "grad_norm": 0.44675400853157043, "learning_rate": 8.881057905921994e-06, "loss": 0.4137, "step": 15219 }, { "epoch": 0.6984534899729246, "grad_norm": 0.4497157037258148, "learning_rate": 8.88090331702139e-06, "loss": 0.3478, "step": 15220 }, { "epoch": 0.698499380478179, "grad_norm": 0.5763722062110901, "learning_rate": 8.88074871878844e-06, "loss": 0.3754, "step": 15221 }, { "epoch": 0.6985452709834336, "grad_norm": 0.4855414628982544, "learning_rate": 8.880594111223511e-06, "loss": 0.3975, "step": 15222 }, { "epoch": 0.6985911614886879, "grad_norm": 0.5418866276741028, "learning_rate": 8.880439494326978e-06, "loss": 0.3314, "step": 15223 }, { "epoch": 0.6986370519939424, "grad_norm": 0.49025633931159973, "learning_rate": 8.880284868099216e-06, "loss": 0.5026, "step": 15224 }, { "epoch": 0.6986829424991969, "grad_norm": 0.5110155940055847, "learning_rate": 8.88013023254059e-06, "loss": 0.4984, "step": 15225 }, { "epoch": 0.6987288330044514, "grad_norm": 0.48928919434547424, "learning_rate": 8.879975587651477e-06, "loss": 0.4981, "step": 15226 }, { "epoch": 0.6987747235097058, "grad_norm": 0.4417930543422699, "learning_rate": 8.879820933432247e-06, "loss": 0.3299, "step": 15227 }, { "epoch": 0.6988206140149603, "grad_norm": 0.4514995813369751, "learning_rate": 8.879666269883272e-06, "loss": 0.3651, "step": 15228 }, { "epoch": 0.6988665045202148, "grad_norm": 0.4827274680137634, "learning_rate": 8.879511597004924e-06, "loss": 0.4159, "step": 15229 }, { "epoch": 0.6989123950254692, "grad_norm": 0.46608299016952515, "learning_rate": 8.879356914797577e-06, "loss": 0.4027, "step": 15230 }, { "epoch": 0.6989582855307237, "grad_norm": 0.4592164158821106, "learning_rate": 8.879202223261598e-06, "loss": 0.3725, "step": 15231 }, { "epoch": 0.6990041760359782, "grad_norm": 0.4692539870738983, "learning_rate": 8.879047522397365e-06, "loss": 0.4422, "step": 15232 }, { "epoch": 0.6990500665412326, "grad_norm": 0.4496704041957855, "learning_rate": 8.878892812205245e-06, "loss": 0.3631, "step": 15233 }, { "epoch": 0.6990959570464871, "grad_norm": 0.4508979022502899, "learning_rate": 8.878738092685612e-06, "loss": 0.415, "step": 15234 }, { "epoch": 0.6991418475517416, "grad_norm": 0.40440821647644043, "learning_rate": 8.878583363838837e-06, "loss": 0.3145, "step": 15235 }, { "epoch": 0.699187738056996, "grad_norm": 0.48993462324142456, "learning_rate": 8.878428625665296e-06, "loss": 0.4803, "step": 15236 }, { "epoch": 0.6992336285622505, "grad_norm": 0.45309996604919434, "learning_rate": 8.878273878165356e-06, "loss": 0.3798, "step": 15237 }, { "epoch": 0.699279519067505, "grad_norm": 0.43323734402656555, "learning_rate": 8.878119121339392e-06, "loss": 0.3625, "step": 15238 }, { "epoch": 0.6993254095727593, "grad_norm": 0.45553144812583923, "learning_rate": 8.877964355187774e-06, "loss": 0.3906, "step": 15239 }, { "epoch": 0.6993713000780138, "grad_norm": 0.43476083874702454, "learning_rate": 8.877809579710878e-06, "loss": 0.3584, "step": 15240 }, { "epoch": 0.6994171905832683, "grad_norm": 0.4173594117164612, "learning_rate": 8.877654794909075e-06, "loss": 0.3464, "step": 15241 }, { "epoch": 0.6994630810885227, "grad_norm": 0.4591556787490845, "learning_rate": 8.877500000782732e-06, "loss": 0.4078, "step": 15242 }, { "epoch": 0.6995089715937772, "grad_norm": 0.4686826169490814, "learning_rate": 8.877345197332228e-06, "loss": 0.4042, "step": 15243 }, { "epoch": 0.6995548620990317, "grad_norm": 0.44457918405532837, "learning_rate": 8.877190384557931e-06, "loss": 0.3833, "step": 15244 }, { "epoch": 0.6996007526042862, "grad_norm": 0.4244305491447449, "learning_rate": 8.877035562460217e-06, "loss": 0.3153, "step": 15245 }, { "epoch": 0.6996466431095406, "grad_norm": 0.45465418696403503, "learning_rate": 8.876880731039454e-06, "loss": 0.3434, "step": 15246 }, { "epoch": 0.6996925336147951, "grad_norm": 0.48485392332077026, "learning_rate": 8.876725890296017e-06, "loss": 0.395, "step": 15247 }, { "epoch": 0.6997384241200496, "grad_norm": 0.42503198981285095, "learning_rate": 8.876571040230277e-06, "loss": 0.3792, "step": 15248 }, { "epoch": 0.699784314625304, "grad_norm": 0.45768308639526367, "learning_rate": 8.876416180842609e-06, "loss": 0.4183, "step": 15249 }, { "epoch": 0.6998302051305585, "grad_norm": 0.4520810842514038, "learning_rate": 8.876261312133383e-06, "loss": 0.423, "step": 15250 }, { "epoch": 0.699876095635813, "grad_norm": 0.4430385231971741, "learning_rate": 8.876106434102972e-06, "loss": 0.3566, "step": 15251 }, { "epoch": 0.6999219861410674, "grad_norm": 0.4753483235836029, "learning_rate": 8.875951546751746e-06, "loss": 0.4526, "step": 15252 }, { "epoch": 0.6999678766463219, "grad_norm": 0.41026216745376587, "learning_rate": 8.875796650080082e-06, "loss": 0.3403, "step": 15253 }, { "epoch": 0.7000137671515764, "grad_norm": 0.47939690947532654, "learning_rate": 8.87564174408835e-06, "loss": 0.4367, "step": 15254 }, { "epoch": 0.7000596576568308, "grad_norm": 0.5063282251358032, "learning_rate": 8.875486828776922e-06, "loss": 0.4948, "step": 15255 }, { "epoch": 0.7001055481620853, "grad_norm": 0.4945315420627594, "learning_rate": 8.875331904146171e-06, "loss": 0.4229, "step": 15256 }, { "epoch": 0.7001514386673398, "grad_norm": 0.442889541387558, "learning_rate": 8.875176970196473e-06, "loss": 0.3531, "step": 15257 }, { "epoch": 0.7001973291725941, "grad_norm": 0.4211094081401825, "learning_rate": 8.875022026928195e-06, "loss": 0.3241, "step": 15258 }, { "epoch": 0.7002432196778486, "grad_norm": 0.44450193643569946, "learning_rate": 8.874867074341712e-06, "loss": 0.3698, "step": 15259 }, { "epoch": 0.7002891101831031, "grad_norm": 0.470569372177124, "learning_rate": 8.874712112437397e-06, "loss": 0.4297, "step": 15260 }, { "epoch": 0.7003350006883576, "grad_norm": 0.4490543305873871, "learning_rate": 8.874557141215622e-06, "loss": 0.3704, "step": 15261 }, { "epoch": 0.700380891193612, "grad_norm": 0.445046991109848, "learning_rate": 8.874402160676761e-06, "loss": 0.4095, "step": 15262 }, { "epoch": 0.7004267816988665, "grad_norm": 0.46994394063949585, "learning_rate": 8.874247170821184e-06, "loss": 0.4145, "step": 15263 }, { "epoch": 0.700472672204121, "grad_norm": 0.4204995632171631, "learning_rate": 8.874092171649267e-06, "loss": 0.3051, "step": 15264 }, { "epoch": 0.7005185627093754, "grad_norm": 0.44294098019599915, "learning_rate": 8.87393716316138e-06, "loss": 0.3617, "step": 15265 }, { "epoch": 0.7005644532146299, "grad_norm": 0.4946003556251526, "learning_rate": 8.873782145357896e-06, "loss": 0.4141, "step": 15266 }, { "epoch": 0.7006103437198844, "grad_norm": 0.4453360438346863, "learning_rate": 8.873627118239189e-06, "loss": 0.4096, "step": 15267 }, { "epoch": 0.7006562342251388, "grad_norm": 0.4466598331928253, "learning_rate": 8.873472081805631e-06, "loss": 0.4037, "step": 15268 }, { "epoch": 0.7007021247303933, "grad_norm": 0.5217832326889038, "learning_rate": 8.873317036057598e-06, "loss": 0.5215, "step": 15269 }, { "epoch": 0.7007480152356478, "grad_norm": 0.45922544598579407, "learning_rate": 8.873161980995457e-06, "loss": 0.399, "step": 15270 }, { "epoch": 0.7007939057409022, "grad_norm": 0.4928055703639984, "learning_rate": 8.873006916619585e-06, "loss": 0.388, "step": 15271 }, { "epoch": 0.7008397962461567, "grad_norm": 0.4843306839466095, "learning_rate": 8.872851842930354e-06, "loss": 0.4444, "step": 15272 }, { "epoch": 0.7008856867514112, "grad_norm": 0.4655596911907196, "learning_rate": 8.872696759928134e-06, "loss": 0.3961, "step": 15273 }, { "epoch": 0.7009315772566656, "grad_norm": 0.4409341812133789, "learning_rate": 8.872541667613304e-06, "loss": 0.362, "step": 15274 }, { "epoch": 0.70097746776192, "grad_norm": 0.4653756320476532, "learning_rate": 8.872386565986231e-06, "loss": 0.4159, "step": 15275 }, { "epoch": 0.7010233582671745, "grad_norm": 0.4453827142715454, "learning_rate": 8.872231455047293e-06, "loss": 0.367, "step": 15276 }, { "epoch": 0.7010692487724289, "grad_norm": 0.5066048502922058, "learning_rate": 8.872076334796859e-06, "loss": 0.5074, "step": 15277 }, { "epoch": 0.7011151392776834, "grad_norm": 0.4510207176208496, "learning_rate": 8.871921205235304e-06, "loss": 0.4183, "step": 15278 }, { "epoch": 0.7011610297829379, "grad_norm": 0.4590039551258087, "learning_rate": 8.871766066363e-06, "loss": 0.4264, "step": 15279 }, { "epoch": 0.7012069202881924, "grad_norm": 0.4871574938297272, "learning_rate": 8.87161091818032e-06, "loss": 0.4082, "step": 15280 }, { "epoch": 0.7012528107934468, "grad_norm": 0.447847843170166, "learning_rate": 8.871455760687637e-06, "loss": 0.3883, "step": 15281 }, { "epoch": 0.7012987012987013, "grad_norm": 0.44927653670310974, "learning_rate": 8.871300593885327e-06, "loss": 0.416, "step": 15282 }, { "epoch": 0.7013445918039558, "grad_norm": 0.4417235553264618, "learning_rate": 8.871145417773758e-06, "loss": 0.3578, "step": 15283 }, { "epoch": 0.7013904823092102, "grad_norm": 0.4543023407459259, "learning_rate": 8.870990232353309e-06, "loss": 0.4264, "step": 15284 }, { "epoch": 0.7014363728144647, "grad_norm": 0.47682252526283264, "learning_rate": 8.87083503762435e-06, "loss": 0.4168, "step": 15285 }, { "epoch": 0.7014822633197192, "grad_norm": 0.49393871426582336, "learning_rate": 8.870679833587253e-06, "loss": 0.3984, "step": 15286 }, { "epoch": 0.7015281538249736, "grad_norm": 0.4497290253639221, "learning_rate": 8.870524620242393e-06, "loss": 0.3731, "step": 15287 }, { "epoch": 0.7015740443302281, "grad_norm": 0.5179337859153748, "learning_rate": 8.870369397590144e-06, "loss": 0.5057, "step": 15288 }, { "epoch": 0.7016199348354826, "grad_norm": 0.45535415410995483, "learning_rate": 8.870214165630878e-06, "loss": 0.3953, "step": 15289 }, { "epoch": 0.701665825340737, "grad_norm": 0.4394325613975525, "learning_rate": 8.870058924364967e-06, "loss": 0.3673, "step": 15290 }, { "epoch": 0.7017117158459915, "grad_norm": 0.4603269100189209, "learning_rate": 8.869903673792786e-06, "loss": 0.4307, "step": 15291 }, { "epoch": 0.701757606351246, "grad_norm": 0.43877696990966797, "learning_rate": 8.869748413914707e-06, "loss": 0.39, "step": 15292 }, { "epoch": 0.7018034968565003, "grad_norm": 0.45589444041252136, "learning_rate": 8.869593144731107e-06, "loss": 0.3915, "step": 15293 }, { "epoch": 0.7018493873617548, "grad_norm": 0.48724251985549927, "learning_rate": 8.869437866242355e-06, "loss": 0.5122, "step": 15294 }, { "epoch": 0.7018952778670093, "grad_norm": 0.43473827838897705, "learning_rate": 8.869282578448828e-06, "loss": 0.3901, "step": 15295 }, { "epoch": 0.7019411683722637, "grad_norm": 0.49126118421554565, "learning_rate": 8.869127281350896e-06, "loss": 0.5008, "step": 15296 }, { "epoch": 0.7019870588775182, "grad_norm": 0.4684297442436218, "learning_rate": 8.868971974948932e-06, "loss": 0.3945, "step": 15297 }, { "epoch": 0.7020329493827727, "grad_norm": 0.46284162998199463, "learning_rate": 8.868816659243316e-06, "loss": 0.3986, "step": 15298 }, { "epoch": 0.7020788398880272, "grad_norm": 0.47493064403533936, "learning_rate": 8.868661334234414e-06, "loss": 0.4379, "step": 15299 }, { "epoch": 0.7021247303932816, "grad_norm": 0.4944457411766052, "learning_rate": 8.868505999922603e-06, "loss": 0.444, "step": 15300 }, { "epoch": 0.7021706208985361, "grad_norm": 0.4436239004135132, "learning_rate": 8.868350656308256e-06, "loss": 0.3766, "step": 15301 }, { "epoch": 0.7022165114037906, "grad_norm": 0.4351639151573181, "learning_rate": 8.868195303391747e-06, "loss": 0.3513, "step": 15302 }, { "epoch": 0.702262401909045, "grad_norm": 0.4514792263507843, "learning_rate": 8.868039941173448e-06, "loss": 0.3806, "step": 15303 }, { "epoch": 0.7023082924142995, "grad_norm": 0.47317075729370117, "learning_rate": 8.867884569653735e-06, "loss": 0.4121, "step": 15304 }, { "epoch": 0.702354182919554, "grad_norm": 0.6164528131484985, "learning_rate": 8.867729188832981e-06, "loss": 0.4767, "step": 15305 }, { "epoch": 0.7024000734248084, "grad_norm": 0.42011305689811707, "learning_rate": 8.867573798711558e-06, "loss": 0.3357, "step": 15306 }, { "epoch": 0.7024459639300629, "grad_norm": 0.4813268780708313, "learning_rate": 8.86741839928984e-06, "loss": 0.4818, "step": 15307 }, { "epoch": 0.7024918544353174, "grad_norm": 0.47319892048835754, "learning_rate": 8.867262990568201e-06, "loss": 0.4657, "step": 15308 }, { "epoch": 0.7025377449405718, "grad_norm": 0.47796881198883057, "learning_rate": 8.867107572547017e-06, "loss": 0.4507, "step": 15309 }, { "epoch": 0.7025836354458262, "grad_norm": 0.45318812131881714, "learning_rate": 8.866952145226659e-06, "loss": 0.3631, "step": 15310 }, { "epoch": 0.7026295259510807, "grad_norm": 0.4498036503791809, "learning_rate": 8.8667967086075e-06, "loss": 0.3838, "step": 15311 }, { "epoch": 0.7026754164563351, "grad_norm": 0.4448075592517853, "learning_rate": 8.866641262689918e-06, "loss": 0.3628, "step": 15312 }, { "epoch": 0.7027213069615896, "grad_norm": 0.4660280644893646, "learning_rate": 8.866485807474282e-06, "loss": 0.3896, "step": 15313 }, { "epoch": 0.7027671974668441, "grad_norm": 0.41872724890708923, "learning_rate": 8.86633034296097e-06, "loss": 0.3136, "step": 15314 }, { "epoch": 0.7028130879720986, "grad_norm": 0.4663838744163513, "learning_rate": 8.866174869150352e-06, "loss": 0.3866, "step": 15315 }, { "epoch": 0.702858978477353, "grad_norm": 0.43932923674583435, "learning_rate": 8.866019386042804e-06, "loss": 0.3628, "step": 15316 }, { "epoch": 0.7029048689826075, "grad_norm": 0.47448521852493286, "learning_rate": 8.865863893638699e-06, "loss": 0.4381, "step": 15317 }, { "epoch": 0.702950759487862, "grad_norm": 0.4701744318008423, "learning_rate": 8.865708391938412e-06, "loss": 0.4104, "step": 15318 }, { "epoch": 0.7029966499931164, "grad_norm": 0.4675033390522003, "learning_rate": 8.865552880942316e-06, "loss": 0.4181, "step": 15319 }, { "epoch": 0.7030425404983709, "grad_norm": 0.4824400842189789, "learning_rate": 8.865397360650786e-06, "loss": 0.4482, "step": 15320 }, { "epoch": 0.7030884310036254, "grad_norm": 0.40212276577949524, "learning_rate": 8.865241831064196e-06, "loss": 0.3151, "step": 15321 }, { "epoch": 0.7031343215088798, "grad_norm": 0.47795942425727844, "learning_rate": 8.865086292182917e-06, "loss": 0.3506, "step": 15322 }, { "epoch": 0.7031802120141343, "grad_norm": 0.4723162353038788, "learning_rate": 8.864930744007328e-06, "loss": 0.4514, "step": 15323 }, { "epoch": 0.7032261025193888, "grad_norm": 0.46392154693603516, "learning_rate": 8.864775186537797e-06, "loss": 0.4163, "step": 15324 }, { "epoch": 0.7032719930246432, "grad_norm": 0.44807443022727966, "learning_rate": 8.864619619774706e-06, "loss": 0.4158, "step": 15325 }, { "epoch": 0.7033178835298977, "grad_norm": 0.4523496627807617, "learning_rate": 8.864464043718421e-06, "loss": 0.3223, "step": 15326 }, { "epoch": 0.7033637740351522, "grad_norm": 0.48893532156944275, "learning_rate": 8.864308458369319e-06, "loss": 0.4379, "step": 15327 }, { "epoch": 0.7034096645404065, "grad_norm": 0.42681941390037537, "learning_rate": 8.864152863727778e-06, "loss": 0.3116, "step": 15328 }, { "epoch": 0.703455555045661, "grad_norm": 0.4386947453022003, "learning_rate": 8.863997259794167e-06, "loss": 0.3552, "step": 15329 }, { "epoch": 0.7035014455509155, "grad_norm": 0.450403094291687, "learning_rate": 8.863841646568863e-06, "loss": 0.4357, "step": 15330 }, { "epoch": 0.7035473360561699, "grad_norm": 0.45087432861328125, "learning_rate": 8.863686024052239e-06, "loss": 0.3724, "step": 15331 }, { "epoch": 0.7035932265614244, "grad_norm": 0.4423559606075287, "learning_rate": 8.863530392244667e-06, "loss": 0.3465, "step": 15332 }, { "epoch": 0.7036391170666789, "grad_norm": 0.4362533688545227, "learning_rate": 8.863374751146527e-06, "loss": 0.3275, "step": 15333 }, { "epoch": 0.7036850075719334, "grad_norm": 0.46797019243240356, "learning_rate": 8.86321910075819e-06, "loss": 0.4128, "step": 15334 }, { "epoch": 0.7037308980771878, "grad_norm": 0.5074214935302734, "learning_rate": 8.863063441080029e-06, "loss": 0.5412, "step": 15335 }, { "epoch": 0.7037767885824423, "grad_norm": 0.45028167963027954, "learning_rate": 8.86290777211242e-06, "loss": 0.3533, "step": 15336 }, { "epoch": 0.7038226790876968, "grad_norm": 0.44906750321388245, "learning_rate": 8.862752093855736e-06, "loss": 0.4064, "step": 15337 }, { "epoch": 0.7038685695929512, "grad_norm": 0.4328353703022003, "learning_rate": 8.862596406310352e-06, "loss": 0.3423, "step": 15338 }, { "epoch": 0.7039144600982057, "grad_norm": 0.48786914348602295, "learning_rate": 8.862440709476646e-06, "loss": 0.4428, "step": 15339 }, { "epoch": 0.7039603506034602, "grad_norm": 2.4023451805114746, "learning_rate": 8.862285003354986e-06, "loss": 0.4632, "step": 15340 }, { "epoch": 0.7040062411087146, "grad_norm": 0.4542851448059082, "learning_rate": 8.86212928794575e-06, "loss": 0.425, "step": 15341 }, { "epoch": 0.7040521316139691, "grad_norm": 0.4616072177886963, "learning_rate": 8.861973563249314e-06, "loss": 0.3674, "step": 15342 }, { "epoch": 0.7040980221192236, "grad_norm": 0.4486668109893799, "learning_rate": 8.86181782926605e-06, "loss": 0.3823, "step": 15343 }, { "epoch": 0.704143912624478, "grad_norm": 0.45665648579597473, "learning_rate": 8.861662085996332e-06, "loss": 0.3836, "step": 15344 }, { "epoch": 0.7041898031297325, "grad_norm": 0.47215524315834045, "learning_rate": 8.861506333440535e-06, "loss": 0.44, "step": 15345 }, { "epoch": 0.704235693634987, "grad_norm": 0.4670223891735077, "learning_rate": 8.861350571599035e-06, "loss": 0.416, "step": 15346 }, { "epoch": 0.7042815841402413, "grad_norm": 0.4645777642726898, "learning_rate": 8.861194800472206e-06, "loss": 0.4039, "step": 15347 }, { "epoch": 0.7043274746454958, "grad_norm": 0.48237645626068115, "learning_rate": 8.86103902006042e-06, "loss": 0.4796, "step": 15348 }, { "epoch": 0.7043733651507503, "grad_norm": 0.4395120441913605, "learning_rate": 8.860883230364054e-06, "loss": 0.3401, "step": 15349 }, { "epoch": 0.7044192556560048, "grad_norm": 0.4802260100841522, "learning_rate": 8.860727431383484e-06, "loss": 0.4507, "step": 15350 }, { "epoch": 0.7044651461612592, "grad_norm": 0.40397658944129944, "learning_rate": 8.860571623119083e-06, "loss": 0.3237, "step": 15351 }, { "epoch": 0.7045110366665137, "grad_norm": 0.4285014271736145, "learning_rate": 8.860415805571225e-06, "loss": 0.3334, "step": 15352 }, { "epoch": 0.7045569271717682, "grad_norm": 0.5113673210144043, "learning_rate": 8.860259978740286e-06, "loss": 0.5131, "step": 15353 }, { "epoch": 0.7046028176770226, "grad_norm": 0.5180298089981079, "learning_rate": 8.86010414262664e-06, "loss": 0.4846, "step": 15354 }, { "epoch": 0.7046487081822771, "grad_norm": 0.4271198809146881, "learning_rate": 8.85994829723066e-06, "loss": 0.3729, "step": 15355 }, { "epoch": 0.7046945986875316, "grad_norm": 0.4483506977558136, "learning_rate": 8.859792442552723e-06, "loss": 0.3834, "step": 15356 }, { "epoch": 0.704740489192786, "grad_norm": 0.4692017734050751, "learning_rate": 8.859636578593204e-06, "loss": 0.4107, "step": 15357 }, { "epoch": 0.7047863796980405, "grad_norm": 0.48315146565437317, "learning_rate": 8.85948070535248e-06, "loss": 0.4653, "step": 15358 }, { "epoch": 0.704832270203295, "grad_norm": 0.4643354117870331, "learning_rate": 8.85932482283092e-06, "loss": 0.4135, "step": 15359 }, { "epoch": 0.7048781607085494, "grad_norm": 0.5070493221282959, "learning_rate": 8.8591689310289e-06, "loss": 0.4661, "step": 15360 }, { "epoch": 0.7049240512138039, "grad_norm": 0.4862259328365326, "learning_rate": 8.8590130299468e-06, "loss": 0.4639, "step": 15361 }, { "epoch": 0.7049699417190584, "grad_norm": 0.44670534133911133, "learning_rate": 8.85885711958499e-06, "loss": 0.4108, "step": 15362 }, { "epoch": 0.7050158322243127, "grad_norm": 0.47248533368110657, "learning_rate": 8.858701199943848e-06, "loss": 0.4209, "step": 15363 }, { "epoch": 0.7050617227295672, "grad_norm": 0.4051218628883362, "learning_rate": 8.858545271023746e-06, "loss": 0.3029, "step": 15364 }, { "epoch": 0.7051076132348217, "grad_norm": 0.44173547625541687, "learning_rate": 8.858389332825062e-06, "loss": 0.4628, "step": 15365 }, { "epoch": 0.7051535037400761, "grad_norm": 0.42290088534355164, "learning_rate": 8.858233385348167e-06, "loss": 0.3289, "step": 15366 }, { "epoch": 0.7051993942453306, "grad_norm": 0.48536011576652527, "learning_rate": 8.85807742859344e-06, "loss": 0.4264, "step": 15367 }, { "epoch": 0.7052452847505851, "grad_norm": 0.4500810503959656, "learning_rate": 8.857921462561255e-06, "loss": 0.3948, "step": 15368 }, { "epoch": 0.7052911752558396, "grad_norm": 0.43791645765304565, "learning_rate": 8.857765487251986e-06, "loss": 0.3233, "step": 15369 }, { "epoch": 0.705337065761094, "grad_norm": 0.4729611873626709, "learning_rate": 8.857609502666006e-06, "loss": 0.3852, "step": 15370 }, { "epoch": 0.7053829562663485, "grad_norm": 0.41330844163894653, "learning_rate": 8.857453508803695e-06, "loss": 0.3089, "step": 15371 }, { "epoch": 0.705428846771603, "grad_norm": 0.4496743977069855, "learning_rate": 8.857297505665426e-06, "loss": 0.4099, "step": 15372 }, { "epoch": 0.7054747372768574, "grad_norm": 0.438028484582901, "learning_rate": 8.857141493251574e-06, "loss": 0.3495, "step": 15373 }, { "epoch": 0.7055206277821119, "grad_norm": 0.44070401787757874, "learning_rate": 8.856985471562514e-06, "loss": 0.3634, "step": 15374 }, { "epoch": 0.7055665182873664, "grad_norm": 0.46683695912361145, "learning_rate": 8.85682944059862e-06, "loss": 0.402, "step": 15375 }, { "epoch": 0.7056124087926208, "grad_norm": 0.4212355315685272, "learning_rate": 8.856673400360269e-06, "loss": 0.3403, "step": 15376 }, { "epoch": 0.7056582992978753, "grad_norm": 0.46170690655708313, "learning_rate": 8.856517350847837e-06, "loss": 0.4548, "step": 15377 }, { "epoch": 0.7057041898031298, "grad_norm": 0.43015649914741516, "learning_rate": 8.856361292061696e-06, "loss": 0.3252, "step": 15378 }, { "epoch": 0.7057500803083842, "grad_norm": 0.4924994707107544, "learning_rate": 8.856205224002224e-06, "loss": 0.448, "step": 15379 }, { "epoch": 0.7057959708136387, "grad_norm": 0.44125211238861084, "learning_rate": 8.856049146669796e-06, "loss": 0.3615, "step": 15380 }, { "epoch": 0.7058418613188931, "grad_norm": 0.45056939125061035, "learning_rate": 8.855893060064787e-06, "loss": 0.4104, "step": 15381 }, { "epoch": 0.7058877518241475, "grad_norm": 0.42887240648269653, "learning_rate": 8.85573696418757e-06, "loss": 0.3692, "step": 15382 }, { "epoch": 0.705933642329402, "grad_norm": 0.4376792013645172, "learning_rate": 8.855580859038524e-06, "loss": 0.3633, "step": 15383 }, { "epoch": 0.7059795328346565, "grad_norm": 0.4779284596443176, "learning_rate": 8.855424744618023e-06, "loss": 0.4299, "step": 15384 }, { "epoch": 0.7060254233399109, "grad_norm": 0.4681902825832367, "learning_rate": 8.855268620926442e-06, "loss": 0.379, "step": 15385 }, { "epoch": 0.7060713138451654, "grad_norm": 0.4625047743320465, "learning_rate": 8.855112487964157e-06, "loss": 0.4148, "step": 15386 }, { "epoch": 0.7061172043504199, "grad_norm": 0.4290919303894043, "learning_rate": 8.854956345731543e-06, "loss": 0.3344, "step": 15387 }, { "epoch": 0.7061630948556744, "grad_norm": 0.4543091952800751, "learning_rate": 8.854800194228977e-06, "loss": 0.3843, "step": 15388 }, { "epoch": 0.7062089853609288, "grad_norm": 0.4304720461368561, "learning_rate": 8.854644033456831e-06, "loss": 0.3215, "step": 15389 }, { "epoch": 0.7062548758661833, "grad_norm": 0.47646355628967285, "learning_rate": 8.854487863415483e-06, "loss": 0.4136, "step": 15390 }, { "epoch": 0.7063007663714378, "grad_norm": 0.42578306794166565, "learning_rate": 8.854331684105308e-06, "loss": 0.3387, "step": 15391 }, { "epoch": 0.7063466568766922, "grad_norm": 0.4847331643104553, "learning_rate": 8.854175495526684e-06, "loss": 0.4895, "step": 15392 }, { "epoch": 0.7063925473819467, "grad_norm": 0.524849534034729, "learning_rate": 8.854019297679982e-06, "loss": 0.4883, "step": 15393 }, { "epoch": 0.7064384378872012, "grad_norm": 0.54474276304245, "learning_rate": 8.853863090565581e-06, "loss": 0.5256, "step": 15394 }, { "epoch": 0.7064843283924556, "grad_norm": 0.4386679232120514, "learning_rate": 8.853706874183854e-06, "loss": 0.3499, "step": 15395 }, { "epoch": 0.7065302188977101, "grad_norm": 0.438931941986084, "learning_rate": 8.85355064853518e-06, "loss": 0.3724, "step": 15396 }, { "epoch": 0.7065761094029646, "grad_norm": 0.4804537892341614, "learning_rate": 8.853394413619933e-06, "loss": 0.4568, "step": 15397 }, { "epoch": 0.706621999908219, "grad_norm": 0.421892374753952, "learning_rate": 8.853238169438488e-06, "loss": 0.3617, "step": 15398 }, { "epoch": 0.7066678904134734, "grad_norm": 0.44802194833755493, "learning_rate": 8.853081915991222e-06, "loss": 0.4069, "step": 15399 }, { "epoch": 0.7067137809187279, "grad_norm": 0.44459229707717896, "learning_rate": 8.852925653278509e-06, "loss": 0.3931, "step": 15400 }, { "epoch": 0.7067596714239823, "grad_norm": 0.4145488440990448, "learning_rate": 8.852769381300727e-06, "loss": 0.3125, "step": 15401 }, { "epoch": 0.7068055619292368, "grad_norm": 0.44795331358909607, "learning_rate": 8.85261310005825e-06, "loss": 0.3436, "step": 15402 }, { "epoch": 0.7068514524344913, "grad_norm": 0.5064924955368042, "learning_rate": 8.852456809551453e-06, "loss": 0.3545, "step": 15403 }, { "epoch": 0.7068973429397458, "grad_norm": 0.43935829401016235, "learning_rate": 8.852300509780716e-06, "loss": 0.3849, "step": 15404 }, { "epoch": 0.7069432334450002, "grad_norm": 0.44526517391204834, "learning_rate": 8.85214420074641e-06, "loss": 0.3471, "step": 15405 }, { "epoch": 0.7069891239502547, "grad_norm": 0.4927416741847992, "learning_rate": 8.851987882448913e-06, "loss": 0.4469, "step": 15406 }, { "epoch": 0.7070350144555092, "grad_norm": 0.6479256749153137, "learning_rate": 8.8518315548886e-06, "loss": 0.3492, "step": 15407 }, { "epoch": 0.7070809049607636, "grad_norm": 0.4275621771812439, "learning_rate": 8.851675218065851e-06, "loss": 0.3228, "step": 15408 }, { "epoch": 0.7071267954660181, "grad_norm": 0.4764132797718048, "learning_rate": 8.851518871981038e-06, "loss": 0.4261, "step": 15409 }, { "epoch": 0.7071726859712726, "grad_norm": 0.477669894695282, "learning_rate": 8.851362516634537e-06, "loss": 0.391, "step": 15410 }, { "epoch": 0.707218576476527, "grad_norm": 0.46213603019714355, "learning_rate": 8.851206152026724e-06, "loss": 0.3907, "step": 15411 }, { "epoch": 0.7072644669817815, "grad_norm": 0.47017014026641846, "learning_rate": 8.851049778157975e-06, "loss": 0.4335, "step": 15412 }, { "epoch": 0.707310357487036, "grad_norm": 0.4846242070198059, "learning_rate": 8.850893395028668e-06, "loss": 0.4037, "step": 15413 }, { "epoch": 0.7073562479922904, "grad_norm": 0.5150488615036011, "learning_rate": 8.850737002639178e-06, "loss": 0.419, "step": 15414 }, { "epoch": 0.7074021384975449, "grad_norm": 0.4290221333503723, "learning_rate": 8.85058060098988e-06, "loss": 0.3586, "step": 15415 }, { "epoch": 0.7074480290027994, "grad_norm": 0.48115789890289307, "learning_rate": 8.85042419008115e-06, "loss": 0.4246, "step": 15416 }, { "epoch": 0.7074939195080537, "grad_norm": 0.48476967215538025, "learning_rate": 8.850267769913368e-06, "loss": 0.5144, "step": 15417 }, { "epoch": 0.7075398100133082, "grad_norm": 0.4595085680484772, "learning_rate": 8.850111340486904e-06, "loss": 0.436, "step": 15418 }, { "epoch": 0.7075857005185627, "grad_norm": 0.4457300007343292, "learning_rate": 8.84995490180214e-06, "loss": 0.3629, "step": 15419 }, { "epoch": 0.7076315910238171, "grad_norm": 0.4694974720478058, "learning_rate": 8.849798453859448e-06, "loss": 0.4544, "step": 15420 }, { "epoch": 0.7076774815290716, "grad_norm": 0.4014597237110138, "learning_rate": 8.849641996659206e-06, "loss": 0.3443, "step": 15421 }, { "epoch": 0.7077233720343261, "grad_norm": 0.46210533380508423, "learning_rate": 8.84948553020179e-06, "loss": 0.369, "step": 15422 }, { "epoch": 0.7077692625395806, "grad_norm": 0.4576537609100342, "learning_rate": 8.849329054487576e-06, "loss": 0.3984, "step": 15423 }, { "epoch": 0.707815153044835, "grad_norm": 0.4509550929069519, "learning_rate": 8.849172569516942e-06, "loss": 0.3924, "step": 15424 }, { "epoch": 0.7078610435500895, "grad_norm": 0.4774022102355957, "learning_rate": 8.849016075290262e-06, "loss": 0.4329, "step": 15425 }, { "epoch": 0.707906934055344, "grad_norm": 0.4913226068019867, "learning_rate": 8.848859571807913e-06, "loss": 0.4757, "step": 15426 }, { "epoch": 0.7079528245605984, "grad_norm": 0.5216846466064453, "learning_rate": 8.848703059070269e-06, "loss": 0.5284, "step": 15427 }, { "epoch": 0.7079987150658529, "grad_norm": 0.4755108952522278, "learning_rate": 8.848546537077713e-06, "loss": 0.4076, "step": 15428 }, { "epoch": 0.7080446055711074, "grad_norm": 0.45100149512290955, "learning_rate": 8.848390005830615e-06, "loss": 0.3951, "step": 15429 }, { "epoch": 0.7080904960763618, "grad_norm": 0.45336994528770447, "learning_rate": 8.848233465329354e-06, "loss": 0.4049, "step": 15430 }, { "epoch": 0.7081363865816163, "grad_norm": 0.4541419744491577, "learning_rate": 8.848076915574306e-06, "loss": 0.3455, "step": 15431 }, { "epoch": 0.7081822770868708, "grad_norm": 0.4507831633090973, "learning_rate": 8.847920356565846e-06, "loss": 0.375, "step": 15432 }, { "epoch": 0.7082281675921251, "grad_norm": 0.4275922477245331, "learning_rate": 8.847763788304356e-06, "loss": 0.3775, "step": 15433 }, { "epoch": 0.7082740580973796, "grad_norm": 0.49520406126976013, "learning_rate": 8.847607210790204e-06, "loss": 0.4534, "step": 15434 }, { "epoch": 0.7083199486026341, "grad_norm": 0.42178016901016235, "learning_rate": 8.847450624023773e-06, "loss": 0.3304, "step": 15435 }, { "epoch": 0.7083658391078885, "grad_norm": 0.5048931837081909, "learning_rate": 8.847294028005438e-06, "loss": 0.4421, "step": 15436 }, { "epoch": 0.708411729613143, "grad_norm": 0.4656357169151306, "learning_rate": 8.847137422735575e-06, "loss": 0.3858, "step": 15437 }, { "epoch": 0.7084576201183975, "grad_norm": 0.4552110731601715, "learning_rate": 8.846980808214561e-06, "loss": 0.4303, "step": 15438 }, { "epoch": 0.7085035106236519, "grad_norm": 0.5013412833213806, "learning_rate": 8.84682418444277e-06, "loss": 0.4973, "step": 15439 }, { "epoch": 0.7085494011289064, "grad_norm": 0.4904690086841583, "learning_rate": 8.846667551420582e-06, "loss": 0.4064, "step": 15440 }, { "epoch": 0.7085952916341609, "grad_norm": 0.46359390020370483, "learning_rate": 8.846510909148373e-06, "loss": 0.4401, "step": 15441 }, { "epoch": 0.7086411821394154, "grad_norm": 0.4749346673488617, "learning_rate": 8.84635425762652e-06, "loss": 0.3759, "step": 15442 }, { "epoch": 0.7086870726446698, "grad_norm": 0.4849734604358673, "learning_rate": 8.846197596855399e-06, "loss": 0.4385, "step": 15443 }, { "epoch": 0.7087329631499243, "grad_norm": 0.4457416236400604, "learning_rate": 8.846040926835385e-06, "loss": 0.3309, "step": 15444 }, { "epoch": 0.7087788536551788, "grad_norm": 0.42112627625465393, "learning_rate": 8.845884247566859e-06, "loss": 0.3268, "step": 15445 }, { "epoch": 0.7088247441604332, "grad_norm": 0.4570595920085907, "learning_rate": 8.845727559050193e-06, "loss": 0.4305, "step": 15446 }, { "epoch": 0.7088706346656877, "grad_norm": 0.4492507576942444, "learning_rate": 8.845570861285766e-06, "loss": 0.3891, "step": 15447 }, { "epoch": 0.7089165251709422, "grad_norm": 0.5062021017074585, "learning_rate": 8.845414154273955e-06, "loss": 0.3823, "step": 15448 }, { "epoch": 0.7089624156761966, "grad_norm": 0.43542948365211487, "learning_rate": 8.845257438015137e-06, "loss": 0.3505, "step": 15449 }, { "epoch": 0.709008306181451, "grad_norm": 0.45576897263526917, "learning_rate": 8.84510071250969e-06, "loss": 0.352, "step": 15450 }, { "epoch": 0.7090541966867056, "grad_norm": 0.43054863810539246, "learning_rate": 8.844943977757986e-06, "loss": 0.3805, "step": 15451 }, { "epoch": 0.7091000871919599, "grad_norm": 0.4283328354358673, "learning_rate": 8.844787233760409e-06, "loss": 0.3652, "step": 15452 }, { "epoch": 0.7091459776972144, "grad_norm": 0.4831283390522003, "learning_rate": 8.84463048051733e-06, "loss": 0.4783, "step": 15453 }, { "epoch": 0.7091918682024689, "grad_norm": 0.432017058134079, "learning_rate": 8.844473718029127e-06, "loss": 0.396, "step": 15454 }, { "epoch": 0.7092377587077233, "grad_norm": 0.4756677746772766, "learning_rate": 8.844316946296179e-06, "loss": 0.5007, "step": 15455 }, { "epoch": 0.7092836492129778, "grad_norm": 0.45042964816093445, "learning_rate": 8.844160165318862e-06, "loss": 0.3903, "step": 15456 }, { "epoch": 0.7093295397182323, "grad_norm": 0.4317377209663391, "learning_rate": 8.844003375097553e-06, "loss": 0.3587, "step": 15457 }, { "epoch": 0.7093754302234868, "grad_norm": 0.48564499616622925, "learning_rate": 8.843846575632631e-06, "loss": 0.4416, "step": 15458 }, { "epoch": 0.7094213207287412, "grad_norm": 0.4756935238838196, "learning_rate": 8.843689766924469e-06, "loss": 0.4378, "step": 15459 }, { "epoch": 0.7094672112339957, "grad_norm": 0.4673527777194977, "learning_rate": 8.843532948973447e-06, "loss": 0.3964, "step": 15460 }, { "epoch": 0.7095131017392502, "grad_norm": 0.44505345821380615, "learning_rate": 8.843376121779941e-06, "loss": 0.3783, "step": 15461 }, { "epoch": 0.7095589922445046, "grad_norm": 0.43367916345596313, "learning_rate": 8.843219285344328e-06, "loss": 0.3309, "step": 15462 }, { "epoch": 0.7096048827497591, "grad_norm": 0.43835362792015076, "learning_rate": 8.843062439666986e-06, "loss": 0.3278, "step": 15463 }, { "epoch": 0.7096507732550136, "grad_norm": 0.5083480477333069, "learning_rate": 8.842905584748292e-06, "loss": 0.5513, "step": 15464 }, { "epoch": 0.709696663760268, "grad_norm": 0.44464245438575745, "learning_rate": 8.842748720588623e-06, "loss": 0.382, "step": 15465 }, { "epoch": 0.7097425542655225, "grad_norm": 0.4823538064956665, "learning_rate": 8.842591847188355e-06, "loss": 0.4262, "step": 15466 }, { "epoch": 0.709788444770777, "grad_norm": 0.44182926416397095, "learning_rate": 8.842434964547867e-06, "loss": 0.3419, "step": 15467 }, { "epoch": 0.7098343352760313, "grad_norm": 0.4673125743865967, "learning_rate": 8.842278072667536e-06, "loss": 0.4209, "step": 15468 }, { "epoch": 0.7098802257812858, "grad_norm": 0.4260706901550293, "learning_rate": 8.84212117154774e-06, "loss": 0.3617, "step": 15469 }, { "epoch": 0.7099261162865403, "grad_norm": 0.47984281182289124, "learning_rate": 8.841964261188853e-06, "loss": 0.4356, "step": 15470 }, { "epoch": 0.7099720067917947, "grad_norm": 0.5031965374946594, "learning_rate": 8.841807341591255e-06, "loss": 0.4856, "step": 15471 }, { "epoch": 0.7100178972970492, "grad_norm": 0.46491318941116333, "learning_rate": 8.841650412755325e-06, "loss": 0.4304, "step": 15472 }, { "epoch": 0.7100637878023037, "grad_norm": 0.44671010971069336, "learning_rate": 8.841493474681437e-06, "loss": 0.4109, "step": 15473 }, { "epoch": 0.7101096783075581, "grad_norm": 0.5169432759284973, "learning_rate": 8.841336527369968e-06, "loss": 0.454, "step": 15474 }, { "epoch": 0.7101555688128126, "grad_norm": 0.46499308943748474, "learning_rate": 8.841179570821299e-06, "loss": 0.4107, "step": 15475 }, { "epoch": 0.7102014593180671, "grad_norm": 0.46452900767326355, "learning_rate": 8.841022605035804e-06, "loss": 0.4144, "step": 15476 }, { "epoch": 0.7102473498233216, "grad_norm": 0.4622470736503601, "learning_rate": 8.840865630013861e-06, "loss": 0.406, "step": 15477 }, { "epoch": 0.710293240328576, "grad_norm": 0.435015469789505, "learning_rate": 8.84070864575585e-06, "loss": 0.3799, "step": 15478 }, { "epoch": 0.7103391308338305, "grad_norm": 0.4552239775657654, "learning_rate": 8.840551652262148e-06, "loss": 0.3863, "step": 15479 }, { "epoch": 0.710385021339085, "grad_norm": 0.4081590175628662, "learning_rate": 8.84039464953313e-06, "loss": 0.3369, "step": 15480 }, { "epoch": 0.7104309118443394, "grad_norm": 0.44889116287231445, "learning_rate": 8.840237637569175e-06, "loss": 0.4123, "step": 15481 }, { "epoch": 0.7104768023495939, "grad_norm": 0.5037751793861389, "learning_rate": 8.84008061637066e-06, "loss": 0.4937, "step": 15482 }, { "epoch": 0.7105226928548484, "grad_norm": 0.4261554479598999, "learning_rate": 8.839923585937965e-06, "loss": 0.3322, "step": 15483 }, { "epoch": 0.7105685833601028, "grad_norm": 0.39779770374298096, "learning_rate": 8.839766546271465e-06, "loss": 0.2995, "step": 15484 }, { "epoch": 0.7106144738653573, "grad_norm": 0.4634862542152405, "learning_rate": 8.839609497371537e-06, "loss": 0.4458, "step": 15485 }, { "epoch": 0.7106603643706118, "grad_norm": 0.47490790486335754, "learning_rate": 8.83945243923856e-06, "loss": 0.4558, "step": 15486 }, { "epoch": 0.7107062548758661, "grad_norm": 0.4840882122516632, "learning_rate": 8.839295371872912e-06, "loss": 0.4899, "step": 15487 }, { "epoch": 0.7107521453811206, "grad_norm": 0.45656970143318176, "learning_rate": 8.839138295274971e-06, "loss": 0.3998, "step": 15488 }, { "epoch": 0.7107980358863751, "grad_norm": 0.4725770950317383, "learning_rate": 8.838981209445114e-06, "loss": 0.443, "step": 15489 }, { "epoch": 0.7108439263916295, "grad_norm": 0.46444380283355713, "learning_rate": 8.838824114383719e-06, "loss": 0.3618, "step": 15490 }, { "epoch": 0.710889816896884, "grad_norm": 0.4527285695075989, "learning_rate": 8.838667010091161e-06, "loss": 0.3725, "step": 15491 }, { "epoch": 0.7109357074021385, "grad_norm": 0.4533938765525818, "learning_rate": 8.838509896567823e-06, "loss": 0.393, "step": 15492 }, { "epoch": 0.710981597907393, "grad_norm": 0.46753445267677307, "learning_rate": 8.838352773814081e-06, "loss": 0.423, "step": 15493 }, { "epoch": 0.7110274884126474, "grad_norm": 0.43710485100746155, "learning_rate": 8.83819564183031e-06, "loss": 0.4377, "step": 15494 }, { "epoch": 0.7110733789179019, "grad_norm": 0.48153793811798096, "learning_rate": 8.83803850061689e-06, "loss": 0.4319, "step": 15495 }, { "epoch": 0.7111192694231564, "grad_norm": 0.48703303933143616, "learning_rate": 8.8378813501742e-06, "loss": 0.4008, "step": 15496 }, { "epoch": 0.7111651599284108, "grad_norm": 0.5200701355934143, "learning_rate": 8.837724190502615e-06, "loss": 0.502, "step": 15497 }, { "epoch": 0.7112110504336653, "grad_norm": 0.4861406087875366, "learning_rate": 8.837567021602516e-06, "loss": 0.431, "step": 15498 }, { "epoch": 0.7112569409389198, "grad_norm": 0.4543859362602234, "learning_rate": 8.837409843474278e-06, "loss": 0.3904, "step": 15499 }, { "epoch": 0.7113028314441742, "grad_norm": 0.4633701741695404, "learning_rate": 8.837252656118283e-06, "loss": 0.4255, "step": 15500 }, { "epoch": 0.7113487219494287, "grad_norm": 0.4609166085720062, "learning_rate": 8.837095459534903e-06, "loss": 0.4612, "step": 15501 }, { "epoch": 0.7113946124546832, "grad_norm": 0.4380700886249542, "learning_rate": 8.836938253724522e-06, "loss": 0.365, "step": 15502 }, { "epoch": 0.7114405029599375, "grad_norm": 0.48445984721183777, "learning_rate": 8.836781038687514e-06, "loss": 0.4468, "step": 15503 }, { "epoch": 0.711486393465192, "grad_norm": 0.4344775974750519, "learning_rate": 8.836623814424258e-06, "loss": 0.3525, "step": 15504 }, { "epoch": 0.7115322839704465, "grad_norm": 0.4633212089538574, "learning_rate": 8.836466580935134e-06, "loss": 0.4431, "step": 15505 }, { "epoch": 0.7115781744757009, "grad_norm": 0.469025194644928, "learning_rate": 8.836309338220518e-06, "loss": 0.4019, "step": 15506 }, { "epoch": 0.7116240649809554, "grad_norm": 0.45229193568229675, "learning_rate": 8.83615208628079e-06, "loss": 0.4172, "step": 15507 }, { "epoch": 0.7116699554862099, "grad_norm": 0.4528132975101471, "learning_rate": 8.835994825116327e-06, "loss": 0.3682, "step": 15508 }, { "epoch": 0.7117158459914643, "grad_norm": 0.4499376714229584, "learning_rate": 8.835837554727507e-06, "loss": 0.3638, "step": 15509 }, { "epoch": 0.7117617364967188, "grad_norm": 0.46299105882644653, "learning_rate": 8.835680275114708e-06, "loss": 0.4122, "step": 15510 }, { "epoch": 0.7118076270019733, "grad_norm": 0.5443454384803772, "learning_rate": 8.835522986278308e-06, "loss": 0.4729, "step": 15511 }, { "epoch": 0.7118535175072278, "grad_norm": 0.5224723815917969, "learning_rate": 8.835365688218684e-06, "loss": 0.545, "step": 15512 }, { "epoch": 0.7118994080124822, "grad_norm": 0.4485406279563904, "learning_rate": 8.83520838093622e-06, "loss": 0.4107, "step": 15513 }, { "epoch": 0.7119452985177367, "grad_norm": 0.4891459047794342, "learning_rate": 8.835051064431288e-06, "loss": 0.4447, "step": 15514 }, { "epoch": 0.7119911890229912, "grad_norm": 0.41155001521110535, "learning_rate": 8.834893738704269e-06, "loss": 0.2997, "step": 15515 }, { "epoch": 0.7120370795282456, "grad_norm": 0.45614489912986755, "learning_rate": 8.83473640375554e-06, "loss": 0.4056, "step": 15516 }, { "epoch": 0.7120829700335001, "grad_norm": 0.46351298689842224, "learning_rate": 8.83457905958548e-06, "loss": 0.4325, "step": 15517 }, { "epoch": 0.7121288605387546, "grad_norm": 0.4697418212890625, "learning_rate": 8.834421706194469e-06, "loss": 0.4376, "step": 15518 }, { "epoch": 0.712174751044009, "grad_norm": 0.4287526309490204, "learning_rate": 8.834264343582883e-06, "loss": 0.3818, "step": 15519 }, { "epoch": 0.7122206415492635, "grad_norm": 0.4640028774738312, "learning_rate": 8.834106971751102e-06, "loss": 0.4379, "step": 15520 }, { "epoch": 0.712266532054518, "grad_norm": 0.5065779685974121, "learning_rate": 8.833949590699504e-06, "loss": 0.4895, "step": 15521 }, { "epoch": 0.7123124225597723, "grad_norm": 0.8899006247520447, "learning_rate": 8.833792200428468e-06, "loss": 0.4139, "step": 15522 }, { "epoch": 0.7123583130650268, "grad_norm": 0.4375467896461487, "learning_rate": 8.833634800938369e-06, "loss": 0.3691, "step": 15523 }, { "epoch": 0.7124042035702813, "grad_norm": 0.4735585153102875, "learning_rate": 8.83347739222959e-06, "loss": 0.4622, "step": 15524 }, { "epoch": 0.7124500940755357, "grad_norm": 0.41544556617736816, "learning_rate": 8.833319974302508e-06, "loss": 0.306, "step": 15525 }, { "epoch": 0.7124959845807902, "grad_norm": 0.44986918568611145, "learning_rate": 8.833162547157502e-06, "loss": 0.3501, "step": 15526 }, { "epoch": 0.7125418750860447, "grad_norm": 0.4355300962924957, "learning_rate": 8.833005110794947e-06, "loss": 0.3818, "step": 15527 }, { "epoch": 0.7125877655912991, "grad_norm": 0.46408483386039734, "learning_rate": 8.832847665215228e-06, "loss": 0.4484, "step": 15528 }, { "epoch": 0.7126336560965536, "grad_norm": 0.4280332028865814, "learning_rate": 8.832690210418717e-06, "loss": 0.3776, "step": 15529 }, { "epoch": 0.7126795466018081, "grad_norm": 0.47469472885131836, "learning_rate": 8.832532746405797e-06, "loss": 0.4085, "step": 15530 }, { "epoch": 0.7127254371070626, "grad_norm": 0.509865939617157, "learning_rate": 8.832375273176845e-06, "loss": 0.5008, "step": 15531 }, { "epoch": 0.712771327612317, "grad_norm": 0.46818438172340393, "learning_rate": 8.83221779073224e-06, "loss": 0.4143, "step": 15532 }, { "epoch": 0.7128172181175715, "grad_norm": 0.4492754638195038, "learning_rate": 8.83206029907236e-06, "loss": 0.4014, "step": 15533 }, { "epoch": 0.712863108622826, "grad_norm": 0.46445271372795105, "learning_rate": 8.831902798197588e-06, "loss": 0.3813, "step": 15534 }, { "epoch": 0.7129089991280804, "grad_norm": 0.47076019644737244, "learning_rate": 8.831745288108296e-06, "loss": 0.3953, "step": 15535 }, { "epoch": 0.7129548896333349, "grad_norm": 0.44588086009025574, "learning_rate": 8.831587768804865e-06, "loss": 0.3871, "step": 15536 }, { "epoch": 0.7130007801385894, "grad_norm": 0.48752206563949585, "learning_rate": 8.831430240287677e-06, "loss": 0.4733, "step": 15537 }, { "epoch": 0.7130466706438437, "grad_norm": 0.48644009232521057, "learning_rate": 8.831272702557107e-06, "loss": 0.4379, "step": 15538 }, { "epoch": 0.7130925611490982, "grad_norm": 0.4382927119731903, "learning_rate": 8.831115155613537e-06, "loss": 0.3885, "step": 15539 }, { "epoch": 0.7131384516543527, "grad_norm": 0.4415043890476227, "learning_rate": 8.830957599457341e-06, "loss": 0.3226, "step": 15540 }, { "epoch": 0.7131843421596071, "grad_norm": 0.48629793524742126, "learning_rate": 8.830800034088904e-06, "loss": 0.4203, "step": 15541 }, { "epoch": 0.7132302326648616, "grad_norm": 0.4292840361595154, "learning_rate": 8.830642459508602e-06, "loss": 0.3802, "step": 15542 }, { "epoch": 0.7132761231701161, "grad_norm": 0.43133819103240967, "learning_rate": 8.830484875716813e-06, "loss": 0.378, "step": 15543 }, { "epoch": 0.7133220136753705, "grad_norm": 0.4392184913158417, "learning_rate": 8.830327282713917e-06, "loss": 0.3893, "step": 15544 }, { "epoch": 0.713367904180625, "grad_norm": 0.42994388937950134, "learning_rate": 8.83016968050029e-06, "loss": 0.3524, "step": 15545 }, { "epoch": 0.7134137946858795, "grad_norm": 0.39495158195495605, "learning_rate": 8.830012069076317e-06, "loss": 0.3075, "step": 15546 }, { "epoch": 0.713459685191134, "grad_norm": 0.427035391330719, "learning_rate": 8.829854448442374e-06, "loss": 0.3647, "step": 15547 }, { "epoch": 0.7135055756963884, "grad_norm": 0.43526822328567505, "learning_rate": 8.82969681859884e-06, "loss": 0.3589, "step": 15548 }, { "epoch": 0.7135514662016429, "grad_norm": 0.4792308211326599, "learning_rate": 8.829539179546091e-06, "loss": 0.3816, "step": 15549 }, { "epoch": 0.7135973567068974, "grad_norm": 0.440013587474823, "learning_rate": 8.829381531284511e-06, "loss": 0.3521, "step": 15550 }, { "epoch": 0.7136432472121518, "grad_norm": 0.4347813129425049, "learning_rate": 8.829223873814477e-06, "loss": 0.3415, "step": 15551 }, { "epoch": 0.7136891377174063, "grad_norm": 0.42824214696884155, "learning_rate": 8.829066207136368e-06, "loss": 0.3796, "step": 15552 }, { "epoch": 0.7137350282226608, "grad_norm": 0.46256357431411743, "learning_rate": 8.828908531250563e-06, "loss": 0.4141, "step": 15553 }, { "epoch": 0.7137809187279152, "grad_norm": 0.4591212570667267, "learning_rate": 8.828750846157441e-06, "loss": 0.4207, "step": 15554 }, { "epoch": 0.7138268092331697, "grad_norm": 0.43453794717788696, "learning_rate": 8.828593151857383e-06, "loss": 0.3921, "step": 15555 }, { "epoch": 0.7138726997384242, "grad_norm": 0.4260489046573639, "learning_rate": 8.828435448350766e-06, "loss": 0.3783, "step": 15556 }, { "epoch": 0.7139185902436785, "grad_norm": 0.4535537660121918, "learning_rate": 8.82827773563797e-06, "loss": 0.4047, "step": 15557 }, { "epoch": 0.713964480748933, "grad_norm": 0.49402597546577454, "learning_rate": 8.828120013719373e-06, "loss": 0.4856, "step": 15558 }, { "epoch": 0.7140103712541875, "grad_norm": 0.4657919704914093, "learning_rate": 8.827962282595357e-06, "loss": 0.4457, "step": 15559 }, { "epoch": 0.7140562617594419, "grad_norm": 0.4483382999897003, "learning_rate": 8.827804542266301e-06, "loss": 0.4082, "step": 15560 }, { "epoch": 0.7141021522646964, "grad_norm": 0.4551346004009247, "learning_rate": 8.82764679273258e-06, "loss": 0.4579, "step": 15561 }, { "epoch": 0.7141480427699509, "grad_norm": 0.43995821475982666, "learning_rate": 8.82748903399458e-06, "loss": 0.3646, "step": 15562 }, { "epoch": 0.7141939332752053, "grad_norm": 0.516647219657898, "learning_rate": 8.827331266052675e-06, "loss": 0.4977, "step": 15563 }, { "epoch": 0.7142398237804598, "grad_norm": 0.45906272530555725, "learning_rate": 8.827173488907246e-06, "loss": 0.4255, "step": 15564 }, { "epoch": 0.7142857142857143, "grad_norm": 0.5009387731552124, "learning_rate": 8.827015702558672e-06, "loss": 0.4686, "step": 15565 }, { "epoch": 0.7143316047909688, "grad_norm": 0.4899574816226959, "learning_rate": 8.826857907007336e-06, "loss": 0.4398, "step": 15566 }, { "epoch": 0.7143774952962232, "grad_norm": 0.4522970914840698, "learning_rate": 8.826700102253612e-06, "loss": 0.3935, "step": 15567 }, { "epoch": 0.7144233858014777, "grad_norm": 0.46897608041763306, "learning_rate": 8.826542288297881e-06, "loss": 0.4551, "step": 15568 }, { "epoch": 0.7144692763067322, "grad_norm": 0.4823964536190033, "learning_rate": 8.826384465140527e-06, "loss": 0.4483, "step": 15569 }, { "epoch": 0.7145151668119866, "grad_norm": 0.49233704805374146, "learning_rate": 8.826226632781923e-06, "loss": 0.4402, "step": 15570 }, { "epoch": 0.7145610573172411, "grad_norm": 0.4153207540512085, "learning_rate": 8.826068791222454e-06, "loss": 0.3539, "step": 15571 }, { "epoch": 0.7146069478224956, "grad_norm": 0.46038228273391724, "learning_rate": 8.825910940462494e-06, "loss": 0.4195, "step": 15572 }, { "epoch": 0.71465283832775, "grad_norm": 0.42529672384262085, "learning_rate": 8.825753080502429e-06, "loss": 0.3627, "step": 15573 }, { "epoch": 0.7146987288330044, "grad_norm": 0.45616424083709717, "learning_rate": 8.825595211342632e-06, "loss": 0.3882, "step": 15574 }, { "epoch": 0.714744619338259, "grad_norm": 0.44404497742652893, "learning_rate": 8.825437332983488e-06, "loss": 0.3591, "step": 15575 }, { "epoch": 0.7147905098435133, "grad_norm": 0.4738733172416687, "learning_rate": 8.825279445425375e-06, "loss": 0.4096, "step": 15576 }, { "epoch": 0.7148364003487678, "grad_norm": 0.45659273862838745, "learning_rate": 8.825121548668672e-06, "loss": 0.4061, "step": 15577 }, { "epoch": 0.7148822908540223, "grad_norm": 0.4600314795970917, "learning_rate": 8.824963642713757e-06, "loss": 0.3981, "step": 15578 }, { "epoch": 0.7149281813592767, "grad_norm": 0.46980634331703186, "learning_rate": 8.824805727561013e-06, "loss": 0.4687, "step": 15579 }, { "epoch": 0.7149740718645312, "grad_norm": 0.4423784911632538, "learning_rate": 8.824647803210819e-06, "loss": 0.4163, "step": 15580 }, { "epoch": 0.7150199623697857, "grad_norm": 0.4743698239326477, "learning_rate": 8.824489869663553e-06, "loss": 0.4638, "step": 15581 }, { "epoch": 0.7150658528750402, "grad_norm": 0.4626905024051666, "learning_rate": 8.824331926919595e-06, "loss": 0.3833, "step": 15582 }, { "epoch": 0.7151117433802946, "grad_norm": 0.436654657125473, "learning_rate": 8.82417397497933e-06, "loss": 0.3337, "step": 15583 }, { "epoch": 0.7151576338855491, "grad_norm": 0.4399971067905426, "learning_rate": 8.824016013843128e-06, "loss": 0.3837, "step": 15584 }, { "epoch": 0.7152035243908036, "grad_norm": 0.5217958092689514, "learning_rate": 8.823858043511378e-06, "loss": 0.4122, "step": 15585 }, { "epoch": 0.715249414896058, "grad_norm": 0.4876897931098938, "learning_rate": 8.823700063984455e-06, "loss": 0.4012, "step": 15586 }, { "epoch": 0.7152953054013125, "grad_norm": 0.5061797499656677, "learning_rate": 8.823542075262741e-06, "loss": 0.4912, "step": 15587 }, { "epoch": 0.715341195906567, "grad_norm": 0.5132040977478027, "learning_rate": 8.823384077346614e-06, "loss": 0.4995, "step": 15588 }, { "epoch": 0.7153870864118214, "grad_norm": 0.4687791168689728, "learning_rate": 8.823226070236456e-06, "loss": 0.3787, "step": 15589 }, { "epoch": 0.7154329769170759, "grad_norm": 0.4527410864830017, "learning_rate": 8.823068053932645e-06, "loss": 0.3877, "step": 15590 }, { "epoch": 0.7154788674223304, "grad_norm": 0.4342135787010193, "learning_rate": 8.822910028435562e-06, "loss": 0.3906, "step": 15591 }, { "epoch": 0.7155247579275847, "grad_norm": 0.43960365653038025, "learning_rate": 8.822751993745587e-06, "loss": 0.3434, "step": 15592 }, { "epoch": 0.7155706484328392, "grad_norm": 0.4276764988899231, "learning_rate": 8.822593949863101e-06, "loss": 0.3492, "step": 15593 }, { "epoch": 0.7156165389380937, "grad_norm": 0.44562748074531555, "learning_rate": 8.822435896788482e-06, "loss": 0.3591, "step": 15594 }, { "epoch": 0.7156624294433481, "grad_norm": 0.4125620126724243, "learning_rate": 8.82227783452211e-06, "loss": 0.3187, "step": 15595 }, { "epoch": 0.7157083199486026, "grad_norm": 0.4754997789859772, "learning_rate": 8.822119763064369e-06, "loss": 0.4474, "step": 15596 }, { "epoch": 0.7157542104538571, "grad_norm": 0.4303065240383148, "learning_rate": 8.821961682415634e-06, "loss": 0.3504, "step": 15597 }, { "epoch": 0.7158001009591115, "grad_norm": 0.46645036339759827, "learning_rate": 8.821803592576288e-06, "loss": 0.421, "step": 15598 }, { "epoch": 0.715845991464366, "grad_norm": 0.48790207505226135, "learning_rate": 8.821645493546712e-06, "loss": 0.4389, "step": 15599 }, { "epoch": 0.7158918819696205, "grad_norm": 0.4692898988723755, "learning_rate": 8.821487385327283e-06, "loss": 0.3723, "step": 15600 }, { "epoch": 0.715937772474875, "grad_norm": 0.49176734685897827, "learning_rate": 8.821329267918382e-06, "loss": 0.4284, "step": 15601 }, { "epoch": 0.7159836629801294, "grad_norm": 0.43893522024154663, "learning_rate": 8.821171141320392e-06, "loss": 0.342, "step": 15602 }, { "epoch": 0.7160295534853839, "grad_norm": 0.45134127140045166, "learning_rate": 8.821013005533691e-06, "loss": 0.3989, "step": 15603 }, { "epoch": 0.7160754439906384, "grad_norm": 0.4622332751750946, "learning_rate": 8.820854860558659e-06, "loss": 0.3613, "step": 15604 }, { "epoch": 0.7161213344958928, "grad_norm": 0.42012733221054077, "learning_rate": 8.820696706395677e-06, "loss": 0.3348, "step": 15605 }, { "epoch": 0.7161672250011473, "grad_norm": 0.459369957447052, "learning_rate": 8.820538543045127e-06, "loss": 0.3951, "step": 15606 }, { "epoch": 0.7162131155064018, "grad_norm": 0.49629658460617065, "learning_rate": 8.820380370507386e-06, "loss": 0.5011, "step": 15607 }, { "epoch": 0.7162590060116562, "grad_norm": 0.48579883575439453, "learning_rate": 8.820222188782837e-06, "loss": 0.4652, "step": 15608 }, { "epoch": 0.7163048965169106, "grad_norm": 0.500974178314209, "learning_rate": 8.820063997871857e-06, "loss": 0.4412, "step": 15609 }, { "epoch": 0.7163507870221651, "grad_norm": 0.45358914136886597, "learning_rate": 8.81990579777483e-06, "loss": 0.4202, "step": 15610 }, { "epoch": 0.7163966775274195, "grad_norm": 0.47065556049346924, "learning_rate": 8.819747588492135e-06, "loss": 0.3794, "step": 15611 }, { "epoch": 0.716442568032674, "grad_norm": 0.45115119218826294, "learning_rate": 8.819589370024155e-06, "loss": 0.4398, "step": 15612 }, { "epoch": 0.7164884585379285, "grad_norm": 0.4697340726852417, "learning_rate": 8.819431142371267e-06, "loss": 0.4316, "step": 15613 }, { "epoch": 0.7165343490431829, "grad_norm": 0.45078858733177185, "learning_rate": 8.81927290553385e-06, "loss": 0.4139, "step": 15614 }, { "epoch": 0.7165802395484374, "grad_norm": 0.47174978256225586, "learning_rate": 8.819114659512289e-06, "loss": 0.5085, "step": 15615 }, { "epoch": 0.7166261300536919, "grad_norm": 0.43557462096214294, "learning_rate": 8.818956404306962e-06, "loss": 0.4072, "step": 15616 }, { "epoch": 0.7166720205589463, "grad_norm": 0.42182135581970215, "learning_rate": 8.818798139918253e-06, "loss": 0.3414, "step": 15617 }, { "epoch": 0.7167179110642008, "grad_norm": 0.47500914335250854, "learning_rate": 8.818639866346534e-06, "loss": 0.3834, "step": 15618 }, { "epoch": 0.7167638015694553, "grad_norm": 0.48851120471954346, "learning_rate": 8.818481583592196e-06, "loss": 0.4616, "step": 15619 }, { "epoch": 0.7168096920747098, "grad_norm": 0.4447363317012787, "learning_rate": 8.818323291655613e-06, "loss": 0.4035, "step": 15620 }, { "epoch": 0.7168555825799642, "grad_norm": 0.4497104287147522, "learning_rate": 8.818164990537169e-06, "loss": 0.4008, "step": 15621 }, { "epoch": 0.7169014730852187, "grad_norm": 0.4267825782299042, "learning_rate": 8.818006680237241e-06, "loss": 0.303, "step": 15622 }, { "epoch": 0.7169473635904732, "grad_norm": 0.41800087690353394, "learning_rate": 8.817848360756213e-06, "loss": 0.3896, "step": 15623 }, { "epoch": 0.7169932540957276, "grad_norm": 0.4382498264312744, "learning_rate": 8.817690032094466e-06, "loss": 0.3299, "step": 15624 }, { "epoch": 0.7170391446009821, "grad_norm": 0.45760563015937805, "learning_rate": 8.817531694252378e-06, "loss": 0.4024, "step": 15625 }, { "epoch": 0.7170850351062366, "grad_norm": 0.43356430530548096, "learning_rate": 8.81737334723033e-06, "loss": 0.3394, "step": 15626 }, { "epoch": 0.7171309256114909, "grad_norm": 0.5737821459770203, "learning_rate": 8.817214991028707e-06, "loss": 0.3363, "step": 15627 }, { "epoch": 0.7171768161167454, "grad_norm": 0.4786701798439026, "learning_rate": 8.817056625647885e-06, "loss": 0.398, "step": 15628 }, { "epoch": 0.7172227066219999, "grad_norm": 0.45972543954849243, "learning_rate": 8.816898251088246e-06, "loss": 0.3669, "step": 15629 }, { "epoch": 0.7172685971272543, "grad_norm": 0.46570178866386414, "learning_rate": 8.816739867350172e-06, "loss": 0.3675, "step": 15630 }, { "epoch": 0.7173144876325088, "grad_norm": 0.5328686237335205, "learning_rate": 8.816581474434042e-06, "loss": 0.4888, "step": 15631 }, { "epoch": 0.7173603781377633, "grad_norm": 0.4507195055484772, "learning_rate": 8.816423072340241e-06, "loss": 0.3662, "step": 15632 }, { "epoch": 0.7174062686430177, "grad_norm": 0.4924786388874054, "learning_rate": 8.816264661069144e-06, "loss": 0.4201, "step": 15633 }, { "epoch": 0.7174521591482722, "grad_norm": 0.474643349647522, "learning_rate": 8.816106240621137e-06, "loss": 0.4369, "step": 15634 }, { "epoch": 0.7174980496535267, "grad_norm": 0.4365168511867523, "learning_rate": 8.815947810996596e-06, "loss": 0.3771, "step": 15635 }, { "epoch": 0.7175439401587812, "grad_norm": 0.5028250813484192, "learning_rate": 8.815789372195909e-06, "loss": 0.4004, "step": 15636 }, { "epoch": 0.7175898306640356, "grad_norm": 0.4864856004714966, "learning_rate": 8.81563092421945e-06, "loss": 0.4997, "step": 15637 }, { "epoch": 0.7176357211692901, "grad_norm": 0.4270686209201813, "learning_rate": 8.815472467067603e-06, "loss": 0.3344, "step": 15638 }, { "epoch": 0.7176816116745446, "grad_norm": 0.48633384704589844, "learning_rate": 8.81531400074075e-06, "loss": 0.4453, "step": 15639 }, { "epoch": 0.717727502179799, "grad_norm": 0.46596699953079224, "learning_rate": 8.815155525239271e-06, "loss": 0.4321, "step": 15640 }, { "epoch": 0.7177733926850535, "grad_norm": 0.44546231627464294, "learning_rate": 8.814997040563547e-06, "loss": 0.3905, "step": 15641 }, { "epoch": 0.717819283190308, "grad_norm": 0.42674729228019714, "learning_rate": 8.814838546713958e-06, "loss": 0.3726, "step": 15642 }, { "epoch": 0.7178651736955624, "grad_norm": 0.47995319962501526, "learning_rate": 8.814680043690887e-06, "loss": 0.4005, "step": 15643 }, { "epoch": 0.7179110642008169, "grad_norm": 0.40687865018844604, "learning_rate": 8.814521531494715e-06, "loss": 0.3193, "step": 15644 }, { "epoch": 0.7179569547060713, "grad_norm": 0.4590286612510681, "learning_rate": 8.814363010125822e-06, "loss": 0.4412, "step": 15645 }, { "epoch": 0.7180028452113257, "grad_norm": 0.42670056223869324, "learning_rate": 8.81420447958459e-06, "loss": 0.3724, "step": 15646 }, { "epoch": 0.7180487357165802, "grad_norm": 0.7275289297103882, "learning_rate": 8.8140459398714e-06, "loss": 0.3582, "step": 15647 }, { "epoch": 0.7180946262218347, "grad_norm": 0.4692992568016052, "learning_rate": 8.813887390986632e-06, "loss": 0.3931, "step": 15648 }, { "epoch": 0.7181405167270891, "grad_norm": 0.4102296531200409, "learning_rate": 8.81372883293067e-06, "loss": 0.3119, "step": 15649 }, { "epoch": 0.7181864072323436, "grad_norm": 0.5070441365242004, "learning_rate": 8.813570265703894e-06, "loss": 0.4509, "step": 15650 }, { "epoch": 0.7182322977375981, "grad_norm": 0.4424387514591217, "learning_rate": 8.813411689306684e-06, "loss": 0.3823, "step": 15651 }, { "epoch": 0.7182781882428525, "grad_norm": 0.47151249647140503, "learning_rate": 8.813253103739422e-06, "loss": 0.3514, "step": 15652 }, { "epoch": 0.718324078748107, "grad_norm": 0.43100136518478394, "learning_rate": 8.813094509002492e-06, "loss": 0.3487, "step": 15653 }, { "epoch": 0.7183699692533615, "grad_norm": 0.4997897744178772, "learning_rate": 8.812935905096272e-06, "loss": 0.4515, "step": 15654 }, { "epoch": 0.718415859758616, "grad_norm": 0.44973400235176086, "learning_rate": 8.812777292021144e-06, "loss": 0.3713, "step": 15655 }, { "epoch": 0.7184617502638704, "grad_norm": 0.4724183678627014, "learning_rate": 8.812618669777492e-06, "loss": 0.3864, "step": 15656 }, { "epoch": 0.7185076407691249, "grad_norm": 0.48242223262786865, "learning_rate": 8.812460038365693e-06, "loss": 0.4544, "step": 15657 }, { "epoch": 0.7185535312743794, "grad_norm": 0.46911078691482544, "learning_rate": 8.812301397786131e-06, "loss": 0.454, "step": 15658 }, { "epoch": 0.7185994217796338, "grad_norm": 0.40428584814071655, "learning_rate": 8.812142748039187e-06, "loss": 0.3066, "step": 15659 }, { "epoch": 0.7186453122848883, "grad_norm": 0.4288439154624939, "learning_rate": 8.811984089125245e-06, "loss": 0.3393, "step": 15660 }, { "epoch": 0.7186912027901428, "grad_norm": 0.4545484185218811, "learning_rate": 8.811825421044683e-06, "loss": 0.3824, "step": 15661 }, { "epoch": 0.7187370932953971, "grad_norm": 0.4439581036567688, "learning_rate": 8.811666743797884e-06, "loss": 0.3568, "step": 15662 }, { "epoch": 0.7187829838006516, "grad_norm": 0.44223231077194214, "learning_rate": 8.811508057385229e-06, "loss": 0.357, "step": 15663 }, { "epoch": 0.7188288743059061, "grad_norm": 0.4481886327266693, "learning_rate": 8.811349361807102e-06, "loss": 0.3503, "step": 15664 }, { "epoch": 0.7188747648111605, "grad_norm": 0.4579429626464844, "learning_rate": 8.811190657063881e-06, "loss": 0.4495, "step": 15665 }, { "epoch": 0.718920655316415, "grad_norm": 0.4518534243106842, "learning_rate": 8.81103194315595e-06, "loss": 0.3189, "step": 15666 }, { "epoch": 0.7189665458216695, "grad_norm": 0.44195178151130676, "learning_rate": 8.810873220083688e-06, "loss": 0.347, "step": 15667 }, { "epoch": 0.7190124363269239, "grad_norm": 0.4850746989250183, "learning_rate": 8.810714487847483e-06, "loss": 0.4549, "step": 15668 }, { "epoch": 0.7190583268321784, "grad_norm": 0.46074604988098145, "learning_rate": 8.810555746447709e-06, "loss": 0.4036, "step": 15669 }, { "epoch": 0.7191042173374329, "grad_norm": 0.46106013655662537, "learning_rate": 8.810396995884751e-06, "loss": 0.3964, "step": 15670 }, { "epoch": 0.7191501078426874, "grad_norm": 0.45544198155403137, "learning_rate": 8.810238236158992e-06, "loss": 0.3777, "step": 15671 }, { "epoch": 0.7191959983479418, "grad_norm": 0.5385237336158752, "learning_rate": 8.810079467270812e-06, "loss": 0.47, "step": 15672 }, { "epoch": 0.7192418888531963, "grad_norm": 0.4805903434753418, "learning_rate": 8.809920689220594e-06, "loss": 0.4525, "step": 15673 }, { "epoch": 0.7192877793584508, "grad_norm": 0.45054641366004944, "learning_rate": 8.809761902008718e-06, "loss": 0.4088, "step": 15674 }, { "epoch": 0.7193336698637052, "grad_norm": 0.4502006471157074, "learning_rate": 8.809603105635569e-06, "loss": 0.3836, "step": 15675 }, { "epoch": 0.7193795603689597, "grad_norm": 0.47026869654655457, "learning_rate": 8.809444300101525e-06, "loss": 0.4403, "step": 15676 }, { "epoch": 0.7194254508742142, "grad_norm": 0.444906622171402, "learning_rate": 8.80928548540697e-06, "loss": 0.3636, "step": 15677 }, { "epoch": 0.7194713413794686, "grad_norm": 0.46094810962677, "learning_rate": 8.809126661552286e-06, "loss": 0.4027, "step": 15678 }, { "epoch": 0.719517231884723, "grad_norm": 0.4577856659889221, "learning_rate": 8.808967828537855e-06, "loss": 0.4096, "step": 15679 }, { "epoch": 0.7195631223899775, "grad_norm": 0.4108022451400757, "learning_rate": 8.808808986364056e-06, "loss": 0.3196, "step": 15680 }, { "epoch": 0.7196090128952319, "grad_norm": 0.4910483658313751, "learning_rate": 8.808650135031278e-06, "loss": 0.4724, "step": 15681 }, { "epoch": 0.7196549034004864, "grad_norm": 0.4647468030452728, "learning_rate": 8.808491274539896e-06, "loss": 0.4193, "step": 15682 }, { "epoch": 0.7197007939057409, "grad_norm": 0.4430696368217468, "learning_rate": 8.808332404890295e-06, "loss": 0.3394, "step": 15683 }, { "epoch": 0.7197466844109953, "grad_norm": 0.4280908703804016, "learning_rate": 8.808173526082856e-06, "loss": 0.3852, "step": 15684 }, { "epoch": 0.7197925749162498, "grad_norm": 0.4214204251766205, "learning_rate": 8.80801463811796e-06, "loss": 0.3362, "step": 15685 }, { "epoch": 0.7198384654215043, "grad_norm": 0.4595947861671448, "learning_rate": 8.807855740995994e-06, "loss": 0.3998, "step": 15686 }, { "epoch": 0.7198843559267587, "grad_norm": 0.45970043540000916, "learning_rate": 8.807696834717335e-06, "loss": 0.4393, "step": 15687 }, { "epoch": 0.7199302464320132, "grad_norm": 0.45524463057518005, "learning_rate": 8.807537919282365e-06, "loss": 0.3673, "step": 15688 }, { "epoch": 0.7199761369372677, "grad_norm": 0.4703691601753235, "learning_rate": 8.80737899469147e-06, "loss": 0.4272, "step": 15689 }, { "epoch": 0.7200220274425222, "grad_norm": 0.44374561309814453, "learning_rate": 8.80722006094503e-06, "loss": 0.4016, "step": 15690 }, { "epoch": 0.7200679179477766, "grad_norm": 0.46652352809906006, "learning_rate": 8.807061118043427e-06, "loss": 0.4595, "step": 15691 }, { "epoch": 0.7201138084530311, "grad_norm": 0.47145673632621765, "learning_rate": 8.806902165987045e-06, "loss": 0.4457, "step": 15692 }, { "epoch": 0.7201596989582856, "grad_norm": 0.45192602276802063, "learning_rate": 8.806743204776264e-06, "loss": 0.4388, "step": 15693 }, { "epoch": 0.72020558946354, "grad_norm": 0.42427805066108704, "learning_rate": 8.806584234411466e-06, "loss": 0.3501, "step": 15694 }, { "epoch": 0.7202514799687945, "grad_norm": 0.4368148446083069, "learning_rate": 8.806425254893037e-06, "loss": 0.3617, "step": 15695 }, { "epoch": 0.720297370474049, "grad_norm": 0.44040966033935547, "learning_rate": 8.806266266221354e-06, "loss": 0.3735, "step": 15696 }, { "epoch": 0.7203432609793033, "grad_norm": 0.4907754361629486, "learning_rate": 8.806107268396802e-06, "loss": 0.4621, "step": 15697 }, { "epoch": 0.7203891514845578, "grad_norm": 0.5049310922622681, "learning_rate": 8.805948261419763e-06, "loss": 0.5043, "step": 15698 }, { "epoch": 0.7204350419898123, "grad_norm": 0.4873790740966797, "learning_rate": 8.80578924529062e-06, "loss": 0.3838, "step": 15699 }, { "epoch": 0.7204809324950667, "grad_norm": 0.45794540643692017, "learning_rate": 8.805630220009757e-06, "loss": 0.3345, "step": 15700 }, { "epoch": 0.7205268230003212, "grad_norm": 0.44353345036506653, "learning_rate": 8.805471185577553e-06, "loss": 0.3902, "step": 15701 }, { "epoch": 0.7205727135055757, "grad_norm": 0.5043193101882935, "learning_rate": 8.80531214199439e-06, "loss": 0.4802, "step": 15702 }, { "epoch": 0.7206186040108301, "grad_norm": 0.46371254324913025, "learning_rate": 8.805153089260655e-06, "loss": 0.4082, "step": 15703 }, { "epoch": 0.7206644945160846, "grad_norm": 0.4520680606365204, "learning_rate": 8.804994027376726e-06, "loss": 0.3905, "step": 15704 }, { "epoch": 0.7207103850213391, "grad_norm": 0.4368646740913391, "learning_rate": 8.804834956342988e-06, "loss": 0.3622, "step": 15705 }, { "epoch": 0.7207562755265935, "grad_norm": 0.4347103238105774, "learning_rate": 8.804675876159824e-06, "loss": 0.3431, "step": 15706 }, { "epoch": 0.720802166031848, "grad_norm": 0.47889891266822815, "learning_rate": 8.804516786827613e-06, "loss": 0.5021, "step": 15707 }, { "epoch": 0.7208480565371025, "grad_norm": 0.4814665615558624, "learning_rate": 8.804357688346741e-06, "loss": 0.441, "step": 15708 }, { "epoch": 0.720893947042357, "grad_norm": 0.45779088139533997, "learning_rate": 8.80419858071759e-06, "loss": 0.3927, "step": 15709 }, { "epoch": 0.7209398375476114, "grad_norm": 0.4778338670730591, "learning_rate": 8.80403946394054e-06, "loss": 0.4164, "step": 15710 }, { "epoch": 0.7209857280528659, "grad_norm": 0.4592438340187073, "learning_rate": 8.80388033801598e-06, "loss": 0.376, "step": 15711 }, { "epoch": 0.7210316185581204, "grad_norm": 0.4740516245365143, "learning_rate": 8.803721202944284e-06, "loss": 0.3791, "step": 15712 }, { "epoch": 0.7210775090633748, "grad_norm": 0.4968891441822052, "learning_rate": 8.80356205872584e-06, "loss": 0.4507, "step": 15713 }, { "epoch": 0.7211233995686293, "grad_norm": 0.46413061022758484, "learning_rate": 8.803402905361032e-06, "loss": 0.438, "step": 15714 }, { "epoch": 0.7211692900738838, "grad_norm": 0.4207325875759125, "learning_rate": 8.803243742850238e-06, "loss": 0.3337, "step": 15715 }, { "epoch": 0.7212151805791381, "grad_norm": 0.48167723417282104, "learning_rate": 8.803084571193842e-06, "loss": 0.4237, "step": 15716 }, { "epoch": 0.7212610710843926, "grad_norm": 0.4395751357078552, "learning_rate": 8.80292539039223e-06, "loss": 0.3688, "step": 15717 }, { "epoch": 0.7213069615896471, "grad_norm": 0.5004598498344421, "learning_rate": 8.802766200445783e-06, "loss": 0.45, "step": 15718 }, { "epoch": 0.7213528520949015, "grad_norm": 0.4956144094467163, "learning_rate": 8.802607001354882e-06, "loss": 0.4351, "step": 15719 }, { "epoch": 0.721398742600156, "grad_norm": 0.47222280502319336, "learning_rate": 8.802447793119913e-06, "loss": 0.4149, "step": 15720 }, { "epoch": 0.7214446331054105, "grad_norm": 0.4475928843021393, "learning_rate": 8.802288575741256e-06, "loss": 0.4144, "step": 15721 }, { "epoch": 0.7214905236106649, "grad_norm": 0.4386114478111267, "learning_rate": 8.802129349219295e-06, "loss": 0.3823, "step": 15722 }, { "epoch": 0.7215364141159194, "grad_norm": 0.4891201853752136, "learning_rate": 8.801970113554414e-06, "loss": 0.416, "step": 15723 }, { "epoch": 0.7215823046211739, "grad_norm": 0.445738285779953, "learning_rate": 8.801810868746993e-06, "loss": 0.3818, "step": 15724 }, { "epoch": 0.7216281951264284, "grad_norm": 0.49361205101013184, "learning_rate": 8.801651614797418e-06, "loss": 0.4799, "step": 15725 }, { "epoch": 0.7216740856316828, "grad_norm": 0.45539504289627075, "learning_rate": 8.80149235170607e-06, "loss": 0.3808, "step": 15726 }, { "epoch": 0.7217199761369373, "grad_norm": 0.46922767162323, "learning_rate": 8.801333079473332e-06, "loss": 0.3942, "step": 15727 }, { "epoch": 0.7217658666421918, "grad_norm": 0.43232956528663635, "learning_rate": 8.801173798099589e-06, "loss": 0.3545, "step": 15728 }, { "epoch": 0.7218117571474462, "grad_norm": 0.4611961543560028, "learning_rate": 8.801014507585223e-06, "loss": 0.4056, "step": 15729 }, { "epoch": 0.7218576476527007, "grad_norm": 0.4680825471878052, "learning_rate": 8.800855207930616e-06, "loss": 0.3757, "step": 15730 }, { "epoch": 0.7219035381579552, "grad_norm": 0.4400262236595154, "learning_rate": 8.800695899136153e-06, "loss": 0.3925, "step": 15731 }, { "epoch": 0.7219494286632095, "grad_norm": 0.4886402189731598, "learning_rate": 8.800536581202214e-06, "loss": 0.484, "step": 15732 }, { "epoch": 0.721995319168464, "grad_norm": 0.4436909854412079, "learning_rate": 8.800377254129185e-06, "loss": 0.3534, "step": 15733 }, { "epoch": 0.7220412096737185, "grad_norm": 0.440734326839447, "learning_rate": 8.800217917917449e-06, "loss": 0.4411, "step": 15734 }, { "epoch": 0.7220871001789729, "grad_norm": 0.45247772336006165, "learning_rate": 8.800058572567387e-06, "loss": 0.4142, "step": 15735 }, { "epoch": 0.7221329906842274, "grad_norm": 0.47396689653396606, "learning_rate": 8.799899218079384e-06, "loss": 0.3968, "step": 15736 }, { "epoch": 0.7221788811894819, "grad_norm": 0.42098331451416016, "learning_rate": 8.799739854453821e-06, "loss": 0.3176, "step": 15737 }, { "epoch": 0.7222247716947363, "grad_norm": 0.44899746775627136, "learning_rate": 8.799580481691084e-06, "loss": 0.4095, "step": 15738 }, { "epoch": 0.7222706621999908, "grad_norm": 0.45648160576820374, "learning_rate": 8.799421099791557e-06, "loss": 0.412, "step": 15739 }, { "epoch": 0.7223165527052453, "grad_norm": 0.4719427824020386, "learning_rate": 8.79926170875562e-06, "loss": 0.4832, "step": 15740 }, { "epoch": 0.7223624432104997, "grad_norm": 0.4499124586582184, "learning_rate": 8.799102308583656e-06, "loss": 0.3783, "step": 15741 }, { "epoch": 0.7224083337157542, "grad_norm": 0.4321404993534088, "learning_rate": 8.798942899276052e-06, "loss": 0.3812, "step": 15742 }, { "epoch": 0.7224542242210087, "grad_norm": 0.47287285327911377, "learning_rate": 8.798783480833189e-06, "loss": 0.442, "step": 15743 }, { "epoch": 0.7225001147262632, "grad_norm": 0.4085655212402344, "learning_rate": 8.798624053255451e-06, "loss": 0.2896, "step": 15744 }, { "epoch": 0.7225460052315176, "grad_norm": 0.4424719512462616, "learning_rate": 8.79846461654322e-06, "loss": 0.3453, "step": 15745 }, { "epoch": 0.7225918957367721, "grad_norm": 0.4693467617034912, "learning_rate": 8.79830517069688e-06, "loss": 0.4388, "step": 15746 }, { "epoch": 0.7226377862420266, "grad_norm": 0.4066821336746216, "learning_rate": 8.798145715716817e-06, "loss": 0.3207, "step": 15747 }, { "epoch": 0.722683676747281, "grad_norm": 0.4815947115421295, "learning_rate": 8.79798625160341e-06, "loss": 0.4453, "step": 15748 }, { "epoch": 0.7227295672525355, "grad_norm": 0.3973434865474701, "learning_rate": 8.797826778357046e-06, "loss": 0.2875, "step": 15749 }, { "epoch": 0.72277545775779, "grad_norm": 0.4871484339237213, "learning_rate": 8.797667295978108e-06, "loss": 0.4225, "step": 15750 }, { "epoch": 0.7228213482630443, "grad_norm": 0.4572736620903015, "learning_rate": 8.797507804466978e-06, "loss": 0.4126, "step": 15751 }, { "epoch": 0.7228672387682988, "grad_norm": 0.4817626178264618, "learning_rate": 8.797348303824038e-06, "loss": 0.491, "step": 15752 }, { "epoch": 0.7229131292735533, "grad_norm": 0.4603731036186218, "learning_rate": 8.797188794049676e-06, "loss": 0.4229, "step": 15753 }, { "epoch": 0.7229590197788077, "grad_norm": 0.4694717228412628, "learning_rate": 8.797029275144274e-06, "loss": 0.4637, "step": 15754 }, { "epoch": 0.7230049102840622, "grad_norm": 0.42534172534942627, "learning_rate": 8.796869747108214e-06, "loss": 0.3375, "step": 15755 }, { "epoch": 0.7230508007893167, "grad_norm": 0.4359208941459656, "learning_rate": 8.79671020994188e-06, "loss": 0.342, "step": 15756 }, { "epoch": 0.7230966912945711, "grad_norm": 0.48801472783088684, "learning_rate": 8.796550663645657e-06, "loss": 0.4459, "step": 15757 }, { "epoch": 0.7231425817998256, "grad_norm": 0.4650600850582123, "learning_rate": 8.796391108219928e-06, "loss": 0.457, "step": 15758 }, { "epoch": 0.7231884723050801, "grad_norm": 0.46825850009918213, "learning_rate": 8.796231543665077e-06, "loss": 0.3878, "step": 15759 }, { "epoch": 0.7232343628103346, "grad_norm": 0.4077177047729492, "learning_rate": 8.796071969981487e-06, "loss": 0.3022, "step": 15760 }, { "epoch": 0.723280253315589, "grad_norm": 0.486931711435318, "learning_rate": 8.79591238716954e-06, "loss": 0.3872, "step": 15761 }, { "epoch": 0.7233261438208435, "grad_norm": 0.4552956223487854, "learning_rate": 8.795752795229624e-06, "loss": 0.3953, "step": 15762 }, { "epoch": 0.723372034326098, "grad_norm": 0.4604277014732361, "learning_rate": 8.79559319416212e-06, "loss": 0.4052, "step": 15763 }, { "epoch": 0.7234179248313524, "grad_norm": 0.41771620512008667, "learning_rate": 8.795433583967412e-06, "loss": 0.3204, "step": 15764 }, { "epoch": 0.7234638153366069, "grad_norm": 0.435382217168808, "learning_rate": 8.795273964645886e-06, "loss": 0.3855, "step": 15765 }, { "epoch": 0.7235097058418614, "grad_norm": 0.43797045946121216, "learning_rate": 8.79511433619792e-06, "loss": 0.321, "step": 15766 }, { "epoch": 0.7235555963471157, "grad_norm": 0.466752290725708, "learning_rate": 8.794954698623905e-06, "loss": 0.375, "step": 15767 }, { "epoch": 0.7236014868523702, "grad_norm": 0.42924177646636963, "learning_rate": 8.794795051924221e-06, "loss": 0.308, "step": 15768 }, { "epoch": 0.7236473773576247, "grad_norm": 0.46656152606010437, "learning_rate": 8.794635396099252e-06, "loss": 0.3575, "step": 15769 }, { "epoch": 0.7236932678628791, "grad_norm": 0.43610289692878723, "learning_rate": 8.794475731149384e-06, "loss": 0.3506, "step": 15770 }, { "epoch": 0.7237391583681336, "grad_norm": 0.44478684663772583, "learning_rate": 8.794316057074996e-06, "loss": 0.3658, "step": 15771 }, { "epoch": 0.7237850488733881, "grad_norm": 0.4541526734828949, "learning_rate": 8.79415637387648e-06, "loss": 0.3795, "step": 15772 }, { "epoch": 0.7238309393786425, "grad_norm": 0.4828837513923645, "learning_rate": 8.79399668155421e-06, "loss": 0.4326, "step": 15773 }, { "epoch": 0.723876829883897, "grad_norm": 0.4287390410900116, "learning_rate": 8.79383698010858e-06, "loss": 0.3495, "step": 15774 }, { "epoch": 0.7239227203891515, "grad_norm": 0.4825621247291565, "learning_rate": 8.793677269539967e-06, "loss": 0.4722, "step": 15775 }, { "epoch": 0.7239686108944059, "grad_norm": 0.44831010699272156, "learning_rate": 8.793517549848757e-06, "loss": 0.3835, "step": 15776 }, { "epoch": 0.7240145013996604, "grad_norm": 0.4726196527481079, "learning_rate": 8.793357821035338e-06, "loss": 0.407, "step": 15777 }, { "epoch": 0.7240603919049149, "grad_norm": 0.5705484747886658, "learning_rate": 8.793198083100089e-06, "loss": 0.4894, "step": 15778 }, { "epoch": 0.7241062824101694, "grad_norm": 0.4445796608924866, "learning_rate": 8.793038336043394e-06, "loss": 0.3307, "step": 15779 }, { "epoch": 0.7241521729154238, "grad_norm": 0.4393491744995117, "learning_rate": 8.79287857986564e-06, "loss": 0.4233, "step": 15780 }, { "epoch": 0.7241980634206783, "grad_norm": 0.6517716646194458, "learning_rate": 8.792718814567212e-06, "loss": 0.3973, "step": 15781 }, { "epoch": 0.7242439539259328, "grad_norm": 0.4433182179927826, "learning_rate": 8.79255904014849e-06, "loss": 0.3467, "step": 15782 }, { "epoch": 0.7242898444311872, "grad_norm": 0.48077258467674255, "learning_rate": 8.792399256609861e-06, "loss": 0.423, "step": 15783 }, { "epoch": 0.7243357349364417, "grad_norm": 0.4420410096645355, "learning_rate": 8.792239463951708e-06, "loss": 0.361, "step": 15784 }, { "epoch": 0.7243816254416962, "grad_norm": 0.39362818002700806, "learning_rate": 8.792079662174418e-06, "loss": 0.2834, "step": 15785 }, { "epoch": 0.7244275159469505, "grad_norm": 0.506283700466156, "learning_rate": 8.791919851278371e-06, "loss": 0.482, "step": 15786 }, { "epoch": 0.724473406452205, "grad_norm": 0.4326857030391693, "learning_rate": 8.791760031263954e-06, "loss": 0.3553, "step": 15787 }, { "epoch": 0.7245192969574595, "grad_norm": 0.4446062445640564, "learning_rate": 8.791600202131552e-06, "loss": 0.387, "step": 15788 }, { "epoch": 0.7245651874627139, "grad_norm": 0.46657004952430725, "learning_rate": 8.791440363881547e-06, "loss": 0.4079, "step": 15789 }, { "epoch": 0.7246110779679684, "grad_norm": 0.44427162408828735, "learning_rate": 8.791280516514325e-06, "loss": 0.3856, "step": 15790 }, { "epoch": 0.7246569684732229, "grad_norm": 0.4696272313594818, "learning_rate": 8.79112066003027e-06, "loss": 0.4756, "step": 15791 }, { "epoch": 0.7247028589784773, "grad_norm": 0.4774477481842041, "learning_rate": 8.790960794429765e-06, "loss": 0.4132, "step": 15792 }, { "epoch": 0.7247487494837318, "grad_norm": 0.4282197654247284, "learning_rate": 8.790800919713198e-06, "loss": 0.3524, "step": 15793 }, { "epoch": 0.7247946399889863, "grad_norm": 0.41371479630470276, "learning_rate": 8.79064103588095e-06, "loss": 0.3509, "step": 15794 }, { "epoch": 0.7248405304942407, "grad_norm": 0.5138885378837585, "learning_rate": 8.790481142933406e-06, "loss": 0.4376, "step": 15795 }, { "epoch": 0.7248864209994952, "grad_norm": 0.4904124438762665, "learning_rate": 8.790321240870954e-06, "loss": 0.3749, "step": 15796 }, { "epoch": 0.7249323115047497, "grad_norm": 0.47268912196159363, "learning_rate": 8.790161329693974e-06, "loss": 0.5133, "step": 15797 }, { "epoch": 0.7249782020100042, "grad_norm": 0.48660027980804443, "learning_rate": 8.790001409402851e-06, "loss": 0.3938, "step": 15798 }, { "epoch": 0.7250240925152586, "grad_norm": 0.46966612339019775, "learning_rate": 8.789841479997971e-06, "loss": 0.4424, "step": 15799 }, { "epoch": 0.7250699830205131, "grad_norm": 0.4524163007736206, "learning_rate": 8.78968154147972e-06, "loss": 0.3697, "step": 15800 }, { "epoch": 0.7251158735257676, "grad_norm": 0.43152281641960144, "learning_rate": 8.78952159384848e-06, "loss": 0.3336, "step": 15801 }, { "epoch": 0.725161764031022, "grad_norm": 0.48487457633018494, "learning_rate": 8.789361637104636e-06, "loss": 0.4048, "step": 15802 }, { "epoch": 0.7252076545362764, "grad_norm": 0.44444575905799866, "learning_rate": 8.789201671248572e-06, "loss": 0.3474, "step": 15803 }, { "epoch": 0.725253545041531, "grad_norm": 0.43307289481163025, "learning_rate": 8.789041696280678e-06, "loss": 0.3929, "step": 15804 }, { "epoch": 0.7252994355467853, "grad_norm": 0.45821964740753174, "learning_rate": 8.78888171220133e-06, "loss": 0.4303, "step": 15805 }, { "epoch": 0.7253453260520398, "grad_norm": 0.4718412458896637, "learning_rate": 8.788721719010922e-06, "loss": 0.4431, "step": 15806 }, { "epoch": 0.7253912165572943, "grad_norm": 0.43296968936920166, "learning_rate": 8.788561716709831e-06, "loss": 0.3778, "step": 15807 }, { "epoch": 0.7254371070625487, "grad_norm": 0.47573763132095337, "learning_rate": 8.788401705298445e-06, "loss": 0.4566, "step": 15808 }, { "epoch": 0.7254829975678032, "grad_norm": 0.44513794779777527, "learning_rate": 8.78824168477715e-06, "loss": 0.3362, "step": 15809 }, { "epoch": 0.7255288880730577, "grad_norm": 0.40864741802215576, "learning_rate": 8.788081655146328e-06, "loss": 0.3464, "step": 15810 }, { "epoch": 0.7255747785783121, "grad_norm": 0.4739340841770172, "learning_rate": 8.787921616406366e-06, "loss": 0.42, "step": 15811 }, { "epoch": 0.7256206690835666, "grad_norm": 0.45560669898986816, "learning_rate": 8.787761568557646e-06, "loss": 0.3421, "step": 15812 }, { "epoch": 0.7256665595888211, "grad_norm": 0.5029919147491455, "learning_rate": 8.787601511600556e-06, "loss": 0.4782, "step": 15813 }, { "epoch": 0.7257124500940756, "grad_norm": 0.4636812210083008, "learning_rate": 8.78744144553548e-06, "loss": 0.4033, "step": 15814 }, { "epoch": 0.72575834059933, "grad_norm": 0.45345252752304077, "learning_rate": 8.787281370362804e-06, "loss": 0.3715, "step": 15815 }, { "epoch": 0.7258042311045845, "grad_norm": 0.42035001516342163, "learning_rate": 8.78712128608291e-06, "loss": 0.3218, "step": 15816 }, { "epoch": 0.725850121609839, "grad_norm": 0.43998706340789795, "learning_rate": 8.786961192696185e-06, "loss": 0.3663, "step": 15817 }, { "epoch": 0.7258960121150934, "grad_norm": 0.4895240068435669, "learning_rate": 8.786801090203012e-06, "loss": 0.4713, "step": 15818 }, { "epoch": 0.7259419026203479, "grad_norm": 0.47725537419319153, "learning_rate": 8.78664097860378e-06, "loss": 0.4439, "step": 15819 }, { "epoch": 0.7259877931256024, "grad_norm": 0.45937857031822205, "learning_rate": 8.78648085789887e-06, "loss": 0.3434, "step": 15820 }, { "epoch": 0.7260336836308567, "grad_norm": 0.4288168251514435, "learning_rate": 8.786320728088668e-06, "loss": 0.3238, "step": 15821 }, { "epoch": 0.7260795741361112, "grad_norm": 0.512618899345398, "learning_rate": 8.786160589173563e-06, "loss": 0.486, "step": 15822 }, { "epoch": 0.7261254646413657, "grad_norm": 0.4348469078540802, "learning_rate": 8.786000441153934e-06, "loss": 0.3983, "step": 15823 }, { "epoch": 0.7261713551466201, "grad_norm": 0.47717514634132385, "learning_rate": 8.785840284030169e-06, "loss": 0.4099, "step": 15824 }, { "epoch": 0.7262172456518746, "grad_norm": 0.4558356702327728, "learning_rate": 8.785680117802654e-06, "loss": 0.4238, "step": 15825 }, { "epoch": 0.7262631361571291, "grad_norm": 0.4055575728416443, "learning_rate": 8.785519942471772e-06, "loss": 0.3053, "step": 15826 }, { "epoch": 0.7263090266623835, "grad_norm": 0.5978828072547913, "learning_rate": 8.785359758037908e-06, "loss": 0.4399, "step": 15827 }, { "epoch": 0.726354917167638, "grad_norm": 0.44162386655807495, "learning_rate": 8.785199564501451e-06, "loss": 0.3508, "step": 15828 }, { "epoch": 0.7264008076728925, "grad_norm": 0.4567866623401642, "learning_rate": 8.785039361862783e-06, "loss": 0.4367, "step": 15829 }, { "epoch": 0.7264466981781469, "grad_norm": 0.4178774058818817, "learning_rate": 8.784879150122289e-06, "loss": 0.3169, "step": 15830 }, { "epoch": 0.7264925886834014, "grad_norm": 0.4412555396556854, "learning_rate": 8.784718929280356e-06, "loss": 0.4174, "step": 15831 }, { "epoch": 0.7265384791886559, "grad_norm": 0.4471972584724426, "learning_rate": 8.784558699337368e-06, "loss": 0.3945, "step": 15832 }, { "epoch": 0.7265843696939104, "grad_norm": 0.4341907501220703, "learning_rate": 8.784398460293712e-06, "loss": 0.3805, "step": 15833 }, { "epoch": 0.7266302601991648, "grad_norm": 0.479985773563385, "learning_rate": 8.784238212149771e-06, "loss": 0.3843, "step": 15834 }, { "epoch": 0.7266761507044193, "grad_norm": 0.48687973618507385, "learning_rate": 8.784077954905932e-06, "loss": 0.5055, "step": 15835 }, { "epoch": 0.7267220412096738, "grad_norm": 0.47694897651672363, "learning_rate": 8.78391768856258e-06, "loss": 0.469, "step": 15836 }, { "epoch": 0.7267679317149282, "grad_norm": 0.45083072781562805, "learning_rate": 8.7837574131201e-06, "loss": 0.4119, "step": 15837 }, { "epoch": 0.7268138222201826, "grad_norm": 0.4373519718647003, "learning_rate": 8.783597128578877e-06, "loss": 0.3641, "step": 15838 }, { "epoch": 0.7268597127254371, "grad_norm": 0.4509945511817932, "learning_rate": 8.783436834939297e-06, "loss": 0.3495, "step": 15839 }, { "epoch": 0.7269056032306915, "grad_norm": 0.47613340616226196, "learning_rate": 8.783276532201745e-06, "loss": 0.403, "step": 15840 }, { "epoch": 0.726951493735946, "grad_norm": 6.271466255187988, "learning_rate": 8.783116220366608e-06, "loss": 0.5028, "step": 15841 }, { "epoch": 0.7269973842412005, "grad_norm": 0.39961832761764526, "learning_rate": 8.782955899434272e-06, "loss": 0.2846, "step": 15842 }, { "epoch": 0.7270432747464549, "grad_norm": 0.4274345934391022, "learning_rate": 8.782795569405118e-06, "loss": 0.348, "step": 15843 }, { "epoch": 0.7270891652517094, "grad_norm": 0.4682891368865967, "learning_rate": 8.782635230279536e-06, "loss": 0.3889, "step": 15844 }, { "epoch": 0.7271350557569639, "grad_norm": 0.5083214640617371, "learning_rate": 8.78247488205791e-06, "loss": 0.4117, "step": 15845 }, { "epoch": 0.7271809462622183, "grad_norm": 0.4408183991909027, "learning_rate": 8.782314524740625e-06, "loss": 0.3956, "step": 15846 }, { "epoch": 0.7272268367674728, "grad_norm": 0.4753551483154297, "learning_rate": 8.782154158328069e-06, "loss": 0.4564, "step": 15847 }, { "epoch": 0.7272727272727273, "grad_norm": 0.4275411367416382, "learning_rate": 8.781993782820624e-06, "loss": 0.3444, "step": 15848 }, { "epoch": 0.7273186177779818, "grad_norm": 0.44078466296195984, "learning_rate": 8.781833398218679e-06, "loss": 0.3627, "step": 15849 }, { "epoch": 0.7273645082832362, "grad_norm": 0.469561904668808, "learning_rate": 8.781673004522618e-06, "loss": 0.4105, "step": 15850 }, { "epoch": 0.7274103987884907, "grad_norm": 0.5032742023468018, "learning_rate": 8.781512601732827e-06, "loss": 0.477, "step": 15851 }, { "epoch": 0.7274562892937452, "grad_norm": 0.5570603013038635, "learning_rate": 8.78135218984969e-06, "loss": 0.5461, "step": 15852 }, { "epoch": 0.7275021797989996, "grad_norm": 0.4768106937408447, "learning_rate": 8.781191768873596e-06, "loss": 0.4157, "step": 15853 }, { "epoch": 0.7275480703042541, "grad_norm": 0.4715902805328369, "learning_rate": 8.781031338804928e-06, "loss": 0.3883, "step": 15854 }, { "epoch": 0.7275939608095086, "grad_norm": 0.495225727558136, "learning_rate": 8.780870899644075e-06, "loss": 0.5223, "step": 15855 }, { "epoch": 0.7276398513147629, "grad_norm": 0.47783002257347107, "learning_rate": 8.78071045139142e-06, "loss": 0.4299, "step": 15856 }, { "epoch": 0.7276857418200174, "grad_norm": 0.4998358488082886, "learning_rate": 8.780549994047349e-06, "loss": 0.466, "step": 15857 }, { "epoch": 0.7277316323252719, "grad_norm": 0.46260157227516174, "learning_rate": 8.780389527612249e-06, "loss": 0.4372, "step": 15858 }, { "epoch": 0.7277775228305263, "grad_norm": 0.6398410797119141, "learning_rate": 8.780229052086504e-06, "loss": 0.4804, "step": 15859 }, { "epoch": 0.7278234133357808, "grad_norm": 0.4729681611061096, "learning_rate": 8.780068567470504e-06, "loss": 0.3669, "step": 15860 }, { "epoch": 0.7278693038410353, "grad_norm": 0.44764643907546997, "learning_rate": 8.779908073764629e-06, "loss": 0.3719, "step": 15861 }, { "epoch": 0.7279151943462897, "grad_norm": 0.48311564326286316, "learning_rate": 8.779747570969269e-06, "loss": 0.4057, "step": 15862 }, { "epoch": 0.7279610848515442, "grad_norm": 0.47539085149765015, "learning_rate": 8.77958705908481e-06, "loss": 0.4121, "step": 15863 }, { "epoch": 0.7280069753567987, "grad_norm": 0.45124635100364685, "learning_rate": 8.779426538111636e-06, "loss": 0.4226, "step": 15864 }, { "epoch": 0.7280528658620531, "grad_norm": 0.48039737343788147, "learning_rate": 8.779266008050134e-06, "loss": 0.4472, "step": 15865 }, { "epoch": 0.7280987563673076, "grad_norm": 0.4407140612602234, "learning_rate": 8.779105468900691e-06, "loss": 0.3745, "step": 15866 }, { "epoch": 0.7281446468725621, "grad_norm": 0.4737776815891266, "learning_rate": 8.778944920663692e-06, "loss": 0.4633, "step": 15867 }, { "epoch": 0.7281905373778166, "grad_norm": 0.4528125822544098, "learning_rate": 8.778784363339522e-06, "loss": 0.3636, "step": 15868 }, { "epoch": 0.728236427883071, "grad_norm": 0.4774063527584076, "learning_rate": 8.778623796928569e-06, "loss": 0.4024, "step": 15869 }, { "epoch": 0.7282823183883255, "grad_norm": 0.4615359902381897, "learning_rate": 8.778463221431219e-06, "loss": 0.4051, "step": 15870 }, { "epoch": 0.72832820889358, "grad_norm": 0.5139020681381226, "learning_rate": 8.778302636847855e-06, "loss": 0.4715, "step": 15871 }, { "epoch": 0.7283740993988344, "grad_norm": 0.44127950072288513, "learning_rate": 8.778142043178868e-06, "loss": 0.3549, "step": 15872 }, { "epoch": 0.7284199899040888, "grad_norm": 0.4397776126861572, "learning_rate": 8.777981440424641e-06, "loss": 0.3408, "step": 15873 }, { "epoch": 0.7284658804093433, "grad_norm": 0.4101635813713074, "learning_rate": 8.777820828585561e-06, "loss": 0.3016, "step": 15874 }, { "epoch": 0.7285117709145977, "grad_norm": 0.48236075043678284, "learning_rate": 8.777660207662014e-06, "loss": 0.446, "step": 15875 }, { "epoch": 0.7285576614198522, "grad_norm": 0.46139639616012573, "learning_rate": 8.777499577654387e-06, "loss": 0.4006, "step": 15876 }, { "epoch": 0.7286035519251067, "grad_norm": 0.41257408261299133, "learning_rate": 8.777338938563065e-06, "loss": 0.2957, "step": 15877 }, { "epoch": 0.7286494424303611, "grad_norm": 0.49086636304855347, "learning_rate": 8.777178290388437e-06, "loss": 0.4718, "step": 15878 }, { "epoch": 0.7286953329356156, "grad_norm": 0.47208768129348755, "learning_rate": 8.777017633130884e-06, "loss": 0.4255, "step": 15879 }, { "epoch": 0.7287412234408701, "grad_norm": 0.4711315631866455, "learning_rate": 8.776856966790799e-06, "loss": 0.4034, "step": 15880 }, { "epoch": 0.7287871139461245, "grad_norm": 0.4615982174873352, "learning_rate": 8.776696291368562e-06, "loss": 0.3992, "step": 15881 }, { "epoch": 0.728833004451379, "grad_norm": 0.4622534513473511, "learning_rate": 8.776535606864565e-06, "loss": 0.4563, "step": 15882 }, { "epoch": 0.7288788949566335, "grad_norm": 0.43618839979171753, "learning_rate": 8.77637491327919e-06, "loss": 0.4097, "step": 15883 }, { "epoch": 0.7289247854618879, "grad_norm": 0.4568190276622772, "learning_rate": 8.776214210612825e-06, "loss": 0.4119, "step": 15884 }, { "epoch": 0.7289706759671424, "grad_norm": 0.4401451647281647, "learning_rate": 8.776053498865858e-06, "loss": 0.3797, "step": 15885 }, { "epoch": 0.7290165664723969, "grad_norm": 0.4336271584033966, "learning_rate": 8.775892778038675e-06, "loss": 0.3034, "step": 15886 }, { "epoch": 0.7290624569776514, "grad_norm": 0.4753570556640625, "learning_rate": 8.77573204813166e-06, "loss": 0.427, "step": 15887 }, { "epoch": 0.7291083474829058, "grad_norm": 0.4271291196346283, "learning_rate": 8.775571309145201e-06, "loss": 0.3838, "step": 15888 }, { "epoch": 0.7291542379881603, "grad_norm": 0.4823809862136841, "learning_rate": 8.775410561079684e-06, "loss": 0.3684, "step": 15889 }, { "epoch": 0.7292001284934148, "grad_norm": 0.4168423116207123, "learning_rate": 8.775249803935497e-06, "loss": 0.3519, "step": 15890 }, { "epoch": 0.7292460189986691, "grad_norm": 0.4381709694862366, "learning_rate": 8.775089037713026e-06, "loss": 0.3568, "step": 15891 }, { "epoch": 0.7292919095039236, "grad_norm": 0.45209264755249023, "learning_rate": 8.774928262412657e-06, "loss": 0.3538, "step": 15892 }, { "epoch": 0.7293378000091781, "grad_norm": 0.5010458827018738, "learning_rate": 8.774767478034776e-06, "loss": 0.4856, "step": 15893 }, { "epoch": 0.7293836905144325, "grad_norm": 0.48654666543006897, "learning_rate": 8.774606684579772e-06, "loss": 0.4921, "step": 15894 }, { "epoch": 0.729429581019687, "grad_norm": 0.479793906211853, "learning_rate": 8.77444588204803e-06, "loss": 0.4094, "step": 15895 }, { "epoch": 0.7294754715249415, "grad_norm": 0.4602452218532562, "learning_rate": 8.774285070439936e-06, "loss": 0.3709, "step": 15896 }, { "epoch": 0.7295213620301959, "grad_norm": 0.4258638918399811, "learning_rate": 8.77412424975588e-06, "loss": 0.3343, "step": 15897 }, { "epoch": 0.7295672525354504, "grad_norm": 0.5356239080429077, "learning_rate": 8.773963419996243e-06, "loss": 0.5135, "step": 15898 }, { "epoch": 0.7296131430407049, "grad_norm": 0.4787670373916626, "learning_rate": 8.773802581161417e-06, "loss": 0.4878, "step": 15899 }, { "epoch": 0.7296590335459593, "grad_norm": 0.4582371711730957, "learning_rate": 8.773641733251786e-06, "loss": 0.3828, "step": 15900 }, { "epoch": 0.7297049240512138, "grad_norm": 0.42731910943984985, "learning_rate": 8.773480876267739e-06, "loss": 0.3633, "step": 15901 }, { "epoch": 0.7297508145564683, "grad_norm": 0.4013974368572235, "learning_rate": 8.77332001020966e-06, "loss": 0.3134, "step": 15902 }, { "epoch": 0.7297967050617228, "grad_norm": 0.46906599402427673, "learning_rate": 8.773159135077937e-06, "loss": 0.3933, "step": 15903 }, { "epoch": 0.7298425955669772, "grad_norm": 0.45351725816726685, "learning_rate": 8.77299825087296e-06, "loss": 0.3672, "step": 15904 }, { "epoch": 0.7298884860722317, "grad_norm": 0.4581536054611206, "learning_rate": 8.77283735759511e-06, "loss": 0.3884, "step": 15905 }, { "epoch": 0.7299343765774862, "grad_norm": 0.48480382561683655, "learning_rate": 8.772676455244777e-06, "loss": 0.3946, "step": 15906 }, { "epoch": 0.7299802670827406, "grad_norm": 0.46109631657600403, "learning_rate": 8.77251554382235e-06, "loss": 0.4245, "step": 15907 }, { "epoch": 0.730026157587995, "grad_norm": 0.4518281817436218, "learning_rate": 8.772354623328212e-06, "loss": 0.3814, "step": 15908 }, { "epoch": 0.7300720480932495, "grad_norm": 0.4148745834827423, "learning_rate": 8.772193693762752e-06, "loss": 0.3318, "step": 15909 }, { "epoch": 0.7301179385985039, "grad_norm": 0.4644750654697418, "learning_rate": 8.772032755126357e-06, "loss": 0.417, "step": 15910 }, { "epoch": 0.7301638291037584, "grad_norm": 0.48413005471229553, "learning_rate": 8.771871807419412e-06, "loss": 0.3517, "step": 15911 }, { "epoch": 0.7302097196090129, "grad_norm": 0.5228267908096313, "learning_rate": 8.771710850642307e-06, "loss": 0.4318, "step": 15912 }, { "epoch": 0.7302556101142673, "grad_norm": 0.44682028889656067, "learning_rate": 8.771549884795429e-06, "loss": 0.385, "step": 15913 }, { "epoch": 0.7303015006195218, "grad_norm": 0.4255032539367676, "learning_rate": 8.771388909879164e-06, "loss": 0.3339, "step": 15914 }, { "epoch": 0.7303473911247763, "grad_norm": 0.4471794068813324, "learning_rate": 8.771227925893897e-06, "loss": 0.3399, "step": 15915 }, { "epoch": 0.7303932816300307, "grad_norm": 0.46839335560798645, "learning_rate": 8.771066932840017e-06, "loss": 0.3697, "step": 15916 }, { "epoch": 0.7304391721352852, "grad_norm": 0.4998243451118469, "learning_rate": 8.770905930717912e-06, "loss": 0.3839, "step": 15917 }, { "epoch": 0.7304850626405397, "grad_norm": 0.44538918137550354, "learning_rate": 8.770744919527969e-06, "loss": 0.3545, "step": 15918 }, { "epoch": 0.7305309531457941, "grad_norm": 0.4177456200122833, "learning_rate": 8.770583899270574e-06, "loss": 0.3235, "step": 15919 }, { "epoch": 0.7305768436510486, "grad_norm": 0.45691508054733276, "learning_rate": 8.770422869946115e-06, "loss": 0.3793, "step": 15920 }, { "epoch": 0.7306227341563031, "grad_norm": 0.4530298411846161, "learning_rate": 8.77026183155498e-06, "loss": 0.3397, "step": 15921 }, { "epoch": 0.7306686246615576, "grad_norm": 0.4441429078578949, "learning_rate": 8.770100784097553e-06, "loss": 0.3753, "step": 15922 }, { "epoch": 0.730714515166812, "grad_norm": 0.43963178992271423, "learning_rate": 8.769939727574224e-06, "loss": 0.3572, "step": 15923 }, { "epoch": 0.7307604056720665, "grad_norm": 0.46432948112487793, "learning_rate": 8.769778661985381e-06, "loss": 0.3434, "step": 15924 }, { "epoch": 0.730806296177321, "grad_norm": 0.5117664337158203, "learning_rate": 8.769617587331409e-06, "loss": 0.3587, "step": 15925 }, { "epoch": 0.7308521866825753, "grad_norm": 0.442678838968277, "learning_rate": 8.769456503612698e-06, "loss": 0.3704, "step": 15926 }, { "epoch": 0.7308980771878298, "grad_norm": 0.4432217478752136, "learning_rate": 8.769295410829632e-06, "loss": 0.4025, "step": 15927 }, { "epoch": 0.7309439676930843, "grad_norm": 0.47961968183517456, "learning_rate": 8.769134308982601e-06, "loss": 0.4367, "step": 15928 }, { "epoch": 0.7309898581983387, "grad_norm": 0.4575701951980591, "learning_rate": 8.76897319807199e-06, "loss": 0.4375, "step": 15929 }, { "epoch": 0.7310357487035932, "grad_norm": 0.44129225611686707, "learning_rate": 8.76881207809819e-06, "loss": 0.3409, "step": 15930 }, { "epoch": 0.7310816392088477, "grad_norm": 0.4386305809020996, "learning_rate": 8.768650949061585e-06, "loss": 0.3593, "step": 15931 }, { "epoch": 0.7311275297141021, "grad_norm": 0.4412127435207367, "learning_rate": 8.768489810962567e-06, "loss": 0.3497, "step": 15932 }, { "epoch": 0.7311734202193566, "grad_norm": 0.4620092213153839, "learning_rate": 8.768328663801517e-06, "loss": 0.4123, "step": 15933 }, { "epoch": 0.7312193107246111, "grad_norm": 0.4103982746601105, "learning_rate": 8.768167507578827e-06, "loss": 0.3206, "step": 15934 }, { "epoch": 0.7312652012298655, "grad_norm": 0.4471447169780731, "learning_rate": 8.768006342294884e-06, "loss": 0.3564, "step": 15935 }, { "epoch": 0.73131109173512, "grad_norm": 0.49769216775894165, "learning_rate": 8.767845167950073e-06, "loss": 0.4991, "step": 15936 }, { "epoch": 0.7313569822403745, "grad_norm": 0.4922749400138855, "learning_rate": 8.767683984544786e-06, "loss": 0.5009, "step": 15937 }, { "epoch": 0.731402872745629, "grad_norm": 0.49424469470977783, "learning_rate": 8.767522792079405e-06, "loss": 0.4887, "step": 15938 }, { "epoch": 0.7314487632508834, "grad_norm": 0.46387025713920593, "learning_rate": 8.767361590554324e-06, "loss": 0.4332, "step": 15939 }, { "epoch": 0.7314946537561379, "grad_norm": 0.4701308310031891, "learning_rate": 8.767200379969925e-06, "loss": 0.4268, "step": 15940 }, { "epoch": 0.7315405442613924, "grad_norm": 0.4916800856590271, "learning_rate": 8.7670391603266e-06, "loss": 0.452, "step": 15941 }, { "epoch": 0.7315864347666468, "grad_norm": 0.4440118968486786, "learning_rate": 8.766877931624733e-06, "loss": 0.3528, "step": 15942 }, { "epoch": 0.7316323252719013, "grad_norm": 0.4619618356227875, "learning_rate": 8.766716693864714e-06, "loss": 0.4314, "step": 15943 }, { "epoch": 0.7316782157771557, "grad_norm": 0.45432421565055847, "learning_rate": 8.766555447046932e-06, "loss": 0.3938, "step": 15944 }, { "epoch": 0.7317241062824101, "grad_norm": 0.4804248809814453, "learning_rate": 8.766394191171772e-06, "loss": 0.4084, "step": 15945 }, { "epoch": 0.7317699967876646, "grad_norm": 0.4608505070209503, "learning_rate": 8.76623292623962e-06, "loss": 0.4376, "step": 15946 }, { "epoch": 0.7318158872929191, "grad_norm": 0.46721744537353516, "learning_rate": 8.76607165225087e-06, "loss": 0.4387, "step": 15947 }, { "epoch": 0.7318617777981735, "grad_norm": 0.4271980822086334, "learning_rate": 8.765910369205904e-06, "loss": 0.3398, "step": 15948 }, { "epoch": 0.731907668303428, "grad_norm": 0.4862414002418518, "learning_rate": 8.765749077105112e-06, "loss": 0.4595, "step": 15949 }, { "epoch": 0.7319535588086825, "grad_norm": 0.44620829820632935, "learning_rate": 8.765587775948884e-06, "loss": 0.3916, "step": 15950 }, { "epoch": 0.7319994493139369, "grad_norm": 0.4894838035106659, "learning_rate": 8.765426465737605e-06, "loss": 0.5279, "step": 15951 }, { "epoch": 0.7320453398191914, "grad_norm": 0.4907549023628235, "learning_rate": 8.765265146471663e-06, "loss": 0.4735, "step": 15952 }, { "epoch": 0.7320912303244459, "grad_norm": 0.48567628860473633, "learning_rate": 8.765103818151448e-06, "loss": 0.4721, "step": 15953 }, { "epoch": 0.7321371208297003, "grad_norm": 0.4452628493309021, "learning_rate": 8.764942480777346e-06, "loss": 0.3599, "step": 15954 }, { "epoch": 0.7321830113349548, "grad_norm": 0.47635406255722046, "learning_rate": 8.764781134349745e-06, "loss": 0.4738, "step": 15955 }, { "epoch": 0.7322289018402093, "grad_norm": 0.514101505279541, "learning_rate": 8.764619778869034e-06, "loss": 0.4896, "step": 15956 }, { "epoch": 0.7322747923454638, "grad_norm": 0.4813731014728546, "learning_rate": 8.7644584143356e-06, "loss": 0.4239, "step": 15957 }, { "epoch": 0.7323206828507182, "grad_norm": 0.5118345022201538, "learning_rate": 8.764297040749832e-06, "loss": 0.5033, "step": 15958 }, { "epoch": 0.7323665733559727, "grad_norm": 0.525173544883728, "learning_rate": 8.764135658112118e-06, "loss": 0.4572, "step": 15959 }, { "epoch": 0.7324124638612272, "grad_norm": 0.48939767479896545, "learning_rate": 8.763974266422846e-06, "loss": 0.4634, "step": 15960 }, { "epoch": 0.7324583543664815, "grad_norm": 0.44155463576316833, "learning_rate": 8.763812865682402e-06, "loss": 0.3747, "step": 15961 }, { "epoch": 0.732504244871736, "grad_norm": 0.4541780352592468, "learning_rate": 8.763651455891177e-06, "loss": 0.3457, "step": 15962 }, { "epoch": 0.7325501353769905, "grad_norm": 0.43352413177490234, "learning_rate": 8.763490037049561e-06, "loss": 0.3809, "step": 15963 }, { "epoch": 0.7325960258822449, "grad_norm": 0.40558886528015137, "learning_rate": 8.763328609157935e-06, "loss": 0.3297, "step": 15964 }, { "epoch": 0.7326419163874994, "grad_norm": 0.5159944891929626, "learning_rate": 8.763167172216693e-06, "loss": 0.4175, "step": 15965 }, { "epoch": 0.7326878068927539, "grad_norm": 0.4289466142654419, "learning_rate": 8.763005726226221e-06, "loss": 0.3589, "step": 15966 }, { "epoch": 0.7327336973980083, "grad_norm": 0.45825204253196716, "learning_rate": 8.76284427118691e-06, "loss": 0.3848, "step": 15967 }, { "epoch": 0.7327795879032628, "grad_norm": 0.45539066195487976, "learning_rate": 8.762682807099143e-06, "loss": 0.4597, "step": 15968 }, { "epoch": 0.7328254784085173, "grad_norm": 0.47126147150993347, "learning_rate": 8.762521333963314e-06, "loss": 0.4245, "step": 15969 }, { "epoch": 0.7328713689137717, "grad_norm": 0.4594539999961853, "learning_rate": 8.762359851779807e-06, "loss": 0.406, "step": 15970 }, { "epoch": 0.7329172594190262, "grad_norm": 0.507514238357544, "learning_rate": 8.762198360549013e-06, "loss": 0.4746, "step": 15971 }, { "epoch": 0.7329631499242807, "grad_norm": 0.48064494132995605, "learning_rate": 8.76203686027132e-06, "loss": 0.4464, "step": 15972 }, { "epoch": 0.7330090404295351, "grad_norm": 0.4547726809978485, "learning_rate": 8.761875350947114e-06, "loss": 0.3689, "step": 15973 }, { "epoch": 0.7330549309347896, "grad_norm": 0.5376349091529846, "learning_rate": 8.761713832576785e-06, "loss": 0.3879, "step": 15974 }, { "epoch": 0.7331008214400441, "grad_norm": 0.45568010210990906, "learning_rate": 8.761552305160724e-06, "loss": 0.4075, "step": 15975 }, { "epoch": 0.7331467119452986, "grad_norm": 0.7805156111717224, "learning_rate": 8.761390768699315e-06, "loss": 0.4608, "step": 15976 }, { "epoch": 0.733192602450553, "grad_norm": 0.45124515891075134, "learning_rate": 8.761229223192949e-06, "loss": 0.4038, "step": 15977 }, { "epoch": 0.7332384929558075, "grad_norm": 0.520440936088562, "learning_rate": 8.761067668642013e-06, "loss": 0.495, "step": 15978 }, { "epoch": 0.733284383461062, "grad_norm": 0.4846421778202057, "learning_rate": 8.760906105046896e-06, "loss": 0.4409, "step": 15979 }, { "epoch": 0.7333302739663163, "grad_norm": 0.448649525642395, "learning_rate": 8.760744532407988e-06, "loss": 0.4349, "step": 15980 }, { "epoch": 0.7333761644715708, "grad_norm": 0.5006638765335083, "learning_rate": 8.760582950725675e-06, "loss": 0.4606, "step": 15981 }, { "epoch": 0.7334220549768253, "grad_norm": 0.47614434361457825, "learning_rate": 8.760421360000349e-06, "loss": 0.4111, "step": 15982 }, { "epoch": 0.7334679454820797, "grad_norm": 0.46055370569229126, "learning_rate": 8.760259760232395e-06, "loss": 0.4169, "step": 15983 }, { "epoch": 0.7335138359873342, "grad_norm": 0.45865610241889954, "learning_rate": 8.760098151422202e-06, "loss": 0.3945, "step": 15984 }, { "epoch": 0.7335597264925887, "grad_norm": 0.4412473142147064, "learning_rate": 8.75993653357016e-06, "loss": 0.3454, "step": 15985 }, { "epoch": 0.7336056169978431, "grad_norm": 0.4638570249080658, "learning_rate": 8.75977490667666e-06, "loss": 0.4087, "step": 15986 }, { "epoch": 0.7336515075030976, "grad_norm": 0.42857828736305237, "learning_rate": 8.759613270742085e-06, "loss": 0.3946, "step": 15987 }, { "epoch": 0.7336973980083521, "grad_norm": 0.49333256483078003, "learning_rate": 8.759451625766827e-06, "loss": 0.5215, "step": 15988 }, { "epoch": 0.7337432885136065, "grad_norm": 0.4647095501422882, "learning_rate": 8.759289971751275e-06, "loss": 0.3684, "step": 15989 }, { "epoch": 0.733789179018861, "grad_norm": 0.44152405858039856, "learning_rate": 8.759128308695818e-06, "loss": 0.3806, "step": 15990 }, { "epoch": 0.7338350695241155, "grad_norm": 0.45702317357063293, "learning_rate": 8.758966636600843e-06, "loss": 0.4124, "step": 15991 }, { "epoch": 0.73388096002937, "grad_norm": 0.4066959619522095, "learning_rate": 8.75880495546674e-06, "loss": 0.3076, "step": 15992 }, { "epoch": 0.7339268505346244, "grad_norm": 0.6690467000007629, "learning_rate": 8.758643265293896e-06, "loss": 0.3237, "step": 15993 }, { "epoch": 0.7339727410398789, "grad_norm": 0.4665399491786957, "learning_rate": 8.758481566082702e-06, "loss": 0.4458, "step": 15994 }, { "epoch": 0.7340186315451334, "grad_norm": 0.4557625949382782, "learning_rate": 8.758319857833546e-06, "loss": 0.3295, "step": 15995 }, { "epoch": 0.7340645220503877, "grad_norm": 0.44812726974487305, "learning_rate": 8.758158140546817e-06, "loss": 0.3598, "step": 15996 }, { "epoch": 0.7341104125556422, "grad_norm": 0.3999248743057251, "learning_rate": 8.757996414222905e-06, "loss": 0.3116, "step": 15997 }, { "epoch": 0.7341563030608967, "grad_norm": 0.45262986421585083, "learning_rate": 8.757834678862197e-06, "loss": 0.3819, "step": 15998 }, { "epoch": 0.7342021935661511, "grad_norm": 0.4417167603969574, "learning_rate": 8.757672934465082e-06, "loss": 0.3857, "step": 15999 }, { "epoch": 0.7342480840714056, "grad_norm": 0.40012234449386597, "learning_rate": 8.75751118103195e-06, "loss": 0.291, "step": 16000 }, { "epoch": 0.7342939745766601, "grad_norm": 0.4767579436302185, "learning_rate": 8.757349418563188e-06, "loss": 0.394, "step": 16001 }, { "epoch": 0.7343398650819145, "grad_norm": 0.48150455951690674, "learning_rate": 8.75718764705919e-06, "loss": 0.4305, "step": 16002 }, { "epoch": 0.734385755587169, "grad_norm": 0.5056874752044678, "learning_rate": 8.757025866520337e-06, "loss": 0.4422, "step": 16003 }, { "epoch": 0.7344316460924235, "grad_norm": 0.47848936915397644, "learning_rate": 8.756864076947025e-06, "loss": 0.4248, "step": 16004 }, { "epoch": 0.7344775365976779, "grad_norm": 0.44424909353256226, "learning_rate": 8.75670227833964e-06, "loss": 0.4041, "step": 16005 }, { "epoch": 0.7345234271029324, "grad_norm": 0.4556829035282135, "learning_rate": 8.756540470698573e-06, "loss": 0.3615, "step": 16006 }, { "epoch": 0.7345693176081869, "grad_norm": 0.4247104525566101, "learning_rate": 8.75637865402421e-06, "loss": 0.3395, "step": 16007 }, { "epoch": 0.7346152081134413, "grad_norm": 0.496927410364151, "learning_rate": 8.756216828316944e-06, "loss": 0.4665, "step": 16008 }, { "epoch": 0.7346610986186958, "grad_norm": 0.4343481659889221, "learning_rate": 8.75605499357716e-06, "loss": 0.3832, "step": 16009 }, { "epoch": 0.7347069891239503, "grad_norm": 0.46922731399536133, "learning_rate": 8.75589314980525e-06, "loss": 0.3707, "step": 16010 }, { "epoch": 0.7347528796292048, "grad_norm": 0.41550156474113464, "learning_rate": 8.755731297001601e-06, "loss": 0.3228, "step": 16011 }, { "epoch": 0.7347987701344592, "grad_norm": 0.4693114459514618, "learning_rate": 8.755569435166607e-06, "loss": 0.4068, "step": 16012 }, { "epoch": 0.7348446606397137, "grad_norm": 0.4681084156036377, "learning_rate": 8.755407564300651e-06, "loss": 0.4597, "step": 16013 }, { "epoch": 0.7348905511449682, "grad_norm": 0.48601141571998596, "learning_rate": 8.755245684404126e-06, "loss": 0.4388, "step": 16014 }, { "epoch": 0.7349364416502225, "grad_norm": 0.6528716683387756, "learning_rate": 8.75508379547742e-06, "loss": 0.4107, "step": 16015 }, { "epoch": 0.734982332155477, "grad_norm": 0.46836912631988525, "learning_rate": 8.754921897520923e-06, "loss": 0.4546, "step": 16016 }, { "epoch": 0.7350282226607315, "grad_norm": 0.5222553610801697, "learning_rate": 8.754759990535022e-06, "loss": 0.4365, "step": 16017 }, { "epoch": 0.7350741131659859, "grad_norm": 0.46116402745246887, "learning_rate": 8.75459807452011e-06, "loss": 0.4407, "step": 16018 }, { "epoch": 0.7351200036712404, "grad_norm": 0.6612066030502319, "learning_rate": 8.754436149476576e-06, "loss": 0.4789, "step": 16019 }, { "epoch": 0.7351658941764949, "grad_norm": 0.4208511412143707, "learning_rate": 8.754274215404806e-06, "loss": 0.3641, "step": 16020 }, { "epoch": 0.7352117846817493, "grad_norm": 0.4309150278568268, "learning_rate": 8.754112272305191e-06, "loss": 0.3518, "step": 16021 }, { "epoch": 0.7352576751870038, "grad_norm": 0.46029770374298096, "learning_rate": 8.753950320178121e-06, "loss": 0.3844, "step": 16022 }, { "epoch": 0.7353035656922583, "grad_norm": 0.45581042766571045, "learning_rate": 8.753788359023988e-06, "loss": 0.4, "step": 16023 }, { "epoch": 0.7353494561975127, "grad_norm": 0.4901447296142578, "learning_rate": 8.753626388843175e-06, "loss": 0.4308, "step": 16024 }, { "epoch": 0.7353953467027672, "grad_norm": 0.45787447690963745, "learning_rate": 8.753464409636077e-06, "loss": 0.3838, "step": 16025 }, { "epoch": 0.7354412372080217, "grad_norm": 0.46150150895118713, "learning_rate": 8.753302421403081e-06, "loss": 0.4225, "step": 16026 }, { "epoch": 0.7354871277132762, "grad_norm": 0.4517897367477417, "learning_rate": 8.75314042414458e-06, "loss": 0.4191, "step": 16027 }, { "epoch": 0.7355330182185306, "grad_norm": 0.507862389087677, "learning_rate": 8.752978417860957e-06, "loss": 0.4706, "step": 16028 }, { "epoch": 0.7355789087237851, "grad_norm": 0.4780707061290741, "learning_rate": 8.752816402552607e-06, "loss": 0.4458, "step": 16029 }, { "epoch": 0.7356247992290396, "grad_norm": 0.46732306480407715, "learning_rate": 8.752654378219917e-06, "loss": 0.4289, "step": 16030 }, { "epoch": 0.735670689734294, "grad_norm": 0.4724268615245819, "learning_rate": 8.752492344863279e-06, "loss": 0.4087, "step": 16031 }, { "epoch": 0.7357165802395484, "grad_norm": 0.4329822063446045, "learning_rate": 8.752330302483082e-06, "loss": 0.3865, "step": 16032 }, { "epoch": 0.7357624707448029, "grad_norm": 0.4339953362941742, "learning_rate": 8.752168251079713e-06, "loss": 0.3849, "step": 16033 }, { "epoch": 0.7358083612500573, "grad_norm": 0.47654518485069275, "learning_rate": 8.752006190653564e-06, "loss": 0.4491, "step": 16034 }, { "epoch": 0.7358542517553118, "grad_norm": 0.4512265622615814, "learning_rate": 8.751844121205024e-06, "loss": 0.3672, "step": 16035 }, { "epoch": 0.7359001422605663, "grad_norm": 0.42702749371528625, "learning_rate": 8.751682042734485e-06, "loss": 0.3908, "step": 16036 }, { "epoch": 0.7359460327658207, "grad_norm": 0.4433613121509552, "learning_rate": 8.751519955242334e-06, "loss": 0.3575, "step": 16037 }, { "epoch": 0.7359919232710752, "grad_norm": 0.4446146786212921, "learning_rate": 8.751357858728959e-06, "loss": 0.3793, "step": 16038 }, { "epoch": 0.7360378137763297, "grad_norm": 0.5112154483795166, "learning_rate": 8.751195753194754e-06, "loss": 0.3998, "step": 16039 }, { "epoch": 0.7360837042815841, "grad_norm": 0.46949493885040283, "learning_rate": 8.751033638640108e-06, "loss": 0.4367, "step": 16040 }, { "epoch": 0.7361295947868386, "grad_norm": 0.4684097468852997, "learning_rate": 8.75087151506541e-06, "loss": 0.4225, "step": 16041 }, { "epoch": 0.7361754852920931, "grad_norm": 0.44512760639190674, "learning_rate": 8.750709382471048e-06, "loss": 0.3536, "step": 16042 }, { "epoch": 0.7362213757973475, "grad_norm": 0.46184590458869934, "learning_rate": 8.750547240857416e-06, "loss": 0.4236, "step": 16043 }, { "epoch": 0.736267266302602, "grad_norm": 0.4564439058303833, "learning_rate": 8.7503850902249e-06, "loss": 0.3485, "step": 16044 }, { "epoch": 0.7363131568078565, "grad_norm": 0.4779980480670929, "learning_rate": 8.75022293057389e-06, "loss": 0.4504, "step": 16045 }, { "epoch": 0.736359047313111, "grad_norm": 0.48121780157089233, "learning_rate": 8.75006076190478e-06, "loss": 0.4323, "step": 16046 }, { "epoch": 0.7364049378183654, "grad_norm": 0.4444717466831207, "learning_rate": 8.749898584217957e-06, "loss": 0.3553, "step": 16047 }, { "epoch": 0.7364508283236199, "grad_norm": 0.46610522270202637, "learning_rate": 8.749736397513811e-06, "loss": 0.4079, "step": 16048 }, { "epoch": 0.7364967188288744, "grad_norm": 0.4422728419303894, "learning_rate": 8.749574201792734e-06, "loss": 0.3882, "step": 16049 }, { "epoch": 0.7365426093341287, "grad_norm": 0.43803870677948, "learning_rate": 8.749411997055111e-06, "loss": 0.3349, "step": 16050 }, { "epoch": 0.7365884998393832, "grad_norm": 0.462265282869339, "learning_rate": 8.74924978330134e-06, "loss": 0.3599, "step": 16051 }, { "epoch": 0.7366343903446377, "grad_norm": 0.5062516927719116, "learning_rate": 8.749087560531803e-06, "loss": 0.4379, "step": 16052 }, { "epoch": 0.7366802808498921, "grad_norm": 0.44809144735336304, "learning_rate": 8.748925328746894e-06, "loss": 0.3878, "step": 16053 }, { "epoch": 0.7367261713551466, "grad_norm": 0.4590931832790375, "learning_rate": 8.748763087947003e-06, "loss": 0.4043, "step": 16054 }, { "epoch": 0.7367720618604011, "grad_norm": 0.42366498708724976, "learning_rate": 8.748600838132521e-06, "loss": 0.3375, "step": 16055 }, { "epoch": 0.7368179523656555, "grad_norm": 0.41984736919403076, "learning_rate": 8.748438579303837e-06, "loss": 0.3973, "step": 16056 }, { "epoch": 0.73686384287091, "grad_norm": 0.426851361989975, "learning_rate": 8.748276311461341e-06, "loss": 0.3411, "step": 16057 }, { "epoch": 0.7369097333761645, "grad_norm": 0.44430434703826904, "learning_rate": 8.748114034605423e-06, "loss": 0.4268, "step": 16058 }, { "epoch": 0.7369556238814189, "grad_norm": 0.4714086651802063, "learning_rate": 8.747951748736474e-06, "loss": 0.4242, "step": 16059 }, { "epoch": 0.7370015143866734, "grad_norm": 0.4476342797279358, "learning_rate": 8.747789453854884e-06, "loss": 0.3771, "step": 16060 }, { "epoch": 0.7370474048919279, "grad_norm": 0.4290663003921509, "learning_rate": 8.747627149961042e-06, "loss": 0.3553, "step": 16061 }, { "epoch": 0.7370932953971823, "grad_norm": 0.40016189217567444, "learning_rate": 8.747464837055342e-06, "loss": 0.3311, "step": 16062 }, { "epoch": 0.7371391859024368, "grad_norm": 0.45907601714134216, "learning_rate": 8.74730251513817e-06, "loss": 0.4162, "step": 16063 }, { "epoch": 0.7371850764076913, "grad_norm": 0.47077348828315735, "learning_rate": 8.747140184209917e-06, "loss": 0.4257, "step": 16064 }, { "epoch": 0.7372309669129458, "grad_norm": 0.44760432839393616, "learning_rate": 8.746977844270976e-06, "loss": 0.3215, "step": 16065 }, { "epoch": 0.7372768574182001, "grad_norm": 0.4860057830810547, "learning_rate": 8.746815495321737e-06, "loss": 0.4057, "step": 16066 }, { "epoch": 0.7373227479234546, "grad_norm": 0.4765504002571106, "learning_rate": 8.746653137362588e-06, "loss": 0.5047, "step": 16067 }, { "epoch": 0.7373686384287091, "grad_norm": 0.5118085741996765, "learning_rate": 8.74649077039392e-06, "loss": 0.4715, "step": 16068 }, { "epoch": 0.7374145289339635, "grad_norm": 0.4291047155857086, "learning_rate": 8.746328394416124e-06, "loss": 0.3812, "step": 16069 }, { "epoch": 0.737460419439218, "grad_norm": 0.4587209224700928, "learning_rate": 8.746166009429592e-06, "loss": 0.4194, "step": 16070 }, { "epoch": 0.7375063099444725, "grad_norm": 0.4775572121143341, "learning_rate": 8.746003615434712e-06, "loss": 0.4702, "step": 16071 }, { "epoch": 0.7375522004497269, "grad_norm": 0.4398815333843231, "learning_rate": 8.745841212431878e-06, "loss": 0.3559, "step": 16072 }, { "epoch": 0.7375980909549814, "grad_norm": 0.466252863407135, "learning_rate": 8.745678800421476e-06, "loss": 0.4202, "step": 16073 }, { "epoch": 0.7376439814602359, "grad_norm": 0.4556097388267517, "learning_rate": 8.745516379403897e-06, "loss": 0.3742, "step": 16074 }, { "epoch": 0.7376898719654903, "grad_norm": 0.5115895867347717, "learning_rate": 8.745353949379536e-06, "loss": 0.498, "step": 16075 }, { "epoch": 0.7377357624707448, "grad_norm": 0.4719484746456146, "learning_rate": 8.74519151034878e-06, "loss": 0.3418, "step": 16076 }, { "epoch": 0.7377816529759993, "grad_norm": 0.42186978459358215, "learning_rate": 8.74502906231202e-06, "loss": 0.3525, "step": 16077 }, { "epoch": 0.7378275434812537, "grad_norm": 0.5289642214775085, "learning_rate": 8.744866605269648e-06, "loss": 0.4206, "step": 16078 }, { "epoch": 0.7378734339865082, "grad_norm": 0.4700157046318054, "learning_rate": 8.744704139222054e-06, "loss": 0.4882, "step": 16079 }, { "epoch": 0.7379193244917627, "grad_norm": 0.5120685696601868, "learning_rate": 8.744541664169626e-06, "loss": 0.3681, "step": 16080 }, { "epoch": 0.7379652149970172, "grad_norm": 0.4274618923664093, "learning_rate": 8.744379180112759e-06, "loss": 0.3345, "step": 16081 }, { "epoch": 0.7380111055022716, "grad_norm": 0.46479174494743347, "learning_rate": 8.744216687051841e-06, "loss": 0.4017, "step": 16082 }, { "epoch": 0.7380569960075261, "grad_norm": 0.4495057761669159, "learning_rate": 8.744054184987265e-06, "loss": 0.3548, "step": 16083 }, { "epoch": 0.7381028865127806, "grad_norm": 0.4770502746105194, "learning_rate": 8.74389167391942e-06, "loss": 0.3745, "step": 16084 }, { "epoch": 0.7381487770180349, "grad_norm": 0.4586268365383148, "learning_rate": 8.743729153848697e-06, "loss": 0.3839, "step": 16085 }, { "epoch": 0.7381946675232894, "grad_norm": 0.45045191049575806, "learning_rate": 8.743566624775485e-06, "loss": 0.3582, "step": 16086 }, { "epoch": 0.7382405580285439, "grad_norm": 0.4877607226371765, "learning_rate": 8.743404086700178e-06, "loss": 0.4244, "step": 16087 }, { "epoch": 0.7382864485337983, "grad_norm": 0.44610029458999634, "learning_rate": 8.743241539623166e-06, "loss": 0.3395, "step": 16088 }, { "epoch": 0.7383323390390528, "grad_norm": 0.455594003200531, "learning_rate": 8.743078983544839e-06, "loss": 0.3781, "step": 16089 }, { "epoch": 0.7383782295443073, "grad_norm": 0.483226478099823, "learning_rate": 8.74291641846559e-06, "loss": 0.4468, "step": 16090 }, { "epoch": 0.7384241200495617, "grad_norm": 0.48671430349349976, "learning_rate": 8.742753844385807e-06, "loss": 0.3759, "step": 16091 }, { "epoch": 0.7384700105548162, "grad_norm": 0.4697237014770508, "learning_rate": 8.742591261305882e-06, "loss": 0.3863, "step": 16092 }, { "epoch": 0.7385159010600707, "grad_norm": 0.43328678607940674, "learning_rate": 8.742428669226206e-06, "loss": 0.3554, "step": 16093 }, { "epoch": 0.7385617915653251, "grad_norm": 0.41437193751335144, "learning_rate": 8.742266068147171e-06, "loss": 0.3302, "step": 16094 }, { "epoch": 0.7386076820705796, "grad_norm": 0.5397595763206482, "learning_rate": 8.742103458069167e-06, "loss": 0.5184, "step": 16095 }, { "epoch": 0.7386535725758341, "grad_norm": 0.44679442048072815, "learning_rate": 8.741940838992585e-06, "loss": 0.3775, "step": 16096 }, { "epoch": 0.7386994630810885, "grad_norm": 0.44663742184638977, "learning_rate": 8.741778210917817e-06, "loss": 0.3961, "step": 16097 }, { "epoch": 0.738745353586343, "grad_norm": 0.48417043685913086, "learning_rate": 8.741615573845252e-06, "loss": 0.455, "step": 16098 }, { "epoch": 0.7387912440915975, "grad_norm": 0.43472760915756226, "learning_rate": 8.741452927775283e-06, "loss": 0.361, "step": 16099 }, { "epoch": 0.738837134596852, "grad_norm": 0.4686301052570343, "learning_rate": 8.741290272708302e-06, "loss": 0.3647, "step": 16100 }, { "epoch": 0.7388830251021063, "grad_norm": 0.44507551193237305, "learning_rate": 8.741127608644697e-06, "loss": 0.3847, "step": 16101 }, { "epoch": 0.7389289156073608, "grad_norm": 0.46205881237983704, "learning_rate": 8.740964935584861e-06, "loss": 0.3729, "step": 16102 }, { "epoch": 0.7389748061126153, "grad_norm": 0.4986153542995453, "learning_rate": 8.740802253529185e-06, "loss": 0.4484, "step": 16103 }, { "epoch": 0.7390206966178697, "grad_norm": 0.4620817303657532, "learning_rate": 8.740639562478062e-06, "loss": 0.3903, "step": 16104 }, { "epoch": 0.7390665871231242, "grad_norm": 0.4672529995441437, "learning_rate": 8.740476862431879e-06, "loss": 0.4594, "step": 16105 }, { "epoch": 0.7391124776283787, "grad_norm": 0.46298748254776, "learning_rate": 8.740314153391031e-06, "loss": 0.3803, "step": 16106 }, { "epoch": 0.7391583681336331, "grad_norm": 0.45846372842788696, "learning_rate": 8.740151435355906e-06, "loss": 0.3394, "step": 16107 }, { "epoch": 0.7392042586388876, "grad_norm": 0.4562525451183319, "learning_rate": 8.739988708326899e-06, "loss": 0.3802, "step": 16108 }, { "epoch": 0.7392501491441421, "grad_norm": 0.4561963379383087, "learning_rate": 8.739825972304401e-06, "loss": 0.4144, "step": 16109 }, { "epoch": 0.7392960396493965, "grad_norm": 0.4169153571128845, "learning_rate": 8.7396632272888e-06, "loss": 0.3103, "step": 16110 }, { "epoch": 0.739341930154651, "grad_norm": 0.48917412757873535, "learning_rate": 8.73950047328049e-06, "loss": 0.4745, "step": 16111 }, { "epoch": 0.7393878206599055, "grad_norm": 0.5736055374145508, "learning_rate": 8.73933771027986e-06, "loss": 0.6125, "step": 16112 }, { "epoch": 0.7394337111651599, "grad_norm": 0.4355540871620178, "learning_rate": 8.739174938287303e-06, "loss": 0.3656, "step": 16113 }, { "epoch": 0.7394796016704144, "grad_norm": 0.4507342278957367, "learning_rate": 8.739012157303212e-06, "loss": 0.398, "step": 16114 }, { "epoch": 0.7395254921756689, "grad_norm": 0.43851613998413086, "learning_rate": 8.738849367327976e-06, "loss": 0.3798, "step": 16115 }, { "epoch": 0.7395713826809233, "grad_norm": 0.4429139792919159, "learning_rate": 8.738686568361987e-06, "loss": 0.3877, "step": 16116 }, { "epoch": 0.7396172731861778, "grad_norm": 0.4806617796421051, "learning_rate": 8.738523760405637e-06, "loss": 0.4209, "step": 16117 }, { "epoch": 0.7396631636914323, "grad_norm": 0.45465174317359924, "learning_rate": 8.738360943459318e-06, "loss": 0.3526, "step": 16118 }, { "epoch": 0.7397090541966868, "grad_norm": 0.4482746124267578, "learning_rate": 8.73819811752342e-06, "loss": 0.3685, "step": 16119 }, { "epoch": 0.7397549447019411, "grad_norm": 0.45512130856513977, "learning_rate": 8.738035282598336e-06, "loss": 0.3541, "step": 16120 }, { "epoch": 0.7398008352071956, "grad_norm": 0.47207358479499817, "learning_rate": 8.737872438684457e-06, "loss": 0.4498, "step": 16121 }, { "epoch": 0.7398467257124501, "grad_norm": 0.46567198634147644, "learning_rate": 8.737709585782173e-06, "loss": 0.3997, "step": 16122 }, { "epoch": 0.7398926162177045, "grad_norm": 0.42531922459602356, "learning_rate": 8.737546723891878e-06, "loss": 0.3223, "step": 16123 }, { "epoch": 0.739938506722959, "grad_norm": 0.45814183354377747, "learning_rate": 8.737383853013963e-06, "loss": 0.4003, "step": 16124 }, { "epoch": 0.7399843972282135, "grad_norm": 0.42679598927497864, "learning_rate": 8.73722097314882e-06, "loss": 0.3884, "step": 16125 }, { "epoch": 0.7400302877334679, "grad_norm": 0.4293581247329712, "learning_rate": 8.737058084296839e-06, "loss": 0.3569, "step": 16126 }, { "epoch": 0.7400761782387224, "grad_norm": 0.4700793921947479, "learning_rate": 8.736895186458414e-06, "loss": 0.4256, "step": 16127 }, { "epoch": 0.7401220687439769, "grad_norm": 0.48024290800094604, "learning_rate": 8.736732279633933e-06, "loss": 0.3761, "step": 16128 }, { "epoch": 0.7401679592492313, "grad_norm": 0.4698987901210785, "learning_rate": 8.736569363823792e-06, "loss": 0.3934, "step": 16129 }, { "epoch": 0.7402138497544858, "grad_norm": 0.5660913586616516, "learning_rate": 8.73640643902838e-06, "loss": 0.4613, "step": 16130 }, { "epoch": 0.7402597402597403, "grad_norm": 0.6585381031036377, "learning_rate": 8.736243505248091e-06, "loss": 0.4257, "step": 16131 }, { "epoch": 0.7403056307649947, "grad_norm": 0.49519163370132446, "learning_rate": 8.736080562483316e-06, "loss": 0.5133, "step": 16132 }, { "epoch": 0.7403515212702492, "grad_norm": 0.46089187264442444, "learning_rate": 8.735917610734445e-06, "loss": 0.4457, "step": 16133 }, { "epoch": 0.7403974117755037, "grad_norm": 0.4055814743041992, "learning_rate": 8.73575465000187e-06, "loss": 0.3195, "step": 16134 }, { "epoch": 0.7404433022807582, "grad_norm": 0.4679926335811615, "learning_rate": 8.735591680285986e-06, "loss": 0.3813, "step": 16135 }, { "epoch": 0.7404891927860126, "grad_norm": 0.45333167910575867, "learning_rate": 8.735428701587183e-06, "loss": 0.3841, "step": 16136 }, { "epoch": 0.740535083291267, "grad_norm": 0.4486124813556671, "learning_rate": 8.735265713905853e-06, "loss": 0.4347, "step": 16137 }, { "epoch": 0.7405809737965215, "grad_norm": 0.47174447774887085, "learning_rate": 8.735102717242388e-06, "loss": 0.4276, "step": 16138 }, { "epoch": 0.7406268643017759, "grad_norm": 0.41880178451538086, "learning_rate": 8.734939711597178e-06, "loss": 0.3558, "step": 16139 }, { "epoch": 0.7406727548070304, "grad_norm": 0.4770233631134033, "learning_rate": 8.73477669697062e-06, "loss": 0.4692, "step": 16140 }, { "epoch": 0.7407186453122849, "grad_norm": 0.4251320958137512, "learning_rate": 8.734613673363099e-06, "loss": 0.3254, "step": 16141 }, { "epoch": 0.7407645358175393, "grad_norm": 0.5523183345794678, "learning_rate": 8.734450640775014e-06, "loss": 0.4772, "step": 16142 }, { "epoch": 0.7408104263227938, "grad_norm": 0.4386793076992035, "learning_rate": 8.734287599206751e-06, "loss": 0.3902, "step": 16143 }, { "epoch": 0.7408563168280483, "grad_norm": 0.46539005637168884, "learning_rate": 8.734124548658708e-06, "loss": 0.4615, "step": 16144 }, { "epoch": 0.7409022073333027, "grad_norm": 0.4543258547782898, "learning_rate": 8.73396148913127e-06, "loss": 0.3897, "step": 16145 }, { "epoch": 0.7409480978385572, "grad_norm": 0.5004029273986816, "learning_rate": 8.733798420624837e-06, "loss": 0.5044, "step": 16146 }, { "epoch": 0.7409939883438117, "grad_norm": 0.4698163568973541, "learning_rate": 8.733635343139796e-06, "loss": 0.4278, "step": 16147 }, { "epoch": 0.7410398788490661, "grad_norm": 0.4536164104938507, "learning_rate": 8.73347225667654e-06, "loss": 0.396, "step": 16148 }, { "epoch": 0.7410857693543206, "grad_norm": 0.4138358235359192, "learning_rate": 8.733309161235462e-06, "loss": 0.3147, "step": 16149 }, { "epoch": 0.7411316598595751, "grad_norm": 0.4568508565425873, "learning_rate": 8.733146056816953e-06, "loss": 0.3945, "step": 16150 }, { "epoch": 0.7411775503648295, "grad_norm": 0.44541874527931213, "learning_rate": 8.732982943421405e-06, "loss": 0.3942, "step": 16151 }, { "epoch": 0.741223440870084, "grad_norm": 0.4574776887893677, "learning_rate": 8.732819821049213e-06, "loss": 0.4135, "step": 16152 }, { "epoch": 0.7412693313753385, "grad_norm": 0.46046558022499084, "learning_rate": 8.732656689700767e-06, "loss": 0.4032, "step": 16153 }, { "epoch": 0.741315221880593, "grad_norm": 0.45076408982276917, "learning_rate": 8.73249354937646e-06, "loss": 0.3837, "step": 16154 }, { "epoch": 0.7413611123858473, "grad_norm": 0.4659837782382965, "learning_rate": 8.732330400076683e-06, "loss": 0.4083, "step": 16155 }, { "epoch": 0.7414070028911018, "grad_norm": 0.4350017011165619, "learning_rate": 8.73216724180183e-06, "loss": 0.3409, "step": 16156 }, { "epoch": 0.7414528933963563, "grad_norm": 0.469978928565979, "learning_rate": 8.732004074552294e-06, "loss": 0.358, "step": 16157 }, { "epoch": 0.7414987839016107, "grad_norm": 0.4696197211742401, "learning_rate": 8.731840898328462e-06, "loss": 0.429, "step": 16158 }, { "epoch": 0.7415446744068652, "grad_norm": 0.46041393280029297, "learning_rate": 8.731677713130735e-06, "loss": 0.4356, "step": 16159 }, { "epoch": 0.7415905649121197, "grad_norm": 0.4846954643726349, "learning_rate": 8.731514518959498e-06, "loss": 0.4385, "step": 16160 }, { "epoch": 0.7416364554173741, "grad_norm": 0.46422895789146423, "learning_rate": 8.731351315815149e-06, "loss": 0.4, "step": 16161 }, { "epoch": 0.7416823459226286, "grad_norm": 0.4660011827945709, "learning_rate": 8.731188103698076e-06, "loss": 0.4094, "step": 16162 }, { "epoch": 0.7417282364278831, "grad_norm": 0.4654594659805298, "learning_rate": 8.731024882608672e-06, "loss": 0.4115, "step": 16163 }, { "epoch": 0.7417741269331375, "grad_norm": 0.479792058467865, "learning_rate": 8.730861652547331e-06, "loss": 0.4584, "step": 16164 }, { "epoch": 0.741820017438392, "grad_norm": 0.47134044766426086, "learning_rate": 8.730698413514446e-06, "loss": 0.445, "step": 16165 }, { "epoch": 0.7418659079436465, "grad_norm": 0.44977229833602905, "learning_rate": 8.73053516551041e-06, "loss": 0.421, "step": 16166 }, { "epoch": 0.7419117984489009, "grad_norm": 0.47854113578796387, "learning_rate": 8.730371908535612e-06, "loss": 0.4239, "step": 16167 }, { "epoch": 0.7419576889541554, "grad_norm": 0.45471468567848206, "learning_rate": 8.73020864259045e-06, "loss": 0.3717, "step": 16168 }, { "epoch": 0.7420035794594099, "grad_norm": 0.4277383089065552, "learning_rate": 8.73004536767531e-06, "loss": 0.3291, "step": 16169 }, { "epoch": 0.7420494699646644, "grad_norm": 0.4229739010334015, "learning_rate": 8.729882083790591e-06, "loss": 0.3653, "step": 16170 }, { "epoch": 0.7420953604699188, "grad_norm": 0.4585472345352173, "learning_rate": 8.72971879093668e-06, "loss": 0.4243, "step": 16171 }, { "epoch": 0.7421412509751733, "grad_norm": 0.45656928420066833, "learning_rate": 8.729555489113975e-06, "loss": 0.3805, "step": 16172 }, { "epoch": 0.7421871414804277, "grad_norm": 0.4621952772140503, "learning_rate": 8.729392178322864e-06, "loss": 0.4046, "step": 16173 }, { "epoch": 0.7422330319856821, "grad_norm": 0.47696802020072937, "learning_rate": 8.729228858563745e-06, "loss": 0.4154, "step": 16174 }, { "epoch": 0.7422789224909366, "grad_norm": 0.4633540213108063, "learning_rate": 8.729065529837005e-06, "loss": 0.4577, "step": 16175 }, { "epoch": 0.7423248129961911, "grad_norm": 0.4880008101463318, "learning_rate": 8.72890219214304e-06, "loss": 0.4325, "step": 16176 }, { "epoch": 0.7423707035014455, "grad_norm": 0.4501653015613556, "learning_rate": 8.728738845482242e-06, "loss": 0.426, "step": 16177 }, { "epoch": 0.7424165940067, "grad_norm": 0.4904680550098419, "learning_rate": 8.728575489855003e-06, "loss": 0.4366, "step": 16178 }, { "epoch": 0.7424624845119545, "grad_norm": 0.464344322681427, "learning_rate": 8.728412125261718e-06, "loss": 0.4261, "step": 16179 }, { "epoch": 0.7425083750172089, "grad_norm": 0.4450833201408386, "learning_rate": 8.728248751702777e-06, "loss": 0.353, "step": 16180 }, { "epoch": 0.7425542655224634, "grad_norm": 0.4504692554473877, "learning_rate": 8.728085369178576e-06, "loss": 0.3482, "step": 16181 }, { "epoch": 0.7426001560277179, "grad_norm": 0.49562129378318787, "learning_rate": 8.727921977689506e-06, "loss": 0.4734, "step": 16182 }, { "epoch": 0.7426460465329723, "grad_norm": 0.4686272442340851, "learning_rate": 8.72775857723596e-06, "loss": 0.4348, "step": 16183 }, { "epoch": 0.7426919370382268, "grad_norm": 0.46457576751708984, "learning_rate": 8.727595167818332e-06, "loss": 0.4272, "step": 16184 }, { "epoch": 0.7427378275434813, "grad_norm": 0.46697044372558594, "learning_rate": 8.727431749437013e-06, "loss": 0.4274, "step": 16185 }, { "epoch": 0.7427837180487357, "grad_norm": 0.46541154384613037, "learning_rate": 8.727268322092397e-06, "loss": 0.3902, "step": 16186 }, { "epoch": 0.7428296085539902, "grad_norm": 0.4154607951641083, "learning_rate": 8.727104885784878e-06, "loss": 0.3416, "step": 16187 }, { "epoch": 0.7428754990592447, "grad_norm": 0.4436182379722595, "learning_rate": 8.726941440514846e-06, "loss": 0.3524, "step": 16188 }, { "epoch": 0.7429213895644992, "grad_norm": 0.4693727195262909, "learning_rate": 8.726777986282699e-06, "loss": 0.462, "step": 16189 }, { "epoch": 0.7429672800697535, "grad_norm": 0.4393177330493927, "learning_rate": 8.726614523088826e-06, "loss": 0.371, "step": 16190 }, { "epoch": 0.743013170575008, "grad_norm": 0.4943798780441284, "learning_rate": 8.72645105093362e-06, "loss": 0.4609, "step": 16191 }, { "epoch": 0.7430590610802625, "grad_norm": 0.46387019753456116, "learning_rate": 8.726287569817477e-06, "loss": 0.3802, "step": 16192 }, { "epoch": 0.7431049515855169, "grad_norm": 0.46085554361343384, "learning_rate": 8.726124079740788e-06, "loss": 0.3758, "step": 16193 }, { "epoch": 0.7431508420907714, "grad_norm": 0.44855400919914246, "learning_rate": 8.725960580703945e-06, "loss": 0.3517, "step": 16194 }, { "epoch": 0.7431967325960259, "grad_norm": 0.47725751996040344, "learning_rate": 8.725797072707345e-06, "loss": 0.4232, "step": 16195 }, { "epoch": 0.7432426231012803, "grad_norm": 0.4839957654476166, "learning_rate": 8.725633555751376e-06, "loss": 0.44, "step": 16196 }, { "epoch": 0.7432885136065348, "grad_norm": 0.45585089921951294, "learning_rate": 8.725470029836438e-06, "loss": 0.3766, "step": 16197 }, { "epoch": 0.7433344041117893, "grad_norm": 0.4755672514438629, "learning_rate": 8.725306494962917e-06, "loss": 0.4036, "step": 16198 }, { "epoch": 0.7433802946170437, "grad_norm": 0.49170634150505066, "learning_rate": 8.725142951131212e-06, "loss": 0.4461, "step": 16199 }, { "epoch": 0.7434261851222982, "grad_norm": 0.47405627369880676, "learning_rate": 8.724979398341714e-06, "loss": 0.4342, "step": 16200 }, { "epoch": 0.7434720756275527, "grad_norm": 0.48027193546295166, "learning_rate": 8.724815836594814e-06, "loss": 0.4459, "step": 16201 }, { "epoch": 0.7435179661328071, "grad_norm": 0.42188361287117004, "learning_rate": 8.72465226589091e-06, "loss": 0.3066, "step": 16202 }, { "epoch": 0.7435638566380616, "grad_norm": 0.4514961242675781, "learning_rate": 8.72448868623039e-06, "loss": 0.3437, "step": 16203 }, { "epoch": 0.7436097471433161, "grad_norm": 0.4493747353553772, "learning_rate": 8.724325097613651e-06, "loss": 0.4069, "step": 16204 }, { "epoch": 0.7436556376485705, "grad_norm": 0.5089772939682007, "learning_rate": 8.724161500041088e-06, "loss": 0.4212, "step": 16205 }, { "epoch": 0.743701528153825, "grad_norm": 0.4710423946380615, "learning_rate": 8.723997893513089e-06, "loss": 0.4122, "step": 16206 }, { "epoch": 0.7437474186590795, "grad_norm": 0.4489966332912445, "learning_rate": 8.723834278030053e-06, "loss": 0.3714, "step": 16207 }, { "epoch": 0.743793309164334, "grad_norm": 0.4418530762195587, "learning_rate": 8.72367065359237e-06, "loss": 0.383, "step": 16208 }, { "epoch": 0.7438391996695883, "grad_norm": 0.463349312543869, "learning_rate": 8.723507020200433e-06, "loss": 0.4312, "step": 16209 }, { "epoch": 0.7438850901748428, "grad_norm": 0.5727741718292236, "learning_rate": 8.723343377854636e-06, "loss": 0.4575, "step": 16210 }, { "epoch": 0.7439309806800973, "grad_norm": 0.46262651681900024, "learning_rate": 8.723179726555376e-06, "loss": 0.4151, "step": 16211 }, { "epoch": 0.7439768711853517, "grad_norm": 0.44559091329574585, "learning_rate": 8.723016066303042e-06, "loss": 0.3787, "step": 16212 }, { "epoch": 0.7440227616906062, "grad_norm": 0.4506240785121918, "learning_rate": 8.722852397098029e-06, "loss": 0.4261, "step": 16213 }, { "epoch": 0.7440686521958607, "grad_norm": 0.4291321635246277, "learning_rate": 8.722688718940733e-06, "loss": 0.3241, "step": 16214 }, { "epoch": 0.7441145427011151, "grad_norm": 0.4535481035709381, "learning_rate": 8.722525031831544e-06, "loss": 0.4105, "step": 16215 }, { "epoch": 0.7441604332063696, "grad_norm": 0.4677073359489441, "learning_rate": 8.722361335770858e-06, "loss": 0.4491, "step": 16216 }, { "epoch": 0.7442063237116241, "grad_norm": 0.4451531171798706, "learning_rate": 8.722197630759069e-06, "loss": 0.4068, "step": 16217 }, { "epoch": 0.7442522142168785, "grad_norm": 0.4810009300708771, "learning_rate": 8.722033916796567e-06, "loss": 0.47, "step": 16218 }, { "epoch": 0.744298104722133, "grad_norm": 0.4522962272167206, "learning_rate": 8.721870193883748e-06, "loss": 0.3713, "step": 16219 }, { "epoch": 0.7443439952273875, "grad_norm": 0.4376360774040222, "learning_rate": 8.721706462021007e-06, "loss": 0.3839, "step": 16220 }, { "epoch": 0.7443898857326419, "grad_norm": 0.4575299024581909, "learning_rate": 8.721542721208738e-06, "loss": 0.396, "step": 16221 }, { "epoch": 0.7444357762378964, "grad_norm": 0.43664589524269104, "learning_rate": 8.721378971447331e-06, "loss": 0.3538, "step": 16222 }, { "epoch": 0.7444816667431509, "grad_norm": 0.440625935792923, "learning_rate": 8.721215212737183e-06, "loss": 0.3999, "step": 16223 }, { "epoch": 0.7445275572484054, "grad_norm": 0.4351752996444702, "learning_rate": 8.721051445078686e-06, "loss": 0.3299, "step": 16224 }, { "epoch": 0.7445734477536597, "grad_norm": 0.5112953782081604, "learning_rate": 8.720887668472235e-06, "loss": 0.4671, "step": 16225 }, { "epoch": 0.7446193382589142, "grad_norm": 0.4620542824268341, "learning_rate": 8.720723882918225e-06, "loss": 0.4266, "step": 16226 }, { "epoch": 0.7446652287641687, "grad_norm": 0.4738379120826721, "learning_rate": 8.720560088417048e-06, "loss": 0.3883, "step": 16227 }, { "epoch": 0.7447111192694231, "grad_norm": 0.46384352445602417, "learning_rate": 8.720396284969096e-06, "loss": 0.4194, "step": 16228 }, { "epoch": 0.7447570097746776, "grad_norm": 0.4519800543785095, "learning_rate": 8.720232472574768e-06, "loss": 0.4543, "step": 16229 }, { "epoch": 0.7448029002799321, "grad_norm": 0.5369689464569092, "learning_rate": 8.720068651234452e-06, "loss": 0.4525, "step": 16230 }, { "epoch": 0.7448487907851865, "grad_norm": 0.4495983421802521, "learning_rate": 8.719904820948548e-06, "loss": 0.425, "step": 16231 }, { "epoch": 0.744894681290441, "grad_norm": 0.4389208257198334, "learning_rate": 8.719740981717445e-06, "loss": 0.3543, "step": 16232 }, { "epoch": 0.7449405717956955, "grad_norm": 0.4325524568557739, "learning_rate": 8.71957713354154e-06, "loss": 0.341, "step": 16233 }, { "epoch": 0.7449864623009499, "grad_norm": 0.44216424226760864, "learning_rate": 8.719413276421225e-06, "loss": 0.3566, "step": 16234 }, { "epoch": 0.7450323528062044, "grad_norm": 0.45559072494506836, "learning_rate": 8.719249410356895e-06, "loss": 0.3588, "step": 16235 }, { "epoch": 0.7450782433114589, "grad_norm": 0.4964854121208191, "learning_rate": 8.719085535348946e-06, "loss": 0.5206, "step": 16236 }, { "epoch": 0.7451241338167133, "grad_norm": 0.49076318740844727, "learning_rate": 8.718921651397767e-06, "loss": 0.4464, "step": 16237 }, { "epoch": 0.7451700243219678, "grad_norm": 0.47047311067581177, "learning_rate": 8.718757758503758e-06, "loss": 0.4493, "step": 16238 }, { "epoch": 0.7452159148272223, "grad_norm": 0.4499049186706543, "learning_rate": 8.718593856667307e-06, "loss": 0.3685, "step": 16239 }, { "epoch": 0.7452618053324767, "grad_norm": 0.4402592182159424, "learning_rate": 8.718429945888813e-06, "loss": 0.3348, "step": 16240 }, { "epoch": 0.7453076958377312, "grad_norm": 0.4658980965614319, "learning_rate": 8.71826602616867e-06, "loss": 0.4115, "step": 16241 }, { "epoch": 0.7453535863429857, "grad_norm": 0.420261949300766, "learning_rate": 8.71810209750727e-06, "loss": 0.3359, "step": 16242 }, { "epoch": 0.7453994768482402, "grad_norm": 0.4881352186203003, "learning_rate": 8.717938159905006e-06, "loss": 0.4537, "step": 16243 }, { "epoch": 0.7454453673534945, "grad_norm": 0.41938018798828125, "learning_rate": 8.717774213362277e-06, "loss": 0.3201, "step": 16244 }, { "epoch": 0.745491257858749, "grad_norm": 0.44861623644828796, "learning_rate": 8.717610257879472e-06, "loss": 0.3196, "step": 16245 }, { "epoch": 0.7455371483640035, "grad_norm": 0.4728897213935852, "learning_rate": 8.717446293456989e-06, "loss": 0.4528, "step": 16246 }, { "epoch": 0.7455830388692579, "grad_norm": 0.4771125316619873, "learning_rate": 8.71728232009522e-06, "loss": 0.4281, "step": 16247 }, { "epoch": 0.7456289293745124, "grad_norm": 0.47516292333602905, "learning_rate": 8.717118337794562e-06, "loss": 0.4029, "step": 16248 }, { "epoch": 0.7456748198797669, "grad_norm": 0.48476868867874146, "learning_rate": 8.716954346555406e-06, "loss": 0.4898, "step": 16249 }, { "epoch": 0.7457207103850213, "grad_norm": 0.4975925385951996, "learning_rate": 8.716790346378147e-06, "loss": 0.4811, "step": 16250 }, { "epoch": 0.7457666008902758, "grad_norm": 0.4206581115722656, "learning_rate": 8.716626337263183e-06, "loss": 0.3017, "step": 16251 }, { "epoch": 0.7458124913955303, "grad_norm": 0.4390853941440582, "learning_rate": 8.716462319210903e-06, "loss": 0.3789, "step": 16252 }, { "epoch": 0.7458583819007847, "grad_norm": 0.4813790023326874, "learning_rate": 8.716298292221706e-06, "loss": 0.3625, "step": 16253 }, { "epoch": 0.7459042724060392, "grad_norm": 0.47483494877815247, "learning_rate": 8.716134256295982e-06, "loss": 0.405, "step": 16254 }, { "epoch": 0.7459501629112937, "grad_norm": 0.448069304227829, "learning_rate": 8.715970211434132e-06, "loss": 0.3949, "step": 16255 }, { "epoch": 0.7459960534165481, "grad_norm": 0.4797340929508209, "learning_rate": 8.715806157636543e-06, "loss": 0.4413, "step": 16256 }, { "epoch": 0.7460419439218026, "grad_norm": 0.457722544670105, "learning_rate": 8.715642094903614e-06, "loss": 0.3786, "step": 16257 }, { "epoch": 0.7460878344270571, "grad_norm": 0.427398145198822, "learning_rate": 8.71547802323574e-06, "loss": 0.3218, "step": 16258 }, { "epoch": 0.7461337249323116, "grad_norm": 0.418654203414917, "learning_rate": 8.715313942633312e-06, "loss": 0.3191, "step": 16259 }, { "epoch": 0.746179615437566, "grad_norm": 0.4768824577331543, "learning_rate": 8.715149853096727e-06, "loss": 0.4881, "step": 16260 }, { "epoch": 0.7462255059428204, "grad_norm": 0.4630188047885895, "learning_rate": 8.71498575462638e-06, "loss": 0.3974, "step": 16261 }, { "epoch": 0.7462713964480749, "grad_norm": 0.4186602532863617, "learning_rate": 8.714821647222662e-06, "loss": 0.3233, "step": 16262 }, { "epoch": 0.7463172869533293, "grad_norm": 0.4300456643104553, "learning_rate": 8.714657530885972e-06, "loss": 0.3746, "step": 16263 }, { "epoch": 0.7463631774585838, "grad_norm": 0.46721217036247253, "learning_rate": 8.714493405616704e-06, "loss": 0.406, "step": 16264 }, { "epoch": 0.7464090679638383, "grad_norm": 0.48147737979888916, "learning_rate": 8.714329271415252e-06, "loss": 0.3742, "step": 16265 }, { "epoch": 0.7464549584690927, "grad_norm": 0.46314895153045654, "learning_rate": 8.714165128282008e-06, "loss": 0.4183, "step": 16266 }, { "epoch": 0.7465008489743472, "grad_norm": 0.4475615620613098, "learning_rate": 8.714000976217372e-06, "loss": 0.367, "step": 16267 }, { "epoch": 0.7465467394796017, "grad_norm": 0.4105927050113678, "learning_rate": 8.713836815221733e-06, "loss": 0.2798, "step": 16268 }, { "epoch": 0.7465926299848561, "grad_norm": 0.42869147658348083, "learning_rate": 8.71367264529549e-06, "loss": 0.336, "step": 16269 }, { "epoch": 0.7466385204901106, "grad_norm": 0.4547214210033417, "learning_rate": 8.713508466439037e-06, "loss": 0.393, "step": 16270 }, { "epoch": 0.7466844109953651, "grad_norm": 0.4529729187488556, "learning_rate": 8.713344278652766e-06, "loss": 0.3674, "step": 16271 }, { "epoch": 0.7467303015006195, "grad_norm": 0.49468713998794556, "learning_rate": 8.713180081937077e-06, "loss": 0.4506, "step": 16272 }, { "epoch": 0.746776192005874, "grad_norm": 0.4592164158821106, "learning_rate": 8.71301587629236e-06, "loss": 0.4144, "step": 16273 }, { "epoch": 0.7468220825111285, "grad_norm": 0.417090505361557, "learning_rate": 8.712851661719011e-06, "loss": 0.3323, "step": 16274 }, { "epoch": 0.7468679730163829, "grad_norm": 0.4867130219936371, "learning_rate": 8.712687438217427e-06, "loss": 0.4587, "step": 16275 }, { "epoch": 0.7469138635216374, "grad_norm": 0.47335928678512573, "learning_rate": 8.712523205788001e-06, "loss": 0.409, "step": 16276 }, { "epoch": 0.7469597540268919, "grad_norm": 0.4299606382846832, "learning_rate": 8.71235896443113e-06, "loss": 0.3584, "step": 16277 }, { "epoch": 0.7470056445321464, "grad_norm": 0.45408281683921814, "learning_rate": 8.712194714147204e-06, "loss": 0.3485, "step": 16278 }, { "epoch": 0.7470515350374007, "grad_norm": 0.4508644938468933, "learning_rate": 8.712030454936621e-06, "loss": 0.3595, "step": 16279 }, { "epoch": 0.7470974255426552, "grad_norm": 0.458680659532547, "learning_rate": 8.71186618679978e-06, "loss": 0.3975, "step": 16280 }, { "epoch": 0.7471433160479097, "grad_norm": 0.5161157250404358, "learning_rate": 8.711701909737071e-06, "loss": 0.5008, "step": 16281 }, { "epoch": 0.7471892065531641, "grad_norm": 0.47313717007637024, "learning_rate": 8.71153762374889e-06, "loss": 0.44, "step": 16282 }, { "epoch": 0.7472350970584186, "grad_norm": 0.44445687532424927, "learning_rate": 8.711373328835633e-06, "loss": 0.4042, "step": 16283 }, { "epoch": 0.7472809875636731, "grad_norm": 0.4926909804344177, "learning_rate": 8.711209024997694e-06, "loss": 0.4408, "step": 16284 }, { "epoch": 0.7473268780689275, "grad_norm": 0.4849625527858734, "learning_rate": 8.711044712235467e-06, "loss": 0.435, "step": 16285 }, { "epoch": 0.747372768574182, "grad_norm": 0.4572621285915375, "learning_rate": 8.71088039054935e-06, "loss": 0.3374, "step": 16286 }, { "epoch": 0.7474186590794365, "grad_norm": 0.4671629071235657, "learning_rate": 8.710716059939736e-06, "loss": 0.3991, "step": 16287 }, { "epoch": 0.7474645495846909, "grad_norm": 0.47363194823265076, "learning_rate": 8.710551720407024e-06, "loss": 0.405, "step": 16288 }, { "epoch": 0.7475104400899454, "grad_norm": 0.45135387778282166, "learning_rate": 8.710387371951603e-06, "loss": 0.3688, "step": 16289 }, { "epoch": 0.7475563305951999, "grad_norm": 0.44756782054901123, "learning_rate": 8.710223014573872e-06, "loss": 0.3438, "step": 16290 }, { "epoch": 0.7476022211004543, "grad_norm": 0.44525274634361267, "learning_rate": 8.710058648274228e-06, "loss": 0.3487, "step": 16291 }, { "epoch": 0.7476481116057088, "grad_norm": 0.4298510253429413, "learning_rate": 8.70989427305306e-06, "loss": 0.3668, "step": 16292 }, { "epoch": 0.7476940021109633, "grad_norm": 0.5232069492340088, "learning_rate": 8.709729888910771e-06, "loss": 0.5645, "step": 16293 }, { "epoch": 0.7477398926162176, "grad_norm": 0.46848854422569275, "learning_rate": 8.70956549584775e-06, "loss": 0.4005, "step": 16294 }, { "epoch": 0.7477857831214721, "grad_norm": 0.5575319528579712, "learning_rate": 8.709401093864396e-06, "loss": 0.3737, "step": 16295 }, { "epoch": 0.7478316736267266, "grad_norm": 0.43608811497688293, "learning_rate": 8.709236682961101e-06, "loss": 0.3516, "step": 16296 }, { "epoch": 0.7478775641319811, "grad_norm": 0.4218136668205261, "learning_rate": 8.709072263138265e-06, "loss": 0.333, "step": 16297 }, { "epoch": 0.7479234546372355, "grad_norm": 0.4742417335510254, "learning_rate": 8.708907834396281e-06, "loss": 0.426, "step": 16298 }, { "epoch": 0.74796934514249, "grad_norm": 0.49672842025756836, "learning_rate": 8.708743396735541e-06, "loss": 0.4655, "step": 16299 }, { "epoch": 0.7480152356477445, "grad_norm": 0.42824864387512207, "learning_rate": 8.708578950156447e-06, "loss": 0.3524, "step": 16300 }, { "epoch": 0.7480611261529989, "grad_norm": 0.5222131609916687, "learning_rate": 8.70841449465939e-06, "loss": 0.5447, "step": 16301 }, { "epoch": 0.7481070166582534, "grad_norm": 0.4452823996543884, "learning_rate": 8.708250030244766e-06, "loss": 0.3818, "step": 16302 }, { "epoch": 0.7481529071635079, "grad_norm": 0.45923227071762085, "learning_rate": 8.708085556912971e-06, "loss": 0.4081, "step": 16303 }, { "epoch": 0.7481987976687623, "grad_norm": 0.44174447655677795, "learning_rate": 8.707921074664402e-06, "loss": 0.3811, "step": 16304 }, { "epoch": 0.7482446881740168, "grad_norm": 0.5227293372154236, "learning_rate": 8.707756583499452e-06, "loss": 0.5099, "step": 16305 }, { "epoch": 0.7482905786792713, "grad_norm": 0.4361780285835266, "learning_rate": 8.707592083418516e-06, "loss": 0.3473, "step": 16306 }, { "epoch": 0.7483364691845257, "grad_norm": 0.4863373041152954, "learning_rate": 8.707427574421994e-06, "loss": 0.4552, "step": 16307 }, { "epoch": 0.7483823596897802, "grad_norm": 0.4374549686908722, "learning_rate": 8.707263056510278e-06, "loss": 0.3911, "step": 16308 }, { "epoch": 0.7484282501950347, "grad_norm": 0.43347620964050293, "learning_rate": 8.707098529683764e-06, "loss": 0.3572, "step": 16309 }, { "epoch": 0.7484741407002891, "grad_norm": 0.4563251733779907, "learning_rate": 8.706933993942848e-06, "loss": 0.4066, "step": 16310 }, { "epoch": 0.7485200312055436, "grad_norm": 0.5133817195892334, "learning_rate": 8.706769449287923e-06, "loss": 0.5011, "step": 16311 }, { "epoch": 0.748565921710798, "grad_norm": 0.4653419554233551, "learning_rate": 8.706604895719391e-06, "loss": 0.4015, "step": 16312 }, { "epoch": 0.7486118122160526, "grad_norm": 0.5610264539718628, "learning_rate": 8.706440333237645e-06, "loss": 0.4136, "step": 16313 }, { "epoch": 0.7486577027213069, "grad_norm": 0.4334607720375061, "learning_rate": 8.706275761843078e-06, "loss": 0.3714, "step": 16314 }, { "epoch": 0.7487035932265614, "grad_norm": 0.447208970785141, "learning_rate": 8.706111181536087e-06, "loss": 0.4384, "step": 16315 }, { "epoch": 0.7487494837318159, "grad_norm": 0.4321472644805908, "learning_rate": 8.705946592317067e-06, "loss": 0.3503, "step": 16316 }, { "epoch": 0.7487953742370703, "grad_norm": 0.48500803112983704, "learning_rate": 8.705781994186418e-06, "loss": 0.4111, "step": 16317 }, { "epoch": 0.7488412647423248, "grad_norm": 0.5076285600662231, "learning_rate": 8.705617387144532e-06, "loss": 0.4968, "step": 16318 }, { "epoch": 0.7488871552475793, "grad_norm": 0.4363185167312622, "learning_rate": 8.705452771191804e-06, "loss": 0.3582, "step": 16319 }, { "epoch": 0.7489330457528337, "grad_norm": 0.4652787744998932, "learning_rate": 8.705288146328635e-06, "loss": 0.4665, "step": 16320 }, { "epoch": 0.7489789362580882, "grad_norm": 0.4444766938686371, "learning_rate": 8.705123512555414e-06, "loss": 0.362, "step": 16321 }, { "epoch": 0.7490248267633427, "grad_norm": 0.47886624932289124, "learning_rate": 8.704958869872542e-06, "loss": 0.3991, "step": 16322 }, { "epoch": 0.7490707172685971, "grad_norm": 0.4478308856487274, "learning_rate": 8.704794218280413e-06, "loss": 0.4066, "step": 16323 }, { "epoch": 0.7491166077738516, "grad_norm": 0.4371248781681061, "learning_rate": 8.704629557779423e-06, "loss": 0.3767, "step": 16324 }, { "epoch": 0.7491624982791061, "grad_norm": 0.46291181445121765, "learning_rate": 8.704464888369967e-06, "loss": 0.4233, "step": 16325 }, { "epoch": 0.7492083887843605, "grad_norm": 0.4726863503456116, "learning_rate": 8.704300210052444e-06, "loss": 0.4017, "step": 16326 }, { "epoch": 0.749254279289615, "grad_norm": 0.4388914108276367, "learning_rate": 8.704135522827247e-06, "loss": 0.4206, "step": 16327 }, { "epoch": 0.7493001697948695, "grad_norm": 0.4627699851989746, "learning_rate": 8.703970826694774e-06, "loss": 0.3713, "step": 16328 }, { "epoch": 0.7493460603001239, "grad_norm": 0.4611329436302185, "learning_rate": 8.70380612165542e-06, "loss": 0.3726, "step": 16329 }, { "epoch": 0.7493919508053783, "grad_norm": 0.4225649833679199, "learning_rate": 8.70364140770958e-06, "loss": 0.2842, "step": 16330 }, { "epoch": 0.7494378413106328, "grad_norm": 0.4449273943901062, "learning_rate": 8.703476684857653e-06, "loss": 0.4146, "step": 16331 }, { "epoch": 0.7494837318158873, "grad_norm": 0.46716463565826416, "learning_rate": 8.703311953100033e-06, "loss": 0.4352, "step": 16332 }, { "epoch": 0.7495296223211417, "grad_norm": 0.43684014678001404, "learning_rate": 8.703147212437115e-06, "loss": 0.3728, "step": 16333 }, { "epoch": 0.7495755128263962, "grad_norm": 0.49534955620765686, "learning_rate": 8.702982462869298e-06, "loss": 0.4512, "step": 16334 }, { "epoch": 0.7496214033316507, "grad_norm": 0.45564666390419006, "learning_rate": 8.702817704396977e-06, "loss": 0.394, "step": 16335 }, { "epoch": 0.7496672938369051, "grad_norm": 0.46841639280319214, "learning_rate": 8.702652937020548e-06, "loss": 0.4657, "step": 16336 }, { "epoch": 0.7497131843421596, "grad_norm": 0.4222855269908905, "learning_rate": 8.702488160740408e-06, "loss": 0.3589, "step": 16337 }, { "epoch": 0.7497590748474141, "grad_norm": 0.49153923988342285, "learning_rate": 8.70232337555695e-06, "loss": 0.4462, "step": 16338 }, { "epoch": 0.7498049653526685, "grad_norm": 0.4328003525733948, "learning_rate": 8.702158581470576e-06, "loss": 0.3967, "step": 16339 }, { "epoch": 0.749850855857923, "grad_norm": 0.4350324869155884, "learning_rate": 8.701993778481676e-06, "loss": 0.3751, "step": 16340 }, { "epoch": 0.7498967463631775, "grad_norm": 0.44061386585235596, "learning_rate": 8.701828966590652e-06, "loss": 0.3913, "step": 16341 }, { "epoch": 0.7499426368684319, "grad_norm": 0.4632883071899414, "learning_rate": 8.701664145797897e-06, "loss": 0.4319, "step": 16342 }, { "epoch": 0.7499885273736864, "grad_norm": 0.43110471963882446, "learning_rate": 8.701499316103807e-06, "loss": 0.3248, "step": 16343 }, { "epoch": 0.7500344178789409, "grad_norm": 0.4401867687702179, "learning_rate": 8.70133447750878e-06, "loss": 0.4011, "step": 16344 }, { "epoch": 0.7500803083841953, "grad_norm": 0.45379939675331116, "learning_rate": 8.70116963001321e-06, "loss": 0.4471, "step": 16345 }, { "epoch": 0.7501261988894498, "grad_norm": 0.47710928320884705, "learning_rate": 8.7010047736175e-06, "loss": 0.4331, "step": 16346 }, { "epoch": 0.7501720893947043, "grad_norm": 0.4136173129081726, "learning_rate": 8.700839908322037e-06, "loss": 0.2936, "step": 16347 }, { "epoch": 0.7502179798999588, "grad_norm": 0.45674893260002136, "learning_rate": 8.700675034127225e-06, "loss": 0.4143, "step": 16348 }, { "epoch": 0.7502638704052131, "grad_norm": 0.42345237731933594, "learning_rate": 8.700510151033454e-06, "loss": 0.3128, "step": 16349 }, { "epoch": 0.7503097609104676, "grad_norm": 0.4282125234603882, "learning_rate": 8.700345259041125e-06, "loss": 0.3288, "step": 16350 }, { "epoch": 0.7503556514157221, "grad_norm": 0.45034828782081604, "learning_rate": 8.700180358150637e-06, "loss": 0.4057, "step": 16351 }, { "epoch": 0.7504015419209765, "grad_norm": 0.4370351731777191, "learning_rate": 8.700015448362378e-06, "loss": 0.3321, "step": 16352 }, { "epoch": 0.750447432426231, "grad_norm": 0.4691845774650574, "learning_rate": 8.699850529676754e-06, "loss": 0.4051, "step": 16353 }, { "epoch": 0.7504933229314855, "grad_norm": 0.460775226354599, "learning_rate": 8.699685602094154e-06, "loss": 0.4517, "step": 16354 }, { "epoch": 0.7505392134367399, "grad_norm": 0.47734689712524414, "learning_rate": 8.69952066561498e-06, "loss": 0.4748, "step": 16355 }, { "epoch": 0.7505851039419944, "grad_norm": 0.4480847418308258, "learning_rate": 8.699355720239625e-06, "loss": 0.3702, "step": 16356 }, { "epoch": 0.7506309944472489, "grad_norm": 0.4404084086418152, "learning_rate": 8.699190765968486e-06, "loss": 0.3583, "step": 16357 }, { "epoch": 0.7506768849525033, "grad_norm": 0.48154380917549133, "learning_rate": 8.699025802801963e-06, "loss": 0.4294, "step": 16358 }, { "epoch": 0.7507227754577578, "grad_norm": 0.47850754857063293, "learning_rate": 8.69886083074045e-06, "loss": 0.4756, "step": 16359 }, { "epoch": 0.7507686659630123, "grad_norm": 0.41177526116371155, "learning_rate": 8.698695849784344e-06, "loss": 0.3374, "step": 16360 }, { "epoch": 0.7508145564682667, "grad_norm": 0.448499470949173, "learning_rate": 8.698530859934042e-06, "loss": 0.4106, "step": 16361 }, { "epoch": 0.7508604469735212, "grad_norm": 0.41752296686172485, "learning_rate": 8.69836586118994e-06, "loss": 0.3313, "step": 16362 }, { "epoch": 0.7509063374787757, "grad_norm": 0.43674740195274353, "learning_rate": 8.698200853552434e-06, "loss": 0.3934, "step": 16363 }, { "epoch": 0.75095222798403, "grad_norm": 0.44744357466697693, "learning_rate": 8.698035837021925e-06, "loss": 0.3776, "step": 16364 }, { "epoch": 0.7509981184892845, "grad_norm": 0.4480912685394287, "learning_rate": 8.697870811598806e-06, "loss": 0.3739, "step": 16365 }, { "epoch": 0.751044008994539, "grad_norm": 0.47732213139533997, "learning_rate": 8.697705777283474e-06, "loss": 0.3588, "step": 16366 }, { "epoch": 0.7510898994997935, "grad_norm": 0.43123915791511536, "learning_rate": 8.697540734076327e-06, "loss": 0.3273, "step": 16367 }, { "epoch": 0.7511357900050479, "grad_norm": 0.47396403551101685, "learning_rate": 8.69737568197776e-06, "loss": 0.3506, "step": 16368 }, { "epoch": 0.7511816805103024, "grad_norm": 0.47032302618026733, "learning_rate": 8.697210620988174e-06, "loss": 0.4085, "step": 16369 }, { "epoch": 0.7512275710155569, "grad_norm": 0.43720537424087524, "learning_rate": 8.697045551107962e-06, "loss": 0.3714, "step": 16370 }, { "epoch": 0.7512734615208113, "grad_norm": 0.46425676345825195, "learning_rate": 8.696880472337521e-06, "loss": 0.4333, "step": 16371 }, { "epoch": 0.7513193520260658, "grad_norm": 0.43903085589408875, "learning_rate": 8.696715384677252e-06, "loss": 0.3721, "step": 16372 }, { "epoch": 0.7513652425313203, "grad_norm": 0.4716162085533142, "learning_rate": 8.696550288127548e-06, "loss": 0.4093, "step": 16373 }, { "epoch": 0.7514111330365747, "grad_norm": 0.47652992606163025, "learning_rate": 8.696385182688807e-06, "loss": 0.4341, "step": 16374 }, { "epoch": 0.7514570235418292, "grad_norm": 0.45425400137901306, "learning_rate": 8.696220068361427e-06, "loss": 0.4149, "step": 16375 }, { "epoch": 0.7515029140470837, "grad_norm": 0.469321608543396, "learning_rate": 8.696054945145804e-06, "loss": 0.3955, "step": 16376 }, { "epoch": 0.7515488045523381, "grad_norm": 0.4663131833076477, "learning_rate": 8.695889813042334e-06, "loss": 0.436, "step": 16377 }, { "epoch": 0.7515946950575926, "grad_norm": 0.44780367612838745, "learning_rate": 8.695724672051417e-06, "loss": 0.4238, "step": 16378 }, { "epoch": 0.7516405855628471, "grad_norm": 0.424713134765625, "learning_rate": 8.695559522173447e-06, "loss": 0.29, "step": 16379 }, { "epoch": 0.7516864760681015, "grad_norm": 0.4987753629684448, "learning_rate": 8.695394363408825e-06, "loss": 0.4812, "step": 16380 }, { "epoch": 0.751732366573356, "grad_norm": 0.47493046522140503, "learning_rate": 8.695229195757944e-06, "loss": 0.4146, "step": 16381 }, { "epoch": 0.7517782570786105, "grad_norm": 0.41972991824150085, "learning_rate": 8.695064019221203e-06, "loss": 0.3408, "step": 16382 }, { "epoch": 0.7518241475838648, "grad_norm": 0.4698231816291809, "learning_rate": 8.694898833799001e-06, "loss": 0.4231, "step": 16383 }, { "epoch": 0.7518700380891193, "grad_norm": 0.47360682487487793, "learning_rate": 8.694733639491732e-06, "loss": 0.4395, "step": 16384 }, { "epoch": 0.7519159285943738, "grad_norm": 0.4774980843067169, "learning_rate": 8.694568436299795e-06, "loss": 0.4217, "step": 16385 }, { "epoch": 0.7519618190996283, "grad_norm": 0.4981301724910736, "learning_rate": 8.694403224223586e-06, "loss": 0.4273, "step": 16386 }, { "epoch": 0.7520077096048827, "grad_norm": 0.4048086702823639, "learning_rate": 8.694238003263504e-06, "loss": 0.2919, "step": 16387 }, { "epoch": 0.7520536001101372, "grad_norm": 0.48239797353744507, "learning_rate": 8.694072773419947e-06, "loss": 0.4837, "step": 16388 }, { "epoch": 0.7520994906153917, "grad_norm": 0.45203641057014465, "learning_rate": 8.69390753469331e-06, "loss": 0.3708, "step": 16389 }, { "epoch": 0.7521453811206461, "grad_norm": 0.47509756684303284, "learning_rate": 8.69374228708399e-06, "loss": 0.4623, "step": 16390 }, { "epoch": 0.7521912716259006, "grad_norm": 0.43781110644340515, "learning_rate": 8.693577030592387e-06, "loss": 0.3517, "step": 16391 }, { "epoch": 0.7522371621311551, "grad_norm": 0.49243250489234924, "learning_rate": 8.693411765218896e-06, "loss": 0.4689, "step": 16392 }, { "epoch": 0.7522830526364095, "grad_norm": 0.4801262617111206, "learning_rate": 8.693246490963916e-06, "loss": 0.4359, "step": 16393 }, { "epoch": 0.752328943141664, "grad_norm": 0.46314555406570435, "learning_rate": 8.693081207827843e-06, "loss": 0.3836, "step": 16394 }, { "epoch": 0.7523748336469185, "grad_norm": 0.4475667178630829, "learning_rate": 8.692915915811076e-06, "loss": 0.3643, "step": 16395 }, { "epoch": 0.7524207241521729, "grad_norm": 0.49406206607818604, "learning_rate": 8.692750614914011e-06, "loss": 0.5016, "step": 16396 }, { "epoch": 0.7524666146574274, "grad_norm": 0.47319164872169495, "learning_rate": 8.692585305137047e-06, "loss": 0.4051, "step": 16397 }, { "epoch": 0.7525125051626819, "grad_norm": 0.46962156891822815, "learning_rate": 8.69241998648058e-06, "loss": 0.4382, "step": 16398 }, { "epoch": 0.7525583956679363, "grad_norm": 0.4622677266597748, "learning_rate": 8.69225465894501e-06, "loss": 0.4107, "step": 16399 }, { "epoch": 0.7526042861731908, "grad_norm": 0.46496719121932983, "learning_rate": 8.692089322530732e-06, "loss": 0.3784, "step": 16400 }, { "epoch": 0.7526501766784452, "grad_norm": 0.4915473461151123, "learning_rate": 8.691923977238144e-06, "loss": 0.4607, "step": 16401 }, { "epoch": 0.7526960671836997, "grad_norm": 0.4346165359020233, "learning_rate": 8.691758623067643e-06, "loss": 0.4136, "step": 16402 }, { "epoch": 0.7527419576889541, "grad_norm": 0.45603784918785095, "learning_rate": 8.691593260019629e-06, "loss": 0.4152, "step": 16403 }, { "epoch": 0.7527878481942086, "grad_norm": 0.4316512644290924, "learning_rate": 8.691427888094499e-06, "loss": 0.3384, "step": 16404 }, { "epoch": 0.7528337386994631, "grad_norm": 0.47658947110176086, "learning_rate": 8.691262507292649e-06, "loss": 0.4681, "step": 16405 }, { "epoch": 0.7528796292047175, "grad_norm": 0.46447667479515076, "learning_rate": 8.691097117614478e-06, "loss": 0.4034, "step": 16406 }, { "epoch": 0.752925519709972, "grad_norm": 0.4404204785823822, "learning_rate": 8.690931719060382e-06, "loss": 0.3795, "step": 16407 }, { "epoch": 0.7529714102152265, "grad_norm": 0.4307515323162079, "learning_rate": 8.690766311630762e-06, "loss": 0.3734, "step": 16408 }, { "epoch": 0.7530173007204809, "grad_norm": 0.4438691735267639, "learning_rate": 8.690600895326013e-06, "loss": 0.3748, "step": 16409 }, { "epoch": 0.7530631912257354, "grad_norm": 0.41612228751182556, "learning_rate": 8.690435470146535e-06, "loss": 0.3, "step": 16410 }, { "epoch": 0.7531090817309899, "grad_norm": 0.45534875988960266, "learning_rate": 8.690270036092721e-06, "loss": 0.3793, "step": 16411 }, { "epoch": 0.7531549722362443, "grad_norm": 0.448817640542984, "learning_rate": 8.690104593164975e-06, "loss": 0.4143, "step": 16412 }, { "epoch": 0.7532008627414988, "grad_norm": 0.4827724099159241, "learning_rate": 8.689939141363693e-06, "loss": 0.4489, "step": 16413 }, { "epoch": 0.7532467532467533, "grad_norm": 0.4931320548057556, "learning_rate": 8.68977368068927e-06, "loss": 0.4687, "step": 16414 }, { "epoch": 0.7532926437520077, "grad_norm": 0.5032831430435181, "learning_rate": 8.689608211142106e-06, "loss": 0.5154, "step": 16415 }, { "epoch": 0.7533385342572622, "grad_norm": 0.4744589626789093, "learning_rate": 8.6894427327226e-06, "loss": 0.4999, "step": 16416 }, { "epoch": 0.7533844247625167, "grad_norm": 0.4592866897583008, "learning_rate": 8.689277245431148e-06, "loss": 0.3769, "step": 16417 }, { "epoch": 0.753430315267771, "grad_norm": 0.43515658378601074, "learning_rate": 8.689111749268147e-06, "loss": 0.3653, "step": 16418 }, { "epoch": 0.7534762057730255, "grad_norm": 0.45515766739845276, "learning_rate": 8.688946244233998e-06, "loss": 0.4512, "step": 16419 }, { "epoch": 0.75352209627828, "grad_norm": 0.4465077519416809, "learning_rate": 8.688780730329096e-06, "loss": 0.3767, "step": 16420 }, { "epoch": 0.7535679867835345, "grad_norm": 0.43124011158943176, "learning_rate": 8.688615207553843e-06, "loss": 0.3691, "step": 16421 }, { "epoch": 0.7536138772887889, "grad_norm": 0.4072433114051819, "learning_rate": 8.688449675908633e-06, "loss": 0.3323, "step": 16422 }, { "epoch": 0.7536597677940434, "grad_norm": 0.4665652811527252, "learning_rate": 8.688284135393867e-06, "loss": 0.454, "step": 16423 }, { "epoch": 0.7537056582992979, "grad_norm": 0.4780343770980835, "learning_rate": 8.688118586009941e-06, "loss": 0.3746, "step": 16424 }, { "epoch": 0.7537515488045523, "grad_norm": 0.4679764211177826, "learning_rate": 8.687953027757251e-06, "loss": 0.4435, "step": 16425 }, { "epoch": 0.7537974393098068, "grad_norm": 0.4295421838760376, "learning_rate": 8.687787460636203e-06, "loss": 0.3573, "step": 16426 }, { "epoch": 0.7538433298150613, "grad_norm": 0.45141521096229553, "learning_rate": 8.687621884647187e-06, "loss": 0.3828, "step": 16427 }, { "epoch": 0.7538892203203157, "grad_norm": 0.45784273743629456, "learning_rate": 8.687456299790604e-06, "loss": 0.3848, "step": 16428 }, { "epoch": 0.7539351108255702, "grad_norm": 0.45207175612449646, "learning_rate": 8.687290706066854e-06, "loss": 0.4452, "step": 16429 }, { "epoch": 0.7539810013308247, "grad_norm": 0.45299622416496277, "learning_rate": 8.687125103476332e-06, "loss": 0.4178, "step": 16430 }, { "epoch": 0.7540268918360791, "grad_norm": 0.42734596133232117, "learning_rate": 8.68695949201944e-06, "loss": 0.3321, "step": 16431 }, { "epoch": 0.7540727823413336, "grad_norm": 0.43607038259506226, "learning_rate": 8.686793871696574e-06, "loss": 0.3247, "step": 16432 }, { "epoch": 0.7541186728465881, "grad_norm": 0.520976185798645, "learning_rate": 8.686628242508129e-06, "loss": 0.4701, "step": 16433 }, { "epoch": 0.7541645633518425, "grad_norm": 0.4357532560825348, "learning_rate": 8.686462604454509e-06, "loss": 0.3318, "step": 16434 }, { "epoch": 0.754210453857097, "grad_norm": 0.4754031002521515, "learning_rate": 8.68629695753611e-06, "loss": 0.3997, "step": 16435 }, { "epoch": 0.7542563443623514, "grad_norm": 0.4282938241958618, "learning_rate": 8.686131301753329e-06, "loss": 0.3727, "step": 16436 }, { "epoch": 0.754302234867606, "grad_norm": 0.48741579055786133, "learning_rate": 8.685965637106567e-06, "loss": 0.4259, "step": 16437 }, { "epoch": 0.7543481253728603, "grad_norm": 0.45609360933303833, "learning_rate": 8.68579996359622e-06, "loss": 0.3947, "step": 16438 }, { "epoch": 0.7543940158781148, "grad_norm": 0.44795170426368713, "learning_rate": 8.685634281222687e-06, "loss": 0.3983, "step": 16439 }, { "epoch": 0.7544399063833693, "grad_norm": 0.5011052489280701, "learning_rate": 8.685468589986369e-06, "loss": 0.4525, "step": 16440 }, { "epoch": 0.7544857968886237, "grad_norm": 0.4526043236255646, "learning_rate": 8.685302889887661e-06, "loss": 0.3774, "step": 16441 }, { "epoch": 0.7545316873938782, "grad_norm": 0.47486117482185364, "learning_rate": 8.685137180926961e-06, "loss": 0.4035, "step": 16442 }, { "epoch": 0.7545775778991327, "grad_norm": 0.46526750922203064, "learning_rate": 8.68497146310467e-06, "loss": 0.416, "step": 16443 }, { "epoch": 0.7546234684043871, "grad_norm": 0.4377737045288086, "learning_rate": 8.684805736421187e-06, "loss": 0.3469, "step": 16444 }, { "epoch": 0.7546693589096416, "grad_norm": 0.449629545211792, "learning_rate": 8.684640000876909e-06, "loss": 0.4686, "step": 16445 }, { "epoch": 0.7547152494148961, "grad_norm": 0.45988085865974426, "learning_rate": 8.684474256472234e-06, "loss": 0.4318, "step": 16446 }, { "epoch": 0.7547611399201505, "grad_norm": 0.4764769375324249, "learning_rate": 8.684308503207562e-06, "loss": 0.4015, "step": 16447 }, { "epoch": 0.754807030425405, "grad_norm": 0.46452459692955017, "learning_rate": 8.68414274108329e-06, "loss": 0.4128, "step": 16448 }, { "epoch": 0.7548529209306595, "grad_norm": 0.43204861879348755, "learning_rate": 8.683976970099817e-06, "loss": 0.37, "step": 16449 }, { "epoch": 0.7548988114359139, "grad_norm": 0.4518873989582062, "learning_rate": 8.683811190257544e-06, "loss": 0.4096, "step": 16450 }, { "epoch": 0.7549447019411684, "grad_norm": 0.4920696020126343, "learning_rate": 8.683645401556866e-06, "loss": 0.4952, "step": 16451 }, { "epoch": 0.7549905924464229, "grad_norm": 0.4557972252368927, "learning_rate": 8.683479603998183e-06, "loss": 0.3712, "step": 16452 }, { "epoch": 0.7550364829516772, "grad_norm": 0.43759390711784363, "learning_rate": 8.683313797581894e-06, "loss": 0.3727, "step": 16453 }, { "epoch": 0.7550823734569317, "grad_norm": 0.47462791204452515, "learning_rate": 8.6831479823084e-06, "loss": 0.4151, "step": 16454 }, { "epoch": 0.7551282639621862, "grad_norm": 0.4747932553291321, "learning_rate": 8.682982158178096e-06, "loss": 0.442, "step": 16455 }, { "epoch": 0.7551741544674407, "grad_norm": 0.4594792127609253, "learning_rate": 8.682816325191383e-06, "loss": 0.4028, "step": 16456 }, { "epoch": 0.7552200449726951, "grad_norm": 0.48942697048187256, "learning_rate": 8.682650483348657e-06, "loss": 0.4626, "step": 16457 }, { "epoch": 0.7552659354779496, "grad_norm": 0.4576258361339569, "learning_rate": 8.682484632650321e-06, "loss": 0.358, "step": 16458 }, { "epoch": 0.7553118259832041, "grad_norm": 0.43294620513916016, "learning_rate": 8.682318773096772e-06, "loss": 0.3543, "step": 16459 }, { "epoch": 0.7553577164884585, "grad_norm": 0.5066655278205872, "learning_rate": 8.682152904688408e-06, "loss": 0.5148, "step": 16460 }, { "epoch": 0.755403606993713, "grad_norm": 0.45729944109916687, "learning_rate": 8.681987027425626e-06, "loss": 0.4365, "step": 16461 }, { "epoch": 0.7554494974989675, "grad_norm": 0.47522297501564026, "learning_rate": 8.68182114130883e-06, "loss": 0.4227, "step": 16462 }, { "epoch": 0.7554953880042219, "grad_norm": 0.5697293281555176, "learning_rate": 8.681655246338417e-06, "loss": 0.521, "step": 16463 }, { "epoch": 0.7555412785094764, "grad_norm": 0.5223415493965149, "learning_rate": 8.681489342514782e-06, "loss": 0.459, "step": 16464 }, { "epoch": 0.7555871690147309, "grad_norm": 0.4290444552898407, "learning_rate": 8.68132342983833e-06, "loss": 0.3523, "step": 16465 }, { "epoch": 0.7556330595199853, "grad_norm": 0.4455793797969818, "learning_rate": 8.681157508309454e-06, "loss": 0.3867, "step": 16466 }, { "epoch": 0.7556789500252398, "grad_norm": 0.48968544602394104, "learning_rate": 8.68099157792856e-06, "loss": 0.4686, "step": 16467 }, { "epoch": 0.7557248405304943, "grad_norm": 0.4723726212978363, "learning_rate": 8.68082563869604e-06, "loss": 0.4642, "step": 16468 }, { "epoch": 0.7557707310357487, "grad_norm": 0.45363059639930725, "learning_rate": 8.680659690612297e-06, "loss": 0.3969, "step": 16469 }, { "epoch": 0.7558166215410032, "grad_norm": 0.4921032786369324, "learning_rate": 8.680493733677728e-06, "loss": 0.4451, "step": 16470 }, { "epoch": 0.7558625120462577, "grad_norm": 0.47581279277801514, "learning_rate": 8.680327767892734e-06, "loss": 0.38, "step": 16471 }, { "epoch": 0.755908402551512, "grad_norm": 0.4593745172023773, "learning_rate": 8.680161793257712e-06, "loss": 0.3867, "step": 16472 }, { "epoch": 0.7559542930567665, "grad_norm": 0.4932304322719574, "learning_rate": 8.679995809773065e-06, "loss": 0.4229, "step": 16473 }, { "epoch": 0.756000183562021, "grad_norm": 0.4568394720554352, "learning_rate": 8.679829817439187e-06, "loss": 0.4038, "step": 16474 }, { "epoch": 0.7560460740672755, "grad_norm": 0.4508468508720398, "learning_rate": 8.679663816256481e-06, "loss": 0.3477, "step": 16475 }, { "epoch": 0.7560919645725299, "grad_norm": 0.42869001626968384, "learning_rate": 8.679497806225345e-06, "loss": 0.3324, "step": 16476 }, { "epoch": 0.7561378550777844, "grad_norm": 0.48927417397499084, "learning_rate": 8.679331787346178e-06, "loss": 0.4479, "step": 16477 }, { "epoch": 0.7561837455830389, "grad_norm": 0.45819932222366333, "learning_rate": 8.679165759619378e-06, "loss": 0.4732, "step": 16478 }, { "epoch": 0.7562296360882933, "grad_norm": 0.4924762547016144, "learning_rate": 8.678999723045347e-06, "loss": 0.4736, "step": 16479 }, { "epoch": 0.7562755265935478, "grad_norm": 0.45785123109817505, "learning_rate": 8.678833677624483e-06, "loss": 0.4352, "step": 16480 }, { "epoch": 0.7563214170988023, "grad_norm": 0.551338255405426, "learning_rate": 8.678667623357184e-06, "loss": 0.4952, "step": 16481 }, { "epoch": 0.7563673076040567, "grad_norm": 0.4435712993144989, "learning_rate": 8.67850156024385e-06, "loss": 0.4096, "step": 16482 }, { "epoch": 0.7564131981093112, "grad_norm": 0.4520529508590698, "learning_rate": 8.678335488284882e-06, "loss": 0.3379, "step": 16483 }, { "epoch": 0.7564590886145657, "grad_norm": 0.4418685734272003, "learning_rate": 8.67816940748068e-06, "loss": 0.3527, "step": 16484 }, { "epoch": 0.7565049791198201, "grad_norm": 0.45493197441101074, "learning_rate": 8.678003317831636e-06, "loss": 0.4279, "step": 16485 }, { "epoch": 0.7565508696250746, "grad_norm": 0.41695305705070496, "learning_rate": 8.677837219338158e-06, "loss": 0.3624, "step": 16486 }, { "epoch": 0.7565967601303291, "grad_norm": 0.45581063628196716, "learning_rate": 8.677671112000643e-06, "loss": 0.4423, "step": 16487 }, { "epoch": 0.7566426506355834, "grad_norm": 0.4257953464984894, "learning_rate": 8.677504995819489e-06, "loss": 0.3605, "step": 16488 }, { "epoch": 0.756688541140838, "grad_norm": 0.4474286735057831, "learning_rate": 8.677338870795096e-06, "loss": 0.3764, "step": 16489 }, { "epoch": 0.7567344316460924, "grad_norm": 0.4403073191642761, "learning_rate": 8.677172736927864e-06, "loss": 0.3728, "step": 16490 }, { "epoch": 0.7567803221513469, "grad_norm": 0.4952709972858429, "learning_rate": 8.677006594218192e-06, "loss": 0.4687, "step": 16491 }, { "epoch": 0.7568262126566013, "grad_norm": 0.4735431373119354, "learning_rate": 8.676840442666478e-06, "loss": 0.462, "step": 16492 }, { "epoch": 0.7568721031618558, "grad_norm": 0.4452124834060669, "learning_rate": 8.676674282273125e-06, "loss": 0.3573, "step": 16493 }, { "epoch": 0.7569179936671103, "grad_norm": 0.4764384925365448, "learning_rate": 8.67650811303853e-06, "loss": 0.4161, "step": 16494 }, { "epoch": 0.7569638841723647, "grad_norm": 0.47172823548316956, "learning_rate": 8.676341934963093e-06, "loss": 0.4291, "step": 16495 }, { "epoch": 0.7570097746776192, "grad_norm": 0.46439728140830994, "learning_rate": 8.676175748047215e-06, "loss": 0.4583, "step": 16496 }, { "epoch": 0.7570556651828737, "grad_norm": 0.4912375211715698, "learning_rate": 8.676009552291293e-06, "loss": 0.4791, "step": 16497 }, { "epoch": 0.7571015556881281, "grad_norm": 0.4739128053188324, "learning_rate": 8.67584334769573e-06, "loss": 0.3964, "step": 16498 }, { "epoch": 0.7571474461933826, "grad_norm": 0.44093504548072815, "learning_rate": 8.675677134260922e-06, "loss": 0.3673, "step": 16499 }, { "epoch": 0.7571933366986371, "grad_norm": 0.4649273753166199, "learning_rate": 8.675510911987273e-06, "loss": 0.4304, "step": 16500 }, { "epoch": 0.7572392272038915, "grad_norm": 0.4365490972995758, "learning_rate": 8.67534468087518e-06, "loss": 0.3799, "step": 16501 }, { "epoch": 0.757285117709146, "grad_norm": 0.4415822923183441, "learning_rate": 8.675178440925041e-06, "loss": 0.3886, "step": 16502 }, { "epoch": 0.7573310082144005, "grad_norm": 0.45539602637290955, "learning_rate": 8.67501219213726e-06, "loss": 0.3644, "step": 16503 }, { "epoch": 0.7573768987196549, "grad_norm": 0.501395583152771, "learning_rate": 8.674845934512233e-06, "loss": 0.4997, "step": 16504 }, { "epoch": 0.7574227892249094, "grad_norm": 0.43436360359191895, "learning_rate": 8.674679668050362e-06, "loss": 0.3489, "step": 16505 }, { "epoch": 0.7574686797301639, "grad_norm": 0.41186556220054626, "learning_rate": 8.674513392752044e-06, "loss": 0.323, "step": 16506 }, { "epoch": 0.7575145702354182, "grad_norm": 0.4714110195636749, "learning_rate": 8.674347108617682e-06, "loss": 0.4293, "step": 16507 }, { "epoch": 0.7575604607406727, "grad_norm": 0.47999030351638794, "learning_rate": 8.674180815647677e-06, "loss": 0.4871, "step": 16508 }, { "epoch": 0.7576063512459272, "grad_norm": 0.4613196849822998, "learning_rate": 8.674014513842426e-06, "loss": 0.4404, "step": 16509 }, { "epoch": 0.7576522417511817, "grad_norm": 0.4788079857826233, "learning_rate": 8.67384820320233e-06, "loss": 0.4202, "step": 16510 }, { "epoch": 0.7576981322564361, "grad_norm": 0.46879836916923523, "learning_rate": 8.673681883727786e-06, "loss": 0.4148, "step": 16511 }, { "epoch": 0.7577440227616906, "grad_norm": 0.46039465069770813, "learning_rate": 8.673515555419199e-06, "loss": 0.4514, "step": 16512 }, { "epoch": 0.7577899132669451, "grad_norm": 0.44031867384910583, "learning_rate": 8.673349218276965e-06, "loss": 0.4, "step": 16513 }, { "epoch": 0.7578358037721995, "grad_norm": 0.49343207478523254, "learning_rate": 8.673182872301486e-06, "loss": 0.4607, "step": 16514 }, { "epoch": 0.757881694277454, "grad_norm": 0.5610526204109192, "learning_rate": 8.673016517493162e-06, "loss": 0.3694, "step": 16515 }, { "epoch": 0.7579275847827085, "grad_norm": 0.735427975654602, "learning_rate": 8.672850153852392e-06, "loss": 0.5461, "step": 16516 }, { "epoch": 0.7579734752879629, "grad_norm": 0.45693618059158325, "learning_rate": 8.672683781379575e-06, "loss": 0.4801, "step": 16517 }, { "epoch": 0.7580193657932174, "grad_norm": 0.4393463432788849, "learning_rate": 8.672517400075115e-06, "loss": 0.3573, "step": 16518 }, { "epoch": 0.7580652562984719, "grad_norm": 0.43958809971809387, "learning_rate": 8.672351009939411e-06, "loss": 0.3648, "step": 16519 }, { "epoch": 0.7581111468037263, "grad_norm": 0.43055394291877747, "learning_rate": 8.672184610972858e-06, "loss": 0.3439, "step": 16520 }, { "epoch": 0.7581570373089808, "grad_norm": 0.4080805480480194, "learning_rate": 8.672018203175862e-06, "loss": 0.3749, "step": 16521 }, { "epoch": 0.7582029278142353, "grad_norm": 0.44646701216697693, "learning_rate": 8.671851786548822e-06, "loss": 0.3987, "step": 16522 }, { "epoch": 0.7582488183194896, "grad_norm": 0.42510348558425903, "learning_rate": 8.671685361092135e-06, "loss": 0.346, "step": 16523 }, { "epoch": 0.7582947088247441, "grad_norm": 0.4436143636703491, "learning_rate": 8.671518926806206e-06, "loss": 0.4066, "step": 16524 }, { "epoch": 0.7583405993299986, "grad_norm": 0.4393865168094635, "learning_rate": 8.67135248369143e-06, "loss": 0.3947, "step": 16525 }, { "epoch": 0.7583864898352531, "grad_norm": 0.4510743021965027, "learning_rate": 8.671186031748214e-06, "loss": 0.3864, "step": 16526 }, { "epoch": 0.7584323803405075, "grad_norm": 0.46614620089530945, "learning_rate": 8.67101957097695e-06, "loss": 0.4331, "step": 16527 }, { "epoch": 0.758478270845762, "grad_norm": 0.4818055033683777, "learning_rate": 8.670853101378045e-06, "loss": 0.458, "step": 16528 }, { "epoch": 0.7585241613510165, "grad_norm": 0.44497495889663696, "learning_rate": 8.670686622951895e-06, "loss": 0.4144, "step": 16529 }, { "epoch": 0.7585700518562709, "grad_norm": 0.48932763934135437, "learning_rate": 8.670520135698906e-06, "loss": 0.4206, "step": 16530 }, { "epoch": 0.7586159423615254, "grad_norm": 0.4336932599544525, "learning_rate": 8.670353639619472e-06, "loss": 0.3425, "step": 16531 }, { "epoch": 0.7586618328667799, "grad_norm": 0.45773589611053467, "learning_rate": 8.670187134713995e-06, "loss": 0.3691, "step": 16532 }, { "epoch": 0.7587077233720343, "grad_norm": 0.5234536528587341, "learning_rate": 8.670020620982876e-06, "loss": 0.5156, "step": 16533 }, { "epoch": 0.7587536138772888, "grad_norm": 0.43747788667678833, "learning_rate": 8.669854098426517e-06, "loss": 0.3189, "step": 16534 }, { "epoch": 0.7587995043825433, "grad_norm": 0.47076040506362915, "learning_rate": 8.669687567045318e-06, "loss": 0.3831, "step": 16535 }, { "epoch": 0.7588453948877977, "grad_norm": 0.4278438985347748, "learning_rate": 8.669521026839677e-06, "loss": 0.3404, "step": 16536 }, { "epoch": 0.7588912853930522, "grad_norm": 0.4545997977256775, "learning_rate": 8.669354477809998e-06, "loss": 0.3997, "step": 16537 }, { "epoch": 0.7589371758983067, "grad_norm": 0.44615286588668823, "learning_rate": 8.669187919956676e-06, "loss": 0.3808, "step": 16538 }, { "epoch": 0.7589830664035611, "grad_norm": 0.4327333867549896, "learning_rate": 8.669021353280117e-06, "loss": 0.3799, "step": 16539 }, { "epoch": 0.7590289569088156, "grad_norm": 0.42314204573631287, "learning_rate": 8.66885477778072e-06, "loss": 0.353, "step": 16540 }, { "epoch": 0.75907484741407, "grad_norm": 0.44395628571510315, "learning_rate": 8.668688193458886e-06, "loss": 0.3871, "step": 16541 }, { "epoch": 0.7591207379193244, "grad_norm": 0.45105060935020447, "learning_rate": 8.668521600315014e-06, "loss": 0.3851, "step": 16542 }, { "epoch": 0.7591666284245789, "grad_norm": 0.9237419962882996, "learning_rate": 8.668354998349504e-06, "loss": 0.3966, "step": 16543 }, { "epoch": 0.7592125189298334, "grad_norm": 0.44396528601646423, "learning_rate": 8.668188387562759e-06, "loss": 0.3863, "step": 16544 }, { "epoch": 0.7592584094350879, "grad_norm": 0.5016133189201355, "learning_rate": 8.66802176795518e-06, "loss": 0.4464, "step": 16545 }, { "epoch": 0.7593042999403423, "grad_norm": 0.5487783551216125, "learning_rate": 8.667855139527164e-06, "loss": 0.4224, "step": 16546 }, { "epoch": 0.7593501904455968, "grad_norm": 0.4307863116264343, "learning_rate": 8.667688502279115e-06, "loss": 0.3748, "step": 16547 }, { "epoch": 0.7593960809508513, "grad_norm": 0.4519132077693939, "learning_rate": 8.667521856211434e-06, "loss": 0.3574, "step": 16548 }, { "epoch": 0.7594419714561057, "grad_norm": 0.4867001175880432, "learning_rate": 8.667355201324518e-06, "loss": 0.3804, "step": 16549 }, { "epoch": 0.7594878619613602, "grad_norm": 0.4645751714706421, "learning_rate": 8.667188537618772e-06, "loss": 0.3625, "step": 16550 }, { "epoch": 0.7595337524666147, "grad_norm": 0.48748108744621277, "learning_rate": 8.667021865094594e-06, "loss": 0.4576, "step": 16551 }, { "epoch": 0.7595796429718691, "grad_norm": 0.4626893401145935, "learning_rate": 8.666855183752387e-06, "loss": 0.3804, "step": 16552 }, { "epoch": 0.7596255334771236, "grad_norm": 0.4826729893684387, "learning_rate": 8.66668849359255e-06, "loss": 0.5, "step": 16553 }, { "epoch": 0.7596714239823781, "grad_norm": 0.49610671401023865, "learning_rate": 8.666521794615484e-06, "loss": 0.5087, "step": 16554 }, { "epoch": 0.7597173144876325, "grad_norm": 0.4287147521972656, "learning_rate": 8.66635508682159e-06, "loss": 0.3909, "step": 16555 }, { "epoch": 0.759763204992887, "grad_norm": 0.5004259943962097, "learning_rate": 8.66618837021127e-06, "loss": 0.4298, "step": 16556 }, { "epoch": 0.7598090954981415, "grad_norm": 0.4613230228424072, "learning_rate": 8.666021644784922e-06, "loss": 0.4072, "step": 16557 }, { "epoch": 0.7598549860033958, "grad_norm": 0.43367594480514526, "learning_rate": 8.665854910542953e-06, "loss": 0.3635, "step": 16558 }, { "epoch": 0.7599008765086503, "grad_norm": 0.46060261130332947, "learning_rate": 8.665688167485755e-06, "loss": 0.4287, "step": 16559 }, { "epoch": 0.7599467670139048, "grad_norm": 0.4652125835418701, "learning_rate": 8.665521415613739e-06, "loss": 0.4072, "step": 16560 }, { "epoch": 0.7599926575191592, "grad_norm": 0.4747850000858307, "learning_rate": 8.665354654927297e-06, "loss": 0.4742, "step": 16561 }, { "epoch": 0.7600385480244137, "grad_norm": 0.43166792392730713, "learning_rate": 8.665187885426836e-06, "loss": 0.3164, "step": 16562 }, { "epoch": 0.7600844385296682, "grad_norm": 0.4606620967388153, "learning_rate": 8.665021107112752e-06, "loss": 0.4316, "step": 16563 }, { "epoch": 0.7601303290349227, "grad_norm": 0.45336636900901794, "learning_rate": 8.664854319985452e-06, "loss": 0.4017, "step": 16564 }, { "epoch": 0.7601762195401771, "grad_norm": 0.4771563410758972, "learning_rate": 8.664687524045333e-06, "loss": 0.428, "step": 16565 }, { "epoch": 0.7602221100454316, "grad_norm": 0.4496387243270874, "learning_rate": 8.664520719292797e-06, "loss": 0.3962, "step": 16566 }, { "epoch": 0.7602680005506861, "grad_norm": 0.4229981601238251, "learning_rate": 8.664353905728245e-06, "loss": 0.3603, "step": 16567 }, { "epoch": 0.7603138910559405, "grad_norm": 0.4515206217765808, "learning_rate": 8.664187083352081e-06, "loss": 0.3802, "step": 16568 }, { "epoch": 0.760359781561195, "grad_norm": 0.4783252477645874, "learning_rate": 8.6640202521647e-06, "loss": 0.5008, "step": 16569 }, { "epoch": 0.7604056720664495, "grad_norm": 0.39518433809280396, "learning_rate": 8.663853412166508e-06, "loss": 0.3421, "step": 16570 }, { "epoch": 0.7604515625717039, "grad_norm": 0.4148382246494293, "learning_rate": 8.663686563357905e-06, "loss": 0.3316, "step": 16571 }, { "epoch": 0.7604974530769584, "grad_norm": 0.46770212054252625, "learning_rate": 8.663519705739291e-06, "loss": 0.4911, "step": 16572 }, { "epoch": 0.7605433435822129, "grad_norm": 0.4535134434700012, "learning_rate": 8.66335283931107e-06, "loss": 0.3615, "step": 16573 }, { "epoch": 0.7605892340874673, "grad_norm": 0.4684820771217346, "learning_rate": 8.663185964073642e-06, "loss": 0.4815, "step": 16574 }, { "epoch": 0.7606351245927218, "grad_norm": 0.47132790088653564, "learning_rate": 8.663019080027405e-06, "loss": 0.4779, "step": 16575 }, { "epoch": 0.7606810150979763, "grad_norm": 0.42527490854263306, "learning_rate": 8.662852187172765e-06, "loss": 0.3648, "step": 16576 }, { "epoch": 0.7607269056032306, "grad_norm": 0.4820786714553833, "learning_rate": 8.662685285510122e-06, "loss": 0.5175, "step": 16577 }, { "epoch": 0.7607727961084851, "grad_norm": 0.4523543417453766, "learning_rate": 8.662518375039877e-06, "loss": 0.4059, "step": 16578 }, { "epoch": 0.7608186866137396, "grad_norm": 0.45798397064208984, "learning_rate": 8.66235145576243e-06, "loss": 0.3828, "step": 16579 }, { "epoch": 0.7608645771189941, "grad_norm": 0.4404222071170807, "learning_rate": 8.662184527678183e-06, "loss": 0.3581, "step": 16580 }, { "epoch": 0.7609104676242485, "grad_norm": 0.47726085782051086, "learning_rate": 8.66201759078754e-06, "loss": 0.3756, "step": 16581 }, { "epoch": 0.760956358129503, "grad_norm": 0.4335137605667114, "learning_rate": 8.6618506450909e-06, "loss": 0.3604, "step": 16582 }, { "epoch": 0.7610022486347575, "grad_norm": 0.4485705494880676, "learning_rate": 8.661683690588665e-06, "loss": 0.3982, "step": 16583 }, { "epoch": 0.7610481391400119, "grad_norm": 0.45291033387184143, "learning_rate": 8.661516727281235e-06, "loss": 0.3807, "step": 16584 }, { "epoch": 0.7610940296452664, "grad_norm": 0.4448527991771698, "learning_rate": 8.661349755169016e-06, "loss": 0.3799, "step": 16585 }, { "epoch": 0.7611399201505209, "grad_norm": 0.44245779514312744, "learning_rate": 8.661182774252402e-06, "loss": 0.3932, "step": 16586 }, { "epoch": 0.7611858106557753, "grad_norm": 0.4559962749481201, "learning_rate": 8.661015784531802e-06, "loss": 0.414, "step": 16587 }, { "epoch": 0.7612317011610298, "grad_norm": 0.4283487796783447, "learning_rate": 8.660848786007615e-06, "loss": 0.3268, "step": 16588 }, { "epoch": 0.7612775916662843, "grad_norm": 0.4997137188911438, "learning_rate": 8.660681778680239e-06, "loss": 0.4902, "step": 16589 }, { "epoch": 0.7613234821715387, "grad_norm": 0.5060127377510071, "learning_rate": 8.66051476255008e-06, "loss": 0.521, "step": 16590 }, { "epoch": 0.7613693726767932, "grad_norm": 0.43822968006134033, "learning_rate": 8.66034773761754e-06, "loss": 0.3736, "step": 16591 }, { "epoch": 0.7614152631820477, "grad_norm": 0.4695410132408142, "learning_rate": 8.660180703883016e-06, "loss": 0.3914, "step": 16592 }, { "epoch": 0.761461153687302, "grad_norm": 0.44119271636009216, "learning_rate": 8.660013661346915e-06, "loss": 0.3476, "step": 16593 }, { "epoch": 0.7615070441925565, "grad_norm": 0.4380929172039032, "learning_rate": 8.659846610009636e-06, "loss": 0.3806, "step": 16594 }, { "epoch": 0.761552934697811, "grad_norm": 0.4260578453540802, "learning_rate": 8.65967954987158e-06, "loss": 0.3251, "step": 16595 }, { "epoch": 0.7615988252030654, "grad_norm": 0.47210654616355896, "learning_rate": 8.65951248093315e-06, "loss": 0.4451, "step": 16596 }, { "epoch": 0.7616447157083199, "grad_norm": 0.4752641022205353, "learning_rate": 8.659345403194747e-06, "loss": 0.3797, "step": 16597 }, { "epoch": 0.7616906062135744, "grad_norm": 0.4741351902484894, "learning_rate": 8.659178316656776e-06, "loss": 0.4153, "step": 16598 }, { "epoch": 0.7617364967188289, "grad_norm": 0.46650680899620056, "learning_rate": 8.659011221319633e-06, "loss": 0.4135, "step": 16599 }, { "epoch": 0.7617823872240833, "grad_norm": 0.439491331577301, "learning_rate": 8.658844117183725e-06, "loss": 0.3495, "step": 16600 }, { "epoch": 0.7618282777293378, "grad_norm": 0.4569900929927826, "learning_rate": 8.65867700424945e-06, "loss": 0.4482, "step": 16601 }, { "epoch": 0.7618741682345923, "grad_norm": 0.4787359833717346, "learning_rate": 8.65850988251721e-06, "loss": 0.5269, "step": 16602 }, { "epoch": 0.7619200587398467, "grad_norm": 0.48187947273254395, "learning_rate": 8.658342751987412e-06, "loss": 0.4349, "step": 16603 }, { "epoch": 0.7619659492451012, "grad_norm": 0.45106056332588196, "learning_rate": 8.658175612660452e-06, "loss": 0.4194, "step": 16604 }, { "epoch": 0.7620118397503557, "grad_norm": 0.47324132919311523, "learning_rate": 8.658008464536735e-06, "loss": 0.4265, "step": 16605 }, { "epoch": 0.7620577302556101, "grad_norm": 0.49539393186569214, "learning_rate": 8.657841307616662e-06, "loss": 0.4765, "step": 16606 }, { "epoch": 0.7621036207608646, "grad_norm": 0.4333312511444092, "learning_rate": 8.657674141900635e-06, "loss": 0.3488, "step": 16607 }, { "epoch": 0.7621495112661191, "grad_norm": 0.4328710436820984, "learning_rate": 8.657506967389056e-06, "loss": 0.369, "step": 16608 }, { "epoch": 0.7621954017713735, "grad_norm": 0.44848158955574036, "learning_rate": 8.657339784082329e-06, "loss": 0.3697, "step": 16609 }, { "epoch": 0.762241292276628, "grad_norm": 0.463376522064209, "learning_rate": 8.65717259198085e-06, "loss": 0.3678, "step": 16610 }, { "epoch": 0.7622871827818825, "grad_norm": 0.4599843919277191, "learning_rate": 8.657005391085028e-06, "loss": 0.4161, "step": 16611 }, { "epoch": 0.7623330732871368, "grad_norm": 0.4671322703361511, "learning_rate": 8.656838181395262e-06, "loss": 0.4055, "step": 16612 }, { "epoch": 0.7623789637923913, "grad_norm": 0.4879436790943146, "learning_rate": 8.656670962911953e-06, "loss": 0.5166, "step": 16613 }, { "epoch": 0.7624248542976458, "grad_norm": 0.48430460691452026, "learning_rate": 8.656503735635507e-06, "loss": 0.4219, "step": 16614 }, { "epoch": 0.7624707448029003, "grad_norm": 0.45655158162117004, "learning_rate": 8.656336499566321e-06, "loss": 0.4095, "step": 16615 }, { "epoch": 0.7625166353081547, "grad_norm": 0.5079159736633301, "learning_rate": 8.6561692547048e-06, "loss": 0.4591, "step": 16616 }, { "epoch": 0.7625625258134092, "grad_norm": 0.4365522563457489, "learning_rate": 8.656002001051346e-06, "loss": 0.3645, "step": 16617 }, { "epoch": 0.7626084163186637, "grad_norm": 0.4415024220943451, "learning_rate": 8.655834738606363e-06, "loss": 0.3685, "step": 16618 }, { "epoch": 0.7626543068239181, "grad_norm": 0.4379120171070099, "learning_rate": 8.65566746737025e-06, "loss": 0.3622, "step": 16619 }, { "epoch": 0.7627001973291726, "grad_norm": 0.4383671283721924, "learning_rate": 8.65550018734341e-06, "loss": 0.3937, "step": 16620 }, { "epoch": 0.7627460878344271, "grad_norm": 0.4518575966358185, "learning_rate": 8.655332898526246e-06, "loss": 0.4059, "step": 16621 }, { "epoch": 0.7627919783396815, "grad_norm": 0.4578760862350464, "learning_rate": 8.65516560091916e-06, "loss": 0.4229, "step": 16622 }, { "epoch": 0.762837868844936, "grad_norm": 0.46214017271995544, "learning_rate": 8.654998294522553e-06, "loss": 0.4434, "step": 16623 }, { "epoch": 0.7628837593501905, "grad_norm": 0.4209538996219635, "learning_rate": 8.654830979336833e-06, "loss": 0.3403, "step": 16624 }, { "epoch": 0.7629296498554449, "grad_norm": 0.44051656126976013, "learning_rate": 8.654663655362394e-06, "loss": 0.3712, "step": 16625 }, { "epoch": 0.7629755403606994, "grad_norm": 0.4441680312156677, "learning_rate": 8.654496322599644e-06, "loss": 0.3734, "step": 16626 }, { "epoch": 0.7630214308659539, "grad_norm": 0.4254602789878845, "learning_rate": 8.654328981048983e-06, "loss": 0.3596, "step": 16627 }, { "epoch": 0.7630673213712083, "grad_norm": 0.44044029712677, "learning_rate": 8.654161630710813e-06, "loss": 0.3769, "step": 16628 }, { "epoch": 0.7631132118764627, "grad_norm": 0.46766823530197144, "learning_rate": 8.65399427158554e-06, "loss": 0.4027, "step": 16629 }, { "epoch": 0.7631591023817172, "grad_norm": 0.43795838952064514, "learning_rate": 8.653826903673564e-06, "loss": 0.3841, "step": 16630 }, { "epoch": 0.7632049928869716, "grad_norm": 0.6902680397033691, "learning_rate": 8.653659526975286e-06, "loss": 0.358, "step": 16631 }, { "epoch": 0.7632508833922261, "grad_norm": 0.4811858832836151, "learning_rate": 8.653492141491108e-06, "loss": 0.4587, "step": 16632 }, { "epoch": 0.7632967738974806, "grad_norm": 0.4566099941730499, "learning_rate": 8.653324747221439e-06, "loss": 0.4051, "step": 16633 }, { "epoch": 0.7633426644027351, "grad_norm": 0.46556583046913147, "learning_rate": 8.653157344166674e-06, "loss": 0.4038, "step": 16634 }, { "epoch": 0.7633885549079895, "grad_norm": 0.48172369599342346, "learning_rate": 8.652989932327218e-06, "loss": 0.435, "step": 16635 }, { "epoch": 0.763434445413244, "grad_norm": 0.43391183018684387, "learning_rate": 8.652822511703477e-06, "loss": 0.3315, "step": 16636 }, { "epoch": 0.7634803359184985, "grad_norm": 0.4354071319103241, "learning_rate": 8.652655082295849e-06, "loss": 0.3412, "step": 16637 }, { "epoch": 0.7635262264237529, "grad_norm": 0.4992811679840088, "learning_rate": 8.65248764410474e-06, "loss": 0.4329, "step": 16638 }, { "epoch": 0.7635721169290074, "grad_norm": 0.4186078608036041, "learning_rate": 8.652320197130549e-06, "loss": 0.3361, "step": 16639 }, { "epoch": 0.7636180074342619, "grad_norm": 0.43802008032798767, "learning_rate": 8.65215274137368e-06, "loss": 0.341, "step": 16640 }, { "epoch": 0.7636638979395163, "grad_norm": 0.4748854339122772, "learning_rate": 8.651985276834538e-06, "loss": 0.4741, "step": 16641 }, { "epoch": 0.7637097884447708, "grad_norm": 0.48368120193481445, "learning_rate": 8.651817803513523e-06, "loss": 0.5021, "step": 16642 }, { "epoch": 0.7637556789500253, "grad_norm": 0.45552507042884827, "learning_rate": 8.65165032141104e-06, "loss": 0.4288, "step": 16643 }, { "epoch": 0.7638015694552797, "grad_norm": 0.4552462697029114, "learning_rate": 8.65148283052749e-06, "loss": 0.4514, "step": 16644 }, { "epoch": 0.7638474599605342, "grad_norm": 0.4808780550956726, "learning_rate": 8.651315330863276e-06, "loss": 0.4657, "step": 16645 }, { "epoch": 0.7638933504657887, "grad_norm": 0.4388098120689392, "learning_rate": 8.651147822418801e-06, "loss": 0.3926, "step": 16646 }, { "epoch": 0.763939240971043, "grad_norm": 0.4399193823337555, "learning_rate": 8.650980305194468e-06, "loss": 0.345, "step": 16647 }, { "epoch": 0.7639851314762975, "grad_norm": 0.4355769753456116, "learning_rate": 8.650812779190678e-06, "loss": 0.3043, "step": 16648 }, { "epoch": 0.764031021981552, "grad_norm": 0.5149824619293213, "learning_rate": 8.650645244407838e-06, "loss": 0.4716, "step": 16649 }, { "epoch": 0.7640769124868064, "grad_norm": 0.4733157753944397, "learning_rate": 8.650477700846346e-06, "loss": 0.4006, "step": 16650 }, { "epoch": 0.7641228029920609, "grad_norm": 0.4078599810600281, "learning_rate": 8.650310148506609e-06, "loss": 0.2985, "step": 16651 }, { "epoch": 0.7641686934973154, "grad_norm": 0.477463036775589, "learning_rate": 8.65014258738903e-06, "loss": 0.4361, "step": 16652 }, { "epoch": 0.7642145840025699, "grad_norm": 0.44911378622055054, "learning_rate": 8.649975017494005e-06, "loss": 0.3763, "step": 16653 }, { "epoch": 0.7642604745078243, "grad_norm": 0.46447426080703735, "learning_rate": 8.649807438821946e-06, "loss": 0.4093, "step": 16654 }, { "epoch": 0.7643063650130788, "grad_norm": 0.4102897346019745, "learning_rate": 8.64963985137325e-06, "loss": 0.35, "step": 16655 }, { "epoch": 0.7643522555183333, "grad_norm": 0.4692215025424957, "learning_rate": 8.649472255148324e-06, "loss": 0.4715, "step": 16656 }, { "epoch": 0.7643981460235877, "grad_norm": 0.4415777325630188, "learning_rate": 8.649304650147567e-06, "loss": 0.401, "step": 16657 }, { "epoch": 0.7644440365288422, "grad_norm": 0.4136008322238922, "learning_rate": 8.649137036371386e-06, "loss": 0.355, "step": 16658 }, { "epoch": 0.7644899270340967, "grad_norm": 0.44934991002082825, "learning_rate": 8.64896941382018e-06, "loss": 0.3894, "step": 16659 }, { "epoch": 0.7645358175393511, "grad_norm": 0.45560553669929504, "learning_rate": 8.648801782494356e-06, "loss": 0.3785, "step": 16660 }, { "epoch": 0.7645817080446056, "grad_norm": 0.44760313630104065, "learning_rate": 8.648634142394314e-06, "loss": 0.3738, "step": 16661 }, { "epoch": 0.7646275985498601, "grad_norm": 0.4740528166294098, "learning_rate": 8.64846649352046e-06, "loss": 0.4697, "step": 16662 }, { "epoch": 0.7646734890551145, "grad_norm": 0.4445270597934723, "learning_rate": 8.648298835873194e-06, "loss": 0.4285, "step": 16663 }, { "epoch": 0.764719379560369, "grad_norm": 0.4388747811317444, "learning_rate": 8.648131169452923e-06, "loss": 0.3598, "step": 16664 }, { "epoch": 0.7647652700656234, "grad_norm": 0.4456874430179596, "learning_rate": 8.647963494260045e-06, "loss": 0.4431, "step": 16665 }, { "epoch": 0.7648111605708778, "grad_norm": 0.5008742809295654, "learning_rate": 8.647795810294968e-06, "loss": 0.5462, "step": 16666 }, { "epoch": 0.7648570510761323, "grad_norm": 0.4694848954677582, "learning_rate": 8.647628117558093e-06, "loss": 0.4428, "step": 16667 }, { "epoch": 0.7649029415813868, "grad_norm": 0.4653250277042389, "learning_rate": 8.647460416049823e-06, "loss": 0.4335, "step": 16668 }, { "epoch": 0.7649488320866413, "grad_norm": 0.4807059168815613, "learning_rate": 8.647292705770563e-06, "loss": 0.5, "step": 16669 }, { "epoch": 0.7649947225918957, "grad_norm": 0.48507270216941833, "learning_rate": 8.647124986720714e-06, "loss": 0.4982, "step": 16670 }, { "epoch": 0.7650406130971502, "grad_norm": 0.4438900947570801, "learning_rate": 8.646957258900682e-06, "loss": 0.4102, "step": 16671 }, { "epoch": 0.7650865036024047, "grad_norm": 0.41129499673843384, "learning_rate": 8.646789522310866e-06, "loss": 0.2967, "step": 16672 }, { "epoch": 0.7651323941076591, "grad_norm": 0.44413602352142334, "learning_rate": 8.646621776951674e-06, "loss": 0.3596, "step": 16673 }, { "epoch": 0.7651782846129136, "grad_norm": 0.4327761232852936, "learning_rate": 8.646454022823508e-06, "loss": 0.3422, "step": 16674 }, { "epoch": 0.7652241751181681, "grad_norm": 0.4554285705089569, "learning_rate": 8.64628625992677e-06, "loss": 0.4242, "step": 16675 }, { "epoch": 0.7652700656234225, "grad_norm": 0.47412264347076416, "learning_rate": 8.646118488261866e-06, "loss": 0.4382, "step": 16676 }, { "epoch": 0.765315956128677, "grad_norm": 0.4398435056209564, "learning_rate": 8.645950707829194e-06, "loss": 0.3474, "step": 16677 }, { "epoch": 0.7653618466339315, "grad_norm": 0.4326326847076416, "learning_rate": 8.645782918629165e-06, "loss": 0.3432, "step": 16678 }, { "epoch": 0.7654077371391859, "grad_norm": 0.4290851950645447, "learning_rate": 8.645615120662176e-06, "loss": 0.3891, "step": 16679 }, { "epoch": 0.7654536276444404, "grad_norm": 0.4184976816177368, "learning_rate": 8.645447313928636e-06, "loss": 0.3359, "step": 16680 }, { "epoch": 0.7654995181496949, "grad_norm": 0.4594894349575043, "learning_rate": 8.645279498428944e-06, "loss": 0.3823, "step": 16681 }, { "epoch": 0.7655454086549492, "grad_norm": 0.4451071321964264, "learning_rate": 8.645111674163506e-06, "loss": 0.3567, "step": 16682 }, { "epoch": 0.7655912991602037, "grad_norm": 0.462082177400589, "learning_rate": 8.644943841132724e-06, "loss": 0.413, "step": 16683 }, { "epoch": 0.7656371896654582, "grad_norm": 0.4698992073535919, "learning_rate": 8.644775999337004e-06, "loss": 0.4484, "step": 16684 }, { "epoch": 0.7656830801707126, "grad_norm": 0.4230175018310547, "learning_rate": 8.644608148776745e-06, "loss": 0.3427, "step": 16685 }, { "epoch": 0.7657289706759671, "grad_norm": 0.44336095452308655, "learning_rate": 8.644440289452357e-06, "loss": 0.3648, "step": 16686 }, { "epoch": 0.7657748611812216, "grad_norm": 0.4543813169002533, "learning_rate": 8.644272421364239e-06, "loss": 0.3752, "step": 16687 }, { "epoch": 0.7658207516864761, "grad_norm": 0.4402708411216736, "learning_rate": 8.644104544512796e-06, "loss": 0.3667, "step": 16688 }, { "epoch": 0.7658666421917305, "grad_norm": 0.410935640335083, "learning_rate": 8.643936658898431e-06, "loss": 0.2947, "step": 16689 }, { "epoch": 0.765912532696985, "grad_norm": 0.585806667804718, "learning_rate": 8.64376876452155e-06, "loss": 0.3698, "step": 16690 }, { "epoch": 0.7659584232022395, "grad_norm": 0.4716147482395172, "learning_rate": 8.643600861382554e-06, "loss": 0.375, "step": 16691 }, { "epoch": 0.7660043137074939, "grad_norm": 0.46725332736968994, "learning_rate": 8.643432949481847e-06, "loss": 0.3955, "step": 16692 }, { "epoch": 0.7660502042127484, "grad_norm": 0.4907747209072113, "learning_rate": 8.643265028819837e-06, "loss": 0.4524, "step": 16693 }, { "epoch": 0.7660960947180029, "grad_norm": 0.45562073588371277, "learning_rate": 8.643097099396921e-06, "loss": 0.4352, "step": 16694 }, { "epoch": 0.7661419852232573, "grad_norm": 0.4529406726360321, "learning_rate": 8.642929161213507e-06, "loss": 0.4104, "step": 16695 }, { "epoch": 0.7661878757285118, "grad_norm": 0.45797809958457947, "learning_rate": 8.642761214269999e-06, "loss": 0.3875, "step": 16696 }, { "epoch": 0.7662337662337663, "grad_norm": 0.4381345808506012, "learning_rate": 8.642593258566799e-06, "loss": 0.3322, "step": 16697 }, { "epoch": 0.7662796567390207, "grad_norm": 0.47425922751426697, "learning_rate": 8.642425294104312e-06, "loss": 0.4524, "step": 16698 }, { "epoch": 0.7663255472442752, "grad_norm": 0.4533551335334778, "learning_rate": 8.642257320882942e-06, "loss": 0.4062, "step": 16699 }, { "epoch": 0.7663714377495296, "grad_norm": 0.43710264563560486, "learning_rate": 8.642089338903093e-06, "loss": 0.3433, "step": 16700 }, { "epoch": 0.766417328254784, "grad_norm": 0.4641318917274475, "learning_rate": 8.64192134816517e-06, "loss": 0.3851, "step": 16701 }, { "epoch": 0.7664632187600385, "grad_norm": 0.5152125954627991, "learning_rate": 8.641753348669573e-06, "loss": 0.5483, "step": 16702 }, { "epoch": 0.766509109265293, "grad_norm": 0.4314947724342346, "learning_rate": 8.64158534041671e-06, "loss": 0.3106, "step": 16703 }, { "epoch": 0.7665549997705474, "grad_norm": 0.45960569381713867, "learning_rate": 8.641417323406983e-06, "loss": 0.4333, "step": 16704 }, { "epoch": 0.7666008902758019, "grad_norm": 0.5077401399612427, "learning_rate": 8.641249297640796e-06, "loss": 0.4126, "step": 16705 }, { "epoch": 0.7666467807810564, "grad_norm": 0.5035547614097595, "learning_rate": 8.641081263118556e-06, "loss": 0.4219, "step": 16706 }, { "epoch": 0.7666926712863109, "grad_norm": 0.6299911141395569, "learning_rate": 8.640913219840664e-06, "loss": 0.5065, "step": 16707 }, { "epoch": 0.7667385617915653, "grad_norm": 0.4525778293609619, "learning_rate": 8.640745167807523e-06, "loss": 0.4516, "step": 16708 }, { "epoch": 0.7667844522968198, "grad_norm": 0.4584752023220062, "learning_rate": 8.640577107019538e-06, "loss": 0.3727, "step": 16709 }, { "epoch": 0.7668303428020743, "grad_norm": 0.4778797924518585, "learning_rate": 8.640409037477118e-06, "loss": 0.4252, "step": 16710 }, { "epoch": 0.7668762333073287, "grad_norm": 0.5247454643249512, "learning_rate": 8.64024095918066e-06, "loss": 0.5022, "step": 16711 }, { "epoch": 0.7669221238125832, "grad_norm": 0.45136985182762146, "learning_rate": 8.640072872130573e-06, "loss": 0.4008, "step": 16712 }, { "epoch": 0.7669680143178377, "grad_norm": 0.4450985789299011, "learning_rate": 8.63990477632726e-06, "loss": 0.3848, "step": 16713 }, { "epoch": 0.7670139048230921, "grad_norm": 0.44062259793281555, "learning_rate": 8.639736671771122e-06, "loss": 0.3586, "step": 16714 }, { "epoch": 0.7670597953283466, "grad_norm": 0.44705069065093994, "learning_rate": 8.639568558462568e-06, "loss": 0.3808, "step": 16715 }, { "epoch": 0.7671056858336011, "grad_norm": 0.473104864358902, "learning_rate": 8.639400436402002e-06, "loss": 0.4683, "step": 16716 }, { "epoch": 0.7671515763388554, "grad_norm": 0.45324209332466125, "learning_rate": 8.639232305589824e-06, "loss": 0.3779, "step": 16717 }, { "epoch": 0.7671974668441099, "grad_norm": 0.46641144156455994, "learning_rate": 8.63906416602644e-06, "loss": 0.3826, "step": 16718 }, { "epoch": 0.7672433573493644, "grad_norm": 0.46970435976982117, "learning_rate": 8.638896017712257e-06, "loss": 0.4369, "step": 16719 }, { "epoch": 0.7672892478546188, "grad_norm": 0.4547556936740875, "learning_rate": 8.638727860647678e-06, "loss": 0.3491, "step": 16720 }, { "epoch": 0.7673351383598733, "grad_norm": 0.4516390860080719, "learning_rate": 8.638559694833105e-06, "loss": 0.3725, "step": 16721 }, { "epoch": 0.7673810288651278, "grad_norm": 0.46866875886917114, "learning_rate": 8.638391520268946e-06, "loss": 0.3657, "step": 16722 }, { "epoch": 0.7674269193703823, "grad_norm": 0.43218621611595154, "learning_rate": 8.638223336955604e-06, "loss": 0.3285, "step": 16723 }, { "epoch": 0.7674728098756367, "grad_norm": 0.44133177399635315, "learning_rate": 8.638055144893483e-06, "loss": 0.3942, "step": 16724 }, { "epoch": 0.7675187003808912, "grad_norm": 0.4587036073207855, "learning_rate": 8.637886944082986e-06, "loss": 0.4121, "step": 16725 }, { "epoch": 0.7675645908861457, "grad_norm": 0.4630714952945709, "learning_rate": 8.637718734524522e-06, "loss": 0.327, "step": 16726 }, { "epoch": 0.7676104813914001, "grad_norm": 0.45840924978256226, "learning_rate": 8.63755051621849e-06, "loss": 0.4179, "step": 16727 }, { "epoch": 0.7676563718966546, "grad_norm": 0.445102721452713, "learning_rate": 8.637382289165299e-06, "loss": 0.4043, "step": 16728 }, { "epoch": 0.7677022624019091, "grad_norm": 0.4620171785354614, "learning_rate": 8.63721405336535e-06, "loss": 0.4234, "step": 16729 }, { "epoch": 0.7677481529071635, "grad_norm": 0.46046048402786255, "learning_rate": 8.63704580881905e-06, "loss": 0.3923, "step": 16730 }, { "epoch": 0.767794043412418, "grad_norm": 0.47586095333099365, "learning_rate": 8.636877555526801e-06, "loss": 0.4441, "step": 16731 }, { "epoch": 0.7678399339176725, "grad_norm": 0.434968501329422, "learning_rate": 8.636709293489011e-06, "loss": 0.3277, "step": 16732 }, { "epoch": 0.7678858244229269, "grad_norm": 0.4700027108192444, "learning_rate": 8.636541022706082e-06, "loss": 0.4437, "step": 16733 }, { "epoch": 0.7679317149281814, "grad_norm": 0.44369786977767944, "learning_rate": 8.63637274317842e-06, "loss": 0.3821, "step": 16734 }, { "epoch": 0.7679776054334359, "grad_norm": 0.4425202012062073, "learning_rate": 8.63620445490643e-06, "loss": 0.4142, "step": 16735 }, { "epoch": 0.7680234959386902, "grad_norm": 0.4245244264602661, "learning_rate": 8.636036157890514e-06, "loss": 0.3616, "step": 16736 }, { "epoch": 0.7680693864439447, "grad_norm": 0.43719521164894104, "learning_rate": 8.635867852131079e-06, "loss": 0.364, "step": 16737 }, { "epoch": 0.7681152769491992, "grad_norm": 0.44025149941444397, "learning_rate": 8.63569953762853e-06, "loss": 0.3419, "step": 16738 }, { "epoch": 0.7681611674544536, "grad_norm": 0.42897433042526245, "learning_rate": 8.635531214383272e-06, "loss": 0.3867, "step": 16739 }, { "epoch": 0.7682070579597081, "grad_norm": 0.4870644807815552, "learning_rate": 8.635362882395709e-06, "loss": 0.4401, "step": 16740 }, { "epoch": 0.7682529484649626, "grad_norm": 0.4196176826953888, "learning_rate": 8.635194541666243e-06, "loss": 0.3293, "step": 16741 }, { "epoch": 0.7682988389702171, "grad_norm": 0.45129823684692383, "learning_rate": 8.635026192195283e-06, "loss": 0.4493, "step": 16742 }, { "epoch": 0.7683447294754715, "grad_norm": 0.44881734251976013, "learning_rate": 8.634857833983232e-06, "loss": 0.4094, "step": 16743 }, { "epoch": 0.768390619980726, "grad_norm": 0.47999757528305054, "learning_rate": 8.634689467030496e-06, "loss": 0.4797, "step": 16744 }, { "epoch": 0.7684365104859805, "grad_norm": 0.4394710659980774, "learning_rate": 8.634521091337478e-06, "loss": 0.3692, "step": 16745 }, { "epoch": 0.7684824009912349, "grad_norm": 0.45004144310951233, "learning_rate": 8.634352706904584e-06, "loss": 0.3982, "step": 16746 }, { "epoch": 0.7685282914964894, "grad_norm": 0.48920729756355286, "learning_rate": 8.634184313732219e-06, "loss": 0.4488, "step": 16747 }, { "epoch": 0.7685741820017439, "grad_norm": 0.47797054052352905, "learning_rate": 8.634015911820788e-06, "loss": 0.3946, "step": 16748 }, { "epoch": 0.7686200725069983, "grad_norm": 0.4358716309070587, "learning_rate": 8.633847501170696e-06, "loss": 0.3434, "step": 16749 }, { "epoch": 0.7686659630122528, "grad_norm": 0.5199787020683289, "learning_rate": 8.633679081782347e-06, "loss": 0.5065, "step": 16750 }, { "epoch": 0.7687118535175073, "grad_norm": 0.47950631380081177, "learning_rate": 8.633510653656146e-06, "loss": 0.4067, "step": 16751 }, { "epoch": 0.7687577440227616, "grad_norm": 0.4889020621776581, "learning_rate": 8.633342216792498e-06, "loss": 0.4974, "step": 16752 }, { "epoch": 0.7688036345280161, "grad_norm": 0.5132023692131042, "learning_rate": 8.633173771191811e-06, "loss": 0.5192, "step": 16753 }, { "epoch": 0.7688495250332706, "grad_norm": 0.45993226766586304, "learning_rate": 8.633005316854486e-06, "loss": 0.4084, "step": 16754 }, { "epoch": 0.768895415538525, "grad_norm": 0.4833483397960663, "learning_rate": 8.63283685378093e-06, "loss": 0.4479, "step": 16755 }, { "epoch": 0.7689413060437795, "grad_norm": 0.4572613537311554, "learning_rate": 8.63266838197155e-06, "loss": 0.3786, "step": 16756 }, { "epoch": 0.768987196549034, "grad_norm": 0.4406229853630066, "learning_rate": 8.632499901426747e-06, "loss": 0.3842, "step": 16757 }, { "epoch": 0.7690330870542885, "grad_norm": 0.3853614628314972, "learning_rate": 8.632331412146928e-06, "loss": 0.244, "step": 16758 }, { "epoch": 0.7690789775595429, "grad_norm": 0.4902091324329376, "learning_rate": 8.6321629141325e-06, "loss": 0.449, "step": 16759 }, { "epoch": 0.7691248680647974, "grad_norm": 0.45169204473495483, "learning_rate": 8.631994407383865e-06, "loss": 0.4294, "step": 16760 }, { "epoch": 0.7691707585700519, "grad_norm": 0.49083441495895386, "learning_rate": 8.63182589190143e-06, "loss": 0.4061, "step": 16761 }, { "epoch": 0.7692166490753063, "grad_norm": 0.4819958209991455, "learning_rate": 8.6316573676856e-06, "loss": 0.3858, "step": 16762 }, { "epoch": 0.7692625395805608, "grad_norm": 0.4892674684524536, "learning_rate": 8.63148883473678e-06, "loss": 0.5148, "step": 16763 }, { "epoch": 0.7693084300858153, "grad_norm": 0.46496593952178955, "learning_rate": 8.631320293055376e-06, "loss": 0.3916, "step": 16764 }, { "epoch": 0.7693543205910697, "grad_norm": 0.45585861802101135, "learning_rate": 8.631151742641794e-06, "loss": 0.404, "step": 16765 }, { "epoch": 0.7694002110963242, "grad_norm": 0.43657055497169495, "learning_rate": 8.630983183496436e-06, "loss": 0.4274, "step": 16766 }, { "epoch": 0.7694461016015787, "grad_norm": 0.5027413964271545, "learning_rate": 8.630814615619711e-06, "loss": 0.5115, "step": 16767 }, { "epoch": 0.7694919921068331, "grad_norm": 0.5198984742164612, "learning_rate": 8.630646039012021e-06, "loss": 0.5225, "step": 16768 }, { "epoch": 0.7695378826120876, "grad_norm": 0.45199620723724365, "learning_rate": 8.630477453673774e-06, "loss": 0.3944, "step": 16769 }, { "epoch": 0.769583773117342, "grad_norm": 0.4292090833187103, "learning_rate": 8.630308859605374e-06, "loss": 0.3655, "step": 16770 }, { "epoch": 0.7696296636225964, "grad_norm": 0.4415838420391083, "learning_rate": 8.630140256807228e-06, "loss": 0.3388, "step": 16771 }, { "epoch": 0.7696755541278509, "grad_norm": 0.4891478717327118, "learning_rate": 8.62997164527974e-06, "loss": 0.4582, "step": 16772 }, { "epoch": 0.7697214446331054, "grad_norm": 0.4409283399581909, "learning_rate": 8.629803025023315e-06, "loss": 0.3891, "step": 16773 }, { "epoch": 0.7697673351383598, "grad_norm": 0.458002507686615, "learning_rate": 8.629634396038361e-06, "loss": 0.3758, "step": 16774 }, { "epoch": 0.7698132256436143, "grad_norm": 0.46311643719673157, "learning_rate": 8.629465758325282e-06, "loss": 0.4111, "step": 16775 }, { "epoch": 0.7698591161488688, "grad_norm": 0.48786279559135437, "learning_rate": 8.629297111884481e-06, "loss": 0.4008, "step": 16776 }, { "epoch": 0.7699050066541233, "grad_norm": 0.4939804673194885, "learning_rate": 8.629128456716369e-06, "loss": 0.4607, "step": 16777 }, { "epoch": 0.7699508971593777, "grad_norm": 0.4647766053676605, "learning_rate": 8.628959792821346e-06, "loss": 0.4244, "step": 16778 }, { "epoch": 0.7699967876646322, "grad_norm": 0.4827609062194824, "learning_rate": 8.62879112019982e-06, "loss": 0.4225, "step": 16779 }, { "epoch": 0.7700426781698867, "grad_norm": 0.45400139689445496, "learning_rate": 8.628622438852197e-06, "loss": 0.3897, "step": 16780 }, { "epoch": 0.7700885686751411, "grad_norm": 0.44279295206069946, "learning_rate": 8.628453748778882e-06, "loss": 0.4062, "step": 16781 }, { "epoch": 0.7701344591803956, "grad_norm": 0.4537462592124939, "learning_rate": 8.628285049980283e-06, "loss": 0.397, "step": 16782 }, { "epoch": 0.7701803496856501, "grad_norm": 0.4813184142112732, "learning_rate": 8.628116342456801e-06, "loss": 0.4178, "step": 16783 }, { "epoch": 0.7702262401909045, "grad_norm": 0.517454206943512, "learning_rate": 8.627947626208845e-06, "loss": 0.5322, "step": 16784 }, { "epoch": 0.770272130696159, "grad_norm": 0.4143744111061096, "learning_rate": 8.62777890123682e-06, "loss": 0.3441, "step": 16785 }, { "epoch": 0.7703180212014135, "grad_norm": 0.4471566677093506, "learning_rate": 8.627610167541134e-06, "loss": 0.3458, "step": 16786 }, { "epoch": 0.7703639117066678, "grad_norm": 0.479196697473526, "learning_rate": 8.627441425122187e-06, "loss": 0.4291, "step": 16787 }, { "epoch": 0.7704098022119223, "grad_norm": 0.436245322227478, "learning_rate": 8.62727267398039e-06, "loss": 0.3422, "step": 16788 }, { "epoch": 0.7704556927171768, "grad_norm": 0.517333984375, "learning_rate": 8.627103914116146e-06, "loss": 0.4887, "step": 16789 }, { "epoch": 0.7705015832224312, "grad_norm": 0.46458911895751953, "learning_rate": 8.626935145529863e-06, "loss": 0.4494, "step": 16790 }, { "epoch": 0.7705474737276857, "grad_norm": 0.5149855017662048, "learning_rate": 8.626766368221946e-06, "loss": 0.4964, "step": 16791 }, { "epoch": 0.7705933642329402, "grad_norm": 0.42306336760520935, "learning_rate": 8.626597582192799e-06, "loss": 0.2988, "step": 16792 }, { "epoch": 0.7706392547381946, "grad_norm": 0.4497124254703522, "learning_rate": 8.62642878744283e-06, "loss": 0.4026, "step": 16793 }, { "epoch": 0.7706851452434491, "grad_norm": 0.44155851006507874, "learning_rate": 8.626259983972446e-06, "loss": 0.3805, "step": 16794 }, { "epoch": 0.7707310357487036, "grad_norm": 0.4343738853931427, "learning_rate": 8.626091171782048e-06, "loss": 0.3773, "step": 16795 }, { "epoch": 0.7707769262539581, "grad_norm": 0.4640238881111145, "learning_rate": 8.625922350872047e-06, "loss": 0.4028, "step": 16796 }, { "epoch": 0.7708228167592125, "grad_norm": 0.4515562653541565, "learning_rate": 8.625753521242848e-06, "loss": 0.3748, "step": 16797 }, { "epoch": 0.770868707264467, "grad_norm": 0.4636695086956024, "learning_rate": 8.625584682894854e-06, "loss": 0.4203, "step": 16798 }, { "epoch": 0.7709145977697215, "grad_norm": 0.4181848168373108, "learning_rate": 8.625415835828475e-06, "loss": 0.3057, "step": 16799 }, { "epoch": 0.7709604882749759, "grad_norm": 0.426975280046463, "learning_rate": 8.625246980044112e-06, "loss": 0.3615, "step": 16800 }, { "epoch": 0.7710063787802304, "grad_norm": 0.5417707562446594, "learning_rate": 8.625078115542178e-06, "loss": 0.4595, "step": 16801 }, { "epoch": 0.7710522692854849, "grad_norm": 0.4715006649494171, "learning_rate": 8.624909242323074e-06, "loss": 0.4089, "step": 16802 }, { "epoch": 0.7710981597907393, "grad_norm": 0.5248382687568665, "learning_rate": 8.624740360387206e-06, "loss": 0.524, "step": 16803 }, { "epoch": 0.7711440502959938, "grad_norm": 0.4586992561817169, "learning_rate": 8.624571469734983e-06, "loss": 0.3935, "step": 16804 }, { "epoch": 0.7711899408012483, "grad_norm": 0.4734436869621277, "learning_rate": 8.624402570366808e-06, "loss": 0.4287, "step": 16805 }, { "epoch": 0.7712358313065026, "grad_norm": 0.4462924897670746, "learning_rate": 8.624233662283091e-06, "loss": 0.3858, "step": 16806 }, { "epoch": 0.7712817218117571, "grad_norm": 0.4268195331096649, "learning_rate": 8.624064745484235e-06, "loss": 0.3541, "step": 16807 }, { "epoch": 0.7713276123170116, "grad_norm": 0.4548666179180145, "learning_rate": 8.623895819970646e-06, "loss": 0.3857, "step": 16808 }, { "epoch": 0.771373502822266, "grad_norm": 0.4665895402431488, "learning_rate": 8.623726885742732e-06, "loss": 0.381, "step": 16809 }, { "epoch": 0.7714193933275205, "grad_norm": 0.440584272146225, "learning_rate": 8.623557942800898e-06, "loss": 0.3502, "step": 16810 }, { "epoch": 0.771465283832775, "grad_norm": 0.4469936192035675, "learning_rate": 8.623388991145552e-06, "loss": 0.3486, "step": 16811 }, { "epoch": 0.7715111743380295, "grad_norm": 0.4688628911972046, "learning_rate": 8.623220030777097e-06, "loss": 0.4345, "step": 16812 }, { "epoch": 0.7715570648432839, "grad_norm": 0.4318545460700989, "learning_rate": 8.623051061695943e-06, "loss": 0.3582, "step": 16813 }, { "epoch": 0.7716029553485384, "grad_norm": 0.44012364745140076, "learning_rate": 8.622882083902495e-06, "loss": 0.401, "step": 16814 }, { "epoch": 0.7716488458537929, "grad_norm": 0.44759029150009155, "learning_rate": 8.622713097397158e-06, "loss": 0.4351, "step": 16815 }, { "epoch": 0.7716947363590473, "grad_norm": 0.49206414818763733, "learning_rate": 8.622544102180339e-06, "loss": 0.4573, "step": 16816 }, { "epoch": 0.7717406268643018, "grad_norm": 0.417838990688324, "learning_rate": 8.622375098252446e-06, "loss": 0.3379, "step": 16817 }, { "epoch": 0.7717865173695563, "grad_norm": 0.4458671808242798, "learning_rate": 8.622206085613884e-06, "loss": 0.3685, "step": 16818 }, { "epoch": 0.7718324078748107, "grad_norm": 0.44375380873680115, "learning_rate": 8.622037064265058e-06, "loss": 0.354, "step": 16819 }, { "epoch": 0.7718782983800652, "grad_norm": 0.5009838342666626, "learning_rate": 8.621868034206377e-06, "loss": 0.459, "step": 16820 }, { "epoch": 0.7719241888853197, "grad_norm": 0.42498329281806946, "learning_rate": 8.621698995438248e-06, "loss": 0.3224, "step": 16821 }, { "epoch": 0.771970079390574, "grad_norm": 0.4551143944263458, "learning_rate": 8.621529947961073e-06, "loss": 0.4056, "step": 16822 }, { "epoch": 0.7720159698958285, "grad_norm": 0.4636278748512268, "learning_rate": 8.621360891775265e-06, "loss": 0.4314, "step": 16823 }, { "epoch": 0.772061860401083, "grad_norm": 0.45101889967918396, "learning_rate": 8.621191826881224e-06, "loss": 0.3415, "step": 16824 }, { "epoch": 0.7721077509063374, "grad_norm": 0.4125545024871826, "learning_rate": 8.62102275327936e-06, "loss": 0.3254, "step": 16825 }, { "epoch": 0.7721536414115919, "grad_norm": 0.42588305473327637, "learning_rate": 8.62085367097008e-06, "loss": 0.3132, "step": 16826 }, { "epoch": 0.7721995319168464, "grad_norm": 0.46967780590057373, "learning_rate": 8.620684579953789e-06, "loss": 0.4171, "step": 16827 }, { "epoch": 0.7722454224221008, "grad_norm": 0.473286896944046, "learning_rate": 8.620515480230895e-06, "loss": 0.466, "step": 16828 }, { "epoch": 0.7722913129273553, "grad_norm": 0.44213396310806274, "learning_rate": 8.620346371801804e-06, "loss": 0.402, "step": 16829 }, { "epoch": 0.7723372034326098, "grad_norm": 0.43393293023109436, "learning_rate": 8.620177254666921e-06, "loss": 0.3829, "step": 16830 }, { "epoch": 0.7723830939378643, "grad_norm": 0.44293439388275146, "learning_rate": 8.620008128826656e-06, "loss": 0.3679, "step": 16831 }, { "epoch": 0.7724289844431187, "grad_norm": 0.44425949454307556, "learning_rate": 8.619838994281415e-06, "loss": 0.3191, "step": 16832 }, { "epoch": 0.7724748749483732, "grad_norm": 0.4396025538444519, "learning_rate": 8.619669851031602e-06, "loss": 0.3403, "step": 16833 }, { "epoch": 0.7725207654536277, "grad_norm": 0.571470320224762, "learning_rate": 8.619500699077626e-06, "loss": 0.3282, "step": 16834 }, { "epoch": 0.7725666559588821, "grad_norm": 0.4428199231624603, "learning_rate": 8.61933153841989e-06, "loss": 0.3644, "step": 16835 }, { "epoch": 0.7726125464641366, "grad_norm": 0.4916760325431824, "learning_rate": 8.619162369058808e-06, "loss": 0.4579, "step": 16836 }, { "epoch": 0.7726584369693911, "grad_norm": 0.5163108110427856, "learning_rate": 8.618993190994782e-06, "loss": 0.4467, "step": 16837 }, { "epoch": 0.7727043274746455, "grad_norm": 0.4558572471141815, "learning_rate": 8.61882400422822e-06, "loss": 0.4037, "step": 16838 }, { "epoch": 0.7727502179799, "grad_norm": 0.4770214557647705, "learning_rate": 8.618654808759525e-06, "loss": 0.4112, "step": 16839 }, { "epoch": 0.7727961084851545, "grad_norm": 0.46561184525489807, "learning_rate": 8.61848560458911e-06, "loss": 0.4434, "step": 16840 }, { "epoch": 0.7728419989904088, "grad_norm": 0.42374444007873535, "learning_rate": 8.618316391717378e-06, "loss": 0.3482, "step": 16841 }, { "epoch": 0.7728878894956633, "grad_norm": 0.468965619802475, "learning_rate": 8.618147170144738e-06, "loss": 0.4067, "step": 16842 }, { "epoch": 0.7729337800009178, "grad_norm": 0.4533752202987671, "learning_rate": 8.617977939871595e-06, "loss": 0.3618, "step": 16843 }, { "epoch": 0.7729796705061722, "grad_norm": 0.471626877784729, "learning_rate": 8.617808700898359e-06, "loss": 0.4275, "step": 16844 }, { "epoch": 0.7730255610114267, "grad_norm": 0.4311619997024536, "learning_rate": 8.617639453225433e-06, "loss": 0.3534, "step": 16845 }, { "epoch": 0.7730714515166812, "grad_norm": 0.4441860318183899, "learning_rate": 8.617470196853226e-06, "loss": 0.3883, "step": 16846 }, { "epoch": 0.7731173420219357, "grad_norm": 0.4784258008003235, "learning_rate": 8.617300931782146e-06, "loss": 0.4544, "step": 16847 }, { "epoch": 0.7731632325271901, "grad_norm": 0.4597739577293396, "learning_rate": 8.617131658012599e-06, "loss": 0.4052, "step": 16848 }, { "epoch": 0.7732091230324446, "grad_norm": 0.4413967430591583, "learning_rate": 8.61696237554499e-06, "loss": 0.3515, "step": 16849 }, { "epoch": 0.7732550135376991, "grad_norm": 0.4443657100200653, "learning_rate": 8.61679308437973e-06, "loss": 0.408, "step": 16850 }, { "epoch": 0.7733009040429535, "grad_norm": 0.47268804907798767, "learning_rate": 8.616623784517224e-06, "loss": 0.3758, "step": 16851 }, { "epoch": 0.773346794548208, "grad_norm": 0.4530348479747772, "learning_rate": 8.616454475957878e-06, "loss": 0.4328, "step": 16852 }, { "epoch": 0.7733926850534625, "grad_norm": 0.45229071378707886, "learning_rate": 8.6162851587021e-06, "loss": 0.386, "step": 16853 }, { "epoch": 0.7734385755587169, "grad_norm": 0.45669665932655334, "learning_rate": 8.6161158327503e-06, "loss": 0.3584, "step": 16854 }, { "epoch": 0.7734844660639714, "grad_norm": 0.45503878593444824, "learning_rate": 8.61594649810288e-06, "loss": 0.3845, "step": 16855 }, { "epoch": 0.7735303565692259, "grad_norm": 0.42145493626594543, "learning_rate": 8.615777154760252e-06, "loss": 0.3327, "step": 16856 }, { "epoch": 0.7735762470744802, "grad_norm": 0.40514248609542847, "learning_rate": 8.61560780272282e-06, "loss": 0.2942, "step": 16857 }, { "epoch": 0.7736221375797347, "grad_norm": 0.4242538809776306, "learning_rate": 8.615438441990992e-06, "loss": 0.365, "step": 16858 }, { "epoch": 0.7736680280849892, "grad_norm": 0.4237339496612549, "learning_rate": 8.615269072565176e-06, "loss": 0.3319, "step": 16859 }, { "epoch": 0.7737139185902436, "grad_norm": 0.4476374089717865, "learning_rate": 8.615099694445778e-06, "loss": 0.3486, "step": 16860 }, { "epoch": 0.7737598090954981, "grad_norm": 0.4669581353664398, "learning_rate": 8.61493030763321e-06, "loss": 0.4397, "step": 16861 }, { "epoch": 0.7738056996007526, "grad_norm": 0.49119532108306885, "learning_rate": 8.614760912127871e-06, "loss": 0.4058, "step": 16862 }, { "epoch": 0.773851590106007, "grad_norm": 0.4685514569282532, "learning_rate": 8.614591507930174e-06, "loss": 0.3299, "step": 16863 }, { "epoch": 0.7738974806112615, "grad_norm": 0.4511096179485321, "learning_rate": 8.614422095040527e-06, "loss": 0.3647, "step": 16864 }, { "epoch": 0.773943371116516, "grad_norm": 0.5055418014526367, "learning_rate": 8.614252673459335e-06, "loss": 0.52, "step": 16865 }, { "epoch": 0.7739892616217705, "grad_norm": 0.45167604088783264, "learning_rate": 8.614083243187004e-06, "loss": 0.3475, "step": 16866 }, { "epoch": 0.7740351521270249, "grad_norm": 0.46586674451828003, "learning_rate": 8.613913804223946e-06, "loss": 0.4013, "step": 16867 }, { "epoch": 0.7740810426322794, "grad_norm": 0.4665861129760742, "learning_rate": 8.613744356570563e-06, "loss": 0.3975, "step": 16868 }, { "epoch": 0.7741269331375339, "grad_norm": 0.46501991152763367, "learning_rate": 8.613574900227269e-06, "loss": 0.3893, "step": 16869 }, { "epoch": 0.7741728236427883, "grad_norm": 0.4572458565235138, "learning_rate": 8.613405435194466e-06, "loss": 0.4155, "step": 16870 }, { "epoch": 0.7742187141480428, "grad_norm": 0.5149495005607605, "learning_rate": 8.613235961472562e-06, "loss": 0.4377, "step": 16871 }, { "epoch": 0.7742646046532973, "grad_norm": 0.46111032366752625, "learning_rate": 8.61306647906197e-06, "loss": 0.432, "step": 16872 }, { "epoch": 0.7743104951585517, "grad_norm": 0.40280014276504517, "learning_rate": 8.612896987963088e-06, "loss": 0.2958, "step": 16873 }, { "epoch": 0.7743563856638062, "grad_norm": 0.4556792080402374, "learning_rate": 8.612727488176333e-06, "loss": 0.3978, "step": 16874 }, { "epoch": 0.7744022761690607, "grad_norm": 0.4502858817577362, "learning_rate": 8.612557979702107e-06, "loss": 0.3888, "step": 16875 }, { "epoch": 0.774448166674315, "grad_norm": 0.4924439489841461, "learning_rate": 8.61238846254082e-06, "loss": 0.4787, "step": 16876 }, { "epoch": 0.7744940571795695, "grad_norm": 0.4544217884540558, "learning_rate": 8.612218936692878e-06, "loss": 0.3752, "step": 16877 }, { "epoch": 0.774539947684824, "grad_norm": 0.4419492483139038, "learning_rate": 8.61204940215869e-06, "loss": 0.3947, "step": 16878 }, { "epoch": 0.7745858381900784, "grad_norm": 0.47296708822250366, "learning_rate": 8.611879858938662e-06, "loss": 0.3819, "step": 16879 }, { "epoch": 0.7746317286953329, "grad_norm": 0.49265536665916443, "learning_rate": 8.611710307033204e-06, "loss": 0.4409, "step": 16880 }, { "epoch": 0.7746776192005874, "grad_norm": 0.4374864995479584, "learning_rate": 8.611540746442723e-06, "loss": 0.3715, "step": 16881 }, { "epoch": 0.7747235097058418, "grad_norm": 0.4849449396133423, "learning_rate": 8.611371177167625e-06, "loss": 0.3959, "step": 16882 }, { "epoch": 0.7747694002110963, "grad_norm": 0.4384102523326874, "learning_rate": 8.611201599208321e-06, "loss": 0.3721, "step": 16883 }, { "epoch": 0.7748152907163508, "grad_norm": 0.5025368928909302, "learning_rate": 8.611032012565214e-06, "loss": 0.5033, "step": 16884 }, { "epoch": 0.7748611812216053, "grad_norm": 0.46315592527389526, "learning_rate": 8.610862417238716e-06, "loss": 0.4067, "step": 16885 }, { "epoch": 0.7749070717268597, "grad_norm": 0.4575701951980591, "learning_rate": 8.610692813229235e-06, "loss": 0.4217, "step": 16886 }, { "epoch": 0.7749529622321142, "grad_norm": 0.4692322909832001, "learning_rate": 8.610523200537178e-06, "loss": 0.4057, "step": 16887 }, { "epoch": 0.7749988527373687, "grad_norm": 0.45591580867767334, "learning_rate": 8.61035357916295e-06, "loss": 0.3993, "step": 16888 }, { "epoch": 0.7750447432426231, "grad_norm": 0.42353740334510803, "learning_rate": 8.610183949106961e-06, "loss": 0.3249, "step": 16889 }, { "epoch": 0.7750906337478776, "grad_norm": 0.4491284489631653, "learning_rate": 8.610014310369618e-06, "loss": 0.4171, "step": 16890 }, { "epoch": 0.7751365242531321, "grad_norm": 0.4470716714859009, "learning_rate": 8.609844662951333e-06, "loss": 0.3534, "step": 16891 }, { "epoch": 0.7751824147583865, "grad_norm": 0.46116113662719727, "learning_rate": 8.60967500685251e-06, "loss": 0.4358, "step": 16892 }, { "epoch": 0.775228305263641, "grad_norm": 0.45538708567619324, "learning_rate": 8.609505342073556e-06, "loss": 0.3734, "step": 16893 }, { "epoch": 0.7752741957688954, "grad_norm": 0.440536767244339, "learning_rate": 8.609335668614882e-06, "loss": 0.3909, "step": 16894 }, { "epoch": 0.7753200862741498, "grad_norm": 0.4551977515220642, "learning_rate": 8.609165986476898e-06, "loss": 0.3655, "step": 16895 }, { "epoch": 0.7753659767794043, "grad_norm": 0.46975958347320557, "learning_rate": 8.608996295660004e-06, "loss": 0.3925, "step": 16896 }, { "epoch": 0.7754118672846588, "grad_norm": 0.48826202750205994, "learning_rate": 8.608826596164615e-06, "loss": 0.4516, "step": 16897 }, { "epoch": 0.7754577577899132, "grad_norm": 0.49547696113586426, "learning_rate": 8.608656887991136e-06, "loss": 0.4593, "step": 16898 }, { "epoch": 0.7755036482951677, "grad_norm": 0.4274131655693054, "learning_rate": 8.608487171139978e-06, "loss": 0.3203, "step": 16899 }, { "epoch": 0.7755495388004222, "grad_norm": 0.43693792819976807, "learning_rate": 8.608317445611547e-06, "loss": 0.4103, "step": 16900 }, { "epoch": 0.7755954293056767, "grad_norm": 0.40566906332969666, "learning_rate": 8.60814771140625e-06, "loss": 0.3196, "step": 16901 }, { "epoch": 0.7756413198109311, "grad_norm": 0.43336349725723267, "learning_rate": 8.607977968524497e-06, "loss": 0.3718, "step": 16902 }, { "epoch": 0.7756872103161856, "grad_norm": 0.48081040382385254, "learning_rate": 8.607808216966696e-06, "loss": 0.4117, "step": 16903 }, { "epoch": 0.7757331008214401, "grad_norm": 0.4720076620578766, "learning_rate": 8.607638456733255e-06, "loss": 0.3469, "step": 16904 }, { "epoch": 0.7757789913266945, "grad_norm": 0.4629420042037964, "learning_rate": 8.607468687824581e-06, "loss": 0.4151, "step": 16905 }, { "epoch": 0.775824881831949, "grad_norm": 0.4876801371574402, "learning_rate": 8.607298910241084e-06, "loss": 0.4627, "step": 16906 }, { "epoch": 0.7758707723372035, "grad_norm": 0.4505068063735962, "learning_rate": 8.607129123983172e-06, "loss": 0.3839, "step": 16907 }, { "epoch": 0.7759166628424579, "grad_norm": 0.4997379183769226, "learning_rate": 8.606959329051253e-06, "loss": 0.5035, "step": 16908 }, { "epoch": 0.7759625533477124, "grad_norm": 0.44132763147354126, "learning_rate": 8.606789525445735e-06, "loss": 0.3963, "step": 16909 }, { "epoch": 0.7760084438529669, "grad_norm": 0.44596344232559204, "learning_rate": 8.606619713167025e-06, "loss": 0.3627, "step": 16910 }, { "epoch": 0.7760543343582212, "grad_norm": 0.45813503861427307, "learning_rate": 8.606449892215536e-06, "loss": 0.3769, "step": 16911 }, { "epoch": 0.7761002248634757, "grad_norm": 0.4365362524986267, "learning_rate": 8.60628006259167e-06, "loss": 0.3597, "step": 16912 }, { "epoch": 0.7761461153687302, "grad_norm": 0.417276531457901, "learning_rate": 8.606110224295842e-06, "loss": 0.324, "step": 16913 }, { "epoch": 0.7761920058739846, "grad_norm": 0.47401151061058044, "learning_rate": 8.605940377328454e-06, "loss": 0.4582, "step": 16914 }, { "epoch": 0.7762378963792391, "grad_norm": 0.4661887586116791, "learning_rate": 8.60577052168992e-06, "loss": 0.4247, "step": 16915 }, { "epoch": 0.7762837868844936, "grad_norm": 0.45259785652160645, "learning_rate": 8.605600657380643e-06, "loss": 0.3892, "step": 16916 }, { "epoch": 0.776329677389748, "grad_norm": 0.4371308386325836, "learning_rate": 8.605430784401037e-06, "loss": 0.3999, "step": 16917 }, { "epoch": 0.7763755678950025, "grad_norm": 0.4300791323184967, "learning_rate": 8.605260902751507e-06, "loss": 0.3667, "step": 16918 }, { "epoch": 0.776421458400257, "grad_norm": 0.45549625158309937, "learning_rate": 8.605091012432462e-06, "loss": 0.3963, "step": 16919 }, { "epoch": 0.7764673489055115, "grad_norm": 0.45245325565338135, "learning_rate": 8.60492111344431e-06, "loss": 0.3855, "step": 16920 }, { "epoch": 0.7765132394107659, "grad_norm": 0.4253554046154022, "learning_rate": 8.604751205787463e-06, "loss": 0.3163, "step": 16921 }, { "epoch": 0.7765591299160204, "grad_norm": 0.4317529499530792, "learning_rate": 8.604581289462324e-06, "loss": 0.3426, "step": 16922 }, { "epoch": 0.7766050204212749, "grad_norm": 0.4471127688884735, "learning_rate": 8.604411364469307e-06, "loss": 0.3502, "step": 16923 }, { "epoch": 0.7766509109265293, "grad_norm": 0.4521816372871399, "learning_rate": 8.604241430808818e-06, "loss": 0.4294, "step": 16924 }, { "epoch": 0.7766968014317838, "grad_norm": 0.44483277201652527, "learning_rate": 8.604071488481265e-06, "loss": 0.4056, "step": 16925 }, { "epoch": 0.7767426919370383, "grad_norm": 0.46801871061325073, "learning_rate": 8.603901537487059e-06, "loss": 0.3919, "step": 16926 }, { "epoch": 0.7767885824422927, "grad_norm": 0.4468994140625, "learning_rate": 8.603731577826605e-06, "loss": 0.3746, "step": 16927 }, { "epoch": 0.7768344729475471, "grad_norm": 0.47118332982063293, "learning_rate": 8.603561609500314e-06, "loss": 0.409, "step": 16928 }, { "epoch": 0.7768803634528016, "grad_norm": 0.4489784240722656, "learning_rate": 8.603391632508597e-06, "loss": 0.3757, "step": 16929 }, { "epoch": 0.776926253958056, "grad_norm": 0.44380053877830505, "learning_rate": 8.603221646851859e-06, "loss": 0.3841, "step": 16930 }, { "epoch": 0.7769721444633105, "grad_norm": 0.4650479257106781, "learning_rate": 8.60305165253051e-06, "loss": 0.4163, "step": 16931 }, { "epoch": 0.777018034968565, "grad_norm": 0.4571394920349121, "learning_rate": 8.602881649544959e-06, "loss": 0.4402, "step": 16932 }, { "epoch": 0.7770639254738194, "grad_norm": 0.4364791214466095, "learning_rate": 8.602711637895615e-06, "loss": 0.3638, "step": 16933 }, { "epoch": 0.7771098159790739, "grad_norm": 0.46674564480781555, "learning_rate": 8.602541617582887e-06, "loss": 0.4149, "step": 16934 }, { "epoch": 0.7771557064843284, "grad_norm": 0.4759582281112671, "learning_rate": 8.602371588607183e-06, "loss": 0.4254, "step": 16935 }, { "epoch": 0.7772015969895829, "grad_norm": 0.46773627400398254, "learning_rate": 8.602201550968911e-06, "loss": 0.4399, "step": 16936 }, { "epoch": 0.7772474874948373, "grad_norm": 0.45208248496055603, "learning_rate": 8.602031504668483e-06, "loss": 0.4296, "step": 16937 }, { "epoch": 0.7772933780000918, "grad_norm": 0.40974459052085876, "learning_rate": 8.601861449706305e-06, "loss": 0.3439, "step": 16938 }, { "epoch": 0.7773392685053463, "grad_norm": 0.48815661668777466, "learning_rate": 8.601691386082789e-06, "loss": 0.4397, "step": 16939 }, { "epoch": 0.7773851590106007, "grad_norm": 0.44761961698532104, "learning_rate": 8.60152131379834e-06, "loss": 0.4047, "step": 16940 }, { "epoch": 0.7774310495158552, "grad_norm": 0.4427563548088074, "learning_rate": 8.601351232853368e-06, "loss": 0.3595, "step": 16941 }, { "epoch": 0.7774769400211097, "grad_norm": 0.4863508939743042, "learning_rate": 8.601181143248285e-06, "loss": 0.4809, "step": 16942 }, { "epoch": 0.7775228305263641, "grad_norm": 0.41547828912734985, "learning_rate": 8.601011044983497e-06, "loss": 0.3279, "step": 16943 }, { "epoch": 0.7775687210316186, "grad_norm": 0.5034317374229431, "learning_rate": 8.600840938059414e-06, "loss": 0.329, "step": 16944 }, { "epoch": 0.7776146115368731, "grad_norm": 0.4456773102283478, "learning_rate": 8.600670822476444e-06, "loss": 0.3311, "step": 16945 }, { "epoch": 0.7776605020421274, "grad_norm": 0.4610023498535156, "learning_rate": 8.600500698235e-06, "loss": 0.3721, "step": 16946 }, { "epoch": 0.7777063925473819, "grad_norm": 0.47589805722236633, "learning_rate": 8.600330565335486e-06, "loss": 0.439, "step": 16947 }, { "epoch": 0.7777522830526364, "grad_norm": 0.4469650387763977, "learning_rate": 8.600160423778314e-06, "loss": 0.4025, "step": 16948 }, { "epoch": 0.7777981735578908, "grad_norm": 0.4813380241394043, "learning_rate": 8.599990273563891e-06, "loss": 0.4631, "step": 16949 }, { "epoch": 0.7778440640631453, "grad_norm": 0.4371643364429474, "learning_rate": 8.599820114692628e-06, "loss": 0.3442, "step": 16950 }, { "epoch": 0.7778899545683998, "grad_norm": 0.4149708151817322, "learning_rate": 8.599649947164934e-06, "loss": 0.3996, "step": 16951 }, { "epoch": 0.7779358450736542, "grad_norm": 0.44185352325439453, "learning_rate": 8.599479770981219e-06, "loss": 0.4062, "step": 16952 }, { "epoch": 0.7779817355789087, "grad_norm": 0.4264153242111206, "learning_rate": 8.59930958614189e-06, "loss": 0.3801, "step": 16953 }, { "epoch": 0.7780276260841632, "grad_norm": 0.47552672028541565, "learning_rate": 8.59913939264736e-06, "loss": 0.44, "step": 16954 }, { "epoch": 0.7780735165894177, "grad_norm": 0.4497244358062744, "learning_rate": 8.598969190498032e-06, "loss": 0.3831, "step": 16955 }, { "epoch": 0.7781194070946721, "grad_norm": 0.4605443775653839, "learning_rate": 8.59879897969432e-06, "loss": 0.3706, "step": 16956 }, { "epoch": 0.7781652975999266, "grad_norm": 0.4667157232761383, "learning_rate": 8.598628760236633e-06, "loss": 0.4186, "step": 16957 }, { "epoch": 0.7782111881051811, "grad_norm": 0.4445166289806366, "learning_rate": 8.59845853212538e-06, "loss": 0.3698, "step": 16958 }, { "epoch": 0.7782570786104355, "grad_norm": 0.46820008754730225, "learning_rate": 8.598288295360969e-06, "loss": 0.3991, "step": 16959 }, { "epoch": 0.77830296911569, "grad_norm": 0.45852819085121155, "learning_rate": 8.598118049943811e-06, "loss": 0.4201, "step": 16960 }, { "epoch": 0.7783488596209445, "grad_norm": 0.5182089805603027, "learning_rate": 8.597947795874313e-06, "loss": 0.5053, "step": 16961 }, { "epoch": 0.7783947501261989, "grad_norm": 0.4940803349018097, "learning_rate": 8.59777753315289e-06, "loss": 0.4411, "step": 16962 }, { "epoch": 0.7784406406314534, "grad_norm": 0.47086775302886963, "learning_rate": 8.597607261779943e-06, "loss": 0.3839, "step": 16963 }, { "epoch": 0.7784865311367078, "grad_norm": 0.4992276132106781, "learning_rate": 8.597436981755889e-06, "loss": 0.3903, "step": 16964 }, { "epoch": 0.7785324216419622, "grad_norm": 0.4815516471862793, "learning_rate": 8.597266693081131e-06, "loss": 0.4351, "step": 16965 }, { "epoch": 0.7785783121472167, "grad_norm": 0.4850395917892456, "learning_rate": 8.597096395756085e-06, "loss": 0.4152, "step": 16966 }, { "epoch": 0.7786242026524712, "grad_norm": 0.4815860986709595, "learning_rate": 8.596926089781157e-06, "loss": 0.4305, "step": 16967 }, { "epoch": 0.7786700931577256, "grad_norm": 0.4743412733078003, "learning_rate": 8.596755775156758e-06, "loss": 0.421, "step": 16968 }, { "epoch": 0.7787159836629801, "grad_norm": 0.4712136387825012, "learning_rate": 8.596585451883294e-06, "loss": 0.4196, "step": 16969 }, { "epoch": 0.7787618741682346, "grad_norm": 0.41681602597236633, "learning_rate": 8.596415119961179e-06, "loss": 0.3049, "step": 16970 }, { "epoch": 0.778807764673489, "grad_norm": 0.45696568489074707, "learning_rate": 8.59624477939082e-06, "loss": 0.3799, "step": 16971 }, { "epoch": 0.7788536551787435, "grad_norm": 0.47887057065963745, "learning_rate": 8.596074430172627e-06, "loss": 0.4131, "step": 16972 }, { "epoch": 0.778899545683998, "grad_norm": 0.4474768042564392, "learning_rate": 8.595904072307011e-06, "loss": 0.3473, "step": 16973 }, { "epoch": 0.7789454361892525, "grad_norm": 0.46849435567855835, "learning_rate": 8.595733705794381e-06, "loss": 0.4595, "step": 16974 }, { "epoch": 0.7789913266945069, "grad_norm": 0.4657447040081024, "learning_rate": 8.595563330635144e-06, "loss": 0.4284, "step": 16975 }, { "epoch": 0.7790372171997614, "grad_norm": 0.4583030045032501, "learning_rate": 8.595392946829712e-06, "loss": 0.4212, "step": 16976 }, { "epoch": 0.7790831077050159, "grad_norm": 0.5090739130973816, "learning_rate": 8.595222554378497e-06, "loss": 0.4935, "step": 16977 }, { "epoch": 0.7791289982102703, "grad_norm": 0.47014614939689636, "learning_rate": 8.595052153281905e-06, "loss": 0.4966, "step": 16978 }, { "epoch": 0.7791748887155248, "grad_norm": 0.4327756464481354, "learning_rate": 8.594881743540347e-06, "loss": 0.3579, "step": 16979 }, { "epoch": 0.7792207792207793, "grad_norm": 0.46908295154571533, "learning_rate": 8.594711325154235e-06, "loss": 0.4082, "step": 16980 }, { "epoch": 0.7792666697260336, "grad_norm": 0.47554317116737366, "learning_rate": 8.594540898123976e-06, "loss": 0.4799, "step": 16981 }, { "epoch": 0.7793125602312881, "grad_norm": 0.47663891315460205, "learning_rate": 8.594370462449979e-06, "loss": 0.4053, "step": 16982 }, { "epoch": 0.7793584507365426, "grad_norm": 0.4742930829524994, "learning_rate": 8.594200018132654e-06, "loss": 0.4108, "step": 16983 }, { "epoch": 0.779404341241797, "grad_norm": 0.4363305866718292, "learning_rate": 8.594029565172418e-06, "loss": 0.369, "step": 16984 }, { "epoch": 0.7794502317470515, "grad_norm": 0.4817432463169098, "learning_rate": 8.59385910356967e-06, "loss": 0.5153, "step": 16985 }, { "epoch": 0.779496122252306, "grad_norm": 0.4340348541736603, "learning_rate": 8.593688633324826e-06, "loss": 0.3834, "step": 16986 }, { "epoch": 0.7795420127575604, "grad_norm": 0.5035750865936279, "learning_rate": 8.593518154438297e-06, "loss": 0.5914, "step": 16987 }, { "epoch": 0.7795879032628149, "grad_norm": 0.4310015141963959, "learning_rate": 8.59334766691049e-06, "loss": 0.3538, "step": 16988 }, { "epoch": 0.7796337937680694, "grad_norm": 0.49534115195274353, "learning_rate": 8.593177170741815e-06, "loss": 0.3981, "step": 16989 }, { "epoch": 0.7796796842733239, "grad_norm": 0.45083001255989075, "learning_rate": 8.593006665932685e-06, "loss": 0.332, "step": 16990 }, { "epoch": 0.7797255747785783, "grad_norm": 0.4978991150856018, "learning_rate": 8.592836152483507e-06, "loss": 0.4671, "step": 16991 }, { "epoch": 0.7797714652838328, "grad_norm": 0.5678019523620605, "learning_rate": 8.592665630394692e-06, "loss": 0.5559, "step": 16992 }, { "epoch": 0.7798173557890873, "grad_norm": 0.43045276403427124, "learning_rate": 8.59249509966665e-06, "loss": 0.3558, "step": 16993 }, { "epoch": 0.7798632462943417, "grad_norm": 0.4593508243560791, "learning_rate": 8.59232456029979e-06, "loss": 0.4298, "step": 16994 }, { "epoch": 0.7799091367995962, "grad_norm": 0.48363614082336426, "learning_rate": 8.592154012294525e-06, "loss": 0.4232, "step": 16995 }, { "epoch": 0.7799550273048507, "grad_norm": 0.46471211314201355, "learning_rate": 8.591983455651262e-06, "loss": 0.3879, "step": 16996 }, { "epoch": 0.780000917810105, "grad_norm": 0.47445499897003174, "learning_rate": 8.591812890370413e-06, "loss": 0.3882, "step": 16997 }, { "epoch": 0.7800468083153596, "grad_norm": 0.47170308232307434, "learning_rate": 8.591642316452387e-06, "loss": 0.4082, "step": 16998 }, { "epoch": 0.780092698820614, "grad_norm": 0.46292388439178467, "learning_rate": 8.591471733897595e-06, "loss": 0.4217, "step": 16999 }, { "epoch": 0.7801385893258684, "grad_norm": 0.4319770038127899, "learning_rate": 8.591301142706449e-06, "loss": 0.3736, "step": 17000 }, { "epoch": 0.7801844798311229, "grad_norm": 0.4480537176132202, "learning_rate": 8.591130542879356e-06, "loss": 0.374, "step": 17001 }, { "epoch": 0.7802303703363774, "grad_norm": 0.44551634788513184, "learning_rate": 8.590959934416726e-06, "loss": 0.3389, "step": 17002 }, { "epoch": 0.7802762608416318, "grad_norm": 0.4868551790714264, "learning_rate": 8.590789317318972e-06, "loss": 0.4295, "step": 17003 }, { "epoch": 0.7803221513468863, "grad_norm": 0.44733572006225586, "learning_rate": 8.590618691586502e-06, "loss": 0.3556, "step": 17004 }, { "epoch": 0.7803680418521408, "grad_norm": 0.4398041367530823, "learning_rate": 8.590448057219728e-06, "loss": 0.3466, "step": 17005 }, { "epoch": 0.7804139323573952, "grad_norm": 0.4365110397338867, "learning_rate": 8.590277414219059e-06, "loss": 0.4271, "step": 17006 }, { "epoch": 0.7804598228626497, "grad_norm": 0.460732638835907, "learning_rate": 8.590106762584907e-06, "loss": 0.4107, "step": 17007 }, { "epoch": 0.7805057133679042, "grad_norm": 0.47732633352279663, "learning_rate": 8.589936102317683e-06, "loss": 0.4355, "step": 17008 }, { "epoch": 0.7805516038731587, "grad_norm": 0.46782296895980835, "learning_rate": 8.589765433417792e-06, "loss": 0.443, "step": 17009 }, { "epoch": 0.7805974943784131, "grad_norm": 0.432796835899353, "learning_rate": 8.589594755885648e-06, "loss": 0.3796, "step": 17010 }, { "epoch": 0.7806433848836676, "grad_norm": 0.4564862549304962, "learning_rate": 8.589424069721664e-06, "loss": 0.4201, "step": 17011 }, { "epoch": 0.7806892753889221, "grad_norm": 0.4354449212551117, "learning_rate": 8.589253374926248e-06, "loss": 0.3419, "step": 17012 }, { "epoch": 0.7807351658941765, "grad_norm": 0.4580015540122986, "learning_rate": 8.589082671499809e-06, "loss": 0.4061, "step": 17013 }, { "epoch": 0.780781056399431, "grad_norm": 0.4637317657470703, "learning_rate": 8.588911959442758e-06, "loss": 0.3456, "step": 17014 }, { "epoch": 0.7808269469046855, "grad_norm": 0.4457900822162628, "learning_rate": 8.58874123875551e-06, "loss": 0.3577, "step": 17015 }, { "epoch": 0.7808728374099398, "grad_norm": 0.4248909056186676, "learning_rate": 8.588570509438468e-06, "loss": 0.352, "step": 17016 }, { "epoch": 0.7809187279151943, "grad_norm": 0.4572480320930481, "learning_rate": 8.588399771492048e-06, "loss": 0.3969, "step": 17017 }, { "epoch": 0.7809646184204488, "grad_norm": 0.44316598773002625, "learning_rate": 8.58822902491666e-06, "loss": 0.3704, "step": 17018 }, { "epoch": 0.7810105089257032, "grad_norm": 0.45023679733276367, "learning_rate": 8.588058269712714e-06, "loss": 0.4063, "step": 17019 }, { "epoch": 0.7810563994309577, "grad_norm": 0.4691431224346161, "learning_rate": 8.587887505880618e-06, "loss": 0.4028, "step": 17020 }, { "epoch": 0.7811022899362122, "grad_norm": 0.4431575536727905, "learning_rate": 8.587716733420787e-06, "loss": 0.4269, "step": 17021 }, { "epoch": 0.7811481804414666, "grad_norm": 0.4495683014392853, "learning_rate": 8.587545952333629e-06, "loss": 0.3554, "step": 17022 }, { "epoch": 0.7811940709467211, "grad_norm": 0.4431593418121338, "learning_rate": 8.587375162619555e-06, "loss": 0.3478, "step": 17023 }, { "epoch": 0.7812399614519756, "grad_norm": 0.4422595202922821, "learning_rate": 8.587204364278976e-06, "loss": 0.376, "step": 17024 }, { "epoch": 0.7812858519572301, "grad_norm": 0.4178480803966522, "learning_rate": 8.587033557312303e-06, "loss": 0.3281, "step": 17025 }, { "epoch": 0.7813317424624845, "grad_norm": 0.4244552254676819, "learning_rate": 8.586862741719945e-06, "loss": 0.3638, "step": 17026 }, { "epoch": 0.781377632967739, "grad_norm": 0.45411446690559387, "learning_rate": 8.586691917502315e-06, "loss": 0.4002, "step": 17027 }, { "epoch": 0.7814235234729935, "grad_norm": 0.46251872181892395, "learning_rate": 8.586521084659824e-06, "loss": 0.3461, "step": 17028 }, { "epoch": 0.7814694139782479, "grad_norm": 0.4846585988998413, "learning_rate": 8.586350243192881e-06, "loss": 0.504, "step": 17029 }, { "epoch": 0.7815153044835024, "grad_norm": 0.49908068776130676, "learning_rate": 8.586179393101897e-06, "loss": 0.4969, "step": 17030 }, { "epoch": 0.7815611949887569, "grad_norm": 0.4757101535797119, "learning_rate": 8.586008534387285e-06, "loss": 0.3561, "step": 17031 }, { "epoch": 0.7816070854940113, "grad_norm": 0.42304080724716187, "learning_rate": 8.585837667049452e-06, "loss": 0.404, "step": 17032 }, { "epoch": 0.7816529759992658, "grad_norm": 0.4756632447242737, "learning_rate": 8.585666791088812e-06, "loss": 0.425, "step": 17033 }, { "epoch": 0.7816988665045203, "grad_norm": 0.43152543902397156, "learning_rate": 8.585495906505776e-06, "loss": 0.3655, "step": 17034 }, { "epoch": 0.7817447570097746, "grad_norm": 0.4368312656879425, "learning_rate": 8.585325013300754e-06, "loss": 0.3646, "step": 17035 }, { "epoch": 0.7817906475150291, "grad_norm": 0.484638512134552, "learning_rate": 8.585154111474155e-06, "loss": 0.3886, "step": 17036 }, { "epoch": 0.7818365380202836, "grad_norm": 0.45188310742378235, "learning_rate": 8.584983201026394e-06, "loss": 0.3834, "step": 17037 }, { "epoch": 0.781882428525538, "grad_norm": 0.44477757811546326, "learning_rate": 8.584812281957877e-06, "loss": 0.3963, "step": 17038 }, { "epoch": 0.7819283190307925, "grad_norm": 0.4587835371494293, "learning_rate": 8.584641354269021e-06, "loss": 0.4422, "step": 17039 }, { "epoch": 0.781974209536047, "grad_norm": 0.41530269384384155, "learning_rate": 8.584470417960232e-06, "loss": 0.3083, "step": 17040 }, { "epoch": 0.7820201000413014, "grad_norm": 0.4844847023487091, "learning_rate": 8.584299473031923e-06, "loss": 0.5143, "step": 17041 }, { "epoch": 0.7820659905465559, "grad_norm": 0.46292179822921753, "learning_rate": 8.584128519484507e-06, "loss": 0.3993, "step": 17042 }, { "epoch": 0.7821118810518104, "grad_norm": 0.4614207148551941, "learning_rate": 8.58395755731839e-06, "loss": 0.4493, "step": 17043 }, { "epoch": 0.7821577715570649, "grad_norm": 0.4714962840080261, "learning_rate": 8.58378658653399e-06, "loss": 0.4075, "step": 17044 }, { "epoch": 0.7822036620623193, "grad_norm": 0.44587165117263794, "learning_rate": 8.58361560713171e-06, "loss": 0.39, "step": 17045 }, { "epoch": 0.7822495525675738, "grad_norm": 0.440456360578537, "learning_rate": 8.583444619111968e-06, "loss": 0.3782, "step": 17046 }, { "epoch": 0.7822954430728283, "grad_norm": 0.44569656252861023, "learning_rate": 8.583273622475173e-06, "loss": 0.3722, "step": 17047 }, { "epoch": 0.7823413335780827, "grad_norm": 0.4473586082458496, "learning_rate": 8.583102617221734e-06, "loss": 0.3615, "step": 17048 }, { "epoch": 0.7823872240833372, "grad_norm": 0.5035783052444458, "learning_rate": 8.582931603352067e-06, "loss": 0.4535, "step": 17049 }, { "epoch": 0.7824331145885917, "grad_norm": 0.4415551424026489, "learning_rate": 8.582760580866576e-06, "loss": 0.3573, "step": 17050 }, { "epoch": 0.782479005093846, "grad_norm": 0.43495893478393555, "learning_rate": 8.58258954976568e-06, "loss": 0.3558, "step": 17051 }, { "epoch": 0.7825248955991005, "grad_norm": 0.4600459635257721, "learning_rate": 8.582418510049787e-06, "loss": 0.4106, "step": 17052 }, { "epoch": 0.782570786104355, "grad_norm": 0.45710983872413635, "learning_rate": 8.582247461719307e-06, "loss": 0.3976, "step": 17053 }, { "epoch": 0.7826166766096094, "grad_norm": 0.43762823939323425, "learning_rate": 8.582076404774651e-06, "loss": 0.4038, "step": 17054 }, { "epoch": 0.7826625671148639, "grad_norm": 0.4170437753200531, "learning_rate": 8.581905339216235e-06, "loss": 0.2982, "step": 17055 }, { "epoch": 0.7827084576201184, "grad_norm": 0.4715779721736908, "learning_rate": 8.581734265044465e-06, "loss": 0.4138, "step": 17056 }, { "epoch": 0.7827543481253728, "grad_norm": 0.464697003364563, "learning_rate": 8.581563182259755e-06, "loss": 0.4222, "step": 17057 }, { "epoch": 0.7828002386306273, "grad_norm": 0.42772409319877625, "learning_rate": 8.581392090862515e-06, "loss": 0.3022, "step": 17058 }, { "epoch": 0.7828461291358818, "grad_norm": 0.42809921503067017, "learning_rate": 8.581220990853158e-06, "loss": 0.3617, "step": 17059 }, { "epoch": 0.7828920196411362, "grad_norm": 0.44953227043151855, "learning_rate": 8.581049882232095e-06, "loss": 0.4155, "step": 17060 }, { "epoch": 0.7829379101463907, "grad_norm": 0.47323521971702576, "learning_rate": 8.580878764999739e-06, "loss": 0.4827, "step": 17061 }, { "epoch": 0.7829838006516452, "grad_norm": 0.4372754991054535, "learning_rate": 8.580707639156496e-06, "loss": 0.4088, "step": 17062 }, { "epoch": 0.7830296911568997, "grad_norm": 0.41560351848602295, "learning_rate": 8.580536504702783e-06, "loss": 0.3393, "step": 17063 }, { "epoch": 0.7830755816621541, "grad_norm": 0.4736132323741913, "learning_rate": 8.58036536163901e-06, "loss": 0.449, "step": 17064 }, { "epoch": 0.7831214721674086, "grad_norm": 0.5012199878692627, "learning_rate": 8.580194209965588e-06, "loss": 0.451, "step": 17065 }, { "epoch": 0.7831673626726631, "grad_norm": 0.46233174204826355, "learning_rate": 8.580023049682931e-06, "loss": 0.4665, "step": 17066 }, { "epoch": 0.7832132531779175, "grad_norm": 0.43320024013519287, "learning_rate": 8.579851880791445e-06, "loss": 0.3723, "step": 17067 }, { "epoch": 0.783259143683172, "grad_norm": 0.47294214367866516, "learning_rate": 8.579680703291548e-06, "loss": 0.4725, "step": 17068 }, { "epoch": 0.7833050341884265, "grad_norm": 0.46606338024139404, "learning_rate": 8.579509517183646e-06, "loss": 0.4087, "step": 17069 }, { "epoch": 0.7833509246936808, "grad_norm": 0.474692702293396, "learning_rate": 8.579338322468155e-06, "loss": 0.3951, "step": 17070 }, { "epoch": 0.7833968151989353, "grad_norm": 0.42577216029167175, "learning_rate": 8.579167119145484e-06, "loss": 0.3369, "step": 17071 }, { "epoch": 0.7834427057041898, "grad_norm": 0.45509079098701477, "learning_rate": 8.578995907216048e-06, "loss": 0.404, "step": 17072 }, { "epoch": 0.7834885962094442, "grad_norm": 0.4243028461933136, "learning_rate": 8.578824686680255e-06, "loss": 0.3444, "step": 17073 }, { "epoch": 0.7835344867146987, "grad_norm": 0.4326368570327759, "learning_rate": 8.578653457538517e-06, "loss": 0.3511, "step": 17074 }, { "epoch": 0.7835803772199532, "grad_norm": 0.44456160068511963, "learning_rate": 8.578482219791248e-06, "loss": 0.3929, "step": 17075 }, { "epoch": 0.7836262677252076, "grad_norm": 0.4653504490852356, "learning_rate": 8.578310973438858e-06, "loss": 0.324, "step": 17076 }, { "epoch": 0.7836721582304621, "grad_norm": 0.4456257224082947, "learning_rate": 8.578139718481759e-06, "loss": 0.3969, "step": 17077 }, { "epoch": 0.7837180487357166, "grad_norm": 0.47462353110313416, "learning_rate": 8.577968454920365e-06, "loss": 0.4681, "step": 17078 }, { "epoch": 0.7837639392409711, "grad_norm": 0.41800087690353394, "learning_rate": 8.577797182755086e-06, "loss": 0.3125, "step": 17079 }, { "epoch": 0.7838098297462255, "grad_norm": 0.461171954870224, "learning_rate": 8.577625901986333e-06, "loss": 0.3905, "step": 17080 }, { "epoch": 0.78385572025148, "grad_norm": 0.6888251304626465, "learning_rate": 8.577454612614519e-06, "loss": 0.4178, "step": 17081 }, { "epoch": 0.7839016107567345, "grad_norm": 0.4391978681087494, "learning_rate": 8.577283314640055e-06, "loss": 0.3701, "step": 17082 }, { "epoch": 0.7839475012619889, "grad_norm": 0.475168377161026, "learning_rate": 8.577112008063355e-06, "loss": 0.4198, "step": 17083 }, { "epoch": 0.7839933917672434, "grad_norm": 0.46289145946502686, "learning_rate": 8.57694069288483e-06, "loss": 0.4213, "step": 17084 }, { "epoch": 0.7840392822724979, "grad_norm": 0.501196563243866, "learning_rate": 8.576769369104889e-06, "loss": 0.4319, "step": 17085 }, { "epoch": 0.7840851727777522, "grad_norm": 0.42122530937194824, "learning_rate": 8.576598036723948e-06, "loss": 0.3415, "step": 17086 }, { "epoch": 0.7841310632830067, "grad_norm": 0.42527833580970764, "learning_rate": 8.57642669574242e-06, "loss": 0.3299, "step": 17087 }, { "epoch": 0.7841769537882612, "grad_norm": 0.47453293204307556, "learning_rate": 8.57625534616071e-06, "loss": 0.4425, "step": 17088 }, { "epoch": 0.7842228442935156, "grad_norm": 0.4598444402217865, "learning_rate": 8.576083987979237e-06, "loss": 0.4105, "step": 17089 }, { "epoch": 0.7842687347987701, "grad_norm": 0.4670698940753937, "learning_rate": 8.575912621198412e-06, "loss": 0.4127, "step": 17090 }, { "epoch": 0.7843146253040246, "grad_norm": 0.5091941952705383, "learning_rate": 8.575741245818643e-06, "loss": 0.4805, "step": 17091 }, { "epoch": 0.784360515809279, "grad_norm": 0.47951096296310425, "learning_rate": 8.575569861840347e-06, "loss": 0.3716, "step": 17092 }, { "epoch": 0.7844064063145335, "grad_norm": 0.44542521238327026, "learning_rate": 8.57539846926393e-06, "loss": 0.3869, "step": 17093 }, { "epoch": 0.784452296819788, "grad_norm": 0.4719654619693756, "learning_rate": 8.575227068089813e-06, "loss": 0.4124, "step": 17094 }, { "epoch": 0.7844981873250424, "grad_norm": 0.41698333621025085, "learning_rate": 8.575055658318402e-06, "loss": 0.3454, "step": 17095 }, { "epoch": 0.7845440778302969, "grad_norm": 0.4496590495109558, "learning_rate": 8.57488423995011e-06, "loss": 0.3882, "step": 17096 }, { "epoch": 0.7845899683355514, "grad_norm": 0.45034921169281006, "learning_rate": 8.574712812985349e-06, "loss": 0.3333, "step": 17097 }, { "epoch": 0.7846358588408059, "grad_norm": 0.43507716059684753, "learning_rate": 8.574541377424532e-06, "loss": 0.3565, "step": 17098 }, { "epoch": 0.7846817493460603, "grad_norm": 0.4196447432041168, "learning_rate": 8.574369933268071e-06, "loss": 0.3203, "step": 17099 }, { "epoch": 0.7847276398513148, "grad_norm": 0.4530881345272064, "learning_rate": 8.57419848051638e-06, "loss": 0.3783, "step": 17100 }, { "epoch": 0.7847735303565693, "grad_norm": 0.4364980161190033, "learning_rate": 8.574027019169868e-06, "loss": 0.3728, "step": 17101 }, { "epoch": 0.7848194208618237, "grad_norm": 0.5109471082687378, "learning_rate": 8.57385554922895e-06, "loss": 0.4933, "step": 17102 }, { "epoch": 0.7848653113670782, "grad_norm": 0.46648159623146057, "learning_rate": 8.573684070694037e-06, "loss": 0.385, "step": 17103 }, { "epoch": 0.7849112018723327, "grad_norm": 0.47312548756599426, "learning_rate": 8.573512583565542e-06, "loss": 0.4222, "step": 17104 }, { "epoch": 0.784957092377587, "grad_norm": 0.467281311750412, "learning_rate": 8.573341087843876e-06, "loss": 0.4192, "step": 17105 }, { "epoch": 0.7850029828828415, "grad_norm": 0.42873871326446533, "learning_rate": 8.573169583529454e-06, "loss": 0.3433, "step": 17106 }, { "epoch": 0.785048873388096, "grad_norm": 0.43758949637413025, "learning_rate": 8.572998070622685e-06, "loss": 0.3839, "step": 17107 }, { "epoch": 0.7850947638933504, "grad_norm": 0.45014718174934387, "learning_rate": 8.572826549123985e-06, "loss": 0.3918, "step": 17108 }, { "epoch": 0.7851406543986049, "grad_norm": 0.46529287099838257, "learning_rate": 8.572655019033764e-06, "loss": 0.4469, "step": 17109 }, { "epoch": 0.7851865449038594, "grad_norm": 0.45789286494255066, "learning_rate": 8.572483480352437e-06, "loss": 0.3508, "step": 17110 }, { "epoch": 0.7852324354091138, "grad_norm": 0.45020776987075806, "learning_rate": 8.572311933080413e-06, "loss": 0.4032, "step": 17111 }, { "epoch": 0.7852783259143683, "grad_norm": 0.43734240531921387, "learning_rate": 8.572140377218105e-06, "loss": 0.426, "step": 17112 }, { "epoch": 0.7853242164196228, "grad_norm": 0.502072274684906, "learning_rate": 8.571968812765929e-06, "loss": 0.4527, "step": 17113 }, { "epoch": 0.7853701069248773, "grad_norm": 0.4603514075279236, "learning_rate": 8.571797239724295e-06, "loss": 0.4174, "step": 17114 }, { "epoch": 0.7854159974301317, "grad_norm": 0.4474254548549652, "learning_rate": 8.571625658093615e-06, "loss": 0.4367, "step": 17115 }, { "epoch": 0.7854618879353862, "grad_norm": 0.4662390649318695, "learning_rate": 8.571454067874304e-06, "loss": 0.4441, "step": 17116 }, { "epoch": 0.7855077784406407, "grad_norm": 0.4569355547428131, "learning_rate": 8.571282469066771e-06, "loss": 0.4148, "step": 17117 }, { "epoch": 0.7855536689458951, "grad_norm": 0.4233655631542206, "learning_rate": 8.571110861671432e-06, "loss": 0.3438, "step": 17118 }, { "epoch": 0.7855995594511496, "grad_norm": 0.4323183000087738, "learning_rate": 8.570939245688699e-06, "loss": 0.3684, "step": 17119 }, { "epoch": 0.7856454499564041, "grad_norm": 0.48957934975624084, "learning_rate": 8.570767621118983e-06, "loss": 0.4345, "step": 17120 }, { "epoch": 0.7856913404616584, "grad_norm": 0.4283546507358551, "learning_rate": 8.570595987962698e-06, "loss": 0.3838, "step": 17121 }, { "epoch": 0.785737230966913, "grad_norm": 0.47225281596183777, "learning_rate": 8.570424346220258e-06, "loss": 0.411, "step": 17122 }, { "epoch": 0.7857831214721674, "grad_norm": 0.4663572609424591, "learning_rate": 8.570252695892073e-06, "loss": 0.4315, "step": 17123 }, { "epoch": 0.7858290119774218, "grad_norm": 0.49042749404907227, "learning_rate": 8.570081036978557e-06, "loss": 0.453, "step": 17124 }, { "epoch": 0.7858749024826763, "grad_norm": 0.4535020589828491, "learning_rate": 8.569909369480122e-06, "loss": 0.4077, "step": 17125 }, { "epoch": 0.7859207929879308, "grad_norm": 0.4530177414417267, "learning_rate": 8.569737693397184e-06, "loss": 0.3506, "step": 17126 }, { "epoch": 0.7859666834931852, "grad_norm": 0.45346471667289734, "learning_rate": 8.569566008730152e-06, "loss": 0.3929, "step": 17127 }, { "epoch": 0.7860125739984397, "grad_norm": 0.44365355372428894, "learning_rate": 8.56939431547944e-06, "loss": 0.3639, "step": 17128 }, { "epoch": 0.7860584645036942, "grad_norm": 0.44638627767562866, "learning_rate": 8.569222613645463e-06, "loss": 0.3677, "step": 17129 }, { "epoch": 0.7861043550089486, "grad_norm": 0.4293568432331085, "learning_rate": 8.56905090322863e-06, "loss": 0.3366, "step": 17130 }, { "epoch": 0.7861502455142031, "grad_norm": 0.44036248326301575, "learning_rate": 8.568879184229357e-06, "loss": 0.393, "step": 17131 }, { "epoch": 0.7861961360194576, "grad_norm": 0.43053802847862244, "learning_rate": 8.568707456648056e-06, "loss": 0.3833, "step": 17132 }, { "epoch": 0.7862420265247121, "grad_norm": 0.5189328789710999, "learning_rate": 8.568535720485141e-06, "loss": 0.5208, "step": 17133 }, { "epoch": 0.7862879170299665, "grad_norm": 0.4671753942966461, "learning_rate": 8.568363975741023e-06, "loss": 0.3971, "step": 17134 }, { "epoch": 0.786333807535221, "grad_norm": 0.4216882288455963, "learning_rate": 8.568192222416114e-06, "loss": 0.3568, "step": 17135 }, { "epoch": 0.7863796980404755, "grad_norm": 0.4572247862815857, "learning_rate": 8.56802046051083e-06, "loss": 0.4232, "step": 17136 }, { "epoch": 0.7864255885457299, "grad_norm": 0.4463280141353607, "learning_rate": 8.567848690025584e-06, "loss": 0.3988, "step": 17137 }, { "epoch": 0.7864714790509844, "grad_norm": 0.44375041127204895, "learning_rate": 8.567676910960787e-06, "loss": 0.3999, "step": 17138 }, { "epoch": 0.7865173695562389, "grad_norm": 0.4713706970214844, "learning_rate": 8.567505123316854e-06, "loss": 0.4051, "step": 17139 }, { "epoch": 0.7865632600614932, "grad_norm": 0.4290619194507599, "learning_rate": 8.567333327094196e-06, "loss": 0.3728, "step": 17140 }, { "epoch": 0.7866091505667477, "grad_norm": 0.49214574694633484, "learning_rate": 8.567161522293227e-06, "loss": 0.4228, "step": 17141 }, { "epoch": 0.7866550410720022, "grad_norm": 0.47395938634872437, "learning_rate": 8.566989708914362e-06, "loss": 0.3957, "step": 17142 }, { "epoch": 0.7867009315772566, "grad_norm": 0.4636525809764862, "learning_rate": 8.566817886958012e-06, "loss": 0.385, "step": 17143 }, { "epoch": 0.7867468220825111, "grad_norm": 0.4787483513355255, "learning_rate": 8.56664605642459e-06, "loss": 0.5046, "step": 17144 }, { "epoch": 0.7867927125877656, "grad_norm": 0.4723854959011078, "learning_rate": 8.56647421731451e-06, "loss": 0.5408, "step": 17145 }, { "epoch": 0.78683860309302, "grad_norm": 0.4569069743156433, "learning_rate": 8.566302369628185e-06, "loss": 0.4194, "step": 17146 }, { "epoch": 0.7868844935982745, "grad_norm": 0.49809855222702026, "learning_rate": 8.566130513366028e-06, "loss": 0.4522, "step": 17147 }, { "epoch": 0.786930384103529, "grad_norm": 0.43556126952171326, "learning_rate": 8.565958648528454e-06, "loss": 0.404, "step": 17148 }, { "epoch": 0.7869762746087834, "grad_norm": 0.44776713848114014, "learning_rate": 8.565786775115874e-06, "loss": 0.4036, "step": 17149 }, { "epoch": 0.7870221651140379, "grad_norm": 0.447168231010437, "learning_rate": 8.565614893128702e-06, "loss": 0.3733, "step": 17150 }, { "epoch": 0.7870680556192924, "grad_norm": 0.4839850068092346, "learning_rate": 8.56544300256735e-06, "loss": 0.475, "step": 17151 }, { "epoch": 0.7871139461245469, "grad_norm": 0.5121183395385742, "learning_rate": 8.565271103432235e-06, "loss": 0.4356, "step": 17152 }, { "epoch": 0.7871598366298013, "grad_norm": 0.45978716015815735, "learning_rate": 8.56509919572377e-06, "loss": 0.3872, "step": 17153 }, { "epoch": 0.7872057271350558, "grad_norm": 0.48227548599243164, "learning_rate": 8.564927279442363e-06, "loss": 0.4932, "step": 17154 }, { "epoch": 0.7872516176403103, "grad_norm": 0.436812162399292, "learning_rate": 8.564755354588434e-06, "loss": 0.3784, "step": 17155 }, { "epoch": 0.7872975081455647, "grad_norm": 0.42787501215934753, "learning_rate": 8.56458342116239e-06, "loss": 0.3763, "step": 17156 }, { "epoch": 0.7873433986508191, "grad_norm": 0.4604097604751587, "learning_rate": 8.564411479164652e-06, "loss": 0.4596, "step": 17157 }, { "epoch": 0.7873892891560736, "grad_norm": 0.48435500264167786, "learning_rate": 8.564239528595625e-06, "loss": 0.4402, "step": 17158 }, { "epoch": 0.787435179661328, "grad_norm": 0.5035337805747986, "learning_rate": 8.56406756945573e-06, "loss": 0.5083, "step": 17159 }, { "epoch": 0.7874810701665825, "grad_norm": 0.4315187335014343, "learning_rate": 8.563895601745376e-06, "loss": 0.4164, "step": 17160 }, { "epoch": 0.787526960671837, "grad_norm": 0.4794437885284424, "learning_rate": 8.56372362546498e-06, "loss": 0.4233, "step": 17161 }, { "epoch": 0.7875728511770914, "grad_norm": 0.46684151887893677, "learning_rate": 8.56355164061495e-06, "loss": 0.437, "step": 17162 }, { "epoch": 0.7876187416823459, "grad_norm": 0.4438982605934143, "learning_rate": 8.563379647195704e-06, "loss": 0.4235, "step": 17163 }, { "epoch": 0.7876646321876004, "grad_norm": 0.4240376055240631, "learning_rate": 8.563207645207656e-06, "loss": 0.334, "step": 17164 }, { "epoch": 0.7877105226928548, "grad_norm": 0.44225120544433594, "learning_rate": 8.563035634651218e-06, "loss": 0.3766, "step": 17165 }, { "epoch": 0.7877564131981093, "grad_norm": 0.49483808875083923, "learning_rate": 8.562863615526802e-06, "loss": 0.487, "step": 17166 }, { "epoch": 0.7878023037033638, "grad_norm": 0.42341458797454834, "learning_rate": 8.562691587834827e-06, "loss": 0.3419, "step": 17167 }, { "epoch": 0.7878481942086183, "grad_norm": 0.4706038534641266, "learning_rate": 8.5625195515757e-06, "loss": 0.4684, "step": 17168 }, { "epoch": 0.7878940847138727, "grad_norm": 0.4092176556587219, "learning_rate": 8.562347506749839e-06, "loss": 0.3221, "step": 17169 }, { "epoch": 0.7879399752191272, "grad_norm": 0.44596561789512634, "learning_rate": 8.562175453357655e-06, "loss": 0.3862, "step": 17170 }, { "epoch": 0.7879858657243817, "grad_norm": 0.4571721851825714, "learning_rate": 8.562003391399565e-06, "loss": 0.4149, "step": 17171 }, { "epoch": 0.7880317562296361, "grad_norm": 0.409482479095459, "learning_rate": 8.56183132087598e-06, "loss": 0.3049, "step": 17172 }, { "epoch": 0.7880776467348906, "grad_norm": 0.4265441596508026, "learning_rate": 8.561659241787318e-06, "loss": 0.3053, "step": 17173 }, { "epoch": 0.7881235372401451, "grad_norm": 0.4258749485015869, "learning_rate": 8.561487154133984e-06, "loss": 0.3523, "step": 17174 }, { "epoch": 0.7881694277453994, "grad_norm": 0.4452821612358093, "learning_rate": 8.561315057916401e-06, "loss": 0.376, "step": 17175 }, { "epoch": 0.7882153182506539, "grad_norm": 0.4856232702732086, "learning_rate": 8.56114295313498e-06, "loss": 0.4267, "step": 17176 }, { "epoch": 0.7882612087559084, "grad_norm": 0.43713143467903137, "learning_rate": 8.560970839790132e-06, "loss": 0.3845, "step": 17177 }, { "epoch": 0.7883070992611628, "grad_norm": 0.5090425610542297, "learning_rate": 8.560798717882273e-06, "loss": 0.3699, "step": 17178 }, { "epoch": 0.7883529897664173, "grad_norm": 0.43923458456993103, "learning_rate": 8.56062658741182e-06, "loss": 0.3135, "step": 17179 }, { "epoch": 0.7883988802716718, "grad_norm": 0.49745458364486694, "learning_rate": 8.56045444837918e-06, "loss": 0.4978, "step": 17180 }, { "epoch": 0.7884447707769262, "grad_norm": 0.4496169686317444, "learning_rate": 8.560282300784773e-06, "loss": 0.3551, "step": 17181 }, { "epoch": 0.7884906612821807, "grad_norm": 0.4361990988254547, "learning_rate": 8.560110144629011e-06, "loss": 0.3779, "step": 17182 }, { "epoch": 0.7885365517874352, "grad_norm": 0.42884111404418945, "learning_rate": 8.559937979912307e-06, "loss": 0.3674, "step": 17183 }, { "epoch": 0.7885824422926896, "grad_norm": 0.45398902893066406, "learning_rate": 8.559765806635074e-06, "loss": 0.3516, "step": 17184 }, { "epoch": 0.7886283327979441, "grad_norm": 0.4343807101249695, "learning_rate": 8.559593624797729e-06, "loss": 0.3432, "step": 17185 }, { "epoch": 0.7886742233031986, "grad_norm": 0.43676406145095825, "learning_rate": 8.559421434400685e-06, "loss": 0.361, "step": 17186 }, { "epoch": 0.7887201138084531, "grad_norm": 0.43893498182296753, "learning_rate": 8.559249235444356e-06, "loss": 0.3728, "step": 17187 }, { "epoch": 0.7887660043137075, "grad_norm": 0.4559212625026703, "learning_rate": 8.559077027929158e-06, "loss": 0.3611, "step": 17188 }, { "epoch": 0.788811894818962, "grad_norm": 0.4440266788005829, "learning_rate": 8.558904811855499e-06, "loss": 0.4048, "step": 17189 }, { "epoch": 0.7888577853242165, "grad_norm": 0.47478795051574707, "learning_rate": 8.5587325872238e-06, "loss": 0.4537, "step": 17190 }, { "epoch": 0.7889036758294709, "grad_norm": 0.42379775643348694, "learning_rate": 8.558560354034471e-06, "loss": 0.379, "step": 17191 }, { "epoch": 0.7889495663347253, "grad_norm": 0.4595082700252533, "learning_rate": 8.558388112287927e-06, "loss": 0.3799, "step": 17192 }, { "epoch": 0.7889954568399798, "grad_norm": 0.4567592144012451, "learning_rate": 8.558215861984583e-06, "loss": 0.3912, "step": 17193 }, { "epoch": 0.7890413473452342, "grad_norm": 0.44178637862205505, "learning_rate": 8.558043603124853e-06, "loss": 0.3992, "step": 17194 }, { "epoch": 0.7890872378504887, "grad_norm": 0.470222145318985, "learning_rate": 8.557871335709151e-06, "loss": 0.4012, "step": 17195 }, { "epoch": 0.7891331283557432, "grad_norm": 0.47202497720718384, "learning_rate": 8.557699059737893e-06, "loss": 0.3892, "step": 17196 }, { "epoch": 0.7891790188609976, "grad_norm": 0.4622460603713989, "learning_rate": 8.557526775211489e-06, "loss": 0.4201, "step": 17197 }, { "epoch": 0.7892249093662521, "grad_norm": 0.5011447072029114, "learning_rate": 8.557354482130357e-06, "loss": 0.4721, "step": 17198 }, { "epoch": 0.7892707998715066, "grad_norm": 0.4705965518951416, "learning_rate": 8.557182180494912e-06, "loss": 0.3908, "step": 17199 }, { "epoch": 0.789316690376761, "grad_norm": 0.420924574136734, "learning_rate": 8.557009870305563e-06, "loss": 0.3252, "step": 17200 }, { "epoch": 0.7893625808820155, "grad_norm": 0.43872472643852234, "learning_rate": 8.556837551562731e-06, "loss": 0.3416, "step": 17201 }, { "epoch": 0.78940847138727, "grad_norm": 0.4418359100818634, "learning_rate": 8.556665224266826e-06, "loss": 0.3739, "step": 17202 }, { "epoch": 0.7894543618925245, "grad_norm": 0.4496125876903534, "learning_rate": 8.556492888418264e-06, "loss": 0.3944, "step": 17203 }, { "epoch": 0.7895002523977789, "grad_norm": 0.40348702669143677, "learning_rate": 8.556320544017459e-06, "loss": 0.2703, "step": 17204 }, { "epoch": 0.7895461429030334, "grad_norm": 0.45193061232566833, "learning_rate": 8.556148191064824e-06, "loss": 0.3613, "step": 17205 }, { "epoch": 0.7895920334082879, "grad_norm": 0.41576990485191345, "learning_rate": 8.555975829560776e-06, "loss": 0.3328, "step": 17206 }, { "epoch": 0.7896379239135423, "grad_norm": 0.4641425311565399, "learning_rate": 8.55580345950573e-06, "loss": 0.382, "step": 17207 }, { "epoch": 0.7896838144187968, "grad_norm": 0.4670281410217285, "learning_rate": 8.555631080900096e-06, "loss": 0.4256, "step": 17208 }, { "epoch": 0.7897297049240513, "grad_norm": 0.47999438643455505, "learning_rate": 8.555458693744293e-06, "loss": 0.3747, "step": 17209 }, { "epoch": 0.7897755954293056, "grad_norm": 0.4544064700603485, "learning_rate": 8.555286298038735e-06, "loss": 0.3791, "step": 17210 }, { "epoch": 0.7898214859345601, "grad_norm": 0.4362332224845886, "learning_rate": 8.555113893783834e-06, "loss": 0.4168, "step": 17211 }, { "epoch": 0.7898673764398146, "grad_norm": 0.4537018835544586, "learning_rate": 8.554941480980008e-06, "loss": 0.4548, "step": 17212 }, { "epoch": 0.789913266945069, "grad_norm": 0.4793187975883484, "learning_rate": 8.554769059627667e-06, "loss": 0.4001, "step": 17213 }, { "epoch": 0.7899591574503235, "grad_norm": 0.4365011751651764, "learning_rate": 8.55459662972723e-06, "loss": 0.3787, "step": 17214 }, { "epoch": 0.790005047955578, "grad_norm": 0.45444753766059875, "learning_rate": 8.55442419127911e-06, "loss": 0.414, "step": 17215 }, { "epoch": 0.7900509384608324, "grad_norm": 0.45154058933258057, "learning_rate": 8.55425174428372e-06, "loss": 0.4287, "step": 17216 }, { "epoch": 0.7900968289660869, "grad_norm": 0.4343357980251312, "learning_rate": 8.554079288741478e-06, "loss": 0.3431, "step": 17217 }, { "epoch": 0.7901427194713414, "grad_norm": 0.4607161581516266, "learning_rate": 8.553906824652796e-06, "loss": 0.4175, "step": 17218 }, { "epoch": 0.7901886099765958, "grad_norm": 0.4593159556388855, "learning_rate": 8.553734352018091e-06, "loss": 0.4183, "step": 17219 }, { "epoch": 0.7902345004818503, "grad_norm": 0.43572238087654114, "learning_rate": 8.553561870837777e-06, "loss": 0.3746, "step": 17220 }, { "epoch": 0.7902803909871048, "grad_norm": 0.6195092797279358, "learning_rate": 8.553389381112265e-06, "loss": 0.3037, "step": 17221 }, { "epoch": 0.7903262814923593, "grad_norm": 0.46369999647140503, "learning_rate": 8.553216882841975e-06, "loss": 0.3833, "step": 17222 }, { "epoch": 0.7903721719976137, "grad_norm": 0.4104870557785034, "learning_rate": 8.55304437602732e-06, "loss": 0.3304, "step": 17223 }, { "epoch": 0.7904180625028682, "grad_norm": 0.4783789813518524, "learning_rate": 8.552871860668714e-06, "loss": 0.4509, "step": 17224 }, { "epoch": 0.7904639530081227, "grad_norm": 0.45948266983032227, "learning_rate": 8.552699336766574e-06, "loss": 0.3993, "step": 17225 }, { "epoch": 0.790509843513377, "grad_norm": 0.5095104575157166, "learning_rate": 8.552526804321314e-06, "loss": 0.5109, "step": 17226 }, { "epoch": 0.7905557340186316, "grad_norm": 0.4878319203853607, "learning_rate": 8.552354263333345e-06, "loss": 0.4764, "step": 17227 }, { "epoch": 0.790601624523886, "grad_norm": 0.4699997901916504, "learning_rate": 8.552181713803086e-06, "loss": 0.466, "step": 17228 }, { "epoch": 0.7906475150291404, "grad_norm": 0.47339287400245667, "learning_rate": 8.552009155730953e-06, "loss": 0.3921, "step": 17229 }, { "epoch": 0.7906934055343949, "grad_norm": 0.4630031883716583, "learning_rate": 8.551836589117358e-06, "loss": 0.3882, "step": 17230 }, { "epoch": 0.7907392960396494, "grad_norm": 0.4879360496997833, "learning_rate": 8.551664013962718e-06, "loss": 0.4707, "step": 17231 }, { "epoch": 0.7907851865449038, "grad_norm": 0.39771568775177, "learning_rate": 8.551491430267445e-06, "loss": 0.321, "step": 17232 }, { "epoch": 0.7908310770501583, "grad_norm": 0.4232565462589264, "learning_rate": 8.551318838031955e-06, "loss": 0.3502, "step": 17233 }, { "epoch": 0.7908769675554128, "grad_norm": 0.48249053955078125, "learning_rate": 8.551146237256667e-06, "loss": 0.5121, "step": 17234 }, { "epoch": 0.7909228580606672, "grad_norm": 0.4547002911567688, "learning_rate": 8.550973627941991e-06, "loss": 0.4512, "step": 17235 }, { "epoch": 0.7909687485659217, "grad_norm": 0.4193003177642822, "learning_rate": 8.550801010088346e-06, "loss": 0.3754, "step": 17236 }, { "epoch": 0.7910146390711762, "grad_norm": 0.4411562383174896, "learning_rate": 8.550628383696144e-06, "loss": 0.3801, "step": 17237 }, { "epoch": 0.7910605295764306, "grad_norm": 0.43179214000701904, "learning_rate": 8.550455748765801e-06, "loss": 0.3409, "step": 17238 }, { "epoch": 0.7911064200816851, "grad_norm": 0.47141706943511963, "learning_rate": 8.550283105297734e-06, "loss": 0.4333, "step": 17239 }, { "epoch": 0.7911523105869396, "grad_norm": 0.4539576470851898, "learning_rate": 8.550110453292355e-06, "loss": 0.432, "step": 17240 }, { "epoch": 0.7911982010921941, "grad_norm": 0.47078731656074524, "learning_rate": 8.549937792750082e-06, "loss": 0.4216, "step": 17241 }, { "epoch": 0.7912440915974485, "grad_norm": 0.4110214412212372, "learning_rate": 8.549765123671328e-06, "loss": 0.3031, "step": 17242 }, { "epoch": 0.791289982102703, "grad_norm": 0.4198560416698456, "learning_rate": 8.54959244605651e-06, "loss": 0.351, "step": 17243 }, { "epoch": 0.7913358726079575, "grad_norm": 0.4529282748699188, "learning_rate": 8.549419759906042e-06, "loss": 0.3712, "step": 17244 }, { "epoch": 0.7913817631132118, "grad_norm": 0.4488847851753235, "learning_rate": 8.54924706522034e-06, "loss": 0.3676, "step": 17245 }, { "epoch": 0.7914276536184663, "grad_norm": 0.46307677030563354, "learning_rate": 8.54907436199982e-06, "loss": 0.4308, "step": 17246 }, { "epoch": 0.7914735441237208, "grad_norm": 0.45445457100868225, "learning_rate": 8.548901650244894e-06, "loss": 0.4041, "step": 17247 }, { "epoch": 0.7915194346289752, "grad_norm": 0.45837122201919556, "learning_rate": 8.548728929955982e-06, "loss": 0.4286, "step": 17248 }, { "epoch": 0.7915653251342297, "grad_norm": 0.45341235399246216, "learning_rate": 8.548556201133495e-06, "loss": 0.3607, "step": 17249 }, { "epoch": 0.7916112156394842, "grad_norm": 0.4551328122615814, "learning_rate": 8.548383463777852e-06, "loss": 0.4024, "step": 17250 }, { "epoch": 0.7916571061447386, "grad_norm": 0.4732138514518738, "learning_rate": 8.548210717889465e-06, "loss": 0.4792, "step": 17251 }, { "epoch": 0.7917029966499931, "grad_norm": 0.4484398663043976, "learning_rate": 8.548037963468752e-06, "loss": 0.4293, "step": 17252 }, { "epoch": 0.7917488871552476, "grad_norm": 0.4463064968585968, "learning_rate": 8.547865200516128e-06, "loss": 0.3447, "step": 17253 }, { "epoch": 0.791794777660502, "grad_norm": 0.4638603627681732, "learning_rate": 8.547692429032006e-06, "loss": 0.4079, "step": 17254 }, { "epoch": 0.7918406681657565, "grad_norm": 0.43883687257766724, "learning_rate": 8.547519649016805e-06, "loss": 0.3863, "step": 17255 }, { "epoch": 0.791886558671011, "grad_norm": 0.5217450261116028, "learning_rate": 8.547346860470938e-06, "loss": 0.4902, "step": 17256 }, { "epoch": 0.7919324491762655, "grad_norm": 0.43585386872291565, "learning_rate": 8.547174063394821e-06, "loss": 0.357, "step": 17257 }, { "epoch": 0.7919783396815199, "grad_norm": 0.4599902927875519, "learning_rate": 8.547001257788871e-06, "loss": 0.4322, "step": 17258 }, { "epoch": 0.7920242301867744, "grad_norm": 0.49865201115608215, "learning_rate": 8.546828443653504e-06, "loss": 0.4451, "step": 17259 }, { "epoch": 0.7920701206920289, "grad_norm": 0.4622938632965088, "learning_rate": 8.54665562098913e-06, "loss": 0.4438, "step": 17260 }, { "epoch": 0.7921160111972833, "grad_norm": 0.4822182059288025, "learning_rate": 8.546482789796172e-06, "loss": 0.5275, "step": 17261 }, { "epoch": 0.7921619017025378, "grad_norm": 0.4941704571247101, "learning_rate": 8.54630995007504e-06, "loss": 0.4894, "step": 17262 }, { "epoch": 0.7922077922077922, "grad_norm": 0.42387962341308594, "learning_rate": 8.546137101826154e-06, "loss": 0.3621, "step": 17263 }, { "epoch": 0.7922536827130466, "grad_norm": 0.44690072536468506, "learning_rate": 8.545964245049926e-06, "loss": 0.3939, "step": 17264 }, { "epoch": 0.7922995732183011, "grad_norm": 0.49550363421440125, "learning_rate": 8.545791379746774e-06, "loss": 0.4635, "step": 17265 }, { "epoch": 0.7923454637235556, "grad_norm": 0.48905831575393677, "learning_rate": 8.545618505917112e-06, "loss": 0.4749, "step": 17266 }, { "epoch": 0.79239135422881, "grad_norm": 0.40370485186576843, "learning_rate": 8.545445623561357e-06, "loss": 0.3018, "step": 17267 }, { "epoch": 0.7924372447340645, "grad_norm": 0.4208553731441498, "learning_rate": 8.545272732679923e-06, "loss": 0.3212, "step": 17268 }, { "epoch": 0.792483135239319, "grad_norm": 0.4605855643749237, "learning_rate": 8.545099833273229e-06, "loss": 0.4316, "step": 17269 }, { "epoch": 0.7925290257445734, "grad_norm": 0.4115740954875946, "learning_rate": 8.544926925341687e-06, "loss": 0.3111, "step": 17270 }, { "epoch": 0.7925749162498279, "grad_norm": 0.4296366274356842, "learning_rate": 8.544754008885715e-06, "loss": 0.3674, "step": 17271 }, { "epoch": 0.7926208067550824, "grad_norm": 0.44176560640335083, "learning_rate": 8.54458108390573e-06, "loss": 0.4243, "step": 17272 }, { "epoch": 0.7926666972603368, "grad_norm": 0.41267749667167664, "learning_rate": 8.544408150402144e-06, "loss": 0.3149, "step": 17273 }, { "epoch": 0.7927125877655913, "grad_norm": 0.46088534593582153, "learning_rate": 8.544235208375375e-06, "loss": 0.4384, "step": 17274 }, { "epoch": 0.7927584782708458, "grad_norm": 0.4485809803009033, "learning_rate": 8.544062257825841e-06, "loss": 0.3614, "step": 17275 }, { "epoch": 0.7928043687761003, "grad_norm": 0.46488314867019653, "learning_rate": 8.543889298753953e-06, "loss": 0.4268, "step": 17276 }, { "epoch": 0.7928502592813547, "grad_norm": 0.48688897490501404, "learning_rate": 8.543716331160133e-06, "loss": 0.2753, "step": 17277 }, { "epoch": 0.7928961497866092, "grad_norm": 0.6692848205566406, "learning_rate": 8.543543355044791e-06, "loss": 0.5327, "step": 17278 }, { "epoch": 0.7929420402918637, "grad_norm": 0.48614612221717834, "learning_rate": 8.543370370408346e-06, "loss": 0.4461, "step": 17279 }, { "epoch": 0.792987930797118, "grad_norm": 0.4368221163749695, "learning_rate": 8.543197377251215e-06, "loss": 0.3705, "step": 17280 }, { "epoch": 0.7930338213023725, "grad_norm": 0.45697784423828125, "learning_rate": 8.543024375573813e-06, "loss": 0.3852, "step": 17281 }, { "epoch": 0.793079711807627, "grad_norm": 0.47184309363365173, "learning_rate": 8.542851365376552e-06, "loss": 0.4475, "step": 17282 }, { "epoch": 0.7931256023128814, "grad_norm": 0.4400814473628998, "learning_rate": 8.542678346659855e-06, "loss": 0.4017, "step": 17283 }, { "epoch": 0.7931714928181359, "grad_norm": 0.45345747470855713, "learning_rate": 8.542505319424134e-06, "loss": 0.364, "step": 17284 }, { "epoch": 0.7932173833233904, "grad_norm": 0.46244215965270996, "learning_rate": 8.542332283669805e-06, "loss": 0.3801, "step": 17285 }, { "epoch": 0.7932632738286448, "grad_norm": 0.4394710659980774, "learning_rate": 8.542159239397285e-06, "loss": 0.3414, "step": 17286 }, { "epoch": 0.7933091643338993, "grad_norm": 0.45818227529525757, "learning_rate": 8.54198618660699e-06, "loss": 0.4087, "step": 17287 }, { "epoch": 0.7933550548391538, "grad_norm": 0.4504507780075073, "learning_rate": 8.541813125299337e-06, "loss": 0.4135, "step": 17288 }, { "epoch": 0.7934009453444082, "grad_norm": 0.4910365641117096, "learning_rate": 8.54164005547474e-06, "loss": 0.4444, "step": 17289 }, { "epoch": 0.7934468358496627, "grad_norm": 0.4514201283454895, "learning_rate": 8.541466977133616e-06, "loss": 0.4317, "step": 17290 }, { "epoch": 0.7934927263549172, "grad_norm": 0.42900437116622925, "learning_rate": 8.541293890276383e-06, "loss": 0.3609, "step": 17291 }, { "epoch": 0.7935386168601716, "grad_norm": 0.46192467212677, "learning_rate": 8.541120794903456e-06, "loss": 0.4148, "step": 17292 }, { "epoch": 0.7935845073654261, "grad_norm": 0.45300108194351196, "learning_rate": 8.540947691015252e-06, "loss": 0.4043, "step": 17293 }, { "epoch": 0.7936303978706806, "grad_norm": 0.43007004261016846, "learning_rate": 8.540774578612182e-06, "loss": 0.3495, "step": 17294 }, { "epoch": 0.7936762883759351, "grad_norm": 0.44749870896339417, "learning_rate": 8.540601457694672e-06, "loss": 0.3853, "step": 17295 }, { "epoch": 0.7937221788811895, "grad_norm": 0.4807637929916382, "learning_rate": 8.54042832826313e-06, "loss": 0.4808, "step": 17296 }, { "epoch": 0.793768069386444, "grad_norm": 0.459590345621109, "learning_rate": 8.540255190317977e-06, "loss": 0.3991, "step": 17297 }, { "epoch": 0.7938139598916985, "grad_norm": 0.4139643907546997, "learning_rate": 8.540082043859626e-06, "loss": 0.3383, "step": 17298 }, { "epoch": 0.7938598503969528, "grad_norm": 0.4161096513271332, "learning_rate": 8.539908888888495e-06, "loss": 0.3658, "step": 17299 }, { "epoch": 0.7939057409022073, "grad_norm": 0.44474828243255615, "learning_rate": 8.539735725405e-06, "loss": 0.3876, "step": 17300 }, { "epoch": 0.7939516314074618, "grad_norm": 0.44705772399902344, "learning_rate": 8.539562553409559e-06, "loss": 0.3861, "step": 17301 }, { "epoch": 0.7939975219127162, "grad_norm": 0.45543137192726135, "learning_rate": 8.539389372902587e-06, "loss": 0.4253, "step": 17302 }, { "epoch": 0.7940434124179707, "grad_norm": 0.4713234007358551, "learning_rate": 8.5392161838845e-06, "loss": 0.4405, "step": 17303 }, { "epoch": 0.7940893029232252, "grad_norm": 0.42139607667922974, "learning_rate": 8.539042986355714e-06, "loss": 0.3238, "step": 17304 }, { "epoch": 0.7941351934284796, "grad_norm": 0.47023651003837585, "learning_rate": 8.53886978031665e-06, "loss": 0.4378, "step": 17305 }, { "epoch": 0.7941810839337341, "grad_norm": 0.41005706787109375, "learning_rate": 8.538696565767718e-06, "loss": 0.3412, "step": 17306 }, { "epoch": 0.7942269744389886, "grad_norm": 0.4828278124332428, "learning_rate": 8.538523342709338e-06, "loss": 0.4553, "step": 17307 }, { "epoch": 0.794272864944243, "grad_norm": 0.4378403425216675, "learning_rate": 8.538350111141927e-06, "loss": 0.3604, "step": 17308 }, { "epoch": 0.7943187554494975, "grad_norm": 0.4598485827445984, "learning_rate": 8.538176871065901e-06, "loss": 0.3663, "step": 17309 }, { "epoch": 0.794364645954752, "grad_norm": 0.44187384843826294, "learning_rate": 8.538003622481675e-06, "loss": 0.3966, "step": 17310 }, { "epoch": 0.7944105364600065, "grad_norm": 0.44187048077583313, "learning_rate": 8.537830365389668e-06, "loss": 0.3433, "step": 17311 }, { "epoch": 0.7944564269652609, "grad_norm": 0.428903192281723, "learning_rate": 8.537657099790295e-06, "loss": 0.3552, "step": 17312 }, { "epoch": 0.7945023174705154, "grad_norm": 0.43505802750587463, "learning_rate": 8.537483825683974e-06, "loss": 0.3478, "step": 17313 }, { "epoch": 0.7945482079757699, "grad_norm": 0.45521458983421326, "learning_rate": 8.53731054307112e-06, "loss": 0.4305, "step": 17314 }, { "epoch": 0.7945940984810242, "grad_norm": 0.4717352092266083, "learning_rate": 8.537137251952152e-06, "loss": 0.4433, "step": 17315 }, { "epoch": 0.7946399889862787, "grad_norm": 0.4372889995574951, "learning_rate": 8.536963952327484e-06, "loss": 0.4333, "step": 17316 }, { "epoch": 0.7946858794915332, "grad_norm": 0.510814905166626, "learning_rate": 8.536790644197532e-06, "loss": 0.4286, "step": 17317 }, { "epoch": 0.7947317699967876, "grad_norm": 0.41214531660079956, "learning_rate": 8.536617327562718e-06, "loss": 0.3132, "step": 17318 }, { "epoch": 0.7947776605020421, "grad_norm": 0.43785569071769714, "learning_rate": 8.536444002423453e-06, "loss": 0.3895, "step": 17319 }, { "epoch": 0.7948235510072966, "grad_norm": 0.5033149719238281, "learning_rate": 8.53627066878016e-06, "loss": 0.4852, "step": 17320 }, { "epoch": 0.794869441512551, "grad_norm": 0.4413720369338989, "learning_rate": 8.53609732663325e-06, "loss": 0.371, "step": 17321 }, { "epoch": 0.7949153320178055, "grad_norm": 0.442349910736084, "learning_rate": 8.535923975983141e-06, "loss": 0.3539, "step": 17322 }, { "epoch": 0.79496122252306, "grad_norm": 0.497665673494339, "learning_rate": 8.535750616830252e-06, "loss": 0.4871, "step": 17323 }, { "epoch": 0.7950071130283144, "grad_norm": 0.44707366824150085, "learning_rate": 8.535577249174998e-06, "loss": 0.3719, "step": 17324 }, { "epoch": 0.7950530035335689, "grad_norm": 0.5090526938438416, "learning_rate": 8.535403873017795e-06, "loss": 0.4827, "step": 17325 }, { "epoch": 0.7950988940388234, "grad_norm": 0.4506312906742096, "learning_rate": 8.535230488359065e-06, "loss": 0.3841, "step": 17326 }, { "epoch": 0.7951447845440778, "grad_norm": 0.42831799387931824, "learning_rate": 8.53505709519922e-06, "loss": 0.3492, "step": 17327 }, { "epoch": 0.7951906750493323, "grad_norm": 0.4776977598667145, "learning_rate": 8.534883693538678e-06, "loss": 0.3997, "step": 17328 }, { "epoch": 0.7952365655545868, "grad_norm": 0.47641322016716003, "learning_rate": 8.534710283377856e-06, "loss": 0.4173, "step": 17329 }, { "epoch": 0.7952824560598413, "grad_norm": 0.45216673612594604, "learning_rate": 8.534536864717172e-06, "loss": 0.4064, "step": 17330 }, { "epoch": 0.7953283465650957, "grad_norm": 0.45900672674179077, "learning_rate": 8.534363437557042e-06, "loss": 0.395, "step": 17331 }, { "epoch": 0.7953742370703502, "grad_norm": 0.4505752921104431, "learning_rate": 8.534190001897884e-06, "loss": 0.4086, "step": 17332 }, { "epoch": 0.7954201275756047, "grad_norm": 0.46673840284347534, "learning_rate": 8.534016557740112e-06, "loss": 0.3915, "step": 17333 }, { "epoch": 0.795466018080859, "grad_norm": 0.46488118171691895, "learning_rate": 8.53384310508415e-06, "loss": 0.4727, "step": 17334 }, { "epoch": 0.7955119085861135, "grad_norm": 0.4449230432510376, "learning_rate": 8.533669643930407e-06, "loss": 0.3842, "step": 17335 }, { "epoch": 0.795557799091368, "grad_norm": 0.41877156496047974, "learning_rate": 8.533496174279305e-06, "loss": 0.3396, "step": 17336 }, { "epoch": 0.7956036895966224, "grad_norm": 0.46254199743270874, "learning_rate": 8.533322696131262e-06, "loss": 0.3944, "step": 17337 }, { "epoch": 0.7956495801018769, "grad_norm": 0.4546830654144287, "learning_rate": 8.533149209486688e-06, "loss": 0.4357, "step": 17338 }, { "epoch": 0.7956954706071314, "grad_norm": 0.46741804480552673, "learning_rate": 8.53297571434601e-06, "loss": 0.429, "step": 17339 }, { "epoch": 0.7957413611123858, "grad_norm": 0.43567103147506714, "learning_rate": 8.532802210709638e-06, "loss": 0.4027, "step": 17340 }, { "epoch": 0.7957872516176403, "grad_norm": 0.4721885919570923, "learning_rate": 8.532628698577992e-06, "loss": 0.4921, "step": 17341 }, { "epoch": 0.7958331421228948, "grad_norm": 0.5093852281570435, "learning_rate": 8.53245517795149e-06, "loss": 0.5067, "step": 17342 }, { "epoch": 0.7958790326281492, "grad_norm": 0.4458777904510498, "learning_rate": 8.532281648830547e-06, "loss": 0.3388, "step": 17343 }, { "epoch": 0.7959249231334037, "grad_norm": 0.4870307743549347, "learning_rate": 8.532108111215583e-06, "loss": 0.5027, "step": 17344 }, { "epoch": 0.7959708136386582, "grad_norm": 0.42581629753112793, "learning_rate": 8.531934565107011e-06, "loss": 0.3147, "step": 17345 }, { "epoch": 0.7960167041439127, "grad_norm": 0.4919399321079254, "learning_rate": 8.531761010505253e-06, "loss": 0.4224, "step": 17346 }, { "epoch": 0.7960625946491671, "grad_norm": 0.42880934476852417, "learning_rate": 8.531587447410725e-06, "loss": 0.3363, "step": 17347 }, { "epoch": 0.7961084851544216, "grad_norm": 0.4705013036727905, "learning_rate": 8.531413875823843e-06, "loss": 0.4625, "step": 17348 }, { "epoch": 0.7961543756596761, "grad_norm": 0.419408917427063, "learning_rate": 8.531240295745025e-06, "loss": 0.3152, "step": 17349 }, { "epoch": 0.7962002661649304, "grad_norm": 0.49158501625061035, "learning_rate": 8.53106670717469e-06, "loss": 0.4014, "step": 17350 }, { "epoch": 0.796246156670185, "grad_norm": 0.47131234407424927, "learning_rate": 8.530893110113252e-06, "loss": 0.3938, "step": 17351 }, { "epoch": 0.7962920471754394, "grad_norm": 0.46139469742774963, "learning_rate": 8.53071950456113e-06, "loss": 0.3707, "step": 17352 }, { "epoch": 0.7963379376806938, "grad_norm": 0.4659257233142853, "learning_rate": 8.530545890518743e-06, "loss": 0.4573, "step": 17353 }, { "epoch": 0.7963838281859483, "grad_norm": 0.4700642228126526, "learning_rate": 8.530372267986509e-06, "loss": 0.4123, "step": 17354 }, { "epoch": 0.7964297186912028, "grad_norm": 0.4003869593143463, "learning_rate": 8.530198636964842e-06, "loss": 0.3231, "step": 17355 }, { "epoch": 0.7964756091964572, "grad_norm": 0.48110300302505493, "learning_rate": 8.530024997454161e-06, "loss": 0.4564, "step": 17356 }, { "epoch": 0.7965214997017117, "grad_norm": 0.49582207202911377, "learning_rate": 8.529851349454885e-06, "loss": 0.4915, "step": 17357 }, { "epoch": 0.7965673902069662, "grad_norm": 0.4540678858757019, "learning_rate": 8.52967769296743e-06, "loss": 0.3496, "step": 17358 }, { "epoch": 0.7966132807122206, "grad_norm": 0.522850513458252, "learning_rate": 8.529504027992214e-06, "loss": 0.415, "step": 17359 }, { "epoch": 0.7966591712174751, "grad_norm": 0.4113677740097046, "learning_rate": 8.529330354529657e-06, "loss": 0.2884, "step": 17360 }, { "epoch": 0.7967050617227296, "grad_norm": 0.4390864968299866, "learning_rate": 8.529156672580172e-06, "loss": 0.3632, "step": 17361 }, { "epoch": 0.796750952227984, "grad_norm": 0.47810623049736023, "learning_rate": 8.52898298214418e-06, "loss": 0.4182, "step": 17362 }, { "epoch": 0.7967968427332385, "grad_norm": 0.48694515228271484, "learning_rate": 8.528809283222097e-06, "loss": 0.4599, "step": 17363 }, { "epoch": 0.796842733238493, "grad_norm": 0.44697320461273193, "learning_rate": 8.528635575814342e-06, "loss": 0.3668, "step": 17364 }, { "epoch": 0.7968886237437475, "grad_norm": 0.4329720735549927, "learning_rate": 8.528461859921333e-06, "loss": 0.3807, "step": 17365 }, { "epoch": 0.7969345142490019, "grad_norm": 0.460861474275589, "learning_rate": 8.528288135543484e-06, "loss": 0.4023, "step": 17366 }, { "epoch": 0.7969804047542564, "grad_norm": 0.45921745896339417, "learning_rate": 8.528114402681219e-06, "loss": 0.4221, "step": 17367 }, { "epoch": 0.7970262952595109, "grad_norm": 0.5792778730392456, "learning_rate": 8.52794066133495e-06, "loss": 0.4293, "step": 17368 }, { "epoch": 0.7970721857647652, "grad_norm": 0.4442204535007477, "learning_rate": 8.527766911505097e-06, "loss": 0.3727, "step": 17369 }, { "epoch": 0.7971180762700197, "grad_norm": 0.4427334666252136, "learning_rate": 8.52759315319208e-06, "loss": 0.3318, "step": 17370 }, { "epoch": 0.7971639667752742, "grad_norm": 0.44364240765571594, "learning_rate": 8.527419386396313e-06, "loss": 0.4154, "step": 17371 }, { "epoch": 0.7972098572805286, "grad_norm": 0.45921948552131653, "learning_rate": 8.527245611118215e-06, "loss": 0.451, "step": 17372 }, { "epoch": 0.7972557477857831, "grad_norm": 0.475739985704422, "learning_rate": 8.527071827358207e-06, "loss": 0.4503, "step": 17373 }, { "epoch": 0.7973016382910376, "grad_norm": 0.4728918969631195, "learning_rate": 8.526898035116704e-06, "loss": 0.4576, "step": 17374 }, { "epoch": 0.797347528796292, "grad_norm": 0.4763958156108856, "learning_rate": 8.526724234394123e-06, "loss": 0.4421, "step": 17375 }, { "epoch": 0.7973934193015465, "grad_norm": 0.4438845217227936, "learning_rate": 8.526550425190886e-06, "loss": 0.3751, "step": 17376 }, { "epoch": 0.797439309806801, "grad_norm": 0.46401268243789673, "learning_rate": 8.526376607507405e-06, "loss": 0.4364, "step": 17377 }, { "epoch": 0.7974852003120554, "grad_norm": 0.5637767910957336, "learning_rate": 8.526202781344103e-06, "loss": 0.4913, "step": 17378 }, { "epoch": 0.7975310908173099, "grad_norm": 0.4584812819957733, "learning_rate": 8.526028946701394e-06, "loss": 0.4298, "step": 17379 }, { "epoch": 0.7975769813225644, "grad_norm": 0.463896244764328, "learning_rate": 8.5258551035797e-06, "loss": 0.3945, "step": 17380 }, { "epoch": 0.7976228718278188, "grad_norm": 0.45107871294021606, "learning_rate": 8.525681251979437e-06, "loss": 0.4025, "step": 17381 }, { "epoch": 0.7976687623330733, "grad_norm": 0.45406076312065125, "learning_rate": 8.525507391901024e-06, "loss": 0.4273, "step": 17382 }, { "epoch": 0.7977146528383278, "grad_norm": 0.4512777626514435, "learning_rate": 8.525333523344878e-06, "loss": 0.3852, "step": 17383 }, { "epoch": 0.7977605433435823, "grad_norm": 0.4962588846683502, "learning_rate": 8.525159646311416e-06, "loss": 0.4518, "step": 17384 }, { "epoch": 0.7978064338488366, "grad_norm": 0.43524184823036194, "learning_rate": 8.524985760801059e-06, "loss": 0.3747, "step": 17385 }, { "epoch": 0.7978523243540911, "grad_norm": 0.4741273522377014, "learning_rate": 8.524811866814225e-06, "loss": 0.4351, "step": 17386 }, { "epoch": 0.7978982148593456, "grad_norm": 0.4265655279159546, "learning_rate": 8.524637964351328e-06, "loss": 0.3562, "step": 17387 }, { "epoch": 0.7979441053646, "grad_norm": 0.4441221356391907, "learning_rate": 8.52446405341279e-06, "loss": 0.3676, "step": 17388 }, { "epoch": 0.7979899958698545, "grad_norm": 0.47150611877441406, "learning_rate": 8.524290133999032e-06, "loss": 0.4396, "step": 17389 }, { "epoch": 0.798035886375109, "grad_norm": 0.4297012984752655, "learning_rate": 8.524116206110464e-06, "loss": 0.3181, "step": 17390 }, { "epoch": 0.7980817768803634, "grad_norm": 0.48709699511528015, "learning_rate": 8.52394226974751e-06, "loss": 0.4649, "step": 17391 }, { "epoch": 0.7981276673856179, "grad_norm": 0.45016026496887207, "learning_rate": 8.523768324910589e-06, "loss": 0.3667, "step": 17392 }, { "epoch": 0.7981735578908724, "grad_norm": 0.46889638900756836, "learning_rate": 8.523594371600114e-06, "loss": 0.4093, "step": 17393 }, { "epoch": 0.7982194483961268, "grad_norm": 0.4164358973503113, "learning_rate": 8.52342040981651e-06, "loss": 0.2915, "step": 17394 }, { "epoch": 0.7982653389013813, "grad_norm": 0.45595481991767883, "learning_rate": 8.523246439560188e-06, "loss": 0.3906, "step": 17395 }, { "epoch": 0.7983112294066358, "grad_norm": 0.4730428457260132, "learning_rate": 8.523072460831575e-06, "loss": 0.3975, "step": 17396 }, { "epoch": 0.7983571199118902, "grad_norm": 0.5198157429695129, "learning_rate": 8.52289847363108e-06, "loss": 0.5012, "step": 17397 }, { "epoch": 0.7984030104171447, "grad_norm": 0.487239807844162, "learning_rate": 8.522724477959128e-06, "loss": 0.4577, "step": 17398 }, { "epoch": 0.7984489009223992, "grad_norm": 0.44806692004203796, "learning_rate": 8.522550473816136e-06, "loss": 0.3806, "step": 17399 }, { "epoch": 0.7984947914276537, "grad_norm": 0.5005094408988953, "learning_rate": 8.522376461202522e-06, "loss": 0.4376, "step": 17400 }, { "epoch": 0.7985406819329081, "grad_norm": 0.45714861154556274, "learning_rate": 8.522202440118704e-06, "loss": 0.4746, "step": 17401 }, { "epoch": 0.7985865724381626, "grad_norm": 0.45959559082984924, "learning_rate": 8.5220284105651e-06, "loss": 0.436, "step": 17402 }, { "epoch": 0.798632462943417, "grad_norm": 0.3999626934528351, "learning_rate": 8.52185437254213e-06, "loss": 0.3271, "step": 17403 }, { "epoch": 0.7986783534486714, "grad_norm": 0.44875892996788025, "learning_rate": 8.521680326050212e-06, "loss": 0.3785, "step": 17404 }, { "epoch": 0.7987242439539259, "grad_norm": 0.5110406279563904, "learning_rate": 8.521506271089764e-06, "loss": 0.4941, "step": 17405 }, { "epoch": 0.7987701344591804, "grad_norm": 0.4528273642063141, "learning_rate": 8.521332207661203e-06, "loss": 0.3241, "step": 17406 }, { "epoch": 0.7988160249644348, "grad_norm": 0.477131187915802, "learning_rate": 8.521158135764954e-06, "loss": 0.3722, "step": 17407 }, { "epoch": 0.7988619154696893, "grad_norm": 0.456575870513916, "learning_rate": 8.520984055401426e-06, "loss": 0.3487, "step": 17408 }, { "epoch": 0.7989078059749438, "grad_norm": 0.42182672023773193, "learning_rate": 8.520809966571044e-06, "loss": 0.3185, "step": 17409 }, { "epoch": 0.7989536964801982, "grad_norm": 0.42592522501945496, "learning_rate": 8.520635869274227e-06, "loss": 0.3615, "step": 17410 }, { "epoch": 0.7989995869854527, "grad_norm": 0.518481433391571, "learning_rate": 8.520461763511391e-06, "loss": 0.5343, "step": 17411 }, { "epoch": 0.7990454774907072, "grad_norm": 0.4880763590335846, "learning_rate": 8.520287649282955e-06, "loss": 0.4196, "step": 17412 }, { "epoch": 0.7990913679959616, "grad_norm": 0.4802713692188263, "learning_rate": 8.52011352658934e-06, "loss": 0.4797, "step": 17413 }, { "epoch": 0.7991372585012161, "grad_norm": 0.46200743317604065, "learning_rate": 8.519939395430961e-06, "loss": 0.4293, "step": 17414 }, { "epoch": 0.7991831490064706, "grad_norm": 0.46974611282348633, "learning_rate": 8.51976525580824e-06, "loss": 0.4038, "step": 17415 }, { "epoch": 0.799229039511725, "grad_norm": 0.44320976734161377, "learning_rate": 8.519591107721592e-06, "loss": 0.4185, "step": 17416 }, { "epoch": 0.7992749300169795, "grad_norm": 0.45917925238609314, "learning_rate": 8.519416951171439e-06, "loss": 0.4233, "step": 17417 }, { "epoch": 0.799320820522234, "grad_norm": 0.43788355588912964, "learning_rate": 8.519242786158202e-06, "loss": 0.3756, "step": 17418 }, { "epoch": 0.7993667110274885, "grad_norm": 0.48166245222091675, "learning_rate": 8.519068612682294e-06, "loss": 0.404, "step": 17419 }, { "epoch": 0.7994126015327429, "grad_norm": 0.4552880525588989, "learning_rate": 8.518894430744137e-06, "loss": 0.3964, "step": 17420 }, { "epoch": 0.7994584920379973, "grad_norm": 0.4498491585254669, "learning_rate": 8.518720240344149e-06, "loss": 0.3724, "step": 17421 }, { "epoch": 0.7995043825432518, "grad_norm": 0.44619670510292053, "learning_rate": 8.518546041482751e-06, "loss": 0.4167, "step": 17422 }, { "epoch": 0.7995502730485062, "grad_norm": 0.43638819456100464, "learning_rate": 8.51837183416036e-06, "loss": 0.3551, "step": 17423 }, { "epoch": 0.7995961635537607, "grad_norm": 0.42472153902053833, "learning_rate": 8.518197618377393e-06, "loss": 0.3617, "step": 17424 }, { "epoch": 0.7996420540590152, "grad_norm": 0.4819573760032654, "learning_rate": 8.518023394134275e-06, "loss": 0.4802, "step": 17425 }, { "epoch": 0.7996879445642696, "grad_norm": 0.4771646559238434, "learning_rate": 8.517849161431418e-06, "loss": 0.5027, "step": 17426 }, { "epoch": 0.7997338350695241, "grad_norm": 0.457241952419281, "learning_rate": 8.517674920269244e-06, "loss": 0.3518, "step": 17427 }, { "epoch": 0.7997797255747786, "grad_norm": 0.48232996463775635, "learning_rate": 8.517500670648173e-06, "loss": 0.4211, "step": 17428 }, { "epoch": 0.799825616080033, "grad_norm": 0.4540688097476959, "learning_rate": 8.517326412568624e-06, "loss": 0.4017, "step": 17429 }, { "epoch": 0.7998715065852875, "grad_norm": 0.45765626430511475, "learning_rate": 8.517152146031012e-06, "loss": 0.3539, "step": 17430 }, { "epoch": 0.799917397090542, "grad_norm": 0.4929465353488922, "learning_rate": 8.51697787103576e-06, "loss": 0.4819, "step": 17431 }, { "epoch": 0.7999632875957964, "grad_norm": 0.46364501118659973, "learning_rate": 8.516803587583289e-06, "loss": 0.4479, "step": 17432 }, { "epoch": 0.8000091781010509, "grad_norm": 0.5282514095306396, "learning_rate": 8.516629295674015e-06, "loss": 0.4615, "step": 17433 }, { "epoch": 0.8000550686063054, "grad_norm": 0.4677409827709198, "learning_rate": 8.516454995308355e-06, "loss": 0.349, "step": 17434 }, { "epoch": 0.8001009591115599, "grad_norm": 0.44146832823753357, "learning_rate": 8.516280686486731e-06, "loss": 0.3271, "step": 17435 }, { "epoch": 0.8001468496168143, "grad_norm": 0.49169692397117615, "learning_rate": 8.516106369209562e-06, "loss": 0.4626, "step": 17436 }, { "epoch": 0.8001927401220688, "grad_norm": 0.4435870945453644, "learning_rate": 8.515932043477268e-06, "loss": 0.4047, "step": 17437 }, { "epoch": 0.8002386306273233, "grad_norm": 0.4633375108242035, "learning_rate": 8.515757709290265e-06, "loss": 0.4493, "step": 17438 }, { "epoch": 0.8002845211325776, "grad_norm": 0.4477768540382385, "learning_rate": 8.515583366648976e-06, "loss": 0.4031, "step": 17439 }, { "epoch": 0.8003304116378321, "grad_norm": 0.4911952316761017, "learning_rate": 8.515409015553818e-06, "loss": 0.4519, "step": 17440 }, { "epoch": 0.8003763021430866, "grad_norm": 0.4222570061683655, "learning_rate": 8.51523465600521e-06, "loss": 0.3052, "step": 17441 }, { "epoch": 0.800422192648341, "grad_norm": 0.44558194279670715, "learning_rate": 8.515060288003574e-06, "loss": 0.3944, "step": 17442 }, { "epoch": 0.8004680831535955, "grad_norm": 0.4427376687526703, "learning_rate": 8.514885911549327e-06, "loss": 0.3586, "step": 17443 }, { "epoch": 0.80051397365885, "grad_norm": 0.4829748272895813, "learning_rate": 8.514711526642887e-06, "loss": 0.4635, "step": 17444 }, { "epoch": 0.8005598641641044, "grad_norm": 0.47320976853370667, "learning_rate": 8.514537133284677e-06, "loss": 0.4464, "step": 17445 }, { "epoch": 0.8006057546693589, "grad_norm": 0.4252266585826874, "learning_rate": 8.514362731475112e-06, "loss": 0.3229, "step": 17446 }, { "epoch": 0.8006516451746134, "grad_norm": 0.4535079598426819, "learning_rate": 8.514188321214617e-06, "loss": 0.4452, "step": 17447 }, { "epoch": 0.8006975356798678, "grad_norm": 0.43816477060317993, "learning_rate": 8.514013902503606e-06, "loss": 0.3239, "step": 17448 }, { "epoch": 0.8007434261851223, "grad_norm": 0.44645780324935913, "learning_rate": 8.513839475342501e-06, "loss": 0.3695, "step": 17449 }, { "epoch": 0.8007893166903768, "grad_norm": 0.46480223536491394, "learning_rate": 8.513665039731722e-06, "loss": 0.442, "step": 17450 }, { "epoch": 0.8008352071956312, "grad_norm": 0.44286370277404785, "learning_rate": 8.513490595671687e-06, "loss": 0.3446, "step": 17451 }, { "epoch": 0.8008810977008857, "grad_norm": 0.44705668091773987, "learning_rate": 8.513316143162816e-06, "loss": 0.3569, "step": 17452 }, { "epoch": 0.8009269882061402, "grad_norm": 0.44909295439720154, "learning_rate": 8.513141682205528e-06, "loss": 0.3953, "step": 17453 }, { "epoch": 0.8009728787113947, "grad_norm": 0.4579792320728302, "learning_rate": 8.512967212800243e-06, "loss": 0.4125, "step": 17454 }, { "epoch": 0.801018769216649, "grad_norm": 0.47918441891670227, "learning_rate": 8.51279273494738e-06, "loss": 0.4624, "step": 17455 }, { "epoch": 0.8010646597219035, "grad_norm": 0.4858976900577545, "learning_rate": 8.51261824864736e-06, "loss": 0.4731, "step": 17456 }, { "epoch": 0.801110550227158, "grad_norm": 0.4246160686016083, "learning_rate": 8.512443753900603e-06, "loss": 0.3131, "step": 17457 }, { "epoch": 0.8011564407324124, "grad_norm": 0.46327030658721924, "learning_rate": 8.512269250707526e-06, "loss": 0.4237, "step": 17458 }, { "epoch": 0.8012023312376669, "grad_norm": 0.4327425956726074, "learning_rate": 8.51209473906855e-06, "loss": 0.3592, "step": 17459 }, { "epoch": 0.8012482217429214, "grad_norm": 0.4413203299045563, "learning_rate": 8.511920218984094e-06, "loss": 0.3996, "step": 17460 }, { "epoch": 0.8012941122481758, "grad_norm": 0.4644427001476288, "learning_rate": 8.51174569045458e-06, "loss": 0.4113, "step": 17461 }, { "epoch": 0.8013400027534303, "grad_norm": 0.46664342284202576, "learning_rate": 8.511571153480424e-06, "loss": 0.3955, "step": 17462 }, { "epoch": 0.8013858932586848, "grad_norm": 0.4471636414527893, "learning_rate": 8.511396608062048e-06, "loss": 0.4296, "step": 17463 }, { "epoch": 0.8014317837639392, "grad_norm": 0.47241243720054626, "learning_rate": 8.511222054199872e-06, "loss": 0.4212, "step": 17464 }, { "epoch": 0.8014776742691937, "grad_norm": 0.4173448383808136, "learning_rate": 8.511047491894315e-06, "loss": 0.3196, "step": 17465 }, { "epoch": 0.8015235647744482, "grad_norm": 0.4807930588722229, "learning_rate": 8.510872921145798e-06, "loss": 0.4352, "step": 17466 }, { "epoch": 0.8015694552797026, "grad_norm": 0.5392604470252991, "learning_rate": 8.51069834195474e-06, "loss": 0.4899, "step": 17467 }, { "epoch": 0.8016153457849571, "grad_norm": 0.4549521803855896, "learning_rate": 8.510523754321558e-06, "loss": 0.3699, "step": 17468 }, { "epoch": 0.8016612362902116, "grad_norm": 0.4334564805030823, "learning_rate": 8.510349158246676e-06, "loss": 0.3577, "step": 17469 }, { "epoch": 0.801707126795466, "grad_norm": 0.43394628167152405, "learning_rate": 8.51017455373051e-06, "loss": 0.3687, "step": 17470 }, { "epoch": 0.8017530173007205, "grad_norm": 0.4640337824821472, "learning_rate": 8.509999940773484e-06, "loss": 0.4002, "step": 17471 }, { "epoch": 0.801798907805975, "grad_norm": 0.47587671875953674, "learning_rate": 8.509825319376017e-06, "loss": 0.4445, "step": 17472 }, { "epoch": 0.8018447983112295, "grad_norm": 0.44264906644821167, "learning_rate": 8.509650689538527e-06, "loss": 0.3228, "step": 17473 }, { "epoch": 0.8018906888164838, "grad_norm": 0.5028887391090393, "learning_rate": 8.509476051261433e-06, "loss": 0.4662, "step": 17474 }, { "epoch": 0.8019365793217383, "grad_norm": 0.47195491194725037, "learning_rate": 8.509301404545158e-06, "loss": 0.4313, "step": 17475 }, { "epoch": 0.8019824698269928, "grad_norm": 0.5014392137527466, "learning_rate": 8.50912674939012e-06, "loss": 0.3277, "step": 17476 }, { "epoch": 0.8020283603322472, "grad_norm": 0.4536079168319702, "learning_rate": 8.50895208579674e-06, "loss": 0.3755, "step": 17477 }, { "epoch": 0.8020742508375017, "grad_norm": 0.46835607290267944, "learning_rate": 8.508777413765438e-06, "loss": 0.4183, "step": 17478 }, { "epoch": 0.8021201413427562, "grad_norm": 0.42910054326057434, "learning_rate": 8.508602733296635e-06, "loss": 0.3474, "step": 17479 }, { "epoch": 0.8021660318480106, "grad_norm": 0.4519830346107483, "learning_rate": 8.508428044390748e-06, "loss": 0.3767, "step": 17480 }, { "epoch": 0.8022119223532651, "grad_norm": 0.44371291995048523, "learning_rate": 8.5082533470482e-06, "loss": 0.3803, "step": 17481 }, { "epoch": 0.8022578128585196, "grad_norm": 0.42579391598701477, "learning_rate": 8.508078641269409e-06, "loss": 0.3284, "step": 17482 }, { "epoch": 0.802303703363774, "grad_norm": 0.4859975278377533, "learning_rate": 8.507903927054798e-06, "loss": 0.489, "step": 17483 }, { "epoch": 0.8023495938690285, "grad_norm": 0.43391838669776917, "learning_rate": 8.507729204404782e-06, "loss": 0.4006, "step": 17484 }, { "epoch": 0.802395484374283, "grad_norm": 0.43761229515075684, "learning_rate": 8.507554473319785e-06, "loss": 0.3717, "step": 17485 }, { "epoch": 0.8024413748795374, "grad_norm": 0.4543003737926483, "learning_rate": 8.507379733800228e-06, "loss": 0.3859, "step": 17486 }, { "epoch": 0.8024872653847919, "grad_norm": 0.4755701720714569, "learning_rate": 8.50720498584653e-06, "loss": 0.3691, "step": 17487 }, { "epoch": 0.8025331558900464, "grad_norm": 0.4766960144042969, "learning_rate": 8.507030229459108e-06, "loss": 0.3994, "step": 17488 }, { "epoch": 0.8025790463953009, "grad_norm": 0.4752466678619385, "learning_rate": 8.506855464638388e-06, "loss": 0.4749, "step": 17489 }, { "epoch": 0.8026249369005553, "grad_norm": 0.40678051114082336, "learning_rate": 8.506680691384786e-06, "loss": 0.3033, "step": 17490 }, { "epoch": 0.8026708274058098, "grad_norm": 0.4832918643951416, "learning_rate": 8.506505909698724e-06, "loss": 0.4608, "step": 17491 }, { "epoch": 0.8027167179110642, "grad_norm": 0.48920491337776184, "learning_rate": 8.506331119580623e-06, "loss": 0.4157, "step": 17492 }, { "epoch": 0.8027626084163186, "grad_norm": 0.49876484274864197, "learning_rate": 8.506156321030903e-06, "loss": 0.5038, "step": 17493 }, { "epoch": 0.8028084989215731, "grad_norm": 0.457685649394989, "learning_rate": 8.505981514049982e-06, "loss": 0.4347, "step": 17494 }, { "epoch": 0.8028543894268276, "grad_norm": 0.4268772602081299, "learning_rate": 8.505806698638281e-06, "loss": 0.3287, "step": 17495 }, { "epoch": 0.802900279932082, "grad_norm": 0.42562031745910645, "learning_rate": 8.505631874796223e-06, "loss": 0.2905, "step": 17496 }, { "epoch": 0.8029461704373365, "grad_norm": 0.4237743318080902, "learning_rate": 8.505457042524226e-06, "loss": 0.3442, "step": 17497 }, { "epoch": 0.802992060942591, "grad_norm": 0.4356409013271332, "learning_rate": 8.50528220182271e-06, "loss": 0.3755, "step": 17498 }, { "epoch": 0.8030379514478454, "grad_norm": 0.4810279309749603, "learning_rate": 8.505107352692098e-06, "loss": 0.4423, "step": 17499 }, { "epoch": 0.8030838419530999, "grad_norm": 0.47642117738723755, "learning_rate": 8.50493249513281e-06, "loss": 0.4336, "step": 17500 }, { "epoch": 0.8031297324583544, "grad_norm": 0.4263390898704529, "learning_rate": 8.504757629145264e-06, "loss": 0.3394, "step": 17501 }, { "epoch": 0.8031756229636088, "grad_norm": 0.41579416394233704, "learning_rate": 8.504582754729882e-06, "loss": 0.3482, "step": 17502 }, { "epoch": 0.8032215134688633, "grad_norm": 0.5019678473472595, "learning_rate": 8.504407871887086e-06, "loss": 0.4498, "step": 17503 }, { "epoch": 0.8032674039741178, "grad_norm": 0.4784577190876007, "learning_rate": 8.504232980617293e-06, "loss": 0.4234, "step": 17504 }, { "epoch": 0.8033132944793722, "grad_norm": 0.4198947846889496, "learning_rate": 8.504058080920929e-06, "loss": 0.3387, "step": 17505 }, { "epoch": 0.8033591849846267, "grad_norm": 0.4566434621810913, "learning_rate": 8.503883172798408e-06, "loss": 0.3935, "step": 17506 }, { "epoch": 0.8034050754898812, "grad_norm": 0.4633612632751465, "learning_rate": 8.503708256250155e-06, "loss": 0.4872, "step": 17507 }, { "epoch": 0.8034509659951357, "grad_norm": 0.41579049825668335, "learning_rate": 8.50353333127659e-06, "loss": 0.346, "step": 17508 }, { "epoch": 0.80349685650039, "grad_norm": 0.44550225138664246, "learning_rate": 8.503358397878132e-06, "loss": 0.3658, "step": 17509 }, { "epoch": 0.8035427470056445, "grad_norm": 0.46103397011756897, "learning_rate": 8.503183456055202e-06, "loss": 0.4376, "step": 17510 }, { "epoch": 0.803588637510899, "grad_norm": 0.44786983728408813, "learning_rate": 8.503008505808223e-06, "loss": 0.3436, "step": 17511 }, { "epoch": 0.8036345280161534, "grad_norm": 0.41981229186058044, "learning_rate": 8.502833547137615e-06, "loss": 0.3341, "step": 17512 }, { "epoch": 0.8036804185214079, "grad_norm": 0.43122565746307373, "learning_rate": 8.502658580043796e-06, "loss": 0.3309, "step": 17513 }, { "epoch": 0.8037263090266624, "grad_norm": 0.5017602443695068, "learning_rate": 8.50248360452719e-06, "loss": 0.4463, "step": 17514 }, { "epoch": 0.8037721995319168, "grad_norm": 0.43788447976112366, "learning_rate": 8.502308620588215e-06, "loss": 0.3788, "step": 17515 }, { "epoch": 0.8038180900371713, "grad_norm": 0.4142209589481354, "learning_rate": 8.502133628227294e-06, "loss": 0.3728, "step": 17516 }, { "epoch": 0.8038639805424258, "grad_norm": 0.460007905960083, "learning_rate": 8.501958627444846e-06, "loss": 0.3887, "step": 17517 }, { "epoch": 0.8039098710476802, "grad_norm": 0.4433433711528778, "learning_rate": 8.501783618241292e-06, "loss": 0.3426, "step": 17518 }, { "epoch": 0.8039557615529347, "grad_norm": 0.4799129366874695, "learning_rate": 8.501608600617057e-06, "loss": 0.4972, "step": 17519 }, { "epoch": 0.8040016520581892, "grad_norm": 0.47678110003471375, "learning_rate": 8.501433574572555e-06, "loss": 0.4419, "step": 17520 }, { "epoch": 0.8040475425634436, "grad_norm": 0.4584716558456421, "learning_rate": 8.501258540108212e-06, "loss": 0.3936, "step": 17521 }, { "epoch": 0.8040934330686981, "grad_norm": 0.5016146898269653, "learning_rate": 8.501083497224446e-06, "loss": 0.5283, "step": 17522 }, { "epoch": 0.8041393235739526, "grad_norm": 0.4731155037879944, "learning_rate": 8.500908445921681e-06, "loss": 0.4581, "step": 17523 }, { "epoch": 0.8041852140792071, "grad_norm": 0.5109710693359375, "learning_rate": 8.500733386200336e-06, "loss": 0.4965, "step": 17524 }, { "epoch": 0.8042311045844615, "grad_norm": 0.5115070343017578, "learning_rate": 8.50055831806083e-06, "loss": 0.5215, "step": 17525 }, { "epoch": 0.804276995089716, "grad_norm": 0.4108628034591675, "learning_rate": 8.500383241503589e-06, "loss": 0.2982, "step": 17526 }, { "epoch": 0.8043228855949704, "grad_norm": 0.4349287748336792, "learning_rate": 8.500208156529028e-06, "loss": 0.3516, "step": 17527 }, { "epoch": 0.8043687761002248, "grad_norm": 0.4549620747566223, "learning_rate": 8.500033063137574e-06, "loss": 0.383, "step": 17528 }, { "epoch": 0.8044146666054793, "grad_norm": 0.4648036062717438, "learning_rate": 8.499857961329642e-06, "loss": 0.4123, "step": 17529 }, { "epoch": 0.8044605571107338, "grad_norm": 0.4485704004764557, "learning_rate": 8.49968285110566e-06, "loss": 0.3971, "step": 17530 }, { "epoch": 0.8045064476159882, "grad_norm": 0.46702978014945984, "learning_rate": 8.499507732466042e-06, "loss": 0.3784, "step": 17531 }, { "epoch": 0.8045523381212427, "grad_norm": 0.46648073196411133, "learning_rate": 8.499332605411213e-06, "loss": 0.4191, "step": 17532 }, { "epoch": 0.8045982286264972, "grad_norm": 0.4410386383533478, "learning_rate": 8.499157469941595e-06, "loss": 0.3697, "step": 17533 }, { "epoch": 0.8046441191317516, "grad_norm": 0.41777467727661133, "learning_rate": 8.498982326057605e-06, "loss": 0.3146, "step": 17534 }, { "epoch": 0.8046900096370061, "grad_norm": 0.4639178216457367, "learning_rate": 8.49880717375967e-06, "loss": 0.4536, "step": 17535 }, { "epoch": 0.8047359001422606, "grad_norm": 0.4542098343372345, "learning_rate": 8.498632013048207e-06, "loss": 0.4077, "step": 17536 }, { "epoch": 0.804781790647515, "grad_norm": 0.4633980095386505, "learning_rate": 8.498456843923636e-06, "loss": 0.4125, "step": 17537 }, { "epoch": 0.8048276811527695, "grad_norm": 0.4290273189544678, "learning_rate": 8.498281666386384e-06, "loss": 0.4065, "step": 17538 }, { "epoch": 0.804873571658024, "grad_norm": 0.41363418102264404, "learning_rate": 8.498106480436865e-06, "loss": 0.3374, "step": 17539 }, { "epoch": 0.8049194621632784, "grad_norm": 0.4671345353126526, "learning_rate": 8.497931286075507e-06, "loss": 0.4141, "step": 17540 }, { "epoch": 0.8049653526685329, "grad_norm": 0.48397475481033325, "learning_rate": 8.497756083302726e-06, "loss": 0.4576, "step": 17541 }, { "epoch": 0.8050112431737874, "grad_norm": 0.5039771795272827, "learning_rate": 8.497580872118946e-06, "loss": 0.4963, "step": 17542 }, { "epoch": 0.8050571336790419, "grad_norm": 0.4717659056186676, "learning_rate": 8.497405652524589e-06, "loss": 0.3314, "step": 17543 }, { "epoch": 0.8051030241842962, "grad_norm": 0.4212946891784668, "learning_rate": 8.497230424520075e-06, "loss": 0.3366, "step": 17544 }, { "epoch": 0.8051489146895507, "grad_norm": 0.4275185465812683, "learning_rate": 8.497055188105825e-06, "loss": 0.3431, "step": 17545 }, { "epoch": 0.8051948051948052, "grad_norm": 0.4715089499950409, "learning_rate": 8.49687994328226e-06, "loss": 0.4586, "step": 17546 }, { "epoch": 0.8052406957000596, "grad_norm": 0.45079877972602844, "learning_rate": 8.496704690049804e-06, "loss": 0.3992, "step": 17547 }, { "epoch": 0.8052865862053141, "grad_norm": 0.41887596249580383, "learning_rate": 8.496529428408877e-06, "loss": 0.3294, "step": 17548 }, { "epoch": 0.8053324767105686, "grad_norm": 0.4424496591091156, "learning_rate": 8.496354158359899e-06, "loss": 0.3757, "step": 17549 }, { "epoch": 0.805378367215823, "grad_norm": 0.42896175384521484, "learning_rate": 8.496178879903294e-06, "loss": 0.3755, "step": 17550 }, { "epoch": 0.8054242577210775, "grad_norm": 0.44552144408226013, "learning_rate": 8.49600359303948e-06, "loss": 0.4048, "step": 17551 }, { "epoch": 0.805470148226332, "grad_norm": 0.46317818760871887, "learning_rate": 8.495828297768884e-06, "loss": 0.3982, "step": 17552 }, { "epoch": 0.8055160387315864, "grad_norm": 0.48188456892967224, "learning_rate": 8.495652994091922e-06, "loss": 0.4211, "step": 17553 }, { "epoch": 0.8055619292368409, "grad_norm": 0.4468204975128174, "learning_rate": 8.495477682009019e-06, "loss": 0.3773, "step": 17554 }, { "epoch": 0.8056078197420954, "grad_norm": 0.426616370677948, "learning_rate": 8.495302361520594e-06, "loss": 0.3708, "step": 17555 }, { "epoch": 0.8056537102473498, "grad_norm": 0.4044811427593231, "learning_rate": 8.49512703262707e-06, "loss": 0.3229, "step": 17556 }, { "epoch": 0.8056996007526043, "grad_norm": 0.46874159574508667, "learning_rate": 8.49495169532887e-06, "loss": 0.4263, "step": 17557 }, { "epoch": 0.8057454912578588, "grad_norm": 0.4282771348953247, "learning_rate": 8.494776349626413e-06, "loss": 0.3676, "step": 17558 }, { "epoch": 0.8057913817631132, "grad_norm": 0.4589526951313019, "learning_rate": 8.494600995520122e-06, "loss": 0.3943, "step": 17559 }, { "epoch": 0.8058372722683677, "grad_norm": 0.44682082533836365, "learning_rate": 8.49442563301042e-06, "loss": 0.3813, "step": 17560 }, { "epoch": 0.8058831627736222, "grad_norm": 0.4717269241809845, "learning_rate": 8.494250262097726e-06, "loss": 0.3924, "step": 17561 }, { "epoch": 0.8059290532788767, "grad_norm": 0.4556524455547333, "learning_rate": 8.494074882782462e-06, "loss": 0.403, "step": 17562 }, { "epoch": 0.805974943784131, "grad_norm": 0.45353206992149353, "learning_rate": 8.493899495065053e-06, "loss": 0.4196, "step": 17563 }, { "epoch": 0.8060208342893855, "grad_norm": 0.416664719581604, "learning_rate": 8.493724098945917e-06, "loss": 0.3681, "step": 17564 }, { "epoch": 0.80606672479464, "grad_norm": 0.45668983459472656, "learning_rate": 8.493548694425478e-06, "loss": 0.3924, "step": 17565 }, { "epoch": 0.8061126152998944, "grad_norm": 0.429853230714798, "learning_rate": 8.493373281504156e-06, "loss": 0.3456, "step": 17566 }, { "epoch": 0.8061585058051489, "grad_norm": 0.4246322512626648, "learning_rate": 8.493197860182374e-06, "loss": 0.3459, "step": 17567 }, { "epoch": 0.8062043963104034, "grad_norm": 0.5401395559310913, "learning_rate": 8.493022430460553e-06, "loss": 0.5133, "step": 17568 }, { "epoch": 0.8062502868156578, "grad_norm": 0.5249103307723999, "learning_rate": 8.492846992339117e-06, "loss": 0.5732, "step": 17569 }, { "epoch": 0.8062961773209123, "grad_norm": 0.49460428953170776, "learning_rate": 8.492671545818485e-06, "loss": 0.5163, "step": 17570 }, { "epoch": 0.8063420678261668, "grad_norm": 0.4316990077495575, "learning_rate": 8.492496090899081e-06, "loss": 0.3835, "step": 17571 }, { "epoch": 0.8063879583314212, "grad_norm": 0.4703753888607025, "learning_rate": 8.492320627581325e-06, "loss": 0.4047, "step": 17572 }, { "epoch": 0.8064338488366757, "grad_norm": 0.4622040092945099, "learning_rate": 8.492145155865642e-06, "loss": 0.4116, "step": 17573 }, { "epoch": 0.8064797393419302, "grad_norm": 0.4789207875728607, "learning_rate": 8.491969675752451e-06, "loss": 0.4588, "step": 17574 }, { "epoch": 0.8065256298471846, "grad_norm": 0.46948862075805664, "learning_rate": 8.491794187242175e-06, "loss": 0.4119, "step": 17575 }, { "epoch": 0.8065715203524391, "grad_norm": 0.4332551062107086, "learning_rate": 8.491618690335238e-06, "loss": 0.4058, "step": 17576 }, { "epoch": 0.8066174108576936, "grad_norm": 0.48013734817504883, "learning_rate": 8.491443185032058e-06, "loss": 0.4626, "step": 17577 }, { "epoch": 0.8066633013629481, "grad_norm": 0.44204577803611755, "learning_rate": 8.491267671333058e-06, "loss": 0.4212, "step": 17578 }, { "epoch": 0.8067091918682024, "grad_norm": 0.4621376097202301, "learning_rate": 8.491092149238663e-06, "loss": 0.3937, "step": 17579 }, { "epoch": 0.8067550823734569, "grad_norm": 0.4336664080619812, "learning_rate": 8.490916618749292e-06, "loss": 0.4195, "step": 17580 }, { "epoch": 0.8068009728787114, "grad_norm": 0.5178913474082947, "learning_rate": 8.490741079865368e-06, "loss": 0.5187, "step": 17581 }, { "epoch": 0.8068468633839658, "grad_norm": 0.4744816720485687, "learning_rate": 8.490565532587316e-06, "loss": 0.4203, "step": 17582 }, { "epoch": 0.8068927538892203, "grad_norm": 0.440844863653183, "learning_rate": 8.490389976915554e-06, "loss": 0.3704, "step": 17583 }, { "epoch": 0.8069386443944748, "grad_norm": 0.4821132719516754, "learning_rate": 8.490214412850505e-06, "loss": 0.4639, "step": 17584 }, { "epoch": 0.8069845348997292, "grad_norm": 0.4527164101600647, "learning_rate": 8.49003884039259e-06, "loss": 0.3881, "step": 17585 }, { "epoch": 0.8070304254049837, "grad_norm": 0.44395074248313904, "learning_rate": 8.489863259542239e-06, "loss": 0.3855, "step": 17586 }, { "epoch": 0.8070763159102382, "grad_norm": 0.45803093910217285, "learning_rate": 8.489687670299864e-06, "loss": 0.4374, "step": 17587 }, { "epoch": 0.8071222064154926, "grad_norm": 0.4702085852622986, "learning_rate": 8.489512072665893e-06, "loss": 0.3696, "step": 17588 }, { "epoch": 0.8071680969207471, "grad_norm": 0.4370243549346924, "learning_rate": 8.489336466640746e-06, "loss": 0.3497, "step": 17589 }, { "epoch": 0.8072139874260016, "grad_norm": 0.43086567521095276, "learning_rate": 8.489160852224845e-06, "loss": 0.3518, "step": 17590 }, { "epoch": 0.807259877931256, "grad_norm": 0.4483693242073059, "learning_rate": 8.488985229418616e-06, "loss": 0.4225, "step": 17591 }, { "epoch": 0.8073057684365105, "grad_norm": 0.5005820989608765, "learning_rate": 8.488809598222476e-06, "loss": 0.4964, "step": 17592 }, { "epoch": 0.807351658941765, "grad_norm": 0.4495147168636322, "learning_rate": 8.488633958636852e-06, "loss": 0.4018, "step": 17593 }, { "epoch": 0.8073975494470194, "grad_norm": 0.5154932737350464, "learning_rate": 8.488458310662164e-06, "loss": 0.3851, "step": 17594 }, { "epoch": 0.8074434399522739, "grad_norm": 0.45882177352905273, "learning_rate": 8.488282654298834e-06, "loss": 0.4501, "step": 17595 }, { "epoch": 0.8074893304575284, "grad_norm": 0.4814545214176178, "learning_rate": 8.488106989547285e-06, "loss": 0.4558, "step": 17596 }, { "epoch": 0.8075352209627829, "grad_norm": 0.4598345160484314, "learning_rate": 8.48793131640794e-06, "loss": 0.4113, "step": 17597 }, { "epoch": 0.8075811114680372, "grad_norm": 0.4630196988582611, "learning_rate": 8.487755634881222e-06, "loss": 0.3791, "step": 17598 }, { "epoch": 0.8076270019732917, "grad_norm": 0.4725218415260315, "learning_rate": 8.487579944967551e-06, "loss": 0.4473, "step": 17599 }, { "epoch": 0.8076728924785462, "grad_norm": 0.4276121258735657, "learning_rate": 8.487404246667352e-06, "loss": 0.3427, "step": 17600 }, { "epoch": 0.8077187829838006, "grad_norm": 0.501977801322937, "learning_rate": 8.487228539981045e-06, "loss": 0.5395, "step": 17601 }, { "epoch": 0.8077646734890551, "grad_norm": 0.4523829519748688, "learning_rate": 8.487052824909056e-06, "loss": 0.4354, "step": 17602 }, { "epoch": 0.8078105639943096, "grad_norm": 0.46578773856163025, "learning_rate": 8.486877101451805e-06, "loss": 0.4089, "step": 17603 }, { "epoch": 0.807856454499564, "grad_norm": 0.4490325152873993, "learning_rate": 8.486701369609714e-06, "loss": 0.3935, "step": 17604 }, { "epoch": 0.8079023450048185, "grad_norm": 0.45202866196632385, "learning_rate": 8.486525629383206e-06, "loss": 0.3702, "step": 17605 }, { "epoch": 0.807948235510073, "grad_norm": 0.467622309923172, "learning_rate": 8.486349880772707e-06, "loss": 0.419, "step": 17606 }, { "epoch": 0.8079941260153274, "grad_norm": 0.4658639132976532, "learning_rate": 8.486174123778635e-06, "loss": 0.4304, "step": 17607 }, { "epoch": 0.8080400165205819, "grad_norm": 0.45953649282455444, "learning_rate": 8.485998358401415e-06, "loss": 0.3995, "step": 17608 }, { "epoch": 0.8080859070258364, "grad_norm": 0.42754751443862915, "learning_rate": 8.485822584641467e-06, "loss": 0.3353, "step": 17609 }, { "epoch": 0.8081317975310908, "grad_norm": 0.47774219512939453, "learning_rate": 8.485646802499219e-06, "loss": 0.4575, "step": 17610 }, { "epoch": 0.8081776880363453, "grad_norm": 0.4445607662200928, "learning_rate": 8.485471011975089e-06, "loss": 0.3858, "step": 17611 }, { "epoch": 0.8082235785415998, "grad_norm": 0.5216554999351501, "learning_rate": 8.485295213069503e-06, "loss": 0.5137, "step": 17612 }, { "epoch": 0.8082694690468543, "grad_norm": 0.6023508310317993, "learning_rate": 8.48511940578288e-06, "loss": 0.3842, "step": 17613 }, { "epoch": 0.8083153595521086, "grad_norm": 0.4393535554409027, "learning_rate": 8.484943590115644e-06, "loss": 0.3455, "step": 17614 }, { "epoch": 0.8083612500573631, "grad_norm": 0.43418657779693604, "learning_rate": 8.48476776606822e-06, "loss": 0.407, "step": 17615 }, { "epoch": 0.8084071405626176, "grad_norm": 0.4357938766479492, "learning_rate": 8.48459193364103e-06, "loss": 0.3664, "step": 17616 }, { "epoch": 0.808453031067872, "grad_norm": 0.4474564492702484, "learning_rate": 8.484416092834496e-06, "loss": 0.3923, "step": 17617 }, { "epoch": 0.8084989215731265, "grad_norm": 0.46380454301834106, "learning_rate": 8.48424024364904e-06, "loss": 0.417, "step": 17618 }, { "epoch": 0.808544812078381, "grad_norm": 0.4853716194629669, "learning_rate": 8.484064386085087e-06, "loss": 0.5074, "step": 17619 }, { "epoch": 0.8085907025836354, "grad_norm": 0.46699202060699463, "learning_rate": 8.483888520143057e-06, "loss": 0.4481, "step": 17620 }, { "epoch": 0.8086365930888899, "grad_norm": 0.5123815536499023, "learning_rate": 8.483712645823375e-06, "loss": 0.4693, "step": 17621 }, { "epoch": 0.8086824835941444, "grad_norm": 0.472379595041275, "learning_rate": 8.483536763126466e-06, "loss": 0.4485, "step": 17622 }, { "epoch": 0.8087283740993988, "grad_norm": 0.4936237037181854, "learning_rate": 8.483360872052748e-06, "loss": 0.4482, "step": 17623 }, { "epoch": 0.8087742646046533, "grad_norm": 0.4686533510684967, "learning_rate": 8.483184972602647e-06, "loss": 0.4409, "step": 17624 }, { "epoch": 0.8088201551099078, "grad_norm": 0.44081053137779236, "learning_rate": 8.483009064776587e-06, "loss": 0.3979, "step": 17625 }, { "epoch": 0.8088660456151622, "grad_norm": 0.5010126233100891, "learning_rate": 8.482833148574988e-06, "loss": 0.4262, "step": 17626 }, { "epoch": 0.8089119361204167, "grad_norm": 0.4746077358722687, "learning_rate": 8.482657223998275e-06, "loss": 0.4347, "step": 17627 }, { "epoch": 0.8089578266256712, "grad_norm": 0.46039292216300964, "learning_rate": 8.482481291046869e-06, "loss": 0.4191, "step": 17628 }, { "epoch": 0.8090037171309256, "grad_norm": 0.45977118611335754, "learning_rate": 8.482305349721197e-06, "loss": 0.4037, "step": 17629 }, { "epoch": 0.8090496076361801, "grad_norm": 0.4507550299167633, "learning_rate": 8.482129400021678e-06, "loss": 0.359, "step": 17630 }, { "epoch": 0.8090954981414346, "grad_norm": 0.4625616669654846, "learning_rate": 8.481953441948736e-06, "loss": 0.4245, "step": 17631 }, { "epoch": 0.809141388646689, "grad_norm": 0.4414457678794861, "learning_rate": 8.481777475502797e-06, "loss": 0.3174, "step": 17632 }, { "epoch": 0.8091872791519434, "grad_norm": 0.4520516097545624, "learning_rate": 8.48160150068428e-06, "loss": 0.3905, "step": 17633 }, { "epoch": 0.8092331696571979, "grad_norm": 0.4836430549621582, "learning_rate": 8.481425517493613e-06, "loss": 0.4242, "step": 17634 }, { "epoch": 0.8092790601624524, "grad_norm": 0.5088282227516174, "learning_rate": 8.481249525931214e-06, "loss": 0.4561, "step": 17635 }, { "epoch": 0.8093249506677068, "grad_norm": 0.4247996509075165, "learning_rate": 8.48107352599751e-06, "loss": 0.3605, "step": 17636 }, { "epoch": 0.8093708411729613, "grad_norm": 0.4483327865600586, "learning_rate": 8.480897517692923e-06, "loss": 0.4181, "step": 17637 }, { "epoch": 0.8094167316782158, "grad_norm": 0.44462546706199646, "learning_rate": 8.480721501017875e-06, "loss": 0.3928, "step": 17638 }, { "epoch": 0.8094626221834702, "grad_norm": 0.47163113951683044, "learning_rate": 8.48054547597279e-06, "loss": 0.4427, "step": 17639 }, { "epoch": 0.8095085126887247, "grad_norm": 0.46032679080963135, "learning_rate": 8.480369442558092e-06, "loss": 0.3676, "step": 17640 }, { "epoch": 0.8095544031939792, "grad_norm": 0.4542371928691864, "learning_rate": 8.480193400774205e-06, "loss": 0.384, "step": 17641 }, { "epoch": 0.8096002936992336, "grad_norm": 0.5070081353187561, "learning_rate": 8.48001735062155e-06, "loss": 0.413, "step": 17642 }, { "epoch": 0.8096461842044881, "grad_norm": 0.43948009610176086, "learning_rate": 8.479841292100552e-06, "loss": 0.3536, "step": 17643 }, { "epoch": 0.8096920747097426, "grad_norm": 0.4311222434043884, "learning_rate": 8.479665225211634e-06, "loss": 0.3184, "step": 17644 }, { "epoch": 0.809737965214997, "grad_norm": 0.5065967440605164, "learning_rate": 8.47948914995522e-06, "loss": 0.4688, "step": 17645 }, { "epoch": 0.8097838557202515, "grad_norm": 0.45486101508140564, "learning_rate": 8.479313066331732e-06, "loss": 0.3618, "step": 17646 }, { "epoch": 0.809829746225506, "grad_norm": 0.4497266113758087, "learning_rate": 8.479136974341594e-06, "loss": 0.384, "step": 17647 }, { "epoch": 0.8098756367307604, "grad_norm": 0.46375998854637146, "learning_rate": 8.478960873985231e-06, "loss": 0.4549, "step": 17648 }, { "epoch": 0.8099215272360148, "grad_norm": 0.4329161047935486, "learning_rate": 8.478784765263064e-06, "loss": 0.3388, "step": 17649 }, { "epoch": 0.8099674177412693, "grad_norm": 0.48870131373405457, "learning_rate": 8.478608648175518e-06, "loss": 0.511, "step": 17650 }, { "epoch": 0.8100133082465238, "grad_norm": 0.48089170455932617, "learning_rate": 8.478432522723015e-06, "loss": 0.4801, "step": 17651 }, { "epoch": 0.8100591987517782, "grad_norm": 0.45487064123153687, "learning_rate": 8.47825638890598e-06, "loss": 0.4623, "step": 17652 }, { "epoch": 0.8101050892570327, "grad_norm": 0.45531803369522095, "learning_rate": 8.478080246724837e-06, "loss": 0.3652, "step": 17653 }, { "epoch": 0.8101509797622872, "grad_norm": 0.44705572724342346, "learning_rate": 8.477904096180008e-06, "loss": 0.3457, "step": 17654 }, { "epoch": 0.8101968702675416, "grad_norm": 0.4420647919178009, "learning_rate": 8.477727937271918e-06, "loss": 0.374, "step": 17655 }, { "epoch": 0.8102427607727961, "grad_norm": 0.4088599383831024, "learning_rate": 8.477551770000989e-06, "loss": 0.3002, "step": 17656 }, { "epoch": 0.8102886512780506, "grad_norm": 0.447159081697464, "learning_rate": 8.477375594367645e-06, "loss": 0.372, "step": 17657 }, { "epoch": 0.810334541783305, "grad_norm": 0.5326886177062988, "learning_rate": 8.477199410372313e-06, "loss": 0.5348, "step": 17658 }, { "epoch": 0.8103804322885595, "grad_norm": 0.4563516676425934, "learning_rate": 8.477023218015411e-06, "loss": 0.4119, "step": 17659 }, { "epoch": 0.810426322793814, "grad_norm": 0.5115314722061157, "learning_rate": 8.476847017297367e-06, "loss": 0.364, "step": 17660 }, { "epoch": 0.8104722132990684, "grad_norm": 0.4515569806098938, "learning_rate": 8.476670808218602e-06, "loss": 0.3922, "step": 17661 }, { "epoch": 0.8105181038043229, "grad_norm": 0.48099735379219055, "learning_rate": 8.476494590779542e-06, "loss": 0.4781, "step": 17662 }, { "epoch": 0.8105639943095774, "grad_norm": 0.442349910736084, "learning_rate": 8.47631836498061e-06, "loss": 0.3612, "step": 17663 }, { "epoch": 0.8106098848148318, "grad_norm": 0.4784599542617798, "learning_rate": 8.476142130822228e-06, "loss": 0.3984, "step": 17664 }, { "epoch": 0.8106557753200863, "grad_norm": 0.40242910385131836, "learning_rate": 8.475965888304823e-06, "loss": 0.3235, "step": 17665 }, { "epoch": 0.8107016658253408, "grad_norm": 0.4843137562274933, "learning_rate": 8.475789637428816e-06, "loss": 0.4081, "step": 17666 }, { "epoch": 0.8107475563305953, "grad_norm": 0.42413055896759033, "learning_rate": 8.475613378194632e-06, "loss": 0.3606, "step": 17667 }, { "epoch": 0.8107934468358496, "grad_norm": 0.45493200421333313, "learning_rate": 8.475437110602696e-06, "loss": 0.4236, "step": 17668 }, { "epoch": 0.8108393373411041, "grad_norm": 0.4516465365886688, "learning_rate": 8.47526083465343e-06, "loss": 0.3909, "step": 17669 }, { "epoch": 0.8108852278463586, "grad_norm": 0.4620436728000641, "learning_rate": 8.475084550347256e-06, "loss": 0.3587, "step": 17670 }, { "epoch": 0.810931118351613, "grad_norm": 0.46484631299972534, "learning_rate": 8.474908257684605e-06, "loss": 0.4072, "step": 17671 }, { "epoch": 0.8109770088568675, "grad_norm": 0.4686445891857147, "learning_rate": 8.474731956665892e-06, "loss": 0.3783, "step": 17672 }, { "epoch": 0.811022899362122, "grad_norm": 0.46818459033966064, "learning_rate": 8.474555647291549e-06, "loss": 0.3592, "step": 17673 }, { "epoch": 0.8110687898673764, "grad_norm": 0.425287127494812, "learning_rate": 8.474379329561994e-06, "loss": 0.3205, "step": 17674 }, { "epoch": 0.8111146803726309, "grad_norm": 0.4470694363117218, "learning_rate": 8.474203003477652e-06, "loss": 0.3387, "step": 17675 }, { "epoch": 0.8111605708778854, "grad_norm": 0.42304760217666626, "learning_rate": 8.47402666903895e-06, "loss": 0.337, "step": 17676 }, { "epoch": 0.8112064613831398, "grad_norm": 0.46694934368133545, "learning_rate": 8.473850326246312e-06, "loss": 0.4201, "step": 17677 }, { "epoch": 0.8112523518883943, "grad_norm": 0.4868218004703522, "learning_rate": 8.473673975100159e-06, "loss": 0.4813, "step": 17678 }, { "epoch": 0.8112982423936488, "grad_norm": 0.4300846457481384, "learning_rate": 8.473497615600915e-06, "loss": 0.3997, "step": 17679 }, { "epoch": 0.8113441328989032, "grad_norm": 0.4250863492488861, "learning_rate": 8.473321247749007e-06, "loss": 0.3252, "step": 17680 }, { "epoch": 0.8113900234041577, "grad_norm": 0.886294960975647, "learning_rate": 8.473144871544857e-06, "loss": 0.3889, "step": 17681 }, { "epoch": 0.8114359139094122, "grad_norm": 0.437728613615036, "learning_rate": 8.47296848698889e-06, "loss": 0.3461, "step": 17682 }, { "epoch": 0.8114818044146666, "grad_norm": 0.47892123460769653, "learning_rate": 8.47279209408153e-06, "loss": 0.4935, "step": 17683 }, { "epoch": 0.811527694919921, "grad_norm": 0.48336225748062134, "learning_rate": 8.472615692823201e-06, "loss": 0.4275, "step": 17684 }, { "epoch": 0.8115735854251755, "grad_norm": 0.4525268077850342, "learning_rate": 8.472439283214326e-06, "loss": 0.4056, "step": 17685 }, { "epoch": 0.81161947593043, "grad_norm": 0.4483889937400818, "learning_rate": 8.472262865255333e-06, "loss": 0.3851, "step": 17686 }, { "epoch": 0.8116653664356844, "grad_norm": 0.4581266939640045, "learning_rate": 8.472086438946642e-06, "loss": 0.3602, "step": 17687 }, { "epoch": 0.8117112569409389, "grad_norm": 0.5165950059890747, "learning_rate": 8.471910004288679e-06, "loss": 0.44, "step": 17688 }, { "epoch": 0.8117571474461934, "grad_norm": 0.4431636333465576, "learning_rate": 8.471733561281867e-06, "loss": 0.3582, "step": 17689 }, { "epoch": 0.8118030379514478, "grad_norm": 0.4877323806285858, "learning_rate": 8.471557109926634e-06, "loss": 0.4592, "step": 17690 }, { "epoch": 0.8118489284567023, "grad_norm": 0.4613015055656433, "learning_rate": 8.4713806502234e-06, "loss": 0.4314, "step": 17691 }, { "epoch": 0.8118948189619568, "grad_norm": 0.5059880018234253, "learning_rate": 8.471204182172593e-06, "loss": 0.4786, "step": 17692 }, { "epoch": 0.8119407094672112, "grad_norm": 0.4817078709602356, "learning_rate": 8.471027705774634e-06, "loss": 0.4132, "step": 17693 }, { "epoch": 0.8119865999724657, "grad_norm": 0.46134814620018005, "learning_rate": 8.470851221029949e-06, "loss": 0.4168, "step": 17694 }, { "epoch": 0.8120324904777202, "grad_norm": 0.451264351606369, "learning_rate": 8.470674727938964e-06, "loss": 0.3973, "step": 17695 }, { "epoch": 0.8120783809829746, "grad_norm": 0.4588071405887604, "learning_rate": 8.4704982265021e-06, "loss": 0.3651, "step": 17696 }, { "epoch": 0.8121242714882291, "grad_norm": 0.42154666781425476, "learning_rate": 8.470321716719783e-06, "loss": 0.3349, "step": 17697 }, { "epoch": 0.8121701619934836, "grad_norm": 0.4712468087673187, "learning_rate": 8.470145198592436e-06, "loss": 0.4473, "step": 17698 }, { "epoch": 0.812216052498738, "grad_norm": 0.45039933919906616, "learning_rate": 8.469968672120487e-06, "loss": 0.4269, "step": 17699 }, { "epoch": 0.8122619430039925, "grad_norm": 0.4717012941837311, "learning_rate": 8.469792137304357e-06, "loss": 0.4735, "step": 17700 }, { "epoch": 0.812307833509247, "grad_norm": 0.4294164776802063, "learning_rate": 8.469615594144477e-06, "loss": 0.4119, "step": 17701 }, { "epoch": 0.8123537240145015, "grad_norm": 0.4132399559020996, "learning_rate": 8.46943904264126e-06, "loss": 0.3268, "step": 17702 }, { "epoch": 0.8123996145197558, "grad_norm": 0.5065289735794067, "learning_rate": 8.469262482795139e-06, "loss": 0.4573, "step": 17703 }, { "epoch": 0.8124455050250103, "grad_norm": 0.4541685879230499, "learning_rate": 8.46908591460654e-06, "loss": 0.4235, "step": 17704 }, { "epoch": 0.8124913955302648, "grad_norm": 0.45357418060302734, "learning_rate": 8.468909338075882e-06, "loss": 0.3866, "step": 17705 }, { "epoch": 0.8125372860355192, "grad_norm": 0.41954857110977173, "learning_rate": 8.46873275320359e-06, "loss": 0.3607, "step": 17706 }, { "epoch": 0.8125831765407737, "grad_norm": 0.44689586758613586, "learning_rate": 8.468556159990094e-06, "loss": 0.3671, "step": 17707 }, { "epoch": 0.8126290670460282, "grad_norm": 0.43343621492385864, "learning_rate": 8.468379558435813e-06, "loss": 0.3803, "step": 17708 }, { "epoch": 0.8126749575512826, "grad_norm": 0.4828508794307709, "learning_rate": 8.468202948541173e-06, "loss": 0.4271, "step": 17709 }, { "epoch": 0.8127208480565371, "grad_norm": 0.45076027512550354, "learning_rate": 8.468026330306602e-06, "loss": 0.3199, "step": 17710 }, { "epoch": 0.8127667385617916, "grad_norm": 0.4199490249156952, "learning_rate": 8.46784970373252e-06, "loss": 0.3491, "step": 17711 }, { "epoch": 0.812812629067046, "grad_norm": 0.4521474540233612, "learning_rate": 8.467673068819355e-06, "loss": 0.4007, "step": 17712 }, { "epoch": 0.8128585195723005, "grad_norm": 0.45946773886680603, "learning_rate": 8.467496425567529e-06, "loss": 0.3645, "step": 17713 }, { "epoch": 0.812904410077555, "grad_norm": 0.4599514305591583, "learning_rate": 8.46731977397747e-06, "loss": 0.4248, "step": 17714 }, { "epoch": 0.8129503005828094, "grad_norm": 0.4499518573284149, "learning_rate": 8.467143114049602e-06, "loss": 0.336, "step": 17715 }, { "epoch": 0.8129961910880639, "grad_norm": 0.4637841582298279, "learning_rate": 8.466966445784349e-06, "loss": 0.3748, "step": 17716 }, { "epoch": 0.8130420815933184, "grad_norm": 0.4588336944580078, "learning_rate": 8.466789769182134e-06, "loss": 0.3932, "step": 17717 }, { "epoch": 0.8130879720985728, "grad_norm": 0.4422934651374817, "learning_rate": 8.466613084243386e-06, "loss": 0.3789, "step": 17718 }, { "epoch": 0.8131338626038273, "grad_norm": 0.4200917184352875, "learning_rate": 8.466436390968525e-06, "loss": 0.3412, "step": 17719 }, { "epoch": 0.8131797531090817, "grad_norm": 0.4593302011489868, "learning_rate": 8.466259689357981e-06, "loss": 0.3764, "step": 17720 }, { "epoch": 0.8132256436143362, "grad_norm": 0.44620248675346375, "learning_rate": 8.466082979412175e-06, "loss": 0.3821, "step": 17721 }, { "epoch": 0.8132715341195906, "grad_norm": 0.451384037733078, "learning_rate": 8.465906261131533e-06, "loss": 0.4022, "step": 17722 }, { "epoch": 0.8133174246248451, "grad_norm": 0.48389217257499695, "learning_rate": 8.465729534516482e-06, "loss": 0.4304, "step": 17723 }, { "epoch": 0.8133633151300996, "grad_norm": 0.4236779808998108, "learning_rate": 8.465552799567445e-06, "loss": 0.2994, "step": 17724 }, { "epoch": 0.813409205635354, "grad_norm": 0.4794600307941437, "learning_rate": 8.465376056284846e-06, "loss": 0.482, "step": 17725 }, { "epoch": 0.8134550961406085, "grad_norm": 0.46359068155288696, "learning_rate": 8.465199304669112e-06, "loss": 0.3918, "step": 17726 }, { "epoch": 0.813500986645863, "grad_norm": 0.4240097403526306, "learning_rate": 8.465022544720667e-06, "loss": 0.3488, "step": 17727 }, { "epoch": 0.8135468771511174, "grad_norm": 0.44780975580215454, "learning_rate": 8.464845776439938e-06, "loss": 0.3851, "step": 17728 }, { "epoch": 0.8135927676563719, "grad_norm": 0.4641299545764923, "learning_rate": 8.464668999827345e-06, "loss": 0.3647, "step": 17729 }, { "epoch": 0.8136386581616264, "grad_norm": 0.4356493651866913, "learning_rate": 8.464492214883319e-06, "loss": 0.3396, "step": 17730 }, { "epoch": 0.8136845486668808, "grad_norm": 0.5075135827064514, "learning_rate": 8.464315421608284e-06, "loss": 0.4969, "step": 17731 }, { "epoch": 0.8137304391721353, "grad_norm": 0.4463634490966797, "learning_rate": 8.464138620002662e-06, "loss": 0.431, "step": 17732 }, { "epoch": 0.8137763296773898, "grad_norm": 0.48259538412094116, "learning_rate": 8.46396181006688e-06, "loss": 0.4567, "step": 17733 }, { "epoch": 0.8138222201826442, "grad_norm": 0.46133771538734436, "learning_rate": 8.463784991801362e-06, "loss": 0.409, "step": 17734 }, { "epoch": 0.8138681106878987, "grad_norm": 0.4328539967536926, "learning_rate": 8.463608165206536e-06, "loss": 0.3628, "step": 17735 }, { "epoch": 0.8139140011931532, "grad_norm": 0.4174555242061615, "learning_rate": 8.463431330282826e-06, "loss": 0.3275, "step": 17736 }, { "epoch": 0.8139598916984075, "grad_norm": 0.4667373597621918, "learning_rate": 8.463254487030655e-06, "loss": 0.3951, "step": 17737 }, { "epoch": 0.814005782203662, "grad_norm": 0.4213361442089081, "learning_rate": 8.463077635450452e-06, "loss": 0.3493, "step": 17738 }, { "epoch": 0.8140516727089165, "grad_norm": 0.5281664729118347, "learning_rate": 8.462900775542638e-06, "loss": 0.4382, "step": 17739 }, { "epoch": 0.814097563214171, "grad_norm": 0.46837037801742554, "learning_rate": 8.462723907307641e-06, "loss": 0.3871, "step": 17740 }, { "epoch": 0.8141434537194254, "grad_norm": 0.4668399691581726, "learning_rate": 8.462547030745887e-06, "loss": 0.4469, "step": 17741 }, { "epoch": 0.8141893442246799, "grad_norm": 0.4656510651111603, "learning_rate": 8.4623701458578e-06, "loss": 0.3728, "step": 17742 }, { "epoch": 0.8142352347299344, "grad_norm": 0.4243941903114319, "learning_rate": 8.462193252643805e-06, "loss": 0.3588, "step": 17743 }, { "epoch": 0.8142811252351888, "grad_norm": 0.4399278163909912, "learning_rate": 8.462016351104327e-06, "loss": 0.3382, "step": 17744 }, { "epoch": 0.8143270157404433, "grad_norm": 0.48134341835975647, "learning_rate": 8.461839441239794e-06, "loss": 0.4667, "step": 17745 }, { "epoch": 0.8143729062456978, "grad_norm": 0.42967841029167175, "learning_rate": 8.46166252305063e-06, "loss": 0.3647, "step": 17746 }, { "epoch": 0.8144187967509522, "grad_norm": 0.469441682100296, "learning_rate": 8.461485596537257e-06, "loss": 0.4075, "step": 17747 }, { "epoch": 0.8144646872562067, "grad_norm": 0.43080922961235046, "learning_rate": 8.461308661700107e-06, "loss": 0.3914, "step": 17748 }, { "epoch": 0.8145105777614612, "grad_norm": 0.444606751203537, "learning_rate": 8.461131718539602e-06, "loss": 0.3824, "step": 17749 }, { "epoch": 0.8145564682667156, "grad_norm": 0.4257248044013977, "learning_rate": 8.460954767056166e-06, "loss": 0.3498, "step": 17750 }, { "epoch": 0.8146023587719701, "grad_norm": 0.41601067781448364, "learning_rate": 8.460777807250226e-06, "loss": 0.3451, "step": 17751 }, { "epoch": 0.8146482492772246, "grad_norm": 0.4621587097644806, "learning_rate": 8.460600839122208e-06, "loss": 0.4252, "step": 17752 }, { "epoch": 0.814694139782479, "grad_norm": 0.4444040060043335, "learning_rate": 8.460423862672537e-06, "loss": 0.3813, "step": 17753 }, { "epoch": 0.8147400302877335, "grad_norm": 0.39249908924102783, "learning_rate": 8.46024687790164e-06, "loss": 0.2776, "step": 17754 }, { "epoch": 0.814785920792988, "grad_norm": 0.45111456513404846, "learning_rate": 8.46006988480994e-06, "loss": 0.3825, "step": 17755 }, { "epoch": 0.8148318112982424, "grad_norm": 0.4661438763141632, "learning_rate": 8.459892883397866e-06, "loss": 0.4689, "step": 17756 }, { "epoch": 0.8148777018034968, "grad_norm": 0.48406240344047546, "learning_rate": 8.459715873665839e-06, "loss": 0.4558, "step": 17757 }, { "epoch": 0.8149235923087513, "grad_norm": 0.4763455390930176, "learning_rate": 8.459538855614289e-06, "loss": 0.4329, "step": 17758 }, { "epoch": 0.8149694828140058, "grad_norm": 0.5002901554107666, "learning_rate": 8.45936182924364e-06, "loss": 0.4089, "step": 17759 }, { "epoch": 0.8150153733192602, "grad_norm": 0.46355754137039185, "learning_rate": 8.459184794554316e-06, "loss": 0.4187, "step": 17760 }, { "epoch": 0.8150612638245147, "grad_norm": 0.4148882031440735, "learning_rate": 8.459007751546746e-06, "loss": 0.3309, "step": 17761 }, { "epoch": 0.8151071543297692, "grad_norm": 0.4791349470615387, "learning_rate": 8.458830700221353e-06, "loss": 0.4112, "step": 17762 }, { "epoch": 0.8151530448350236, "grad_norm": 0.4429086744785309, "learning_rate": 8.458653640578564e-06, "loss": 0.3804, "step": 17763 }, { "epoch": 0.8151989353402781, "grad_norm": 0.48680782318115234, "learning_rate": 8.458476572618804e-06, "loss": 0.4349, "step": 17764 }, { "epoch": 0.8152448258455326, "grad_norm": 0.44916871190071106, "learning_rate": 8.4582994963425e-06, "loss": 0.3886, "step": 17765 }, { "epoch": 0.815290716350787, "grad_norm": 0.44826626777648926, "learning_rate": 8.458122411750078e-06, "loss": 0.3648, "step": 17766 }, { "epoch": 0.8153366068560415, "grad_norm": 0.4474397301673889, "learning_rate": 8.457945318841962e-06, "loss": 0.3615, "step": 17767 }, { "epoch": 0.815382497361296, "grad_norm": 0.4609276056289673, "learning_rate": 8.457768217618578e-06, "loss": 0.3807, "step": 17768 }, { "epoch": 0.8154283878665504, "grad_norm": 0.4670076370239258, "learning_rate": 8.457591108080354e-06, "loss": 0.4156, "step": 17769 }, { "epoch": 0.8154742783718049, "grad_norm": 0.4769991636276245, "learning_rate": 8.457413990227714e-06, "loss": 0.469, "step": 17770 }, { "epoch": 0.8155201688770594, "grad_norm": 0.384021133184433, "learning_rate": 8.457236864061085e-06, "loss": 0.2575, "step": 17771 }, { "epoch": 0.8155660593823137, "grad_norm": 0.4797494411468506, "learning_rate": 8.457059729580894e-06, "loss": 0.4431, "step": 17772 }, { "epoch": 0.8156119498875682, "grad_norm": 0.4846058487892151, "learning_rate": 8.456882586787562e-06, "loss": 0.4859, "step": 17773 }, { "epoch": 0.8156578403928227, "grad_norm": 0.48710381984710693, "learning_rate": 8.456705435681522e-06, "loss": 0.4823, "step": 17774 }, { "epoch": 0.8157037308980772, "grad_norm": 0.4556836187839508, "learning_rate": 8.456528276263193e-06, "loss": 0.4011, "step": 17775 }, { "epoch": 0.8157496214033316, "grad_norm": 0.6782156229019165, "learning_rate": 8.456351108533007e-06, "loss": 0.3793, "step": 17776 }, { "epoch": 0.8157955119085861, "grad_norm": 0.4809435307979584, "learning_rate": 8.456173932491387e-06, "loss": 0.3905, "step": 17777 }, { "epoch": 0.8158414024138406, "grad_norm": 0.42793768644332886, "learning_rate": 8.45599674813876e-06, "loss": 0.3663, "step": 17778 }, { "epoch": 0.815887292919095, "grad_norm": 0.45739156007766724, "learning_rate": 8.455819555475548e-06, "loss": 0.4303, "step": 17779 }, { "epoch": 0.8159331834243495, "grad_norm": 0.43945157527923584, "learning_rate": 8.455642354502185e-06, "loss": 0.3708, "step": 17780 }, { "epoch": 0.815979073929604, "grad_norm": 0.5045421123504639, "learning_rate": 8.455465145219091e-06, "loss": 0.379, "step": 17781 }, { "epoch": 0.8160249644348584, "grad_norm": 0.46192875504493713, "learning_rate": 8.455287927626694e-06, "loss": 0.4686, "step": 17782 }, { "epoch": 0.8160708549401129, "grad_norm": 0.48617199063301086, "learning_rate": 8.45511070172542e-06, "loss": 0.4485, "step": 17783 }, { "epoch": 0.8161167454453674, "grad_norm": 0.5151456594467163, "learning_rate": 8.454933467515695e-06, "loss": 0.404, "step": 17784 }, { "epoch": 0.8161626359506218, "grad_norm": 0.5029243230819702, "learning_rate": 8.454756224997945e-06, "loss": 0.4266, "step": 17785 }, { "epoch": 0.8162085264558763, "grad_norm": 0.5284098386764526, "learning_rate": 8.454578974172598e-06, "loss": 0.4798, "step": 17786 }, { "epoch": 0.8162544169611308, "grad_norm": 0.552433431148529, "learning_rate": 8.454401715040077e-06, "loss": 0.4519, "step": 17787 }, { "epoch": 0.8163003074663852, "grad_norm": 0.44040995836257935, "learning_rate": 8.454224447600812e-06, "loss": 0.3602, "step": 17788 }, { "epoch": 0.8163461979716397, "grad_norm": 0.4876677989959717, "learning_rate": 8.454047171855227e-06, "loss": 0.4315, "step": 17789 }, { "epoch": 0.8163920884768942, "grad_norm": 0.5032410025596619, "learning_rate": 8.453869887803747e-06, "loss": 0.5389, "step": 17790 }, { "epoch": 0.8164379789821486, "grad_norm": 0.4923698902130127, "learning_rate": 8.453692595446803e-06, "loss": 0.4377, "step": 17791 }, { "epoch": 0.816483869487403, "grad_norm": 0.44457751512527466, "learning_rate": 8.453515294784815e-06, "loss": 0.3908, "step": 17792 }, { "epoch": 0.8165297599926575, "grad_norm": 0.44208553433418274, "learning_rate": 8.453337985818215e-06, "loss": 0.3672, "step": 17793 }, { "epoch": 0.816575650497912, "grad_norm": 0.45864975452423096, "learning_rate": 8.453160668547426e-06, "loss": 0.4236, "step": 17794 }, { "epoch": 0.8166215410031664, "grad_norm": 0.5064997673034668, "learning_rate": 8.452983342972875e-06, "loss": 0.4952, "step": 17795 }, { "epoch": 0.8166674315084209, "grad_norm": 0.473934531211853, "learning_rate": 8.452806009094992e-06, "loss": 0.4459, "step": 17796 }, { "epoch": 0.8167133220136754, "grad_norm": 0.47915735840797424, "learning_rate": 8.452628666914197e-06, "loss": 0.4555, "step": 17797 }, { "epoch": 0.8167592125189298, "grad_norm": 0.41730380058288574, "learning_rate": 8.452451316430921e-06, "loss": 0.3159, "step": 17798 }, { "epoch": 0.8168051030241843, "grad_norm": 0.44993406534194946, "learning_rate": 8.452273957645589e-06, "loss": 0.4068, "step": 17799 }, { "epoch": 0.8168509935294388, "grad_norm": 0.4649324119091034, "learning_rate": 8.452096590558627e-06, "loss": 0.4428, "step": 17800 }, { "epoch": 0.8168968840346932, "grad_norm": 0.49509313702583313, "learning_rate": 8.451919215170463e-06, "loss": 0.4426, "step": 17801 }, { "epoch": 0.8169427745399477, "grad_norm": 0.4350048899650574, "learning_rate": 8.451741831481522e-06, "loss": 0.4111, "step": 17802 }, { "epoch": 0.8169886650452022, "grad_norm": 0.4675920307636261, "learning_rate": 8.451564439492232e-06, "loss": 0.4116, "step": 17803 }, { "epoch": 0.8170345555504566, "grad_norm": 0.49004170298576355, "learning_rate": 8.451387039203019e-06, "loss": 0.4487, "step": 17804 }, { "epoch": 0.8170804460557111, "grad_norm": 0.44811636209487915, "learning_rate": 8.451209630614309e-06, "loss": 0.3915, "step": 17805 }, { "epoch": 0.8171263365609656, "grad_norm": 0.4539348781108856, "learning_rate": 8.451032213726528e-06, "loss": 0.3384, "step": 17806 }, { "epoch": 0.81717222706622, "grad_norm": 0.4370141327381134, "learning_rate": 8.450854788540106e-06, "loss": 0.4094, "step": 17807 }, { "epoch": 0.8172181175714744, "grad_norm": 0.42220792174339294, "learning_rate": 8.450677355055465e-06, "loss": 0.367, "step": 17808 }, { "epoch": 0.8172640080767289, "grad_norm": 0.45550698041915894, "learning_rate": 8.450499913273037e-06, "loss": 0.3917, "step": 17809 }, { "epoch": 0.8173098985819834, "grad_norm": 0.40571096539497375, "learning_rate": 8.450322463193243e-06, "loss": 0.3088, "step": 17810 }, { "epoch": 0.8173557890872378, "grad_norm": 0.44377827644348145, "learning_rate": 8.450145004816513e-06, "loss": 0.3963, "step": 17811 }, { "epoch": 0.8174016795924923, "grad_norm": 0.4445282816886902, "learning_rate": 8.449967538143274e-06, "loss": 0.3661, "step": 17812 }, { "epoch": 0.8174475700977468, "grad_norm": 0.43142879009246826, "learning_rate": 8.449790063173953e-06, "loss": 0.3811, "step": 17813 }, { "epoch": 0.8174934606030012, "grad_norm": 0.43431249260902405, "learning_rate": 8.449612579908972e-06, "loss": 0.3608, "step": 17814 }, { "epoch": 0.8175393511082557, "grad_norm": 0.4378563463687897, "learning_rate": 8.449435088348765e-06, "loss": 0.3352, "step": 17815 }, { "epoch": 0.8175852416135102, "grad_norm": 0.41473838686943054, "learning_rate": 8.449257588493753e-06, "loss": 0.3297, "step": 17816 }, { "epoch": 0.8176311321187646, "grad_norm": 0.5034177899360657, "learning_rate": 8.449080080344367e-06, "loss": 0.4264, "step": 17817 }, { "epoch": 0.8176770226240191, "grad_norm": 0.4560909569263458, "learning_rate": 8.448902563901031e-06, "loss": 0.3817, "step": 17818 }, { "epoch": 0.8177229131292736, "grad_norm": 0.4419739842414856, "learning_rate": 8.448725039164174e-06, "loss": 0.3791, "step": 17819 }, { "epoch": 0.817768803634528, "grad_norm": 0.4512062966823578, "learning_rate": 8.44854750613422e-06, "loss": 0.3826, "step": 17820 }, { "epoch": 0.8178146941397825, "grad_norm": 0.4667685627937317, "learning_rate": 8.448369964811598e-06, "loss": 0.4273, "step": 17821 }, { "epoch": 0.817860584645037, "grad_norm": 0.4926930367946625, "learning_rate": 8.448192415196734e-06, "loss": 0.4432, "step": 17822 }, { "epoch": 0.8179064751502914, "grad_norm": 0.47055089473724365, "learning_rate": 8.448014857290057e-06, "loss": 0.4775, "step": 17823 }, { "epoch": 0.8179523656555459, "grad_norm": 0.4343009293079376, "learning_rate": 8.447837291091992e-06, "loss": 0.3395, "step": 17824 }, { "epoch": 0.8179982561608004, "grad_norm": 0.4633493423461914, "learning_rate": 8.447659716602965e-06, "loss": 0.3775, "step": 17825 }, { "epoch": 0.8180441466660547, "grad_norm": 0.4561808705329895, "learning_rate": 8.447482133823407e-06, "loss": 0.4117, "step": 17826 }, { "epoch": 0.8180900371713092, "grad_norm": 0.44060763716697693, "learning_rate": 8.447304542753741e-06, "loss": 0.3724, "step": 17827 }, { "epoch": 0.8181359276765637, "grad_norm": 0.4121582806110382, "learning_rate": 8.447126943394397e-06, "loss": 0.3328, "step": 17828 }, { "epoch": 0.8181818181818182, "grad_norm": 0.4680311977863312, "learning_rate": 8.4469493357458e-06, "loss": 0.3787, "step": 17829 }, { "epoch": 0.8182277086870726, "grad_norm": 0.6310790777206421, "learning_rate": 8.446771719808376e-06, "loss": 0.3654, "step": 17830 }, { "epoch": 0.8182735991923271, "grad_norm": 0.45473721623420715, "learning_rate": 8.446594095582555e-06, "loss": 0.372, "step": 17831 }, { "epoch": 0.8183194896975816, "grad_norm": 0.44879406690597534, "learning_rate": 8.446416463068763e-06, "loss": 0.3656, "step": 17832 }, { "epoch": 0.818365380202836, "grad_norm": 0.45837101340293884, "learning_rate": 8.446238822267428e-06, "loss": 0.436, "step": 17833 }, { "epoch": 0.8184112707080905, "grad_norm": 0.4236229360103607, "learning_rate": 8.446061173178975e-06, "loss": 0.3522, "step": 17834 }, { "epoch": 0.818457161213345, "grad_norm": 0.47889837622642517, "learning_rate": 8.445883515803834e-06, "loss": 0.4767, "step": 17835 }, { "epoch": 0.8185030517185994, "grad_norm": 0.4336996376514435, "learning_rate": 8.445705850142429e-06, "loss": 0.3264, "step": 17836 }, { "epoch": 0.8185489422238539, "grad_norm": 0.42929285764694214, "learning_rate": 8.44552817619519e-06, "loss": 0.3331, "step": 17837 }, { "epoch": 0.8185948327291084, "grad_norm": 0.4646855592727661, "learning_rate": 8.445350493962542e-06, "loss": 0.4369, "step": 17838 }, { "epoch": 0.8186407232343628, "grad_norm": 0.4273969531059265, "learning_rate": 8.445172803444915e-06, "loss": 0.3411, "step": 17839 }, { "epoch": 0.8186866137396173, "grad_norm": 0.4667750298976898, "learning_rate": 8.444995104642734e-06, "loss": 0.4047, "step": 17840 }, { "epoch": 0.8187325042448718, "grad_norm": 0.4668880105018616, "learning_rate": 8.444817397556426e-06, "loss": 0.3899, "step": 17841 }, { "epoch": 0.8187783947501261, "grad_norm": 0.4907797873020172, "learning_rate": 8.444639682186421e-06, "loss": 0.4785, "step": 17842 }, { "epoch": 0.8188242852553806, "grad_norm": 0.45590436458587646, "learning_rate": 8.444461958533146e-06, "loss": 0.3761, "step": 17843 }, { "epoch": 0.8188701757606351, "grad_norm": 0.42426902055740356, "learning_rate": 8.444284226597023e-06, "loss": 0.3575, "step": 17844 }, { "epoch": 0.8189160662658896, "grad_norm": 0.4448358416557312, "learning_rate": 8.444106486378486e-06, "loss": 0.4036, "step": 17845 }, { "epoch": 0.818961956771144, "grad_norm": 0.45628583431243896, "learning_rate": 8.44392873787796e-06, "loss": 0.406, "step": 17846 }, { "epoch": 0.8190078472763985, "grad_norm": 0.43884557485580444, "learning_rate": 8.443750981095873e-06, "loss": 0.3797, "step": 17847 }, { "epoch": 0.819053737781653, "grad_norm": 0.4737260639667511, "learning_rate": 8.44357321603265e-06, "loss": 0.4298, "step": 17848 }, { "epoch": 0.8190996282869074, "grad_norm": 0.43591901659965515, "learning_rate": 8.443395442688721e-06, "loss": 0.3548, "step": 17849 }, { "epoch": 0.8191455187921619, "grad_norm": 0.46873801946640015, "learning_rate": 8.443217661064513e-06, "loss": 0.4097, "step": 17850 }, { "epoch": 0.8191914092974164, "grad_norm": 0.4389280378818512, "learning_rate": 8.443039871160452e-06, "loss": 0.3992, "step": 17851 }, { "epoch": 0.8192372998026708, "grad_norm": 0.4239269495010376, "learning_rate": 8.442862072976968e-06, "loss": 0.3839, "step": 17852 }, { "epoch": 0.8192831903079253, "grad_norm": 0.44302040338516235, "learning_rate": 8.442684266514488e-06, "loss": 0.3586, "step": 17853 }, { "epoch": 0.8193290808131798, "grad_norm": 0.4616401493549347, "learning_rate": 8.442506451773437e-06, "loss": 0.4071, "step": 17854 }, { "epoch": 0.8193749713184342, "grad_norm": 0.4629646837711334, "learning_rate": 8.442328628754244e-06, "loss": 0.4824, "step": 17855 }, { "epoch": 0.8194208618236887, "grad_norm": 0.5071123242378235, "learning_rate": 8.44215079745734e-06, "loss": 0.464, "step": 17856 }, { "epoch": 0.8194667523289432, "grad_norm": 0.4645283818244934, "learning_rate": 8.44197295788315e-06, "loss": 0.4206, "step": 17857 }, { "epoch": 0.8195126428341976, "grad_norm": 0.4429694712162018, "learning_rate": 8.441795110032098e-06, "loss": 0.4576, "step": 17858 }, { "epoch": 0.8195585333394521, "grad_norm": 0.5408008098602295, "learning_rate": 8.441617253904615e-06, "loss": 0.4782, "step": 17859 }, { "epoch": 0.8196044238447066, "grad_norm": 0.4746640622615814, "learning_rate": 8.441439389501132e-06, "loss": 0.4357, "step": 17860 }, { "epoch": 0.8196503143499609, "grad_norm": 0.47243228554725647, "learning_rate": 8.44126151682207e-06, "loss": 0.4166, "step": 17861 }, { "epoch": 0.8196962048552154, "grad_norm": 0.45268604159355164, "learning_rate": 8.441083635867862e-06, "loss": 0.3731, "step": 17862 }, { "epoch": 0.8197420953604699, "grad_norm": 0.5436232089996338, "learning_rate": 8.440905746638934e-06, "loss": 0.517, "step": 17863 }, { "epoch": 0.8197879858657244, "grad_norm": 0.5798476338386536, "learning_rate": 8.440727849135714e-06, "loss": 0.5543, "step": 17864 }, { "epoch": 0.8198338763709788, "grad_norm": 0.4424605965614319, "learning_rate": 8.440549943358629e-06, "loss": 0.4041, "step": 17865 }, { "epoch": 0.8198797668762333, "grad_norm": 0.42778280377388, "learning_rate": 8.440372029308105e-06, "loss": 0.3294, "step": 17866 }, { "epoch": 0.8199256573814878, "grad_norm": 0.4639279544353485, "learning_rate": 8.440194106984576e-06, "loss": 0.4592, "step": 17867 }, { "epoch": 0.8199715478867422, "grad_norm": 0.42307767271995544, "learning_rate": 8.440016176388463e-06, "loss": 0.3687, "step": 17868 }, { "epoch": 0.8200174383919967, "grad_norm": 0.4594457447528839, "learning_rate": 8.439838237520197e-06, "loss": 0.4395, "step": 17869 }, { "epoch": 0.8200633288972512, "grad_norm": 0.47358983755111694, "learning_rate": 8.439660290380206e-06, "loss": 0.4527, "step": 17870 }, { "epoch": 0.8201092194025056, "grad_norm": 0.43438515067100525, "learning_rate": 8.43948233496892e-06, "loss": 0.3603, "step": 17871 }, { "epoch": 0.8201551099077601, "grad_norm": 0.4331245422363281, "learning_rate": 8.439304371286764e-06, "loss": 0.357, "step": 17872 }, { "epoch": 0.8202010004130146, "grad_norm": 0.4388468563556671, "learning_rate": 8.439126399334164e-06, "loss": 0.366, "step": 17873 }, { "epoch": 0.820246890918269, "grad_norm": 0.45032402873039246, "learning_rate": 8.438948419111553e-06, "loss": 0.3719, "step": 17874 }, { "epoch": 0.8202927814235235, "grad_norm": 0.4262039065361023, "learning_rate": 8.438770430619354e-06, "loss": 0.3166, "step": 17875 }, { "epoch": 0.820338671928778, "grad_norm": 0.4503183662891388, "learning_rate": 8.438592433857998e-06, "loss": 0.3877, "step": 17876 }, { "epoch": 0.8203845624340323, "grad_norm": 0.4305194914340973, "learning_rate": 8.438414428827913e-06, "loss": 0.362, "step": 17877 }, { "epoch": 0.8204304529392868, "grad_norm": 0.494642436504364, "learning_rate": 8.438236415529526e-06, "loss": 0.4674, "step": 17878 }, { "epoch": 0.8204763434445413, "grad_norm": 0.4419185519218445, "learning_rate": 8.438058393963265e-06, "loss": 0.359, "step": 17879 }, { "epoch": 0.8205222339497957, "grad_norm": 0.4418424069881439, "learning_rate": 8.43788036412956e-06, "loss": 0.3533, "step": 17880 }, { "epoch": 0.8205681244550502, "grad_norm": 0.44324707984924316, "learning_rate": 8.437702326028837e-06, "loss": 0.3828, "step": 17881 }, { "epoch": 0.8206140149603047, "grad_norm": 0.4626299738883972, "learning_rate": 8.437524279661527e-06, "loss": 0.4356, "step": 17882 }, { "epoch": 0.8206599054655592, "grad_norm": 0.45641717314720154, "learning_rate": 8.437346225028054e-06, "loss": 0.404, "step": 17883 }, { "epoch": 0.8207057959708136, "grad_norm": 0.4591040313243866, "learning_rate": 8.437168162128847e-06, "loss": 0.385, "step": 17884 }, { "epoch": 0.8207516864760681, "grad_norm": 0.4994543790817261, "learning_rate": 8.436990090964337e-06, "loss": 0.5062, "step": 17885 }, { "epoch": 0.8207975769813226, "grad_norm": 0.4303171634674072, "learning_rate": 8.436812011534949e-06, "loss": 0.3609, "step": 17886 }, { "epoch": 0.820843467486577, "grad_norm": 0.7444688081741333, "learning_rate": 8.436633923841116e-06, "loss": 0.4836, "step": 17887 }, { "epoch": 0.8208893579918315, "grad_norm": 0.41902852058410645, "learning_rate": 8.43645582788326e-06, "loss": 0.3326, "step": 17888 }, { "epoch": 0.820935248497086, "grad_norm": 0.4969289302825928, "learning_rate": 8.436277723661814e-06, "loss": 0.4586, "step": 17889 }, { "epoch": 0.8209811390023404, "grad_norm": 0.44333550333976746, "learning_rate": 8.436099611177203e-06, "loss": 0.4034, "step": 17890 }, { "epoch": 0.8210270295075949, "grad_norm": 0.46270135045051575, "learning_rate": 8.43592149042986e-06, "loss": 0.3446, "step": 17891 }, { "epoch": 0.8210729200128494, "grad_norm": 0.48731061816215515, "learning_rate": 8.435743361420206e-06, "loss": 0.4634, "step": 17892 }, { "epoch": 0.8211188105181038, "grad_norm": 0.45579615235328674, "learning_rate": 8.435565224148676e-06, "loss": 0.4197, "step": 17893 }, { "epoch": 0.8211647010233583, "grad_norm": 0.45256131887435913, "learning_rate": 8.435387078615696e-06, "loss": 0.4199, "step": 17894 }, { "epoch": 0.8212105915286128, "grad_norm": 0.44706952571868896, "learning_rate": 8.435208924821694e-06, "loss": 0.332, "step": 17895 }, { "epoch": 0.8212564820338671, "grad_norm": 0.4572947025299072, "learning_rate": 8.4350307627671e-06, "loss": 0.3887, "step": 17896 }, { "epoch": 0.8213023725391216, "grad_norm": 0.41217607259750366, "learning_rate": 8.434852592452338e-06, "loss": 0.3355, "step": 17897 }, { "epoch": 0.8213482630443761, "grad_norm": 0.4582231938838959, "learning_rate": 8.434674413877841e-06, "loss": 0.4259, "step": 17898 }, { "epoch": 0.8213941535496306, "grad_norm": 0.4440719485282898, "learning_rate": 8.434496227044038e-06, "loss": 0.4137, "step": 17899 }, { "epoch": 0.821440044054885, "grad_norm": 0.4457356035709381, "learning_rate": 8.434318031951354e-06, "loss": 0.4449, "step": 17900 }, { "epoch": 0.8214859345601395, "grad_norm": 0.508654773235321, "learning_rate": 8.434139828600219e-06, "loss": 0.4741, "step": 17901 }, { "epoch": 0.821531825065394, "grad_norm": 0.46433717012405396, "learning_rate": 8.433961616991061e-06, "loss": 0.4628, "step": 17902 }, { "epoch": 0.8215777155706484, "grad_norm": 0.50074303150177, "learning_rate": 8.43378339712431e-06, "loss": 0.3391, "step": 17903 }, { "epoch": 0.8216236060759029, "grad_norm": 0.4183187782764435, "learning_rate": 8.433605169000393e-06, "loss": 0.2966, "step": 17904 }, { "epoch": 0.8216694965811574, "grad_norm": 0.4308905601501465, "learning_rate": 8.43342693261974e-06, "loss": 0.3542, "step": 17905 }, { "epoch": 0.8217153870864118, "grad_norm": 0.5007399320602417, "learning_rate": 8.433248687982778e-06, "loss": 0.4733, "step": 17906 }, { "epoch": 0.8217612775916663, "grad_norm": 0.47217103838920593, "learning_rate": 8.433070435089937e-06, "loss": 0.4128, "step": 17907 }, { "epoch": 0.8218071680969208, "grad_norm": 0.4739191234111786, "learning_rate": 8.432892173941646e-06, "loss": 0.4132, "step": 17908 }, { "epoch": 0.8218530586021752, "grad_norm": 0.47100046277046204, "learning_rate": 8.432713904538331e-06, "loss": 0.4096, "step": 17909 }, { "epoch": 0.8218989491074297, "grad_norm": 0.45202040672302246, "learning_rate": 8.432535626880424e-06, "loss": 0.3803, "step": 17910 }, { "epoch": 0.8219448396126842, "grad_norm": 0.4423539638519287, "learning_rate": 8.43235734096835e-06, "loss": 0.3711, "step": 17911 }, { "epoch": 0.8219907301179386, "grad_norm": 0.4939267933368683, "learning_rate": 8.432179046802542e-06, "loss": 0.4421, "step": 17912 }, { "epoch": 0.822036620623193, "grad_norm": 0.418667197227478, "learning_rate": 8.432000744383426e-06, "loss": 0.3186, "step": 17913 }, { "epoch": 0.8220825111284475, "grad_norm": 0.4393908381462097, "learning_rate": 8.431822433711431e-06, "loss": 0.4131, "step": 17914 }, { "epoch": 0.8221284016337019, "grad_norm": 0.5270518660545349, "learning_rate": 8.431644114786987e-06, "loss": 0.5361, "step": 17915 }, { "epoch": 0.8221742921389564, "grad_norm": 0.4561271667480469, "learning_rate": 8.431465787610523e-06, "loss": 0.4399, "step": 17916 }, { "epoch": 0.8222201826442109, "grad_norm": 0.4458649754524231, "learning_rate": 8.431287452182463e-06, "loss": 0.4392, "step": 17917 }, { "epoch": 0.8222660731494654, "grad_norm": 0.4482864737510681, "learning_rate": 8.431109108503243e-06, "loss": 0.3997, "step": 17918 }, { "epoch": 0.8223119636547198, "grad_norm": 0.4181709587574005, "learning_rate": 8.430930756573287e-06, "loss": 0.3454, "step": 17919 }, { "epoch": 0.8223578541599743, "grad_norm": 0.4522830545902252, "learning_rate": 8.430752396393024e-06, "loss": 0.3992, "step": 17920 }, { "epoch": 0.8224037446652288, "grad_norm": 0.42259693145751953, "learning_rate": 8.430574027962888e-06, "loss": 0.3171, "step": 17921 }, { "epoch": 0.8224496351704832, "grad_norm": 0.4334827959537506, "learning_rate": 8.430395651283302e-06, "loss": 0.3478, "step": 17922 }, { "epoch": 0.8224955256757377, "grad_norm": 0.4645261764526367, "learning_rate": 8.430217266354697e-06, "loss": 0.3887, "step": 17923 }, { "epoch": 0.8225414161809922, "grad_norm": 0.4732913672924042, "learning_rate": 8.430038873177501e-06, "loss": 0.3864, "step": 17924 }, { "epoch": 0.8225873066862466, "grad_norm": 0.4776921570301056, "learning_rate": 8.429860471752146e-06, "loss": 0.3955, "step": 17925 }, { "epoch": 0.8226331971915011, "grad_norm": 0.5224975943565369, "learning_rate": 8.429682062079059e-06, "loss": 0.4604, "step": 17926 }, { "epoch": 0.8226790876967556, "grad_norm": 0.4713437855243683, "learning_rate": 8.429503644158668e-06, "loss": 0.4134, "step": 17927 }, { "epoch": 0.82272497820201, "grad_norm": 0.43453097343444824, "learning_rate": 8.429325217991404e-06, "loss": 0.3699, "step": 17928 }, { "epoch": 0.8227708687072645, "grad_norm": 0.4282715916633606, "learning_rate": 8.429146783577695e-06, "loss": 0.3633, "step": 17929 }, { "epoch": 0.822816759212519, "grad_norm": 0.4438944160938263, "learning_rate": 8.428968340917972e-06, "loss": 0.3589, "step": 17930 }, { "epoch": 0.8228626497177733, "grad_norm": 0.46914318203926086, "learning_rate": 8.428789890012659e-06, "loss": 0.4943, "step": 17931 }, { "epoch": 0.8229085402230278, "grad_norm": 0.48776641488075256, "learning_rate": 8.42861143086219e-06, "loss": 0.4223, "step": 17932 }, { "epoch": 0.8229544307282823, "grad_norm": 0.42592111229896545, "learning_rate": 8.428432963466992e-06, "loss": 0.3623, "step": 17933 }, { "epoch": 0.8230003212335368, "grad_norm": 0.4786435663700104, "learning_rate": 8.428254487827495e-06, "loss": 0.438, "step": 17934 }, { "epoch": 0.8230462117387912, "grad_norm": 0.4186241030693054, "learning_rate": 8.428076003944128e-06, "loss": 0.3482, "step": 17935 }, { "epoch": 0.8230921022440457, "grad_norm": 0.46027281880378723, "learning_rate": 8.427897511817322e-06, "loss": 0.4654, "step": 17936 }, { "epoch": 0.8231379927493002, "grad_norm": 0.4466524124145508, "learning_rate": 8.427719011447502e-06, "loss": 0.3654, "step": 17937 }, { "epoch": 0.8231838832545546, "grad_norm": 0.4481244683265686, "learning_rate": 8.4275405028351e-06, "loss": 0.4511, "step": 17938 }, { "epoch": 0.8232297737598091, "grad_norm": 0.4062945544719696, "learning_rate": 8.427361985980545e-06, "loss": 0.2924, "step": 17939 }, { "epoch": 0.8232756642650636, "grad_norm": 0.4455844461917877, "learning_rate": 8.427183460884267e-06, "loss": 0.4099, "step": 17940 }, { "epoch": 0.823321554770318, "grad_norm": 0.4624093770980835, "learning_rate": 8.427004927546693e-06, "loss": 0.3913, "step": 17941 }, { "epoch": 0.8233674452755725, "grad_norm": 0.4717477858066559, "learning_rate": 8.426826385968253e-06, "loss": 0.4078, "step": 17942 }, { "epoch": 0.823413335780827, "grad_norm": 0.43445292115211487, "learning_rate": 8.42664783614938e-06, "loss": 0.4065, "step": 17943 }, { "epoch": 0.8234592262860814, "grad_norm": 0.4906216561794281, "learning_rate": 8.426469278090498e-06, "loss": 0.4367, "step": 17944 }, { "epoch": 0.8235051167913359, "grad_norm": 0.4314855635166168, "learning_rate": 8.426290711792042e-06, "loss": 0.3329, "step": 17945 }, { "epoch": 0.8235510072965904, "grad_norm": 0.47723203897476196, "learning_rate": 8.426112137254435e-06, "loss": 0.4804, "step": 17946 }, { "epoch": 0.8235968978018448, "grad_norm": 0.43511807918548584, "learning_rate": 8.425933554478112e-06, "loss": 0.3578, "step": 17947 }, { "epoch": 0.8236427883070992, "grad_norm": 0.4902743399143219, "learning_rate": 8.425754963463497e-06, "loss": 0.388, "step": 17948 }, { "epoch": 0.8236886788123537, "grad_norm": 0.4480859041213989, "learning_rate": 8.425576364211025e-06, "loss": 0.3628, "step": 17949 }, { "epoch": 0.8237345693176081, "grad_norm": 0.45992904901504517, "learning_rate": 8.425397756721121e-06, "loss": 0.3921, "step": 17950 }, { "epoch": 0.8237804598228626, "grad_norm": 0.4630306661128998, "learning_rate": 8.425219140994218e-06, "loss": 0.3884, "step": 17951 }, { "epoch": 0.8238263503281171, "grad_norm": 0.4694446921348572, "learning_rate": 8.425040517030745e-06, "loss": 0.4498, "step": 17952 }, { "epoch": 0.8238722408333716, "grad_norm": 0.463346391916275, "learning_rate": 8.424861884831128e-06, "loss": 0.3904, "step": 17953 }, { "epoch": 0.823918131338626, "grad_norm": 0.46538862586021423, "learning_rate": 8.424683244395801e-06, "loss": 0.4092, "step": 17954 }, { "epoch": 0.8239640218438805, "grad_norm": 0.47268155217170715, "learning_rate": 8.42450459572519e-06, "loss": 0.4475, "step": 17955 }, { "epoch": 0.824009912349135, "grad_norm": 0.43658512830734253, "learning_rate": 8.424325938819728e-06, "loss": 0.4048, "step": 17956 }, { "epoch": 0.8240558028543894, "grad_norm": 0.4581782817840576, "learning_rate": 8.424147273679842e-06, "loss": 0.43, "step": 17957 }, { "epoch": 0.8241016933596439, "grad_norm": 0.47186845541000366, "learning_rate": 8.423968600305963e-06, "loss": 0.4317, "step": 17958 }, { "epoch": 0.8241475838648984, "grad_norm": 0.4590403139591217, "learning_rate": 8.42378991869852e-06, "loss": 0.4038, "step": 17959 }, { "epoch": 0.8241934743701528, "grad_norm": 0.48367607593536377, "learning_rate": 8.423611228857942e-06, "loss": 0.4672, "step": 17960 }, { "epoch": 0.8242393648754073, "grad_norm": 0.4666590392589569, "learning_rate": 8.423432530784659e-06, "loss": 0.4204, "step": 17961 }, { "epoch": 0.8242852553806618, "grad_norm": 0.41143351793289185, "learning_rate": 8.423253824479104e-06, "loss": 0.3132, "step": 17962 }, { "epoch": 0.8243311458859162, "grad_norm": 0.41674065589904785, "learning_rate": 8.4230751099417e-06, "loss": 0.3145, "step": 17963 }, { "epoch": 0.8243770363911707, "grad_norm": 0.44985729455947876, "learning_rate": 8.422896387172884e-06, "loss": 0.4251, "step": 17964 }, { "epoch": 0.8244229268964252, "grad_norm": 0.43448880314826965, "learning_rate": 8.422717656173083e-06, "loss": 0.3628, "step": 17965 }, { "epoch": 0.8244688174016795, "grad_norm": 0.4244597554206848, "learning_rate": 8.422538916942723e-06, "loss": 0.3223, "step": 17966 }, { "epoch": 0.824514707906934, "grad_norm": 0.42590245604515076, "learning_rate": 8.42236016948224e-06, "loss": 0.3684, "step": 17967 }, { "epoch": 0.8245605984121885, "grad_norm": 0.47466611862182617, "learning_rate": 8.42218141379206e-06, "loss": 0.4639, "step": 17968 }, { "epoch": 0.8246064889174429, "grad_norm": 0.5025848746299744, "learning_rate": 8.422002649872612e-06, "loss": 0.4968, "step": 17969 }, { "epoch": 0.8246523794226974, "grad_norm": 0.4497338831424713, "learning_rate": 8.42182387772433e-06, "loss": 0.391, "step": 17970 }, { "epoch": 0.8246982699279519, "grad_norm": 0.4293939769268036, "learning_rate": 8.42164509734764e-06, "loss": 0.366, "step": 17971 }, { "epoch": 0.8247441604332064, "grad_norm": 0.4464573264122009, "learning_rate": 8.421466308742973e-06, "loss": 0.3431, "step": 17972 }, { "epoch": 0.8247900509384608, "grad_norm": 0.47099459171295166, "learning_rate": 8.421287511910763e-06, "loss": 0.3905, "step": 17973 }, { "epoch": 0.8248359414437153, "grad_norm": 0.4275107681751251, "learning_rate": 8.421108706851432e-06, "loss": 0.3297, "step": 17974 }, { "epoch": 0.8248818319489698, "grad_norm": 0.43864890933036804, "learning_rate": 8.420929893565417e-06, "loss": 0.3892, "step": 17975 }, { "epoch": 0.8249277224542242, "grad_norm": 0.4424125850200653, "learning_rate": 8.420751072053146e-06, "loss": 0.3921, "step": 17976 }, { "epoch": 0.8249736129594787, "grad_norm": 0.5345363020896912, "learning_rate": 8.420572242315046e-06, "loss": 0.4595, "step": 17977 }, { "epoch": 0.8250195034647332, "grad_norm": 0.43983420729637146, "learning_rate": 8.42039340435155e-06, "loss": 0.3847, "step": 17978 }, { "epoch": 0.8250653939699876, "grad_norm": 0.4110962748527527, "learning_rate": 8.420214558163087e-06, "loss": 0.304, "step": 17979 }, { "epoch": 0.8251112844752421, "grad_norm": 0.4997156858444214, "learning_rate": 8.420035703750087e-06, "loss": 0.5184, "step": 17980 }, { "epoch": 0.8251571749804966, "grad_norm": 0.44162389636039734, "learning_rate": 8.419856841112982e-06, "loss": 0.3694, "step": 17981 }, { "epoch": 0.825203065485751, "grad_norm": 0.44219037890434265, "learning_rate": 8.419677970252201e-06, "loss": 0.4004, "step": 17982 }, { "epoch": 0.8252489559910055, "grad_norm": 0.4799107313156128, "learning_rate": 8.41949909116817e-06, "loss": 0.5016, "step": 17983 }, { "epoch": 0.82529484649626, "grad_norm": 0.434451162815094, "learning_rate": 8.419320203861327e-06, "loss": 0.3824, "step": 17984 }, { "epoch": 0.8253407370015143, "grad_norm": 0.4709138870239258, "learning_rate": 8.419141308332098e-06, "loss": 0.4162, "step": 17985 }, { "epoch": 0.8253866275067688, "grad_norm": 0.4469585418701172, "learning_rate": 8.418962404580911e-06, "loss": 0.4102, "step": 17986 }, { "epoch": 0.8254325180120233, "grad_norm": 0.43084341287612915, "learning_rate": 8.4187834926082e-06, "loss": 0.3168, "step": 17987 }, { "epoch": 0.8254784085172778, "grad_norm": 0.44466376304626465, "learning_rate": 8.418604572414393e-06, "loss": 0.4108, "step": 17988 }, { "epoch": 0.8255242990225322, "grad_norm": 0.4413444697856903, "learning_rate": 8.418425643999921e-06, "loss": 0.3613, "step": 17989 }, { "epoch": 0.8255701895277867, "grad_norm": 0.44197750091552734, "learning_rate": 8.418246707365215e-06, "loss": 0.3733, "step": 17990 }, { "epoch": 0.8256160800330412, "grad_norm": 0.4442468583583832, "learning_rate": 8.418067762510702e-06, "loss": 0.3969, "step": 17991 }, { "epoch": 0.8256619705382956, "grad_norm": 0.5202842950820923, "learning_rate": 8.417888809436817e-06, "loss": 0.4697, "step": 17992 }, { "epoch": 0.8257078610435501, "grad_norm": 0.4693809449672699, "learning_rate": 8.417709848143989e-06, "loss": 0.4368, "step": 17993 }, { "epoch": 0.8257537515488046, "grad_norm": 0.4337964355945587, "learning_rate": 8.417530878632645e-06, "loss": 0.3444, "step": 17994 }, { "epoch": 0.825799642054059, "grad_norm": 0.4276637136936188, "learning_rate": 8.41735190090322e-06, "loss": 0.3561, "step": 17995 }, { "epoch": 0.8258455325593135, "grad_norm": 0.46315792202949524, "learning_rate": 8.417172914956141e-06, "loss": 0.4518, "step": 17996 }, { "epoch": 0.825891423064568, "grad_norm": 0.4326402544975281, "learning_rate": 8.416993920791841e-06, "loss": 0.366, "step": 17997 }, { "epoch": 0.8259373135698224, "grad_norm": 0.4896598756313324, "learning_rate": 8.416814918410747e-06, "loss": 0.4922, "step": 17998 }, { "epoch": 0.8259832040750769, "grad_norm": 0.409915566444397, "learning_rate": 8.416635907813291e-06, "loss": 0.3404, "step": 17999 }, { "epoch": 0.8260290945803314, "grad_norm": 0.466506689786911, "learning_rate": 8.416456888999907e-06, "loss": 0.4754, "step": 18000 }, { "epoch": 0.8260749850855857, "grad_norm": 0.5669476985931396, "learning_rate": 8.41627786197102e-06, "loss": 0.4346, "step": 18001 }, { "epoch": 0.8261208755908402, "grad_norm": 0.48300784826278687, "learning_rate": 8.416098826727064e-06, "loss": 0.4153, "step": 18002 }, { "epoch": 0.8261667660960947, "grad_norm": 0.44610118865966797, "learning_rate": 8.415919783268468e-06, "loss": 0.3821, "step": 18003 }, { "epoch": 0.8262126566013491, "grad_norm": 0.43876445293426514, "learning_rate": 8.415740731595664e-06, "loss": 0.3887, "step": 18004 }, { "epoch": 0.8262585471066036, "grad_norm": 0.4780218303203583, "learning_rate": 8.41556167170908e-06, "loss": 0.4776, "step": 18005 }, { "epoch": 0.8263044376118581, "grad_norm": 0.49615898728370667, "learning_rate": 8.415382603609151e-06, "loss": 0.5029, "step": 18006 }, { "epoch": 0.8263503281171126, "grad_norm": 0.42977532744407654, "learning_rate": 8.415203527296302e-06, "loss": 0.3576, "step": 18007 }, { "epoch": 0.826396218622367, "grad_norm": 0.4524504244327545, "learning_rate": 8.415024442770966e-06, "loss": 0.4226, "step": 18008 }, { "epoch": 0.8264421091276215, "grad_norm": 0.4816759526729584, "learning_rate": 8.414845350033576e-06, "loss": 0.4386, "step": 18009 }, { "epoch": 0.826487999632876, "grad_norm": 0.4708247184753418, "learning_rate": 8.41466624908456e-06, "loss": 0.3641, "step": 18010 }, { "epoch": 0.8265338901381304, "grad_norm": 0.48955750465393066, "learning_rate": 8.41448713992435e-06, "loss": 0.426, "step": 18011 }, { "epoch": 0.8265797806433849, "grad_norm": 0.4376161992549896, "learning_rate": 8.414308022553374e-06, "loss": 0.3622, "step": 18012 }, { "epoch": 0.8266256711486394, "grad_norm": 0.46941858530044556, "learning_rate": 8.414128896972066e-06, "loss": 0.4396, "step": 18013 }, { "epoch": 0.8266715616538938, "grad_norm": 0.4503255784511566, "learning_rate": 8.413949763180858e-06, "loss": 0.435, "step": 18014 }, { "epoch": 0.8267174521591483, "grad_norm": 0.4422112703323364, "learning_rate": 8.413770621180176e-06, "loss": 0.3759, "step": 18015 }, { "epoch": 0.8267633426644028, "grad_norm": 0.47206711769104004, "learning_rate": 8.413591470970453e-06, "loss": 0.4762, "step": 18016 }, { "epoch": 0.8268092331696572, "grad_norm": 0.7578540444374084, "learning_rate": 8.413412312552118e-06, "loss": 0.4061, "step": 18017 }, { "epoch": 0.8268551236749117, "grad_norm": 0.48906344175338745, "learning_rate": 8.413233145925606e-06, "loss": 0.4378, "step": 18018 }, { "epoch": 0.8269010141801661, "grad_norm": 0.4059424102306366, "learning_rate": 8.413053971091347e-06, "loss": 0.3237, "step": 18019 }, { "epoch": 0.8269469046854205, "grad_norm": 0.4637203812599182, "learning_rate": 8.412874788049768e-06, "loss": 0.3884, "step": 18020 }, { "epoch": 0.826992795190675, "grad_norm": 0.41301819682121277, "learning_rate": 8.412695596801303e-06, "loss": 0.3487, "step": 18021 }, { "epoch": 0.8270386856959295, "grad_norm": 0.47388318181037903, "learning_rate": 8.412516397346383e-06, "loss": 0.4085, "step": 18022 }, { "epoch": 0.827084576201184, "grad_norm": 0.5158187747001648, "learning_rate": 8.412337189685436e-06, "loss": 0.5103, "step": 18023 }, { "epoch": 0.8271304667064384, "grad_norm": 0.5430427193641663, "learning_rate": 8.412157973818895e-06, "loss": 0.5405, "step": 18024 }, { "epoch": 0.8271763572116929, "grad_norm": 0.4692544639110565, "learning_rate": 8.411978749747193e-06, "loss": 0.3055, "step": 18025 }, { "epoch": 0.8272222477169474, "grad_norm": 0.40423405170440674, "learning_rate": 8.411799517470758e-06, "loss": 0.3459, "step": 18026 }, { "epoch": 0.8272681382222018, "grad_norm": 0.4462067484855652, "learning_rate": 8.411620276990022e-06, "loss": 0.4153, "step": 18027 }, { "epoch": 0.8273140287274563, "grad_norm": 0.4292373061180115, "learning_rate": 8.411441028305416e-06, "loss": 0.3682, "step": 18028 }, { "epoch": 0.8273599192327108, "grad_norm": 0.4671582281589508, "learning_rate": 8.411261771417371e-06, "loss": 0.4075, "step": 18029 }, { "epoch": 0.8274058097379652, "grad_norm": 0.4636654257774353, "learning_rate": 8.411082506326318e-06, "loss": 0.3751, "step": 18030 }, { "epoch": 0.8274517002432197, "grad_norm": 0.4226151704788208, "learning_rate": 8.410903233032688e-06, "loss": 0.3436, "step": 18031 }, { "epoch": 0.8274975907484742, "grad_norm": 0.44645023345947266, "learning_rate": 8.410723951536913e-06, "loss": 0.4009, "step": 18032 }, { "epoch": 0.8275434812537286, "grad_norm": 0.4608747065067291, "learning_rate": 8.410544661839421e-06, "loss": 0.3945, "step": 18033 }, { "epoch": 0.8275893717589831, "grad_norm": 0.48663607239723206, "learning_rate": 8.410365363940648e-06, "loss": 0.3995, "step": 18034 }, { "epoch": 0.8276352622642376, "grad_norm": 0.4439545273780823, "learning_rate": 8.410186057841022e-06, "loss": 0.3795, "step": 18035 }, { "epoch": 0.827681152769492, "grad_norm": 0.4712582230567932, "learning_rate": 8.410006743540975e-06, "loss": 0.445, "step": 18036 }, { "epoch": 0.8277270432747464, "grad_norm": 0.4870122969150543, "learning_rate": 8.409827421040938e-06, "loss": 0.4372, "step": 18037 }, { "epoch": 0.8277729337800009, "grad_norm": 0.436261922121048, "learning_rate": 8.409648090341342e-06, "loss": 0.3909, "step": 18038 }, { "epoch": 0.8278188242852553, "grad_norm": 0.45746394991874695, "learning_rate": 8.409468751442618e-06, "loss": 0.419, "step": 18039 }, { "epoch": 0.8278647147905098, "grad_norm": 0.4783158302307129, "learning_rate": 8.409289404345198e-06, "loss": 0.5015, "step": 18040 }, { "epoch": 0.8279106052957643, "grad_norm": 0.4209771752357483, "learning_rate": 8.409110049049514e-06, "loss": 0.3269, "step": 18041 }, { "epoch": 0.8279564958010188, "grad_norm": 0.44271835684776306, "learning_rate": 8.408930685555994e-06, "loss": 0.4013, "step": 18042 }, { "epoch": 0.8280023863062732, "grad_norm": 0.5674777626991272, "learning_rate": 8.408751313865073e-06, "loss": 0.341, "step": 18043 }, { "epoch": 0.8280482768115277, "grad_norm": 0.4137333631515503, "learning_rate": 8.408571933977181e-06, "loss": 0.3353, "step": 18044 }, { "epoch": 0.8280941673167822, "grad_norm": 0.4755222797393799, "learning_rate": 8.40839254589275e-06, "loss": 0.4725, "step": 18045 }, { "epoch": 0.8281400578220366, "grad_norm": 0.4928211271762848, "learning_rate": 8.40821314961221e-06, "loss": 0.497, "step": 18046 }, { "epoch": 0.8281859483272911, "grad_norm": 0.43681150674819946, "learning_rate": 8.408033745135993e-06, "loss": 0.395, "step": 18047 }, { "epoch": 0.8282318388325456, "grad_norm": 0.48329344391822815, "learning_rate": 8.407854332464528e-06, "loss": 0.4503, "step": 18048 }, { "epoch": 0.8282777293378, "grad_norm": 0.48742738366127014, "learning_rate": 8.407674911598254e-06, "loss": 0.5051, "step": 18049 }, { "epoch": 0.8283236198430545, "grad_norm": 0.47547316551208496, "learning_rate": 8.407495482537594e-06, "loss": 0.4974, "step": 18050 }, { "epoch": 0.828369510348309, "grad_norm": 0.4149698317050934, "learning_rate": 8.407316045282983e-06, "loss": 0.358, "step": 18051 }, { "epoch": 0.8284154008535634, "grad_norm": 0.42962345480918884, "learning_rate": 8.407136599834853e-06, "loss": 0.4039, "step": 18052 }, { "epoch": 0.8284612913588179, "grad_norm": 0.4373871684074402, "learning_rate": 8.406957146193635e-06, "loss": 0.3708, "step": 18053 }, { "epoch": 0.8285071818640724, "grad_norm": 0.4803074896335602, "learning_rate": 8.406777684359759e-06, "loss": 0.3612, "step": 18054 }, { "epoch": 0.8285530723693267, "grad_norm": 0.4561117887496948, "learning_rate": 8.406598214333659e-06, "loss": 0.4283, "step": 18055 }, { "epoch": 0.8285989628745812, "grad_norm": 0.47030138969421387, "learning_rate": 8.406418736115764e-06, "loss": 0.4109, "step": 18056 }, { "epoch": 0.8286448533798357, "grad_norm": 0.4206443727016449, "learning_rate": 8.406239249706508e-06, "loss": 0.3379, "step": 18057 }, { "epoch": 0.8286907438850901, "grad_norm": 0.47873127460479736, "learning_rate": 8.406059755106323e-06, "loss": 0.474, "step": 18058 }, { "epoch": 0.8287366343903446, "grad_norm": 0.5150394439697266, "learning_rate": 8.405880252315637e-06, "loss": 0.4727, "step": 18059 }, { "epoch": 0.8287825248955991, "grad_norm": 0.49425581097602844, "learning_rate": 8.405700741334885e-06, "loss": 0.4338, "step": 18060 }, { "epoch": 0.8288284154008536, "grad_norm": 0.409773051738739, "learning_rate": 8.4055212221645e-06, "loss": 0.3596, "step": 18061 }, { "epoch": 0.828874305906108, "grad_norm": 0.39452025294303894, "learning_rate": 8.405341694804907e-06, "loss": 0.2546, "step": 18062 }, { "epoch": 0.8289201964113625, "grad_norm": 0.44225457310676575, "learning_rate": 8.405162159256544e-06, "loss": 0.3571, "step": 18063 }, { "epoch": 0.828966086916617, "grad_norm": 0.45075926184654236, "learning_rate": 8.40498261551984e-06, "loss": 0.3445, "step": 18064 }, { "epoch": 0.8290119774218714, "grad_norm": 0.4690568447113037, "learning_rate": 8.404803063595228e-06, "loss": 0.4195, "step": 18065 }, { "epoch": 0.8290578679271259, "grad_norm": 0.44292670488357544, "learning_rate": 8.40462350348314e-06, "loss": 0.3491, "step": 18066 }, { "epoch": 0.8291037584323804, "grad_norm": 0.4242332875728607, "learning_rate": 8.404443935184006e-06, "loss": 0.3804, "step": 18067 }, { "epoch": 0.8291496489376348, "grad_norm": 0.435048907995224, "learning_rate": 8.404264358698258e-06, "loss": 0.3937, "step": 18068 }, { "epoch": 0.8291955394428893, "grad_norm": 0.5266005396842957, "learning_rate": 8.404084774026331e-06, "loss": 0.5278, "step": 18069 }, { "epoch": 0.8292414299481438, "grad_norm": 0.5077800750732422, "learning_rate": 8.403905181168653e-06, "loss": 0.5428, "step": 18070 }, { "epoch": 0.8292873204533981, "grad_norm": 0.4602810740470886, "learning_rate": 8.403725580125656e-06, "loss": 0.3739, "step": 18071 }, { "epoch": 0.8293332109586526, "grad_norm": 0.4061303436756134, "learning_rate": 8.403545970897775e-06, "loss": 0.2948, "step": 18072 }, { "epoch": 0.8293791014639071, "grad_norm": 0.45500680804252625, "learning_rate": 8.40336635348544e-06, "loss": 0.3849, "step": 18073 }, { "epoch": 0.8294249919691615, "grad_norm": 0.4685817062854767, "learning_rate": 8.403186727889084e-06, "loss": 0.4493, "step": 18074 }, { "epoch": 0.829470882474416, "grad_norm": 0.44068941473960876, "learning_rate": 8.403007094109136e-06, "loss": 0.348, "step": 18075 }, { "epoch": 0.8295167729796705, "grad_norm": 0.47200459241867065, "learning_rate": 8.40282745214603e-06, "loss": 0.4508, "step": 18076 }, { "epoch": 0.829562663484925, "grad_norm": 0.44364723563194275, "learning_rate": 8.402647802000198e-06, "loss": 0.3701, "step": 18077 }, { "epoch": 0.8296085539901794, "grad_norm": 0.4850562512874603, "learning_rate": 8.402468143672074e-06, "loss": 0.448, "step": 18078 }, { "epoch": 0.8296544444954339, "grad_norm": 0.4606315493583679, "learning_rate": 8.402288477162085e-06, "loss": 0.3653, "step": 18079 }, { "epoch": 0.8297003350006884, "grad_norm": 0.4254797697067261, "learning_rate": 8.40210880247067e-06, "loss": 0.3601, "step": 18080 }, { "epoch": 0.8297462255059428, "grad_norm": 0.43236732482910156, "learning_rate": 8.401929119598253e-06, "loss": 0.3325, "step": 18081 }, { "epoch": 0.8297921160111973, "grad_norm": 0.48773565888404846, "learning_rate": 8.401749428545273e-06, "loss": 0.486, "step": 18082 }, { "epoch": 0.8298380065164518, "grad_norm": 0.4719679057598114, "learning_rate": 8.401569729312159e-06, "loss": 0.464, "step": 18083 }, { "epoch": 0.8298838970217062, "grad_norm": 0.4486527144908905, "learning_rate": 8.401390021899342e-06, "loss": 0.4266, "step": 18084 }, { "epoch": 0.8299297875269607, "grad_norm": 0.49645915627479553, "learning_rate": 8.401210306307255e-06, "loss": 0.4334, "step": 18085 }, { "epoch": 0.8299756780322152, "grad_norm": 0.47308865189552307, "learning_rate": 8.401030582536332e-06, "loss": 0.4524, "step": 18086 }, { "epoch": 0.8300215685374696, "grad_norm": 0.43211984634399414, "learning_rate": 8.400850850587004e-06, "loss": 0.355, "step": 18087 }, { "epoch": 0.830067459042724, "grad_norm": 0.42160764336586, "learning_rate": 8.400671110459701e-06, "loss": 0.3562, "step": 18088 }, { "epoch": 0.8301133495479786, "grad_norm": 0.4502413868904114, "learning_rate": 8.40049136215486e-06, "loss": 0.3833, "step": 18089 }, { "epoch": 0.8301592400532329, "grad_norm": 0.7507365942001343, "learning_rate": 8.40031160567291e-06, "loss": 0.3645, "step": 18090 }, { "epoch": 0.8302051305584874, "grad_norm": 0.455634742975235, "learning_rate": 8.400131841014283e-06, "loss": 0.4178, "step": 18091 }, { "epoch": 0.8302510210637419, "grad_norm": 0.46778732538223267, "learning_rate": 8.399952068179411e-06, "loss": 0.4109, "step": 18092 }, { "epoch": 0.8302969115689963, "grad_norm": 0.4121411144733429, "learning_rate": 8.399772287168729e-06, "loss": 0.3349, "step": 18093 }, { "epoch": 0.8303428020742508, "grad_norm": 0.4600745439529419, "learning_rate": 8.399592497982667e-06, "loss": 0.3824, "step": 18094 }, { "epoch": 0.8303886925795053, "grad_norm": 0.45569780468940735, "learning_rate": 8.399412700621658e-06, "loss": 0.3557, "step": 18095 }, { "epoch": 0.8304345830847598, "grad_norm": 0.45500025153160095, "learning_rate": 8.399232895086134e-06, "loss": 0.4198, "step": 18096 }, { "epoch": 0.8304804735900142, "grad_norm": 0.438245952129364, "learning_rate": 8.399053081376529e-06, "loss": 0.3797, "step": 18097 }, { "epoch": 0.8305263640952687, "grad_norm": 0.40246495604515076, "learning_rate": 8.398873259493273e-06, "loss": 0.318, "step": 18098 }, { "epoch": 0.8305722546005232, "grad_norm": 0.44568273425102234, "learning_rate": 8.3986934294368e-06, "loss": 0.3898, "step": 18099 }, { "epoch": 0.8306181451057776, "grad_norm": 0.4664110839366913, "learning_rate": 8.398513591207542e-06, "loss": 0.4373, "step": 18100 }, { "epoch": 0.8306640356110321, "grad_norm": 0.4412544369697571, "learning_rate": 8.39833374480593e-06, "loss": 0.3627, "step": 18101 }, { "epoch": 0.8307099261162866, "grad_norm": 0.43946385383605957, "learning_rate": 8.398153890232398e-06, "loss": 0.4009, "step": 18102 }, { "epoch": 0.830755816621541, "grad_norm": 0.4306648373603821, "learning_rate": 8.397974027487382e-06, "loss": 0.3395, "step": 18103 }, { "epoch": 0.8308017071267955, "grad_norm": 0.45021679997444153, "learning_rate": 8.397794156571307e-06, "loss": 0.4637, "step": 18104 }, { "epoch": 0.83084759763205, "grad_norm": 0.46398481726646423, "learning_rate": 8.397614277484611e-06, "loss": 0.375, "step": 18105 }, { "epoch": 0.8308934881373043, "grad_norm": 0.4511909782886505, "learning_rate": 8.397434390227726e-06, "loss": 0.3994, "step": 18106 }, { "epoch": 0.8309393786425588, "grad_norm": 0.45857441425323486, "learning_rate": 8.397254494801082e-06, "loss": 0.3875, "step": 18107 }, { "epoch": 0.8309852691478133, "grad_norm": 0.44027769565582275, "learning_rate": 8.397074591205113e-06, "loss": 0.3892, "step": 18108 }, { "epoch": 0.8310311596530677, "grad_norm": 0.4730708599090576, "learning_rate": 8.396894679440253e-06, "loss": 0.439, "step": 18109 }, { "epoch": 0.8310770501583222, "grad_norm": 0.4575815498828888, "learning_rate": 8.396714759506931e-06, "loss": 0.3903, "step": 18110 }, { "epoch": 0.8311229406635767, "grad_norm": 0.45963162183761597, "learning_rate": 8.396534831405584e-06, "loss": 0.3984, "step": 18111 }, { "epoch": 0.8311688311688312, "grad_norm": 0.45422109961509705, "learning_rate": 8.396354895136642e-06, "loss": 0.4081, "step": 18112 }, { "epoch": 0.8312147216740856, "grad_norm": 0.4470139443874359, "learning_rate": 8.396174950700539e-06, "loss": 0.3702, "step": 18113 }, { "epoch": 0.8312606121793401, "grad_norm": 0.49207833409309387, "learning_rate": 8.395994998097707e-06, "loss": 0.48, "step": 18114 }, { "epoch": 0.8313065026845946, "grad_norm": 0.4725109040737152, "learning_rate": 8.395815037328579e-06, "loss": 0.4251, "step": 18115 }, { "epoch": 0.831352393189849, "grad_norm": 0.4591810703277588, "learning_rate": 8.395635068393586e-06, "loss": 0.3748, "step": 18116 }, { "epoch": 0.8313982836951035, "grad_norm": 0.5284903645515442, "learning_rate": 8.395455091293165e-06, "loss": 0.4397, "step": 18117 }, { "epoch": 0.831444174200358, "grad_norm": 0.4265201985836029, "learning_rate": 8.395275106027745e-06, "loss": 0.3229, "step": 18118 }, { "epoch": 0.8314900647056124, "grad_norm": 0.4584498405456543, "learning_rate": 8.395095112597758e-06, "loss": 0.4244, "step": 18119 }, { "epoch": 0.8315359552108669, "grad_norm": 0.46957993507385254, "learning_rate": 8.394915111003642e-06, "loss": 0.3933, "step": 18120 }, { "epoch": 0.8315818457161214, "grad_norm": 0.4478274881839752, "learning_rate": 8.394735101245823e-06, "loss": 0.3288, "step": 18121 }, { "epoch": 0.8316277362213758, "grad_norm": 0.4760933220386505, "learning_rate": 8.39455508332474e-06, "loss": 0.4662, "step": 18122 }, { "epoch": 0.8316736267266303, "grad_norm": 0.4454164206981659, "learning_rate": 8.394375057240825e-06, "loss": 0.3571, "step": 18123 }, { "epoch": 0.8317195172318848, "grad_norm": 0.46467339992523193, "learning_rate": 8.394195022994506e-06, "loss": 0.363, "step": 18124 }, { "epoch": 0.8317654077371391, "grad_norm": 0.49284371733665466, "learning_rate": 8.39401498058622e-06, "loss": 0.5076, "step": 18125 }, { "epoch": 0.8318112982423936, "grad_norm": 0.4246115982532501, "learning_rate": 8.3938349300164e-06, "loss": 0.3383, "step": 18126 }, { "epoch": 0.8318571887476481, "grad_norm": 0.5000753402709961, "learning_rate": 8.393654871285477e-06, "loss": 0.4716, "step": 18127 }, { "epoch": 0.8319030792529025, "grad_norm": 0.48261353373527527, "learning_rate": 8.393474804393887e-06, "loss": 0.3555, "step": 18128 }, { "epoch": 0.831948969758157, "grad_norm": 0.4638386368751526, "learning_rate": 8.393294729342058e-06, "loss": 0.3854, "step": 18129 }, { "epoch": 0.8319948602634115, "grad_norm": 0.49651774764060974, "learning_rate": 8.39311464613043e-06, "loss": 0.4523, "step": 18130 }, { "epoch": 0.832040750768666, "grad_norm": 0.42594966292381287, "learning_rate": 8.39293455475943e-06, "loss": 0.3154, "step": 18131 }, { "epoch": 0.8320866412739204, "grad_norm": 0.42838552594184875, "learning_rate": 8.392754455229493e-06, "loss": 0.3466, "step": 18132 }, { "epoch": 0.8321325317791749, "grad_norm": 0.47378867864608765, "learning_rate": 8.392574347541053e-06, "loss": 0.3814, "step": 18133 }, { "epoch": 0.8321784222844294, "grad_norm": 0.41699907183647156, "learning_rate": 8.392394231694543e-06, "loss": 0.3009, "step": 18134 }, { "epoch": 0.8322243127896838, "grad_norm": 0.45433488488197327, "learning_rate": 8.392214107690392e-06, "loss": 0.3768, "step": 18135 }, { "epoch": 0.8322702032949383, "grad_norm": 0.4418494701385498, "learning_rate": 8.392033975529042e-06, "loss": 0.3363, "step": 18136 }, { "epoch": 0.8323160938001928, "grad_norm": 0.42773592472076416, "learning_rate": 8.391853835210917e-06, "loss": 0.3528, "step": 18137 }, { "epoch": 0.8323619843054472, "grad_norm": 0.4696636199951172, "learning_rate": 8.391673686736455e-06, "loss": 0.457, "step": 18138 }, { "epoch": 0.8324078748107017, "grad_norm": 0.467499315738678, "learning_rate": 8.39149353010609e-06, "loss": 0.4812, "step": 18139 }, { "epoch": 0.8324537653159562, "grad_norm": 0.434304416179657, "learning_rate": 8.391313365320251e-06, "loss": 0.3549, "step": 18140 }, { "epoch": 0.8324996558212105, "grad_norm": 0.46710005402565, "learning_rate": 8.391133192379375e-06, "loss": 0.386, "step": 18141 }, { "epoch": 0.832545546326465, "grad_norm": 0.4291851222515106, "learning_rate": 8.390953011283893e-06, "loss": 0.3763, "step": 18142 }, { "epoch": 0.8325914368317195, "grad_norm": 0.45171165466308594, "learning_rate": 8.390772822034241e-06, "loss": 0.4121, "step": 18143 }, { "epoch": 0.8326373273369739, "grad_norm": 0.4355604946613312, "learning_rate": 8.390592624630849e-06, "loss": 0.3786, "step": 18144 }, { "epoch": 0.8326832178422284, "grad_norm": 0.4775046408176422, "learning_rate": 8.390412419074151e-06, "loss": 0.4161, "step": 18145 }, { "epoch": 0.8327291083474829, "grad_norm": 0.47253572940826416, "learning_rate": 8.390232205364584e-06, "loss": 0.4262, "step": 18146 }, { "epoch": 0.8327749988527373, "grad_norm": 0.43773478269577026, "learning_rate": 8.390051983502576e-06, "loss": 0.3803, "step": 18147 }, { "epoch": 0.8328208893579918, "grad_norm": 0.44730404019355774, "learning_rate": 8.389871753488565e-06, "loss": 0.4128, "step": 18148 }, { "epoch": 0.8328667798632463, "grad_norm": 0.46576371788978577, "learning_rate": 8.389691515322979e-06, "loss": 0.4009, "step": 18149 }, { "epoch": 0.8329126703685008, "grad_norm": 0.4602697193622589, "learning_rate": 8.389511269006258e-06, "loss": 0.4042, "step": 18150 }, { "epoch": 0.8329585608737552, "grad_norm": 0.4739201068878174, "learning_rate": 8.38933101453883e-06, "loss": 0.456, "step": 18151 }, { "epoch": 0.8330044513790097, "grad_norm": 0.44872918725013733, "learning_rate": 8.389150751921133e-06, "loss": 0.3798, "step": 18152 }, { "epoch": 0.8330503418842642, "grad_norm": 0.44699302315711975, "learning_rate": 8.388970481153595e-06, "loss": 0.3213, "step": 18153 }, { "epoch": 0.8330962323895186, "grad_norm": 0.48090022802352905, "learning_rate": 8.388790202236656e-06, "loss": 0.4092, "step": 18154 }, { "epoch": 0.8331421228947731, "grad_norm": 0.45971307158470154, "learning_rate": 8.388609915170745e-06, "loss": 0.3906, "step": 18155 }, { "epoch": 0.8331880134000276, "grad_norm": 0.655051589012146, "learning_rate": 8.388429619956296e-06, "loss": 0.3838, "step": 18156 }, { "epoch": 0.833233903905282, "grad_norm": 0.4284975230693817, "learning_rate": 8.388249316593742e-06, "loss": 0.3236, "step": 18157 }, { "epoch": 0.8332797944105365, "grad_norm": 0.45652008056640625, "learning_rate": 8.38806900508352e-06, "loss": 0.3793, "step": 18158 }, { "epoch": 0.833325684915791, "grad_norm": 0.46670088171958923, "learning_rate": 8.38788868542606e-06, "loss": 0.4473, "step": 18159 }, { "epoch": 0.8333715754210453, "grad_norm": 0.4363967776298523, "learning_rate": 8.387708357621796e-06, "loss": 0.3746, "step": 18160 }, { "epoch": 0.8334174659262998, "grad_norm": 0.48592546582221985, "learning_rate": 8.387528021671166e-06, "loss": 0.4717, "step": 18161 }, { "epoch": 0.8334633564315543, "grad_norm": 0.4747212529182434, "learning_rate": 8.387347677574598e-06, "loss": 0.4166, "step": 18162 }, { "epoch": 0.8335092469368087, "grad_norm": 0.463144987821579, "learning_rate": 8.387167325332527e-06, "loss": 0.4142, "step": 18163 }, { "epoch": 0.8335551374420632, "grad_norm": 0.4370797574520111, "learning_rate": 8.386986964945388e-06, "loss": 0.3815, "step": 18164 }, { "epoch": 0.8336010279473177, "grad_norm": 0.4630209803581238, "learning_rate": 8.386806596413615e-06, "loss": 0.4011, "step": 18165 }, { "epoch": 0.8336469184525722, "grad_norm": 0.41693389415740967, "learning_rate": 8.386626219737641e-06, "loss": 0.3209, "step": 18166 }, { "epoch": 0.8336928089578266, "grad_norm": 0.4283023178577423, "learning_rate": 8.3864458349179e-06, "loss": 0.3379, "step": 18167 }, { "epoch": 0.8337386994630811, "grad_norm": 0.47956883907318115, "learning_rate": 8.386265441954825e-06, "loss": 0.4359, "step": 18168 }, { "epoch": 0.8337845899683356, "grad_norm": 0.46075186133384705, "learning_rate": 8.38608504084885e-06, "loss": 0.4225, "step": 18169 }, { "epoch": 0.83383048047359, "grad_norm": 0.4661857783794403, "learning_rate": 8.38590463160041e-06, "loss": 0.4501, "step": 18170 }, { "epoch": 0.8338763709788445, "grad_norm": 0.44938957691192627, "learning_rate": 8.385724214209939e-06, "loss": 0.3756, "step": 18171 }, { "epoch": 0.833922261484099, "grad_norm": 0.45433881878852844, "learning_rate": 8.385543788677868e-06, "loss": 0.4328, "step": 18172 }, { "epoch": 0.8339681519893534, "grad_norm": 0.4779817461967468, "learning_rate": 8.385363355004634e-06, "loss": 0.4703, "step": 18173 }, { "epoch": 0.8340140424946079, "grad_norm": 0.47456952929496765, "learning_rate": 8.385182913190669e-06, "loss": 0.4505, "step": 18174 }, { "epoch": 0.8340599329998624, "grad_norm": 0.47255849838256836, "learning_rate": 8.385002463236407e-06, "loss": 0.4404, "step": 18175 }, { "epoch": 0.8341058235051168, "grad_norm": 0.4196443259716034, "learning_rate": 8.384822005142283e-06, "loss": 0.3262, "step": 18176 }, { "epoch": 0.8341517140103712, "grad_norm": 0.49156394600868225, "learning_rate": 8.38464153890873e-06, "loss": 0.4085, "step": 18177 }, { "epoch": 0.8341976045156257, "grad_norm": 0.4468965530395508, "learning_rate": 8.384461064536183e-06, "loss": 0.3537, "step": 18178 }, { "epoch": 0.8342434950208801, "grad_norm": 0.4583326578140259, "learning_rate": 8.384280582025075e-06, "loss": 0.4362, "step": 18179 }, { "epoch": 0.8342893855261346, "grad_norm": 0.4402487874031067, "learning_rate": 8.384100091375839e-06, "loss": 0.3745, "step": 18180 }, { "epoch": 0.8343352760313891, "grad_norm": 0.4688296616077423, "learning_rate": 8.383919592588912e-06, "loss": 0.4041, "step": 18181 }, { "epoch": 0.8343811665366435, "grad_norm": 0.4449317455291748, "learning_rate": 8.383739085664725e-06, "loss": 0.3708, "step": 18182 }, { "epoch": 0.834427057041898, "grad_norm": 0.4405859708786011, "learning_rate": 8.383558570603715e-06, "loss": 0.3814, "step": 18183 }, { "epoch": 0.8344729475471525, "grad_norm": 0.45990121364593506, "learning_rate": 8.383378047406313e-06, "loss": 0.4024, "step": 18184 }, { "epoch": 0.834518838052407, "grad_norm": 0.4787445068359375, "learning_rate": 8.383197516072956e-06, "loss": 0.4482, "step": 18185 }, { "epoch": 0.8345647285576614, "grad_norm": 0.4425095319747925, "learning_rate": 8.383016976604077e-06, "loss": 0.3798, "step": 18186 }, { "epoch": 0.8346106190629159, "grad_norm": 0.46628013253211975, "learning_rate": 8.382836429000108e-06, "loss": 0.4836, "step": 18187 }, { "epoch": 0.8346565095681704, "grad_norm": 0.4309116005897522, "learning_rate": 8.382655873261487e-06, "loss": 0.3399, "step": 18188 }, { "epoch": 0.8347024000734248, "grad_norm": 0.47421401739120483, "learning_rate": 8.382475309388645e-06, "loss": 0.4396, "step": 18189 }, { "epoch": 0.8347482905786793, "grad_norm": 0.4512055516242981, "learning_rate": 8.382294737382018e-06, "loss": 0.3535, "step": 18190 }, { "epoch": 0.8347941810839338, "grad_norm": 0.4429827928543091, "learning_rate": 8.382114157242038e-06, "loss": 0.4072, "step": 18191 }, { "epoch": 0.8348400715891882, "grad_norm": 0.5159306526184082, "learning_rate": 8.381933568969143e-06, "loss": 0.525, "step": 18192 }, { "epoch": 0.8348859620944427, "grad_norm": 0.45336470007896423, "learning_rate": 8.381752972563764e-06, "loss": 0.3956, "step": 18193 }, { "epoch": 0.8349318525996972, "grad_norm": 0.47070544958114624, "learning_rate": 8.381572368026337e-06, "loss": 0.4198, "step": 18194 }, { "epoch": 0.8349777431049515, "grad_norm": 0.47601374983787537, "learning_rate": 8.381391755357295e-06, "loss": 0.4548, "step": 18195 }, { "epoch": 0.835023633610206, "grad_norm": 0.4822837710380554, "learning_rate": 8.381211134557074e-06, "loss": 0.4666, "step": 18196 }, { "epoch": 0.8350695241154605, "grad_norm": 0.448219358921051, "learning_rate": 8.381030505626106e-06, "loss": 0.3418, "step": 18197 }, { "epoch": 0.8351154146207149, "grad_norm": 0.44391027092933655, "learning_rate": 8.380849868564828e-06, "loss": 0.3623, "step": 18198 }, { "epoch": 0.8351613051259694, "grad_norm": 0.48952609300613403, "learning_rate": 8.380669223373674e-06, "loss": 0.4899, "step": 18199 }, { "epoch": 0.8352071956312239, "grad_norm": 0.43809714913368225, "learning_rate": 8.380488570053076e-06, "loss": 0.3717, "step": 18200 }, { "epoch": 0.8352530861364784, "grad_norm": 0.44115597009658813, "learning_rate": 8.380307908603468e-06, "loss": 0.3665, "step": 18201 }, { "epoch": 0.8352989766417328, "grad_norm": 0.441062867641449, "learning_rate": 8.38012723902529e-06, "loss": 0.3884, "step": 18202 }, { "epoch": 0.8353448671469873, "grad_norm": 0.4904654920101166, "learning_rate": 8.37994656131897e-06, "loss": 0.451, "step": 18203 }, { "epoch": 0.8353907576522418, "grad_norm": 0.45128825306892395, "learning_rate": 8.379765875484946e-06, "loss": 0.4153, "step": 18204 }, { "epoch": 0.8354366481574962, "grad_norm": 0.4851352870464325, "learning_rate": 8.379585181523652e-06, "loss": 0.4445, "step": 18205 }, { "epoch": 0.8354825386627507, "grad_norm": 0.41595596075057983, "learning_rate": 8.379404479435523e-06, "loss": 0.3566, "step": 18206 }, { "epoch": 0.8355284291680052, "grad_norm": 0.45313572883605957, "learning_rate": 8.379223769220993e-06, "loss": 0.3559, "step": 18207 }, { "epoch": 0.8355743196732596, "grad_norm": 0.45733004808425903, "learning_rate": 8.379043050880495e-06, "loss": 0.4468, "step": 18208 }, { "epoch": 0.8356202101785141, "grad_norm": 0.4851774275302887, "learning_rate": 8.378862324414466e-06, "loss": 0.4502, "step": 18209 }, { "epoch": 0.8356661006837686, "grad_norm": 0.44199225306510925, "learning_rate": 8.37868158982334e-06, "loss": 0.3817, "step": 18210 }, { "epoch": 0.835711991189023, "grad_norm": 0.46941936016082764, "learning_rate": 8.378500847107548e-06, "loss": 0.4253, "step": 18211 }, { "epoch": 0.8357578816942774, "grad_norm": 0.43103986978530884, "learning_rate": 8.378320096267531e-06, "loss": 0.3524, "step": 18212 }, { "epoch": 0.835803772199532, "grad_norm": 0.45080307126045227, "learning_rate": 8.37813933730372e-06, "loss": 0.4403, "step": 18213 }, { "epoch": 0.8358496627047863, "grad_norm": 0.43782469630241394, "learning_rate": 8.37795857021655e-06, "loss": 0.3331, "step": 18214 }, { "epoch": 0.8358955532100408, "grad_norm": 0.4822864830493927, "learning_rate": 8.377777795006454e-06, "loss": 0.5019, "step": 18215 }, { "epoch": 0.8359414437152953, "grad_norm": 0.4819743037223816, "learning_rate": 8.37759701167387e-06, "loss": 0.4553, "step": 18216 }, { "epoch": 0.8359873342205497, "grad_norm": 0.4571407735347748, "learning_rate": 8.37741622021923e-06, "loss": 0.3739, "step": 18217 }, { "epoch": 0.8360332247258042, "grad_norm": 0.4282774329185486, "learning_rate": 8.377235420642971e-06, "loss": 0.3644, "step": 18218 }, { "epoch": 0.8360791152310587, "grad_norm": 0.4403384029865265, "learning_rate": 8.377054612945528e-06, "loss": 0.3684, "step": 18219 }, { "epoch": 0.8361250057363132, "grad_norm": 0.42528074979782104, "learning_rate": 8.376873797127332e-06, "loss": 0.3603, "step": 18220 }, { "epoch": 0.8361708962415676, "grad_norm": 0.4658953845500946, "learning_rate": 8.37669297318882e-06, "loss": 0.3937, "step": 18221 }, { "epoch": 0.8362167867468221, "grad_norm": 0.41958317160606384, "learning_rate": 8.376512141130429e-06, "loss": 0.307, "step": 18222 }, { "epoch": 0.8362626772520766, "grad_norm": 0.5113511085510254, "learning_rate": 8.376331300952591e-06, "loss": 0.4992, "step": 18223 }, { "epoch": 0.836308567757331, "grad_norm": 0.45085346698760986, "learning_rate": 8.376150452655743e-06, "loss": 0.3644, "step": 18224 }, { "epoch": 0.8363544582625855, "grad_norm": 0.4446466267108917, "learning_rate": 8.375969596240318e-06, "loss": 0.3469, "step": 18225 }, { "epoch": 0.83640034876784, "grad_norm": 0.4998534321784973, "learning_rate": 8.375788731706751e-06, "loss": 0.4714, "step": 18226 }, { "epoch": 0.8364462392730944, "grad_norm": 0.42451003193855286, "learning_rate": 8.375607859055478e-06, "loss": 0.3173, "step": 18227 }, { "epoch": 0.8364921297783489, "grad_norm": 0.44549790024757385, "learning_rate": 8.375426978286934e-06, "loss": 0.348, "step": 18228 }, { "epoch": 0.8365380202836034, "grad_norm": 0.4476400911808014, "learning_rate": 8.375246089401553e-06, "loss": 0.3919, "step": 18229 }, { "epoch": 0.8365839107888577, "grad_norm": 0.43966755270957947, "learning_rate": 8.37506519239977e-06, "loss": 0.3566, "step": 18230 }, { "epoch": 0.8366298012941122, "grad_norm": 0.48417019844055176, "learning_rate": 8.37488428728202e-06, "loss": 0.4397, "step": 18231 }, { "epoch": 0.8366756917993667, "grad_norm": 0.4825865924358368, "learning_rate": 8.374703374048738e-06, "loss": 0.452, "step": 18232 }, { "epoch": 0.8367215823046211, "grad_norm": 0.4816177189350128, "learning_rate": 8.374522452700361e-06, "loss": 0.4485, "step": 18233 }, { "epoch": 0.8367674728098756, "grad_norm": 0.43582573533058167, "learning_rate": 8.374341523237323e-06, "loss": 0.3637, "step": 18234 }, { "epoch": 0.8368133633151301, "grad_norm": 0.4633936285972595, "learning_rate": 8.374160585660058e-06, "loss": 0.4161, "step": 18235 }, { "epoch": 0.8368592538203845, "grad_norm": 0.4648289978504181, "learning_rate": 8.373979639969002e-06, "loss": 0.3864, "step": 18236 }, { "epoch": 0.836905144325639, "grad_norm": 0.45461776852607727, "learning_rate": 8.373798686164588e-06, "loss": 0.3718, "step": 18237 }, { "epoch": 0.8369510348308935, "grad_norm": 0.43692269921302795, "learning_rate": 8.373617724247253e-06, "loss": 0.3473, "step": 18238 }, { "epoch": 0.836996925336148, "grad_norm": 0.45504483580589294, "learning_rate": 8.373436754217434e-06, "loss": 0.4197, "step": 18239 }, { "epoch": 0.8370428158414024, "grad_norm": 0.47626927495002747, "learning_rate": 8.373255776075564e-06, "loss": 0.4046, "step": 18240 }, { "epoch": 0.8370887063466569, "grad_norm": 0.4373861849308014, "learning_rate": 8.373074789822079e-06, "loss": 0.389, "step": 18241 }, { "epoch": 0.8371345968519114, "grad_norm": 0.46969908475875854, "learning_rate": 8.372893795457413e-06, "loss": 0.404, "step": 18242 }, { "epoch": 0.8371804873571658, "grad_norm": 0.4690396189689636, "learning_rate": 8.372712792982001e-06, "loss": 0.3867, "step": 18243 }, { "epoch": 0.8372263778624203, "grad_norm": 0.4490091800689697, "learning_rate": 8.372531782396281e-06, "loss": 0.3749, "step": 18244 }, { "epoch": 0.8372722683676748, "grad_norm": 0.5162858366966248, "learning_rate": 8.372350763700685e-06, "loss": 0.4186, "step": 18245 }, { "epoch": 0.8373181588729292, "grad_norm": 0.531032919883728, "learning_rate": 8.37216973689565e-06, "loss": 0.5088, "step": 18246 }, { "epoch": 0.8373640493781837, "grad_norm": 0.38909170031547546, "learning_rate": 8.37198870198161e-06, "loss": 0.2762, "step": 18247 }, { "epoch": 0.8374099398834381, "grad_norm": 0.45522668957710266, "learning_rate": 8.371807658959004e-06, "loss": 0.4091, "step": 18248 }, { "epoch": 0.8374558303886925, "grad_norm": 0.45884183049201965, "learning_rate": 8.371626607828262e-06, "loss": 0.4561, "step": 18249 }, { "epoch": 0.837501720893947, "grad_norm": 0.4830606281757355, "learning_rate": 8.371445548589823e-06, "loss": 0.4357, "step": 18250 }, { "epoch": 0.8375476113992015, "grad_norm": 0.5501541495323181, "learning_rate": 8.371264481244124e-06, "loss": 0.458, "step": 18251 }, { "epoch": 0.8375935019044559, "grad_norm": 0.46663227677345276, "learning_rate": 8.371083405791595e-06, "loss": 0.4706, "step": 18252 }, { "epoch": 0.8376393924097104, "grad_norm": 0.4751712381839752, "learning_rate": 8.370902322232675e-06, "loss": 0.407, "step": 18253 }, { "epoch": 0.8376852829149649, "grad_norm": 0.4225381016731262, "learning_rate": 8.3707212305678e-06, "loss": 0.3707, "step": 18254 }, { "epoch": 0.8377311734202194, "grad_norm": 0.45665115118026733, "learning_rate": 8.370540130797401e-06, "loss": 0.4122, "step": 18255 }, { "epoch": 0.8377770639254738, "grad_norm": 0.44249656796455383, "learning_rate": 8.370359022921921e-06, "loss": 0.4172, "step": 18256 }, { "epoch": 0.8378229544307283, "grad_norm": 0.4695450961589813, "learning_rate": 8.37017790694179e-06, "loss": 0.3845, "step": 18257 }, { "epoch": 0.8378688449359828, "grad_norm": 0.44053804874420166, "learning_rate": 8.369996782857444e-06, "loss": 0.3528, "step": 18258 }, { "epoch": 0.8379147354412372, "grad_norm": 0.5228598713874817, "learning_rate": 8.36981565066932e-06, "loss": 0.4423, "step": 18259 }, { "epoch": 0.8379606259464917, "grad_norm": 0.47499021887779236, "learning_rate": 8.36963451037785e-06, "loss": 0.4314, "step": 18260 }, { "epoch": 0.8380065164517462, "grad_norm": 0.43316978216171265, "learning_rate": 8.369453361983477e-06, "loss": 0.3891, "step": 18261 }, { "epoch": 0.8380524069570006, "grad_norm": 0.40092164278030396, "learning_rate": 8.36927220548663e-06, "loss": 0.3224, "step": 18262 }, { "epoch": 0.8380982974622551, "grad_norm": 0.4555911719799042, "learning_rate": 8.369091040887745e-06, "loss": 0.3705, "step": 18263 }, { "epoch": 0.8381441879675096, "grad_norm": 0.4745902419090271, "learning_rate": 8.368909868187263e-06, "loss": 0.391, "step": 18264 }, { "epoch": 0.8381900784727639, "grad_norm": 0.48085346817970276, "learning_rate": 8.368728687385614e-06, "loss": 0.4961, "step": 18265 }, { "epoch": 0.8382359689780184, "grad_norm": 0.41196852922439575, "learning_rate": 8.368547498483235e-06, "loss": 0.2914, "step": 18266 }, { "epoch": 0.8382818594832729, "grad_norm": 0.48756515979766846, "learning_rate": 8.368366301480564e-06, "loss": 0.5019, "step": 18267 }, { "epoch": 0.8383277499885273, "grad_norm": 0.4667830765247345, "learning_rate": 8.368185096378034e-06, "loss": 0.4408, "step": 18268 }, { "epoch": 0.8383736404937818, "grad_norm": 0.41695255041122437, "learning_rate": 8.368003883176082e-06, "loss": 0.3197, "step": 18269 }, { "epoch": 0.8384195309990363, "grad_norm": 0.4801861345767975, "learning_rate": 8.367822661875145e-06, "loss": 0.3842, "step": 18270 }, { "epoch": 0.8384654215042907, "grad_norm": 0.4447426497936249, "learning_rate": 8.367641432475654e-06, "loss": 0.3564, "step": 18271 }, { "epoch": 0.8385113120095452, "grad_norm": 0.476460725069046, "learning_rate": 8.367460194978051e-06, "loss": 0.4585, "step": 18272 }, { "epoch": 0.8385572025147997, "grad_norm": 0.43056103587150574, "learning_rate": 8.36727894938277e-06, "loss": 0.4062, "step": 18273 }, { "epoch": 0.8386030930200542, "grad_norm": 0.4621809422969818, "learning_rate": 8.367097695690243e-06, "loss": 0.3807, "step": 18274 }, { "epoch": 0.8386489835253086, "grad_norm": 0.47685378789901733, "learning_rate": 8.366916433900911e-06, "loss": 0.3642, "step": 18275 }, { "epoch": 0.8386948740305631, "grad_norm": 0.4333129823207855, "learning_rate": 8.366735164015208e-06, "loss": 0.3853, "step": 18276 }, { "epoch": 0.8387407645358176, "grad_norm": 0.45602378249168396, "learning_rate": 8.366553886033567e-06, "loss": 0.3948, "step": 18277 }, { "epoch": 0.838786655041072, "grad_norm": 0.4518270194530487, "learning_rate": 8.366372599956427e-06, "loss": 0.4088, "step": 18278 }, { "epoch": 0.8388325455463265, "grad_norm": 0.44950994849205017, "learning_rate": 8.366191305784223e-06, "loss": 0.3594, "step": 18279 }, { "epoch": 0.838878436051581, "grad_norm": 0.48702433705329895, "learning_rate": 8.366010003517392e-06, "loss": 0.4785, "step": 18280 }, { "epoch": 0.8389243265568354, "grad_norm": 0.5133915543556213, "learning_rate": 8.365828693156372e-06, "loss": 0.4987, "step": 18281 }, { "epoch": 0.8389702170620899, "grad_norm": 0.4809609353542328, "learning_rate": 8.365647374701592e-06, "loss": 0.3996, "step": 18282 }, { "epoch": 0.8390161075673443, "grad_norm": 0.45083382725715637, "learning_rate": 8.365466048153496e-06, "loss": 0.3895, "step": 18283 }, { "epoch": 0.8390619980725987, "grad_norm": 0.4805561602115631, "learning_rate": 8.365284713512513e-06, "loss": 0.4041, "step": 18284 }, { "epoch": 0.8391078885778532, "grad_norm": 0.4367426037788391, "learning_rate": 8.365103370779085e-06, "loss": 0.3589, "step": 18285 }, { "epoch": 0.8391537790831077, "grad_norm": 0.4850913882255554, "learning_rate": 8.364922019953644e-06, "loss": 0.4533, "step": 18286 }, { "epoch": 0.8391996695883621, "grad_norm": 0.4381568431854248, "learning_rate": 8.364740661036627e-06, "loss": 0.3806, "step": 18287 }, { "epoch": 0.8392455600936166, "grad_norm": 0.47120267152786255, "learning_rate": 8.364559294028472e-06, "loss": 0.472, "step": 18288 }, { "epoch": 0.8392914505988711, "grad_norm": 0.42072293162345886, "learning_rate": 8.364377918929613e-06, "loss": 0.3556, "step": 18289 }, { "epoch": 0.8393373411041256, "grad_norm": 0.44752365350723267, "learning_rate": 8.364196535740488e-06, "loss": 0.3455, "step": 18290 }, { "epoch": 0.83938323160938, "grad_norm": 0.4691773056983948, "learning_rate": 8.364015144461531e-06, "loss": 0.428, "step": 18291 }, { "epoch": 0.8394291221146345, "grad_norm": 0.4422893226146698, "learning_rate": 8.36383374509318e-06, "loss": 0.3528, "step": 18292 }, { "epoch": 0.839475012619889, "grad_norm": 0.4254034757614136, "learning_rate": 8.36365233763587e-06, "loss": 0.3421, "step": 18293 }, { "epoch": 0.8395209031251434, "grad_norm": 0.4813319444656372, "learning_rate": 8.363470922090038e-06, "loss": 0.4386, "step": 18294 }, { "epoch": 0.8395667936303979, "grad_norm": 0.460416316986084, "learning_rate": 8.36328949845612e-06, "loss": 0.3825, "step": 18295 }, { "epoch": 0.8396126841356524, "grad_norm": 0.41950246691703796, "learning_rate": 8.363108066734553e-06, "loss": 0.3682, "step": 18296 }, { "epoch": 0.8396585746409068, "grad_norm": 0.46988046169281006, "learning_rate": 8.362926626925771e-06, "loss": 0.4225, "step": 18297 }, { "epoch": 0.8397044651461613, "grad_norm": 0.4681702256202698, "learning_rate": 8.362745179030212e-06, "loss": 0.4359, "step": 18298 }, { "epoch": 0.8397503556514158, "grad_norm": 0.4623136520385742, "learning_rate": 8.362563723048314e-06, "loss": 0.4619, "step": 18299 }, { "epoch": 0.8397962461566701, "grad_norm": 0.4576776921749115, "learning_rate": 8.362382258980509e-06, "loss": 0.4467, "step": 18300 }, { "epoch": 0.8398421366619246, "grad_norm": 0.4294199049472809, "learning_rate": 8.362200786827237e-06, "loss": 0.3744, "step": 18301 }, { "epoch": 0.8398880271671791, "grad_norm": 0.45716628432273865, "learning_rate": 8.362019306588933e-06, "loss": 0.4641, "step": 18302 }, { "epoch": 0.8399339176724335, "grad_norm": 0.4630660116672516, "learning_rate": 8.361837818266034e-06, "loss": 0.4396, "step": 18303 }, { "epoch": 0.839979808177688, "grad_norm": 0.42552071809768677, "learning_rate": 8.361656321858975e-06, "loss": 0.3565, "step": 18304 }, { "epoch": 0.8400256986829425, "grad_norm": 0.421857625246048, "learning_rate": 8.361474817368197e-06, "loss": 0.3349, "step": 18305 }, { "epoch": 0.8400715891881969, "grad_norm": 0.47285741567611694, "learning_rate": 8.36129330479413e-06, "loss": 0.4435, "step": 18306 }, { "epoch": 0.8401174796934514, "grad_norm": 0.44761720299720764, "learning_rate": 8.361111784137212e-06, "loss": 0.3719, "step": 18307 }, { "epoch": 0.8401633701987059, "grad_norm": 0.42355242371559143, "learning_rate": 8.360930255397883e-06, "loss": 0.3288, "step": 18308 }, { "epoch": 0.8402092607039604, "grad_norm": 0.4656539857387543, "learning_rate": 8.36074871857658e-06, "loss": 0.3869, "step": 18309 }, { "epoch": 0.8402551512092148, "grad_norm": 0.4362870454788208, "learning_rate": 8.360567173673732e-06, "loss": 0.3263, "step": 18310 }, { "epoch": 0.8403010417144693, "grad_norm": 0.4727589190006256, "learning_rate": 8.360385620689783e-06, "loss": 0.5223, "step": 18311 }, { "epoch": 0.8403469322197238, "grad_norm": 0.48550984263420105, "learning_rate": 8.360204059625168e-06, "loss": 0.4289, "step": 18312 }, { "epoch": 0.8403928227249782, "grad_norm": 0.7085279822349548, "learning_rate": 8.360022490480322e-06, "loss": 0.4521, "step": 18313 }, { "epoch": 0.8404387132302327, "grad_norm": 0.4335705637931824, "learning_rate": 8.35984091325568e-06, "loss": 0.3139, "step": 18314 }, { "epoch": 0.8404846037354872, "grad_norm": 0.41161349415779114, "learning_rate": 8.359659327951684e-06, "loss": 0.2958, "step": 18315 }, { "epoch": 0.8405304942407416, "grad_norm": 0.47334349155426025, "learning_rate": 8.359477734568767e-06, "loss": 0.4034, "step": 18316 }, { "epoch": 0.840576384745996, "grad_norm": 0.42495861649513245, "learning_rate": 8.359296133107366e-06, "loss": 0.3786, "step": 18317 }, { "epoch": 0.8406222752512506, "grad_norm": 0.414435476064682, "learning_rate": 8.35911452356792e-06, "loss": 0.3136, "step": 18318 }, { "epoch": 0.8406681657565049, "grad_norm": 0.4815065860748291, "learning_rate": 8.358932905950861e-06, "loss": 0.3652, "step": 18319 }, { "epoch": 0.8407140562617594, "grad_norm": 0.4765389561653137, "learning_rate": 8.358751280256629e-06, "loss": 0.4399, "step": 18320 }, { "epoch": 0.8407599467670139, "grad_norm": 0.45406317710876465, "learning_rate": 8.358569646485661e-06, "loss": 0.3658, "step": 18321 }, { "epoch": 0.8408058372722683, "grad_norm": 0.4345114231109619, "learning_rate": 8.358388004638391e-06, "loss": 0.3932, "step": 18322 }, { "epoch": 0.8408517277775228, "grad_norm": 0.43068015575408936, "learning_rate": 8.35820635471526e-06, "loss": 0.3707, "step": 18323 }, { "epoch": 0.8408976182827773, "grad_norm": 0.44780364632606506, "learning_rate": 8.358024696716703e-06, "loss": 0.4148, "step": 18324 }, { "epoch": 0.8409435087880317, "grad_norm": 0.42362454533576965, "learning_rate": 8.357843030643154e-06, "loss": 0.3397, "step": 18325 }, { "epoch": 0.8409893992932862, "grad_norm": 0.43837878108024597, "learning_rate": 8.357661356495056e-06, "loss": 0.3599, "step": 18326 }, { "epoch": 0.8410352897985407, "grad_norm": 0.46053776144981384, "learning_rate": 8.35747967427284e-06, "loss": 0.4272, "step": 18327 }, { "epoch": 0.8410811803037952, "grad_norm": 0.5229914784431458, "learning_rate": 8.357297983976944e-06, "loss": 0.5187, "step": 18328 }, { "epoch": 0.8411270708090496, "grad_norm": 0.41766518354415894, "learning_rate": 8.357116285607808e-06, "loss": 0.325, "step": 18329 }, { "epoch": 0.8411729613143041, "grad_norm": 0.48048317432403564, "learning_rate": 8.356934579165865e-06, "loss": 0.4417, "step": 18330 }, { "epoch": 0.8412188518195586, "grad_norm": 0.4603572189807892, "learning_rate": 8.356752864651554e-06, "loss": 0.4003, "step": 18331 }, { "epoch": 0.841264742324813, "grad_norm": 0.42645716667175293, "learning_rate": 8.356571142065312e-06, "loss": 0.3305, "step": 18332 }, { "epoch": 0.8413106328300675, "grad_norm": 0.43814700841903687, "learning_rate": 8.356389411407575e-06, "loss": 0.3518, "step": 18333 }, { "epoch": 0.841356523335322, "grad_norm": 0.47320249676704407, "learning_rate": 8.356207672678783e-06, "loss": 0.4403, "step": 18334 }, { "epoch": 0.8414024138405763, "grad_norm": 0.4550647437572479, "learning_rate": 8.35602592587937e-06, "loss": 0.4552, "step": 18335 }, { "epoch": 0.8414483043458308, "grad_norm": 0.4302375614643097, "learning_rate": 8.355844171009773e-06, "loss": 0.342, "step": 18336 }, { "epoch": 0.8414941948510853, "grad_norm": 0.4398466944694519, "learning_rate": 8.355662408070431e-06, "loss": 0.3827, "step": 18337 }, { "epoch": 0.8415400853563397, "grad_norm": 0.4282795786857605, "learning_rate": 8.355480637061779e-06, "loss": 0.3622, "step": 18338 }, { "epoch": 0.8415859758615942, "grad_norm": 0.492548406124115, "learning_rate": 8.355298857984256e-06, "loss": 0.4309, "step": 18339 }, { "epoch": 0.8416318663668487, "grad_norm": 0.4126216471195221, "learning_rate": 8.355117070838296e-06, "loss": 0.339, "step": 18340 }, { "epoch": 0.8416777568721031, "grad_norm": 0.4651237726211548, "learning_rate": 8.35493527562434e-06, "loss": 0.4344, "step": 18341 }, { "epoch": 0.8417236473773576, "grad_norm": 0.4612409174442291, "learning_rate": 8.354753472342825e-06, "loss": 0.4002, "step": 18342 }, { "epoch": 0.8417695378826121, "grad_norm": 0.49226418137550354, "learning_rate": 8.354571660994183e-06, "loss": 0.4589, "step": 18343 }, { "epoch": 0.8418154283878666, "grad_norm": 0.5086532831192017, "learning_rate": 8.354389841578859e-06, "loss": 0.4529, "step": 18344 }, { "epoch": 0.841861318893121, "grad_norm": 0.45290786027908325, "learning_rate": 8.354208014097283e-06, "loss": 0.4188, "step": 18345 }, { "epoch": 0.8419072093983755, "grad_norm": 0.4725872874259949, "learning_rate": 8.354026178549898e-06, "loss": 0.3949, "step": 18346 }, { "epoch": 0.84195309990363, "grad_norm": 0.4875481128692627, "learning_rate": 8.353844334937136e-06, "loss": 0.4106, "step": 18347 }, { "epoch": 0.8419989904088844, "grad_norm": 0.47561973333358765, "learning_rate": 8.353662483259437e-06, "loss": 0.451, "step": 18348 }, { "epoch": 0.8420448809141389, "grad_norm": 0.43495455384254456, "learning_rate": 8.35348062351724e-06, "loss": 0.3278, "step": 18349 }, { "epoch": 0.8420907714193934, "grad_norm": 0.4717242121696472, "learning_rate": 8.35329875571098e-06, "loss": 0.3881, "step": 18350 }, { "epoch": 0.8421366619246478, "grad_norm": 0.4546155631542206, "learning_rate": 8.353116879841095e-06, "loss": 0.3838, "step": 18351 }, { "epoch": 0.8421825524299023, "grad_norm": 0.4862527549266815, "learning_rate": 8.352934995908021e-06, "loss": 0.4725, "step": 18352 }, { "epoch": 0.8422284429351568, "grad_norm": 0.4699269235134125, "learning_rate": 8.352753103912198e-06, "loss": 0.4307, "step": 18353 }, { "epoch": 0.8422743334404111, "grad_norm": 0.44853541254997253, "learning_rate": 8.35257120385406e-06, "loss": 0.3502, "step": 18354 }, { "epoch": 0.8423202239456656, "grad_norm": 0.5033037066459656, "learning_rate": 8.352389295734049e-06, "loss": 0.4932, "step": 18355 }, { "epoch": 0.8423661144509201, "grad_norm": 0.4705391526222229, "learning_rate": 8.352207379552598e-06, "loss": 0.3691, "step": 18356 }, { "epoch": 0.8424120049561745, "grad_norm": 0.5278042554855347, "learning_rate": 8.352025455310146e-06, "loss": 0.6233, "step": 18357 }, { "epoch": 0.842457895461429, "grad_norm": 0.48265302181243896, "learning_rate": 8.351843523007132e-06, "loss": 0.4672, "step": 18358 }, { "epoch": 0.8425037859666835, "grad_norm": 0.7072678804397583, "learning_rate": 8.351661582643991e-06, "loss": 0.4261, "step": 18359 }, { "epoch": 0.8425496764719379, "grad_norm": 0.4573521018028259, "learning_rate": 8.351479634221163e-06, "loss": 0.4106, "step": 18360 }, { "epoch": 0.8425955669771924, "grad_norm": 0.46562162041664124, "learning_rate": 8.351297677739084e-06, "loss": 0.4555, "step": 18361 }, { "epoch": 0.8426414574824469, "grad_norm": 0.4400818943977356, "learning_rate": 8.35111571319819e-06, "loss": 0.3902, "step": 18362 }, { "epoch": 0.8426873479877014, "grad_norm": 0.4723038971424103, "learning_rate": 8.350933740598921e-06, "loss": 0.4621, "step": 18363 }, { "epoch": 0.8427332384929558, "grad_norm": 0.4322201609611511, "learning_rate": 8.350751759941715e-06, "loss": 0.3817, "step": 18364 }, { "epoch": 0.8427791289982103, "grad_norm": 0.43046241998672485, "learning_rate": 8.350569771227008e-06, "loss": 0.3625, "step": 18365 }, { "epoch": 0.8428250195034648, "grad_norm": 0.4378136098384857, "learning_rate": 8.35038777445524e-06, "loss": 0.3413, "step": 18366 }, { "epoch": 0.8428709100087192, "grad_norm": 0.42299291491508484, "learning_rate": 8.350205769626844e-06, "loss": 0.3738, "step": 18367 }, { "epoch": 0.8429168005139737, "grad_norm": 0.44240838289260864, "learning_rate": 8.35002375674226e-06, "loss": 0.3759, "step": 18368 }, { "epoch": 0.8429626910192282, "grad_norm": 0.45195770263671875, "learning_rate": 8.349841735801929e-06, "loss": 0.4503, "step": 18369 }, { "epoch": 0.8430085815244825, "grad_norm": 0.4279383718967438, "learning_rate": 8.349659706806284e-06, "loss": 0.3035, "step": 18370 }, { "epoch": 0.843054472029737, "grad_norm": 0.44820088148117065, "learning_rate": 8.349477669755766e-06, "loss": 0.3793, "step": 18371 }, { "epoch": 0.8431003625349915, "grad_norm": 0.45392870903015137, "learning_rate": 8.34929562465081e-06, "loss": 0.3796, "step": 18372 }, { "epoch": 0.8431462530402459, "grad_norm": 0.512766420841217, "learning_rate": 8.349113571491856e-06, "loss": 0.4709, "step": 18373 }, { "epoch": 0.8431921435455004, "grad_norm": 0.4132944643497467, "learning_rate": 8.34893151027934e-06, "loss": 0.3124, "step": 18374 }, { "epoch": 0.8432380340507549, "grad_norm": 0.44059717655181885, "learning_rate": 8.348749441013702e-06, "loss": 0.3556, "step": 18375 }, { "epoch": 0.8432839245560093, "grad_norm": 0.4390440881252289, "learning_rate": 8.348567363695376e-06, "loss": 0.3768, "step": 18376 }, { "epoch": 0.8433298150612638, "grad_norm": 0.4518889784812927, "learning_rate": 8.348385278324805e-06, "loss": 0.3843, "step": 18377 }, { "epoch": 0.8433757055665183, "grad_norm": 0.42900529503822327, "learning_rate": 8.348203184902423e-06, "loss": 0.3658, "step": 18378 }, { "epoch": 0.8434215960717728, "grad_norm": 0.4830401539802551, "learning_rate": 8.348021083428667e-06, "loss": 0.493, "step": 18379 }, { "epoch": 0.8434674865770272, "grad_norm": 0.4534807503223419, "learning_rate": 8.34783897390398e-06, "loss": 0.365, "step": 18380 }, { "epoch": 0.8435133770822817, "grad_norm": 0.4254912734031677, "learning_rate": 8.347656856328796e-06, "loss": 0.351, "step": 18381 }, { "epoch": 0.8435592675875362, "grad_norm": 0.4541734755039215, "learning_rate": 8.347474730703555e-06, "loss": 0.3866, "step": 18382 }, { "epoch": 0.8436051580927906, "grad_norm": 0.4775274097919464, "learning_rate": 8.347292597028691e-06, "loss": 0.4458, "step": 18383 }, { "epoch": 0.8436510485980451, "grad_norm": 0.4680865406990051, "learning_rate": 8.347110455304649e-06, "loss": 0.4412, "step": 18384 }, { "epoch": 0.8436969391032996, "grad_norm": 0.468293160200119, "learning_rate": 8.346928305531858e-06, "loss": 0.4562, "step": 18385 }, { "epoch": 0.843742829608554, "grad_norm": 0.47502392530441284, "learning_rate": 8.346746147710764e-06, "loss": 0.4234, "step": 18386 }, { "epoch": 0.8437887201138085, "grad_norm": 0.45605209469795227, "learning_rate": 8.3465639818418e-06, "loss": 0.3644, "step": 18387 }, { "epoch": 0.843834610619063, "grad_norm": 0.4365732669830322, "learning_rate": 8.346381807925407e-06, "loss": 0.4029, "step": 18388 }, { "epoch": 0.8438805011243173, "grad_norm": 0.44695624709129333, "learning_rate": 8.34619962596202e-06, "loss": 0.3661, "step": 18389 }, { "epoch": 0.8439263916295718, "grad_norm": 0.6114743947982788, "learning_rate": 8.34601743595208e-06, "loss": 0.4372, "step": 18390 }, { "epoch": 0.8439722821348263, "grad_norm": 0.45880502462387085, "learning_rate": 8.345835237896025e-06, "loss": 0.4248, "step": 18391 }, { "epoch": 0.8440181726400807, "grad_norm": 0.4660395085811615, "learning_rate": 8.345653031794292e-06, "loss": 0.4061, "step": 18392 }, { "epoch": 0.8440640631453352, "grad_norm": 0.4742988049983978, "learning_rate": 8.345470817647319e-06, "loss": 0.4584, "step": 18393 }, { "epoch": 0.8441099536505897, "grad_norm": 0.45204228162765503, "learning_rate": 8.345288595455546e-06, "loss": 0.3911, "step": 18394 }, { "epoch": 0.8441558441558441, "grad_norm": 0.44109320640563965, "learning_rate": 8.345106365219406e-06, "loss": 0.3983, "step": 18395 }, { "epoch": 0.8442017346610986, "grad_norm": 0.4372449517250061, "learning_rate": 8.344924126939343e-06, "loss": 0.3579, "step": 18396 }, { "epoch": 0.8442476251663531, "grad_norm": 0.47066545486450195, "learning_rate": 8.344741880615795e-06, "loss": 0.4207, "step": 18397 }, { "epoch": 0.8442935156716076, "grad_norm": 0.3910118043422699, "learning_rate": 8.344559626249196e-06, "loss": 0.2738, "step": 18398 }, { "epoch": 0.844339406176862, "grad_norm": 0.4371149241924286, "learning_rate": 8.344377363839987e-06, "loss": 0.3625, "step": 18399 }, { "epoch": 0.8443852966821165, "grad_norm": 0.5049681663513184, "learning_rate": 8.344195093388608e-06, "loss": 0.5397, "step": 18400 }, { "epoch": 0.844431187187371, "grad_norm": 0.4227767288684845, "learning_rate": 8.344012814895492e-06, "loss": 0.3677, "step": 18401 }, { "epoch": 0.8444770776926254, "grad_norm": 0.48698192834854126, "learning_rate": 8.343830528361084e-06, "loss": 0.4733, "step": 18402 }, { "epoch": 0.8445229681978799, "grad_norm": 0.4347972273826599, "learning_rate": 8.343648233785815e-06, "loss": 0.3526, "step": 18403 }, { "epoch": 0.8445688587031344, "grad_norm": 0.48539137840270996, "learning_rate": 8.34346593117013e-06, "loss": 0.4649, "step": 18404 }, { "epoch": 0.8446147492083887, "grad_norm": 0.4273861348628998, "learning_rate": 8.343283620514464e-06, "loss": 0.3588, "step": 18405 }, { "epoch": 0.8446606397136432, "grad_norm": 0.4330707788467407, "learning_rate": 8.343101301819256e-06, "loss": 0.3516, "step": 18406 }, { "epoch": 0.8447065302188977, "grad_norm": 0.6371825337409973, "learning_rate": 8.342918975084946e-06, "loss": 0.3855, "step": 18407 }, { "epoch": 0.8447524207241521, "grad_norm": 0.437532514333725, "learning_rate": 8.34273664031197e-06, "loss": 0.3413, "step": 18408 }, { "epoch": 0.8447983112294066, "grad_norm": 0.44700440764427185, "learning_rate": 8.342554297500766e-06, "loss": 0.3565, "step": 18409 }, { "epoch": 0.8448442017346611, "grad_norm": 0.4678964912891388, "learning_rate": 8.342371946651775e-06, "loss": 0.4224, "step": 18410 }, { "epoch": 0.8448900922399155, "grad_norm": 0.5274521708488464, "learning_rate": 8.342189587765435e-06, "loss": 0.5183, "step": 18411 }, { "epoch": 0.84493598274517, "grad_norm": 0.47910451889038086, "learning_rate": 8.342007220842182e-06, "loss": 0.4269, "step": 18412 }, { "epoch": 0.8449818732504245, "grad_norm": 0.44621360301971436, "learning_rate": 8.341824845882457e-06, "loss": 0.3836, "step": 18413 }, { "epoch": 0.8450277637556789, "grad_norm": 0.4184834361076355, "learning_rate": 8.341642462886699e-06, "loss": 0.319, "step": 18414 }, { "epoch": 0.8450736542609334, "grad_norm": 0.42441990971565247, "learning_rate": 8.341460071855345e-06, "loss": 0.3375, "step": 18415 }, { "epoch": 0.8451195447661879, "grad_norm": 0.4489527940750122, "learning_rate": 8.341277672788834e-06, "loss": 0.4049, "step": 18416 }, { "epoch": 0.8451654352714424, "grad_norm": 0.4490134119987488, "learning_rate": 8.341095265687605e-06, "loss": 0.4034, "step": 18417 }, { "epoch": 0.8452113257766968, "grad_norm": 0.44045600295066833, "learning_rate": 8.340912850552096e-06, "loss": 0.3914, "step": 18418 }, { "epoch": 0.8452572162819513, "grad_norm": 0.4616234004497528, "learning_rate": 8.340730427382745e-06, "loss": 0.4417, "step": 18419 }, { "epoch": 0.8453031067872058, "grad_norm": 0.4152618646621704, "learning_rate": 8.340547996179994e-06, "loss": 0.3694, "step": 18420 }, { "epoch": 0.8453489972924602, "grad_norm": 0.45368459820747375, "learning_rate": 8.340365556944278e-06, "loss": 0.4182, "step": 18421 }, { "epoch": 0.8453948877977147, "grad_norm": 0.4593093991279602, "learning_rate": 8.340183109676037e-06, "loss": 0.3833, "step": 18422 }, { "epoch": 0.8454407783029692, "grad_norm": 0.44983601570129395, "learning_rate": 8.340000654375711e-06, "loss": 0.3924, "step": 18423 }, { "epoch": 0.8454866688082235, "grad_norm": 0.46669793128967285, "learning_rate": 8.339818191043736e-06, "loss": 0.5078, "step": 18424 }, { "epoch": 0.845532559313478, "grad_norm": 0.4939838647842407, "learning_rate": 8.339635719680554e-06, "loss": 0.4607, "step": 18425 }, { "epoch": 0.8455784498187325, "grad_norm": 0.44249817728996277, "learning_rate": 8.3394532402866e-06, "loss": 0.3549, "step": 18426 }, { "epoch": 0.8456243403239869, "grad_norm": 0.46848225593566895, "learning_rate": 8.339270752862316e-06, "loss": 0.4172, "step": 18427 }, { "epoch": 0.8456702308292414, "grad_norm": 0.44798702001571655, "learning_rate": 8.33908825740814e-06, "loss": 0.3945, "step": 18428 }, { "epoch": 0.8457161213344959, "grad_norm": 0.4461829662322998, "learning_rate": 8.338905753924512e-06, "loss": 0.3975, "step": 18429 }, { "epoch": 0.8457620118397503, "grad_norm": 0.47843924164772034, "learning_rate": 8.338723242411866e-06, "loss": 0.4731, "step": 18430 }, { "epoch": 0.8458079023450048, "grad_norm": 0.44294023513793945, "learning_rate": 8.338540722870647e-06, "loss": 0.4116, "step": 18431 }, { "epoch": 0.8458537928502593, "grad_norm": 0.4541102349758148, "learning_rate": 8.338358195301291e-06, "loss": 0.3913, "step": 18432 }, { "epoch": 0.8458996833555138, "grad_norm": 0.43997722864151, "learning_rate": 8.338175659704236e-06, "loss": 0.4244, "step": 18433 }, { "epoch": 0.8459455738607682, "grad_norm": 0.43457984924316406, "learning_rate": 8.337993116079922e-06, "loss": 0.3292, "step": 18434 }, { "epoch": 0.8459914643660227, "grad_norm": 0.5279772877693176, "learning_rate": 8.337810564428789e-06, "loss": 0.4977, "step": 18435 }, { "epoch": 0.8460373548712772, "grad_norm": 0.46021613478660583, "learning_rate": 8.337628004751275e-06, "loss": 0.4089, "step": 18436 }, { "epoch": 0.8460832453765316, "grad_norm": 0.4456234872341156, "learning_rate": 8.337445437047819e-06, "loss": 0.4165, "step": 18437 }, { "epoch": 0.8461291358817861, "grad_norm": 0.44975969195365906, "learning_rate": 8.33726286131886e-06, "loss": 0.3651, "step": 18438 }, { "epoch": 0.8461750263870406, "grad_norm": 0.4139656722545624, "learning_rate": 8.337080277564837e-06, "loss": 0.3413, "step": 18439 }, { "epoch": 0.846220916892295, "grad_norm": 0.49429044127464294, "learning_rate": 8.336897685786189e-06, "loss": 0.4377, "step": 18440 }, { "epoch": 0.8462668073975494, "grad_norm": 0.4104306101799011, "learning_rate": 8.336715085983355e-06, "loss": 0.3182, "step": 18441 }, { "epoch": 0.846312697902804, "grad_norm": 0.4455753564834595, "learning_rate": 8.336532478156775e-06, "loss": 0.3796, "step": 18442 }, { "epoch": 0.8463585884080583, "grad_norm": 0.45576009154319763, "learning_rate": 8.336349862306887e-06, "loss": 0.4097, "step": 18443 }, { "epoch": 0.8464044789133128, "grad_norm": 0.4532299339771271, "learning_rate": 8.33616723843413e-06, "loss": 0.3906, "step": 18444 }, { "epoch": 0.8464503694185673, "grad_norm": 0.39093905687332153, "learning_rate": 8.335984606538946e-06, "loss": 0.2928, "step": 18445 }, { "epoch": 0.8464962599238217, "grad_norm": 0.4254453778266907, "learning_rate": 8.335801966621768e-06, "loss": 0.3199, "step": 18446 }, { "epoch": 0.8465421504290762, "grad_norm": 0.4364699125289917, "learning_rate": 8.335619318683042e-06, "loss": 0.363, "step": 18447 }, { "epoch": 0.8465880409343307, "grad_norm": 0.440714955329895, "learning_rate": 8.335436662723203e-06, "loss": 0.3435, "step": 18448 }, { "epoch": 0.8466339314395851, "grad_norm": 0.4626173675060272, "learning_rate": 8.335253998742692e-06, "loss": 0.4146, "step": 18449 }, { "epoch": 0.8466798219448396, "grad_norm": 0.4529416859149933, "learning_rate": 8.335071326741948e-06, "loss": 0.3727, "step": 18450 }, { "epoch": 0.8467257124500941, "grad_norm": 0.44712498784065247, "learning_rate": 8.334888646721411e-06, "loss": 0.4054, "step": 18451 }, { "epoch": 0.8467716029553486, "grad_norm": 0.45306190848350525, "learning_rate": 8.334705958681517e-06, "loss": 0.4309, "step": 18452 }, { "epoch": 0.846817493460603, "grad_norm": 0.44328179955482483, "learning_rate": 8.334523262622708e-06, "loss": 0.3426, "step": 18453 }, { "epoch": 0.8468633839658575, "grad_norm": 0.42018651962280273, "learning_rate": 8.334340558545425e-06, "loss": 0.3619, "step": 18454 }, { "epoch": 0.846909274471112, "grad_norm": 0.45521971583366394, "learning_rate": 8.334157846450104e-06, "loss": 0.3669, "step": 18455 }, { "epoch": 0.8469551649763664, "grad_norm": 0.4465028643608093, "learning_rate": 8.333975126337187e-06, "loss": 0.3927, "step": 18456 }, { "epoch": 0.8470010554816209, "grad_norm": 0.4658412039279938, "learning_rate": 8.33379239820711e-06, "loss": 0.3816, "step": 18457 }, { "epoch": 0.8470469459868754, "grad_norm": 0.4661136567592621, "learning_rate": 8.333609662060315e-06, "loss": 0.404, "step": 18458 }, { "epoch": 0.8470928364921297, "grad_norm": 0.46162107586860657, "learning_rate": 8.333426917897241e-06, "loss": 0.4418, "step": 18459 }, { "epoch": 0.8471387269973842, "grad_norm": 0.4501282572746277, "learning_rate": 8.333244165718329e-06, "loss": 0.3695, "step": 18460 }, { "epoch": 0.8471846175026387, "grad_norm": 0.4711464047431946, "learning_rate": 8.333061405524015e-06, "loss": 0.3887, "step": 18461 }, { "epoch": 0.8472305080078931, "grad_norm": 0.4515655040740967, "learning_rate": 8.33287863731474e-06, "loss": 0.3872, "step": 18462 }, { "epoch": 0.8472763985131476, "grad_norm": 0.4372484087944031, "learning_rate": 8.332695861090945e-06, "loss": 0.4313, "step": 18463 }, { "epoch": 0.8473222890184021, "grad_norm": 0.4591462314128876, "learning_rate": 8.332513076853065e-06, "loss": 0.3766, "step": 18464 }, { "epoch": 0.8473681795236565, "grad_norm": 0.42607882618904114, "learning_rate": 8.332330284601548e-06, "loss": 0.3273, "step": 18465 }, { "epoch": 0.847414070028911, "grad_norm": 0.44026491045951843, "learning_rate": 8.332147484336825e-06, "loss": 0.3971, "step": 18466 }, { "epoch": 0.8474599605341655, "grad_norm": 0.43532034754753113, "learning_rate": 8.331964676059339e-06, "loss": 0.3756, "step": 18467 }, { "epoch": 0.8475058510394199, "grad_norm": 0.43992215394973755, "learning_rate": 8.331781859769531e-06, "loss": 0.3814, "step": 18468 }, { "epoch": 0.8475517415446744, "grad_norm": 0.4336024522781372, "learning_rate": 8.331599035467838e-06, "loss": 0.4217, "step": 18469 }, { "epoch": 0.8475976320499289, "grad_norm": 0.44434642791748047, "learning_rate": 8.331416203154701e-06, "loss": 0.3816, "step": 18470 }, { "epoch": 0.8476435225551834, "grad_norm": 0.417959600687027, "learning_rate": 8.33123336283056e-06, "loss": 0.3413, "step": 18471 }, { "epoch": 0.8476894130604378, "grad_norm": 0.44532114267349243, "learning_rate": 8.331050514495854e-06, "loss": 0.391, "step": 18472 }, { "epoch": 0.8477353035656923, "grad_norm": 0.4096547067165375, "learning_rate": 8.330867658151022e-06, "loss": 0.3494, "step": 18473 }, { "epoch": 0.8477811940709468, "grad_norm": 0.45381149649620056, "learning_rate": 8.330684793796506e-06, "loss": 0.391, "step": 18474 }, { "epoch": 0.8478270845762012, "grad_norm": 0.44133439660072327, "learning_rate": 8.330501921432744e-06, "loss": 0.4022, "step": 18475 }, { "epoch": 0.8478729750814556, "grad_norm": 0.46249449253082275, "learning_rate": 8.330319041060175e-06, "loss": 0.3803, "step": 18476 }, { "epoch": 0.8479188655867101, "grad_norm": 0.43686965107917786, "learning_rate": 8.33013615267924e-06, "loss": 0.4109, "step": 18477 }, { "epoch": 0.8479647560919645, "grad_norm": 0.450529009103775, "learning_rate": 8.329953256290379e-06, "loss": 0.3488, "step": 18478 }, { "epoch": 0.848010646597219, "grad_norm": 0.48208731412887573, "learning_rate": 8.329770351894031e-06, "loss": 0.4748, "step": 18479 }, { "epoch": 0.8480565371024735, "grad_norm": 0.47667890787124634, "learning_rate": 8.329587439490636e-06, "loss": 0.5026, "step": 18480 }, { "epoch": 0.8481024276077279, "grad_norm": 0.4826364815235138, "learning_rate": 8.329404519080634e-06, "loss": 0.4538, "step": 18481 }, { "epoch": 0.8481483181129824, "grad_norm": 0.4959820806980133, "learning_rate": 8.329221590664467e-06, "loss": 0.4832, "step": 18482 }, { "epoch": 0.8481942086182369, "grad_norm": 0.4422644078731537, "learning_rate": 8.329038654242572e-06, "loss": 0.4061, "step": 18483 }, { "epoch": 0.8482400991234913, "grad_norm": 0.4402746856212616, "learning_rate": 8.328855709815388e-06, "loss": 0.4055, "step": 18484 }, { "epoch": 0.8482859896287458, "grad_norm": 0.44546085596084595, "learning_rate": 8.328672757383358e-06, "loss": 0.3857, "step": 18485 }, { "epoch": 0.8483318801340003, "grad_norm": 0.41366586089134216, "learning_rate": 8.32848979694692e-06, "loss": 0.3514, "step": 18486 }, { "epoch": 0.8483777706392548, "grad_norm": 0.4797897934913635, "learning_rate": 8.328306828506515e-06, "loss": 0.4058, "step": 18487 }, { "epoch": 0.8484236611445092, "grad_norm": 0.4312388002872467, "learning_rate": 8.328123852062582e-06, "loss": 0.3356, "step": 18488 }, { "epoch": 0.8484695516497637, "grad_norm": 0.443000465631485, "learning_rate": 8.327940867615561e-06, "loss": 0.4115, "step": 18489 }, { "epoch": 0.8485154421550182, "grad_norm": 0.47328194975852966, "learning_rate": 8.327757875165894e-06, "loss": 0.4985, "step": 18490 }, { "epoch": 0.8485613326602726, "grad_norm": 0.42449358105659485, "learning_rate": 8.32757487471402e-06, "loss": 0.3807, "step": 18491 }, { "epoch": 0.8486072231655271, "grad_norm": 0.4469325840473175, "learning_rate": 8.327391866260376e-06, "loss": 0.382, "step": 18492 }, { "epoch": 0.8486531136707816, "grad_norm": 0.47733375430107117, "learning_rate": 8.327208849805407e-06, "loss": 0.403, "step": 18493 }, { "epoch": 0.8486990041760359, "grad_norm": 0.5070577263832092, "learning_rate": 8.32702582534955e-06, "loss": 0.4304, "step": 18494 }, { "epoch": 0.8487448946812904, "grad_norm": 0.46794626116752625, "learning_rate": 8.326842792893245e-06, "loss": 0.4226, "step": 18495 }, { "epoch": 0.8487907851865449, "grad_norm": 0.4611136019229889, "learning_rate": 8.326659752436933e-06, "loss": 0.4079, "step": 18496 }, { "epoch": 0.8488366756917993, "grad_norm": 0.4353518784046173, "learning_rate": 8.326476703981056e-06, "loss": 0.3284, "step": 18497 }, { "epoch": 0.8488825661970538, "grad_norm": 0.4331098794937134, "learning_rate": 8.326293647526052e-06, "loss": 0.4093, "step": 18498 }, { "epoch": 0.8489284567023083, "grad_norm": 0.5203938484191895, "learning_rate": 8.326110583072362e-06, "loss": 0.4435, "step": 18499 }, { "epoch": 0.8489743472075627, "grad_norm": 0.4364112913608551, "learning_rate": 8.325927510620426e-06, "loss": 0.391, "step": 18500 }, { "epoch": 0.8490202377128172, "grad_norm": 0.45561668276786804, "learning_rate": 8.32574443017068e-06, "loss": 0.3826, "step": 18501 }, { "epoch": 0.8490661282180717, "grad_norm": 0.4086368680000305, "learning_rate": 8.325561341723574e-06, "loss": 0.3244, "step": 18502 }, { "epoch": 0.8491120187233261, "grad_norm": 0.4363986849784851, "learning_rate": 8.325378245279538e-06, "loss": 0.3655, "step": 18503 }, { "epoch": 0.8491579092285806, "grad_norm": 0.42900383472442627, "learning_rate": 8.32519514083902e-06, "loss": 0.3601, "step": 18504 }, { "epoch": 0.8492037997338351, "grad_norm": 0.468606173992157, "learning_rate": 8.325012028402455e-06, "loss": 0.4435, "step": 18505 }, { "epoch": 0.8492496902390896, "grad_norm": 0.4418804347515106, "learning_rate": 8.324828907970287e-06, "loss": 0.3598, "step": 18506 }, { "epoch": 0.849295580744344, "grad_norm": 0.47901755571365356, "learning_rate": 8.324645779542954e-06, "loss": 0.4437, "step": 18507 }, { "epoch": 0.8493414712495985, "grad_norm": 0.4587444067001343, "learning_rate": 8.324462643120898e-06, "loss": 0.4468, "step": 18508 }, { "epoch": 0.849387361754853, "grad_norm": 0.48112258315086365, "learning_rate": 8.324279498704558e-06, "loss": 0.4598, "step": 18509 }, { "epoch": 0.8494332522601074, "grad_norm": 0.4726746380329132, "learning_rate": 8.324096346294375e-06, "loss": 0.433, "step": 18510 }, { "epoch": 0.8494791427653618, "grad_norm": 0.43957963585853577, "learning_rate": 8.323913185890788e-06, "loss": 0.3793, "step": 18511 }, { "epoch": 0.8495250332706163, "grad_norm": 0.5012571215629578, "learning_rate": 8.32373001749424e-06, "loss": 0.4783, "step": 18512 }, { "epoch": 0.8495709237758707, "grad_norm": 0.4703497588634491, "learning_rate": 8.323546841105172e-06, "loss": 0.4067, "step": 18513 }, { "epoch": 0.8496168142811252, "grad_norm": 0.44172635674476624, "learning_rate": 8.32336365672402e-06, "loss": 0.3572, "step": 18514 }, { "epoch": 0.8496627047863797, "grad_norm": 0.452589750289917, "learning_rate": 8.323180464351228e-06, "loss": 0.4158, "step": 18515 }, { "epoch": 0.8497085952916341, "grad_norm": 0.4321780502796173, "learning_rate": 8.322997263987239e-06, "loss": 0.339, "step": 18516 }, { "epoch": 0.8497544857968886, "grad_norm": 0.4353664517402649, "learning_rate": 8.322814055632486e-06, "loss": 0.3732, "step": 18517 }, { "epoch": 0.8498003763021431, "grad_norm": 0.45058342814445496, "learning_rate": 8.322630839287416e-06, "loss": 0.397, "step": 18518 }, { "epoch": 0.8498462668073975, "grad_norm": 0.430913507938385, "learning_rate": 8.322447614952468e-06, "loss": 0.3457, "step": 18519 }, { "epoch": 0.849892157312652, "grad_norm": 0.46599411964416504, "learning_rate": 8.32226438262808e-06, "loss": 0.4194, "step": 18520 }, { "epoch": 0.8499380478179065, "grad_norm": 0.47300225496292114, "learning_rate": 8.322081142314696e-06, "loss": 0.4706, "step": 18521 }, { "epoch": 0.849983938323161, "grad_norm": 0.4319131374359131, "learning_rate": 8.321897894012756e-06, "loss": 0.3355, "step": 18522 }, { "epoch": 0.8500298288284154, "grad_norm": 0.46814319491386414, "learning_rate": 8.321714637722697e-06, "loss": 0.3992, "step": 18523 }, { "epoch": 0.8500757193336699, "grad_norm": 0.46923312544822693, "learning_rate": 8.321531373444966e-06, "loss": 0.3778, "step": 18524 }, { "epoch": 0.8501216098389244, "grad_norm": 0.4729360342025757, "learning_rate": 8.32134810118e-06, "loss": 0.4113, "step": 18525 }, { "epoch": 0.8501675003441788, "grad_norm": 0.4241567552089691, "learning_rate": 8.321164820928238e-06, "loss": 0.3441, "step": 18526 }, { "epoch": 0.8502133908494333, "grad_norm": 0.5104889869689941, "learning_rate": 8.320981532690125e-06, "loss": 0.5479, "step": 18527 }, { "epoch": 0.8502592813546878, "grad_norm": 0.41151878237724304, "learning_rate": 8.3207982364661e-06, "loss": 0.344, "step": 18528 }, { "epoch": 0.8503051718599421, "grad_norm": 0.44778236746788025, "learning_rate": 8.3206149322566e-06, "loss": 0.369, "step": 18529 }, { "epoch": 0.8503510623651966, "grad_norm": 0.42515379190444946, "learning_rate": 8.320431620062072e-06, "loss": 0.3764, "step": 18530 }, { "epoch": 0.8503969528704511, "grad_norm": 0.45258450508117676, "learning_rate": 8.320248299882952e-06, "loss": 0.4129, "step": 18531 }, { "epoch": 0.8504428433757055, "grad_norm": 0.4497392475605011, "learning_rate": 8.320064971719684e-06, "loss": 0.3608, "step": 18532 }, { "epoch": 0.85048873388096, "grad_norm": 0.4949532151222229, "learning_rate": 8.319881635572708e-06, "loss": 0.4794, "step": 18533 }, { "epoch": 0.8505346243862145, "grad_norm": 0.5146548748016357, "learning_rate": 8.319698291442462e-06, "loss": 0.3877, "step": 18534 }, { "epoch": 0.8505805148914689, "grad_norm": 0.4761143922805786, "learning_rate": 8.319514939329393e-06, "loss": 0.4111, "step": 18535 }, { "epoch": 0.8506264053967234, "grad_norm": 0.4928510785102844, "learning_rate": 8.319331579233937e-06, "loss": 0.5018, "step": 18536 }, { "epoch": 0.8506722959019779, "grad_norm": 0.49157974123954773, "learning_rate": 8.319148211156534e-06, "loss": 0.4549, "step": 18537 }, { "epoch": 0.8507181864072323, "grad_norm": 0.5313320159912109, "learning_rate": 8.318964835097629e-06, "loss": 0.4106, "step": 18538 }, { "epoch": 0.8507640769124868, "grad_norm": 0.45103299617767334, "learning_rate": 8.31878145105766e-06, "loss": 0.3407, "step": 18539 }, { "epoch": 0.8508099674177413, "grad_norm": 0.47341710329055786, "learning_rate": 8.31859805903707e-06, "loss": 0.4069, "step": 18540 }, { "epoch": 0.8508558579229958, "grad_norm": 0.5049566030502319, "learning_rate": 8.318414659036299e-06, "loss": 0.4744, "step": 18541 }, { "epoch": 0.8509017484282502, "grad_norm": 0.4703834354877472, "learning_rate": 8.318231251055788e-06, "loss": 0.3931, "step": 18542 }, { "epoch": 0.8509476389335047, "grad_norm": 0.4895935654640198, "learning_rate": 8.318047835095977e-06, "loss": 0.4637, "step": 18543 }, { "epoch": 0.8509935294387592, "grad_norm": 0.5321122407913208, "learning_rate": 8.317864411157311e-06, "loss": 0.4621, "step": 18544 }, { "epoch": 0.8510394199440136, "grad_norm": 0.47079581022262573, "learning_rate": 8.317680979240225e-06, "loss": 0.3934, "step": 18545 }, { "epoch": 0.851085310449268, "grad_norm": 0.44207099080085754, "learning_rate": 8.317497539345164e-06, "loss": 0.3761, "step": 18546 }, { "epoch": 0.8511312009545225, "grad_norm": 0.5199916958808899, "learning_rate": 8.317314091472571e-06, "loss": 0.4984, "step": 18547 }, { "epoch": 0.8511770914597769, "grad_norm": 0.4560734033584595, "learning_rate": 8.31713063562288e-06, "loss": 0.4275, "step": 18548 }, { "epoch": 0.8512229819650314, "grad_norm": 0.46544256806373596, "learning_rate": 8.31694717179654e-06, "loss": 0.4061, "step": 18549 }, { "epoch": 0.8512688724702859, "grad_norm": 0.4387304186820984, "learning_rate": 8.316763699993988e-06, "loss": 0.3374, "step": 18550 }, { "epoch": 0.8513147629755403, "grad_norm": 0.4940645694732666, "learning_rate": 8.316580220215667e-06, "loss": 0.4477, "step": 18551 }, { "epoch": 0.8513606534807948, "grad_norm": 0.4244164228439331, "learning_rate": 8.316396732462017e-06, "loss": 0.4088, "step": 18552 }, { "epoch": 0.8514065439860493, "grad_norm": 0.516417384147644, "learning_rate": 8.31621323673348e-06, "loss": 0.5139, "step": 18553 }, { "epoch": 0.8514524344913037, "grad_norm": 0.4516875743865967, "learning_rate": 8.316029733030496e-06, "loss": 0.3484, "step": 18554 }, { "epoch": 0.8514983249965582, "grad_norm": 0.4425870180130005, "learning_rate": 8.315846221353507e-06, "loss": 0.4015, "step": 18555 }, { "epoch": 0.8515442155018127, "grad_norm": 0.4379623532295227, "learning_rate": 8.315662701702953e-06, "loss": 0.3755, "step": 18556 }, { "epoch": 0.8515901060070671, "grad_norm": 0.45250391960144043, "learning_rate": 8.31547917407928e-06, "loss": 0.3962, "step": 18557 }, { "epoch": 0.8516359965123216, "grad_norm": 0.4658588767051697, "learning_rate": 8.315295638482923e-06, "loss": 0.4651, "step": 18558 }, { "epoch": 0.8516818870175761, "grad_norm": 0.47544774413108826, "learning_rate": 8.315112094914328e-06, "loss": 0.4591, "step": 18559 }, { "epoch": 0.8517277775228306, "grad_norm": 0.42730605602264404, "learning_rate": 8.314928543373933e-06, "loss": 0.3413, "step": 18560 }, { "epoch": 0.851773668028085, "grad_norm": 0.4330829381942749, "learning_rate": 8.314744983862184e-06, "loss": 0.3714, "step": 18561 }, { "epoch": 0.8518195585333395, "grad_norm": 0.4877643287181854, "learning_rate": 8.314561416379516e-06, "loss": 0.4765, "step": 18562 }, { "epoch": 0.851865449038594, "grad_norm": 0.4689941108226776, "learning_rate": 8.314377840926375e-06, "loss": 0.4251, "step": 18563 }, { "epoch": 0.8519113395438483, "grad_norm": 0.44928622245788574, "learning_rate": 8.314194257503203e-06, "loss": 0.3528, "step": 18564 }, { "epoch": 0.8519572300491028, "grad_norm": 0.45337674021720886, "learning_rate": 8.31401066611044e-06, "loss": 0.4386, "step": 18565 }, { "epoch": 0.8520031205543573, "grad_norm": 0.46326684951782227, "learning_rate": 8.313827066748526e-06, "loss": 0.4128, "step": 18566 }, { "epoch": 0.8520490110596117, "grad_norm": 0.43587666749954224, "learning_rate": 8.313643459417903e-06, "loss": 0.3621, "step": 18567 }, { "epoch": 0.8520949015648662, "grad_norm": 0.38895031809806824, "learning_rate": 8.313459844119014e-06, "loss": 0.2821, "step": 18568 }, { "epoch": 0.8521407920701207, "grad_norm": 0.4513740837574005, "learning_rate": 8.3132762208523e-06, "loss": 0.3668, "step": 18569 }, { "epoch": 0.8521866825753751, "grad_norm": 0.4800945222377777, "learning_rate": 8.313092589618203e-06, "loss": 0.427, "step": 18570 }, { "epoch": 0.8522325730806296, "grad_norm": 0.44802138209342957, "learning_rate": 8.312908950417162e-06, "loss": 0.3725, "step": 18571 }, { "epoch": 0.8522784635858841, "grad_norm": 0.48024633526802063, "learning_rate": 8.312725303249622e-06, "loss": 0.4676, "step": 18572 }, { "epoch": 0.8523243540911385, "grad_norm": 0.4557300806045532, "learning_rate": 8.312541648116024e-06, "loss": 0.4089, "step": 18573 }, { "epoch": 0.852370244596393, "grad_norm": 0.44319817423820496, "learning_rate": 8.312357985016807e-06, "loss": 0.4264, "step": 18574 }, { "epoch": 0.8524161351016475, "grad_norm": 0.4716143310070038, "learning_rate": 8.312174313952415e-06, "loss": 0.4204, "step": 18575 }, { "epoch": 0.852462025606902, "grad_norm": 0.4595557153224945, "learning_rate": 8.311990634923289e-06, "loss": 0.4356, "step": 18576 }, { "epoch": 0.8525079161121564, "grad_norm": 0.42150160670280457, "learning_rate": 8.311806947929872e-06, "loss": 0.3187, "step": 18577 }, { "epoch": 0.8525538066174109, "grad_norm": 0.45504552125930786, "learning_rate": 8.311623252972603e-06, "loss": 0.4048, "step": 18578 }, { "epoch": 0.8525996971226654, "grad_norm": 0.42402997612953186, "learning_rate": 8.311439550051927e-06, "loss": 0.3682, "step": 18579 }, { "epoch": 0.8526455876279198, "grad_norm": 0.4750461280345917, "learning_rate": 8.311255839168282e-06, "loss": 0.4533, "step": 18580 }, { "epoch": 0.8526914781331743, "grad_norm": 0.46617522835731506, "learning_rate": 8.311072120322114e-06, "loss": 0.4073, "step": 18581 }, { "epoch": 0.8527373686384287, "grad_norm": 0.4335267245769501, "learning_rate": 8.310888393513861e-06, "loss": 0.364, "step": 18582 }, { "epoch": 0.8527832591436831, "grad_norm": 0.48728227615356445, "learning_rate": 8.310704658743966e-06, "loss": 0.5208, "step": 18583 }, { "epoch": 0.8528291496489376, "grad_norm": 0.47027409076690674, "learning_rate": 8.310520916012872e-06, "loss": 0.4293, "step": 18584 }, { "epoch": 0.8528750401541921, "grad_norm": 0.46774375438690186, "learning_rate": 8.310337165321021e-06, "loss": 0.4487, "step": 18585 }, { "epoch": 0.8529209306594465, "grad_norm": 0.4528515338897705, "learning_rate": 8.310153406668853e-06, "loss": 0.3858, "step": 18586 }, { "epoch": 0.852966821164701, "grad_norm": 0.43130457401275635, "learning_rate": 8.30996964005681e-06, "loss": 0.3484, "step": 18587 }, { "epoch": 0.8530127116699555, "grad_norm": 0.4431041181087494, "learning_rate": 8.309785865485335e-06, "loss": 0.4093, "step": 18588 }, { "epoch": 0.8530586021752099, "grad_norm": 0.4667322635650635, "learning_rate": 8.30960208295487e-06, "loss": 0.4439, "step": 18589 }, { "epoch": 0.8531044926804644, "grad_norm": 0.4563106596469879, "learning_rate": 8.309418292465857e-06, "loss": 0.4261, "step": 18590 }, { "epoch": 0.8531503831857189, "grad_norm": 0.4588871896266937, "learning_rate": 8.309234494018735e-06, "loss": 0.3695, "step": 18591 }, { "epoch": 0.8531962736909733, "grad_norm": 0.4740724563598633, "learning_rate": 8.309050687613952e-06, "loss": 0.4199, "step": 18592 }, { "epoch": 0.8532421641962278, "grad_norm": 0.4747357964515686, "learning_rate": 8.308866873251944e-06, "loss": 0.43, "step": 18593 }, { "epoch": 0.8532880547014823, "grad_norm": 0.43254557251930237, "learning_rate": 8.308683050933156e-06, "loss": 0.3535, "step": 18594 }, { "epoch": 0.8533339452067368, "grad_norm": 0.4472940266132355, "learning_rate": 8.308499220658031e-06, "loss": 0.4224, "step": 18595 }, { "epoch": 0.8533798357119912, "grad_norm": 0.4438001811504364, "learning_rate": 8.308315382427006e-06, "loss": 0.3622, "step": 18596 }, { "epoch": 0.8534257262172457, "grad_norm": 0.47395575046539307, "learning_rate": 8.308131536240529e-06, "loss": 0.4788, "step": 18597 }, { "epoch": 0.8534716167225002, "grad_norm": 0.47048479318618774, "learning_rate": 8.30794768209904e-06, "loss": 0.4443, "step": 18598 }, { "epoch": 0.8535175072277545, "grad_norm": 0.44436895847320557, "learning_rate": 8.307763820002981e-06, "loss": 0.3924, "step": 18599 }, { "epoch": 0.853563397733009, "grad_norm": 0.4608636200428009, "learning_rate": 8.307579949952794e-06, "loss": 0.376, "step": 18600 }, { "epoch": 0.8536092882382635, "grad_norm": 0.5043268203735352, "learning_rate": 8.307396071948922e-06, "loss": 0.5206, "step": 18601 }, { "epoch": 0.8536551787435179, "grad_norm": 0.42557060718536377, "learning_rate": 8.307212185991803e-06, "loss": 0.3709, "step": 18602 }, { "epoch": 0.8537010692487724, "grad_norm": 0.4356067478656769, "learning_rate": 8.307028292081885e-06, "loss": 0.4129, "step": 18603 }, { "epoch": 0.8537469597540269, "grad_norm": 0.45736825466156006, "learning_rate": 8.306844390219607e-06, "loss": 0.4255, "step": 18604 }, { "epoch": 0.8537928502592813, "grad_norm": 0.46578824520111084, "learning_rate": 8.306660480405412e-06, "loss": 0.4371, "step": 18605 }, { "epoch": 0.8538387407645358, "grad_norm": 0.43310442566871643, "learning_rate": 8.306476562639743e-06, "loss": 0.4351, "step": 18606 }, { "epoch": 0.8538846312697903, "grad_norm": 0.46917885541915894, "learning_rate": 8.306292636923038e-06, "loss": 0.3807, "step": 18607 }, { "epoch": 0.8539305217750447, "grad_norm": 0.5084924101829529, "learning_rate": 8.306108703255747e-06, "loss": 0.4844, "step": 18608 }, { "epoch": 0.8539764122802992, "grad_norm": 0.45002466440200806, "learning_rate": 8.305924761638306e-06, "loss": 0.3673, "step": 18609 }, { "epoch": 0.8540223027855537, "grad_norm": 0.4343791604042053, "learning_rate": 8.30574081207116e-06, "loss": 0.3481, "step": 18610 }, { "epoch": 0.8540681932908082, "grad_norm": 0.41949135065078735, "learning_rate": 8.30555685455475e-06, "loss": 0.3172, "step": 18611 }, { "epoch": 0.8541140837960626, "grad_norm": 0.45560339093208313, "learning_rate": 8.30537288908952e-06, "loss": 0.3854, "step": 18612 }, { "epoch": 0.8541599743013171, "grad_norm": 0.44569453597068787, "learning_rate": 8.30518891567591e-06, "loss": 0.3651, "step": 18613 }, { "epoch": 0.8542058648065716, "grad_norm": 0.441384494304657, "learning_rate": 8.305004934314365e-06, "loss": 0.3679, "step": 18614 }, { "epoch": 0.854251755311826, "grad_norm": 0.4357912540435791, "learning_rate": 8.304820945005325e-06, "loss": 0.3469, "step": 18615 }, { "epoch": 0.8542976458170805, "grad_norm": 0.48267582058906555, "learning_rate": 8.304636947749234e-06, "loss": 0.434, "step": 18616 }, { "epoch": 0.854343536322335, "grad_norm": 0.4754229784011841, "learning_rate": 8.304452942546534e-06, "loss": 0.49, "step": 18617 }, { "epoch": 0.8543894268275893, "grad_norm": 0.4574068784713745, "learning_rate": 8.304268929397669e-06, "loss": 0.3908, "step": 18618 }, { "epoch": 0.8544353173328438, "grad_norm": 0.42670682072639465, "learning_rate": 8.304084908303079e-06, "loss": 0.3148, "step": 18619 }, { "epoch": 0.8544812078380983, "grad_norm": 0.44537562131881714, "learning_rate": 8.303900879263208e-06, "loss": 0.3758, "step": 18620 }, { "epoch": 0.8545270983433527, "grad_norm": 0.5011444091796875, "learning_rate": 8.303716842278499e-06, "loss": 0.508, "step": 18621 }, { "epoch": 0.8545729888486072, "grad_norm": 0.4539116621017456, "learning_rate": 8.303532797349391e-06, "loss": 0.3978, "step": 18622 }, { "epoch": 0.8546188793538617, "grad_norm": 0.4529321789741516, "learning_rate": 8.303348744476331e-06, "loss": 0.3833, "step": 18623 }, { "epoch": 0.8546647698591161, "grad_norm": 0.4328615069389343, "learning_rate": 8.30316468365976e-06, "loss": 0.3186, "step": 18624 }, { "epoch": 0.8547106603643706, "grad_norm": 0.4334472715854645, "learning_rate": 8.302980614900121e-06, "loss": 0.3518, "step": 18625 }, { "epoch": 0.8547565508696251, "grad_norm": 0.4645361602306366, "learning_rate": 8.302796538197855e-06, "loss": 0.4387, "step": 18626 }, { "epoch": 0.8548024413748795, "grad_norm": 0.4892256557941437, "learning_rate": 8.302612453553404e-06, "loss": 0.4383, "step": 18627 }, { "epoch": 0.854848331880134, "grad_norm": 0.4317033588886261, "learning_rate": 8.302428360967216e-06, "loss": 0.377, "step": 18628 }, { "epoch": 0.8548942223853885, "grad_norm": 0.4649083912372589, "learning_rate": 8.30224426043973e-06, "loss": 0.4483, "step": 18629 }, { "epoch": 0.854940112890643, "grad_norm": 0.47438356280326843, "learning_rate": 8.302060151971386e-06, "loss": 0.404, "step": 18630 }, { "epoch": 0.8549860033958974, "grad_norm": 0.47471895813941956, "learning_rate": 8.301876035562631e-06, "loss": 0.464, "step": 18631 }, { "epoch": 0.8550318939011519, "grad_norm": 0.4541177749633789, "learning_rate": 8.301691911213905e-06, "loss": 0.3931, "step": 18632 }, { "epoch": 0.8550777844064064, "grad_norm": 0.4797215163707733, "learning_rate": 8.301507778925653e-06, "loss": 0.4198, "step": 18633 }, { "epoch": 0.8551236749116607, "grad_norm": 0.46634167432785034, "learning_rate": 8.301323638698317e-06, "loss": 0.4109, "step": 18634 }, { "epoch": 0.8551695654169152, "grad_norm": 0.5000649094581604, "learning_rate": 8.301139490532339e-06, "loss": 0.465, "step": 18635 }, { "epoch": 0.8552154559221697, "grad_norm": 0.4616800546646118, "learning_rate": 8.300955334428163e-06, "loss": 0.3838, "step": 18636 }, { "epoch": 0.8552613464274241, "grad_norm": 0.5212560892105103, "learning_rate": 8.30077117038623e-06, "loss": 0.4669, "step": 18637 }, { "epoch": 0.8553072369326786, "grad_norm": 0.436909556388855, "learning_rate": 8.300586998406986e-06, "loss": 0.3449, "step": 18638 }, { "epoch": 0.8553531274379331, "grad_norm": 0.4428279995918274, "learning_rate": 8.30040281849087e-06, "loss": 0.3696, "step": 18639 }, { "epoch": 0.8553990179431875, "grad_norm": 0.44313177466392517, "learning_rate": 8.300218630638328e-06, "loss": 0.3716, "step": 18640 }, { "epoch": 0.855444908448442, "grad_norm": 0.4456782341003418, "learning_rate": 8.300034434849801e-06, "loss": 0.4077, "step": 18641 }, { "epoch": 0.8554907989536965, "grad_norm": 0.4661862254142761, "learning_rate": 8.299850231125733e-06, "loss": 0.4072, "step": 18642 }, { "epoch": 0.8555366894589509, "grad_norm": 0.42991846799850464, "learning_rate": 8.299666019466569e-06, "loss": 0.324, "step": 18643 }, { "epoch": 0.8555825799642054, "grad_norm": 0.44299641251564026, "learning_rate": 8.299481799872747e-06, "loss": 0.4042, "step": 18644 }, { "epoch": 0.8556284704694599, "grad_norm": 0.5018248558044434, "learning_rate": 8.299297572344711e-06, "loss": 0.51, "step": 18645 }, { "epoch": 0.8556743609747143, "grad_norm": 0.4239949882030487, "learning_rate": 8.29911333688291e-06, "loss": 0.3129, "step": 18646 }, { "epoch": 0.8557202514799688, "grad_norm": 0.4197768568992615, "learning_rate": 8.298929093487779e-06, "loss": 0.3262, "step": 18647 }, { "epoch": 0.8557661419852233, "grad_norm": 0.4195209741592407, "learning_rate": 8.298744842159766e-06, "loss": 0.3253, "step": 18648 }, { "epoch": 0.8558120324904778, "grad_norm": 0.4123789072036743, "learning_rate": 8.298560582899314e-06, "loss": 0.32, "step": 18649 }, { "epoch": 0.8558579229957322, "grad_norm": 0.4651382267475128, "learning_rate": 8.298376315706863e-06, "loss": 0.3669, "step": 18650 }, { "epoch": 0.8559038135009867, "grad_norm": 0.4356662631034851, "learning_rate": 8.298192040582859e-06, "loss": 0.369, "step": 18651 }, { "epoch": 0.8559497040062412, "grad_norm": 0.45351937413215637, "learning_rate": 8.298007757527745e-06, "loss": 0.4056, "step": 18652 }, { "epoch": 0.8559955945114955, "grad_norm": 0.44646474719047546, "learning_rate": 8.297823466541961e-06, "loss": 0.3953, "step": 18653 }, { "epoch": 0.85604148501675, "grad_norm": 0.45580729842185974, "learning_rate": 8.297639167625953e-06, "loss": 0.3711, "step": 18654 }, { "epoch": 0.8560873755220045, "grad_norm": 0.4668574333190918, "learning_rate": 8.297454860780164e-06, "loss": 0.3977, "step": 18655 }, { "epoch": 0.8561332660272589, "grad_norm": 0.46768882870674133, "learning_rate": 8.297270546005036e-06, "loss": 0.4483, "step": 18656 }, { "epoch": 0.8561791565325134, "grad_norm": 0.42442333698272705, "learning_rate": 8.297086223301015e-06, "loss": 0.3607, "step": 18657 }, { "epoch": 0.8562250470377679, "grad_norm": 0.449093222618103, "learning_rate": 8.296901892668541e-06, "loss": 0.4262, "step": 18658 }, { "epoch": 0.8562709375430223, "grad_norm": 0.46689721941947937, "learning_rate": 8.296717554108057e-06, "loss": 0.4007, "step": 18659 }, { "epoch": 0.8563168280482768, "grad_norm": 0.43982720375061035, "learning_rate": 8.29653320762001e-06, "loss": 0.4177, "step": 18660 }, { "epoch": 0.8563627185535313, "grad_norm": 0.47519272565841675, "learning_rate": 8.29634885320484e-06, "loss": 0.4488, "step": 18661 }, { "epoch": 0.8564086090587857, "grad_norm": 0.5117306113243103, "learning_rate": 8.296164490862991e-06, "loss": 0.4836, "step": 18662 }, { "epoch": 0.8564544995640402, "grad_norm": 0.44239315390586853, "learning_rate": 8.295980120594908e-06, "loss": 0.3719, "step": 18663 }, { "epoch": 0.8565003900692947, "grad_norm": 0.4526567757129669, "learning_rate": 8.295795742401031e-06, "loss": 0.403, "step": 18664 }, { "epoch": 0.8565462805745492, "grad_norm": 0.41223886609077454, "learning_rate": 8.295611356281808e-06, "loss": 0.3551, "step": 18665 }, { "epoch": 0.8565921710798036, "grad_norm": 0.4405532777309418, "learning_rate": 8.295426962237678e-06, "loss": 0.3365, "step": 18666 }, { "epoch": 0.8566380615850581, "grad_norm": 0.4507589042186737, "learning_rate": 8.295242560269087e-06, "loss": 0.3982, "step": 18667 }, { "epoch": 0.8566839520903126, "grad_norm": 0.41458186507225037, "learning_rate": 8.295058150376478e-06, "loss": 0.3309, "step": 18668 }, { "epoch": 0.856729842595567, "grad_norm": 0.5396983623504639, "learning_rate": 8.294873732560294e-06, "loss": 0.551, "step": 18669 }, { "epoch": 0.8567757331008214, "grad_norm": 0.45366430282592773, "learning_rate": 8.294689306820979e-06, "loss": 0.3921, "step": 18670 }, { "epoch": 0.8568216236060759, "grad_norm": 0.43828102946281433, "learning_rate": 8.294504873158973e-06, "loss": 0.3962, "step": 18671 }, { "epoch": 0.8568675141113303, "grad_norm": 0.47282692790031433, "learning_rate": 8.294320431574724e-06, "loss": 0.4356, "step": 18672 }, { "epoch": 0.8569134046165848, "grad_norm": 0.44989287853240967, "learning_rate": 8.294135982068676e-06, "loss": 0.4196, "step": 18673 }, { "epoch": 0.8569592951218393, "grad_norm": 0.48343321681022644, "learning_rate": 8.293951524641269e-06, "loss": 0.4344, "step": 18674 }, { "epoch": 0.8570051856270937, "grad_norm": 0.4631628096103668, "learning_rate": 8.29376705929295e-06, "loss": 0.3957, "step": 18675 }, { "epoch": 0.8570510761323482, "grad_norm": 0.4562375247478485, "learning_rate": 8.293582586024158e-06, "loss": 0.3857, "step": 18676 }, { "epoch": 0.8570969666376027, "grad_norm": 0.45833832025527954, "learning_rate": 8.293398104835342e-06, "loss": 0.3971, "step": 18677 }, { "epoch": 0.8571428571428571, "grad_norm": 0.44750750064849854, "learning_rate": 8.293213615726941e-06, "loss": 0.374, "step": 18678 }, { "epoch": 0.8571887476481116, "grad_norm": 0.4641525149345398, "learning_rate": 8.2930291186994e-06, "loss": 0.4355, "step": 18679 }, { "epoch": 0.8572346381533661, "grad_norm": 0.4336824119091034, "learning_rate": 8.292844613753165e-06, "loss": 0.3266, "step": 18680 }, { "epoch": 0.8572805286586205, "grad_norm": 0.49045807123184204, "learning_rate": 8.292660100888678e-06, "loss": 0.3933, "step": 18681 }, { "epoch": 0.857326419163875, "grad_norm": 0.44010084867477417, "learning_rate": 8.292475580106383e-06, "loss": 0.3997, "step": 18682 }, { "epoch": 0.8573723096691295, "grad_norm": 0.44646966457366943, "learning_rate": 8.292291051406722e-06, "loss": 0.3818, "step": 18683 }, { "epoch": 0.857418200174384, "grad_norm": 0.4187343716621399, "learning_rate": 8.292106514790141e-06, "loss": 0.3297, "step": 18684 }, { "epoch": 0.8574640906796384, "grad_norm": 0.4503140449523926, "learning_rate": 8.291921970257081e-06, "loss": 0.4082, "step": 18685 }, { "epoch": 0.8575099811848929, "grad_norm": 0.43613624572753906, "learning_rate": 8.29173741780799e-06, "loss": 0.3359, "step": 18686 }, { "epoch": 0.8575558716901474, "grad_norm": 0.47834959626197815, "learning_rate": 8.291552857443308e-06, "loss": 0.394, "step": 18687 }, { "epoch": 0.8576017621954017, "grad_norm": 0.49096235632896423, "learning_rate": 8.291368289163481e-06, "loss": 0.4281, "step": 18688 }, { "epoch": 0.8576476527006562, "grad_norm": 0.43833431601524353, "learning_rate": 8.291183712968953e-06, "loss": 0.3746, "step": 18689 }, { "epoch": 0.8576935432059107, "grad_norm": 0.44437000155448914, "learning_rate": 8.290999128860165e-06, "loss": 0.3657, "step": 18690 }, { "epoch": 0.8577394337111651, "grad_norm": 0.46153831481933594, "learning_rate": 8.290814536837564e-06, "loss": 0.4234, "step": 18691 }, { "epoch": 0.8577853242164196, "grad_norm": 0.4904228746891022, "learning_rate": 8.290629936901592e-06, "loss": 0.4044, "step": 18692 }, { "epoch": 0.8578312147216741, "grad_norm": 0.44085782766342163, "learning_rate": 8.290445329052694e-06, "loss": 0.3581, "step": 18693 }, { "epoch": 0.8578771052269285, "grad_norm": 0.5074067115783691, "learning_rate": 8.290260713291313e-06, "loss": 0.4661, "step": 18694 }, { "epoch": 0.857922995732183, "grad_norm": 0.45443975925445557, "learning_rate": 8.290076089617893e-06, "loss": 0.3883, "step": 18695 }, { "epoch": 0.8579688862374375, "grad_norm": 0.44784635305404663, "learning_rate": 8.289891458032879e-06, "loss": 0.3704, "step": 18696 }, { "epoch": 0.8580147767426919, "grad_norm": 0.45705053210258484, "learning_rate": 8.289706818536715e-06, "loss": 0.4182, "step": 18697 }, { "epoch": 0.8580606672479464, "grad_norm": 0.4935908615589142, "learning_rate": 8.289522171129843e-06, "loss": 0.45, "step": 18698 }, { "epoch": 0.8581065577532009, "grad_norm": 0.4599513113498688, "learning_rate": 8.28933751581271e-06, "loss": 0.4033, "step": 18699 }, { "epoch": 0.8581524482584554, "grad_norm": 0.47726890444755554, "learning_rate": 8.289152852585759e-06, "loss": 0.4131, "step": 18700 }, { "epoch": 0.8581983387637098, "grad_norm": 0.4352322518825531, "learning_rate": 8.28896818144943e-06, "loss": 0.3757, "step": 18701 }, { "epoch": 0.8582442292689643, "grad_norm": 0.42752277851104736, "learning_rate": 8.288783502404173e-06, "loss": 0.3671, "step": 18702 }, { "epoch": 0.8582901197742188, "grad_norm": 0.5073032379150391, "learning_rate": 8.28859881545043e-06, "loss": 0.506, "step": 18703 }, { "epoch": 0.8583360102794731, "grad_norm": 0.46708911657333374, "learning_rate": 8.288414120588644e-06, "loss": 0.4275, "step": 18704 }, { "epoch": 0.8583819007847276, "grad_norm": 0.45909813046455383, "learning_rate": 8.28822941781926e-06, "loss": 0.3875, "step": 18705 }, { "epoch": 0.8584277912899821, "grad_norm": 0.41881436109542847, "learning_rate": 8.288044707142721e-06, "loss": 0.32, "step": 18706 }, { "epoch": 0.8584736817952365, "grad_norm": 0.4687194526195526, "learning_rate": 8.287859988559473e-06, "loss": 0.3865, "step": 18707 }, { "epoch": 0.858519572300491, "grad_norm": 0.4324939548969269, "learning_rate": 8.28767526206996e-06, "loss": 0.4157, "step": 18708 }, { "epoch": 0.8585654628057455, "grad_norm": 0.4381049573421478, "learning_rate": 8.287490527674624e-06, "loss": 0.3792, "step": 18709 }, { "epoch": 0.8586113533109999, "grad_norm": 0.47735533118247986, "learning_rate": 8.287305785373912e-06, "loss": 0.4707, "step": 18710 }, { "epoch": 0.8586572438162544, "grad_norm": 0.4391564726829529, "learning_rate": 8.287121035168269e-06, "loss": 0.4021, "step": 18711 }, { "epoch": 0.8587031343215089, "grad_norm": 0.4485732913017273, "learning_rate": 8.286936277058135e-06, "loss": 0.4079, "step": 18712 }, { "epoch": 0.8587490248267633, "grad_norm": 0.5098724365234375, "learning_rate": 8.286751511043955e-06, "loss": 0.4337, "step": 18713 }, { "epoch": 0.8587949153320178, "grad_norm": 0.422309935092926, "learning_rate": 8.286566737126179e-06, "loss": 0.351, "step": 18714 }, { "epoch": 0.8588408058372723, "grad_norm": 0.4879932105541229, "learning_rate": 8.286381955305244e-06, "loss": 0.4442, "step": 18715 }, { "epoch": 0.8588866963425267, "grad_norm": 0.5038551688194275, "learning_rate": 8.2861971655816e-06, "loss": 0.4103, "step": 18716 }, { "epoch": 0.8589325868477812, "grad_norm": 0.47220665216445923, "learning_rate": 8.286012367955687e-06, "loss": 0.4086, "step": 18717 }, { "epoch": 0.8589784773530357, "grad_norm": 0.5163639187812805, "learning_rate": 8.285827562427953e-06, "loss": 0.5222, "step": 18718 }, { "epoch": 0.8590243678582902, "grad_norm": 0.4519052505493164, "learning_rate": 8.285642748998839e-06, "loss": 0.4086, "step": 18719 }, { "epoch": 0.8590702583635446, "grad_norm": 0.47476083040237427, "learning_rate": 8.285457927668791e-06, "loss": 0.4979, "step": 18720 }, { "epoch": 0.8591161488687991, "grad_norm": 0.43274304270744324, "learning_rate": 8.285273098438255e-06, "loss": 0.3378, "step": 18721 }, { "epoch": 0.8591620393740536, "grad_norm": 0.4863831400871277, "learning_rate": 8.285088261307674e-06, "loss": 0.4995, "step": 18722 }, { "epoch": 0.8592079298793079, "grad_norm": 0.3944115936756134, "learning_rate": 8.284903416277494e-06, "loss": 0.293, "step": 18723 }, { "epoch": 0.8592538203845624, "grad_norm": 0.47482049465179443, "learning_rate": 8.284718563348155e-06, "loss": 0.3915, "step": 18724 }, { "epoch": 0.8592997108898169, "grad_norm": 0.4530799984931946, "learning_rate": 8.284533702520106e-06, "loss": 0.4082, "step": 18725 }, { "epoch": 0.8593456013950713, "grad_norm": 0.4449470639228821, "learning_rate": 8.284348833793791e-06, "loss": 0.3999, "step": 18726 }, { "epoch": 0.8593914919003258, "grad_norm": 0.45700088143348694, "learning_rate": 8.284163957169651e-06, "loss": 0.3598, "step": 18727 }, { "epoch": 0.8594373824055803, "grad_norm": 0.43505656719207764, "learning_rate": 8.283979072648135e-06, "loss": 0.3794, "step": 18728 }, { "epoch": 0.8594832729108347, "grad_norm": 0.44833219051361084, "learning_rate": 8.283794180229686e-06, "loss": 0.374, "step": 18729 }, { "epoch": 0.8595291634160892, "grad_norm": 0.4922808110713959, "learning_rate": 8.283609279914749e-06, "loss": 0.2908, "step": 18730 }, { "epoch": 0.8595750539213437, "grad_norm": 0.4684850871562958, "learning_rate": 8.283424371703765e-06, "loss": 0.4366, "step": 18731 }, { "epoch": 0.8596209444265981, "grad_norm": 0.46290862560272217, "learning_rate": 8.283239455597185e-06, "loss": 0.4576, "step": 18732 }, { "epoch": 0.8596668349318526, "grad_norm": 0.4056648910045624, "learning_rate": 8.28305453159545e-06, "loss": 0.3366, "step": 18733 }, { "epoch": 0.8597127254371071, "grad_norm": 0.43588516116142273, "learning_rate": 8.282869599699003e-06, "loss": 0.3381, "step": 18734 }, { "epoch": 0.8597586159423615, "grad_norm": 0.4483650028705597, "learning_rate": 8.282684659908291e-06, "loss": 0.4338, "step": 18735 }, { "epoch": 0.859804506447616, "grad_norm": 0.47305282950401306, "learning_rate": 8.282499712223759e-06, "loss": 0.3745, "step": 18736 }, { "epoch": 0.8598503969528705, "grad_norm": 0.451948881149292, "learning_rate": 8.282314756645853e-06, "loss": 0.4039, "step": 18737 }, { "epoch": 0.859896287458125, "grad_norm": 0.44452202320098877, "learning_rate": 8.282129793175014e-06, "loss": 0.331, "step": 18738 }, { "epoch": 0.8599421779633794, "grad_norm": 0.44473472237586975, "learning_rate": 8.281944821811689e-06, "loss": 0.3594, "step": 18739 }, { "epoch": 0.8599880684686338, "grad_norm": 0.4818864166736603, "learning_rate": 8.281759842556321e-06, "loss": 0.4491, "step": 18740 }, { "epoch": 0.8600339589738883, "grad_norm": 0.4821096956729889, "learning_rate": 8.281574855409357e-06, "loss": 0.4181, "step": 18741 }, { "epoch": 0.8600798494791427, "grad_norm": 0.43771257996559143, "learning_rate": 8.281389860371243e-06, "loss": 0.3676, "step": 18742 }, { "epoch": 0.8601257399843972, "grad_norm": 0.4804643988609314, "learning_rate": 8.281204857442422e-06, "loss": 0.4131, "step": 18743 }, { "epoch": 0.8601716304896517, "grad_norm": 0.48040661215782166, "learning_rate": 8.281019846623337e-06, "loss": 0.5054, "step": 18744 }, { "epoch": 0.8602175209949061, "grad_norm": 0.44240328669548035, "learning_rate": 8.280834827914436e-06, "loss": 0.3443, "step": 18745 }, { "epoch": 0.8602634115001606, "grad_norm": 0.4366568624973297, "learning_rate": 8.280649801316162e-06, "loss": 0.3426, "step": 18746 }, { "epoch": 0.8603093020054151, "grad_norm": 0.41266071796417236, "learning_rate": 8.280464766828962e-06, "loss": 0.2916, "step": 18747 }, { "epoch": 0.8603551925106695, "grad_norm": 0.4484911859035492, "learning_rate": 8.280279724453279e-06, "loss": 0.4316, "step": 18748 }, { "epoch": 0.860401083015924, "grad_norm": 0.445197731256485, "learning_rate": 8.280094674189556e-06, "loss": 0.3644, "step": 18749 }, { "epoch": 0.8604469735211785, "grad_norm": 0.5216633677482605, "learning_rate": 8.279909616038245e-06, "loss": 0.4693, "step": 18750 }, { "epoch": 0.8604928640264329, "grad_norm": 0.485584557056427, "learning_rate": 8.279724549999784e-06, "loss": 0.4725, "step": 18751 }, { "epoch": 0.8605387545316874, "grad_norm": 0.4341680407524109, "learning_rate": 8.27953947607462e-06, "loss": 0.3859, "step": 18752 }, { "epoch": 0.8605846450369419, "grad_norm": 0.4843033254146576, "learning_rate": 8.2793543942632e-06, "loss": 0.4162, "step": 18753 }, { "epoch": 0.8606305355421964, "grad_norm": 0.46687307953834534, "learning_rate": 8.279169304565965e-06, "loss": 0.4097, "step": 18754 }, { "epoch": 0.8606764260474508, "grad_norm": 0.47676655650138855, "learning_rate": 8.278984206983366e-06, "loss": 0.4575, "step": 18755 }, { "epoch": 0.8607223165527053, "grad_norm": 0.44742581248283386, "learning_rate": 8.278799101515845e-06, "loss": 0.3877, "step": 18756 }, { "epoch": 0.8607682070579598, "grad_norm": 0.46821296215057373, "learning_rate": 8.278613988163845e-06, "loss": 0.4131, "step": 18757 }, { "epoch": 0.8608140975632141, "grad_norm": 0.4712432026863098, "learning_rate": 8.278428866927813e-06, "loss": 0.3864, "step": 18758 }, { "epoch": 0.8608599880684686, "grad_norm": 0.4531097114086151, "learning_rate": 8.278243737808195e-06, "loss": 0.3753, "step": 18759 }, { "epoch": 0.8609058785737231, "grad_norm": 0.46649661660194397, "learning_rate": 8.278058600805436e-06, "loss": 0.3167, "step": 18760 }, { "epoch": 0.8609517690789775, "grad_norm": 0.4456283152103424, "learning_rate": 8.27787345591998e-06, "loss": 0.3792, "step": 18761 }, { "epoch": 0.860997659584232, "grad_norm": 0.44161224365234375, "learning_rate": 8.277688303152274e-06, "loss": 0.4132, "step": 18762 }, { "epoch": 0.8610435500894865, "grad_norm": 0.42752066254615784, "learning_rate": 8.277503142502761e-06, "loss": 0.3963, "step": 18763 }, { "epoch": 0.8610894405947409, "grad_norm": 0.428093284368515, "learning_rate": 8.277317973971888e-06, "loss": 0.3516, "step": 18764 }, { "epoch": 0.8611353310999954, "grad_norm": 0.4360617399215698, "learning_rate": 8.277132797560099e-06, "loss": 0.4017, "step": 18765 }, { "epoch": 0.8611812216052499, "grad_norm": 0.4670145809650421, "learning_rate": 8.276947613267838e-06, "loss": 0.4073, "step": 18766 }, { "epoch": 0.8612271121105043, "grad_norm": 0.4451378285884857, "learning_rate": 8.276762421095556e-06, "loss": 0.4104, "step": 18767 }, { "epoch": 0.8612730026157588, "grad_norm": 0.4484672546386719, "learning_rate": 8.276577221043692e-06, "loss": 0.3867, "step": 18768 }, { "epoch": 0.8613188931210133, "grad_norm": 0.47523221373558044, "learning_rate": 8.276392013112696e-06, "loss": 0.4791, "step": 18769 }, { "epoch": 0.8613647836262677, "grad_norm": 0.4498392641544342, "learning_rate": 8.27620679730301e-06, "loss": 0.3865, "step": 18770 }, { "epoch": 0.8614106741315222, "grad_norm": 0.4520481526851654, "learning_rate": 8.27602157361508e-06, "loss": 0.4076, "step": 18771 }, { "epoch": 0.8614565646367767, "grad_norm": 0.49443531036376953, "learning_rate": 8.275836342049353e-06, "loss": 0.3854, "step": 18772 }, { "epoch": 0.8615024551420312, "grad_norm": 0.4288277328014374, "learning_rate": 8.275651102606273e-06, "loss": 0.3735, "step": 18773 }, { "epoch": 0.8615483456472856, "grad_norm": 0.41894081234931946, "learning_rate": 8.275465855286285e-06, "loss": 0.3472, "step": 18774 }, { "epoch": 0.86159423615254, "grad_norm": 0.42563942074775696, "learning_rate": 8.275280600089836e-06, "loss": 0.3352, "step": 18775 }, { "epoch": 0.8616401266577945, "grad_norm": 0.44500932097435, "learning_rate": 8.275095337017374e-06, "loss": 0.3777, "step": 18776 }, { "epoch": 0.8616860171630489, "grad_norm": 0.43110716342926025, "learning_rate": 8.274910066069337e-06, "loss": 0.398, "step": 18777 }, { "epoch": 0.8617319076683034, "grad_norm": 0.5619285106658936, "learning_rate": 8.274724787246177e-06, "loss": 0.369, "step": 18778 }, { "epoch": 0.8617777981735579, "grad_norm": 0.4262608587741852, "learning_rate": 8.274539500548337e-06, "loss": 0.3207, "step": 18779 }, { "epoch": 0.8618236886788123, "grad_norm": 0.5058470964431763, "learning_rate": 8.274354205976262e-06, "loss": 0.5169, "step": 18780 }, { "epoch": 0.8618695791840668, "grad_norm": 0.5004580616950989, "learning_rate": 8.274168903530403e-06, "loss": 0.465, "step": 18781 }, { "epoch": 0.8619154696893213, "grad_norm": 0.4529927968978882, "learning_rate": 8.273983593211194e-06, "loss": 0.3909, "step": 18782 }, { "epoch": 0.8619613601945757, "grad_norm": 0.45167145133018494, "learning_rate": 8.273798275019092e-06, "loss": 0.3974, "step": 18783 }, { "epoch": 0.8620072506998302, "grad_norm": 0.44238150119781494, "learning_rate": 8.273612948954539e-06, "loss": 0.3991, "step": 18784 }, { "epoch": 0.8620531412050847, "grad_norm": 0.4169408977031708, "learning_rate": 8.273427615017978e-06, "loss": 0.318, "step": 18785 }, { "epoch": 0.8620990317103391, "grad_norm": 0.42822885513305664, "learning_rate": 8.273242273209858e-06, "loss": 0.3789, "step": 18786 }, { "epoch": 0.8621449222155936, "grad_norm": 0.42024970054626465, "learning_rate": 8.273056923530624e-06, "loss": 0.3529, "step": 18787 }, { "epoch": 0.8621908127208481, "grad_norm": 0.4831533432006836, "learning_rate": 8.27287156598072e-06, "loss": 0.3879, "step": 18788 }, { "epoch": 0.8622367032261026, "grad_norm": 0.44380542635917664, "learning_rate": 8.272686200560591e-06, "loss": 0.3583, "step": 18789 }, { "epoch": 0.862282593731357, "grad_norm": 0.4034491181373596, "learning_rate": 8.272500827270687e-06, "loss": 0.3084, "step": 18790 }, { "epoch": 0.8623284842366115, "grad_norm": 0.4250091314315796, "learning_rate": 8.272315446111452e-06, "loss": 0.3122, "step": 18791 }, { "epoch": 0.862374374741866, "grad_norm": 0.4368078410625458, "learning_rate": 8.27213005708333e-06, "loss": 0.3774, "step": 18792 }, { "epoch": 0.8624202652471203, "grad_norm": 0.489898145198822, "learning_rate": 8.271944660186768e-06, "loss": 0.5368, "step": 18793 }, { "epoch": 0.8624661557523748, "grad_norm": 0.50797438621521, "learning_rate": 8.271759255422211e-06, "loss": 0.5204, "step": 18794 }, { "epoch": 0.8625120462576293, "grad_norm": 0.4738045632839203, "learning_rate": 8.271573842790107e-06, "loss": 0.3832, "step": 18795 }, { "epoch": 0.8625579367628837, "grad_norm": 0.4486936330795288, "learning_rate": 8.271388422290897e-06, "loss": 0.3959, "step": 18796 }, { "epoch": 0.8626038272681382, "grad_norm": 0.4754834771156311, "learning_rate": 8.271202993925035e-06, "loss": 0.3931, "step": 18797 }, { "epoch": 0.8626497177733927, "grad_norm": 0.4636716842651367, "learning_rate": 8.27101755769296e-06, "loss": 0.4171, "step": 18798 }, { "epoch": 0.8626956082786471, "grad_norm": 0.42925846576690674, "learning_rate": 8.270832113595119e-06, "loss": 0.3415, "step": 18799 }, { "epoch": 0.8627414987839016, "grad_norm": 0.47872334718704224, "learning_rate": 8.270646661631961e-06, "loss": 0.4016, "step": 18800 }, { "epoch": 0.8627873892891561, "grad_norm": 0.5090519785881042, "learning_rate": 8.27046120180393e-06, "loss": 0.4043, "step": 18801 }, { "epoch": 0.8628332797944105, "grad_norm": 0.45238083600997925, "learning_rate": 8.270275734111473e-06, "loss": 0.3654, "step": 18802 }, { "epoch": 0.862879170299665, "grad_norm": 0.43062824010849, "learning_rate": 8.270090258555032e-06, "loss": 0.3457, "step": 18803 }, { "epoch": 0.8629250608049195, "grad_norm": 0.4600858688354492, "learning_rate": 8.269904775135059e-06, "loss": 0.4174, "step": 18804 }, { "epoch": 0.8629709513101739, "grad_norm": 0.42829200625419617, "learning_rate": 8.269719283851993e-06, "loss": 0.383, "step": 18805 }, { "epoch": 0.8630168418154284, "grad_norm": 0.4466390907764435, "learning_rate": 8.269533784706287e-06, "loss": 0.3521, "step": 18806 }, { "epoch": 0.8630627323206829, "grad_norm": 0.4547543227672577, "learning_rate": 8.269348277698383e-06, "loss": 0.3734, "step": 18807 }, { "epoch": 0.8631086228259374, "grad_norm": 0.4274538457393646, "learning_rate": 8.269162762828731e-06, "loss": 0.3557, "step": 18808 }, { "epoch": 0.8631545133311918, "grad_norm": 0.46227550506591797, "learning_rate": 8.268977240097771e-06, "loss": 0.385, "step": 18809 }, { "epoch": 0.8632004038364463, "grad_norm": 0.4527345895767212, "learning_rate": 8.268791709505954e-06, "loss": 0.3543, "step": 18810 }, { "epoch": 0.8632462943417007, "grad_norm": 0.4236842095851898, "learning_rate": 8.268606171053723e-06, "loss": 0.3491, "step": 18811 }, { "epoch": 0.8632921848469551, "grad_norm": 0.4758046567440033, "learning_rate": 8.268420624741529e-06, "loss": 0.432, "step": 18812 }, { "epoch": 0.8633380753522096, "grad_norm": 0.4896450340747833, "learning_rate": 8.268235070569812e-06, "loss": 0.4555, "step": 18813 }, { "epoch": 0.8633839658574641, "grad_norm": 0.5041840672492981, "learning_rate": 8.268049508539022e-06, "loss": 0.5418, "step": 18814 }, { "epoch": 0.8634298563627185, "grad_norm": 0.46307623386383057, "learning_rate": 8.267863938649604e-06, "loss": 0.4701, "step": 18815 }, { "epoch": 0.863475746867973, "grad_norm": 0.46478649973869324, "learning_rate": 8.267678360902006e-06, "loss": 0.4203, "step": 18816 }, { "epoch": 0.8635216373732275, "grad_norm": 0.4625875651836395, "learning_rate": 8.267492775296671e-06, "loss": 0.36, "step": 18817 }, { "epoch": 0.8635675278784819, "grad_norm": 0.4322952330112457, "learning_rate": 8.26730718183405e-06, "loss": 0.3775, "step": 18818 }, { "epoch": 0.8636134183837364, "grad_norm": 0.46911996603012085, "learning_rate": 8.267121580514583e-06, "loss": 0.3921, "step": 18819 }, { "epoch": 0.8636593088889909, "grad_norm": 0.43232011795043945, "learning_rate": 8.266935971338721e-06, "loss": 0.3507, "step": 18820 }, { "epoch": 0.8637051993942453, "grad_norm": 0.4690300524234772, "learning_rate": 8.266750354306909e-06, "loss": 0.4397, "step": 18821 }, { "epoch": 0.8637510898994998, "grad_norm": 0.47257089614868164, "learning_rate": 8.266564729419595e-06, "loss": 0.4168, "step": 18822 }, { "epoch": 0.8637969804047543, "grad_norm": 0.4271184802055359, "learning_rate": 8.266379096677222e-06, "loss": 0.3878, "step": 18823 }, { "epoch": 0.8638428709100087, "grad_norm": 0.498144268989563, "learning_rate": 8.26619345608024e-06, "loss": 0.5204, "step": 18824 }, { "epoch": 0.8638887614152632, "grad_norm": 0.4803226888179779, "learning_rate": 8.26600780762909e-06, "loss": 0.396, "step": 18825 }, { "epoch": 0.8639346519205177, "grad_norm": 0.47004008293151855, "learning_rate": 8.265822151324225e-06, "loss": 0.445, "step": 18826 }, { "epoch": 0.8639805424257722, "grad_norm": 0.5159043073654175, "learning_rate": 8.26563648716609e-06, "loss": 0.4985, "step": 18827 }, { "epoch": 0.8640264329310265, "grad_norm": 0.4207015037536621, "learning_rate": 8.265450815155125e-06, "loss": 0.3383, "step": 18828 }, { "epoch": 0.864072323436281, "grad_norm": 0.4481675624847412, "learning_rate": 8.265265135291785e-06, "loss": 0.4124, "step": 18829 }, { "epoch": 0.8641182139415355, "grad_norm": 0.4736972153186798, "learning_rate": 8.265079447576511e-06, "loss": 0.503, "step": 18830 }, { "epoch": 0.8641641044467899, "grad_norm": 0.4788176715373993, "learning_rate": 8.264893752009752e-06, "loss": 0.4876, "step": 18831 }, { "epoch": 0.8642099949520444, "grad_norm": 0.45518481731414795, "learning_rate": 8.264708048591955e-06, "loss": 0.3471, "step": 18832 }, { "epoch": 0.8642558854572989, "grad_norm": 0.46216586232185364, "learning_rate": 8.264522337323565e-06, "loss": 0.3826, "step": 18833 }, { "epoch": 0.8643017759625533, "grad_norm": 0.45073795318603516, "learning_rate": 8.26433661820503e-06, "loss": 0.407, "step": 18834 }, { "epoch": 0.8643476664678078, "grad_norm": 0.4431460499763489, "learning_rate": 8.264150891236794e-06, "loss": 0.3706, "step": 18835 }, { "epoch": 0.8643935569730623, "grad_norm": 0.45016369223594666, "learning_rate": 8.263965156419304e-06, "loss": 0.3712, "step": 18836 }, { "epoch": 0.8644394474783167, "grad_norm": 0.4362359344959259, "learning_rate": 8.26377941375301e-06, "loss": 0.3516, "step": 18837 }, { "epoch": 0.8644853379835712, "grad_norm": 0.47872158885002136, "learning_rate": 8.263593663238359e-06, "loss": 0.4214, "step": 18838 }, { "epoch": 0.8645312284888257, "grad_norm": 0.4413442611694336, "learning_rate": 8.26340790487579e-06, "loss": 0.3548, "step": 18839 }, { "epoch": 0.8645771189940801, "grad_norm": 0.4541882574558258, "learning_rate": 8.263222138665759e-06, "loss": 0.3644, "step": 18840 }, { "epoch": 0.8646230094993346, "grad_norm": 0.4624963104724884, "learning_rate": 8.263036364608707e-06, "loss": 0.4304, "step": 18841 }, { "epoch": 0.8646689000045891, "grad_norm": 0.4434128403663635, "learning_rate": 8.262850582705083e-06, "loss": 0.4376, "step": 18842 }, { "epoch": 0.8647147905098436, "grad_norm": 0.48018375039100647, "learning_rate": 8.262664792955333e-06, "loss": 0.4827, "step": 18843 }, { "epoch": 0.864760681015098, "grad_norm": 0.44617676734924316, "learning_rate": 8.262478995359905e-06, "loss": 0.3349, "step": 18844 }, { "epoch": 0.8648065715203525, "grad_norm": 0.4706483781337738, "learning_rate": 8.262293189919242e-06, "loss": 0.441, "step": 18845 }, { "epoch": 0.864852462025607, "grad_norm": 0.44068026542663574, "learning_rate": 8.262107376633794e-06, "loss": 0.332, "step": 18846 }, { "epoch": 0.8648983525308613, "grad_norm": 0.46326667070388794, "learning_rate": 8.26192155550401e-06, "loss": 0.3601, "step": 18847 }, { "epoch": 0.8649442430361158, "grad_norm": 0.48375481367111206, "learning_rate": 8.261735726530332e-06, "loss": 0.4311, "step": 18848 }, { "epoch": 0.8649901335413703, "grad_norm": 0.4684853255748749, "learning_rate": 8.261549889713209e-06, "loss": 0.397, "step": 18849 }, { "epoch": 0.8650360240466247, "grad_norm": 0.42297425866127014, "learning_rate": 8.261364045053088e-06, "loss": 0.3174, "step": 18850 }, { "epoch": 0.8650819145518792, "grad_norm": 0.4757521450519562, "learning_rate": 8.261178192550416e-06, "loss": 0.4633, "step": 18851 }, { "epoch": 0.8651278050571337, "grad_norm": 0.46760454773902893, "learning_rate": 8.26099233220564e-06, "loss": 0.4217, "step": 18852 }, { "epoch": 0.8651736955623881, "grad_norm": 0.45929697155952454, "learning_rate": 8.260806464019206e-06, "loss": 0.3428, "step": 18853 }, { "epoch": 0.8652195860676426, "grad_norm": 0.430281400680542, "learning_rate": 8.260620587991561e-06, "loss": 0.315, "step": 18854 }, { "epoch": 0.8652654765728971, "grad_norm": 0.4463421702384949, "learning_rate": 8.260434704123155e-06, "loss": 0.4245, "step": 18855 }, { "epoch": 0.8653113670781515, "grad_norm": 0.47195881605148315, "learning_rate": 8.26024881241443e-06, "loss": 0.3876, "step": 18856 }, { "epoch": 0.865357257583406, "grad_norm": 0.4438212215900421, "learning_rate": 8.260062912865835e-06, "loss": 0.3988, "step": 18857 }, { "epoch": 0.8654031480886605, "grad_norm": 0.4748304784297943, "learning_rate": 8.25987700547782e-06, "loss": 0.3946, "step": 18858 }, { "epoch": 0.8654490385939149, "grad_norm": 0.43733811378479004, "learning_rate": 8.25969109025083e-06, "loss": 0.3932, "step": 18859 }, { "epoch": 0.8654949290991694, "grad_norm": 0.45298895239830017, "learning_rate": 8.25950516718531e-06, "loss": 0.4099, "step": 18860 }, { "epoch": 0.8655408196044239, "grad_norm": 0.4728960692882538, "learning_rate": 8.259319236281709e-06, "loss": 0.4913, "step": 18861 }, { "epoch": 0.8655867101096784, "grad_norm": 0.4645313620567322, "learning_rate": 8.259133297540474e-06, "loss": 0.378, "step": 18862 }, { "epoch": 0.8656326006149327, "grad_norm": 0.4668658375740051, "learning_rate": 8.258947350962054e-06, "loss": 0.3918, "step": 18863 }, { "epoch": 0.8656784911201872, "grad_norm": 0.4860525131225586, "learning_rate": 8.258761396546892e-06, "loss": 0.4637, "step": 18864 }, { "epoch": 0.8657243816254417, "grad_norm": 0.4890578091144562, "learning_rate": 8.258575434295438e-06, "loss": 0.51, "step": 18865 }, { "epoch": 0.8657702721306961, "grad_norm": 0.45143988728523254, "learning_rate": 8.258389464208138e-06, "loss": 0.4269, "step": 18866 }, { "epoch": 0.8658161626359506, "grad_norm": 0.414246141910553, "learning_rate": 8.258203486285442e-06, "loss": 0.3436, "step": 18867 }, { "epoch": 0.8658620531412051, "grad_norm": 0.47159600257873535, "learning_rate": 8.25801750052779e-06, "loss": 0.4173, "step": 18868 }, { "epoch": 0.8659079436464595, "grad_norm": 0.44948068261146545, "learning_rate": 8.257831506935638e-06, "loss": 0.4243, "step": 18869 }, { "epoch": 0.865953834151714, "grad_norm": 0.4541017413139343, "learning_rate": 8.257645505509429e-06, "loss": 0.3663, "step": 18870 }, { "epoch": 0.8659997246569685, "grad_norm": 0.39956530928611755, "learning_rate": 8.257459496249608e-06, "loss": 0.3021, "step": 18871 }, { "epoch": 0.8660456151622229, "grad_norm": 0.6714522838592529, "learning_rate": 8.257273479156629e-06, "loss": 0.396, "step": 18872 }, { "epoch": 0.8660915056674774, "grad_norm": 0.47061097621917725, "learning_rate": 8.257087454230934e-06, "loss": 0.4329, "step": 18873 }, { "epoch": 0.8661373961727319, "grad_norm": 0.47941428422927856, "learning_rate": 8.256901421472971e-06, "loss": 0.3428, "step": 18874 }, { "epoch": 0.8661832866779863, "grad_norm": 0.46274253726005554, "learning_rate": 8.256715380883189e-06, "loss": 0.3713, "step": 18875 }, { "epoch": 0.8662291771832408, "grad_norm": 0.4951189160346985, "learning_rate": 8.256529332462033e-06, "loss": 0.4903, "step": 18876 }, { "epoch": 0.8662750676884953, "grad_norm": 0.45886680483818054, "learning_rate": 8.256343276209953e-06, "loss": 0.3921, "step": 18877 }, { "epoch": 0.8663209581937498, "grad_norm": 0.49009016156196594, "learning_rate": 8.256157212127394e-06, "loss": 0.386, "step": 18878 }, { "epoch": 0.8663668486990042, "grad_norm": 0.46046024560928345, "learning_rate": 8.255971140214804e-06, "loss": 0.412, "step": 18879 }, { "epoch": 0.8664127392042587, "grad_norm": 0.4428119659423828, "learning_rate": 8.255785060472634e-06, "loss": 0.3836, "step": 18880 }, { "epoch": 0.8664586297095132, "grad_norm": 0.4646400511264801, "learning_rate": 8.255598972901327e-06, "loss": 0.423, "step": 18881 }, { "epoch": 0.8665045202147675, "grad_norm": 0.4498136341571808, "learning_rate": 8.25541287750133e-06, "loss": 0.3831, "step": 18882 }, { "epoch": 0.866550410720022, "grad_norm": 0.5011245012283325, "learning_rate": 8.255226774273095e-06, "loss": 0.4586, "step": 18883 }, { "epoch": 0.8665963012252765, "grad_norm": 0.45868098735809326, "learning_rate": 8.255040663217066e-06, "loss": 0.4041, "step": 18884 }, { "epoch": 0.8666421917305309, "grad_norm": 0.44030332565307617, "learning_rate": 8.254854544333691e-06, "loss": 0.3901, "step": 18885 }, { "epoch": 0.8666880822357854, "grad_norm": 0.4524097740650177, "learning_rate": 8.25466841762342e-06, "loss": 0.4439, "step": 18886 }, { "epoch": 0.8667339727410399, "grad_norm": 0.4806548058986664, "learning_rate": 8.254482283086696e-06, "loss": 0.4949, "step": 18887 }, { "epoch": 0.8667798632462943, "grad_norm": 0.42640072107315063, "learning_rate": 8.254296140723969e-06, "loss": 0.3111, "step": 18888 }, { "epoch": 0.8668257537515488, "grad_norm": 0.48186102509498596, "learning_rate": 8.254109990535691e-06, "loss": 0.4387, "step": 18889 }, { "epoch": 0.8668716442568033, "grad_norm": 0.4597827196121216, "learning_rate": 8.2539238325223e-06, "loss": 0.3959, "step": 18890 }, { "epoch": 0.8669175347620577, "grad_norm": 0.45699310302734375, "learning_rate": 8.253737666684254e-06, "loss": 0.3857, "step": 18891 }, { "epoch": 0.8669634252673122, "grad_norm": 0.5193095207214355, "learning_rate": 8.253551493021994e-06, "loss": 0.5535, "step": 18892 }, { "epoch": 0.8670093157725667, "grad_norm": 0.4379752576351166, "learning_rate": 8.253365311535967e-06, "loss": 0.3737, "step": 18893 }, { "epoch": 0.8670552062778211, "grad_norm": 0.4666605293750763, "learning_rate": 8.253179122226627e-06, "loss": 0.4324, "step": 18894 }, { "epoch": 0.8671010967830756, "grad_norm": 0.4511547088623047, "learning_rate": 8.252992925094418e-06, "loss": 0.3947, "step": 18895 }, { "epoch": 0.8671469872883301, "grad_norm": 0.44838061928749084, "learning_rate": 8.252806720139785e-06, "loss": 0.4125, "step": 18896 }, { "epoch": 0.8671928777935846, "grad_norm": 0.48228031396865845, "learning_rate": 8.25262050736318e-06, "loss": 0.4575, "step": 18897 }, { "epoch": 0.867238768298839, "grad_norm": 0.4544408917427063, "learning_rate": 8.252434286765048e-06, "loss": 0.4058, "step": 18898 }, { "epoch": 0.8672846588040934, "grad_norm": 0.43944641947746277, "learning_rate": 8.25224805834584e-06, "loss": 0.3845, "step": 18899 }, { "epoch": 0.8673305493093479, "grad_norm": 0.4750918447971344, "learning_rate": 8.252061822106001e-06, "loss": 0.4626, "step": 18900 }, { "epoch": 0.8673764398146023, "grad_norm": 0.48726651072502136, "learning_rate": 8.25187557804598e-06, "loss": 0.4475, "step": 18901 }, { "epoch": 0.8674223303198568, "grad_norm": 0.4713202118873596, "learning_rate": 8.251689326166224e-06, "loss": 0.514, "step": 18902 }, { "epoch": 0.8674682208251113, "grad_norm": 0.6841461658477783, "learning_rate": 8.25150306646718e-06, "loss": 0.393, "step": 18903 }, { "epoch": 0.8675141113303657, "grad_norm": 0.4626181125640869, "learning_rate": 8.2513167989493e-06, "loss": 0.3756, "step": 18904 }, { "epoch": 0.8675600018356202, "grad_norm": 0.46891921758651733, "learning_rate": 8.251130523613027e-06, "loss": 0.4165, "step": 18905 }, { "epoch": 0.8676058923408747, "grad_norm": 0.415382981300354, "learning_rate": 8.250944240458813e-06, "loss": 0.305, "step": 18906 }, { "epoch": 0.8676517828461291, "grad_norm": 0.4734896719455719, "learning_rate": 8.250757949487103e-06, "loss": 0.4161, "step": 18907 }, { "epoch": 0.8676976733513836, "grad_norm": 0.4632915258407593, "learning_rate": 8.250571650698346e-06, "loss": 0.3782, "step": 18908 }, { "epoch": 0.8677435638566381, "grad_norm": 0.4909222722053528, "learning_rate": 8.250385344092992e-06, "loss": 0.4448, "step": 18909 }, { "epoch": 0.8677894543618925, "grad_norm": 0.4747442603111267, "learning_rate": 8.250199029671485e-06, "loss": 0.4056, "step": 18910 }, { "epoch": 0.867835344867147, "grad_norm": 0.5308018326759338, "learning_rate": 8.250012707434276e-06, "loss": 0.5501, "step": 18911 }, { "epoch": 0.8678812353724015, "grad_norm": 0.44531968235969543, "learning_rate": 8.24982637738181e-06, "loss": 0.382, "step": 18912 }, { "epoch": 0.8679271258776559, "grad_norm": 0.4495735168457031, "learning_rate": 8.249640039514542e-06, "loss": 0.3871, "step": 18913 }, { "epoch": 0.8679730163829104, "grad_norm": 0.4664168059825897, "learning_rate": 8.249453693832911e-06, "loss": 0.3973, "step": 18914 }, { "epoch": 0.8680189068881649, "grad_norm": 0.4821634590625763, "learning_rate": 8.249267340337371e-06, "loss": 0.4222, "step": 18915 }, { "epoch": 0.8680647973934194, "grad_norm": 0.4313802123069763, "learning_rate": 8.249080979028369e-06, "loss": 0.3544, "step": 18916 }, { "epoch": 0.8681106878986737, "grad_norm": 0.44742894172668457, "learning_rate": 8.248894609906353e-06, "loss": 0.3964, "step": 18917 }, { "epoch": 0.8681565784039282, "grad_norm": 0.5019928812980652, "learning_rate": 8.24870823297177e-06, "loss": 0.473, "step": 18918 }, { "epoch": 0.8682024689091827, "grad_norm": 0.4228384792804718, "learning_rate": 8.248521848225068e-06, "loss": 0.3336, "step": 18919 }, { "epoch": 0.8682483594144371, "grad_norm": 0.4851352572441101, "learning_rate": 8.248335455666698e-06, "loss": 0.4783, "step": 18920 }, { "epoch": 0.8682942499196916, "grad_norm": 0.47568362951278687, "learning_rate": 8.248149055297105e-06, "loss": 0.3757, "step": 18921 }, { "epoch": 0.8683401404249461, "grad_norm": 0.43546515703201294, "learning_rate": 8.247962647116738e-06, "loss": 0.3672, "step": 18922 }, { "epoch": 0.8683860309302005, "grad_norm": 0.42523741722106934, "learning_rate": 8.247776231126049e-06, "loss": 0.4136, "step": 18923 }, { "epoch": 0.868431921435455, "grad_norm": 0.4796278774738312, "learning_rate": 8.247589807325481e-06, "loss": 0.4977, "step": 18924 }, { "epoch": 0.8684778119407095, "grad_norm": 0.45294222235679626, "learning_rate": 8.247403375715485e-06, "loss": 0.4213, "step": 18925 }, { "epoch": 0.8685237024459639, "grad_norm": 0.4636421203613281, "learning_rate": 8.24721693629651e-06, "loss": 0.4012, "step": 18926 }, { "epoch": 0.8685695929512184, "grad_norm": 0.48574891686439514, "learning_rate": 8.247030489069002e-06, "loss": 0.4308, "step": 18927 }, { "epoch": 0.8686154834564729, "grad_norm": 0.4446605145931244, "learning_rate": 8.24684403403341e-06, "loss": 0.3599, "step": 18928 }, { "epoch": 0.8686613739617273, "grad_norm": 0.45167192816734314, "learning_rate": 8.246657571190182e-06, "loss": 0.4395, "step": 18929 }, { "epoch": 0.8687072644669818, "grad_norm": 0.45047852396965027, "learning_rate": 8.246471100539769e-06, "loss": 0.4222, "step": 18930 }, { "epoch": 0.8687531549722363, "grad_norm": 0.4923040270805359, "learning_rate": 8.246284622082617e-06, "loss": 0.4653, "step": 18931 }, { "epoch": 0.8687990454774908, "grad_norm": 0.44747284054756165, "learning_rate": 8.246098135819173e-06, "loss": 0.4096, "step": 18932 }, { "epoch": 0.8688449359827451, "grad_norm": 0.4296838641166687, "learning_rate": 8.245911641749891e-06, "loss": 0.3386, "step": 18933 }, { "epoch": 0.8688908264879996, "grad_norm": 0.4538974463939667, "learning_rate": 8.245725139875214e-06, "loss": 0.3581, "step": 18934 }, { "epoch": 0.8689367169932541, "grad_norm": 0.4676780700683594, "learning_rate": 8.245538630195592e-06, "loss": 0.3865, "step": 18935 }, { "epoch": 0.8689826074985085, "grad_norm": 0.4392344653606415, "learning_rate": 8.245352112711475e-06, "loss": 0.3712, "step": 18936 }, { "epoch": 0.869028498003763, "grad_norm": 0.43207836151123047, "learning_rate": 8.245165587423312e-06, "loss": 0.3321, "step": 18937 }, { "epoch": 0.8690743885090175, "grad_norm": 1.0018877983093262, "learning_rate": 8.244979054331548e-06, "loss": 0.4496, "step": 18938 }, { "epoch": 0.8691202790142719, "grad_norm": 0.5198634266853333, "learning_rate": 8.244792513436633e-06, "loss": 0.4679, "step": 18939 }, { "epoch": 0.8691661695195264, "grad_norm": 0.4379604756832123, "learning_rate": 8.244605964739017e-06, "loss": 0.4103, "step": 18940 }, { "epoch": 0.8692120600247809, "grad_norm": 0.4432399868965149, "learning_rate": 8.244419408239149e-06, "loss": 0.3553, "step": 18941 }, { "epoch": 0.8692579505300353, "grad_norm": 0.48757341504096985, "learning_rate": 8.244232843937473e-06, "loss": 0.3878, "step": 18942 }, { "epoch": 0.8693038410352898, "grad_norm": 0.5038857460021973, "learning_rate": 8.244046271834443e-06, "loss": 0.4388, "step": 18943 }, { "epoch": 0.8693497315405443, "grad_norm": 0.412352591753006, "learning_rate": 8.243859691930505e-06, "loss": 0.3514, "step": 18944 }, { "epoch": 0.8693956220457987, "grad_norm": 0.46718764305114746, "learning_rate": 8.243673104226108e-06, "loss": 0.4264, "step": 18945 }, { "epoch": 0.8694415125510532, "grad_norm": 0.4639747440814972, "learning_rate": 8.243486508721703e-06, "loss": 0.3984, "step": 18946 }, { "epoch": 0.8694874030563077, "grad_norm": 0.43775689601898193, "learning_rate": 8.243299905417734e-06, "loss": 0.3452, "step": 18947 }, { "epoch": 0.8695332935615621, "grad_norm": 0.4658461809158325, "learning_rate": 8.243113294314653e-06, "loss": 0.4497, "step": 18948 }, { "epoch": 0.8695791840668166, "grad_norm": 0.41739100217819214, "learning_rate": 8.24292667541291e-06, "loss": 0.3434, "step": 18949 }, { "epoch": 0.869625074572071, "grad_norm": 0.43689823150634766, "learning_rate": 8.242740048712949e-06, "loss": 0.3791, "step": 18950 }, { "epoch": 0.8696709650773256, "grad_norm": 0.4136965572834015, "learning_rate": 8.242553414215225e-06, "loss": 0.3271, "step": 18951 }, { "epoch": 0.8697168555825799, "grad_norm": 0.4825584590435028, "learning_rate": 8.242366771920181e-06, "loss": 0.4237, "step": 18952 }, { "epoch": 0.8697627460878344, "grad_norm": 0.43105411529541016, "learning_rate": 8.242180121828267e-06, "loss": 0.3479, "step": 18953 }, { "epoch": 0.8698086365930889, "grad_norm": 0.4749305844306946, "learning_rate": 8.241993463939934e-06, "loss": 0.37, "step": 18954 }, { "epoch": 0.8698545270983433, "grad_norm": 0.5094777345657349, "learning_rate": 8.241806798255631e-06, "loss": 0.4555, "step": 18955 }, { "epoch": 0.8699004176035978, "grad_norm": 0.4483426809310913, "learning_rate": 8.241620124775806e-06, "loss": 0.3528, "step": 18956 }, { "epoch": 0.8699463081088523, "grad_norm": 0.45195645093917847, "learning_rate": 8.241433443500907e-06, "loss": 0.4251, "step": 18957 }, { "epoch": 0.8699921986141067, "grad_norm": 0.4376021921634674, "learning_rate": 8.241246754431383e-06, "loss": 0.3619, "step": 18958 }, { "epoch": 0.8700380891193612, "grad_norm": 0.45279720425605774, "learning_rate": 8.241060057567685e-06, "loss": 0.3452, "step": 18959 }, { "epoch": 0.8700839796246157, "grad_norm": 0.4642816483974457, "learning_rate": 8.240873352910259e-06, "loss": 0.3771, "step": 18960 }, { "epoch": 0.8701298701298701, "grad_norm": 0.45004868507385254, "learning_rate": 8.240686640459557e-06, "loss": 0.4384, "step": 18961 }, { "epoch": 0.8701757606351246, "grad_norm": 0.4612734615802765, "learning_rate": 8.240499920216025e-06, "loss": 0.4433, "step": 18962 }, { "epoch": 0.8702216511403791, "grad_norm": 0.4524123966693878, "learning_rate": 8.240313192180114e-06, "loss": 0.3948, "step": 18963 }, { "epoch": 0.8702675416456335, "grad_norm": 0.4905470907688141, "learning_rate": 8.240126456352272e-06, "loss": 0.5041, "step": 18964 }, { "epoch": 0.870313432150888, "grad_norm": 0.44299811124801636, "learning_rate": 8.23993971273295e-06, "loss": 0.376, "step": 18965 }, { "epoch": 0.8703593226561425, "grad_norm": 0.46464115381240845, "learning_rate": 8.239752961322594e-06, "loss": 0.4058, "step": 18966 }, { "epoch": 0.870405213161397, "grad_norm": 0.4572221040725708, "learning_rate": 8.239566202121653e-06, "loss": 0.4469, "step": 18967 }, { "epoch": 0.8704511036666513, "grad_norm": 0.4484426975250244, "learning_rate": 8.239379435130581e-06, "loss": 0.3954, "step": 18968 }, { "epoch": 0.8704969941719058, "grad_norm": 0.4404235780239105, "learning_rate": 8.239192660349822e-06, "loss": 0.3412, "step": 18969 }, { "epoch": 0.8705428846771603, "grad_norm": 0.4255707561969757, "learning_rate": 8.239005877779829e-06, "loss": 0.3297, "step": 18970 }, { "epoch": 0.8705887751824147, "grad_norm": 0.4300839900970459, "learning_rate": 8.238819087421047e-06, "loss": 0.3896, "step": 18971 }, { "epoch": 0.8706346656876692, "grad_norm": 0.4114404320716858, "learning_rate": 8.238632289273929e-06, "loss": 0.2998, "step": 18972 }, { "epoch": 0.8706805561929237, "grad_norm": 0.45493021607398987, "learning_rate": 8.23844548333892e-06, "loss": 0.3901, "step": 18973 }, { "epoch": 0.8707264466981781, "grad_norm": 0.42662930488586426, "learning_rate": 8.238258669616474e-06, "loss": 0.3739, "step": 18974 }, { "epoch": 0.8707723372034326, "grad_norm": 0.4271929860115051, "learning_rate": 8.238071848107036e-06, "loss": 0.3469, "step": 18975 }, { "epoch": 0.8708182277086871, "grad_norm": 0.46819308400154114, "learning_rate": 8.237885018811059e-06, "loss": 0.3612, "step": 18976 }, { "epoch": 0.8708641182139415, "grad_norm": 0.4325588643550873, "learning_rate": 8.23769818172899e-06, "loss": 0.3985, "step": 18977 }, { "epoch": 0.870910008719196, "grad_norm": 0.4454439580440521, "learning_rate": 8.237511336861277e-06, "loss": 0.4144, "step": 18978 }, { "epoch": 0.8709558992244505, "grad_norm": 0.42623424530029297, "learning_rate": 8.237324484208374e-06, "loss": 0.3741, "step": 18979 }, { "epoch": 0.8710017897297049, "grad_norm": 0.5444596409797668, "learning_rate": 8.237137623770726e-06, "loss": 0.5267, "step": 18980 }, { "epoch": 0.8710476802349594, "grad_norm": 0.38361847400665283, "learning_rate": 8.236950755548782e-06, "loss": 0.2846, "step": 18981 }, { "epoch": 0.8710935707402139, "grad_norm": 0.45108702778816223, "learning_rate": 8.236763879542994e-06, "loss": 0.3312, "step": 18982 }, { "epoch": 0.8711394612454683, "grad_norm": 0.437872052192688, "learning_rate": 8.23657699575381e-06, "loss": 0.3938, "step": 18983 }, { "epoch": 0.8711853517507228, "grad_norm": 0.4380604326725006, "learning_rate": 8.236390104181682e-06, "loss": 0.368, "step": 18984 }, { "epoch": 0.8712312422559773, "grad_norm": 0.49645957350730896, "learning_rate": 8.236203204827057e-06, "loss": 0.4459, "step": 18985 }, { "epoch": 0.8712771327612318, "grad_norm": 0.4470571279525757, "learning_rate": 8.236016297690384e-06, "loss": 0.3791, "step": 18986 }, { "epoch": 0.8713230232664861, "grad_norm": 0.4759295880794525, "learning_rate": 8.235829382772111e-06, "loss": 0.4757, "step": 18987 }, { "epoch": 0.8713689137717406, "grad_norm": 0.4687975347042084, "learning_rate": 8.235642460072692e-06, "loss": 0.433, "step": 18988 }, { "epoch": 0.8714148042769951, "grad_norm": 0.4125252962112427, "learning_rate": 8.235455529592573e-06, "loss": 0.3391, "step": 18989 }, { "epoch": 0.8714606947822495, "grad_norm": 0.4338414669036865, "learning_rate": 8.235268591332204e-06, "loss": 0.3897, "step": 18990 }, { "epoch": 0.871506585287504, "grad_norm": 0.4246147871017456, "learning_rate": 8.235081645292039e-06, "loss": 0.3868, "step": 18991 }, { "epoch": 0.8715524757927585, "grad_norm": 0.7082940340042114, "learning_rate": 8.234894691472519e-06, "loss": 0.4117, "step": 18992 }, { "epoch": 0.8715983662980129, "grad_norm": 0.48406529426574707, "learning_rate": 8.234707729874101e-06, "loss": 0.406, "step": 18993 }, { "epoch": 0.8716442568032674, "grad_norm": 0.43817996978759766, "learning_rate": 8.23452076049723e-06, "loss": 0.374, "step": 18994 }, { "epoch": 0.8716901473085219, "grad_norm": 0.46787071228027344, "learning_rate": 8.234333783342358e-06, "loss": 0.4087, "step": 18995 }, { "epoch": 0.8717360378137763, "grad_norm": 0.476106733083725, "learning_rate": 8.234146798409935e-06, "loss": 0.4314, "step": 18996 }, { "epoch": 0.8717819283190308, "grad_norm": 0.43047866225242615, "learning_rate": 8.233959805700408e-06, "loss": 0.3203, "step": 18997 }, { "epoch": 0.8718278188242853, "grad_norm": 0.4719417989253998, "learning_rate": 8.23377280521423e-06, "loss": 0.4279, "step": 18998 }, { "epoch": 0.8718737093295397, "grad_norm": 0.4607294797897339, "learning_rate": 8.233585796951849e-06, "loss": 0.4264, "step": 18999 }, { "epoch": 0.8719195998347942, "grad_norm": 0.4721177816390991, "learning_rate": 8.233398780913712e-06, "loss": 0.3925, "step": 19000 }, { "epoch": 0.8719654903400487, "grad_norm": 0.4804874360561371, "learning_rate": 8.233211757100274e-06, "loss": 0.422, "step": 19001 }, { "epoch": 0.872011380845303, "grad_norm": 0.45685338973999023, "learning_rate": 8.233024725511983e-06, "loss": 0.3845, "step": 19002 }, { "epoch": 0.8720572713505576, "grad_norm": 0.4628857672214508, "learning_rate": 8.232837686149286e-06, "loss": 0.399, "step": 19003 }, { "epoch": 0.872103161855812, "grad_norm": 0.45493608713150024, "learning_rate": 8.232650639012635e-06, "loss": 0.3675, "step": 19004 }, { "epoch": 0.8721490523610665, "grad_norm": 0.4375508725643158, "learning_rate": 8.232463584102479e-06, "loss": 0.3223, "step": 19005 }, { "epoch": 0.8721949428663209, "grad_norm": 0.4677862524986267, "learning_rate": 8.23227652141927e-06, "loss": 0.4167, "step": 19006 }, { "epoch": 0.8722408333715754, "grad_norm": 0.41010594367980957, "learning_rate": 8.232089450963453e-06, "loss": 0.332, "step": 19007 }, { "epoch": 0.8722867238768299, "grad_norm": 0.5010298490524292, "learning_rate": 8.231902372735485e-06, "loss": 0.5066, "step": 19008 }, { "epoch": 0.8723326143820843, "grad_norm": 0.42294782400131226, "learning_rate": 8.231715286735808e-06, "loss": 0.3387, "step": 19009 }, { "epoch": 0.8723785048873388, "grad_norm": 0.48107004165649414, "learning_rate": 8.231528192964879e-06, "loss": 0.4038, "step": 19010 }, { "epoch": 0.8724243953925933, "grad_norm": 0.4320259094238281, "learning_rate": 8.231341091423143e-06, "loss": 0.3129, "step": 19011 }, { "epoch": 0.8724702858978477, "grad_norm": 0.4658711552619934, "learning_rate": 8.231153982111051e-06, "loss": 0.3647, "step": 19012 }, { "epoch": 0.8725161764031022, "grad_norm": 0.6039180755615234, "learning_rate": 8.230966865029055e-06, "loss": 0.4519, "step": 19013 }, { "epoch": 0.8725620669083567, "grad_norm": 0.4518488347530365, "learning_rate": 8.230779740177601e-06, "loss": 0.4049, "step": 19014 }, { "epoch": 0.8726079574136111, "grad_norm": 0.4543609321117401, "learning_rate": 8.230592607557145e-06, "loss": 0.3625, "step": 19015 }, { "epoch": 0.8726538479188656, "grad_norm": 0.5034462809562683, "learning_rate": 8.23040546716813e-06, "loss": 0.4367, "step": 19016 }, { "epoch": 0.8726997384241201, "grad_norm": 0.45420554280281067, "learning_rate": 8.230218319011011e-06, "loss": 0.4361, "step": 19017 }, { "epoch": 0.8727456289293745, "grad_norm": 0.4854552149772644, "learning_rate": 8.230031163086235e-06, "loss": 0.4314, "step": 19018 }, { "epoch": 0.872791519434629, "grad_norm": 0.4491330087184906, "learning_rate": 8.229843999394255e-06, "loss": 0.3849, "step": 19019 }, { "epoch": 0.8728374099398835, "grad_norm": 0.45390549302101135, "learning_rate": 8.229656827935519e-06, "loss": 0.4149, "step": 19020 }, { "epoch": 0.872883300445138, "grad_norm": 0.41391807794570923, "learning_rate": 8.229469648710477e-06, "loss": 0.3386, "step": 19021 }, { "epoch": 0.8729291909503923, "grad_norm": 0.4536173939704895, "learning_rate": 8.22928246171958e-06, "loss": 0.4153, "step": 19022 }, { "epoch": 0.8729750814556468, "grad_norm": 0.47305217385292053, "learning_rate": 8.229095266963279e-06, "loss": 0.4441, "step": 19023 }, { "epoch": 0.8730209719609013, "grad_norm": 0.40642306208610535, "learning_rate": 8.228908064442023e-06, "loss": 0.2845, "step": 19024 }, { "epoch": 0.8730668624661557, "grad_norm": 0.4578503370285034, "learning_rate": 8.228720854156262e-06, "loss": 0.3653, "step": 19025 }, { "epoch": 0.8731127529714102, "grad_norm": 0.4243710935115814, "learning_rate": 8.228533636106445e-06, "loss": 0.3605, "step": 19026 }, { "epoch": 0.8731586434766647, "grad_norm": 0.4913235902786255, "learning_rate": 8.228346410293025e-06, "loss": 0.4864, "step": 19027 }, { "epoch": 0.8732045339819191, "grad_norm": 0.5014052391052246, "learning_rate": 8.22815917671645e-06, "loss": 0.514, "step": 19028 }, { "epoch": 0.8732504244871736, "grad_norm": 0.446957528591156, "learning_rate": 8.22797193537717e-06, "loss": 0.3933, "step": 19029 }, { "epoch": 0.8732963149924281, "grad_norm": 0.43559396266937256, "learning_rate": 8.227784686275639e-06, "loss": 0.3524, "step": 19030 }, { "epoch": 0.8733422054976825, "grad_norm": 0.47324392199516296, "learning_rate": 8.227597429412303e-06, "loss": 0.3665, "step": 19031 }, { "epoch": 0.873388096002937, "grad_norm": 0.45771029591560364, "learning_rate": 8.227410164787613e-06, "loss": 0.3457, "step": 19032 }, { "epoch": 0.8734339865081915, "grad_norm": 0.4326404333114624, "learning_rate": 8.227222892402022e-06, "loss": 0.3516, "step": 19033 }, { "epoch": 0.8734798770134459, "grad_norm": 0.49116235971450806, "learning_rate": 8.227035612255979e-06, "loss": 0.4225, "step": 19034 }, { "epoch": 0.8735257675187004, "grad_norm": 0.4289708435535431, "learning_rate": 8.22684832434993e-06, "loss": 0.3725, "step": 19035 }, { "epoch": 0.8735716580239549, "grad_norm": 0.47323527932167053, "learning_rate": 8.226661028684332e-06, "loss": 0.4856, "step": 19036 }, { "epoch": 0.8736175485292093, "grad_norm": 0.481403112411499, "learning_rate": 8.226473725259631e-06, "loss": 0.4341, "step": 19037 }, { "epoch": 0.8736634390344638, "grad_norm": 0.45308342576026917, "learning_rate": 8.22628641407628e-06, "loss": 0.4133, "step": 19038 }, { "epoch": 0.8737093295397182, "grad_norm": 0.4240788519382477, "learning_rate": 8.22609909513473e-06, "loss": 0.3597, "step": 19039 }, { "epoch": 0.8737552200449727, "grad_norm": 0.44183915853500366, "learning_rate": 8.225911768435429e-06, "loss": 0.379, "step": 19040 }, { "epoch": 0.8738011105502271, "grad_norm": 0.4424301087856293, "learning_rate": 8.225724433978829e-06, "loss": 0.3993, "step": 19041 }, { "epoch": 0.8738470010554816, "grad_norm": 0.41829046607017517, "learning_rate": 8.225537091765377e-06, "loss": 0.3602, "step": 19042 }, { "epoch": 0.8738928915607361, "grad_norm": 0.48969876766204834, "learning_rate": 8.225349741795527e-06, "loss": 0.4225, "step": 19043 }, { "epoch": 0.8739387820659905, "grad_norm": 0.47473007440567017, "learning_rate": 8.22516238406973e-06, "loss": 0.377, "step": 19044 }, { "epoch": 0.873984672571245, "grad_norm": 0.4479834735393524, "learning_rate": 8.224975018588436e-06, "loss": 0.3745, "step": 19045 }, { "epoch": 0.8740305630764995, "grad_norm": 0.4330675005912781, "learning_rate": 8.224787645352094e-06, "loss": 0.4078, "step": 19046 }, { "epoch": 0.8740764535817539, "grad_norm": 0.4831138551235199, "learning_rate": 8.224600264361155e-06, "loss": 0.4365, "step": 19047 }, { "epoch": 0.8741223440870084, "grad_norm": 0.4695335626602173, "learning_rate": 8.22441287561607e-06, "loss": 0.3889, "step": 19048 }, { "epoch": 0.8741682345922629, "grad_norm": 0.44154486060142517, "learning_rate": 8.22422547911729e-06, "loss": 0.3969, "step": 19049 }, { "epoch": 0.8742141250975173, "grad_norm": 0.43685922026634216, "learning_rate": 8.224038074865266e-06, "loss": 0.3652, "step": 19050 }, { "epoch": 0.8742600156027718, "grad_norm": 0.4427495002746582, "learning_rate": 8.223850662860448e-06, "loss": 0.3511, "step": 19051 }, { "epoch": 0.8743059061080263, "grad_norm": 0.4470098912715912, "learning_rate": 8.223663243103287e-06, "loss": 0.3903, "step": 19052 }, { "epoch": 0.8743517966132807, "grad_norm": 0.42210766673088074, "learning_rate": 8.22347581559423e-06, "loss": 0.3422, "step": 19053 }, { "epoch": 0.8743976871185352, "grad_norm": 0.4271153509616852, "learning_rate": 8.223288380333733e-06, "loss": 0.3378, "step": 19054 }, { "epoch": 0.8744435776237897, "grad_norm": 0.4593006372451782, "learning_rate": 8.223100937322246e-06, "loss": 0.4304, "step": 19055 }, { "epoch": 0.8744894681290442, "grad_norm": 0.5093809366226196, "learning_rate": 8.222913486560219e-06, "loss": 0.4774, "step": 19056 }, { "epoch": 0.8745353586342985, "grad_norm": 0.48551321029663086, "learning_rate": 8.2227260280481e-06, "loss": 0.3749, "step": 19057 }, { "epoch": 0.874581249139553, "grad_norm": 0.45765891671180725, "learning_rate": 8.222538561786344e-06, "loss": 0.363, "step": 19058 }, { "epoch": 0.8746271396448075, "grad_norm": 0.4293176233768463, "learning_rate": 8.222351087775397e-06, "loss": 0.3477, "step": 19059 }, { "epoch": 0.8746730301500619, "grad_norm": 0.4705008268356323, "learning_rate": 8.222163606015717e-06, "loss": 0.4266, "step": 19060 }, { "epoch": 0.8747189206553164, "grad_norm": 0.46184825897216797, "learning_rate": 8.221976116507747e-06, "loss": 0.4191, "step": 19061 }, { "epoch": 0.8747648111605709, "grad_norm": 0.4687638580799103, "learning_rate": 8.221788619251942e-06, "loss": 0.3593, "step": 19062 }, { "epoch": 0.8748107016658253, "grad_norm": 0.42995747923851013, "learning_rate": 8.221601114248751e-06, "loss": 0.3872, "step": 19063 }, { "epoch": 0.8748565921710798, "grad_norm": 0.45432549715042114, "learning_rate": 8.221413601498629e-06, "loss": 0.4013, "step": 19064 }, { "epoch": 0.8749024826763343, "grad_norm": 0.5138528943061829, "learning_rate": 8.221226081002023e-06, "loss": 0.5026, "step": 19065 }, { "epoch": 0.8749483731815887, "grad_norm": 0.4685092866420746, "learning_rate": 8.221038552759383e-06, "loss": 0.4332, "step": 19066 }, { "epoch": 0.8749942636868432, "grad_norm": 0.46465200185775757, "learning_rate": 8.220851016771165e-06, "loss": 0.4145, "step": 19067 }, { "epoch": 0.8750401541920977, "grad_norm": 0.4531886577606201, "learning_rate": 8.220663473037814e-06, "loss": 0.3997, "step": 19068 }, { "epoch": 0.8750860446973521, "grad_norm": 0.49405908584594727, "learning_rate": 8.220475921559785e-06, "loss": 0.4468, "step": 19069 }, { "epoch": 0.8751319352026066, "grad_norm": 0.42974936962127686, "learning_rate": 8.220288362337527e-06, "loss": 0.3707, "step": 19070 }, { "epoch": 0.8751778257078611, "grad_norm": 0.4380975067615509, "learning_rate": 8.220100795371492e-06, "loss": 0.3733, "step": 19071 }, { "epoch": 0.8752237162131155, "grad_norm": 0.4969840347766876, "learning_rate": 8.219913220662132e-06, "loss": 0.4724, "step": 19072 }, { "epoch": 0.87526960671837, "grad_norm": 0.4308072030544281, "learning_rate": 8.219725638209897e-06, "loss": 0.3414, "step": 19073 }, { "epoch": 0.8753154972236245, "grad_norm": 1.2691686153411865, "learning_rate": 8.219538048015237e-06, "loss": 0.5702, "step": 19074 }, { "epoch": 0.875361387728879, "grad_norm": 0.42908746004104614, "learning_rate": 8.219350450078606e-06, "loss": 0.3945, "step": 19075 }, { "epoch": 0.8754072782341333, "grad_norm": 0.4702817499637604, "learning_rate": 8.21916284440045e-06, "loss": 0.3811, "step": 19076 }, { "epoch": 0.8754531687393878, "grad_norm": 0.5116704106330872, "learning_rate": 8.218975230981225e-06, "loss": 0.4832, "step": 19077 }, { "epoch": 0.8754990592446423, "grad_norm": 0.5056906342506409, "learning_rate": 8.218787609821382e-06, "loss": 0.4586, "step": 19078 }, { "epoch": 0.8755449497498967, "grad_norm": 0.4328555762767792, "learning_rate": 8.218599980921368e-06, "loss": 0.3988, "step": 19079 }, { "epoch": 0.8755908402551512, "grad_norm": 0.4441078305244446, "learning_rate": 8.218412344281639e-06, "loss": 0.3223, "step": 19080 }, { "epoch": 0.8756367307604057, "grad_norm": 0.4605567157268524, "learning_rate": 8.218224699902644e-06, "loss": 0.4304, "step": 19081 }, { "epoch": 0.8756826212656601, "grad_norm": 0.45149603486061096, "learning_rate": 8.218037047784833e-06, "loss": 0.4199, "step": 19082 }, { "epoch": 0.8757285117709146, "grad_norm": 0.4639022946357727, "learning_rate": 8.217849387928659e-06, "loss": 0.3796, "step": 19083 }, { "epoch": 0.8757744022761691, "grad_norm": 0.48578768968582153, "learning_rate": 8.217661720334573e-06, "loss": 0.475, "step": 19084 }, { "epoch": 0.8758202927814235, "grad_norm": 0.45571503043174744, "learning_rate": 8.217474045003026e-06, "loss": 0.3777, "step": 19085 }, { "epoch": 0.875866183286678, "grad_norm": 0.46412986516952515, "learning_rate": 8.21728636193447e-06, "loss": 0.3963, "step": 19086 }, { "epoch": 0.8759120737919325, "grad_norm": 0.44513291120529175, "learning_rate": 8.217098671129355e-06, "loss": 0.4224, "step": 19087 }, { "epoch": 0.8759579642971869, "grad_norm": 0.4397667944431305, "learning_rate": 8.216910972588133e-06, "loss": 0.3378, "step": 19088 }, { "epoch": 0.8760038548024414, "grad_norm": 0.44363319873809814, "learning_rate": 8.216723266311255e-06, "loss": 0.368, "step": 19089 }, { "epoch": 0.8760497453076959, "grad_norm": 0.4495210349559784, "learning_rate": 8.216535552299174e-06, "loss": 0.3483, "step": 19090 }, { "epoch": 0.8760956358129502, "grad_norm": 0.46948179602622986, "learning_rate": 8.216347830552339e-06, "loss": 0.3974, "step": 19091 }, { "epoch": 0.8761415263182047, "grad_norm": 0.46009600162506104, "learning_rate": 8.216160101071203e-06, "loss": 0.4348, "step": 19092 }, { "epoch": 0.8761874168234592, "grad_norm": 0.4910718500614166, "learning_rate": 8.215972363856217e-06, "loss": 0.4031, "step": 19093 }, { "epoch": 0.8762333073287137, "grad_norm": 0.4709832966327667, "learning_rate": 8.215784618907831e-06, "loss": 0.4033, "step": 19094 }, { "epoch": 0.8762791978339681, "grad_norm": 0.48434245586395264, "learning_rate": 8.215596866226501e-06, "loss": 0.4424, "step": 19095 }, { "epoch": 0.8763250883392226, "grad_norm": 0.45961257815361023, "learning_rate": 8.215409105812674e-06, "loss": 0.3994, "step": 19096 }, { "epoch": 0.8763709788444771, "grad_norm": 0.4488162398338318, "learning_rate": 8.215221337666802e-06, "loss": 0.3801, "step": 19097 }, { "epoch": 0.8764168693497315, "grad_norm": 0.45952269434928894, "learning_rate": 8.21503356178934e-06, "loss": 0.41, "step": 19098 }, { "epoch": 0.876462759854986, "grad_norm": 0.45821550488471985, "learning_rate": 8.214845778180733e-06, "loss": 0.3548, "step": 19099 }, { "epoch": 0.8765086503602405, "grad_norm": 0.43187767267227173, "learning_rate": 8.21465798684144e-06, "loss": 0.3674, "step": 19100 }, { "epoch": 0.8765545408654949, "grad_norm": 0.4792787432670593, "learning_rate": 8.214470187771907e-06, "loss": 0.4454, "step": 19101 }, { "epoch": 0.8766004313707494, "grad_norm": 0.44913235306739807, "learning_rate": 8.21428238097259e-06, "loss": 0.3836, "step": 19102 }, { "epoch": 0.8766463218760039, "grad_norm": 0.4326745271682739, "learning_rate": 8.214094566443936e-06, "loss": 0.324, "step": 19103 }, { "epoch": 0.8766922123812583, "grad_norm": 0.43010279536247253, "learning_rate": 8.213906744186399e-06, "loss": 0.3622, "step": 19104 }, { "epoch": 0.8767381028865128, "grad_norm": 0.48416927456855774, "learning_rate": 8.213718914200432e-06, "loss": 0.4649, "step": 19105 }, { "epoch": 0.8767839933917673, "grad_norm": 0.4400700628757477, "learning_rate": 8.213531076486484e-06, "loss": 0.394, "step": 19106 }, { "epoch": 0.8768298838970217, "grad_norm": 0.4606837332248688, "learning_rate": 8.21334323104501e-06, "loss": 0.391, "step": 19107 }, { "epoch": 0.8768757744022762, "grad_norm": 0.47147029638290405, "learning_rate": 8.213155377876458e-06, "loss": 0.388, "step": 19108 }, { "epoch": 0.8769216649075307, "grad_norm": 0.44349220395088196, "learning_rate": 8.212967516981281e-06, "loss": 0.4602, "step": 19109 }, { "epoch": 0.8769675554127851, "grad_norm": 0.4863872528076172, "learning_rate": 8.212779648359932e-06, "loss": 0.4602, "step": 19110 }, { "epoch": 0.8770134459180395, "grad_norm": 0.4206690788269043, "learning_rate": 8.21259177201286e-06, "loss": 0.3361, "step": 19111 }, { "epoch": 0.877059336423294, "grad_norm": 0.4568292498588562, "learning_rate": 8.212403887940523e-06, "loss": 0.4317, "step": 19112 }, { "epoch": 0.8771052269285485, "grad_norm": 0.4856686592102051, "learning_rate": 8.212215996143365e-06, "loss": 0.3888, "step": 19113 }, { "epoch": 0.8771511174338029, "grad_norm": 0.44106027483940125, "learning_rate": 8.212028096621843e-06, "loss": 0.3697, "step": 19114 }, { "epoch": 0.8771970079390574, "grad_norm": 0.4441925585269928, "learning_rate": 8.211840189376407e-06, "loss": 0.3883, "step": 19115 }, { "epoch": 0.8772428984443119, "grad_norm": 0.43758425116539, "learning_rate": 8.21165227440751e-06, "loss": 0.4002, "step": 19116 }, { "epoch": 0.8772887889495663, "grad_norm": 0.42489108443260193, "learning_rate": 8.2114643517156e-06, "loss": 0.3662, "step": 19117 }, { "epoch": 0.8773346794548208, "grad_norm": 0.43275192379951477, "learning_rate": 8.211276421301133e-06, "loss": 0.3759, "step": 19118 }, { "epoch": 0.8773805699600753, "grad_norm": 0.4375339448451996, "learning_rate": 8.21108848316456e-06, "loss": 0.3837, "step": 19119 }, { "epoch": 0.8774264604653297, "grad_norm": 0.487178236246109, "learning_rate": 8.210900537306334e-06, "loss": 0.4211, "step": 19120 }, { "epoch": 0.8774723509705842, "grad_norm": 0.46321022510528564, "learning_rate": 8.210712583726903e-06, "loss": 0.4348, "step": 19121 }, { "epoch": 0.8775182414758387, "grad_norm": 0.49689555168151855, "learning_rate": 8.210524622426723e-06, "loss": 0.4191, "step": 19122 }, { "epoch": 0.8775641319810931, "grad_norm": 0.4719978868961334, "learning_rate": 8.210336653406245e-06, "loss": 0.3815, "step": 19123 }, { "epoch": 0.8776100224863476, "grad_norm": 0.42269986867904663, "learning_rate": 8.21014867666592e-06, "loss": 0.359, "step": 19124 }, { "epoch": 0.8776559129916021, "grad_norm": 0.47000640630722046, "learning_rate": 8.2099606922062e-06, "loss": 0.3486, "step": 19125 }, { "epoch": 0.8777018034968564, "grad_norm": 0.5031851530075073, "learning_rate": 8.209772700027539e-06, "loss": 0.5425, "step": 19126 }, { "epoch": 0.877747694002111, "grad_norm": 0.4548361599445343, "learning_rate": 8.209584700130386e-06, "loss": 0.4071, "step": 19127 }, { "epoch": 0.8777935845073654, "grad_norm": 0.4200451076030731, "learning_rate": 8.209396692515197e-06, "loss": 0.3156, "step": 19128 }, { "epoch": 0.8778394750126199, "grad_norm": 0.44227859377861023, "learning_rate": 8.20920867718242e-06, "loss": 0.3847, "step": 19129 }, { "epoch": 0.8778853655178743, "grad_norm": 0.46306532621383667, "learning_rate": 8.209020654132508e-06, "loss": 0.4217, "step": 19130 }, { "epoch": 0.8779312560231288, "grad_norm": 0.49015966057777405, "learning_rate": 8.208832623365916e-06, "loss": 0.4639, "step": 19131 }, { "epoch": 0.8779771465283833, "grad_norm": 0.46084481477737427, "learning_rate": 8.208644584883096e-06, "loss": 0.4153, "step": 19132 }, { "epoch": 0.8780230370336377, "grad_norm": 0.4666290581226349, "learning_rate": 8.208456538684495e-06, "loss": 0.3982, "step": 19133 }, { "epoch": 0.8780689275388922, "grad_norm": 0.4263615012168884, "learning_rate": 8.208268484770569e-06, "loss": 0.349, "step": 19134 }, { "epoch": 0.8781148180441467, "grad_norm": 0.4571050703525543, "learning_rate": 8.208080423141772e-06, "loss": 0.3969, "step": 19135 }, { "epoch": 0.8781607085494011, "grad_norm": 0.4740146994590759, "learning_rate": 8.207892353798553e-06, "loss": 0.4822, "step": 19136 }, { "epoch": 0.8782065990546556, "grad_norm": 0.4939623475074768, "learning_rate": 8.207704276741365e-06, "loss": 0.4584, "step": 19137 }, { "epoch": 0.8782524895599101, "grad_norm": 0.4671875834465027, "learning_rate": 8.207516191970662e-06, "loss": 0.479, "step": 19138 }, { "epoch": 0.8782983800651645, "grad_norm": 0.4533805847167969, "learning_rate": 8.207328099486893e-06, "loss": 0.3422, "step": 19139 }, { "epoch": 0.878344270570419, "grad_norm": 0.48585575819015503, "learning_rate": 8.207139999290512e-06, "loss": 0.4072, "step": 19140 }, { "epoch": 0.8783901610756735, "grad_norm": 0.4155079424381256, "learning_rate": 8.206951891381973e-06, "loss": 0.3136, "step": 19141 }, { "epoch": 0.8784360515809279, "grad_norm": 0.44877225160598755, "learning_rate": 8.206763775761726e-06, "loss": 0.3597, "step": 19142 }, { "epoch": 0.8784819420861824, "grad_norm": 0.45714521408081055, "learning_rate": 8.206575652430225e-06, "loss": 0.4447, "step": 19143 }, { "epoch": 0.8785278325914369, "grad_norm": 0.43482619524002075, "learning_rate": 8.206387521387921e-06, "loss": 0.3604, "step": 19144 }, { "epoch": 0.8785737230966912, "grad_norm": 0.44424766302108765, "learning_rate": 8.206199382635267e-06, "loss": 0.3941, "step": 19145 }, { "epoch": 0.8786196136019457, "grad_norm": 0.456699013710022, "learning_rate": 8.206011236172715e-06, "loss": 0.3533, "step": 19146 }, { "epoch": 0.8786655041072002, "grad_norm": 0.46785077452659607, "learning_rate": 8.205823082000719e-06, "loss": 0.3988, "step": 19147 }, { "epoch": 0.8787113946124547, "grad_norm": 0.42983683943748474, "learning_rate": 8.20563492011973e-06, "loss": 0.353, "step": 19148 }, { "epoch": 0.8787572851177091, "grad_norm": 0.42816194891929626, "learning_rate": 8.2054467505302e-06, "loss": 0.3347, "step": 19149 }, { "epoch": 0.8788031756229636, "grad_norm": 0.4148336350917816, "learning_rate": 8.205258573232582e-06, "loss": 0.3583, "step": 19150 }, { "epoch": 0.8788490661282181, "grad_norm": 0.46545499563217163, "learning_rate": 8.20507038822733e-06, "loss": 0.3883, "step": 19151 }, { "epoch": 0.8788949566334725, "grad_norm": 0.47809427976608276, "learning_rate": 8.204882195514893e-06, "loss": 0.3982, "step": 19152 }, { "epoch": 0.878940847138727, "grad_norm": 0.4478949308395386, "learning_rate": 8.20469399509573e-06, "loss": 0.4083, "step": 19153 }, { "epoch": 0.8789867376439815, "grad_norm": 0.43192213773727417, "learning_rate": 8.204505786970284e-06, "loss": 0.3591, "step": 19154 }, { "epoch": 0.8790326281492359, "grad_norm": 0.4654843807220459, "learning_rate": 8.204317571139017e-06, "loss": 0.4525, "step": 19155 }, { "epoch": 0.8790785186544904, "grad_norm": 0.4616166651248932, "learning_rate": 8.204129347602377e-06, "loss": 0.3945, "step": 19156 }, { "epoch": 0.8791244091597449, "grad_norm": 0.4762967526912689, "learning_rate": 8.203941116360816e-06, "loss": 0.4315, "step": 19157 }, { "epoch": 0.8791702996649993, "grad_norm": 0.4483889937400818, "learning_rate": 8.203752877414787e-06, "loss": 0.3611, "step": 19158 }, { "epoch": 0.8792161901702538, "grad_norm": 0.46035635471343994, "learning_rate": 8.203564630764745e-06, "loss": 0.429, "step": 19159 }, { "epoch": 0.8792620806755083, "grad_norm": 0.5128582119941711, "learning_rate": 8.203376376411141e-06, "loss": 0.4537, "step": 19160 }, { "epoch": 0.8793079711807626, "grad_norm": 0.4780116677284241, "learning_rate": 8.203188114354429e-06, "loss": 0.4266, "step": 19161 }, { "epoch": 0.8793538616860171, "grad_norm": 0.44106414914131165, "learning_rate": 8.202999844595058e-06, "loss": 0.3786, "step": 19162 }, { "epoch": 0.8793997521912716, "grad_norm": 0.4755333960056305, "learning_rate": 8.202811567133484e-06, "loss": 0.4437, "step": 19163 }, { "epoch": 0.8794456426965261, "grad_norm": 0.4286287724971771, "learning_rate": 8.20262328197016e-06, "loss": 0.3903, "step": 19164 }, { "epoch": 0.8794915332017805, "grad_norm": 0.4155310094356537, "learning_rate": 8.202434989105537e-06, "loss": 0.3348, "step": 19165 }, { "epoch": 0.879537423707035, "grad_norm": 0.43057844042778015, "learning_rate": 8.202246688540069e-06, "loss": 0.3603, "step": 19166 }, { "epoch": 0.8795833142122895, "grad_norm": 0.5113843679428101, "learning_rate": 8.202058380274207e-06, "loss": 0.4967, "step": 19167 }, { "epoch": 0.8796292047175439, "grad_norm": 0.42816320061683655, "learning_rate": 8.201870064308406e-06, "loss": 0.3787, "step": 19168 }, { "epoch": 0.8796750952227984, "grad_norm": 0.43738627433776855, "learning_rate": 8.20168174064312e-06, "loss": 0.3681, "step": 19169 }, { "epoch": 0.8797209857280529, "grad_norm": 0.488862007856369, "learning_rate": 8.201493409278799e-06, "loss": 0.446, "step": 19170 }, { "epoch": 0.8797668762333073, "grad_norm": 0.4588591158390045, "learning_rate": 8.201305070215895e-06, "loss": 0.385, "step": 19171 }, { "epoch": 0.8798127667385618, "grad_norm": 0.49046656489372253, "learning_rate": 8.201116723454864e-06, "loss": 0.4596, "step": 19172 }, { "epoch": 0.8798586572438163, "grad_norm": 0.42806512117385864, "learning_rate": 8.200928368996157e-06, "loss": 0.3537, "step": 19173 }, { "epoch": 0.8799045477490707, "grad_norm": 0.4339893162250519, "learning_rate": 8.200740006840228e-06, "loss": 0.3469, "step": 19174 }, { "epoch": 0.8799504382543252, "grad_norm": 0.4892531931400299, "learning_rate": 8.20055163698753e-06, "loss": 0.4743, "step": 19175 }, { "epoch": 0.8799963287595797, "grad_norm": 0.45419391989707947, "learning_rate": 8.200363259438513e-06, "loss": 0.3634, "step": 19176 }, { "epoch": 0.8800422192648341, "grad_norm": 0.4677983820438385, "learning_rate": 8.200174874193637e-06, "loss": 0.4083, "step": 19177 }, { "epoch": 0.8800881097700886, "grad_norm": 0.4166705310344696, "learning_rate": 8.199986481253346e-06, "loss": 0.3117, "step": 19178 }, { "epoch": 0.880134000275343, "grad_norm": 0.4390350878238678, "learning_rate": 8.199798080618097e-06, "loss": 0.3637, "step": 19179 }, { "epoch": 0.8801798907805974, "grad_norm": 0.46039873361587524, "learning_rate": 8.199609672288348e-06, "loss": 0.3936, "step": 19180 }, { "epoch": 0.8802257812858519, "grad_norm": 0.4135136604309082, "learning_rate": 8.199421256264543e-06, "loss": 0.3721, "step": 19181 }, { "epoch": 0.8802716717911064, "grad_norm": 0.4674646258354187, "learning_rate": 8.199232832547144e-06, "loss": 0.4342, "step": 19182 }, { "epoch": 0.8803175622963609, "grad_norm": 0.46967923641204834, "learning_rate": 8.199044401136597e-06, "loss": 0.4042, "step": 19183 }, { "epoch": 0.8803634528016153, "grad_norm": 0.4395766854286194, "learning_rate": 8.19885596203336e-06, "loss": 0.37, "step": 19184 }, { "epoch": 0.8804093433068698, "grad_norm": 0.426149845123291, "learning_rate": 8.19866751523788e-06, "loss": 0.3292, "step": 19185 }, { "epoch": 0.8804552338121243, "grad_norm": 0.43080008029937744, "learning_rate": 8.198479060750616e-06, "loss": 0.2938, "step": 19186 }, { "epoch": 0.8805011243173787, "grad_norm": 0.4458032250404358, "learning_rate": 8.19829059857202e-06, "loss": 0.3785, "step": 19187 }, { "epoch": 0.8805470148226332, "grad_norm": 0.5042188167572021, "learning_rate": 8.198102128702546e-06, "loss": 0.5322, "step": 19188 }, { "epoch": 0.8805929053278877, "grad_norm": 0.41657865047454834, "learning_rate": 8.197913651142643e-06, "loss": 0.3355, "step": 19189 }, { "epoch": 0.8806387958331421, "grad_norm": 0.4787415564060211, "learning_rate": 8.197725165892768e-06, "loss": 0.4486, "step": 19190 }, { "epoch": 0.8806846863383966, "grad_norm": 0.4416230618953705, "learning_rate": 8.197536672953374e-06, "loss": 0.3629, "step": 19191 }, { "epoch": 0.8807305768436511, "grad_norm": 0.42962655425071716, "learning_rate": 8.197348172324913e-06, "loss": 0.3845, "step": 19192 }, { "epoch": 0.8807764673489055, "grad_norm": 0.4953404366970062, "learning_rate": 8.197159664007838e-06, "loss": 0.4766, "step": 19193 }, { "epoch": 0.88082235785416, "grad_norm": 0.43361860513687134, "learning_rate": 8.196971148002604e-06, "loss": 0.3509, "step": 19194 }, { "epoch": 0.8808682483594145, "grad_norm": 0.43758490681648254, "learning_rate": 8.196782624309662e-06, "loss": 0.4059, "step": 19195 }, { "epoch": 0.8809141388646688, "grad_norm": 0.4441516101360321, "learning_rate": 8.19659409292947e-06, "loss": 0.4421, "step": 19196 }, { "epoch": 0.8809600293699233, "grad_norm": 0.49107858538627625, "learning_rate": 8.196405553862474e-06, "loss": 0.4542, "step": 19197 }, { "epoch": 0.8810059198751778, "grad_norm": 0.45045992732048035, "learning_rate": 8.196217007109133e-06, "loss": 0.3674, "step": 19198 }, { "epoch": 0.8810518103804323, "grad_norm": 0.41383570432662964, "learning_rate": 8.196028452669898e-06, "loss": 0.3509, "step": 19199 }, { "epoch": 0.8810977008856867, "grad_norm": 0.47147536277770996, "learning_rate": 8.195839890545225e-06, "loss": 0.4257, "step": 19200 }, { "epoch": 0.8811435913909412, "grad_norm": 0.43871334195137024, "learning_rate": 8.195651320735564e-06, "loss": 0.3667, "step": 19201 }, { "epoch": 0.8811894818961957, "grad_norm": 0.4312410056591034, "learning_rate": 8.195462743241373e-06, "loss": 0.3377, "step": 19202 }, { "epoch": 0.8812353724014501, "grad_norm": 0.4550964832305908, "learning_rate": 8.1952741580631e-06, "loss": 0.4604, "step": 19203 }, { "epoch": 0.8812812629067046, "grad_norm": 0.47329220175743103, "learning_rate": 8.195085565201202e-06, "loss": 0.4485, "step": 19204 }, { "epoch": 0.8813271534119591, "grad_norm": 0.43617936968803406, "learning_rate": 8.194896964656131e-06, "loss": 0.375, "step": 19205 }, { "epoch": 0.8813730439172135, "grad_norm": 0.44242623448371887, "learning_rate": 8.194708356428342e-06, "loss": 0.3909, "step": 19206 }, { "epoch": 0.881418934422468, "grad_norm": 0.45489150285720825, "learning_rate": 8.194519740518288e-06, "loss": 0.3533, "step": 19207 }, { "epoch": 0.8814648249277225, "grad_norm": 0.4922693073749542, "learning_rate": 8.194331116926422e-06, "loss": 0.4515, "step": 19208 }, { "epoch": 0.8815107154329769, "grad_norm": 0.47709330916404724, "learning_rate": 8.194142485653196e-06, "loss": 0.4943, "step": 19209 }, { "epoch": 0.8815566059382314, "grad_norm": 0.42918339371681213, "learning_rate": 8.193953846699067e-06, "loss": 0.3616, "step": 19210 }, { "epoch": 0.8816024964434859, "grad_norm": 0.43919846415519714, "learning_rate": 8.193765200064488e-06, "loss": 0.3529, "step": 19211 }, { "epoch": 0.8816483869487403, "grad_norm": 0.4629294276237488, "learning_rate": 8.19357654574991e-06, "loss": 0.3806, "step": 19212 }, { "epoch": 0.8816942774539948, "grad_norm": 0.4489581882953644, "learning_rate": 8.193387883755788e-06, "loss": 0.3347, "step": 19213 }, { "epoch": 0.8817401679592493, "grad_norm": 0.5061411261558533, "learning_rate": 8.193199214082579e-06, "loss": 0.4636, "step": 19214 }, { "epoch": 0.8817860584645036, "grad_norm": 0.46993300318717957, "learning_rate": 8.19301053673073e-06, "loss": 0.4459, "step": 19215 }, { "epoch": 0.8818319489697581, "grad_norm": 0.45036575198173523, "learning_rate": 8.1928218517007e-06, "loss": 0.4146, "step": 19216 }, { "epoch": 0.8818778394750126, "grad_norm": 0.47681519389152527, "learning_rate": 8.192633158992941e-06, "loss": 0.4183, "step": 19217 }, { "epoch": 0.8819237299802671, "grad_norm": 0.4516066312789917, "learning_rate": 8.192444458607907e-06, "loss": 0.3764, "step": 19218 }, { "epoch": 0.8819696204855215, "grad_norm": 0.4285201132297516, "learning_rate": 8.192255750546052e-06, "loss": 0.3258, "step": 19219 }, { "epoch": 0.882015510990776, "grad_norm": 0.47717711329460144, "learning_rate": 8.192067034807828e-06, "loss": 0.3776, "step": 19220 }, { "epoch": 0.8820614014960305, "grad_norm": 0.44581788778305054, "learning_rate": 8.191878311393693e-06, "loss": 0.3441, "step": 19221 }, { "epoch": 0.8821072920012849, "grad_norm": 0.4888061583042145, "learning_rate": 8.191689580304096e-06, "loss": 0.4792, "step": 19222 }, { "epoch": 0.8821531825065394, "grad_norm": 0.44188132882118225, "learning_rate": 8.191500841539492e-06, "loss": 0.3679, "step": 19223 }, { "epoch": 0.8821990730117939, "grad_norm": 0.4308532178401947, "learning_rate": 8.191312095100336e-06, "loss": 0.3444, "step": 19224 }, { "epoch": 0.8822449635170483, "grad_norm": 0.48385700583457947, "learning_rate": 8.191123340987084e-06, "loss": 0.4421, "step": 19225 }, { "epoch": 0.8822908540223028, "grad_norm": 0.4256320893764496, "learning_rate": 8.190934579200187e-06, "loss": 0.3103, "step": 19226 }, { "epoch": 0.8823367445275573, "grad_norm": 0.47393524646759033, "learning_rate": 8.190745809740097e-06, "loss": 0.398, "step": 19227 }, { "epoch": 0.8823826350328117, "grad_norm": 0.4953345060348511, "learning_rate": 8.190557032607272e-06, "loss": 0.4916, "step": 19228 }, { "epoch": 0.8824285255380662, "grad_norm": 0.5270404815673828, "learning_rate": 8.190368247802165e-06, "loss": 0.5274, "step": 19229 }, { "epoch": 0.8824744160433207, "grad_norm": 0.4783361852169037, "learning_rate": 8.190179455325228e-06, "loss": 0.3336, "step": 19230 }, { "epoch": 0.882520306548575, "grad_norm": 0.6263243556022644, "learning_rate": 8.189990655176916e-06, "loss": 0.3912, "step": 19231 }, { "epoch": 0.8825661970538295, "grad_norm": 0.4723162055015564, "learning_rate": 8.189801847357685e-06, "loss": 0.4188, "step": 19232 }, { "epoch": 0.882612087559084, "grad_norm": 0.47945454716682434, "learning_rate": 8.189613031867986e-06, "loss": 0.4809, "step": 19233 }, { "epoch": 0.8826579780643384, "grad_norm": 0.4349053204059601, "learning_rate": 8.189424208708275e-06, "loss": 0.3721, "step": 19234 }, { "epoch": 0.8827038685695929, "grad_norm": 0.46414634585380554, "learning_rate": 8.189235377879004e-06, "loss": 0.3714, "step": 19235 }, { "epoch": 0.8827497590748474, "grad_norm": 0.5051312446594238, "learning_rate": 8.18904653938063e-06, "loss": 0.4797, "step": 19236 }, { "epoch": 0.8827956495801019, "grad_norm": 0.4527251422405243, "learning_rate": 8.188857693213605e-06, "loss": 0.4054, "step": 19237 }, { "epoch": 0.8828415400853563, "grad_norm": 0.4478316009044647, "learning_rate": 8.188668839378383e-06, "loss": 0.3223, "step": 19238 }, { "epoch": 0.8828874305906108, "grad_norm": 0.40648671984672546, "learning_rate": 8.188479977875421e-06, "loss": 0.2897, "step": 19239 }, { "epoch": 0.8829333210958653, "grad_norm": 0.4584740996360779, "learning_rate": 8.188291108705169e-06, "loss": 0.3973, "step": 19240 }, { "epoch": 0.8829792116011197, "grad_norm": 0.4096563458442688, "learning_rate": 8.188102231868082e-06, "loss": 0.3115, "step": 19241 }, { "epoch": 0.8830251021063742, "grad_norm": 0.4632634222507477, "learning_rate": 8.187913347364617e-06, "loss": 0.3664, "step": 19242 }, { "epoch": 0.8830709926116287, "grad_norm": 0.44173023104667664, "learning_rate": 8.187724455195227e-06, "loss": 0.3757, "step": 19243 }, { "epoch": 0.8831168831168831, "grad_norm": 0.4560413956642151, "learning_rate": 8.187535555360365e-06, "loss": 0.403, "step": 19244 }, { "epoch": 0.8831627736221376, "grad_norm": 0.44466885924339294, "learning_rate": 8.187346647860486e-06, "loss": 0.3713, "step": 19245 }, { "epoch": 0.8832086641273921, "grad_norm": 0.4600166976451874, "learning_rate": 8.187157732696045e-06, "loss": 0.3456, "step": 19246 }, { "epoch": 0.8832545546326465, "grad_norm": 0.4309530258178711, "learning_rate": 8.186968809867495e-06, "loss": 0.4007, "step": 19247 }, { "epoch": 0.883300445137901, "grad_norm": 0.44089317321777344, "learning_rate": 8.186779879375291e-06, "loss": 0.3504, "step": 19248 }, { "epoch": 0.8833463356431555, "grad_norm": 0.4826483428478241, "learning_rate": 8.186590941219887e-06, "loss": 0.4684, "step": 19249 }, { "epoch": 0.8833922261484098, "grad_norm": 0.45807716250419617, "learning_rate": 8.186401995401738e-06, "loss": 0.4353, "step": 19250 }, { "epoch": 0.8834381166536643, "grad_norm": 0.5067712664604187, "learning_rate": 8.186213041921299e-06, "loss": 0.4752, "step": 19251 }, { "epoch": 0.8834840071589188, "grad_norm": 0.4552050530910492, "learning_rate": 8.18602408077902e-06, "loss": 0.4034, "step": 19252 }, { "epoch": 0.8835298976641733, "grad_norm": 0.45308005809783936, "learning_rate": 8.18583511197536e-06, "loss": 0.3748, "step": 19253 }, { "epoch": 0.8835757881694277, "grad_norm": 0.43959176540374756, "learning_rate": 8.185646135510771e-06, "loss": 0.381, "step": 19254 }, { "epoch": 0.8836216786746822, "grad_norm": 0.4747507870197296, "learning_rate": 8.18545715138571e-06, "loss": 0.4829, "step": 19255 }, { "epoch": 0.8836675691799367, "grad_norm": 0.4480806291103363, "learning_rate": 8.18526815960063e-06, "loss": 0.3869, "step": 19256 }, { "epoch": 0.8837134596851911, "grad_norm": 0.43594253063201904, "learning_rate": 8.185079160155987e-06, "loss": 0.3535, "step": 19257 }, { "epoch": 0.8837593501904456, "grad_norm": 0.5030718445777893, "learning_rate": 8.18489015305223e-06, "loss": 0.5101, "step": 19258 }, { "epoch": 0.8838052406957001, "grad_norm": 0.4753264784812927, "learning_rate": 8.184701138289822e-06, "loss": 0.4829, "step": 19259 }, { "epoch": 0.8838511312009545, "grad_norm": 0.4426402449607849, "learning_rate": 8.184512115869207e-06, "loss": 0.3486, "step": 19260 }, { "epoch": 0.883897021706209, "grad_norm": 0.47613900899887085, "learning_rate": 8.18432308579085e-06, "loss": 0.4292, "step": 19261 }, { "epoch": 0.8839429122114635, "grad_norm": 0.44666293263435364, "learning_rate": 8.1841340480552e-06, "loss": 0.3956, "step": 19262 }, { "epoch": 0.8839888027167179, "grad_norm": 0.42694327235221863, "learning_rate": 8.18394500266271e-06, "loss": 0.359, "step": 19263 }, { "epoch": 0.8840346932219724, "grad_norm": 0.46381524205207825, "learning_rate": 8.183755949613841e-06, "loss": 0.3994, "step": 19264 }, { "epoch": 0.8840805837272269, "grad_norm": 0.471210241317749, "learning_rate": 8.183566888909043e-06, "loss": 0.4526, "step": 19265 }, { "epoch": 0.8841264742324813, "grad_norm": 0.448869526386261, "learning_rate": 8.183377820548767e-06, "loss": 0.4297, "step": 19266 }, { "epoch": 0.8841723647377357, "grad_norm": 0.43165886402130127, "learning_rate": 8.183188744533477e-06, "loss": 0.3601, "step": 19267 }, { "epoch": 0.8842182552429902, "grad_norm": 0.4133877456188202, "learning_rate": 8.182999660863622e-06, "loss": 0.3198, "step": 19268 }, { "epoch": 0.8842641457482446, "grad_norm": 0.4343601167201996, "learning_rate": 8.182810569539656e-06, "loss": 0.3786, "step": 19269 }, { "epoch": 0.8843100362534991, "grad_norm": 0.45204848051071167, "learning_rate": 8.182621470562035e-06, "loss": 0.3613, "step": 19270 }, { "epoch": 0.8843559267587536, "grad_norm": 0.44887587428092957, "learning_rate": 8.182432363931215e-06, "loss": 0.3869, "step": 19271 }, { "epoch": 0.8844018172640081, "grad_norm": 0.4319516122341156, "learning_rate": 8.18224324964765e-06, "loss": 0.395, "step": 19272 }, { "epoch": 0.8844477077692625, "grad_norm": 0.4634430408477783, "learning_rate": 8.182054127711794e-06, "loss": 0.4075, "step": 19273 }, { "epoch": 0.884493598274517, "grad_norm": 0.4933769404888153, "learning_rate": 8.181864998124101e-06, "loss": 0.528, "step": 19274 }, { "epoch": 0.8845394887797715, "grad_norm": 0.47723472118377686, "learning_rate": 8.181675860885026e-06, "loss": 0.3863, "step": 19275 }, { "epoch": 0.8845853792850259, "grad_norm": 0.4558815062046051, "learning_rate": 8.181486715995027e-06, "loss": 0.3617, "step": 19276 }, { "epoch": 0.8846312697902804, "grad_norm": 0.4399058520793915, "learning_rate": 8.181297563454556e-06, "loss": 0.3429, "step": 19277 }, { "epoch": 0.8846771602955349, "grad_norm": 0.4488939642906189, "learning_rate": 8.181108403264067e-06, "loss": 0.3535, "step": 19278 }, { "epoch": 0.8847230508007893, "grad_norm": 0.4201013147830963, "learning_rate": 8.180919235424019e-06, "loss": 0.3529, "step": 19279 }, { "epoch": 0.8847689413060438, "grad_norm": 0.4374241530895233, "learning_rate": 8.180730059934862e-06, "loss": 0.3702, "step": 19280 }, { "epoch": 0.8848148318112983, "grad_norm": 0.4302213788032532, "learning_rate": 8.180540876797053e-06, "loss": 0.344, "step": 19281 }, { "epoch": 0.8848607223165527, "grad_norm": 0.42838823795318604, "learning_rate": 8.180351686011047e-06, "loss": 0.3085, "step": 19282 }, { "epoch": 0.8849066128218072, "grad_norm": 0.42228570580482483, "learning_rate": 8.1801624875773e-06, "loss": 0.3306, "step": 19283 }, { "epoch": 0.8849525033270617, "grad_norm": 0.44273045659065247, "learning_rate": 8.179973281496264e-06, "loss": 0.4385, "step": 19284 }, { "epoch": 0.884998393832316, "grad_norm": 0.5049851536750793, "learning_rate": 8.1797840677684e-06, "loss": 0.5346, "step": 19285 }, { "epoch": 0.8850442843375705, "grad_norm": 0.43154647946357727, "learning_rate": 8.179594846394155e-06, "loss": 0.3589, "step": 19286 }, { "epoch": 0.885090174842825, "grad_norm": 0.397867351770401, "learning_rate": 8.17940561737399e-06, "loss": 0.2926, "step": 19287 }, { "epoch": 0.8851360653480795, "grad_norm": 0.4596880376338959, "learning_rate": 8.179216380708357e-06, "loss": 0.4383, "step": 19288 }, { "epoch": 0.8851819558533339, "grad_norm": 0.47491440176963806, "learning_rate": 8.179027136397712e-06, "loss": 0.4078, "step": 19289 }, { "epoch": 0.8852278463585884, "grad_norm": 0.4638192057609558, "learning_rate": 8.17883788444251e-06, "loss": 0.4266, "step": 19290 }, { "epoch": 0.8852737368638429, "grad_norm": 0.43218016624450684, "learning_rate": 8.178648624843206e-06, "loss": 0.3714, "step": 19291 }, { "epoch": 0.8853196273690973, "grad_norm": 0.47718334197998047, "learning_rate": 8.178459357600255e-06, "loss": 0.413, "step": 19292 }, { "epoch": 0.8853655178743518, "grad_norm": 0.44189852476119995, "learning_rate": 8.178270082714113e-06, "loss": 0.3808, "step": 19293 }, { "epoch": 0.8854114083796063, "grad_norm": 0.4517497718334198, "learning_rate": 8.178080800185235e-06, "loss": 0.372, "step": 19294 }, { "epoch": 0.8854572988848607, "grad_norm": 0.48035383224487305, "learning_rate": 8.177891510014076e-06, "loss": 0.3876, "step": 19295 }, { "epoch": 0.8855031893901152, "grad_norm": 0.4711470603942871, "learning_rate": 8.17770221220109e-06, "loss": 0.4336, "step": 19296 }, { "epoch": 0.8855490798953697, "grad_norm": 0.4574022591114044, "learning_rate": 8.177512906746732e-06, "loss": 0.396, "step": 19297 }, { "epoch": 0.8855949704006241, "grad_norm": 0.41939762234687805, "learning_rate": 8.177323593651459e-06, "loss": 0.3193, "step": 19298 }, { "epoch": 0.8856408609058786, "grad_norm": 0.46054959297180176, "learning_rate": 8.177134272915728e-06, "loss": 0.431, "step": 19299 }, { "epoch": 0.8856867514111331, "grad_norm": 0.45045262575149536, "learning_rate": 8.176944944539989e-06, "loss": 0.3818, "step": 19300 }, { "epoch": 0.8857326419163875, "grad_norm": 0.45295828580856323, "learning_rate": 8.1767556085247e-06, "loss": 0.3919, "step": 19301 }, { "epoch": 0.885778532421642, "grad_norm": 0.45955708622932434, "learning_rate": 8.176566264870319e-06, "loss": 0.4432, "step": 19302 }, { "epoch": 0.8858244229268964, "grad_norm": 0.4525637924671173, "learning_rate": 8.176376913577297e-06, "loss": 0.3716, "step": 19303 }, { "epoch": 0.8858703134321508, "grad_norm": 0.4529217779636383, "learning_rate": 8.17618755464609e-06, "loss": 0.4023, "step": 19304 }, { "epoch": 0.8859162039374053, "grad_norm": 0.4236811101436615, "learning_rate": 8.175998188077156e-06, "loss": 0.3336, "step": 19305 }, { "epoch": 0.8859620944426598, "grad_norm": 0.431275874376297, "learning_rate": 8.175808813870947e-06, "loss": 0.3596, "step": 19306 }, { "epoch": 0.8860079849479143, "grad_norm": 0.45622673630714417, "learning_rate": 8.175619432027922e-06, "loss": 0.3928, "step": 19307 }, { "epoch": 0.8860538754531687, "grad_norm": 0.4416138231754303, "learning_rate": 8.175430042548533e-06, "loss": 0.3979, "step": 19308 }, { "epoch": 0.8860997659584232, "grad_norm": 0.44979560375213623, "learning_rate": 8.175240645433238e-06, "loss": 0.3815, "step": 19309 }, { "epoch": 0.8861456564636777, "grad_norm": 0.48101869225502014, "learning_rate": 8.175051240682493e-06, "loss": 0.4039, "step": 19310 }, { "epoch": 0.8861915469689321, "grad_norm": 0.44577956199645996, "learning_rate": 8.174861828296748e-06, "loss": 0.3847, "step": 19311 }, { "epoch": 0.8862374374741866, "grad_norm": 0.44522202014923096, "learning_rate": 8.174672408276464e-06, "loss": 0.3657, "step": 19312 }, { "epoch": 0.8862833279794411, "grad_norm": 0.4311541020870209, "learning_rate": 8.174482980622096e-06, "loss": 0.3283, "step": 19313 }, { "epoch": 0.8863292184846955, "grad_norm": 0.49103447794914246, "learning_rate": 8.174293545334097e-06, "loss": 0.4119, "step": 19314 }, { "epoch": 0.88637510898995, "grad_norm": 0.5373789072036743, "learning_rate": 8.174104102412924e-06, "loss": 0.3826, "step": 19315 }, { "epoch": 0.8864209994952045, "grad_norm": 0.45798683166503906, "learning_rate": 8.173914651859033e-06, "loss": 0.3872, "step": 19316 }, { "epoch": 0.8864668900004589, "grad_norm": 0.46065694093704224, "learning_rate": 8.173725193672878e-06, "loss": 0.4294, "step": 19317 }, { "epoch": 0.8865127805057134, "grad_norm": 0.42420560121536255, "learning_rate": 8.173535727854917e-06, "loss": 0.3112, "step": 19318 }, { "epoch": 0.8865586710109679, "grad_norm": 0.43694329261779785, "learning_rate": 8.173346254405603e-06, "loss": 0.3574, "step": 19319 }, { "epoch": 0.8866045615162222, "grad_norm": 0.42598527669906616, "learning_rate": 8.173156773325392e-06, "loss": 0.3653, "step": 19320 }, { "epoch": 0.8866504520214767, "grad_norm": 0.43212082982063293, "learning_rate": 8.172967284614743e-06, "loss": 0.3699, "step": 19321 }, { "epoch": 0.8866963425267312, "grad_norm": 0.4443436563014984, "learning_rate": 8.172777788274109e-06, "loss": 0.4238, "step": 19322 }, { "epoch": 0.8867422330319856, "grad_norm": 0.4680856168270111, "learning_rate": 8.172588284303943e-06, "loss": 0.4438, "step": 19323 }, { "epoch": 0.8867881235372401, "grad_norm": 0.47539687156677246, "learning_rate": 8.172398772704704e-06, "loss": 0.4193, "step": 19324 }, { "epoch": 0.8868340140424946, "grad_norm": 0.4608498513698578, "learning_rate": 8.172209253476848e-06, "loss": 0.3983, "step": 19325 }, { "epoch": 0.8868799045477491, "grad_norm": 0.4740413427352905, "learning_rate": 8.172019726620832e-06, "loss": 0.4472, "step": 19326 }, { "epoch": 0.8869257950530035, "grad_norm": 0.44513195753097534, "learning_rate": 8.171830192137107e-06, "loss": 0.4111, "step": 19327 }, { "epoch": 0.886971685558258, "grad_norm": 0.44581103324890137, "learning_rate": 8.17164065002613e-06, "loss": 0.3915, "step": 19328 }, { "epoch": 0.8870175760635125, "grad_norm": 0.4642084240913391, "learning_rate": 8.171451100288363e-06, "loss": 0.3842, "step": 19329 }, { "epoch": 0.8870634665687669, "grad_norm": 0.4672383666038513, "learning_rate": 8.171261542924254e-06, "loss": 0.4206, "step": 19330 }, { "epoch": 0.8871093570740214, "grad_norm": 0.4160286486148834, "learning_rate": 8.171071977934262e-06, "loss": 0.3557, "step": 19331 }, { "epoch": 0.8871552475792759, "grad_norm": 0.43047216534614563, "learning_rate": 8.170882405318842e-06, "loss": 0.3295, "step": 19332 }, { "epoch": 0.8872011380845303, "grad_norm": 0.45906203985214233, "learning_rate": 8.170692825078452e-06, "loss": 0.4172, "step": 19333 }, { "epoch": 0.8872470285897848, "grad_norm": 0.4445382356643677, "learning_rate": 8.170503237213546e-06, "loss": 0.3602, "step": 19334 }, { "epoch": 0.8872929190950393, "grad_norm": 0.41604000329971313, "learning_rate": 8.170313641724581e-06, "loss": 0.3324, "step": 19335 }, { "epoch": 0.8873388096002937, "grad_norm": 0.42382022738456726, "learning_rate": 8.170124038612011e-06, "loss": 0.3639, "step": 19336 }, { "epoch": 0.8873847001055482, "grad_norm": 0.41887417435646057, "learning_rate": 8.169934427876295e-06, "loss": 0.3163, "step": 19337 }, { "epoch": 0.8874305906108026, "grad_norm": 0.4523906707763672, "learning_rate": 8.169744809517886e-06, "loss": 0.3908, "step": 19338 }, { "epoch": 0.887476481116057, "grad_norm": 0.4558180272579193, "learning_rate": 8.169555183537241e-06, "loss": 0.4401, "step": 19339 }, { "epoch": 0.8875223716213115, "grad_norm": 0.426932156085968, "learning_rate": 8.169365549934816e-06, "loss": 0.337, "step": 19340 }, { "epoch": 0.887568262126566, "grad_norm": 0.47371241450309753, "learning_rate": 8.169175908711067e-06, "loss": 0.4917, "step": 19341 }, { "epoch": 0.8876141526318205, "grad_norm": 0.4252172112464905, "learning_rate": 8.168986259866451e-06, "loss": 0.3109, "step": 19342 }, { "epoch": 0.8876600431370749, "grad_norm": 0.44239091873168945, "learning_rate": 8.168796603401423e-06, "loss": 0.4288, "step": 19343 }, { "epoch": 0.8877059336423294, "grad_norm": 0.42298561334609985, "learning_rate": 8.16860693931644e-06, "loss": 0.3796, "step": 19344 }, { "epoch": 0.8877518241475839, "grad_norm": 0.46961742639541626, "learning_rate": 8.168417267611955e-06, "loss": 0.4316, "step": 19345 }, { "epoch": 0.8877977146528383, "grad_norm": 0.49516335129737854, "learning_rate": 8.168227588288429e-06, "loss": 0.5119, "step": 19346 }, { "epoch": 0.8878436051580928, "grad_norm": 0.6207625269889832, "learning_rate": 8.168037901346314e-06, "loss": 0.4027, "step": 19347 }, { "epoch": 0.8878894956633473, "grad_norm": 0.42306700348854065, "learning_rate": 8.167848206786068e-06, "loss": 0.3537, "step": 19348 }, { "epoch": 0.8879353861686017, "grad_norm": 0.4330935776233673, "learning_rate": 8.167658504608147e-06, "loss": 0.3604, "step": 19349 }, { "epoch": 0.8879812766738562, "grad_norm": 0.47012266516685486, "learning_rate": 8.167468794813007e-06, "loss": 0.4203, "step": 19350 }, { "epoch": 0.8880271671791107, "grad_norm": 0.4753623604774475, "learning_rate": 8.167279077401105e-06, "loss": 0.4585, "step": 19351 }, { "epoch": 0.8880730576843651, "grad_norm": 0.44926005601882935, "learning_rate": 8.167089352372896e-06, "loss": 0.4219, "step": 19352 }, { "epoch": 0.8881189481896196, "grad_norm": 0.4491520822048187, "learning_rate": 8.166899619728837e-06, "loss": 0.402, "step": 19353 }, { "epoch": 0.8881648386948741, "grad_norm": 0.45438364148139954, "learning_rate": 8.166709879469382e-06, "loss": 0.4053, "step": 19354 }, { "epoch": 0.8882107292001284, "grad_norm": 0.47404855489730835, "learning_rate": 8.16652013159499e-06, "loss": 0.4175, "step": 19355 }, { "epoch": 0.8882566197053829, "grad_norm": 0.4858325719833374, "learning_rate": 8.166330376106117e-06, "loss": 0.4552, "step": 19356 }, { "epoch": 0.8883025102106374, "grad_norm": 0.43599143624305725, "learning_rate": 8.16614061300322e-06, "loss": 0.3832, "step": 19357 }, { "epoch": 0.8883484007158918, "grad_norm": 0.4565216898918152, "learning_rate": 8.165950842286752e-06, "loss": 0.4016, "step": 19358 }, { "epoch": 0.8883942912211463, "grad_norm": 0.4756777584552765, "learning_rate": 8.165761063957173e-06, "loss": 0.4104, "step": 19359 }, { "epoch": 0.8884401817264008, "grad_norm": 0.43310877680778503, "learning_rate": 8.165571278014937e-06, "loss": 0.3608, "step": 19360 }, { "epoch": 0.8884860722316553, "grad_norm": 0.46654167771339417, "learning_rate": 8.1653814844605e-06, "loss": 0.3933, "step": 19361 }, { "epoch": 0.8885319627369097, "grad_norm": 0.4520196318626404, "learning_rate": 8.165191683294321e-06, "loss": 0.4361, "step": 19362 }, { "epoch": 0.8885778532421642, "grad_norm": 0.450872540473938, "learning_rate": 8.165001874516854e-06, "loss": 0.4019, "step": 19363 }, { "epoch": 0.8886237437474187, "grad_norm": 0.4551583528518677, "learning_rate": 8.164812058128557e-06, "loss": 0.4125, "step": 19364 }, { "epoch": 0.8886696342526731, "grad_norm": 0.4707236886024475, "learning_rate": 8.164622234129886e-06, "loss": 0.4166, "step": 19365 }, { "epoch": 0.8887155247579276, "grad_norm": 0.42759400606155396, "learning_rate": 8.164432402521299e-06, "loss": 0.3469, "step": 19366 }, { "epoch": 0.8887614152631821, "grad_norm": 0.4595560133457184, "learning_rate": 8.164242563303248e-06, "loss": 0.3966, "step": 19367 }, { "epoch": 0.8888073057684365, "grad_norm": 0.4359815716743469, "learning_rate": 8.164052716476194e-06, "loss": 0.3524, "step": 19368 }, { "epoch": 0.888853196273691, "grad_norm": 0.5280541181564331, "learning_rate": 8.163862862040592e-06, "loss": 0.504, "step": 19369 }, { "epoch": 0.8888990867789455, "grad_norm": 0.47906383872032166, "learning_rate": 8.163672999996897e-06, "loss": 0.4936, "step": 19370 }, { "epoch": 0.8889449772841999, "grad_norm": 0.47506964206695557, "learning_rate": 8.163483130345566e-06, "loss": 0.3906, "step": 19371 }, { "epoch": 0.8889908677894544, "grad_norm": 0.45775508880615234, "learning_rate": 8.163293253087059e-06, "loss": 0.3548, "step": 19372 }, { "epoch": 0.8890367582947089, "grad_norm": 0.4755198061466217, "learning_rate": 8.16310336822183e-06, "loss": 0.441, "step": 19373 }, { "epoch": 0.8890826487999632, "grad_norm": 0.5096787214279175, "learning_rate": 8.162913475750334e-06, "loss": 0.588, "step": 19374 }, { "epoch": 0.8891285393052177, "grad_norm": 0.4467904269695282, "learning_rate": 8.16272357567303e-06, "loss": 0.4172, "step": 19375 }, { "epoch": 0.8891744298104722, "grad_norm": 0.4519994854927063, "learning_rate": 8.162533667990373e-06, "loss": 0.4157, "step": 19376 }, { "epoch": 0.8892203203157267, "grad_norm": 0.4626026451587677, "learning_rate": 8.162343752702822e-06, "loss": 0.447, "step": 19377 }, { "epoch": 0.8892662108209811, "grad_norm": 0.46387556195259094, "learning_rate": 8.162153829810832e-06, "loss": 0.3745, "step": 19378 }, { "epoch": 0.8893121013262356, "grad_norm": 0.47166651487350464, "learning_rate": 8.161963899314861e-06, "loss": 0.389, "step": 19379 }, { "epoch": 0.8893579918314901, "grad_norm": 0.46069812774658203, "learning_rate": 8.161773961215362e-06, "loss": 0.4462, "step": 19380 }, { "epoch": 0.8894038823367445, "grad_norm": 0.45364218950271606, "learning_rate": 8.161584015512798e-06, "loss": 0.4048, "step": 19381 }, { "epoch": 0.889449772841999, "grad_norm": 0.48208382725715637, "learning_rate": 8.16139406220762e-06, "loss": 0.4026, "step": 19382 }, { "epoch": 0.8894956633472535, "grad_norm": 0.4571487307548523, "learning_rate": 8.161204101300289e-06, "loss": 0.3607, "step": 19383 }, { "epoch": 0.8895415538525079, "grad_norm": 0.4381783902645111, "learning_rate": 8.161014132791258e-06, "loss": 0.3745, "step": 19384 }, { "epoch": 0.8895874443577624, "grad_norm": 0.44830700755119324, "learning_rate": 8.160824156680987e-06, "loss": 0.4434, "step": 19385 }, { "epoch": 0.8896333348630169, "grad_norm": 0.44642579555511475, "learning_rate": 8.16063417296993e-06, "loss": 0.3977, "step": 19386 }, { "epoch": 0.8896792253682713, "grad_norm": 0.6056147217750549, "learning_rate": 8.160444181658546e-06, "loss": 0.3896, "step": 19387 }, { "epoch": 0.8897251158735258, "grad_norm": 0.4642273187637329, "learning_rate": 8.16025418274729e-06, "loss": 0.4185, "step": 19388 }, { "epoch": 0.8897710063787803, "grad_norm": 0.46919429302215576, "learning_rate": 8.160064176236623e-06, "loss": 0.377, "step": 19389 }, { "epoch": 0.8898168968840346, "grad_norm": 0.41915735602378845, "learning_rate": 8.159874162126997e-06, "loss": 0.3309, "step": 19390 }, { "epoch": 0.8898627873892891, "grad_norm": 0.4426107406616211, "learning_rate": 8.159684140418871e-06, "loss": 0.3477, "step": 19391 }, { "epoch": 0.8899086778945436, "grad_norm": 0.45022663474082947, "learning_rate": 8.159494111112702e-06, "loss": 0.3971, "step": 19392 }, { "epoch": 0.889954568399798, "grad_norm": 0.4152921736240387, "learning_rate": 8.159304074208947e-06, "loss": 0.3506, "step": 19393 }, { "epoch": 0.8900004589050525, "grad_norm": 0.46691566705703735, "learning_rate": 8.159114029708062e-06, "loss": 0.4117, "step": 19394 }, { "epoch": 0.890046349410307, "grad_norm": 0.47423386573791504, "learning_rate": 8.158923977610507e-06, "loss": 0.4652, "step": 19395 }, { "epoch": 0.8900922399155615, "grad_norm": 0.4628556966781616, "learning_rate": 8.158733917916734e-06, "loss": 0.3962, "step": 19396 }, { "epoch": 0.8901381304208159, "grad_norm": 0.4876999258995056, "learning_rate": 8.158543850627205e-06, "loss": 0.4913, "step": 19397 }, { "epoch": 0.8901840209260704, "grad_norm": 0.463866263628006, "learning_rate": 8.158353775742373e-06, "loss": 0.4127, "step": 19398 }, { "epoch": 0.8902299114313249, "grad_norm": 0.47970008850097656, "learning_rate": 8.1581636932627e-06, "loss": 0.4333, "step": 19399 }, { "epoch": 0.8902758019365793, "grad_norm": 0.4780748188495636, "learning_rate": 8.157973603188636e-06, "loss": 0.4085, "step": 19400 }, { "epoch": 0.8903216924418338, "grad_norm": 0.43783771991729736, "learning_rate": 8.157783505520646e-06, "loss": 0.4089, "step": 19401 }, { "epoch": 0.8903675829470883, "grad_norm": 0.4221642315387726, "learning_rate": 8.15759340025918e-06, "loss": 0.3503, "step": 19402 }, { "epoch": 0.8904134734523427, "grad_norm": 0.3947789669036865, "learning_rate": 8.157403287404702e-06, "loss": 0.2945, "step": 19403 }, { "epoch": 0.8904593639575972, "grad_norm": 0.45561426877975464, "learning_rate": 8.157213166957662e-06, "loss": 0.3947, "step": 19404 }, { "epoch": 0.8905052544628517, "grad_norm": 0.44601961970329285, "learning_rate": 8.157023038918524e-06, "loss": 0.3466, "step": 19405 }, { "epoch": 0.8905511449681061, "grad_norm": 0.4656178057193756, "learning_rate": 8.15683290328774e-06, "loss": 0.3487, "step": 19406 }, { "epoch": 0.8905970354733606, "grad_norm": 0.45121946930885315, "learning_rate": 8.15664276006577e-06, "loss": 0.4242, "step": 19407 }, { "epoch": 0.890642925978615, "grad_norm": 0.442094087600708, "learning_rate": 8.15645260925307e-06, "loss": 0.3695, "step": 19408 }, { "epoch": 0.8906888164838694, "grad_norm": 0.485878050327301, "learning_rate": 8.156262450850098e-06, "loss": 0.4875, "step": 19409 }, { "epoch": 0.8907347069891239, "grad_norm": 0.48015186190605164, "learning_rate": 8.156072284857309e-06, "loss": 0.4757, "step": 19410 }, { "epoch": 0.8907805974943784, "grad_norm": 0.4578854441642761, "learning_rate": 8.155882111275164e-06, "loss": 0.441, "step": 19411 }, { "epoch": 0.8908264879996328, "grad_norm": 0.4316173195838928, "learning_rate": 8.155691930104119e-06, "loss": 0.3557, "step": 19412 }, { "epoch": 0.8908723785048873, "grad_norm": 0.4704018235206604, "learning_rate": 8.155501741344631e-06, "loss": 0.3957, "step": 19413 }, { "epoch": 0.8909182690101418, "grad_norm": 0.46602052450180054, "learning_rate": 8.155311544997156e-06, "loss": 0.4051, "step": 19414 }, { "epoch": 0.8909641595153963, "grad_norm": 0.4631572663784027, "learning_rate": 8.155121341062153e-06, "loss": 0.4095, "step": 19415 }, { "epoch": 0.8910100500206507, "grad_norm": 0.48925384879112244, "learning_rate": 8.15493112954008e-06, "loss": 0.4705, "step": 19416 }, { "epoch": 0.8910559405259052, "grad_norm": 0.4450492560863495, "learning_rate": 8.154740910431391e-06, "loss": 0.3583, "step": 19417 }, { "epoch": 0.8911018310311597, "grad_norm": 0.7268593907356262, "learning_rate": 8.154550683736548e-06, "loss": 0.4314, "step": 19418 }, { "epoch": 0.8911477215364141, "grad_norm": 0.41752228140830994, "learning_rate": 8.154360449456005e-06, "loss": 0.3139, "step": 19419 }, { "epoch": 0.8911936120416686, "grad_norm": 0.4441676437854767, "learning_rate": 8.154170207590222e-06, "loss": 0.4163, "step": 19420 }, { "epoch": 0.8912395025469231, "grad_norm": 0.4454962909221649, "learning_rate": 8.153979958139653e-06, "loss": 0.3932, "step": 19421 }, { "epoch": 0.8912853930521775, "grad_norm": 0.3958258032798767, "learning_rate": 8.153789701104759e-06, "loss": 0.2881, "step": 19422 }, { "epoch": 0.891331283557432, "grad_norm": 0.4647879898548126, "learning_rate": 8.153599436485996e-06, "loss": 0.4728, "step": 19423 }, { "epoch": 0.8913771740626865, "grad_norm": 0.48763689398765564, "learning_rate": 8.153409164283822e-06, "loss": 0.4135, "step": 19424 }, { "epoch": 0.8914230645679408, "grad_norm": 0.4693635404109955, "learning_rate": 8.153218884498694e-06, "loss": 0.3899, "step": 19425 }, { "epoch": 0.8914689550731953, "grad_norm": 0.48482662439346313, "learning_rate": 8.15302859713107e-06, "loss": 0.494, "step": 19426 }, { "epoch": 0.8915148455784498, "grad_norm": 0.4161026179790497, "learning_rate": 8.152838302181408e-06, "loss": 0.3518, "step": 19427 }, { "epoch": 0.8915607360837042, "grad_norm": 0.42444315552711487, "learning_rate": 8.152647999650164e-06, "loss": 0.3662, "step": 19428 }, { "epoch": 0.8916066265889587, "grad_norm": 0.47405731678009033, "learning_rate": 8.152457689537795e-06, "loss": 0.4703, "step": 19429 }, { "epoch": 0.8916525170942132, "grad_norm": 0.44718196988105774, "learning_rate": 8.152267371844761e-06, "loss": 0.375, "step": 19430 }, { "epoch": 0.8916984075994677, "grad_norm": 0.5276437997817993, "learning_rate": 8.15207704657152e-06, "loss": 0.5015, "step": 19431 }, { "epoch": 0.8917442981047221, "grad_norm": 0.45904842019081116, "learning_rate": 8.151886713718529e-06, "loss": 0.4428, "step": 19432 }, { "epoch": 0.8917901886099766, "grad_norm": 0.47445428371429443, "learning_rate": 8.151696373286243e-06, "loss": 0.4712, "step": 19433 }, { "epoch": 0.8918360791152311, "grad_norm": 0.42241159081459045, "learning_rate": 8.151506025275124e-06, "loss": 0.3146, "step": 19434 }, { "epoch": 0.8918819696204855, "grad_norm": 0.45344409346580505, "learning_rate": 8.151315669685626e-06, "loss": 0.3824, "step": 19435 }, { "epoch": 0.89192786012574, "grad_norm": 0.4522955119609833, "learning_rate": 8.151125306518209e-06, "loss": 0.3765, "step": 19436 }, { "epoch": 0.8919737506309945, "grad_norm": 0.46909478306770325, "learning_rate": 8.150934935773331e-06, "loss": 0.4037, "step": 19437 }, { "epoch": 0.8920196411362489, "grad_norm": 0.4537869393825531, "learning_rate": 8.15074455745145e-06, "loss": 0.4173, "step": 19438 }, { "epoch": 0.8920655316415034, "grad_norm": 0.46868228912353516, "learning_rate": 8.15055417155302e-06, "loss": 0.3929, "step": 19439 }, { "epoch": 0.8921114221467579, "grad_norm": 0.48121026158332825, "learning_rate": 8.150363778078503e-06, "loss": 0.4305, "step": 19440 }, { "epoch": 0.8921573126520123, "grad_norm": 0.46466583013534546, "learning_rate": 8.150173377028354e-06, "loss": 0.3833, "step": 19441 }, { "epoch": 0.8922032031572668, "grad_norm": 0.48855188488960266, "learning_rate": 8.149982968403033e-06, "loss": 0.4156, "step": 19442 }, { "epoch": 0.8922490936625213, "grad_norm": 0.4826533794403076, "learning_rate": 8.149792552202997e-06, "loss": 0.4471, "step": 19443 }, { "epoch": 0.8922949841677756, "grad_norm": 0.4371528625488281, "learning_rate": 8.149602128428707e-06, "loss": 0.3623, "step": 19444 }, { "epoch": 0.8923408746730301, "grad_norm": 0.4289834797382355, "learning_rate": 8.149411697080613e-06, "loss": 0.3436, "step": 19445 }, { "epoch": 0.8923867651782846, "grad_norm": 0.6618348360061646, "learning_rate": 8.149221258159182e-06, "loss": 0.5212, "step": 19446 }, { "epoch": 0.892432655683539, "grad_norm": 0.42819881439208984, "learning_rate": 8.149030811664866e-06, "loss": 0.3639, "step": 19447 }, { "epoch": 0.8924785461887935, "grad_norm": 0.46663379669189453, "learning_rate": 8.148840357598123e-06, "loss": 0.3956, "step": 19448 }, { "epoch": 0.892524436694048, "grad_norm": 0.43926116824150085, "learning_rate": 8.148649895959414e-06, "loss": 0.3862, "step": 19449 }, { "epoch": 0.8925703271993025, "grad_norm": 0.41980671882629395, "learning_rate": 8.148459426749198e-06, "loss": 0.299, "step": 19450 }, { "epoch": 0.8926162177045569, "grad_norm": 0.40325164794921875, "learning_rate": 8.148268949967929e-06, "loss": 0.3075, "step": 19451 }, { "epoch": 0.8926621082098114, "grad_norm": 0.46803274750709534, "learning_rate": 8.148078465616067e-06, "loss": 0.3607, "step": 19452 }, { "epoch": 0.8927079987150659, "grad_norm": 0.5105648636817932, "learning_rate": 8.147887973694068e-06, "loss": 0.3665, "step": 19453 }, { "epoch": 0.8927538892203203, "grad_norm": 0.4938548803329468, "learning_rate": 8.147697474202395e-06, "loss": 0.4717, "step": 19454 }, { "epoch": 0.8927997797255748, "grad_norm": 0.41242504119873047, "learning_rate": 8.147506967141502e-06, "loss": 0.29, "step": 19455 }, { "epoch": 0.8928456702308293, "grad_norm": 0.4445120096206665, "learning_rate": 8.147316452511846e-06, "loss": 0.3549, "step": 19456 }, { "epoch": 0.8928915607360837, "grad_norm": 0.48165270686149597, "learning_rate": 8.147125930313891e-06, "loss": 0.4379, "step": 19457 }, { "epoch": 0.8929374512413382, "grad_norm": 0.45806530117988586, "learning_rate": 8.146935400548088e-06, "loss": 0.4343, "step": 19458 }, { "epoch": 0.8929833417465927, "grad_norm": 0.4479197561740875, "learning_rate": 8.1467448632149e-06, "loss": 0.414, "step": 19459 }, { "epoch": 0.893029232251847, "grad_norm": 0.43394848704338074, "learning_rate": 8.146554318314785e-06, "loss": 0.3716, "step": 19460 }, { "epoch": 0.8930751227571015, "grad_norm": 0.4871610403060913, "learning_rate": 8.146363765848198e-06, "loss": 0.4521, "step": 19461 }, { "epoch": 0.893121013262356, "grad_norm": 0.4443134069442749, "learning_rate": 8.146173205815599e-06, "loss": 0.3725, "step": 19462 }, { "epoch": 0.8931669037676104, "grad_norm": 0.5022876858711243, "learning_rate": 8.145982638217449e-06, "loss": 0.471, "step": 19463 }, { "epoch": 0.8932127942728649, "grad_norm": 0.4829522967338562, "learning_rate": 8.1457920630542e-06, "loss": 0.4495, "step": 19464 }, { "epoch": 0.8932586847781194, "grad_norm": 0.4767495393753052, "learning_rate": 8.145601480326318e-06, "loss": 0.4627, "step": 19465 }, { "epoch": 0.8933045752833739, "grad_norm": 0.4510904550552368, "learning_rate": 8.145410890034254e-06, "loss": 0.357, "step": 19466 }, { "epoch": 0.8933504657886283, "grad_norm": 0.4517460763454437, "learning_rate": 8.145220292178471e-06, "loss": 0.3537, "step": 19467 }, { "epoch": 0.8933963562938828, "grad_norm": 0.4477836489677429, "learning_rate": 8.145029686759425e-06, "loss": 0.4395, "step": 19468 }, { "epoch": 0.8934422467991373, "grad_norm": 0.5164333581924438, "learning_rate": 8.144839073777575e-06, "loss": 0.4942, "step": 19469 }, { "epoch": 0.8934881373043917, "grad_norm": 0.4846258759498596, "learning_rate": 8.14464845323338e-06, "loss": 0.4961, "step": 19470 }, { "epoch": 0.8935340278096462, "grad_norm": 0.4388260543346405, "learning_rate": 8.144457825127298e-06, "loss": 0.3844, "step": 19471 }, { "epoch": 0.8935799183149007, "grad_norm": 0.4482676088809967, "learning_rate": 8.144267189459789e-06, "loss": 0.3695, "step": 19472 }, { "epoch": 0.8936258088201551, "grad_norm": 0.41031697392463684, "learning_rate": 8.144076546231307e-06, "loss": 0.3397, "step": 19473 }, { "epoch": 0.8936716993254096, "grad_norm": 0.4591888189315796, "learning_rate": 8.143885895442316e-06, "loss": 0.4345, "step": 19474 }, { "epoch": 0.8937175898306641, "grad_norm": 0.41118794679641724, "learning_rate": 8.14369523709327e-06, "loss": 0.2958, "step": 19475 }, { "epoch": 0.8937634803359185, "grad_norm": 0.4338220953941345, "learning_rate": 8.143504571184628e-06, "loss": 0.3604, "step": 19476 }, { "epoch": 0.893809370841173, "grad_norm": 0.47428327798843384, "learning_rate": 8.143313897716852e-06, "loss": 0.4571, "step": 19477 }, { "epoch": 0.8938552613464275, "grad_norm": 0.43253785371780396, "learning_rate": 8.143123216690396e-06, "loss": 0.389, "step": 19478 }, { "epoch": 0.8939011518516818, "grad_norm": 0.4552631974220276, "learning_rate": 8.142932528105722e-06, "loss": 0.3858, "step": 19479 }, { "epoch": 0.8939470423569363, "grad_norm": 0.5024164319038391, "learning_rate": 8.142741831963287e-06, "loss": 0.4098, "step": 19480 }, { "epoch": 0.8939929328621908, "grad_norm": 0.4136143624782562, "learning_rate": 8.14255112826355e-06, "loss": 0.3277, "step": 19481 }, { "epoch": 0.8940388233674452, "grad_norm": 0.4403443932533264, "learning_rate": 8.142360417006969e-06, "loss": 0.3789, "step": 19482 }, { "epoch": 0.8940847138726997, "grad_norm": 0.45116573572158813, "learning_rate": 8.142169698194002e-06, "loss": 0.3877, "step": 19483 }, { "epoch": 0.8941306043779542, "grad_norm": 0.45773056149482727, "learning_rate": 8.141978971825111e-06, "loss": 0.424, "step": 19484 }, { "epoch": 0.8941764948832087, "grad_norm": 0.46291065216064453, "learning_rate": 8.14178823790075e-06, "loss": 0.4208, "step": 19485 }, { "epoch": 0.8942223853884631, "grad_norm": 0.42325177788734436, "learning_rate": 8.14159749642138e-06, "loss": 0.3411, "step": 19486 }, { "epoch": 0.8942682758937176, "grad_norm": 0.45321425795555115, "learning_rate": 8.14140674738746e-06, "loss": 0.3633, "step": 19487 }, { "epoch": 0.8943141663989721, "grad_norm": 0.4999252259731293, "learning_rate": 8.14121599079945e-06, "loss": 0.4538, "step": 19488 }, { "epoch": 0.8943600569042265, "grad_norm": 0.4953620135784149, "learning_rate": 8.141025226657804e-06, "loss": 0.4249, "step": 19489 }, { "epoch": 0.894405947409481, "grad_norm": 0.44975546002388, "learning_rate": 8.140834454962985e-06, "loss": 0.4221, "step": 19490 }, { "epoch": 0.8944518379147355, "grad_norm": 0.4184361398220062, "learning_rate": 8.140643675715452e-06, "loss": 0.3573, "step": 19491 }, { "epoch": 0.8944977284199899, "grad_norm": 0.43070849776268005, "learning_rate": 8.14045288891566e-06, "loss": 0.3436, "step": 19492 }, { "epoch": 0.8945436189252444, "grad_norm": 0.46900585293769836, "learning_rate": 8.14026209456407e-06, "loss": 0.4992, "step": 19493 }, { "epoch": 0.8945895094304989, "grad_norm": 0.4533962607383728, "learning_rate": 8.140071292661141e-06, "loss": 0.3825, "step": 19494 }, { "epoch": 0.8946353999357533, "grad_norm": 0.45486292243003845, "learning_rate": 8.139880483207333e-06, "loss": 0.4242, "step": 19495 }, { "epoch": 0.8946812904410077, "grad_norm": 0.5144616961479187, "learning_rate": 8.139689666203101e-06, "loss": 0.4739, "step": 19496 }, { "epoch": 0.8947271809462622, "grad_norm": 0.42338499426841736, "learning_rate": 8.139498841648908e-06, "loss": 0.3798, "step": 19497 }, { "epoch": 0.8947730714515166, "grad_norm": 0.41857266426086426, "learning_rate": 8.13930800954521e-06, "loss": 0.3643, "step": 19498 }, { "epoch": 0.8948189619567711, "grad_norm": 0.5519826412200928, "learning_rate": 8.139117169892469e-06, "loss": 0.4356, "step": 19499 }, { "epoch": 0.8948648524620256, "grad_norm": 0.41876888275146484, "learning_rate": 8.13892632269114e-06, "loss": 0.3612, "step": 19500 }, { "epoch": 0.89491074296728, "grad_norm": 0.48882347345352173, "learning_rate": 8.138735467941684e-06, "loss": 0.4796, "step": 19501 }, { "epoch": 0.8949566334725345, "grad_norm": 0.4193541705608368, "learning_rate": 8.13854460564456e-06, "loss": 0.34, "step": 19502 }, { "epoch": 0.895002523977789, "grad_norm": 0.46662354469299316, "learning_rate": 8.138353735800228e-06, "loss": 0.454, "step": 19503 }, { "epoch": 0.8950484144830435, "grad_norm": 0.4273943603038788, "learning_rate": 8.138162858409144e-06, "loss": 0.3875, "step": 19504 }, { "epoch": 0.8950943049882979, "grad_norm": 0.4244268238544464, "learning_rate": 8.137971973471769e-06, "loss": 0.3485, "step": 19505 }, { "epoch": 0.8951401954935524, "grad_norm": 0.4576314687728882, "learning_rate": 8.137781080988564e-06, "loss": 0.4461, "step": 19506 }, { "epoch": 0.8951860859988069, "grad_norm": 0.4545513689517975, "learning_rate": 8.137590180959984e-06, "loss": 0.3799, "step": 19507 }, { "epoch": 0.8952319765040613, "grad_norm": 0.4554949700832367, "learning_rate": 8.13739927338649e-06, "loss": 0.3752, "step": 19508 }, { "epoch": 0.8952778670093158, "grad_norm": 0.4584297835826874, "learning_rate": 8.13720835826854e-06, "loss": 0.4076, "step": 19509 }, { "epoch": 0.8953237575145703, "grad_norm": 0.3998614549636841, "learning_rate": 8.137017435606595e-06, "loss": 0.3302, "step": 19510 }, { "epoch": 0.8953696480198247, "grad_norm": 0.4565545916557312, "learning_rate": 8.136826505401115e-06, "loss": 0.4467, "step": 19511 }, { "epoch": 0.8954155385250792, "grad_norm": 0.44657012820243835, "learning_rate": 8.136635567652555e-06, "loss": 0.3759, "step": 19512 }, { "epoch": 0.8954614290303337, "grad_norm": 0.4878428280353546, "learning_rate": 8.136444622361377e-06, "loss": 0.4024, "step": 19513 }, { "epoch": 0.895507319535588, "grad_norm": 0.4634716510772705, "learning_rate": 8.136253669528039e-06, "loss": 0.4205, "step": 19514 }, { "epoch": 0.8955532100408425, "grad_norm": 0.4810328185558319, "learning_rate": 8.136062709153001e-06, "loss": 0.4497, "step": 19515 }, { "epoch": 0.895599100546097, "grad_norm": 0.45394232869148254, "learning_rate": 8.135871741236723e-06, "loss": 0.3941, "step": 19516 }, { "epoch": 0.8956449910513514, "grad_norm": 0.4583016633987427, "learning_rate": 8.135680765779663e-06, "loss": 0.4446, "step": 19517 }, { "epoch": 0.8956908815566059, "grad_norm": 0.454923540353775, "learning_rate": 8.135489782782278e-06, "loss": 0.357, "step": 19518 }, { "epoch": 0.8957367720618604, "grad_norm": 0.45031121373176575, "learning_rate": 8.135298792245033e-06, "loss": 0.4097, "step": 19519 }, { "epoch": 0.8957826625671149, "grad_norm": 0.4369613230228424, "learning_rate": 8.135107794168381e-06, "loss": 0.3681, "step": 19520 }, { "epoch": 0.8958285530723693, "grad_norm": 0.42453238368034363, "learning_rate": 8.134916788552784e-06, "loss": 0.3637, "step": 19521 }, { "epoch": 0.8958744435776238, "grad_norm": 0.5763978958129883, "learning_rate": 8.134725775398704e-06, "loss": 0.3798, "step": 19522 }, { "epoch": 0.8959203340828783, "grad_norm": 0.4394003450870514, "learning_rate": 8.134534754706598e-06, "loss": 0.334, "step": 19523 }, { "epoch": 0.8959662245881327, "grad_norm": 0.46039608120918274, "learning_rate": 8.134343726476923e-06, "loss": 0.467, "step": 19524 }, { "epoch": 0.8960121150933872, "grad_norm": 0.4259471297264099, "learning_rate": 8.134152690710142e-06, "loss": 0.3353, "step": 19525 }, { "epoch": 0.8960580055986417, "grad_norm": 0.4664720594882965, "learning_rate": 8.133961647406715e-06, "loss": 0.3832, "step": 19526 }, { "epoch": 0.8961038961038961, "grad_norm": 0.46415975689888, "learning_rate": 8.133770596567095e-06, "loss": 0.412, "step": 19527 }, { "epoch": 0.8961497866091506, "grad_norm": 0.4015985429286957, "learning_rate": 8.133579538191748e-06, "loss": 0.3376, "step": 19528 }, { "epoch": 0.8961956771144051, "grad_norm": 0.4777730107307434, "learning_rate": 8.13338847228113e-06, "loss": 0.3748, "step": 19529 }, { "epoch": 0.8962415676196595, "grad_norm": 0.4471748173236847, "learning_rate": 8.133197398835704e-06, "loss": 0.3893, "step": 19530 }, { "epoch": 0.896287458124914, "grad_norm": 0.4751160144805908, "learning_rate": 8.133006317855925e-06, "loss": 0.4672, "step": 19531 }, { "epoch": 0.8963333486301684, "grad_norm": 0.4724672734737396, "learning_rate": 8.132815229342255e-06, "loss": 0.4506, "step": 19532 }, { "epoch": 0.8963792391354228, "grad_norm": 0.4228498041629791, "learning_rate": 8.132624133295155e-06, "loss": 0.3631, "step": 19533 }, { "epoch": 0.8964251296406773, "grad_norm": 0.45519793033599854, "learning_rate": 8.132433029715082e-06, "loss": 0.4204, "step": 19534 }, { "epoch": 0.8964710201459318, "grad_norm": 0.5106019973754883, "learning_rate": 8.132241918602493e-06, "loss": 0.5202, "step": 19535 }, { "epoch": 0.8965169106511862, "grad_norm": 0.48189252614974976, "learning_rate": 8.132050799957854e-06, "loss": 0.4086, "step": 19536 }, { "epoch": 0.8965628011564407, "grad_norm": 0.44232305884361267, "learning_rate": 8.13185967378162e-06, "loss": 0.3583, "step": 19537 }, { "epoch": 0.8966086916616952, "grad_norm": 0.43216806650161743, "learning_rate": 8.131668540074253e-06, "loss": 0.3812, "step": 19538 }, { "epoch": 0.8966545821669497, "grad_norm": 0.45896944403648376, "learning_rate": 8.13147739883621e-06, "loss": 0.4333, "step": 19539 }, { "epoch": 0.8967004726722041, "grad_norm": 0.4305015206336975, "learning_rate": 8.131286250067953e-06, "loss": 0.3465, "step": 19540 }, { "epoch": 0.8967463631774586, "grad_norm": 0.43023139238357544, "learning_rate": 8.131095093769941e-06, "loss": 0.3835, "step": 19541 }, { "epoch": 0.8967922536827131, "grad_norm": 0.41526398062705994, "learning_rate": 8.130903929942633e-06, "loss": 0.3219, "step": 19542 }, { "epoch": 0.8968381441879675, "grad_norm": 0.43843820691108704, "learning_rate": 8.13071275858649e-06, "loss": 0.3577, "step": 19543 }, { "epoch": 0.896884034693222, "grad_norm": 0.41350796818733215, "learning_rate": 8.13052157970197e-06, "loss": 0.347, "step": 19544 }, { "epoch": 0.8969299251984765, "grad_norm": 0.48723945021629333, "learning_rate": 8.130330393289533e-06, "loss": 0.4487, "step": 19545 }, { "epoch": 0.8969758157037309, "grad_norm": 0.448485791683197, "learning_rate": 8.130139199349642e-06, "loss": 0.3683, "step": 19546 }, { "epoch": 0.8970217062089854, "grad_norm": 0.48506516218185425, "learning_rate": 8.129947997882753e-06, "loss": 0.4672, "step": 19547 }, { "epoch": 0.8970675967142399, "grad_norm": 0.4289757311344147, "learning_rate": 8.129756788889325e-06, "loss": 0.3664, "step": 19548 }, { "epoch": 0.8971134872194942, "grad_norm": 0.4899705648422241, "learning_rate": 8.129565572369822e-06, "loss": 0.4229, "step": 19549 }, { "epoch": 0.8971593777247487, "grad_norm": 0.4408937990665436, "learning_rate": 8.129374348324701e-06, "loss": 0.3513, "step": 19550 }, { "epoch": 0.8972052682300032, "grad_norm": 0.4421949088573456, "learning_rate": 8.129183116754422e-06, "loss": 0.4231, "step": 19551 }, { "epoch": 0.8972511587352576, "grad_norm": 0.4801378846168518, "learning_rate": 8.128991877659444e-06, "loss": 0.4851, "step": 19552 }, { "epoch": 0.8972970492405121, "grad_norm": 0.4513093829154968, "learning_rate": 8.12880063104023e-06, "loss": 0.3491, "step": 19553 }, { "epoch": 0.8973429397457666, "grad_norm": 0.45260581374168396, "learning_rate": 8.128609376897237e-06, "loss": 0.3993, "step": 19554 }, { "epoch": 0.8973888302510211, "grad_norm": 0.45366472005844116, "learning_rate": 8.128418115230927e-06, "loss": 0.3746, "step": 19555 }, { "epoch": 0.8974347207562755, "grad_norm": 0.46830397844314575, "learning_rate": 8.128226846041758e-06, "loss": 0.4631, "step": 19556 }, { "epoch": 0.89748061126153, "grad_norm": 0.47571054100990295, "learning_rate": 8.12803556933019e-06, "loss": 0.4127, "step": 19557 }, { "epoch": 0.8975265017667845, "grad_norm": 0.4808880090713501, "learning_rate": 8.127844285096685e-06, "loss": 0.4326, "step": 19558 }, { "epoch": 0.8975723922720389, "grad_norm": 0.4478267431259155, "learning_rate": 8.127652993341701e-06, "loss": 0.3591, "step": 19559 }, { "epoch": 0.8976182827772934, "grad_norm": 0.46266263723373413, "learning_rate": 8.127461694065698e-06, "loss": 0.4416, "step": 19560 }, { "epoch": 0.8976641732825479, "grad_norm": 0.4565480053424835, "learning_rate": 8.127270387269137e-06, "loss": 0.3679, "step": 19561 }, { "epoch": 0.8977100637878023, "grad_norm": 0.4885430634021759, "learning_rate": 8.12707907295248e-06, "loss": 0.4122, "step": 19562 }, { "epoch": 0.8977559542930568, "grad_norm": 0.46203354001045227, "learning_rate": 8.126887751116181e-06, "loss": 0.3917, "step": 19563 }, { "epoch": 0.8978018447983113, "grad_norm": 0.45533332228660583, "learning_rate": 8.126696421760705e-06, "loss": 0.4326, "step": 19564 }, { "epoch": 0.8978477353035657, "grad_norm": 0.47094833850860596, "learning_rate": 8.126505084886514e-06, "loss": 0.3773, "step": 19565 }, { "epoch": 0.8978936258088202, "grad_norm": 0.4857780337333679, "learning_rate": 8.12631374049406e-06, "loss": 0.4352, "step": 19566 }, { "epoch": 0.8979395163140746, "grad_norm": 0.4782284200191498, "learning_rate": 8.126122388583812e-06, "loss": 0.389, "step": 19567 }, { "epoch": 0.897985406819329, "grad_norm": 0.47144559025764465, "learning_rate": 8.125931029156224e-06, "loss": 0.4197, "step": 19568 }, { "epoch": 0.8980312973245835, "grad_norm": 0.4282037913799286, "learning_rate": 8.12573966221176e-06, "loss": 0.3579, "step": 19569 }, { "epoch": 0.898077187829838, "grad_norm": 0.48832759261131287, "learning_rate": 8.12554828775088e-06, "loss": 0.4056, "step": 19570 }, { "epoch": 0.8981230783350924, "grad_norm": 0.44976314902305603, "learning_rate": 8.12535690577404e-06, "loss": 0.4303, "step": 19571 }, { "epoch": 0.8981689688403469, "grad_norm": 0.43278777599334717, "learning_rate": 8.125165516281703e-06, "loss": 0.3617, "step": 19572 }, { "epoch": 0.8982148593456014, "grad_norm": 0.477529376745224, "learning_rate": 8.124974119274331e-06, "loss": 0.4876, "step": 19573 }, { "epoch": 0.8982607498508559, "grad_norm": 0.40531647205352783, "learning_rate": 8.124782714752382e-06, "loss": 0.3093, "step": 19574 }, { "epoch": 0.8983066403561103, "grad_norm": 0.4100872278213501, "learning_rate": 8.124591302716316e-06, "loss": 0.3193, "step": 19575 }, { "epoch": 0.8983525308613648, "grad_norm": 0.43445339798927307, "learning_rate": 8.124399883166595e-06, "loss": 0.3474, "step": 19576 }, { "epoch": 0.8983984213666193, "grad_norm": 0.39190801978111267, "learning_rate": 8.124208456103677e-06, "loss": 0.2875, "step": 19577 }, { "epoch": 0.8984443118718737, "grad_norm": 0.45114728808403015, "learning_rate": 8.124017021528025e-06, "loss": 0.4313, "step": 19578 }, { "epoch": 0.8984902023771282, "grad_norm": 0.4255850315093994, "learning_rate": 8.123825579440098e-06, "loss": 0.3683, "step": 19579 }, { "epoch": 0.8985360928823827, "grad_norm": 0.4089559018611908, "learning_rate": 8.123634129840357e-06, "loss": 0.3592, "step": 19580 }, { "epoch": 0.8985819833876371, "grad_norm": 0.44748973846435547, "learning_rate": 8.12344267272926e-06, "loss": 0.407, "step": 19581 }, { "epoch": 0.8986278738928916, "grad_norm": 0.4420090317726135, "learning_rate": 8.12325120810727e-06, "loss": 0.3911, "step": 19582 }, { "epoch": 0.8986737643981461, "grad_norm": 0.4208831191062927, "learning_rate": 8.123059735974847e-06, "loss": 0.3505, "step": 19583 }, { "epoch": 0.8987196549034004, "grad_norm": 0.4403655230998993, "learning_rate": 8.12286825633245e-06, "loss": 0.3507, "step": 19584 }, { "epoch": 0.8987655454086549, "grad_norm": 0.49103429913520813, "learning_rate": 8.122676769180542e-06, "loss": 0.4367, "step": 19585 }, { "epoch": 0.8988114359139094, "grad_norm": 0.4869801700115204, "learning_rate": 8.12248527451958e-06, "loss": 0.4308, "step": 19586 }, { "epoch": 0.8988573264191638, "grad_norm": 0.4368007183074951, "learning_rate": 8.122293772350028e-06, "loss": 0.3526, "step": 19587 }, { "epoch": 0.8989032169244183, "grad_norm": 0.4613476097583771, "learning_rate": 8.122102262672344e-06, "loss": 0.3677, "step": 19588 }, { "epoch": 0.8989491074296728, "grad_norm": 0.4299507439136505, "learning_rate": 8.12191074548699e-06, "loss": 0.3715, "step": 19589 }, { "epoch": 0.8989949979349272, "grad_norm": 0.4662078619003296, "learning_rate": 8.121719220794426e-06, "loss": 0.4227, "step": 19590 }, { "epoch": 0.8990408884401817, "grad_norm": 0.4886345863342285, "learning_rate": 8.121527688595112e-06, "loss": 0.4282, "step": 19591 }, { "epoch": 0.8990867789454362, "grad_norm": 0.45164838433265686, "learning_rate": 8.121336148889508e-06, "loss": 0.3907, "step": 19592 }, { "epoch": 0.8991326694506907, "grad_norm": 0.4625115990638733, "learning_rate": 8.121144601678077e-06, "loss": 0.4151, "step": 19593 }, { "epoch": 0.8991785599559451, "grad_norm": 0.4364461898803711, "learning_rate": 8.120953046961279e-06, "loss": 0.4488, "step": 19594 }, { "epoch": 0.8992244504611996, "grad_norm": 0.4251542091369629, "learning_rate": 8.120761484739571e-06, "loss": 0.3757, "step": 19595 }, { "epoch": 0.8992703409664541, "grad_norm": 0.48554930090904236, "learning_rate": 8.120569915013421e-06, "loss": 0.4133, "step": 19596 }, { "epoch": 0.8993162314717085, "grad_norm": 0.47698280215263367, "learning_rate": 8.120378337783283e-06, "loss": 0.4238, "step": 19597 }, { "epoch": 0.899362121976963, "grad_norm": 0.4423588514328003, "learning_rate": 8.12018675304962e-06, "loss": 0.3888, "step": 19598 }, { "epoch": 0.8994080124822175, "grad_norm": 0.4697827398777008, "learning_rate": 8.119995160812893e-06, "loss": 0.4517, "step": 19599 }, { "epoch": 0.8994539029874719, "grad_norm": 0.47125673294067383, "learning_rate": 8.11980356107356e-06, "loss": 0.3779, "step": 19600 }, { "epoch": 0.8994997934927264, "grad_norm": 0.4564676284790039, "learning_rate": 8.119611953832087e-06, "loss": 0.3723, "step": 19601 }, { "epoch": 0.8995456839979808, "grad_norm": 0.44823724031448364, "learning_rate": 8.11942033908893e-06, "loss": 0.3204, "step": 19602 }, { "epoch": 0.8995915745032352, "grad_norm": 0.45878884196281433, "learning_rate": 8.11922871684455e-06, "loss": 0.3732, "step": 19603 }, { "epoch": 0.8996374650084897, "grad_norm": 0.4678589403629303, "learning_rate": 8.119037087099415e-06, "loss": 0.4272, "step": 19604 }, { "epoch": 0.8996833555137442, "grad_norm": 0.4269801676273346, "learning_rate": 8.118845449853976e-06, "loss": 0.3402, "step": 19605 }, { "epoch": 0.8997292460189986, "grad_norm": 0.45910999178886414, "learning_rate": 8.118653805108698e-06, "loss": 0.404, "step": 19606 }, { "epoch": 0.8997751365242531, "grad_norm": 0.4273187220096588, "learning_rate": 8.118462152864044e-06, "loss": 0.3369, "step": 19607 }, { "epoch": 0.8998210270295076, "grad_norm": 0.45106983184814453, "learning_rate": 8.118270493120472e-06, "loss": 0.3719, "step": 19608 }, { "epoch": 0.8998669175347621, "grad_norm": 0.4412156939506531, "learning_rate": 8.118078825878441e-06, "loss": 0.3556, "step": 19609 }, { "epoch": 0.8999128080400165, "grad_norm": 0.43864861130714417, "learning_rate": 8.117887151138418e-06, "loss": 0.3601, "step": 19610 }, { "epoch": 0.899958698545271, "grad_norm": 0.46257439255714417, "learning_rate": 8.11769546890086e-06, "loss": 0.3951, "step": 19611 }, { "epoch": 0.9000045890505255, "grad_norm": 0.46688273549079895, "learning_rate": 8.117503779166228e-06, "loss": 0.4378, "step": 19612 }, { "epoch": 0.9000504795557799, "grad_norm": 0.489102303981781, "learning_rate": 8.117312081934982e-06, "loss": 0.4875, "step": 19613 }, { "epoch": 0.9000963700610344, "grad_norm": 0.44962283968925476, "learning_rate": 8.117120377207585e-06, "loss": 0.3868, "step": 19614 }, { "epoch": 0.9001422605662889, "grad_norm": 0.47287270426750183, "learning_rate": 8.116928664984498e-06, "loss": 0.39, "step": 19615 }, { "epoch": 0.9001881510715433, "grad_norm": 0.43893179297447205, "learning_rate": 8.116736945266179e-06, "loss": 0.4042, "step": 19616 }, { "epoch": 0.9002340415767978, "grad_norm": 0.4542989730834961, "learning_rate": 8.116545218053093e-06, "loss": 0.4311, "step": 19617 }, { "epoch": 0.9002799320820523, "grad_norm": 0.5006599426269531, "learning_rate": 8.116353483345701e-06, "loss": 0.487, "step": 19618 }, { "epoch": 0.9003258225873066, "grad_norm": 0.434628963470459, "learning_rate": 8.11616174114446e-06, "loss": 0.3361, "step": 19619 }, { "epoch": 0.9003717130925611, "grad_norm": 0.4790666103363037, "learning_rate": 8.115969991449835e-06, "loss": 0.5053, "step": 19620 }, { "epoch": 0.9004176035978156, "grad_norm": 0.4567667245864868, "learning_rate": 8.115778234262284e-06, "loss": 0.3786, "step": 19621 }, { "epoch": 0.90046349410307, "grad_norm": 0.41192808747291565, "learning_rate": 8.115586469582271e-06, "loss": 0.3136, "step": 19622 }, { "epoch": 0.9005093846083245, "grad_norm": 0.4622694253921509, "learning_rate": 8.115394697410254e-06, "loss": 0.4316, "step": 19623 }, { "epoch": 0.900555275113579, "grad_norm": 0.4549066424369812, "learning_rate": 8.115202917746699e-06, "loss": 0.417, "step": 19624 }, { "epoch": 0.9006011656188334, "grad_norm": 0.46292001008987427, "learning_rate": 8.115011130592061e-06, "loss": 0.4331, "step": 19625 }, { "epoch": 0.9006470561240879, "grad_norm": 0.39980098605155945, "learning_rate": 8.114819335946807e-06, "loss": 0.3193, "step": 19626 }, { "epoch": 0.9006929466293424, "grad_norm": 0.439362108707428, "learning_rate": 8.114627533811394e-06, "loss": 0.383, "step": 19627 }, { "epoch": 0.9007388371345969, "grad_norm": 0.4720875918865204, "learning_rate": 8.114435724186287e-06, "loss": 0.3825, "step": 19628 }, { "epoch": 0.9007847276398513, "grad_norm": 0.4588710367679596, "learning_rate": 8.114243907071942e-06, "loss": 0.3452, "step": 19629 }, { "epoch": 0.9008306181451058, "grad_norm": 0.4508844316005707, "learning_rate": 8.114052082468825e-06, "loss": 0.3733, "step": 19630 }, { "epoch": 0.9008765086503603, "grad_norm": 0.4787546992301941, "learning_rate": 8.113860250377395e-06, "loss": 0.4559, "step": 19631 }, { "epoch": 0.9009223991556147, "grad_norm": 0.4768097400665283, "learning_rate": 8.113668410798114e-06, "loss": 0.4449, "step": 19632 }, { "epoch": 0.9009682896608692, "grad_norm": 0.5088914036750793, "learning_rate": 8.113476563731444e-06, "loss": 0.5128, "step": 19633 }, { "epoch": 0.9010141801661237, "grad_norm": 0.42240893840789795, "learning_rate": 8.113284709177844e-06, "loss": 0.3298, "step": 19634 }, { "epoch": 0.901060070671378, "grad_norm": 0.48385047912597656, "learning_rate": 8.113092847137778e-06, "loss": 0.4638, "step": 19635 }, { "epoch": 0.9011059611766326, "grad_norm": 0.49349191784858704, "learning_rate": 8.112900977611706e-06, "loss": 0.4078, "step": 19636 }, { "epoch": 0.901151851681887, "grad_norm": 0.4590332508087158, "learning_rate": 8.11270910060009e-06, "loss": 0.3944, "step": 19637 }, { "epoch": 0.9011977421871414, "grad_norm": 0.4321778416633606, "learning_rate": 8.112517216103391e-06, "loss": 0.3755, "step": 19638 }, { "epoch": 0.9012436326923959, "grad_norm": 0.4985816180706024, "learning_rate": 8.112325324122069e-06, "loss": 0.5117, "step": 19639 }, { "epoch": 0.9012895231976504, "grad_norm": 0.4458997845649719, "learning_rate": 8.112133424656587e-06, "loss": 0.3632, "step": 19640 }, { "epoch": 0.9013354137029048, "grad_norm": 0.4296756386756897, "learning_rate": 8.111941517707406e-06, "loss": 0.3431, "step": 19641 }, { "epoch": 0.9013813042081593, "grad_norm": 0.455159455537796, "learning_rate": 8.11174960327499e-06, "loss": 0.4295, "step": 19642 }, { "epoch": 0.9014271947134138, "grad_norm": 0.4792745113372803, "learning_rate": 8.111557681359796e-06, "loss": 0.4772, "step": 19643 }, { "epoch": 0.9014730852186683, "grad_norm": 0.4505153298377991, "learning_rate": 8.11136575196229e-06, "loss": 0.4169, "step": 19644 }, { "epoch": 0.9015189757239227, "grad_norm": 0.42460453510284424, "learning_rate": 8.111173815082931e-06, "loss": 0.3971, "step": 19645 }, { "epoch": 0.9015648662291772, "grad_norm": 0.4281695485115051, "learning_rate": 8.110981870722179e-06, "loss": 0.3691, "step": 19646 }, { "epoch": 0.9016107567344317, "grad_norm": 0.4597682058811188, "learning_rate": 8.1107899188805e-06, "loss": 0.43, "step": 19647 }, { "epoch": 0.9016566472396861, "grad_norm": 0.47610172629356384, "learning_rate": 8.11059795955835e-06, "loss": 0.5058, "step": 19648 }, { "epoch": 0.9017025377449406, "grad_norm": 0.4642077386379242, "learning_rate": 8.110405992756197e-06, "loss": 0.4283, "step": 19649 }, { "epoch": 0.9017484282501951, "grad_norm": 0.4710111916065216, "learning_rate": 8.110214018474497e-06, "loss": 0.4772, "step": 19650 }, { "epoch": 0.9017943187554495, "grad_norm": 0.44823944568634033, "learning_rate": 8.110022036713715e-06, "loss": 0.4001, "step": 19651 }, { "epoch": 0.901840209260704, "grad_norm": 0.46762052178382874, "learning_rate": 8.109830047474311e-06, "loss": 0.4389, "step": 19652 }, { "epoch": 0.9018860997659585, "grad_norm": 0.4464554786682129, "learning_rate": 8.109638050756747e-06, "loss": 0.3994, "step": 19653 }, { "epoch": 0.9019319902712128, "grad_norm": 0.42765629291534424, "learning_rate": 8.109446046561486e-06, "loss": 0.354, "step": 19654 }, { "epoch": 0.9019778807764673, "grad_norm": 0.47394561767578125, "learning_rate": 8.109254034888987e-06, "loss": 0.4081, "step": 19655 }, { "epoch": 0.9020237712817218, "grad_norm": 0.4481166899204254, "learning_rate": 8.109062015739715e-06, "loss": 0.37, "step": 19656 }, { "epoch": 0.9020696617869762, "grad_norm": 0.5158683657646179, "learning_rate": 8.108869989114128e-06, "loss": 0.5874, "step": 19657 }, { "epoch": 0.9021155522922307, "grad_norm": 0.4390256404876709, "learning_rate": 8.108677955012692e-06, "loss": 0.3885, "step": 19658 }, { "epoch": 0.9021614427974852, "grad_norm": 0.4856985807418823, "learning_rate": 8.108485913435866e-06, "loss": 0.4557, "step": 19659 }, { "epoch": 0.9022073333027396, "grad_norm": 0.4250340163707733, "learning_rate": 8.108293864384111e-06, "loss": 0.3444, "step": 19660 }, { "epoch": 0.9022532238079941, "grad_norm": 0.46024569869041443, "learning_rate": 8.108101807857893e-06, "loss": 0.4347, "step": 19661 }, { "epoch": 0.9022991143132486, "grad_norm": 0.4689312279224396, "learning_rate": 8.107909743857668e-06, "loss": 0.4106, "step": 19662 }, { "epoch": 0.9023450048185031, "grad_norm": 0.47165659070014954, "learning_rate": 8.107717672383903e-06, "loss": 0.3943, "step": 19663 }, { "epoch": 0.9023908953237575, "grad_norm": 0.43660423159599304, "learning_rate": 8.107525593437058e-06, "loss": 0.3986, "step": 19664 }, { "epoch": 0.902436785829012, "grad_norm": 0.4649445116519928, "learning_rate": 8.107333507017595e-06, "loss": 0.411, "step": 19665 }, { "epoch": 0.9024826763342665, "grad_norm": 0.4339834749698639, "learning_rate": 8.107141413125974e-06, "loss": 0.3468, "step": 19666 }, { "epoch": 0.9025285668395209, "grad_norm": 0.4421994686126709, "learning_rate": 8.10694931176266e-06, "loss": 0.3054, "step": 19667 }, { "epoch": 0.9025744573447754, "grad_norm": 0.4310256838798523, "learning_rate": 8.106757202928112e-06, "loss": 0.3524, "step": 19668 }, { "epoch": 0.9026203478500299, "grad_norm": 0.49119946360588074, "learning_rate": 8.106565086622796e-06, "loss": 0.4681, "step": 19669 }, { "epoch": 0.9026662383552843, "grad_norm": 0.44626155495643616, "learning_rate": 8.106372962847168e-06, "loss": 0.4243, "step": 19670 }, { "epoch": 0.9027121288605388, "grad_norm": 0.45187458395957947, "learning_rate": 8.106180831601695e-06, "loss": 0.3848, "step": 19671 }, { "epoch": 0.9027580193657933, "grad_norm": 0.4735959470272064, "learning_rate": 8.105988692886838e-06, "loss": 0.4805, "step": 19672 }, { "epoch": 0.9028039098710476, "grad_norm": 0.4806026518344879, "learning_rate": 8.105796546703057e-06, "loss": 0.4554, "step": 19673 }, { "epoch": 0.9028498003763021, "grad_norm": 0.4555499255657196, "learning_rate": 8.105604393050816e-06, "loss": 0.3798, "step": 19674 }, { "epoch": 0.9028956908815566, "grad_norm": 0.4304138422012329, "learning_rate": 8.105412231930579e-06, "loss": 0.3734, "step": 19675 }, { "epoch": 0.902941581386811, "grad_norm": 0.463528037071228, "learning_rate": 8.105220063342802e-06, "loss": 0.3765, "step": 19676 }, { "epoch": 0.9029874718920655, "grad_norm": 0.43964168429374695, "learning_rate": 8.105027887287953e-06, "loss": 0.3585, "step": 19677 }, { "epoch": 0.90303336239732, "grad_norm": 0.4342395067214966, "learning_rate": 8.104835703766492e-06, "loss": 0.3945, "step": 19678 }, { "epoch": 0.9030792529025744, "grad_norm": 0.43120867013931274, "learning_rate": 8.10464351277888e-06, "loss": 0.3402, "step": 19679 }, { "epoch": 0.9031251434078289, "grad_norm": 0.46068307757377625, "learning_rate": 8.104451314325581e-06, "loss": 0.3732, "step": 19680 }, { "epoch": 0.9031710339130834, "grad_norm": 0.4437250792980194, "learning_rate": 8.104259108407055e-06, "loss": 0.3487, "step": 19681 }, { "epoch": 0.9032169244183379, "grad_norm": 0.4706919193267822, "learning_rate": 8.104066895023769e-06, "loss": 0.4619, "step": 19682 }, { "epoch": 0.9032628149235923, "grad_norm": 0.6024381518363953, "learning_rate": 8.103874674176178e-06, "loss": 0.4279, "step": 19683 }, { "epoch": 0.9033087054288468, "grad_norm": 0.5033702850341797, "learning_rate": 8.10368244586475e-06, "loss": 0.479, "step": 19684 }, { "epoch": 0.9033545959341013, "grad_norm": 0.39673084020614624, "learning_rate": 8.103490210089943e-06, "loss": 0.3073, "step": 19685 }, { "epoch": 0.9034004864393557, "grad_norm": 0.5670091509819031, "learning_rate": 8.103297966852225e-06, "loss": 0.3645, "step": 19686 }, { "epoch": 0.9034463769446102, "grad_norm": 0.46902140974998474, "learning_rate": 8.103105716152053e-06, "loss": 0.4012, "step": 19687 }, { "epoch": 0.9034922674498647, "grad_norm": 0.4556429386138916, "learning_rate": 8.10291345798989e-06, "loss": 0.4015, "step": 19688 }, { "epoch": 0.903538157955119, "grad_norm": 0.4349612891674042, "learning_rate": 8.102721192366202e-06, "loss": 0.3649, "step": 19689 }, { "epoch": 0.9035840484603735, "grad_norm": 0.44740423560142517, "learning_rate": 8.102528919281447e-06, "loss": 0.3565, "step": 19690 }, { "epoch": 0.903629938965628, "grad_norm": 0.4618123471736908, "learning_rate": 8.102336638736089e-06, "loss": 0.3929, "step": 19691 }, { "epoch": 0.9036758294708824, "grad_norm": 0.4255295395851135, "learning_rate": 8.102144350730592e-06, "loss": 0.3454, "step": 19692 }, { "epoch": 0.9037217199761369, "grad_norm": 0.46345847845077515, "learning_rate": 8.101952055265415e-06, "loss": 0.3731, "step": 19693 }, { "epoch": 0.9037676104813914, "grad_norm": 0.4830007255077362, "learning_rate": 8.101759752341024e-06, "loss": 0.4272, "step": 19694 }, { "epoch": 0.9038135009866458, "grad_norm": 0.4533284306526184, "learning_rate": 8.101567441957879e-06, "loss": 0.4166, "step": 19695 }, { "epoch": 0.9038593914919003, "grad_norm": 0.47592705488204956, "learning_rate": 8.101375124116442e-06, "loss": 0.4573, "step": 19696 }, { "epoch": 0.9039052819971548, "grad_norm": 0.4929142892360687, "learning_rate": 8.10118279881718e-06, "loss": 0.4403, "step": 19697 }, { "epoch": 0.9039511725024093, "grad_norm": 0.4358735680580139, "learning_rate": 8.100990466060548e-06, "loss": 0.3767, "step": 19698 }, { "epoch": 0.9039970630076637, "grad_norm": 0.4009090065956116, "learning_rate": 8.100798125847015e-06, "loss": 0.3252, "step": 19699 }, { "epoch": 0.9040429535129182, "grad_norm": 0.4724183976650238, "learning_rate": 8.100605778177041e-06, "loss": 0.4651, "step": 19700 }, { "epoch": 0.9040888440181727, "grad_norm": 0.6317563652992249, "learning_rate": 8.100413423051088e-06, "loss": 0.3671, "step": 19701 }, { "epoch": 0.9041347345234271, "grad_norm": 0.4557824432849884, "learning_rate": 8.10022106046962e-06, "loss": 0.4204, "step": 19702 }, { "epoch": 0.9041806250286816, "grad_norm": 0.4439411759376526, "learning_rate": 8.1000286904331e-06, "loss": 0.4014, "step": 19703 }, { "epoch": 0.9042265155339361, "grad_norm": 0.435846209526062, "learning_rate": 8.099836312941987e-06, "loss": 0.3511, "step": 19704 }, { "epoch": 0.9042724060391905, "grad_norm": 0.47710445523262024, "learning_rate": 8.099643927996746e-06, "loss": 0.4292, "step": 19705 }, { "epoch": 0.904318296544445, "grad_norm": 0.41192251443862915, "learning_rate": 8.099451535597842e-06, "loss": 0.3224, "step": 19706 }, { "epoch": 0.9043641870496995, "grad_norm": 0.48495781421661377, "learning_rate": 8.099259135745732e-06, "loss": 0.4691, "step": 19707 }, { "epoch": 0.9044100775549538, "grad_norm": 0.4241655468940735, "learning_rate": 8.099066728440885e-06, "loss": 0.3095, "step": 19708 }, { "epoch": 0.9044559680602083, "grad_norm": 0.4737190902233124, "learning_rate": 8.09887431368376e-06, "loss": 0.4292, "step": 19709 }, { "epoch": 0.9045018585654628, "grad_norm": 0.4402461051940918, "learning_rate": 8.098681891474818e-06, "loss": 0.3813, "step": 19710 }, { "epoch": 0.9045477490707172, "grad_norm": 0.4946078956127167, "learning_rate": 8.098489461814527e-06, "loss": 0.4747, "step": 19711 }, { "epoch": 0.9045936395759717, "grad_norm": 0.4547879695892334, "learning_rate": 8.098297024703345e-06, "loss": 0.3686, "step": 19712 }, { "epoch": 0.9046395300812262, "grad_norm": 0.4980894923210144, "learning_rate": 8.098104580141736e-06, "loss": 0.4627, "step": 19713 }, { "epoch": 0.9046854205864806, "grad_norm": 0.43748903274536133, "learning_rate": 8.097912128130166e-06, "loss": 0.3812, "step": 19714 }, { "epoch": 0.9047313110917351, "grad_norm": 0.42651379108428955, "learning_rate": 8.097719668669092e-06, "loss": 0.3572, "step": 19715 }, { "epoch": 0.9047772015969896, "grad_norm": 0.4586358964443207, "learning_rate": 8.097527201758981e-06, "loss": 0.4143, "step": 19716 }, { "epoch": 0.9048230921022441, "grad_norm": 0.45888128876686096, "learning_rate": 8.097334727400295e-06, "loss": 0.4015, "step": 19717 }, { "epoch": 0.9048689826074985, "grad_norm": 0.503143846988678, "learning_rate": 8.097142245593497e-06, "loss": 0.4646, "step": 19718 }, { "epoch": 0.904914873112753, "grad_norm": 0.47953394055366516, "learning_rate": 8.096949756339047e-06, "loss": 0.5367, "step": 19719 }, { "epoch": 0.9049607636180075, "grad_norm": 0.4579585790634155, "learning_rate": 8.096757259637414e-06, "loss": 0.4313, "step": 19720 }, { "epoch": 0.9050066541232619, "grad_norm": 0.4499635696411133, "learning_rate": 8.096564755489053e-06, "loss": 0.3513, "step": 19721 }, { "epoch": 0.9050525446285164, "grad_norm": 0.4733886420726776, "learning_rate": 8.096372243894434e-06, "loss": 0.4272, "step": 19722 }, { "epoch": 0.9050984351337709, "grad_norm": 0.42385441064834595, "learning_rate": 8.096179724854016e-06, "loss": 0.3411, "step": 19723 }, { "epoch": 0.9051443256390252, "grad_norm": 0.46012550592422485, "learning_rate": 8.095987198368263e-06, "loss": 0.3899, "step": 19724 }, { "epoch": 0.9051902161442797, "grad_norm": 0.43229904770851135, "learning_rate": 8.095794664437638e-06, "loss": 0.3621, "step": 19725 }, { "epoch": 0.9052361066495342, "grad_norm": 0.4131454825401306, "learning_rate": 8.095602123062604e-06, "loss": 0.337, "step": 19726 }, { "epoch": 0.9052819971547886, "grad_norm": 0.4104025363922119, "learning_rate": 8.095409574243625e-06, "loss": 0.3026, "step": 19727 }, { "epoch": 0.9053278876600431, "grad_norm": 0.44264572858810425, "learning_rate": 8.095217017981163e-06, "loss": 0.3369, "step": 19728 }, { "epoch": 0.9053737781652976, "grad_norm": 0.44883832335472107, "learning_rate": 8.095024454275679e-06, "loss": 0.4187, "step": 19729 }, { "epoch": 0.905419668670552, "grad_norm": 0.486637681722641, "learning_rate": 8.094831883127639e-06, "loss": 0.3784, "step": 19730 }, { "epoch": 0.9054655591758065, "grad_norm": 0.44340798258781433, "learning_rate": 8.094639304537503e-06, "loss": 0.3357, "step": 19731 }, { "epoch": 0.905511449681061, "grad_norm": 0.5192316174507141, "learning_rate": 8.094446718505739e-06, "loss": 0.5257, "step": 19732 }, { "epoch": 0.9055573401863154, "grad_norm": 0.4040854871273041, "learning_rate": 8.094254125032806e-06, "loss": 0.3076, "step": 19733 }, { "epoch": 0.9056032306915699, "grad_norm": 0.4588043689727783, "learning_rate": 8.094061524119169e-06, "loss": 0.386, "step": 19734 }, { "epoch": 0.9056491211968244, "grad_norm": 0.4462362825870514, "learning_rate": 8.09386891576529e-06, "loss": 0.4073, "step": 19735 }, { "epoch": 0.9056950117020789, "grad_norm": 0.42839282751083374, "learning_rate": 8.093676299971632e-06, "loss": 0.3176, "step": 19736 }, { "epoch": 0.9057409022073333, "grad_norm": 0.46844154596328735, "learning_rate": 8.09348367673866e-06, "loss": 0.423, "step": 19737 }, { "epoch": 0.9057867927125878, "grad_norm": 0.5201075077056885, "learning_rate": 8.093291046066835e-06, "loss": 0.5594, "step": 19738 }, { "epoch": 0.9058326832178423, "grad_norm": 0.4085085093975067, "learning_rate": 8.093098407956624e-06, "loss": 0.3497, "step": 19739 }, { "epoch": 0.9058785737230967, "grad_norm": 0.4580923914909363, "learning_rate": 8.092905762408485e-06, "loss": 0.4414, "step": 19740 }, { "epoch": 0.9059244642283512, "grad_norm": 0.491268515586853, "learning_rate": 8.092713109422884e-06, "loss": 0.4009, "step": 19741 }, { "epoch": 0.9059703547336057, "grad_norm": 0.5214706063270569, "learning_rate": 8.092520449000285e-06, "loss": 0.5347, "step": 19742 }, { "epoch": 0.90601624523886, "grad_norm": 0.4822435975074768, "learning_rate": 8.09232778114115e-06, "loss": 0.5074, "step": 19743 }, { "epoch": 0.9060621357441145, "grad_norm": 0.45583540201187134, "learning_rate": 8.092135105845943e-06, "loss": 0.3695, "step": 19744 }, { "epoch": 0.906108026249369, "grad_norm": 0.4919693171977997, "learning_rate": 8.091942423115127e-06, "loss": 0.4122, "step": 19745 }, { "epoch": 0.9061539167546234, "grad_norm": 0.4304092228412628, "learning_rate": 8.091749732949164e-06, "loss": 0.3745, "step": 19746 }, { "epoch": 0.9061998072598779, "grad_norm": 0.47973886132240295, "learning_rate": 8.09155703534852e-06, "loss": 0.4426, "step": 19747 }, { "epoch": 0.9062456977651324, "grad_norm": 0.4488719701766968, "learning_rate": 8.091364330313657e-06, "loss": 0.3689, "step": 19748 }, { "epoch": 0.9062915882703868, "grad_norm": 0.4439634382724762, "learning_rate": 8.091171617845038e-06, "loss": 0.372, "step": 19749 }, { "epoch": 0.9063374787756413, "grad_norm": 0.47419968247413635, "learning_rate": 8.090978897943126e-06, "loss": 0.4593, "step": 19750 }, { "epoch": 0.9063833692808958, "grad_norm": 0.4732223153114319, "learning_rate": 8.090786170608387e-06, "loss": 0.4224, "step": 19751 }, { "epoch": 0.9064292597861503, "grad_norm": 0.4581719934940338, "learning_rate": 8.090593435841282e-06, "loss": 0.4042, "step": 19752 }, { "epoch": 0.9064751502914047, "grad_norm": 0.43798547983169556, "learning_rate": 8.090400693642275e-06, "loss": 0.3868, "step": 19753 }, { "epoch": 0.9065210407966592, "grad_norm": 0.44240084290504456, "learning_rate": 8.09020794401183e-06, "loss": 0.4101, "step": 19754 }, { "epoch": 0.9065669313019137, "grad_norm": 0.4754796326160431, "learning_rate": 8.09001518695041e-06, "loss": 0.4768, "step": 19755 }, { "epoch": 0.9066128218071681, "grad_norm": 0.49641653895378113, "learning_rate": 8.089822422458477e-06, "loss": 0.37, "step": 19756 }, { "epoch": 0.9066587123124226, "grad_norm": 0.4078812897205353, "learning_rate": 8.0896296505365e-06, "loss": 0.3272, "step": 19757 }, { "epoch": 0.9067046028176771, "grad_norm": 0.49050173163414, "learning_rate": 8.089436871184936e-06, "loss": 0.3953, "step": 19758 }, { "epoch": 0.9067504933229315, "grad_norm": 0.4416808485984802, "learning_rate": 8.089244084404252e-06, "loss": 0.4268, "step": 19759 }, { "epoch": 0.906796383828186, "grad_norm": 0.4150858521461487, "learning_rate": 8.089051290194912e-06, "loss": 0.3428, "step": 19760 }, { "epoch": 0.9068422743334404, "grad_norm": 0.48082083463668823, "learning_rate": 8.088858488557377e-06, "loss": 0.3959, "step": 19761 }, { "epoch": 0.9068881648386948, "grad_norm": 0.4783626198768616, "learning_rate": 8.088665679492114e-06, "loss": 0.4319, "step": 19762 }, { "epoch": 0.9069340553439493, "grad_norm": 0.4647321105003357, "learning_rate": 8.088472862999583e-06, "loss": 0.358, "step": 19763 }, { "epoch": 0.9069799458492038, "grad_norm": 0.4621385931968689, "learning_rate": 8.088280039080249e-06, "loss": 0.335, "step": 19764 }, { "epoch": 0.9070258363544582, "grad_norm": 0.4645345211029053, "learning_rate": 8.088087207734579e-06, "loss": 0.3836, "step": 19765 }, { "epoch": 0.9070717268597127, "grad_norm": 0.4636897146701813, "learning_rate": 8.087894368963032e-06, "loss": 0.4543, "step": 19766 }, { "epoch": 0.9071176173649672, "grad_norm": 0.48653948307037354, "learning_rate": 8.087701522766073e-06, "loss": 0.4341, "step": 19767 }, { "epoch": 0.9071635078702216, "grad_norm": 0.4266863167285919, "learning_rate": 8.087508669144168e-06, "loss": 0.3898, "step": 19768 }, { "epoch": 0.9072093983754761, "grad_norm": 0.4340168535709381, "learning_rate": 8.08731580809778e-06, "loss": 0.3109, "step": 19769 }, { "epoch": 0.9072552888807306, "grad_norm": 0.49628832936286926, "learning_rate": 8.08712293962737e-06, "loss": 0.4759, "step": 19770 }, { "epoch": 0.9073011793859851, "grad_norm": 0.520392656326294, "learning_rate": 8.086930063733404e-06, "loss": 0.4677, "step": 19771 }, { "epoch": 0.9073470698912395, "grad_norm": 0.4720556437969208, "learning_rate": 8.086737180416344e-06, "loss": 0.3397, "step": 19772 }, { "epoch": 0.907392960396494, "grad_norm": 0.5229846835136414, "learning_rate": 8.086544289676657e-06, "loss": 0.5064, "step": 19773 }, { "epoch": 0.9074388509017485, "grad_norm": 0.43044430017471313, "learning_rate": 8.086351391514807e-06, "loss": 0.3096, "step": 19774 }, { "epoch": 0.9074847414070029, "grad_norm": 0.4077827036380768, "learning_rate": 8.086158485931252e-06, "loss": 0.3027, "step": 19775 }, { "epoch": 0.9075306319122574, "grad_norm": 0.46161678433418274, "learning_rate": 8.085965572926461e-06, "loss": 0.3893, "step": 19776 }, { "epoch": 0.9075765224175119, "grad_norm": 0.4817831814289093, "learning_rate": 8.085772652500899e-06, "loss": 0.4474, "step": 19777 }, { "epoch": 0.9076224129227662, "grad_norm": 0.4783119559288025, "learning_rate": 8.085579724655027e-06, "loss": 0.4144, "step": 19778 }, { "epoch": 0.9076683034280207, "grad_norm": 0.43341249227523804, "learning_rate": 8.085386789389306e-06, "loss": 0.3478, "step": 19779 }, { "epoch": 0.9077141939332752, "grad_norm": 0.5075531005859375, "learning_rate": 8.085193846704206e-06, "loss": 0.4774, "step": 19780 }, { "epoch": 0.9077600844385296, "grad_norm": 0.4320594370365143, "learning_rate": 8.08500089660019e-06, "loss": 0.3683, "step": 19781 }, { "epoch": 0.9078059749437841, "grad_norm": 0.42616933584213257, "learning_rate": 8.084807939077719e-06, "loss": 0.3979, "step": 19782 }, { "epoch": 0.9078518654490386, "grad_norm": 0.49974194169044495, "learning_rate": 8.08461497413726e-06, "loss": 0.4522, "step": 19783 }, { "epoch": 0.907897755954293, "grad_norm": 0.4830091893672943, "learning_rate": 8.084422001779273e-06, "loss": 0.431, "step": 19784 }, { "epoch": 0.9079436464595475, "grad_norm": 0.45190754532814026, "learning_rate": 8.084229022004226e-06, "loss": 0.3531, "step": 19785 }, { "epoch": 0.907989536964802, "grad_norm": 0.5041647553443909, "learning_rate": 8.08403603481258e-06, "loss": 0.4798, "step": 19786 }, { "epoch": 0.9080354274700565, "grad_norm": 0.42994552850723267, "learning_rate": 8.083843040204805e-06, "loss": 0.364, "step": 19787 }, { "epoch": 0.9080813179753109, "grad_norm": 0.45369604229927063, "learning_rate": 8.083650038181357e-06, "loss": 0.445, "step": 19788 }, { "epoch": 0.9081272084805654, "grad_norm": 0.4129980504512787, "learning_rate": 8.083457028742705e-06, "loss": 0.3227, "step": 19789 }, { "epoch": 0.9081730989858199, "grad_norm": 0.48503991961479187, "learning_rate": 8.083264011889311e-06, "loss": 0.4515, "step": 19790 }, { "epoch": 0.9082189894910743, "grad_norm": 0.47612816095352173, "learning_rate": 8.083070987621641e-06, "loss": 0.4655, "step": 19791 }, { "epoch": 0.9082648799963288, "grad_norm": 0.4916422963142395, "learning_rate": 8.082877955940158e-06, "loss": 0.4408, "step": 19792 }, { "epoch": 0.9083107705015833, "grad_norm": 0.43666934967041016, "learning_rate": 8.082684916845327e-06, "loss": 0.3652, "step": 19793 }, { "epoch": 0.9083566610068377, "grad_norm": 0.42117840051651, "learning_rate": 8.082491870337612e-06, "loss": 0.3495, "step": 19794 }, { "epoch": 0.9084025515120921, "grad_norm": 0.4729611873626709, "learning_rate": 8.082298816417477e-06, "loss": 0.4331, "step": 19795 }, { "epoch": 0.9084484420173466, "grad_norm": 0.4573438763618469, "learning_rate": 8.082105755085385e-06, "loss": 0.4016, "step": 19796 }, { "epoch": 0.908494332522601, "grad_norm": 0.4837673604488373, "learning_rate": 8.081912686341802e-06, "loss": 0.4623, "step": 19797 }, { "epoch": 0.9085402230278555, "grad_norm": 0.4852626323699951, "learning_rate": 8.081719610187192e-06, "loss": 0.5122, "step": 19798 }, { "epoch": 0.90858611353311, "grad_norm": 0.48174750804901123, "learning_rate": 8.081526526622018e-06, "loss": 0.4641, "step": 19799 }, { "epoch": 0.9086320040383644, "grad_norm": 0.4797123670578003, "learning_rate": 8.081333435646745e-06, "loss": 0.4096, "step": 19800 }, { "epoch": 0.9086778945436189, "grad_norm": 0.4647011458873749, "learning_rate": 8.081140337261839e-06, "loss": 0.3364, "step": 19801 }, { "epoch": 0.9087237850488734, "grad_norm": 0.45662909746170044, "learning_rate": 8.080947231467763e-06, "loss": 0.3653, "step": 19802 }, { "epoch": 0.9087696755541278, "grad_norm": 0.4086991846561432, "learning_rate": 8.080754118264981e-06, "loss": 0.3084, "step": 19803 }, { "epoch": 0.9088155660593823, "grad_norm": 0.4146935045719147, "learning_rate": 8.080560997653956e-06, "loss": 0.3149, "step": 19804 }, { "epoch": 0.9088614565646368, "grad_norm": 0.49385935068130493, "learning_rate": 8.080367869635157e-06, "loss": 0.4775, "step": 19805 }, { "epoch": 0.9089073470698913, "grad_norm": 0.4818452000617981, "learning_rate": 8.080174734209043e-06, "loss": 0.5147, "step": 19806 }, { "epoch": 0.9089532375751457, "grad_norm": 0.46864286065101624, "learning_rate": 8.079981591376081e-06, "loss": 0.4153, "step": 19807 }, { "epoch": 0.9089991280804002, "grad_norm": 0.472176194190979, "learning_rate": 8.079788441136737e-06, "loss": 0.4551, "step": 19808 }, { "epoch": 0.9090450185856547, "grad_norm": 0.45830628275871277, "learning_rate": 8.079595283491473e-06, "loss": 0.3701, "step": 19809 }, { "epoch": 0.9090909090909091, "grad_norm": 0.48785656690597534, "learning_rate": 8.079402118440754e-06, "loss": 0.4689, "step": 19810 }, { "epoch": 0.9091367995961636, "grad_norm": 0.45173221826553345, "learning_rate": 8.079208945985044e-06, "loss": 0.4162, "step": 19811 }, { "epoch": 0.9091826901014181, "grad_norm": 0.4538235664367676, "learning_rate": 8.079015766124808e-06, "loss": 0.402, "step": 19812 }, { "epoch": 0.9092285806066724, "grad_norm": 0.44791096448898315, "learning_rate": 8.078822578860515e-06, "loss": 0.365, "step": 19813 }, { "epoch": 0.9092744711119269, "grad_norm": 0.47733187675476074, "learning_rate": 8.078629384192622e-06, "loss": 0.4148, "step": 19814 }, { "epoch": 0.9093203616171814, "grad_norm": 0.49350598454475403, "learning_rate": 8.078436182121596e-06, "loss": 0.4302, "step": 19815 }, { "epoch": 0.9093662521224358, "grad_norm": 0.4441482722759247, "learning_rate": 8.078242972647904e-06, "loss": 0.422, "step": 19816 }, { "epoch": 0.9094121426276903, "grad_norm": 0.4430387020111084, "learning_rate": 8.07804975577201e-06, "loss": 0.3952, "step": 19817 }, { "epoch": 0.9094580331329448, "grad_norm": 0.4480283260345459, "learning_rate": 8.077856531494376e-06, "loss": 0.4257, "step": 19818 }, { "epoch": 0.9095039236381992, "grad_norm": 0.45085176825523376, "learning_rate": 8.07766329981547e-06, "loss": 0.3773, "step": 19819 }, { "epoch": 0.9095498141434537, "grad_norm": 0.46520161628723145, "learning_rate": 8.077470060735755e-06, "loss": 0.3381, "step": 19820 }, { "epoch": 0.9095957046487082, "grad_norm": 0.4408283233642578, "learning_rate": 8.077276814255695e-06, "loss": 0.3625, "step": 19821 }, { "epoch": 0.9096415951539626, "grad_norm": 0.4771498143672943, "learning_rate": 8.077083560375755e-06, "loss": 0.4321, "step": 19822 }, { "epoch": 0.9096874856592171, "grad_norm": 0.4559750258922577, "learning_rate": 8.076890299096402e-06, "loss": 0.4335, "step": 19823 }, { "epoch": 0.9097333761644716, "grad_norm": 0.42101922631263733, "learning_rate": 8.076697030418099e-06, "loss": 0.3129, "step": 19824 }, { "epoch": 0.9097792666697261, "grad_norm": 0.4436720311641693, "learning_rate": 8.07650375434131e-06, "loss": 0.3988, "step": 19825 }, { "epoch": 0.9098251571749805, "grad_norm": 0.4155568480491638, "learning_rate": 8.076310470866498e-06, "loss": 0.3239, "step": 19826 }, { "epoch": 0.909871047680235, "grad_norm": 0.4136298894882202, "learning_rate": 8.076117179994134e-06, "loss": 0.3226, "step": 19827 }, { "epoch": 0.9099169381854895, "grad_norm": 0.45082250237464905, "learning_rate": 8.075923881724677e-06, "loss": 0.3586, "step": 19828 }, { "epoch": 0.9099628286907439, "grad_norm": 0.44058364629745483, "learning_rate": 8.075730576058593e-06, "loss": 0.3983, "step": 19829 }, { "epoch": 0.9100087191959984, "grad_norm": 0.4453901946544647, "learning_rate": 8.07553726299635e-06, "loss": 0.3627, "step": 19830 }, { "epoch": 0.9100546097012528, "grad_norm": 0.4743308126926422, "learning_rate": 8.075343942538409e-06, "loss": 0.3877, "step": 19831 }, { "epoch": 0.9101005002065072, "grad_norm": 0.44347691535949707, "learning_rate": 8.075150614685237e-06, "loss": 0.4089, "step": 19832 }, { "epoch": 0.9101463907117617, "grad_norm": 0.4939873516559601, "learning_rate": 8.074957279437298e-06, "loss": 0.5108, "step": 19833 }, { "epoch": 0.9101922812170162, "grad_norm": 0.4718095064163208, "learning_rate": 8.074763936795056e-06, "loss": 0.4908, "step": 19834 }, { "epoch": 0.9102381717222706, "grad_norm": 0.49301615357398987, "learning_rate": 8.07457058675898e-06, "loss": 0.5109, "step": 19835 }, { "epoch": 0.9102840622275251, "grad_norm": 0.46436113119125366, "learning_rate": 8.074377229329531e-06, "loss": 0.455, "step": 19836 }, { "epoch": 0.9103299527327796, "grad_norm": 0.5124197006225586, "learning_rate": 8.074183864507174e-06, "loss": 0.4732, "step": 19837 }, { "epoch": 0.910375843238034, "grad_norm": 0.4848898649215698, "learning_rate": 8.073990492292376e-06, "loss": 0.4178, "step": 19838 }, { "epoch": 0.9104217337432885, "grad_norm": 0.505601704120636, "learning_rate": 8.073797112685601e-06, "loss": 0.4688, "step": 19839 }, { "epoch": 0.910467624248543, "grad_norm": 0.47832393646240234, "learning_rate": 8.073603725687314e-06, "loss": 0.4475, "step": 19840 }, { "epoch": 0.9105135147537975, "grad_norm": 0.44735515117645264, "learning_rate": 8.07341033129798e-06, "loss": 0.3819, "step": 19841 }, { "epoch": 0.9105594052590519, "grad_norm": 0.5028278827667236, "learning_rate": 8.073216929518064e-06, "loss": 0.4476, "step": 19842 }, { "epoch": 0.9106052957643064, "grad_norm": 0.43822920322418213, "learning_rate": 8.073023520348032e-06, "loss": 0.3756, "step": 19843 }, { "epoch": 0.9106511862695609, "grad_norm": 0.4420967400074005, "learning_rate": 8.072830103788347e-06, "loss": 0.3508, "step": 19844 }, { "epoch": 0.9106970767748153, "grad_norm": 0.4544496536254883, "learning_rate": 8.072636679839477e-06, "loss": 0.3701, "step": 19845 }, { "epoch": 0.9107429672800698, "grad_norm": 0.4804290533065796, "learning_rate": 8.072443248501884e-06, "loss": 0.3595, "step": 19846 }, { "epoch": 0.9107888577853243, "grad_norm": 0.4530576169490814, "learning_rate": 8.072249809776035e-06, "loss": 0.3991, "step": 19847 }, { "epoch": 0.9108347482905786, "grad_norm": 0.4644770920276642, "learning_rate": 8.072056363662395e-06, "loss": 0.3893, "step": 19848 }, { "epoch": 0.9108806387958331, "grad_norm": 0.48486125469207764, "learning_rate": 8.07186291016143e-06, "loss": 0.4514, "step": 19849 }, { "epoch": 0.9109265293010876, "grad_norm": 0.9036334753036499, "learning_rate": 8.071669449273603e-06, "loss": 0.5016, "step": 19850 }, { "epoch": 0.910972419806342, "grad_norm": 0.4751227796077728, "learning_rate": 8.071475980999379e-06, "loss": 0.4692, "step": 19851 }, { "epoch": 0.9110183103115965, "grad_norm": 0.4260842502117157, "learning_rate": 8.071282505339227e-06, "loss": 0.353, "step": 19852 }, { "epoch": 0.911064200816851, "grad_norm": 0.47310179471969604, "learning_rate": 8.07108902229361e-06, "loss": 0.416, "step": 19853 }, { "epoch": 0.9111100913221054, "grad_norm": 0.4531238377094269, "learning_rate": 8.070895531862993e-06, "loss": 0.4025, "step": 19854 }, { "epoch": 0.9111559818273599, "grad_norm": 0.4726232886314392, "learning_rate": 8.070702034047839e-06, "loss": 0.4631, "step": 19855 }, { "epoch": 0.9112018723326144, "grad_norm": 0.4613339900970459, "learning_rate": 8.070508528848619e-06, "loss": 0.3946, "step": 19856 }, { "epoch": 0.9112477628378688, "grad_norm": 0.4331399202346802, "learning_rate": 8.070315016265793e-06, "loss": 0.3683, "step": 19857 }, { "epoch": 0.9112936533431233, "grad_norm": 0.4676276743412018, "learning_rate": 8.07012149629983e-06, "loss": 0.3971, "step": 19858 }, { "epoch": 0.9113395438483778, "grad_norm": 0.4394001066684723, "learning_rate": 8.069927968951193e-06, "loss": 0.3748, "step": 19859 }, { "epoch": 0.9113854343536323, "grad_norm": 0.42299097776412964, "learning_rate": 8.069734434220347e-06, "loss": 0.3271, "step": 19860 }, { "epoch": 0.9114313248588867, "grad_norm": 0.4486300051212311, "learning_rate": 8.06954089210776e-06, "loss": 0.3649, "step": 19861 }, { "epoch": 0.9114772153641412, "grad_norm": 0.4315125048160553, "learning_rate": 8.069347342613896e-06, "loss": 0.3226, "step": 19862 }, { "epoch": 0.9115231058693957, "grad_norm": 0.41584667563438416, "learning_rate": 8.06915378573922e-06, "loss": 0.3526, "step": 19863 }, { "epoch": 0.91156899637465, "grad_norm": 0.4659893810749054, "learning_rate": 8.068960221484197e-06, "loss": 0.4397, "step": 19864 }, { "epoch": 0.9116148868799046, "grad_norm": 0.4490303099155426, "learning_rate": 8.068766649849293e-06, "loss": 0.4026, "step": 19865 }, { "epoch": 0.911660777385159, "grad_norm": 0.46143200993537903, "learning_rate": 8.068573070834975e-06, "loss": 0.3662, "step": 19866 }, { "epoch": 0.9117066678904134, "grad_norm": 0.4608106315135956, "learning_rate": 8.068379484441707e-06, "loss": 0.4089, "step": 19867 }, { "epoch": 0.9117525583956679, "grad_norm": 0.45863357186317444, "learning_rate": 8.068185890669954e-06, "loss": 0.4211, "step": 19868 }, { "epoch": 0.9117984489009224, "grad_norm": 0.4429805874824524, "learning_rate": 8.067992289520184e-06, "loss": 0.3909, "step": 19869 }, { "epoch": 0.9118443394061768, "grad_norm": 0.46386998891830444, "learning_rate": 8.06779868099286e-06, "loss": 0.4042, "step": 19870 }, { "epoch": 0.9118902299114313, "grad_norm": 0.46973997354507446, "learning_rate": 8.067605065088447e-06, "loss": 0.4609, "step": 19871 }, { "epoch": 0.9119361204166858, "grad_norm": 0.46516549587249756, "learning_rate": 8.067411441807413e-06, "loss": 0.4079, "step": 19872 }, { "epoch": 0.9119820109219402, "grad_norm": 0.4418089985847473, "learning_rate": 8.067217811150224e-06, "loss": 0.4042, "step": 19873 }, { "epoch": 0.9120279014271947, "grad_norm": 0.41043296456336975, "learning_rate": 8.067024173117342e-06, "loss": 0.3078, "step": 19874 }, { "epoch": 0.9120737919324492, "grad_norm": 0.4313291311264038, "learning_rate": 8.066830527709235e-06, "loss": 0.3174, "step": 19875 }, { "epoch": 0.9121196824377037, "grad_norm": 0.44088274240493774, "learning_rate": 8.06663687492637e-06, "loss": 0.3989, "step": 19876 }, { "epoch": 0.9121655729429581, "grad_norm": 0.4955364763736725, "learning_rate": 8.06644321476921e-06, "loss": 0.5142, "step": 19877 }, { "epoch": 0.9122114634482126, "grad_norm": 0.44110217690467834, "learning_rate": 8.066249547238222e-06, "loss": 0.3664, "step": 19878 }, { "epoch": 0.9122573539534671, "grad_norm": 0.43643081188201904, "learning_rate": 8.066055872333873e-06, "loss": 0.3236, "step": 19879 }, { "epoch": 0.9123032444587215, "grad_norm": 0.45125144720077515, "learning_rate": 8.065862190056625e-06, "loss": 0.3877, "step": 19880 }, { "epoch": 0.912349134963976, "grad_norm": 0.43269750475883484, "learning_rate": 8.065668500406949e-06, "loss": 0.3709, "step": 19881 }, { "epoch": 0.9123950254692305, "grad_norm": 0.4689232409000397, "learning_rate": 8.065474803385305e-06, "loss": 0.4402, "step": 19882 }, { "epoch": 0.9124409159744848, "grad_norm": 0.4378909468650818, "learning_rate": 8.065281098992161e-06, "loss": 0.3454, "step": 19883 }, { "epoch": 0.9124868064797393, "grad_norm": 0.4553365707397461, "learning_rate": 8.065087387227986e-06, "loss": 0.4899, "step": 19884 }, { "epoch": 0.9125326969849938, "grad_norm": 0.5806536078453064, "learning_rate": 8.064893668093243e-06, "loss": 0.493, "step": 19885 }, { "epoch": 0.9125785874902482, "grad_norm": 0.47003448009490967, "learning_rate": 8.064699941588397e-06, "loss": 0.4509, "step": 19886 }, { "epoch": 0.9126244779955027, "grad_norm": 0.44664162397384644, "learning_rate": 8.064506207713916e-06, "loss": 0.381, "step": 19887 }, { "epoch": 0.9126703685007572, "grad_norm": 0.42585647106170654, "learning_rate": 8.064312466470263e-06, "loss": 0.3048, "step": 19888 }, { "epoch": 0.9127162590060116, "grad_norm": 0.5074831247329712, "learning_rate": 8.064118717857906e-06, "loss": 0.5234, "step": 19889 }, { "epoch": 0.9127621495112661, "grad_norm": 0.43581974506378174, "learning_rate": 8.063924961877311e-06, "loss": 0.3638, "step": 19890 }, { "epoch": 0.9128080400165206, "grad_norm": 0.4487568140029907, "learning_rate": 8.063731198528943e-06, "loss": 0.393, "step": 19891 }, { "epoch": 0.912853930521775, "grad_norm": 0.4952116012573242, "learning_rate": 8.063537427813271e-06, "loss": 0.498, "step": 19892 }, { "epoch": 0.9128998210270295, "grad_norm": 0.5095416903495789, "learning_rate": 8.063343649730755e-06, "loss": 0.4865, "step": 19893 }, { "epoch": 0.912945711532284, "grad_norm": 0.39973193407058716, "learning_rate": 8.063149864281865e-06, "loss": 0.3132, "step": 19894 }, { "epoch": 0.9129916020375385, "grad_norm": 0.44961073994636536, "learning_rate": 8.062956071467068e-06, "loss": 0.3699, "step": 19895 }, { "epoch": 0.9130374925427929, "grad_norm": 0.4452573359012604, "learning_rate": 8.062762271286828e-06, "loss": 0.3553, "step": 19896 }, { "epoch": 0.9130833830480474, "grad_norm": 0.4196516275405884, "learning_rate": 8.06256846374161e-06, "loss": 0.3343, "step": 19897 }, { "epoch": 0.9131292735533019, "grad_norm": 0.40853917598724365, "learning_rate": 8.062374648831883e-06, "loss": 0.3333, "step": 19898 }, { "epoch": 0.9131751640585563, "grad_norm": 0.4579904079437256, "learning_rate": 8.062180826558109e-06, "loss": 0.37, "step": 19899 }, { "epoch": 0.9132210545638108, "grad_norm": 0.4494280219078064, "learning_rate": 8.061986996920759e-06, "loss": 0.4049, "step": 19900 }, { "epoch": 0.9132669450690653, "grad_norm": 0.4431005120277405, "learning_rate": 8.061793159920295e-06, "loss": 0.4046, "step": 19901 }, { "epoch": 0.9133128355743196, "grad_norm": 0.4471645951271057, "learning_rate": 8.061599315557186e-06, "loss": 0.4016, "step": 19902 }, { "epoch": 0.9133587260795741, "grad_norm": 0.46120721101760864, "learning_rate": 8.061405463831897e-06, "loss": 0.4149, "step": 19903 }, { "epoch": 0.9134046165848286, "grad_norm": 0.4943949282169342, "learning_rate": 8.061211604744894e-06, "loss": 0.5121, "step": 19904 }, { "epoch": 0.913450507090083, "grad_norm": 0.4496508240699768, "learning_rate": 8.061017738296642e-06, "loss": 0.4046, "step": 19905 }, { "epoch": 0.9134963975953375, "grad_norm": 0.45679551362991333, "learning_rate": 8.06082386448761e-06, "loss": 0.4141, "step": 19906 }, { "epoch": 0.913542288100592, "grad_norm": 0.4222595691680908, "learning_rate": 8.060629983318261e-06, "loss": 0.3812, "step": 19907 }, { "epoch": 0.9135881786058464, "grad_norm": 0.4915534257888794, "learning_rate": 8.060436094789063e-06, "loss": 0.4432, "step": 19908 }, { "epoch": 0.9136340691111009, "grad_norm": 0.4558557868003845, "learning_rate": 8.060242198900484e-06, "loss": 0.4186, "step": 19909 }, { "epoch": 0.9136799596163554, "grad_norm": 0.4360322058200836, "learning_rate": 8.060048295652986e-06, "loss": 0.343, "step": 19910 }, { "epoch": 0.9137258501216098, "grad_norm": 0.46147429943084717, "learning_rate": 8.059854385047039e-06, "loss": 0.3794, "step": 19911 }, { "epoch": 0.9137717406268643, "grad_norm": 0.47554031014442444, "learning_rate": 8.059660467083107e-06, "loss": 0.4283, "step": 19912 }, { "epoch": 0.9138176311321188, "grad_norm": 0.4555242955684662, "learning_rate": 8.05946654176166e-06, "loss": 0.4083, "step": 19913 }, { "epoch": 0.9138635216373733, "grad_norm": 0.45906832814216614, "learning_rate": 8.059272609083158e-06, "loss": 0.3667, "step": 19914 }, { "epoch": 0.9139094121426277, "grad_norm": 0.49573925137519836, "learning_rate": 8.059078669048074e-06, "loss": 0.4579, "step": 19915 }, { "epoch": 0.9139553026478822, "grad_norm": 0.4538806676864624, "learning_rate": 8.058884721656868e-06, "loss": 0.415, "step": 19916 }, { "epoch": 0.9140011931531367, "grad_norm": 0.4226219058036804, "learning_rate": 8.058690766910013e-06, "loss": 0.3171, "step": 19917 }, { "epoch": 0.914047083658391, "grad_norm": 0.6014626622200012, "learning_rate": 8.058496804807968e-06, "loss": 0.2844, "step": 19918 }, { "epoch": 0.9140929741636455, "grad_norm": 0.4240710139274597, "learning_rate": 8.058302835351208e-06, "loss": 0.37, "step": 19919 }, { "epoch": 0.9141388646689, "grad_norm": 0.5129101276397705, "learning_rate": 8.058108858540192e-06, "loss": 0.464, "step": 19920 }, { "epoch": 0.9141847551741544, "grad_norm": 0.4401739835739136, "learning_rate": 8.05791487437539e-06, "loss": 0.3532, "step": 19921 }, { "epoch": 0.9142306456794089, "grad_norm": 0.46502354741096497, "learning_rate": 8.057720882857268e-06, "loss": 0.4096, "step": 19922 }, { "epoch": 0.9142765361846634, "grad_norm": 0.4845057725906372, "learning_rate": 8.057526883986293e-06, "loss": 0.4383, "step": 19923 }, { "epoch": 0.9143224266899178, "grad_norm": 0.4627869129180908, "learning_rate": 8.05733287776293e-06, "loss": 0.3899, "step": 19924 }, { "epoch": 0.9143683171951723, "grad_norm": 0.41061171889305115, "learning_rate": 8.057138864187646e-06, "loss": 0.3357, "step": 19925 }, { "epoch": 0.9144142077004268, "grad_norm": 0.4645308554172516, "learning_rate": 8.056944843260909e-06, "loss": 0.3974, "step": 19926 }, { "epoch": 0.9144600982056812, "grad_norm": 0.48850756883621216, "learning_rate": 8.056750814983183e-06, "loss": 0.478, "step": 19927 }, { "epoch": 0.9145059887109357, "grad_norm": 0.44728296995162964, "learning_rate": 8.056556779354938e-06, "loss": 0.3874, "step": 19928 }, { "epoch": 0.9145518792161902, "grad_norm": 0.4454975128173828, "learning_rate": 8.056362736376637e-06, "loss": 0.3744, "step": 19929 }, { "epoch": 0.9145977697214447, "grad_norm": 0.5057328343391418, "learning_rate": 8.056168686048749e-06, "loss": 0.471, "step": 19930 }, { "epoch": 0.9146436602266991, "grad_norm": 0.4555968940258026, "learning_rate": 8.055974628371739e-06, "loss": 0.4284, "step": 19931 }, { "epoch": 0.9146895507319536, "grad_norm": 0.4612051844596863, "learning_rate": 8.055780563346076e-06, "loss": 0.3834, "step": 19932 }, { "epoch": 0.9147354412372081, "grad_norm": 0.4492006301879883, "learning_rate": 8.055586490972224e-06, "loss": 0.4029, "step": 19933 }, { "epoch": 0.9147813317424625, "grad_norm": 0.48880547285079956, "learning_rate": 8.055392411250651e-06, "loss": 0.3611, "step": 19934 }, { "epoch": 0.914827222247717, "grad_norm": 0.47262459993362427, "learning_rate": 8.055198324181823e-06, "loss": 0.4158, "step": 19935 }, { "epoch": 0.9148731127529715, "grad_norm": 0.44569703936576843, "learning_rate": 8.055004229766207e-06, "loss": 0.3799, "step": 19936 }, { "epoch": 0.9149190032582258, "grad_norm": 0.4228842258453369, "learning_rate": 8.054810128004273e-06, "loss": 0.3545, "step": 19937 }, { "epoch": 0.9149648937634803, "grad_norm": 0.42943331599235535, "learning_rate": 8.054616018896481e-06, "loss": 0.3312, "step": 19938 }, { "epoch": 0.9150107842687348, "grad_norm": 0.48538121581077576, "learning_rate": 8.054421902443304e-06, "loss": 0.4179, "step": 19939 }, { "epoch": 0.9150566747739892, "grad_norm": 0.4716055989265442, "learning_rate": 8.054227778645205e-06, "loss": 0.3715, "step": 19940 }, { "epoch": 0.9151025652792437, "grad_norm": 0.4691489040851593, "learning_rate": 8.054033647502654e-06, "loss": 0.3795, "step": 19941 }, { "epoch": 0.9151484557844982, "grad_norm": 0.4136064350605011, "learning_rate": 8.053839509016116e-06, "loss": 0.351, "step": 19942 }, { "epoch": 0.9151943462897526, "grad_norm": 0.4332267642021179, "learning_rate": 8.053645363186056e-06, "loss": 0.3902, "step": 19943 }, { "epoch": 0.9152402367950071, "grad_norm": 0.4606090486049652, "learning_rate": 8.053451210012942e-06, "loss": 0.3707, "step": 19944 }, { "epoch": 0.9152861273002616, "grad_norm": 0.47834905982017517, "learning_rate": 8.053257049497243e-06, "loss": 0.4658, "step": 19945 }, { "epoch": 0.915332017805516, "grad_norm": 0.4282083511352539, "learning_rate": 8.053062881639424e-06, "loss": 0.3798, "step": 19946 }, { "epoch": 0.9153779083107705, "grad_norm": 0.4588189423084259, "learning_rate": 8.052868706439953e-06, "loss": 0.4247, "step": 19947 }, { "epoch": 0.915423798816025, "grad_norm": 0.5361977815628052, "learning_rate": 8.052674523899296e-06, "loss": 0.3573, "step": 19948 }, { "epoch": 0.9154696893212795, "grad_norm": 0.46259623765945435, "learning_rate": 8.05248033401792e-06, "loss": 0.4022, "step": 19949 }, { "epoch": 0.9155155798265339, "grad_norm": 0.4493080675601959, "learning_rate": 8.052286136796293e-06, "loss": 0.4291, "step": 19950 }, { "epoch": 0.9155614703317884, "grad_norm": 0.49828290939331055, "learning_rate": 8.052091932234877e-06, "loss": 0.5224, "step": 19951 }, { "epoch": 0.9156073608370429, "grad_norm": 0.4824344515800476, "learning_rate": 8.05189772033415e-06, "loss": 0.4669, "step": 19952 }, { "epoch": 0.9156532513422972, "grad_norm": 0.40372446179389954, "learning_rate": 8.051703501094567e-06, "loss": 0.2576, "step": 19953 }, { "epoch": 0.9156991418475517, "grad_norm": 0.4589158594608307, "learning_rate": 8.051509274516602e-06, "loss": 0.4159, "step": 19954 }, { "epoch": 0.9157450323528062, "grad_norm": 0.46112895011901855, "learning_rate": 8.05131504060072e-06, "loss": 0.3726, "step": 19955 }, { "epoch": 0.9157909228580606, "grad_norm": 0.4675448536872864, "learning_rate": 8.051120799347389e-06, "loss": 0.3208, "step": 19956 }, { "epoch": 0.9158368133633151, "grad_norm": 0.4543447494506836, "learning_rate": 8.050926550757074e-06, "loss": 0.3947, "step": 19957 }, { "epoch": 0.9158827038685696, "grad_norm": 0.44077757000923157, "learning_rate": 8.050732294830246e-06, "loss": 0.3527, "step": 19958 }, { "epoch": 0.915928594373824, "grad_norm": 0.42064371705055237, "learning_rate": 8.050538031567367e-06, "loss": 0.3216, "step": 19959 }, { "epoch": 0.9159744848790785, "grad_norm": 0.492087721824646, "learning_rate": 8.050343760968908e-06, "loss": 0.4724, "step": 19960 }, { "epoch": 0.916020375384333, "grad_norm": 0.46470850706100464, "learning_rate": 8.050149483035336e-06, "loss": 0.4136, "step": 19961 }, { "epoch": 0.9160662658895874, "grad_norm": 0.44022080302238464, "learning_rate": 8.049955197767115e-06, "loss": 0.3909, "step": 19962 }, { "epoch": 0.9161121563948419, "grad_norm": 0.4655168056488037, "learning_rate": 8.049760905164716e-06, "loss": 0.428, "step": 19963 }, { "epoch": 0.9161580469000964, "grad_norm": 0.42304664850234985, "learning_rate": 8.049566605228606e-06, "loss": 0.3525, "step": 19964 }, { "epoch": 0.9162039374053509, "grad_norm": 0.4397325813770294, "learning_rate": 8.049372297959246e-06, "loss": 0.3929, "step": 19965 }, { "epoch": 0.9162498279106053, "grad_norm": 0.4821664094924927, "learning_rate": 8.049177983357113e-06, "loss": 0.3569, "step": 19966 }, { "epoch": 0.9162957184158598, "grad_norm": 0.45952117443084717, "learning_rate": 8.048983661422667e-06, "loss": 0.4112, "step": 19967 }, { "epoch": 0.9163416089211143, "grad_norm": 0.41459429264068604, "learning_rate": 8.048789332156377e-06, "loss": 0.3127, "step": 19968 }, { "epoch": 0.9163874994263687, "grad_norm": 0.4986949563026428, "learning_rate": 8.048594995558712e-06, "loss": 0.4403, "step": 19969 }, { "epoch": 0.9164333899316232, "grad_norm": 0.4376751482486725, "learning_rate": 8.048400651630136e-06, "loss": 0.3592, "step": 19970 }, { "epoch": 0.9164792804368777, "grad_norm": 0.4477252960205078, "learning_rate": 8.048206300371122e-06, "loss": 0.3843, "step": 19971 }, { "epoch": 0.916525170942132, "grad_norm": 0.44658413529396057, "learning_rate": 8.048011941782132e-06, "loss": 0.4374, "step": 19972 }, { "epoch": 0.9165710614473865, "grad_norm": 0.4536164402961731, "learning_rate": 8.047817575863634e-06, "loss": 0.4159, "step": 19973 }, { "epoch": 0.916616951952641, "grad_norm": 0.49663740396499634, "learning_rate": 8.0476232026161e-06, "loss": 0.4357, "step": 19974 }, { "epoch": 0.9166628424578954, "grad_norm": 0.48059847950935364, "learning_rate": 8.047428822039992e-06, "loss": 0.425, "step": 19975 }, { "epoch": 0.9167087329631499, "grad_norm": 0.41684722900390625, "learning_rate": 8.04723443413578e-06, "loss": 0.3373, "step": 19976 }, { "epoch": 0.9167546234684044, "grad_norm": 0.4607388973236084, "learning_rate": 8.04704003890393e-06, "loss": 0.4456, "step": 19977 }, { "epoch": 0.9168005139736588, "grad_norm": 0.49048784375190735, "learning_rate": 8.046845636344912e-06, "loss": 0.5197, "step": 19978 }, { "epoch": 0.9168464044789133, "grad_norm": 0.485093355178833, "learning_rate": 8.04665122645919e-06, "loss": 0.399, "step": 19979 }, { "epoch": 0.9168922949841678, "grad_norm": 0.46711257100105286, "learning_rate": 8.046456809247234e-06, "loss": 0.4088, "step": 19980 }, { "epoch": 0.9169381854894222, "grad_norm": 0.4614191949367523, "learning_rate": 8.04626238470951e-06, "loss": 0.4342, "step": 19981 }, { "epoch": 0.9169840759946767, "grad_norm": 0.47194069623947144, "learning_rate": 8.046067952846487e-06, "loss": 0.4618, "step": 19982 }, { "epoch": 0.9170299664999312, "grad_norm": 0.41760337352752686, "learning_rate": 8.045873513658634e-06, "loss": 0.3651, "step": 19983 }, { "epoch": 0.9170758570051857, "grad_norm": 0.4043579697608948, "learning_rate": 8.045679067146415e-06, "loss": 0.3126, "step": 19984 }, { "epoch": 0.9171217475104401, "grad_norm": 0.4702475070953369, "learning_rate": 8.0454846133103e-06, "loss": 0.4424, "step": 19985 }, { "epoch": 0.9171676380156946, "grad_norm": 0.5053181052207947, "learning_rate": 8.045290152150753e-06, "loss": 0.4882, "step": 19986 }, { "epoch": 0.9172135285209491, "grad_norm": 0.40354758501052856, "learning_rate": 8.045095683668247e-06, "loss": 0.3292, "step": 19987 }, { "epoch": 0.9172594190262034, "grad_norm": 0.45261818170547485, "learning_rate": 8.044901207863245e-06, "loss": 0.4013, "step": 19988 }, { "epoch": 0.917305309531458, "grad_norm": 0.4700707495212555, "learning_rate": 8.044706724736217e-06, "loss": 0.4063, "step": 19989 }, { "epoch": 0.9173512000367124, "grad_norm": 0.4582161605358124, "learning_rate": 8.044512234287631e-06, "loss": 0.4639, "step": 19990 }, { "epoch": 0.9173970905419668, "grad_norm": 0.42430198192596436, "learning_rate": 8.044317736517955e-06, "loss": 0.3375, "step": 19991 }, { "epoch": 0.9174429810472213, "grad_norm": 0.44287121295928955, "learning_rate": 8.044123231427654e-06, "loss": 0.3383, "step": 19992 }, { "epoch": 0.9174888715524758, "grad_norm": 0.4396236836910248, "learning_rate": 8.043928719017198e-06, "loss": 0.3898, "step": 19993 }, { "epoch": 0.9175347620577302, "grad_norm": 0.4521273374557495, "learning_rate": 8.043734199287054e-06, "loss": 0.3822, "step": 19994 }, { "epoch": 0.9175806525629847, "grad_norm": 0.4202002286911011, "learning_rate": 8.043539672237689e-06, "loss": 0.3467, "step": 19995 }, { "epoch": 0.9176265430682392, "grad_norm": 0.46127039194107056, "learning_rate": 8.043345137869572e-06, "loss": 0.4181, "step": 19996 }, { "epoch": 0.9176724335734936, "grad_norm": 0.484381765127182, "learning_rate": 8.043150596183172e-06, "loss": 0.4604, "step": 19997 }, { "epoch": 0.9177183240787481, "grad_norm": 0.4581066071987152, "learning_rate": 8.042956047178953e-06, "loss": 0.3909, "step": 19998 }, { "epoch": 0.9177642145840026, "grad_norm": 0.4827679991722107, "learning_rate": 8.042761490857387e-06, "loss": 0.4763, "step": 19999 }, { "epoch": 0.917810105089257, "grad_norm": 0.4396640658378601, "learning_rate": 8.04256692721894e-06, "loss": 0.3916, "step": 20000 }, { "epoch": 0.9178559955945115, "grad_norm": 0.4575493037700653, "learning_rate": 8.042372356264078e-06, "loss": 0.459, "step": 20001 }, { "epoch": 0.917901886099766, "grad_norm": 0.43947094678878784, "learning_rate": 8.042177777993272e-06, "loss": 0.3648, "step": 20002 }, { "epoch": 0.9179477766050205, "grad_norm": 0.46974506974220276, "learning_rate": 8.041983192406989e-06, "loss": 0.3986, "step": 20003 }, { "epoch": 0.9179936671102749, "grad_norm": 0.4219629466533661, "learning_rate": 8.041788599505695e-06, "loss": 0.3473, "step": 20004 }, { "epoch": 0.9180395576155294, "grad_norm": 0.4444812536239624, "learning_rate": 8.041593999289859e-06, "loss": 0.3948, "step": 20005 }, { "epoch": 0.9180854481207839, "grad_norm": 0.42322418093681335, "learning_rate": 8.041399391759953e-06, "loss": 0.3511, "step": 20006 }, { "epoch": 0.9181313386260382, "grad_norm": 0.46399614214897156, "learning_rate": 8.041204776916439e-06, "loss": 0.4547, "step": 20007 }, { "epoch": 0.9181772291312927, "grad_norm": 0.4384278953075409, "learning_rate": 8.041010154759786e-06, "loss": 0.396, "step": 20008 }, { "epoch": 0.9182231196365472, "grad_norm": 0.434222936630249, "learning_rate": 8.040815525290464e-06, "loss": 0.3772, "step": 20009 }, { "epoch": 0.9182690101418016, "grad_norm": 0.4341912865638733, "learning_rate": 8.04062088850894e-06, "loss": 0.3813, "step": 20010 }, { "epoch": 0.9183149006470561, "grad_norm": 0.4584401547908783, "learning_rate": 8.040426244415685e-06, "loss": 0.3765, "step": 20011 }, { "epoch": 0.9183607911523106, "grad_norm": 0.4477466940879822, "learning_rate": 8.040231593011162e-06, "loss": 0.354, "step": 20012 }, { "epoch": 0.918406681657565, "grad_norm": 0.4670342803001404, "learning_rate": 8.040036934295843e-06, "loss": 0.3703, "step": 20013 }, { "epoch": 0.9184525721628195, "grad_norm": 0.4566385746002197, "learning_rate": 8.039842268270195e-06, "loss": 0.3713, "step": 20014 }, { "epoch": 0.918498462668074, "grad_norm": 0.4833486080169678, "learning_rate": 8.039647594934684e-06, "loss": 0.4588, "step": 20015 }, { "epoch": 0.9185443531733284, "grad_norm": 0.4764838218688965, "learning_rate": 8.039452914289781e-06, "loss": 0.3828, "step": 20016 }, { "epoch": 0.9185902436785829, "grad_norm": 0.5860703587532043, "learning_rate": 8.039258226335951e-06, "loss": 0.3878, "step": 20017 }, { "epoch": 0.9186361341838374, "grad_norm": 0.4405742883682251, "learning_rate": 8.039063531073667e-06, "loss": 0.3742, "step": 20018 }, { "epoch": 0.9186820246890919, "grad_norm": 0.4318040609359741, "learning_rate": 8.038868828503393e-06, "loss": 0.3583, "step": 20019 }, { "epoch": 0.9187279151943463, "grad_norm": 0.44224053621292114, "learning_rate": 8.038674118625601e-06, "loss": 0.3587, "step": 20020 }, { "epoch": 0.9187738056996008, "grad_norm": 0.42527085542678833, "learning_rate": 8.038479401440752e-06, "loss": 0.353, "step": 20021 }, { "epoch": 0.9188196962048553, "grad_norm": 0.43495824933052063, "learning_rate": 8.038284676949321e-06, "loss": 0.3638, "step": 20022 }, { "epoch": 0.9188655867101096, "grad_norm": 0.48858779668807983, "learning_rate": 8.038089945151777e-06, "loss": 0.5264, "step": 20023 }, { "epoch": 0.9189114772153641, "grad_norm": 0.4363512396812439, "learning_rate": 8.037895206048582e-06, "loss": 0.3412, "step": 20024 }, { "epoch": 0.9189573677206186, "grad_norm": 0.4311063289642334, "learning_rate": 8.037700459640209e-06, "loss": 0.3787, "step": 20025 }, { "epoch": 0.919003258225873, "grad_norm": 0.47028255462646484, "learning_rate": 8.037505705927127e-06, "loss": 0.3232, "step": 20026 }, { "epoch": 0.9190491487311275, "grad_norm": 0.42899638414382935, "learning_rate": 8.037310944909799e-06, "loss": 0.4066, "step": 20027 }, { "epoch": 0.919095039236382, "grad_norm": 0.6811000108718872, "learning_rate": 8.037116176588699e-06, "loss": 0.4209, "step": 20028 }, { "epoch": 0.9191409297416364, "grad_norm": 0.42036280035972595, "learning_rate": 8.036921400964293e-06, "loss": 0.3289, "step": 20029 }, { "epoch": 0.9191868202468909, "grad_norm": 0.4487990438938141, "learning_rate": 8.03672661803705e-06, "loss": 0.364, "step": 20030 }, { "epoch": 0.9192327107521454, "grad_norm": 0.4839121699333191, "learning_rate": 8.036531827807437e-06, "loss": 0.4054, "step": 20031 }, { "epoch": 0.9192786012573998, "grad_norm": 0.450795441865921, "learning_rate": 8.036337030275923e-06, "loss": 0.3916, "step": 20032 }, { "epoch": 0.9193244917626543, "grad_norm": 0.4859609603881836, "learning_rate": 8.036142225442976e-06, "loss": 0.4448, "step": 20033 }, { "epoch": 0.9193703822679088, "grad_norm": 0.4316902458667755, "learning_rate": 8.035947413309068e-06, "loss": 0.3317, "step": 20034 }, { "epoch": 0.9194162727731632, "grad_norm": 0.5015361905097961, "learning_rate": 8.035752593874661e-06, "loss": 0.48, "step": 20035 }, { "epoch": 0.9194621632784177, "grad_norm": 0.5162017941474915, "learning_rate": 8.03555776714023e-06, "loss": 0.4559, "step": 20036 }, { "epoch": 0.9195080537836722, "grad_norm": 0.4470103681087494, "learning_rate": 8.03536293310624e-06, "loss": 0.399, "step": 20037 }, { "epoch": 0.9195539442889267, "grad_norm": 0.4689035415649414, "learning_rate": 8.03516809177316e-06, "loss": 0.4016, "step": 20038 }, { "epoch": 0.9195998347941811, "grad_norm": 0.4740046262741089, "learning_rate": 8.034973243141458e-06, "loss": 0.4962, "step": 20039 }, { "epoch": 0.9196457252994356, "grad_norm": 0.43549588322639465, "learning_rate": 8.034778387211604e-06, "loss": 0.3851, "step": 20040 }, { "epoch": 0.91969161580469, "grad_norm": 0.4624907076358795, "learning_rate": 8.034583523984066e-06, "loss": 0.3467, "step": 20041 }, { "epoch": 0.9197375063099444, "grad_norm": 0.4143589735031128, "learning_rate": 8.034388653459311e-06, "loss": 0.2863, "step": 20042 }, { "epoch": 0.9197833968151989, "grad_norm": 0.47427281737327576, "learning_rate": 8.03419377563781e-06, "loss": 0.4501, "step": 20043 }, { "epoch": 0.9198292873204534, "grad_norm": 0.43546831607818604, "learning_rate": 8.033998890520031e-06, "loss": 0.373, "step": 20044 }, { "epoch": 0.9198751778257078, "grad_norm": 0.48160433769226074, "learning_rate": 8.033803998106443e-06, "loss": 0.3408, "step": 20045 }, { "epoch": 0.9199210683309623, "grad_norm": 0.46631836891174316, "learning_rate": 8.03360909839751e-06, "loss": 0.4566, "step": 20046 }, { "epoch": 0.9199669588362168, "grad_norm": 0.43506160378456116, "learning_rate": 8.033414191393706e-06, "loss": 0.3533, "step": 20047 }, { "epoch": 0.9200128493414712, "grad_norm": 0.4559778869152069, "learning_rate": 8.0332192770955e-06, "loss": 0.3432, "step": 20048 }, { "epoch": 0.9200587398467257, "grad_norm": 0.4768766760826111, "learning_rate": 8.033024355503358e-06, "loss": 0.4827, "step": 20049 }, { "epoch": 0.9201046303519802, "grad_norm": 0.44587239623069763, "learning_rate": 8.03282942661775e-06, "loss": 0.3481, "step": 20050 }, { "epoch": 0.9201505208572346, "grad_norm": 0.44141775369644165, "learning_rate": 8.032634490439145e-06, "loss": 0.3577, "step": 20051 }, { "epoch": 0.9201964113624891, "grad_norm": 0.4313547909259796, "learning_rate": 8.032439546968012e-06, "loss": 0.3614, "step": 20052 }, { "epoch": 0.9202423018677436, "grad_norm": 0.47053706645965576, "learning_rate": 8.032244596204817e-06, "loss": 0.4278, "step": 20053 }, { "epoch": 0.9202881923729981, "grad_norm": 0.39661288261413574, "learning_rate": 8.032049638150031e-06, "loss": 0.2892, "step": 20054 }, { "epoch": 0.9203340828782525, "grad_norm": 0.4752148687839508, "learning_rate": 8.031854672804122e-06, "loss": 0.4296, "step": 20055 }, { "epoch": 0.920379973383507, "grad_norm": 0.4749104082584381, "learning_rate": 8.03165970016756e-06, "loss": 0.4455, "step": 20056 }, { "epoch": 0.9204258638887615, "grad_norm": 0.4639680087566376, "learning_rate": 8.031464720240813e-06, "loss": 0.3953, "step": 20057 }, { "epoch": 0.9204717543940159, "grad_norm": 0.4204411804676056, "learning_rate": 8.031269733024351e-06, "loss": 0.3213, "step": 20058 }, { "epoch": 0.9205176448992703, "grad_norm": 0.45318472385406494, "learning_rate": 8.031074738518642e-06, "loss": 0.4161, "step": 20059 }, { "epoch": 0.9205635354045248, "grad_norm": 0.4550301134586334, "learning_rate": 8.030879736724155e-06, "loss": 0.4025, "step": 20060 }, { "epoch": 0.9206094259097792, "grad_norm": 0.42499274015426636, "learning_rate": 8.030684727641358e-06, "loss": 0.3417, "step": 20061 }, { "epoch": 0.9206553164150337, "grad_norm": 0.4087652266025543, "learning_rate": 8.030489711270722e-06, "loss": 0.2857, "step": 20062 }, { "epoch": 0.9207012069202882, "grad_norm": 0.4118112325668335, "learning_rate": 8.030294687612712e-06, "loss": 0.3055, "step": 20063 }, { "epoch": 0.9207470974255426, "grad_norm": 0.48875170946121216, "learning_rate": 8.030099656667802e-06, "loss": 0.5009, "step": 20064 }, { "epoch": 0.9207929879307971, "grad_norm": 0.4298875033855438, "learning_rate": 8.029904618436458e-06, "loss": 0.3868, "step": 20065 }, { "epoch": 0.9208388784360516, "grad_norm": 0.42632368206977844, "learning_rate": 8.02970957291915e-06, "loss": 0.3502, "step": 20066 }, { "epoch": 0.920884768941306, "grad_norm": 0.4784495532512665, "learning_rate": 8.029514520116346e-06, "loss": 0.4702, "step": 20067 }, { "epoch": 0.9209306594465605, "grad_norm": 0.44892582297325134, "learning_rate": 8.029319460028516e-06, "loss": 0.3774, "step": 20068 }, { "epoch": 0.920976549951815, "grad_norm": 0.4614852964878082, "learning_rate": 8.029124392656129e-06, "loss": 0.4035, "step": 20069 }, { "epoch": 0.9210224404570694, "grad_norm": 0.44602301716804504, "learning_rate": 8.028929317999653e-06, "loss": 0.3819, "step": 20070 }, { "epoch": 0.9210683309623239, "grad_norm": 0.47170859575271606, "learning_rate": 8.028734236059559e-06, "loss": 0.4034, "step": 20071 }, { "epoch": 0.9211142214675784, "grad_norm": 0.44395312666893005, "learning_rate": 8.028539146836315e-06, "loss": 0.3904, "step": 20072 }, { "epoch": 0.9211601119728329, "grad_norm": 0.4048980176448822, "learning_rate": 8.02834405033039e-06, "loss": 0.2981, "step": 20073 }, { "epoch": 0.9212060024780873, "grad_norm": 0.43619513511657715, "learning_rate": 8.028148946542254e-06, "loss": 0.3579, "step": 20074 }, { "epoch": 0.9212518929833418, "grad_norm": 0.46999847888946533, "learning_rate": 8.027953835472374e-06, "loss": 0.4417, "step": 20075 }, { "epoch": 0.9212977834885963, "grad_norm": 0.48393091559410095, "learning_rate": 8.027758717121222e-06, "loss": 0.4378, "step": 20076 }, { "epoch": 0.9213436739938506, "grad_norm": 0.47350654006004333, "learning_rate": 8.027563591489265e-06, "loss": 0.4778, "step": 20077 }, { "epoch": 0.9213895644991051, "grad_norm": 0.48437902331352234, "learning_rate": 8.027368458576971e-06, "loss": 0.4554, "step": 20078 }, { "epoch": 0.9214354550043596, "grad_norm": 0.43889960646629333, "learning_rate": 8.027173318384816e-06, "loss": 0.3562, "step": 20079 }, { "epoch": 0.921481345509614, "grad_norm": 0.4920620322227478, "learning_rate": 8.026978170913262e-06, "loss": 0.3858, "step": 20080 }, { "epoch": 0.9215272360148685, "grad_norm": 0.41910985112190247, "learning_rate": 8.02678301616278e-06, "loss": 0.3628, "step": 20081 }, { "epoch": 0.921573126520123, "grad_norm": 0.4516465961933136, "learning_rate": 8.026587854133841e-06, "loss": 0.4541, "step": 20082 }, { "epoch": 0.9216190170253774, "grad_norm": 0.45636186003685, "learning_rate": 8.026392684826913e-06, "loss": 0.4223, "step": 20083 }, { "epoch": 0.9216649075306319, "grad_norm": 0.4509398937225342, "learning_rate": 8.026197508242467e-06, "loss": 0.4182, "step": 20084 }, { "epoch": 0.9217107980358864, "grad_norm": 0.4412829875946045, "learning_rate": 8.026002324380972e-06, "loss": 0.4368, "step": 20085 }, { "epoch": 0.9217566885411408, "grad_norm": 0.44978055357933044, "learning_rate": 8.025807133242893e-06, "loss": 0.3996, "step": 20086 }, { "epoch": 0.9218025790463953, "grad_norm": 0.47315067052841187, "learning_rate": 8.025611934828705e-06, "loss": 0.4247, "step": 20087 }, { "epoch": 0.9218484695516498, "grad_norm": 0.4475835859775543, "learning_rate": 8.025416729138876e-06, "loss": 0.4324, "step": 20088 }, { "epoch": 0.9218943600569042, "grad_norm": 0.4375230669975281, "learning_rate": 8.025221516173873e-06, "loss": 0.3693, "step": 20089 }, { "epoch": 0.9219402505621587, "grad_norm": 0.4591505825519562, "learning_rate": 8.025026295934169e-06, "loss": 0.4283, "step": 20090 }, { "epoch": 0.9219861410674132, "grad_norm": 0.4266013503074646, "learning_rate": 8.02483106842023e-06, "loss": 0.343, "step": 20091 }, { "epoch": 0.9220320315726677, "grad_norm": 0.4743383526802063, "learning_rate": 8.024635833632525e-06, "loss": 0.3979, "step": 20092 }, { "epoch": 0.922077922077922, "grad_norm": 0.45974451303482056, "learning_rate": 8.024440591571529e-06, "loss": 0.4193, "step": 20093 }, { "epoch": 0.9221238125831765, "grad_norm": 0.475330114364624, "learning_rate": 8.024245342237706e-06, "loss": 0.429, "step": 20094 }, { "epoch": 0.922169703088431, "grad_norm": 0.48697683215141296, "learning_rate": 8.024050085631527e-06, "loss": 0.4344, "step": 20095 }, { "epoch": 0.9222155935936854, "grad_norm": 0.4553377330303192, "learning_rate": 8.023854821753464e-06, "loss": 0.4091, "step": 20096 }, { "epoch": 0.9222614840989399, "grad_norm": 0.4588093161582947, "learning_rate": 8.023659550603983e-06, "loss": 0.3754, "step": 20097 }, { "epoch": 0.9223073746041944, "grad_norm": 0.444650799036026, "learning_rate": 8.023464272183555e-06, "loss": 0.3891, "step": 20098 }, { "epoch": 0.9223532651094488, "grad_norm": 0.4421999156475067, "learning_rate": 8.02326898649265e-06, "loss": 0.3887, "step": 20099 }, { "epoch": 0.9223991556147033, "grad_norm": 0.4136192202568054, "learning_rate": 8.023073693531738e-06, "loss": 0.381, "step": 20100 }, { "epoch": 0.9224450461199578, "grad_norm": 0.4877961575984955, "learning_rate": 8.022878393301286e-06, "loss": 0.5001, "step": 20101 }, { "epoch": 0.9224909366252122, "grad_norm": 0.4695016145706177, "learning_rate": 8.022683085801768e-06, "loss": 0.4151, "step": 20102 }, { "epoch": 0.9225368271304667, "grad_norm": 0.42179787158966064, "learning_rate": 8.022487771033649e-06, "loss": 0.3761, "step": 20103 }, { "epoch": 0.9225827176357212, "grad_norm": 0.4642117917537689, "learning_rate": 8.022292448997402e-06, "loss": 0.4799, "step": 20104 }, { "epoch": 0.9226286081409756, "grad_norm": 0.437956303358078, "learning_rate": 8.022097119693495e-06, "loss": 0.3855, "step": 20105 }, { "epoch": 0.9226744986462301, "grad_norm": 0.46346476674079895, "learning_rate": 8.021901783122398e-06, "loss": 0.4583, "step": 20106 }, { "epoch": 0.9227203891514846, "grad_norm": 0.4763093590736389, "learning_rate": 8.021706439284583e-06, "loss": 0.5004, "step": 20107 }, { "epoch": 0.9227662796567391, "grad_norm": 0.4424879848957062, "learning_rate": 8.021511088180513e-06, "loss": 0.3886, "step": 20108 }, { "epoch": 0.9228121701619935, "grad_norm": 0.47741714119911194, "learning_rate": 8.021315729810667e-06, "loss": 0.425, "step": 20109 }, { "epoch": 0.922858060667248, "grad_norm": 0.4704485833644867, "learning_rate": 8.02112036417551e-06, "loss": 0.4276, "step": 20110 }, { "epoch": 0.9229039511725025, "grad_norm": 0.46119987964630127, "learning_rate": 8.02092499127551e-06, "loss": 0.3763, "step": 20111 }, { "epoch": 0.9229498416777568, "grad_norm": 0.429625004529953, "learning_rate": 8.02072961111114e-06, "loss": 0.3968, "step": 20112 }, { "epoch": 0.9229957321830113, "grad_norm": 0.453208863735199, "learning_rate": 8.020534223682867e-06, "loss": 0.4158, "step": 20113 }, { "epoch": 0.9230416226882658, "grad_norm": 0.44942331314086914, "learning_rate": 8.020338828991163e-06, "loss": 0.4078, "step": 20114 }, { "epoch": 0.9230875131935202, "grad_norm": 5.099334239959717, "learning_rate": 8.0201434270365e-06, "loss": 0.3706, "step": 20115 }, { "epoch": 0.9231334036987747, "grad_norm": 0.4599052667617798, "learning_rate": 8.019948017819343e-06, "loss": 0.3558, "step": 20116 }, { "epoch": 0.9231792942040292, "grad_norm": 0.4213622510433197, "learning_rate": 8.019752601340163e-06, "loss": 0.3486, "step": 20117 }, { "epoch": 0.9232251847092836, "grad_norm": 0.45478516817092896, "learning_rate": 8.019557177599434e-06, "loss": 0.4009, "step": 20118 }, { "epoch": 0.9232710752145381, "grad_norm": 0.46097317337989807, "learning_rate": 8.01936174659762e-06, "loss": 0.4205, "step": 20119 }, { "epoch": 0.9233169657197926, "grad_norm": 0.4721134603023529, "learning_rate": 8.019166308335195e-06, "loss": 0.4693, "step": 20120 }, { "epoch": 0.923362856225047, "grad_norm": 0.44674164056777954, "learning_rate": 8.01897086281263e-06, "loss": 0.3164, "step": 20121 }, { "epoch": 0.9234087467303015, "grad_norm": 0.4272751808166504, "learning_rate": 8.018775410030391e-06, "loss": 0.3643, "step": 20122 }, { "epoch": 0.923454637235556, "grad_norm": 0.4608440399169922, "learning_rate": 8.018579949988949e-06, "loss": 0.4397, "step": 20123 }, { "epoch": 0.9235005277408104, "grad_norm": 0.46902430057525635, "learning_rate": 8.018384482688776e-06, "loss": 0.3754, "step": 20124 }, { "epoch": 0.9235464182460649, "grad_norm": 0.4426458775997162, "learning_rate": 8.018189008130341e-06, "loss": 0.3738, "step": 20125 }, { "epoch": 0.9235923087513194, "grad_norm": 0.9838173985481262, "learning_rate": 8.017993526314114e-06, "loss": 0.4594, "step": 20126 }, { "epoch": 0.9236381992565739, "grad_norm": 0.4765073359012604, "learning_rate": 8.017798037240567e-06, "loss": 0.4034, "step": 20127 }, { "epoch": 0.9236840897618283, "grad_norm": 0.44089826941490173, "learning_rate": 8.017602540910166e-06, "loss": 0.3262, "step": 20128 }, { "epoch": 0.9237299802670828, "grad_norm": 0.4565177261829376, "learning_rate": 8.017407037323383e-06, "loss": 0.4016, "step": 20129 }, { "epoch": 0.9237758707723372, "grad_norm": 0.4744439423084259, "learning_rate": 8.017211526480688e-06, "loss": 0.4537, "step": 20130 }, { "epoch": 0.9238217612775916, "grad_norm": 0.48319289088249207, "learning_rate": 8.017016008382553e-06, "loss": 0.4483, "step": 20131 }, { "epoch": 0.9238676517828461, "grad_norm": 0.4439082145690918, "learning_rate": 8.016820483029446e-06, "loss": 0.4143, "step": 20132 }, { "epoch": 0.9239135422881006, "grad_norm": 0.43708109855651855, "learning_rate": 8.016624950421839e-06, "loss": 0.3739, "step": 20133 }, { "epoch": 0.923959432793355, "grad_norm": 0.4252750277519226, "learning_rate": 8.0164294105602e-06, "loss": 0.3337, "step": 20134 }, { "epoch": 0.9240053232986095, "grad_norm": 0.4561387002468109, "learning_rate": 8.016233863444998e-06, "loss": 0.388, "step": 20135 }, { "epoch": 0.924051213803864, "grad_norm": 0.4649335741996765, "learning_rate": 8.01603830907671e-06, "loss": 0.4489, "step": 20136 }, { "epoch": 0.9240971043091184, "grad_norm": 0.4832724928855896, "learning_rate": 8.015842747455798e-06, "loss": 0.4688, "step": 20137 }, { "epoch": 0.9241429948143729, "grad_norm": 0.405512273311615, "learning_rate": 8.01564717858274e-06, "loss": 0.3192, "step": 20138 }, { "epoch": 0.9241888853196274, "grad_norm": 0.44366398453712463, "learning_rate": 8.015451602457997e-06, "loss": 0.3836, "step": 20139 }, { "epoch": 0.9242347758248818, "grad_norm": 0.6866410374641418, "learning_rate": 8.015256019082049e-06, "loss": 0.4445, "step": 20140 }, { "epoch": 0.9242806663301363, "grad_norm": 0.4202505052089691, "learning_rate": 8.015060428455361e-06, "loss": 0.3179, "step": 20141 }, { "epoch": 0.9243265568353908, "grad_norm": 0.45955929160118103, "learning_rate": 8.014864830578404e-06, "loss": 0.4358, "step": 20142 }, { "epoch": 0.9243724473406453, "grad_norm": 0.47378605604171753, "learning_rate": 8.014669225451647e-06, "loss": 0.3847, "step": 20143 }, { "epoch": 0.9244183378458997, "grad_norm": 0.4619116187095642, "learning_rate": 8.014473613075565e-06, "loss": 0.4137, "step": 20144 }, { "epoch": 0.9244642283511542, "grad_norm": 0.4496588706970215, "learning_rate": 8.014277993450623e-06, "loss": 0.3871, "step": 20145 }, { "epoch": 0.9245101188564087, "grad_norm": 0.5027875900268555, "learning_rate": 8.014082366577293e-06, "loss": 0.4595, "step": 20146 }, { "epoch": 0.924556009361663, "grad_norm": 0.44417211413383484, "learning_rate": 8.013886732456049e-06, "loss": 0.3703, "step": 20147 }, { "epoch": 0.9246018998669175, "grad_norm": 0.45157408714294434, "learning_rate": 8.013691091087356e-06, "loss": 0.4061, "step": 20148 }, { "epoch": 0.924647790372172, "grad_norm": 0.44583213329315186, "learning_rate": 8.013495442471689e-06, "loss": 0.3976, "step": 20149 }, { "epoch": 0.9246936808774264, "grad_norm": 0.44881442189216614, "learning_rate": 8.013299786609517e-06, "loss": 0.4059, "step": 20150 }, { "epoch": 0.9247395713826809, "grad_norm": 0.4406236708164215, "learning_rate": 8.013104123501307e-06, "loss": 0.3763, "step": 20151 }, { "epoch": 0.9247854618879354, "grad_norm": 0.5094813108444214, "learning_rate": 8.012908453147534e-06, "loss": 0.5005, "step": 20152 }, { "epoch": 0.9248313523931898, "grad_norm": 0.42629730701446533, "learning_rate": 8.012712775548669e-06, "loss": 0.3677, "step": 20153 }, { "epoch": 0.9248772428984443, "grad_norm": 0.444350004196167, "learning_rate": 8.012517090705179e-06, "loss": 0.3918, "step": 20154 }, { "epoch": 0.9249231334036988, "grad_norm": 0.45564523339271545, "learning_rate": 8.012321398617536e-06, "loss": 0.3831, "step": 20155 }, { "epoch": 0.9249690239089532, "grad_norm": 0.5198468565940857, "learning_rate": 8.01212569928621e-06, "loss": 0.401, "step": 20156 }, { "epoch": 0.9250149144142077, "grad_norm": 0.4857330620288849, "learning_rate": 8.011929992711674e-06, "loss": 0.4181, "step": 20157 }, { "epoch": 0.9250608049194622, "grad_norm": 0.4150199890136719, "learning_rate": 8.011734278894396e-06, "loss": 0.3555, "step": 20158 }, { "epoch": 0.9251066954247166, "grad_norm": 0.5214964747428894, "learning_rate": 8.011538557834849e-06, "loss": 0.4432, "step": 20159 }, { "epoch": 0.9251525859299711, "grad_norm": 0.45616814494132996, "learning_rate": 8.0113428295335e-06, "loss": 0.3556, "step": 20160 }, { "epoch": 0.9251984764352256, "grad_norm": 0.45148035883903503, "learning_rate": 8.011147093990823e-06, "loss": 0.3517, "step": 20161 }, { "epoch": 0.9252443669404801, "grad_norm": 0.4500608742237091, "learning_rate": 8.010951351207288e-06, "loss": 0.4106, "step": 20162 }, { "epoch": 0.9252902574457345, "grad_norm": 0.4324566125869751, "learning_rate": 8.010755601183364e-06, "loss": 0.3378, "step": 20163 }, { "epoch": 0.925336147950989, "grad_norm": 0.4405294358730316, "learning_rate": 8.010559843919526e-06, "loss": 0.3646, "step": 20164 }, { "epoch": 0.9253820384562434, "grad_norm": 0.39832887053489685, "learning_rate": 8.010364079416239e-06, "loss": 0.2831, "step": 20165 }, { "epoch": 0.9254279289614978, "grad_norm": 0.5034353137016296, "learning_rate": 8.010168307673977e-06, "loss": 0.4928, "step": 20166 }, { "epoch": 0.9254738194667523, "grad_norm": 0.4509574770927429, "learning_rate": 8.00997252869321e-06, "loss": 0.4148, "step": 20167 }, { "epoch": 0.9255197099720068, "grad_norm": 0.4546881914138794, "learning_rate": 8.00977674247441e-06, "loss": 0.4365, "step": 20168 }, { "epoch": 0.9255656004772612, "grad_norm": 0.519939124584198, "learning_rate": 8.009580949018046e-06, "loss": 0.4972, "step": 20169 }, { "epoch": 0.9256114909825157, "grad_norm": 0.47314193844795227, "learning_rate": 8.00938514832459e-06, "loss": 0.4006, "step": 20170 }, { "epoch": 0.9256573814877702, "grad_norm": 0.45718103647232056, "learning_rate": 8.009189340394513e-06, "loss": 0.3915, "step": 20171 }, { "epoch": 0.9257032719930246, "grad_norm": 0.43417084217071533, "learning_rate": 8.008993525228283e-06, "loss": 0.3884, "step": 20172 }, { "epoch": 0.9257491624982791, "grad_norm": 0.43725332617759705, "learning_rate": 8.008797702826376e-06, "loss": 0.3967, "step": 20173 }, { "epoch": 0.9257950530035336, "grad_norm": 0.48367947340011597, "learning_rate": 8.00860187318926e-06, "loss": 0.4433, "step": 20174 }, { "epoch": 0.925840943508788, "grad_norm": 0.441753089427948, "learning_rate": 8.008406036317404e-06, "loss": 0.3638, "step": 20175 }, { "epoch": 0.9258868340140425, "grad_norm": 0.4716617465019226, "learning_rate": 8.008210192211283e-06, "loss": 0.4681, "step": 20176 }, { "epoch": 0.925932724519297, "grad_norm": 0.48167508840560913, "learning_rate": 8.008014340871364e-06, "loss": 0.4604, "step": 20177 }, { "epoch": 0.9259786150245514, "grad_norm": 0.452449232339859, "learning_rate": 8.007818482298121e-06, "loss": 0.3927, "step": 20178 }, { "epoch": 0.9260245055298059, "grad_norm": 0.46801790595054626, "learning_rate": 8.007622616492024e-06, "loss": 0.3827, "step": 20179 }, { "epoch": 0.9260703960350604, "grad_norm": 0.3981826603412628, "learning_rate": 8.007426743453544e-06, "loss": 0.3064, "step": 20180 }, { "epoch": 0.9261162865403149, "grad_norm": 0.47127172350883484, "learning_rate": 8.007230863183152e-06, "loss": 0.3514, "step": 20181 }, { "epoch": 0.9261621770455692, "grad_norm": 0.4735325276851654, "learning_rate": 8.007034975681319e-06, "loss": 0.4181, "step": 20182 }, { "epoch": 0.9262080675508237, "grad_norm": 0.47084641456604004, "learning_rate": 8.006839080948514e-06, "loss": 0.3889, "step": 20183 }, { "epoch": 0.9262539580560782, "grad_norm": 0.6653023362159729, "learning_rate": 8.006643178985212e-06, "loss": 0.3866, "step": 20184 }, { "epoch": 0.9262998485613326, "grad_norm": 0.451484352350235, "learning_rate": 8.006447269791881e-06, "loss": 0.3882, "step": 20185 }, { "epoch": 0.9263457390665871, "grad_norm": 0.45510998368263245, "learning_rate": 8.006251353368994e-06, "loss": 0.3988, "step": 20186 }, { "epoch": 0.9263916295718416, "grad_norm": 0.42842280864715576, "learning_rate": 8.00605542971702e-06, "loss": 0.3843, "step": 20187 }, { "epoch": 0.926437520077096, "grad_norm": 0.5064033269882202, "learning_rate": 8.005859498836432e-06, "loss": 0.4596, "step": 20188 }, { "epoch": 0.9264834105823505, "grad_norm": 0.456572026014328, "learning_rate": 8.005663560727702e-06, "loss": 0.4091, "step": 20189 }, { "epoch": 0.926529301087605, "grad_norm": 0.4626701772212982, "learning_rate": 8.005467615391298e-06, "loss": 0.4468, "step": 20190 }, { "epoch": 0.9265751915928594, "grad_norm": 0.49238666892051697, "learning_rate": 8.005271662827694e-06, "loss": 0.4446, "step": 20191 }, { "epoch": 0.9266210820981139, "grad_norm": 0.4442947208881378, "learning_rate": 8.005075703037359e-06, "loss": 0.4082, "step": 20192 }, { "epoch": 0.9266669726033684, "grad_norm": 0.4686109721660614, "learning_rate": 8.004879736020766e-06, "loss": 0.3892, "step": 20193 }, { "epoch": 0.9267128631086228, "grad_norm": 0.440669983625412, "learning_rate": 8.004683761778386e-06, "loss": 0.3981, "step": 20194 }, { "epoch": 0.9267587536138773, "grad_norm": 0.43945297598838806, "learning_rate": 8.00448778031069e-06, "loss": 0.3492, "step": 20195 }, { "epoch": 0.9268046441191318, "grad_norm": 0.4125298857688904, "learning_rate": 8.004291791618149e-06, "loss": 0.3223, "step": 20196 }, { "epoch": 0.9268505346243863, "grad_norm": 0.4818645119667053, "learning_rate": 8.004095795701233e-06, "loss": 0.3943, "step": 20197 }, { "epoch": 0.9268964251296407, "grad_norm": 0.4749382436275482, "learning_rate": 8.003899792560416e-06, "loss": 0.4584, "step": 20198 }, { "epoch": 0.9269423156348952, "grad_norm": 0.44277647137641907, "learning_rate": 8.003703782196169e-06, "loss": 0.3488, "step": 20199 }, { "epoch": 0.9269882061401497, "grad_norm": 0.4344618022441864, "learning_rate": 8.00350776460896e-06, "loss": 0.3582, "step": 20200 }, { "epoch": 0.927034096645404, "grad_norm": 0.4796612560749054, "learning_rate": 8.003311739799264e-06, "loss": 0.4864, "step": 20201 }, { "epoch": 0.9270799871506585, "grad_norm": 0.45990604162216187, "learning_rate": 8.003115707767553e-06, "loss": 0.3976, "step": 20202 }, { "epoch": 0.927125877655913, "grad_norm": 0.4325892925262451, "learning_rate": 8.002919668514294e-06, "loss": 0.3676, "step": 20203 }, { "epoch": 0.9271717681611674, "grad_norm": 0.4816102981567383, "learning_rate": 8.002723622039961e-06, "loss": 0.4824, "step": 20204 }, { "epoch": 0.9272176586664219, "grad_norm": 0.4213895797729492, "learning_rate": 8.002527568345027e-06, "loss": 0.3878, "step": 20205 }, { "epoch": 0.9272635491716764, "grad_norm": 0.4386197030544281, "learning_rate": 8.00233150742996e-06, "loss": 0.3727, "step": 20206 }, { "epoch": 0.9273094396769308, "grad_norm": 0.6181452870368958, "learning_rate": 8.002135439295234e-06, "loss": 0.4254, "step": 20207 }, { "epoch": 0.9273553301821853, "grad_norm": 0.4586374759674072, "learning_rate": 8.00193936394132e-06, "loss": 0.3798, "step": 20208 }, { "epoch": 0.9274012206874398, "grad_norm": 0.4967198371887207, "learning_rate": 8.00174328136869e-06, "loss": 0.5202, "step": 20209 }, { "epoch": 0.9274471111926942, "grad_norm": 0.45364734530448914, "learning_rate": 8.001547191577814e-06, "loss": 0.3352, "step": 20210 }, { "epoch": 0.9274930016979487, "grad_norm": 0.48298773169517517, "learning_rate": 8.001351094569164e-06, "loss": 0.4848, "step": 20211 }, { "epoch": 0.9275388922032032, "grad_norm": 0.43446314334869385, "learning_rate": 8.001154990343212e-06, "loss": 0.3639, "step": 20212 }, { "epoch": 0.9275847827084576, "grad_norm": 0.44874873757362366, "learning_rate": 8.00095887890043e-06, "loss": 0.4134, "step": 20213 }, { "epoch": 0.9276306732137121, "grad_norm": 0.481990784406662, "learning_rate": 8.000762760241287e-06, "loss": 0.4814, "step": 20214 }, { "epoch": 0.9276765637189666, "grad_norm": 0.48851269483566284, "learning_rate": 8.000566634366259e-06, "loss": 0.4551, "step": 20215 }, { "epoch": 0.9277224542242211, "grad_norm": 0.46863317489624023, "learning_rate": 8.000370501275814e-06, "loss": 0.3918, "step": 20216 }, { "epoch": 0.9277683447294754, "grad_norm": 0.4697883725166321, "learning_rate": 8.000174360970425e-06, "loss": 0.3421, "step": 20217 }, { "epoch": 0.92781423523473, "grad_norm": 0.4470983147621155, "learning_rate": 7.999978213450566e-06, "loss": 0.4035, "step": 20218 }, { "epoch": 0.9278601257399844, "grad_norm": 0.4085080921649933, "learning_rate": 7.999782058716703e-06, "loss": 0.3045, "step": 20219 }, { "epoch": 0.9279060162452388, "grad_norm": 0.4527370035648346, "learning_rate": 7.999585896769314e-06, "loss": 0.3876, "step": 20220 }, { "epoch": 0.9279519067504933, "grad_norm": 0.45354482531547546, "learning_rate": 7.999389727608864e-06, "loss": 0.4025, "step": 20221 }, { "epoch": 0.9279977972557478, "grad_norm": 0.4277035892009735, "learning_rate": 7.99919355123583e-06, "loss": 0.3524, "step": 20222 }, { "epoch": 0.9280436877610022, "grad_norm": 0.4565936326980591, "learning_rate": 7.998997367650683e-06, "loss": 0.4241, "step": 20223 }, { "epoch": 0.9280895782662567, "grad_norm": 0.45384135842323303, "learning_rate": 7.998801176853893e-06, "loss": 0.412, "step": 20224 }, { "epoch": 0.9281354687715112, "grad_norm": 0.424320787191391, "learning_rate": 7.998604978845932e-06, "loss": 0.3433, "step": 20225 }, { "epoch": 0.9281813592767656, "grad_norm": 0.43937960267066956, "learning_rate": 7.998408773627273e-06, "loss": 0.3589, "step": 20226 }, { "epoch": 0.9282272497820201, "grad_norm": 0.4242733120918274, "learning_rate": 7.998212561198386e-06, "loss": 0.3556, "step": 20227 }, { "epoch": 0.9282731402872746, "grad_norm": 0.4264681339263916, "learning_rate": 7.998016341559745e-06, "loss": 0.3627, "step": 20228 }, { "epoch": 0.928319030792529, "grad_norm": 0.44101452827453613, "learning_rate": 7.997820114711822e-06, "loss": 0.3785, "step": 20229 }, { "epoch": 0.9283649212977835, "grad_norm": 0.46124914288520813, "learning_rate": 7.997623880655087e-06, "loss": 0.4366, "step": 20230 }, { "epoch": 0.928410811803038, "grad_norm": 0.42966219782829285, "learning_rate": 7.997427639390011e-06, "loss": 0.3627, "step": 20231 }, { "epoch": 0.9284567023082925, "grad_norm": 0.43765339255332947, "learning_rate": 7.99723139091707e-06, "loss": 0.3926, "step": 20232 }, { "epoch": 0.9285025928135469, "grad_norm": 0.42160043120384216, "learning_rate": 7.997035135236732e-06, "loss": 0.2973, "step": 20233 }, { "epoch": 0.9285484833188014, "grad_norm": 0.42998433113098145, "learning_rate": 7.996838872349471e-06, "loss": 0.3295, "step": 20234 }, { "epoch": 0.9285943738240559, "grad_norm": 0.4393029510974884, "learning_rate": 7.996642602255759e-06, "loss": 0.3439, "step": 20235 }, { "epoch": 0.9286402643293102, "grad_norm": 0.47461357712745667, "learning_rate": 7.996446324956067e-06, "loss": 0.3829, "step": 20236 }, { "epoch": 0.9286861548345647, "grad_norm": 0.47021517157554626, "learning_rate": 7.996250040450866e-06, "loss": 0.4406, "step": 20237 }, { "epoch": 0.9287320453398192, "grad_norm": 0.4942142367362976, "learning_rate": 7.996053748740631e-06, "loss": 0.4161, "step": 20238 }, { "epoch": 0.9287779358450736, "grad_norm": 0.446237176656723, "learning_rate": 7.995857449825832e-06, "loss": 0.3393, "step": 20239 }, { "epoch": 0.9288238263503281, "grad_norm": 0.421702116727829, "learning_rate": 7.995661143706941e-06, "loss": 0.3168, "step": 20240 }, { "epoch": 0.9288697168555826, "grad_norm": 0.4544486999511719, "learning_rate": 7.99546483038443e-06, "loss": 0.4166, "step": 20241 }, { "epoch": 0.928915607360837, "grad_norm": 0.5076224207878113, "learning_rate": 7.99526850985877e-06, "loss": 0.4473, "step": 20242 }, { "epoch": 0.9289614978660915, "grad_norm": 0.39172518253326416, "learning_rate": 7.99507218213044e-06, "loss": 0.3092, "step": 20243 }, { "epoch": 0.929007388371346, "grad_norm": 0.44923603534698486, "learning_rate": 7.994875847199901e-06, "loss": 0.3855, "step": 20244 }, { "epoch": 0.9290532788766004, "grad_norm": 0.42723509669303894, "learning_rate": 7.994679505067633e-06, "loss": 0.364, "step": 20245 }, { "epoch": 0.9290991693818549, "grad_norm": 0.4429986774921417, "learning_rate": 7.994483155734105e-06, "loss": 0.4011, "step": 20246 }, { "epoch": 0.9291450598871094, "grad_norm": 0.46733662486076355, "learning_rate": 7.994286799199791e-06, "loss": 0.3823, "step": 20247 }, { "epoch": 0.9291909503923638, "grad_norm": 0.46118026971817017, "learning_rate": 7.994090435465162e-06, "loss": 0.3961, "step": 20248 }, { "epoch": 0.9292368408976183, "grad_norm": 0.44163617491722107, "learning_rate": 7.99389406453069e-06, "loss": 0.3762, "step": 20249 }, { "epoch": 0.9292827314028728, "grad_norm": 0.51311856508255, "learning_rate": 7.993697686396848e-06, "loss": 0.5183, "step": 20250 }, { "epoch": 0.9293286219081273, "grad_norm": 0.4374142289161682, "learning_rate": 7.993501301064109e-06, "loss": 0.3621, "step": 20251 }, { "epoch": 0.9293745124133816, "grad_norm": 0.42916128039360046, "learning_rate": 7.993304908532944e-06, "loss": 0.3733, "step": 20252 }, { "epoch": 0.9294204029186361, "grad_norm": 0.48459580540657043, "learning_rate": 7.993108508803825e-06, "loss": 0.4256, "step": 20253 }, { "epoch": 0.9294662934238906, "grad_norm": 0.42442217469215393, "learning_rate": 7.992912101877224e-06, "loss": 0.3325, "step": 20254 }, { "epoch": 0.929512183929145, "grad_norm": 0.45944973826408386, "learning_rate": 7.992715687753614e-06, "loss": 0.4378, "step": 20255 }, { "epoch": 0.9295580744343995, "grad_norm": 0.4844738841056824, "learning_rate": 7.992519266433467e-06, "loss": 0.4322, "step": 20256 }, { "epoch": 0.929603964939654, "grad_norm": 0.4237200617790222, "learning_rate": 7.992322837917256e-06, "loss": 0.3583, "step": 20257 }, { "epoch": 0.9296498554449084, "grad_norm": 0.47028473019599915, "learning_rate": 7.992126402205454e-06, "loss": 0.3961, "step": 20258 }, { "epoch": 0.9296957459501629, "grad_norm": 0.440351665019989, "learning_rate": 7.991929959298533e-06, "loss": 0.3921, "step": 20259 }, { "epoch": 0.9297416364554174, "grad_norm": 0.4935568571090698, "learning_rate": 7.991733509196962e-06, "loss": 0.4865, "step": 20260 }, { "epoch": 0.9297875269606718, "grad_norm": 0.49787211418151855, "learning_rate": 7.991537051901216e-06, "loss": 0.4678, "step": 20261 }, { "epoch": 0.9298334174659263, "grad_norm": 0.47834399342536926, "learning_rate": 7.99134058741177e-06, "loss": 0.4534, "step": 20262 }, { "epoch": 0.9298793079711808, "grad_norm": 0.46421730518341064, "learning_rate": 7.991144115729094e-06, "loss": 0.374, "step": 20263 }, { "epoch": 0.9299251984764352, "grad_norm": 0.4456886947154999, "learning_rate": 7.990947636853658e-06, "loss": 0.3795, "step": 20264 }, { "epoch": 0.9299710889816897, "grad_norm": 0.4817242920398712, "learning_rate": 7.990751150785938e-06, "loss": 0.4159, "step": 20265 }, { "epoch": 0.9300169794869442, "grad_norm": 0.474458783864975, "learning_rate": 7.990554657526407e-06, "loss": 0.4355, "step": 20266 }, { "epoch": 0.9300628699921986, "grad_norm": 0.4144432842731476, "learning_rate": 7.990358157075535e-06, "loss": 0.3368, "step": 20267 }, { "epoch": 0.9301087604974531, "grad_norm": 0.47062501311302185, "learning_rate": 7.990161649433795e-06, "loss": 0.4567, "step": 20268 }, { "epoch": 0.9301546510027076, "grad_norm": 0.6802839040756226, "learning_rate": 7.989965134601659e-06, "loss": 0.4008, "step": 20269 }, { "epoch": 0.930200541507962, "grad_norm": 0.44600000977516174, "learning_rate": 7.989768612579603e-06, "loss": 0.3317, "step": 20270 }, { "epoch": 0.9302464320132164, "grad_norm": 0.5261929631233215, "learning_rate": 7.989572083368095e-06, "loss": 0.4635, "step": 20271 }, { "epoch": 0.9302923225184709, "grad_norm": 0.4651545584201813, "learning_rate": 7.98937554696761e-06, "loss": 0.4539, "step": 20272 }, { "epoch": 0.9303382130237254, "grad_norm": 0.48982152342796326, "learning_rate": 7.98917900337862e-06, "loss": 0.4352, "step": 20273 }, { "epoch": 0.9303841035289798, "grad_norm": 0.4288163185119629, "learning_rate": 7.9889824526016e-06, "loss": 0.3159, "step": 20274 }, { "epoch": 0.9304299940342343, "grad_norm": 0.49071741104125977, "learning_rate": 7.988785894637018e-06, "loss": 0.4307, "step": 20275 }, { "epoch": 0.9304758845394888, "grad_norm": 0.4224623739719391, "learning_rate": 7.98858932948535e-06, "loss": 0.3389, "step": 20276 }, { "epoch": 0.9305217750447432, "grad_norm": 0.4941238462924957, "learning_rate": 7.988392757147067e-06, "loss": 0.5115, "step": 20277 }, { "epoch": 0.9305676655499977, "grad_norm": 0.5270593762397766, "learning_rate": 7.988196177622645e-06, "loss": 0.4703, "step": 20278 }, { "epoch": 0.9306135560552522, "grad_norm": 0.4579271376132965, "learning_rate": 7.987999590912552e-06, "loss": 0.3839, "step": 20279 }, { "epoch": 0.9306594465605066, "grad_norm": 0.45771870017051697, "learning_rate": 7.987802997017265e-06, "loss": 0.4263, "step": 20280 }, { "epoch": 0.9307053370657611, "grad_norm": 0.46832096576690674, "learning_rate": 7.987606395937253e-06, "loss": 0.4243, "step": 20281 }, { "epoch": 0.9307512275710156, "grad_norm": 0.4530988335609436, "learning_rate": 7.987409787672992e-06, "loss": 0.4262, "step": 20282 }, { "epoch": 0.93079711807627, "grad_norm": 0.47719812393188477, "learning_rate": 7.987213172224952e-06, "loss": 0.4251, "step": 20283 }, { "epoch": 0.9308430085815245, "grad_norm": 0.4545637369155884, "learning_rate": 7.987016549593608e-06, "loss": 0.3854, "step": 20284 }, { "epoch": 0.930888899086779, "grad_norm": 0.4217701852321625, "learning_rate": 7.986819919779432e-06, "loss": 0.3313, "step": 20285 }, { "epoch": 0.9309347895920335, "grad_norm": 0.4428486227989197, "learning_rate": 7.986623282782897e-06, "loss": 0.4072, "step": 20286 }, { "epoch": 0.9309806800972878, "grad_norm": 0.5097830891609192, "learning_rate": 7.986426638604475e-06, "loss": 0.3917, "step": 20287 }, { "epoch": 0.9310265706025423, "grad_norm": 0.4565395414829254, "learning_rate": 7.98622998724464e-06, "loss": 0.3843, "step": 20288 }, { "epoch": 0.9310724611077968, "grad_norm": 0.4487934112548828, "learning_rate": 7.986033328703864e-06, "loss": 0.3677, "step": 20289 }, { "epoch": 0.9311183516130512, "grad_norm": 0.47644639015197754, "learning_rate": 7.98583666298262e-06, "loss": 0.3911, "step": 20290 }, { "epoch": 0.9311642421183057, "grad_norm": 0.4260871112346649, "learning_rate": 7.98563999008138e-06, "loss": 0.3829, "step": 20291 }, { "epoch": 0.9312101326235602, "grad_norm": 0.41332828998565674, "learning_rate": 7.98544331000062e-06, "loss": 0.3215, "step": 20292 }, { "epoch": 0.9312560231288146, "grad_norm": 0.42774856090545654, "learning_rate": 7.98524662274081e-06, "loss": 0.3751, "step": 20293 }, { "epoch": 0.9313019136340691, "grad_norm": 0.4162341356277466, "learning_rate": 7.985049928302426e-06, "loss": 0.3333, "step": 20294 }, { "epoch": 0.9313478041393236, "grad_norm": 0.43373873829841614, "learning_rate": 7.984853226685936e-06, "loss": 0.345, "step": 20295 }, { "epoch": 0.931393694644578, "grad_norm": 0.4557092785835266, "learning_rate": 7.984656517891819e-06, "loss": 0.3962, "step": 20296 }, { "epoch": 0.9314395851498325, "grad_norm": 0.45525923371315, "learning_rate": 7.984459801920542e-06, "loss": 0.3868, "step": 20297 }, { "epoch": 0.931485475655087, "grad_norm": 0.44930750131607056, "learning_rate": 7.984263078772583e-06, "loss": 0.3625, "step": 20298 }, { "epoch": 0.9315313661603414, "grad_norm": 0.4670160114765167, "learning_rate": 7.984066348448413e-06, "loss": 0.4444, "step": 20299 }, { "epoch": 0.9315772566655959, "grad_norm": 0.47160497307777405, "learning_rate": 7.983869610948505e-06, "loss": 0.3931, "step": 20300 }, { "epoch": 0.9316231471708504, "grad_norm": 0.4703989028930664, "learning_rate": 7.983672866273332e-06, "loss": 0.4068, "step": 20301 }, { "epoch": 0.9316690376761048, "grad_norm": 0.4948856830596924, "learning_rate": 7.98347611442337e-06, "loss": 0.3906, "step": 20302 }, { "epoch": 0.9317149281813593, "grad_norm": 0.4895839989185333, "learning_rate": 7.983279355399088e-06, "loss": 0.4743, "step": 20303 }, { "epoch": 0.9317608186866138, "grad_norm": 0.47057029604911804, "learning_rate": 7.983082589200959e-06, "loss": 0.3756, "step": 20304 }, { "epoch": 0.9318067091918683, "grad_norm": 0.4436322748661041, "learning_rate": 7.98288581582946e-06, "loss": 0.3742, "step": 20305 }, { "epoch": 0.9318525996971226, "grad_norm": 0.4512331485748291, "learning_rate": 7.98268903528506e-06, "loss": 0.3841, "step": 20306 }, { "epoch": 0.9318984902023771, "grad_norm": 0.4703245162963867, "learning_rate": 7.982492247568233e-06, "loss": 0.3756, "step": 20307 }, { "epoch": 0.9319443807076316, "grad_norm": 0.46462470293045044, "learning_rate": 7.982295452679458e-06, "loss": 0.4346, "step": 20308 }, { "epoch": 0.931990271212886, "grad_norm": 0.45305514335632324, "learning_rate": 7.9820986506192e-06, "loss": 0.3817, "step": 20309 }, { "epoch": 0.9320361617181405, "grad_norm": 0.4430617094039917, "learning_rate": 7.981901841387939e-06, "loss": 0.3965, "step": 20310 }, { "epoch": 0.932082052223395, "grad_norm": 0.43959179520606995, "learning_rate": 7.981705024986144e-06, "loss": 0.3835, "step": 20311 }, { "epoch": 0.9321279427286494, "grad_norm": 0.43895307183265686, "learning_rate": 7.981508201414287e-06, "loss": 0.353, "step": 20312 }, { "epoch": 0.9321738332339039, "grad_norm": 0.4520552158355713, "learning_rate": 7.981311370672847e-06, "loss": 0.3773, "step": 20313 }, { "epoch": 0.9322197237391584, "grad_norm": 0.7275726795196533, "learning_rate": 7.981114532762293e-06, "loss": 0.3479, "step": 20314 }, { "epoch": 0.9322656142444128, "grad_norm": 0.43414735794067383, "learning_rate": 7.980917687683099e-06, "loss": 0.3737, "step": 20315 }, { "epoch": 0.9323115047496673, "grad_norm": 0.4539187252521515, "learning_rate": 7.980720835435739e-06, "loss": 0.4287, "step": 20316 }, { "epoch": 0.9323573952549218, "grad_norm": 0.467336505651474, "learning_rate": 7.980523976020687e-06, "loss": 0.419, "step": 20317 }, { "epoch": 0.9324032857601762, "grad_norm": 0.46483537554740906, "learning_rate": 7.980327109438415e-06, "loss": 0.4167, "step": 20318 }, { "epoch": 0.9324491762654307, "grad_norm": 0.4978630244731903, "learning_rate": 7.980130235689396e-06, "loss": 0.4392, "step": 20319 }, { "epoch": 0.9324950667706852, "grad_norm": 0.4810475707054138, "learning_rate": 7.979933354774105e-06, "loss": 0.4129, "step": 20320 }, { "epoch": 0.9325409572759396, "grad_norm": 0.4841994345188141, "learning_rate": 7.979736466693015e-06, "loss": 0.4371, "step": 20321 }, { "epoch": 0.932586847781194, "grad_norm": 0.5075539946556091, "learning_rate": 7.979539571446599e-06, "loss": 0.4438, "step": 20322 }, { "epoch": 0.9326327382864485, "grad_norm": 0.4406054615974426, "learning_rate": 7.979342669035331e-06, "loss": 0.4068, "step": 20323 }, { "epoch": 0.932678628791703, "grad_norm": 0.4545266926288605, "learning_rate": 7.979145759459684e-06, "loss": 0.4097, "step": 20324 }, { "epoch": 0.9327245192969574, "grad_norm": 0.43244898319244385, "learning_rate": 7.97894884272013e-06, "loss": 0.3541, "step": 20325 }, { "epoch": 0.9327704098022119, "grad_norm": 0.44165661931037903, "learning_rate": 7.978751918817146e-06, "loss": 0.386, "step": 20326 }, { "epoch": 0.9328163003074664, "grad_norm": 0.4612163305282593, "learning_rate": 7.978554987751204e-06, "loss": 0.418, "step": 20327 }, { "epoch": 0.9328621908127208, "grad_norm": 0.4388722777366638, "learning_rate": 7.978358049522777e-06, "loss": 0.3154, "step": 20328 }, { "epoch": 0.9329080813179753, "grad_norm": 0.43616822361946106, "learning_rate": 7.978161104132338e-06, "loss": 0.3794, "step": 20329 }, { "epoch": 0.9329539718232298, "grad_norm": 0.4512796998023987, "learning_rate": 7.977964151580361e-06, "loss": 0.3774, "step": 20330 }, { "epoch": 0.9329998623284842, "grad_norm": 0.4513742923736572, "learning_rate": 7.977767191867323e-06, "loss": 0.4075, "step": 20331 }, { "epoch": 0.9330457528337387, "grad_norm": 0.4227915108203888, "learning_rate": 7.977570224993693e-06, "loss": 0.4004, "step": 20332 }, { "epoch": 0.9330916433389932, "grad_norm": 0.4252769649028778, "learning_rate": 7.977373250959947e-06, "loss": 0.3662, "step": 20333 }, { "epoch": 0.9331375338442476, "grad_norm": 0.44232627749443054, "learning_rate": 7.977176269766556e-06, "loss": 0.3664, "step": 20334 }, { "epoch": 0.9331834243495021, "grad_norm": 0.4657588303089142, "learning_rate": 7.976979281413997e-06, "loss": 0.4052, "step": 20335 }, { "epoch": 0.9332293148547566, "grad_norm": 0.4564688801765442, "learning_rate": 7.976782285902743e-06, "loss": 0.4025, "step": 20336 }, { "epoch": 0.933275205360011, "grad_norm": 0.41343116760253906, "learning_rate": 7.976585283233267e-06, "loss": 0.3549, "step": 20337 }, { "epoch": 0.9333210958652655, "grad_norm": 0.43240121006965637, "learning_rate": 7.976388273406041e-06, "loss": 0.362, "step": 20338 }, { "epoch": 0.93336698637052, "grad_norm": 0.4117271900177002, "learning_rate": 7.976191256421544e-06, "loss": 0.3331, "step": 20339 }, { "epoch": 0.9334128768757745, "grad_norm": 0.431355744600296, "learning_rate": 7.975994232280242e-06, "loss": 0.331, "step": 20340 }, { "epoch": 0.9334587673810288, "grad_norm": 0.4309585392475128, "learning_rate": 7.975797200982617e-06, "loss": 0.3847, "step": 20341 }, { "epoch": 0.9335046578862833, "grad_norm": 0.4521945118904114, "learning_rate": 7.975600162529138e-06, "loss": 0.3793, "step": 20342 }, { "epoch": 0.9335505483915378, "grad_norm": 0.42503949999809265, "learning_rate": 7.975403116920279e-06, "loss": 0.35, "step": 20343 }, { "epoch": 0.9335964388967922, "grad_norm": 0.4731177091598511, "learning_rate": 7.975206064156512e-06, "loss": 0.4491, "step": 20344 }, { "epoch": 0.9336423294020467, "grad_norm": 0.5135696530342102, "learning_rate": 7.97500900423832e-06, "loss": 0.3289, "step": 20345 }, { "epoch": 0.9336882199073012, "grad_norm": 0.42334139347076416, "learning_rate": 7.974811937166164e-06, "loss": 0.3227, "step": 20346 }, { "epoch": 0.9337341104125556, "grad_norm": 0.40354225039482117, "learning_rate": 7.974614862940526e-06, "loss": 0.2903, "step": 20347 }, { "epoch": 0.9337800009178101, "grad_norm": 0.4627375602722168, "learning_rate": 7.974417781561879e-06, "loss": 0.4055, "step": 20348 }, { "epoch": 0.9338258914230646, "grad_norm": 0.44367659091949463, "learning_rate": 7.974220693030696e-06, "loss": 0.3758, "step": 20349 }, { "epoch": 0.933871781928319, "grad_norm": 0.4567160904407501, "learning_rate": 7.974023597347451e-06, "loss": 0.3798, "step": 20350 }, { "epoch": 0.9339176724335735, "grad_norm": 0.451743483543396, "learning_rate": 7.973826494512618e-06, "loss": 0.4048, "step": 20351 }, { "epoch": 0.933963562938828, "grad_norm": 0.48589593172073364, "learning_rate": 7.973629384526668e-06, "loss": 0.38, "step": 20352 }, { "epoch": 0.9340094534440824, "grad_norm": 0.46221789717674255, "learning_rate": 7.973432267390082e-06, "loss": 0.4306, "step": 20353 }, { "epoch": 0.9340553439493369, "grad_norm": 0.5218206644058228, "learning_rate": 7.973235143103327e-06, "loss": 0.5291, "step": 20354 }, { "epoch": 0.9341012344545914, "grad_norm": 0.44337570667266846, "learning_rate": 7.97303801166688e-06, "loss": 0.4109, "step": 20355 }, { "epoch": 0.9341471249598458, "grad_norm": 0.41345709562301636, "learning_rate": 7.972840873081217e-06, "loss": 0.3393, "step": 20356 }, { "epoch": 0.9341930154651003, "grad_norm": 0.4655740559101105, "learning_rate": 7.972643727346809e-06, "loss": 0.4, "step": 20357 }, { "epoch": 0.9342389059703547, "grad_norm": 0.5042961835861206, "learning_rate": 7.97244657446413e-06, "loss": 0.4546, "step": 20358 }, { "epoch": 0.9342847964756092, "grad_norm": 0.43266043066978455, "learning_rate": 7.972249414433657e-06, "loss": 0.3475, "step": 20359 }, { "epoch": 0.9343306869808636, "grad_norm": 0.4629168212413788, "learning_rate": 7.97205224725586e-06, "loss": 0.3716, "step": 20360 }, { "epoch": 0.9343765774861181, "grad_norm": 0.4277596175670624, "learning_rate": 7.971855072931217e-06, "loss": 0.376, "step": 20361 }, { "epoch": 0.9344224679913726, "grad_norm": 0.4335121512413025, "learning_rate": 7.971657891460199e-06, "loss": 0.3687, "step": 20362 }, { "epoch": 0.934468358496627, "grad_norm": 0.4694441556930542, "learning_rate": 7.971460702843283e-06, "loss": 0.4486, "step": 20363 }, { "epoch": 0.9345142490018815, "grad_norm": 0.4622114300727844, "learning_rate": 7.97126350708094e-06, "loss": 0.4866, "step": 20364 }, { "epoch": 0.934560139507136, "grad_norm": 0.4361158609390259, "learning_rate": 7.971066304173649e-06, "loss": 0.3551, "step": 20365 }, { "epoch": 0.9346060300123904, "grad_norm": 0.47331929206848145, "learning_rate": 7.970869094121878e-06, "loss": 0.4246, "step": 20366 }, { "epoch": 0.9346519205176449, "grad_norm": 0.4339955151081085, "learning_rate": 7.970671876926108e-06, "loss": 0.3374, "step": 20367 }, { "epoch": 0.9346978110228994, "grad_norm": 0.4230981171131134, "learning_rate": 7.970474652586807e-06, "loss": 0.3337, "step": 20368 }, { "epoch": 0.9347437015281538, "grad_norm": 0.48214393854141235, "learning_rate": 7.970277421104452e-06, "loss": 0.4184, "step": 20369 }, { "epoch": 0.9347895920334083, "grad_norm": 0.4599495232105255, "learning_rate": 7.970080182479519e-06, "loss": 0.3888, "step": 20370 }, { "epoch": 0.9348354825386628, "grad_norm": 0.45671120285987854, "learning_rate": 7.969882936712478e-06, "loss": 0.3895, "step": 20371 }, { "epoch": 0.9348813730439172, "grad_norm": 0.4504025876522064, "learning_rate": 7.969685683803808e-06, "loss": 0.4116, "step": 20372 }, { "epoch": 0.9349272635491717, "grad_norm": 0.4363076388835907, "learning_rate": 7.969488423753981e-06, "loss": 0.3894, "step": 20373 }, { "epoch": 0.9349731540544262, "grad_norm": 0.43687349557876587, "learning_rate": 7.96929115656347e-06, "loss": 0.4082, "step": 20374 }, { "epoch": 0.9350190445596807, "grad_norm": 0.43681254982948303, "learning_rate": 7.969093882232752e-06, "loss": 0.3989, "step": 20375 }, { "epoch": 0.935064935064935, "grad_norm": 0.4616544842720032, "learning_rate": 7.968896600762302e-06, "loss": 0.4028, "step": 20376 }, { "epoch": 0.9351108255701895, "grad_norm": 0.43930792808532715, "learning_rate": 7.968699312152591e-06, "loss": 0.3782, "step": 20377 }, { "epoch": 0.935156716075444, "grad_norm": 0.44546934962272644, "learning_rate": 7.968502016404095e-06, "loss": 0.3732, "step": 20378 }, { "epoch": 0.9352026065806984, "grad_norm": 0.44198182225227356, "learning_rate": 7.968304713517287e-06, "loss": 0.3757, "step": 20379 }, { "epoch": 0.9352484970859529, "grad_norm": 0.4190891683101654, "learning_rate": 7.968107403492645e-06, "loss": 0.3177, "step": 20380 }, { "epoch": 0.9352943875912074, "grad_norm": 0.46070799231529236, "learning_rate": 7.967910086330641e-06, "loss": 0.3991, "step": 20381 }, { "epoch": 0.9353402780964618, "grad_norm": 0.42522281408309937, "learning_rate": 7.96771276203175e-06, "loss": 0.3237, "step": 20382 }, { "epoch": 0.9353861686017163, "grad_norm": 0.45431822538375854, "learning_rate": 7.967515430596447e-06, "loss": 0.3971, "step": 20383 }, { "epoch": 0.9354320591069708, "grad_norm": 0.44196534156799316, "learning_rate": 7.967318092025205e-06, "loss": 0.4057, "step": 20384 }, { "epoch": 0.9354779496122252, "grad_norm": 0.42814087867736816, "learning_rate": 7.967120746318499e-06, "loss": 0.3866, "step": 20385 }, { "epoch": 0.9355238401174797, "grad_norm": 0.4517696797847748, "learning_rate": 7.966923393476807e-06, "loss": 0.3823, "step": 20386 }, { "epoch": 0.9355697306227342, "grad_norm": 0.4592064917087555, "learning_rate": 7.966726033500598e-06, "loss": 0.3588, "step": 20387 }, { "epoch": 0.9356156211279886, "grad_norm": 0.4304770231246948, "learning_rate": 7.96652866639035e-06, "loss": 0.4023, "step": 20388 }, { "epoch": 0.9356615116332431, "grad_norm": 0.4599981904029846, "learning_rate": 7.966331292146536e-06, "loss": 0.4582, "step": 20389 }, { "epoch": 0.9357074021384976, "grad_norm": 0.42678970098495483, "learning_rate": 7.966133910769634e-06, "loss": 0.3494, "step": 20390 }, { "epoch": 0.935753292643752, "grad_norm": 0.4330138564109802, "learning_rate": 7.965936522260113e-06, "loss": 0.3165, "step": 20391 }, { "epoch": 0.9357991831490065, "grad_norm": 0.42991411685943604, "learning_rate": 7.965739126618451e-06, "loss": 0.3481, "step": 20392 }, { "epoch": 0.935845073654261, "grad_norm": 0.4437566101551056, "learning_rate": 7.965541723845125e-06, "loss": 0.4191, "step": 20393 }, { "epoch": 0.9358909641595154, "grad_norm": 0.49238455295562744, "learning_rate": 7.965344313940604e-06, "loss": 0.4834, "step": 20394 }, { "epoch": 0.9359368546647698, "grad_norm": 0.445453941822052, "learning_rate": 7.965146896905366e-06, "loss": 0.369, "step": 20395 }, { "epoch": 0.9359827451700243, "grad_norm": 0.5172765851020813, "learning_rate": 7.964949472739889e-06, "loss": 0.5219, "step": 20396 }, { "epoch": 0.9360286356752788, "grad_norm": 0.423674613237381, "learning_rate": 7.964752041444642e-06, "loss": 0.3265, "step": 20397 }, { "epoch": 0.9360745261805332, "grad_norm": 0.5061588287353516, "learning_rate": 7.9645546030201e-06, "loss": 0.4757, "step": 20398 }, { "epoch": 0.9361204166857877, "grad_norm": 0.47754406929016113, "learning_rate": 7.964357157466744e-06, "loss": 0.4947, "step": 20399 }, { "epoch": 0.9361663071910422, "grad_norm": 0.4733749330043793, "learning_rate": 7.964159704785041e-06, "loss": 0.4653, "step": 20400 }, { "epoch": 0.9362121976962966, "grad_norm": 0.456330806016922, "learning_rate": 7.963962244975472e-06, "loss": 0.3753, "step": 20401 }, { "epoch": 0.9362580882015511, "grad_norm": 0.4928845465183258, "learning_rate": 7.963764778038507e-06, "loss": 0.4028, "step": 20402 }, { "epoch": 0.9363039787068056, "grad_norm": 0.45592936873435974, "learning_rate": 7.963567303974625e-06, "loss": 0.3488, "step": 20403 }, { "epoch": 0.93634986921206, "grad_norm": 0.4464655816555023, "learning_rate": 7.963369822784299e-06, "loss": 0.3111, "step": 20404 }, { "epoch": 0.9363957597173145, "grad_norm": 0.4612474739551544, "learning_rate": 7.963172334468004e-06, "loss": 0.3762, "step": 20405 }, { "epoch": 0.936441650222569, "grad_norm": 0.4287279546260834, "learning_rate": 7.962974839026211e-06, "loss": 0.3699, "step": 20406 }, { "epoch": 0.9364875407278234, "grad_norm": 0.49074408411979675, "learning_rate": 7.962777336459405e-06, "loss": 0.4481, "step": 20407 }, { "epoch": 0.9365334312330779, "grad_norm": 0.4766649603843689, "learning_rate": 7.962579826768048e-06, "loss": 0.4713, "step": 20408 }, { "epoch": 0.9365793217383324, "grad_norm": 0.46244728565216064, "learning_rate": 7.962382309952626e-06, "loss": 0.4029, "step": 20409 }, { "epoch": 0.9366252122435867, "grad_norm": 0.43920326232910156, "learning_rate": 7.962184786013608e-06, "loss": 0.3638, "step": 20410 }, { "epoch": 0.9366711027488412, "grad_norm": 0.45780420303344727, "learning_rate": 7.961987254951471e-06, "loss": 0.3586, "step": 20411 }, { "epoch": 0.9367169932540957, "grad_norm": 0.45953822135925293, "learning_rate": 7.961789716766689e-06, "loss": 0.3774, "step": 20412 }, { "epoch": 0.9367628837593502, "grad_norm": 0.4686456322669983, "learning_rate": 7.961592171459738e-06, "loss": 0.4109, "step": 20413 }, { "epoch": 0.9368087742646046, "grad_norm": 0.45339253544807434, "learning_rate": 7.96139461903109e-06, "loss": 0.4045, "step": 20414 }, { "epoch": 0.9368546647698591, "grad_norm": 0.4976917803287506, "learning_rate": 7.961197059481228e-06, "loss": 0.5062, "step": 20415 }, { "epoch": 0.9369005552751136, "grad_norm": 0.4211575388908386, "learning_rate": 7.960999492810617e-06, "loss": 0.3706, "step": 20416 }, { "epoch": 0.936946445780368, "grad_norm": 0.48366421461105347, "learning_rate": 7.960801919019738e-06, "loss": 0.4443, "step": 20417 }, { "epoch": 0.9369923362856225, "grad_norm": 0.43069833517074585, "learning_rate": 7.960604338109066e-06, "loss": 0.3553, "step": 20418 }, { "epoch": 0.937038226790877, "grad_norm": 0.6587809920310974, "learning_rate": 7.960406750079073e-06, "loss": 0.4857, "step": 20419 }, { "epoch": 0.9370841172961314, "grad_norm": 0.44759494066238403, "learning_rate": 7.960209154930236e-06, "loss": 0.4094, "step": 20420 }, { "epoch": 0.9371300078013859, "grad_norm": 0.44655126333236694, "learning_rate": 7.960011552663032e-06, "loss": 0.3584, "step": 20421 }, { "epoch": 0.9371758983066404, "grad_norm": 0.4556111693382263, "learning_rate": 7.959813943277933e-06, "loss": 0.4029, "step": 20422 }, { "epoch": 0.9372217888118948, "grad_norm": 0.4245489239692688, "learning_rate": 7.959616326775417e-06, "loss": 0.3837, "step": 20423 }, { "epoch": 0.9372676793171493, "grad_norm": 0.44670405983924866, "learning_rate": 7.959418703155956e-06, "loss": 0.4045, "step": 20424 }, { "epoch": 0.9373135698224038, "grad_norm": 0.4947430491447449, "learning_rate": 7.959221072420029e-06, "loss": 0.4641, "step": 20425 }, { "epoch": 0.9373594603276582, "grad_norm": 0.45807018876075745, "learning_rate": 7.959023434568108e-06, "loss": 0.4043, "step": 20426 }, { "epoch": 0.9374053508329127, "grad_norm": 0.4390721619129181, "learning_rate": 7.95882578960067e-06, "loss": 0.3856, "step": 20427 }, { "epoch": 0.9374512413381672, "grad_norm": 0.44408053159713745, "learning_rate": 7.958628137518191e-06, "loss": 0.3847, "step": 20428 }, { "epoch": 0.9374971318434216, "grad_norm": 0.4389475882053375, "learning_rate": 7.958430478321144e-06, "loss": 0.3983, "step": 20429 }, { "epoch": 0.937543022348676, "grad_norm": 0.45934879779815674, "learning_rate": 7.958232812010005e-06, "loss": 0.424, "step": 20430 }, { "epoch": 0.9375889128539305, "grad_norm": 0.4600032567977905, "learning_rate": 7.958035138585249e-06, "loss": 0.4207, "step": 20431 }, { "epoch": 0.937634803359185, "grad_norm": 0.4573203921318054, "learning_rate": 7.957837458047354e-06, "loss": 0.3927, "step": 20432 }, { "epoch": 0.9376806938644394, "grad_norm": 0.4376120865345001, "learning_rate": 7.957639770396792e-06, "loss": 0.37, "step": 20433 }, { "epoch": 0.9377265843696939, "grad_norm": 0.4864038825035095, "learning_rate": 7.957442075634041e-06, "loss": 0.5003, "step": 20434 }, { "epoch": 0.9377724748749484, "grad_norm": 0.4329606592655182, "learning_rate": 7.957244373759574e-06, "loss": 0.3318, "step": 20435 }, { "epoch": 0.9378183653802028, "grad_norm": 0.4388364851474762, "learning_rate": 7.957046664773869e-06, "loss": 0.3154, "step": 20436 }, { "epoch": 0.9378642558854573, "grad_norm": 0.4855524003505707, "learning_rate": 7.956848948677398e-06, "loss": 0.4364, "step": 20437 }, { "epoch": 0.9379101463907118, "grad_norm": 0.47706377506256104, "learning_rate": 7.95665122547064e-06, "loss": 0.4316, "step": 20438 }, { "epoch": 0.9379560368959662, "grad_norm": 0.4241121709346771, "learning_rate": 7.956453495154069e-06, "loss": 0.3421, "step": 20439 }, { "epoch": 0.9380019274012207, "grad_norm": 0.48005160689353943, "learning_rate": 7.95625575772816e-06, "loss": 0.4906, "step": 20440 }, { "epoch": 0.9380478179064752, "grad_norm": 0.4619748592376709, "learning_rate": 7.956058013193389e-06, "loss": 0.3642, "step": 20441 }, { "epoch": 0.9380937084117296, "grad_norm": 0.47387444972991943, "learning_rate": 7.955860261550232e-06, "loss": 0.3681, "step": 20442 }, { "epoch": 0.9381395989169841, "grad_norm": 0.4567619562149048, "learning_rate": 7.955662502799163e-06, "loss": 0.3738, "step": 20443 }, { "epoch": 0.9381854894222386, "grad_norm": 0.4508633017539978, "learning_rate": 7.955464736940658e-06, "loss": 0.4289, "step": 20444 }, { "epoch": 0.938231379927493, "grad_norm": 0.5032143592834473, "learning_rate": 7.955266963975194e-06, "loss": 0.4889, "step": 20445 }, { "epoch": 0.9382772704327474, "grad_norm": 0.4434879422187805, "learning_rate": 7.955069183903246e-06, "loss": 0.3607, "step": 20446 }, { "epoch": 0.9383231609380019, "grad_norm": 0.4410766065120697, "learning_rate": 7.954871396725289e-06, "loss": 0.3437, "step": 20447 }, { "epoch": 0.9383690514432564, "grad_norm": 0.45540422201156616, "learning_rate": 7.954673602441799e-06, "loss": 0.4294, "step": 20448 }, { "epoch": 0.9384149419485108, "grad_norm": 0.44661667943000793, "learning_rate": 7.954475801053251e-06, "loss": 0.3385, "step": 20449 }, { "epoch": 0.9384608324537653, "grad_norm": 0.4406035542488098, "learning_rate": 7.954277992560123e-06, "loss": 0.3618, "step": 20450 }, { "epoch": 0.9385067229590198, "grad_norm": 0.4627743363380432, "learning_rate": 7.954080176962885e-06, "loss": 0.4299, "step": 20451 }, { "epoch": 0.9385526134642742, "grad_norm": 0.4619785249233246, "learning_rate": 7.953882354262019e-06, "loss": 0.4026, "step": 20452 }, { "epoch": 0.9385985039695287, "grad_norm": 0.4931475818157196, "learning_rate": 7.953684524457999e-06, "loss": 0.4647, "step": 20453 }, { "epoch": 0.9386443944747832, "grad_norm": 0.48674526810646057, "learning_rate": 7.953486687551298e-06, "loss": 0.4789, "step": 20454 }, { "epoch": 0.9386902849800376, "grad_norm": 0.4216320216655731, "learning_rate": 7.953288843542395e-06, "loss": 0.3546, "step": 20455 }, { "epoch": 0.9387361754852921, "grad_norm": 0.46260204911231995, "learning_rate": 7.953090992431765e-06, "loss": 0.3816, "step": 20456 }, { "epoch": 0.9387820659905466, "grad_norm": 0.4285966157913208, "learning_rate": 7.952893134219881e-06, "loss": 0.3851, "step": 20457 }, { "epoch": 0.938827956495801, "grad_norm": 0.4379470646381378, "learning_rate": 7.952695268907222e-06, "loss": 0.3848, "step": 20458 }, { "epoch": 0.9388738470010555, "grad_norm": 0.45765313506126404, "learning_rate": 7.952497396494263e-06, "loss": 0.3935, "step": 20459 }, { "epoch": 0.93891973750631, "grad_norm": 0.40586400032043457, "learning_rate": 7.95229951698148e-06, "loss": 0.3466, "step": 20460 }, { "epoch": 0.9389656280115644, "grad_norm": 0.44342708587646484, "learning_rate": 7.952101630369347e-06, "loss": 0.381, "step": 20461 }, { "epoch": 0.9390115185168189, "grad_norm": 0.4633656442165375, "learning_rate": 7.951903736658341e-06, "loss": 0.4039, "step": 20462 }, { "epoch": 0.9390574090220734, "grad_norm": 0.4547712504863739, "learning_rate": 7.95170583584894e-06, "loss": 0.371, "step": 20463 }, { "epoch": 0.9391032995273279, "grad_norm": 0.44663509726524353, "learning_rate": 7.951507927941618e-06, "loss": 0.3727, "step": 20464 }, { "epoch": 0.9391491900325822, "grad_norm": 0.44239839911460876, "learning_rate": 7.95131001293685e-06, "loss": 0.3964, "step": 20465 }, { "epoch": 0.9391950805378367, "grad_norm": 0.46243494749069214, "learning_rate": 7.951112090835114e-06, "loss": 0.4166, "step": 20466 }, { "epoch": 0.9392409710430912, "grad_norm": 0.4498955309391022, "learning_rate": 7.950914161636885e-06, "loss": 0.3986, "step": 20467 }, { "epoch": 0.9392868615483456, "grad_norm": 0.4769071638584137, "learning_rate": 7.950716225342637e-06, "loss": 0.4745, "step": 20468 }, { "epoch": 0.9393327520536001, "grad_norm": 0.5031524300575256, "learning_rate": 7.950518281952849e-06, "loss": 0.3978, "step": 20469 }, { "epoch": 0.9393786425588546, "grad_norm": 0.4308260977268219, "learning_rate": 7.950320331467996e-06, "loss": 0.3452, "step": 20470 }, { "epoch": 0.939424533064109, "grad_norm": 0.4867488145828247, "learning_rate": 7.950122373888553e-06, "loss": 0.4589, "step": 20471 }, { "epoch": 0.9394704235693635, "grad_norm": 0.45031046867370605, "learning_rate": 7.949924409214997e-06, "loss": 0.3789, "step": 20472 }, { "epoch": 0.939516314074618, "grad_norm": 0.4227551221847534, "learning_rate": 7.949726437447803e-06, "loss": 0.3841, "step": 20473 }, { "epoch": 0.9395622045798724, "grad_norm": 0.4574127793312073, "learning_rate": 7.949528458587448e-06, "loss": 0.4045, "step": 20474 }, { "epoch": 0.9396080950851269, "grad_norm": 0.4406052529811859, "learning_rate": 7.949330472634409e-06, "loss": 0.4271, "step": 20475 }, { "epoch": 0.9396539855903814, "grad_norm": 0.44044560194015503, "learning_rate": 7.949132479589161e-06, "loss": 0.3389, "step": 20476 }, { "epoch": 0.9396998760956358, "grad_norm": 0.4874519109725952, "learning_rate": 7.948934479452179e-06, "loss": 0.5168, "step": 20477 }, { "epoch": 0.9397457666008903, "grad_norm": 0.46071672439575195, "learning_rate": 7.948736472223942e-06, "loss": 0.332, "step": 20478 }, { "epoch": 0.9397916571061448, "grad_norm": 0.45353972911834717, "learning_rate": 7.948538457904923e-06, "loss": 0.371, "step": 20479 }, { "epoch": 0.9398375476113991, "grad_norm": 0.4417482912540436, "learning_rate": 7.9483404364956e-06, "loss": 0.3356, "step": 20480 }, { "epoch": 0.9398834381166536, "grad_norm": 0.4866461157798767, "learning_rate": 7.948142407996449e-06, "loss": 0.4622, "step": 20481 }, { "epoch": 0.9399293286219081, "grad_norm": 0.4350813925266266, "learning_rate": 7.947944372407945e-06, "loss": 0.3773, "step": 20482 }, { "epoch": 0.9399752191271626, "grad_norm": 0.4414445757865906, "learning_rate": 7.947746329730568e-06, "loss": 0.4034, "step": 20483 }, { "epoch": 0.940021109632417, "grad_norm": 0.44155555963516235, "learning_rate": 7.947548279964787e-06, "loss": 0.3983, "step": 20484 }, { "epoch": 0.9400670001376715, "grad_norm": 0.5072855353355408, "learning_rate": 7.947350223111085e-06, "loss": 0.5023, "step": 20485 }, { "epoch": 0.940112890642926, "grad_norm": 0.4436251223087311, "learning_rate": 7.947152159169938e-06, "loss": 0.3721, "step": 20486 }, { "epoch": 0.9401587811481804, "grad_norm": 0.43811696767807007, "learning_rate": 7.946954088141818e-06, "loss": 0.408, "step": 20487 }, { "epoch": 0.9402046716534349, "grad_norm": 0.4547506272792816, "learning_rate": 7.946756010027203e-06, "loss": 0.3765, "step": 20488 }, { "epoch": 0.9402505621586894, "grad_norm": 0.4463140666484833, "learning_rate": 7.946557924826571e-06, "loss": 0.3928, "step": 20489 }, { "epoch": 0.9402964526639438, "grad_norm": 0.5091535449028015, "learning_rate": 7.946359832540395e-06, "loss": 0.5123, "step": 20490 }, { "epoch": 0.9403423431691983, "grad_norm": 0.41807276010513306, "learning_rate": 7.946161733169156e-06, "loss": 0.3297, "step": 20491 }, { "epoch": 0.9403882336744528, "grad_norm": 0.4202379286289215, "learning_rate": 7.945963626713329e-06, "loss": 0.3396, "step": 20492 }, { "epoch": 0.9404341241797072, "grad_norm": 0.4405921399593353, "learning_rate": 7.945765513173385e-06, "loss": 0.408, "step": 20493 }, { "epoch": 0.9404800146849617, "grad_norm": 0.4647994041442871, "learning_rate": 7.945567392549807e-06, "loss": 0.4377, "step": 20494 }, { "epoch": 0.9405259051902162, "grad_norm": 0.4153446555137634, "learning_rate": 7.945369264843071e-06, "loss": 0.3302, "step": 20495 }, { "epoch": 0.9405717956954706, "grad_norm": 0.4453582465648651, "learning_rate": 7.945171130053648e-06, "loss": 0.3683, "step": 20496 }, { "epoch": 0.9406176862007251, "grad_norm": 0.44170495867729187, "learning_rate": 7.94497298818202e-06, "loss": 0.3588, "step": 20497 }, { "epoch": 0.9406635767059796, "grad_norm": 0.4771113991737366, "learning_rate": 7.944774839228661e-06, "loss": 0.4313, "step": 20498 }, { "epoch": 0.9407094672112339, "grad_norm": 0.432087242603302, "learning_rate": 7.944576683194047e-06, "loss": 0.3837, "step": 20499 }, { "epoch": 0.9407553577164884, "grad_norm": 0.44468769431114197, "learning_rate": 7.944378520078658e-06, "loss": 0.3901, "step": 20500 }, { "epoch": 0.9408012482217429, "grad_norm": 0.43146848678588867, "learning_rate": 7.944180349882966e-06, "loss": 0.4161, "step": 20501 }, { "epoch": 0.9408471387269974, "grad_norm": 0.46250441670417786, "learning_rate": 7.943982172607449e-06, "loss": 0.4459, "step": 20502 }, { "epoch": 0.9408930292322518, "grad_norm": 0.46426820755004883, "learning_rate": 7.943783988252584e-06, "loss": 0.3854, "step": 20503 }, { "epoch": 0.9409389197375063, "grad_norm": 0.45717164874076843, "learning_rate": 7.943585796818849e-06, "loss": 0.3806, "step": 20504 }, { "epoch": 0.9409848102427608, "grad_norm": 0.4335591197013855, "learning_rate": 7.943387598306718e-06, "loss": 0.3504, "step": 20505 }, { "epoch": 0.9410307007480152, "grad_norm": 0.4693076014518738, "learning_rate": 7.943189392716669e-06, "loss": 0.4584, "step": 20506 }, { "epoch": 0.9410765912532697, "grad_norm": 0.4661948084831238, "learning_rate": 7.94299118004918e-06, "loss": 0.4282, "step": 20507 }, { "epoch": 0.9411224817585242, "grad_norm": 0.45022183656692505, "learning_rate": 7.942792960304723e-06, "loss": 0.4266, "step": 20508 }, { "epoch": 0.9411683722637786, "grad_norm": 0.47297295928001404, "learning_rate": 7.942594733483779e-06, "loss": 0.3875, "step": 20509 }, { "epoch": 0.9412142627690331, "grad_norm": 0.446294367313385, "learning_rate": 7.942396499586825e-06, "loss": 0.3784, "step": 20510 }, { "epoch": 0.9412601532742876, "grad_norm": 0.41808173060417175, "learning_rate": 7.942198258614334e-06, "loss": 0.3508, "step": 20511 }, { "epoch": 0.941306043779542, "grad_norm": 0.4683513939380646, "learning_rate": 7.942000010566786e-06, "loss": 0.4148, "step": 20512 }, { "epoch": 0.9413519342847965, "grad_norm": 0.44048818945884705, "learning_rate": 7.941801755444655e-06, "loss": 0.3725, "step": 20513 }, { "epoch": 0.941397824790051, "grad_norm": 0.43999844789505005, "learning_rate": 7.941603493248422e-06, "loss": 0.3567, "step": 20514 }, { "epoch": 0.9414437152953054, "grad_norm": 0.4405522048473358, "learning_rate": 7.941405223978558e-06, "loss": 0.3886, "step": 20515 }, { "epoch": 0.9414896058005598, "grad_norm": 0.45946046710014343, "learning_rate": 7.941206947635544e-06, "loss": 0.4154, "step": 20516 }, { "epoch": 0.9415354963058143, "grad_norm": 0.4293038249015808, "learning_rate": 7.941008664219857e-06, "loss": 0.3398, "step": 20517 }, { "epoch": 0.9415813868110688, "grad_norm": 0.4158743619918823, "learning_rate": 7.94081037373197e-06, "loss": 0.3254, "step": 20518 }, { "epoch": 0.9416272773163232, "grad_norm": 0.48147132992744446, "learning_rate": 7.940612076172363e-06, "loss": 0.4805, "step": 20519 }, { "epoch": 0.9416731678215777, "grad_norm": 0.4803291857242584, "learning_rate": 7.940413771541514e-06, "loss": 0.3906, "step": 20520 }, { "epoch": 0.9417190583268322, "grad_norm": 0.48220449686050415, "learning_rate": 7.940215459839894e-06, "loss": 0.4324, "step": 20521 }, { "epoch": 0.9417649488320866, "grad_norm": 0.45679429173469543, "learning_rate": 7.940017141067986e-06, "loss": 0.4041, "step": 20522 }, { "epoch": 0.9418108393373411, "grad_norm": 0.4601984918117523, "learning_rate": 7.939818815226266e-06, "loss": 0.4016, "step": 20523 }, { "epoch": 0.9418567298425956, "grad_norm": 0.47521165013313293, "learning_rate": 7.939620482315206e-06, "loss": 0.4541, "step": 20524 }, { "epoch": 0.94190262034785, "grad_norm": 0.49009448289871216, "learning_rate": 7.939422142335288e-06, "loss": 0.4407, "step": 20525 }, { "epoch": 0.9419485108531045, "grad_norm": 0.46726834774017334, "learning_rate": 7.93922379528699e-06, "loss": 0.4268, "step": 20526 }, { "epoch": 0.941994401358359, "grad_norm": 0.4550146460533142, "learning_rate": 7.939025441170785e-06, "loss": 0.4198, "step": 20527 }, { "epoch": 0.9420402918636134, "grad_norm": 0.45934078097343445, "learning_rate": 7.93882707998715e-06, "loss": 0.4295, "step": 20528 }, { "epoch": 0.9420861823688679, "grad_norm": 0.45375436544418335, "learning_rate": 7.938628711736565e-06, "loss": 0.4052, "step": 20529 }, { "epoch": 0.9421320728741224, "grad_norm": 0.47653117775917053, "learning_rate": 7.938430336419505e-06, "loss": 0.3923, "step": 20530 }, { "epoch": 0.9421779633793768, "grad_norm": 0.4723985195159912, "learning_rate": 7.938231954036447e-06, "loss": 0.4428, "step": 20531 }, { "epoch": 0.9422238538846313, "grad_norm": 0.4493003785610199, "learning_rate": 7.938033564587867e-06, "loss": 0.4453, "step": 20532 }, { "epoch": 0.9422697443898858, "grad_norm": 0.4434211850166321, "learning_rate": 7.937835168074247e-06, "loss": 0.3989, "step": 20533 }, { "epoch": 0.9423156348951401, "grad_norm": 0.47145169973373413, "learning_rate": 7.93763676449606e-06, "loss": 0.4008, "step": 20534 }, { "epoch": 0.9423615254003946, "grad_norm": 0.44714632630348206, "learning_rate": 7.93743835385378e-06, "loss": 0.3402, "step": 20535 }, { "epoch": 0.9424074159056491, "grad_norm": 0.4586819112300873, "learning_rate": 7.937239936147892e-06, "loss": 0.3638, "step": 20536 }, { "epoch": 0.9424533064109036, "grad_norm": 0.44157424569129944, "learning_rate": 7.937041511378867e-06, "loss": 0.3735, "step": 20537 }, { "epoch": 0.942499196916158, "grad_norm": 0.45087864995002747, "learning_rate": 7.936843079547186e-06, "loss": 0.3601, "step": 20538 }, { "epoch": 0.9425450874214125, "grad_norm": 0.4548627436161041, "learning_rate": 7.936644640653322e-06, "loss": 0.4057, "step": 20539 }, { "epoch": 0.942590977926667, "grad_norm": 0.4633611738681793, "learning_rate": 7.936446194697755e-06, "loss": 0.3818, "step": 20540 }, { "epoch": 0.9426368684319214, "grad_norm": 0.48234230279922485, "learning_rate": 7.936247741680962e-06, "loss": 0.4655, "step": 20541 }, { "epoch": 0.9426827589371759, "grad_norm": 0.44105643033981323, "learning_rate": 7.936049281603419e-06, "loss": 0.374, "step": 20542 }, { "epoch": 0.9427286494424304, "grad_norm": 0.44646039605140686, "learning_rate": 7.935850814465606e-06, "loss": 0.3674, "step": 20543 }, { "epoch": 0.9427745399476848, "grad_norm": 0.4160648584365845, "learning_rate": 7.935652340267997e-06, "loss": 0.3087, "step": 20544 }, { "epoch": 0.9428204304529393, "grad_norm": 0.4639683663845062, "learning_rate": 7.93545385901107e-06, "loss": 0.4297, "step": 20545 }, { "epoch": 0.9428663209581938, "grad_norm": 0.4474770426750183, "learning_rate": 7.935255370695305e-06, "loss": 0.363, "step": 20546 }, { "epoch": 0.9429122114634482, "grad_norm": 0.43001678586006165, "learning_rate": 7.935056875321177e-06, "loss": 0.329, "step": 20547 }, { "epoch": 0.9429581019687027, "grad_norm": 0.444112628698349, "learning_rate": 7.934858372889163e-06, "loss": 0.3825, "step": 20548 }, { "epoch": 0.9430039924739572, "grad_norm": 0.4795422852039337, "learning_rate": 7.934659863399738e-06, "loss": 0.4728, "step": 20549 }, { "epoch": 0.9430498829792116, "grad_norm": 0.4481862783432007, "learning_rate": 7.934461346853387e-06, "loss": 0.4282, "step": 20550 }, { "epoch": 0.943095773484466, "grad_norm": 0.43370258808135986, "learning_rate": 7.934262823250579e-06, "loss": 0.3918, "step": 20551 }, { "epoch": 0.9431416639897205, "grad_norm": 0.45356547832489014, "learning_rate": 7.934064292591796e-06, "loss": 0.3973, "step": 20552 }, { "epoch": 0.943187554494975, "grad_norm": 0.4091116487979889, "learning_rate": 7.933865754877514e-06, "loss": 0.3087, "step": 20553 }, { "epoch": 0.9432334450002294, "grad_norm": 0.44692227244377136, "learning_rate": 7.933667210108213e-06, "loss": 0.3874, "step": 20554 }, { "epoch": 0.9432793355054839, "grad_norm": 0.47830453515052795, "learning_rate": 7.933468658284368e-06, "loss": 0.4373, "step": 20555 }, { "epoch": 0.9433252260107384, "grad_norm": 0.437059611082077, "learning_rate": 7.933270099406454e-06, "loss": 0.3545, "step": 20556 }, { "epoch": 0.9433711165159928, "grad_norm": 0.460025429725647, "learning_rate": 7.933071533474953e-06, "loss": 0.4303, "step": 20557 }, { "epoch": 0.9434170070212473, "grad_norm": 0.46681928634643555, "learning_rate": 7.93287296049034e-06, "loss": 0.3844, "step": 20558 }, { "epoch": 0.9434628975265018, "grad_norm": 0.45596134662628174, "learning_rate": 7.932674380453093e-06, "loss": 0.3945, "step": 20559 }, { "epoch": 0.9435087880317562, "grad_norm": 0.4022560119628906, "learning_rate": 7.93247579336369e-06, "loss": 0.2916, "step": 20560 }, { "epoch": 0.9435546785370107, "grad_norm": 0.4666641056537628, "learning_rate": 7.932277199222609e-06, "loss": 0.4167, "step": 20561 }, { "epoch": 0.9436005690422652, "grad_norm": 0.4485519528388977, "learning_rate": 7.932078598030328e-06, "loss": 0.3714, "step": 20562 }, { "epoch": 0.9436464595475196, "grad_norm": 0.4475579559803009, "learning_rate": 7.931879989787322e-06, "loss": 0.3691, "step": 20563 }, { "epoch": 0.9436923500527741, "grad_norm": 0.4419931471347809, "learning_rate": 7.931681374494068e-06, "loss": 0.371, "step": 20564 }, { "epoch": 0.9437382405580286, "grad_norm": 0.42998212575912476, "learning_rate": 7.931482752151047e-06, "loss": 0.3598, "step": 20565 }, { "epoch": 0.943784131063283, "grad_norm": 0.4144437313079834, "learning_rate": 7.931284122758736e-06, "loss": 0.3354, "step": 20566 }, { "epoch": 0.9438300215685375, "grad_norm": 0.4541586637496948, "learning_rate": 7.93108548631761e-06, "loss": 0.3829, "step": 20567 }, { "epoch": 0.943875912073792, "grad_norm": 0.41379663348197937, "learning_rate": 7.930886842828151e-06, "loss": 0.2998, "step": 20568 }, { "epoch": 0.9439218025790463, "grad_norm": 0.4231383800506592, "learning_rate": 7.930688192290832e-06, "loss": 0.3246, "step": 20569 }, { "epoch": 0.9439676930843008, "grad_norm": 0.5150914192199707, "learning_rate": 7.930489534706134e-06, "loss": 0.365, "step": 20570 }, { "epoch": 0.9440135835895553, "grad_norm": 0.4140473008155823, "learning_rate": 7.930290870074532e-06, "loss": 0.3285, "step": 20571 }, { "epoch": 0.9440594740948098, "grad_norm": 0.4788515865802765, "learning_rate": 7.930092198396508e-06, "loss": 0.4577, "step": 20572 }, { "epoch": 0.9441053646000642, "grad_norm": 0.4513852596282959, "learning_rate": 7.929893519672534e-06, "loss": 0.4023, "step": 20573 }, { "epoch": 0.9441512551053187, "grad_norm": 0.46589335799217224, "learning_rate": 7.929694833903093e-06, "loss": 0.3527, "step": 20574 }, { "epoch": 0.9441971456105732, "grad_norm": 0.44956740736961365, "learning_rate": 7.92949614108866e-06, "loss": 0.3999, "step": 20575 }, { "epoch": 0.9442430361158276, "grad_norm": 0.4463992118835449, "learning_rate": 7.929297441229714e-06, "loss": 0.3719, "step": 20576 }, { "epoch": 0.9442889266210821, "grad_norm": 0.46329715847969055, "learning_rate": 7.929098734326731e-06, "loss": 0.3625, "step": 20577 }, { "epoch": 0.9443348171263366, "grad_norm": 0.4653306007385254, "learning_rate": 7.92890002038019e-06, "loss": 0.416, "step": 20578 }, { "epoch": 0.944380707631591, "grad_norm": 0.43152478337287903, "learning_rate": 7.928701299390568e-06, "loss": 0.3312, "step": 20579 }, { "epoch": 0.9444265981368455, "grad_norm": 0.44317883253097534, "learning_rate": 7.928502571358347e-06, "loss": 0.3534, "step": 20580 }, { "epoch": 0.9444724886421, "grad_norm": 0.4298538565635681, "learning_rate": 7.928303836283999e-06, "loss": 0.3579, "step": 20581 }, { "epoch": 0.9445183791473544, "grad_norm": 0.4726695418357849, "learning_rate": 7.928105094168005e-06, "loss": 0.4102, "step": 20582 }, { "epoch": 0.9445642696526089, "grad_norm": 0.45329025387763977, "learning_rate": 7.927906345010843e-06, "loss": 0.3734, "step": 20583 }, { "epoch": 0.9446101601578634, "grad_norm": 0.4720298945903778, "learning_rate": 7.927707588812988e-06, "loss": 0.4127, "step": 20584 }, { "epoch": 0.9446560506631178, "grad_norm": 0.4826267957687378, "learning_rate": 7.927508825574923e-06, "loss": 0.4049, "step": 20585 }, { "epoch": 0.9447019411683723, "grad_norm": 0.47594770789146423, "learning_rate": 7.927310055297121e-06, "loss": 0.4457, "step": 20586 }, { "epoch": 0.9447478316736267, "grad_norm": 0.4583769142627716, "learning_rate": 7.927111277980065e-06, "loss": 0.4402, "step": 20587 }, { "epoch": 0.9447937221788811, "grad_norm": 0.5291416645050049, "learning_rate": 7.926912493624228e-06, "loss": 0.4824, "step": 20588 }, { "epoch": 0.9448396126841356, "grad_norm": 0.43763118982315063, "learning_rate": 7.92671370223009e-06, "loss": 0.3775, "step": 20589 }, { "epoch": 0.9448855031893901, "grad_norm": 0.4599716365337372, "learning_rate": 7.92651490379813e-06, "loss": 0.3965, "step": 20590 }, { "epoch": 0.9449313936946446, "grad_norm": 0.4727698564529419, "learning_rate": 7.926316098328825e-06, "loss": 0.3925, "step": 20591 }, { "epoch": 0.944977284199899, "grad_norm": 0.43567967414855957, "learning_rate": 7.926117285822654e-06, "loss": 0.3482, "step": 20592 }, { "epoch": 0.9450231747051535, "grad_norm": 0.4175252318382263, "learning_rate": 7.925918466280093e-06, "loss": 0.3107, "step": 20593 }, { "epoch": 0.945069065210408, "grad_norm": 0.45489469170570374, "learning_rate": 7.925719639701624e-06, "loss": 0.4015, "step": 20594 }, { "epoch": 0.9451149557156624, "grad_norm": 0.45946747064590454, "learning_rate": 7.92552080608772e-06, "loss": 0.4001, "step": 20595 }, { "epoch": 0.9451608462209169, "grad_norm": 0.4459153711795807, "learning_rate": 7.925321965438864e-06, "loss": 0.3661, "step": 20596 }, { "epoch": 0.9452067367261714, "grad_norm": 0.417134553194046, "learning_rate": 7.925123117755531e-06, "loss": 0.3228, "step": 20597 }, { "epoch": 0.9452526272314258, "grad_norm": 0.478728324174881, "learning_rate": 7.924924263038201e-06, "loss": 0.4073, "step": 20598 }, { "epoch": 0.9452985177366803, "grad_norm": 0.4658650755882263, "learning_rate": 7.92472540128735e-06, "loss": 0.43, "step": 20599 }, { "epoch": 0.9453444082419348, "grad_norm": 0.4409851133823395, "learning_rate": 7.924526532503459e-06, "loss": 0.3557, "step": 20600 }, { "epoch": 0.9453902987471892, "grad_norm": 0.4726598858833313, "learning_rate": 7.924327656687002e-06, "loss": 0.4445, "step": 20601 }, { "epoch": 0.9454361892524437, "grad_norm": 0.4191921353340149, "learning_rate": 7.924128773838463e-06, "loss": 0.3286, "step": 20602 }, { "epoch": 0.9454820797576982, "grad_norm": 0.5137768983840942, "learning_rate": 7.923929883958315e-06, "loss": 0.5433, "step": 20603 }, { "epoch": 0.9455279702629525, "grad_norm": 0.43485307693481445, "learning_rate": 7.923730987047038e-06, "loss": 0.3726, "step": 20604 }, { "epoch": 0.945573860768207, "grad_norm": 0.46116751432418823, "learning_rate": 7.923532083105112e-06, "loss": 0.413, "step": 20605 }, { "epoch": 0.9456197512734615, "grad_norm": 0.4280173182487488, "learning_rate": 7.923333172133013e-06, "loss": 0.3357, "step": 20606 }, { "epoch": 0.945665641778716, "grad_norm": 0.46372225880622864, "learning_rate": 7.923134254131221e-06, "loss": 0.3641, "step": 20607 }, { "epoch": 0.9457115322839704, "grad_norm": 0.4610538184642792, "learning_rate": 7.922935329100216e-06, "loss": 0.42, "step": 20608 }, { "epoch": 0.9457574227892249, "grad_norm": 0.45174291729927063, "learning_rate": 7.922736397040471e-06, "loss": 0.4167, "step": 20609 }, { "epoch": 0.9458033132944794, "grad_norm": 0.4547256827354431, "learning_rate": 7.922537457952468e-06, "loss": 0.361, "step": 20610 }, { "epoch": 0.9458492037997338, "grad_norm": 0.4482285678386688, "learning_rate": 7.922338511836686e-06, "loss": 0.4119, "step": 20611 }, { "epoch": 0.9458950943049883, "grad_norm": 0.4133509397506714, "learning_rate": 7.922139558693601e-06, "loss": 0.3624, "step": 20612 }, { "epoch": 0.9459409848102428, "grad_norm": 0.4788237512111664, "learning_rate": 7.921940598523695e-06, "loss": 0.475, "step": 20613 }, { "epoch": 0.9459868753154972, "grad_norm": 0.5016488432884216, "learning_rate": 7.92174163132744e-06, "loss": 0.4567, "step": 20614 }, { "epoch": 0.9460327658207517, "grad_norm": 0.41740864515304565, "learning_rate": 7.921542657105321e-06, "loss": 0.3528, "step": 20615 }, { "epoch": 0.9460786563260062, "grad_norm": 0.45040395855903625, "learning_rate": 7.921343675857813e-06, "loss": 0.4155, "step": 20616 }, { "epoch": 0.9461245468312606, "grad_norm": 0.4375624358654022, "learning_rate": 7.921144687585397e-06, "loss": 0.3534, "step": 20617 }, { "epoch": 0.9461704373365151, "grad_norm": 0.41773343086242676, "learning_rate": 7.92094569228855e-06, "loss": 0.3349, "step": 20618 }, { "epoch": 0.9462163278417696, "grad_norm": 0.45106643438339233, "learning_rate": 7.920746689967748e-06, "loss": 0.3813, "step": 20619 }, { "epoch": 0.946262218347024, "grad_norm": 0.4562336206436157, "learning_rate": 7.920547680623472e-06, "loss": 0.402, "step": 20620 }, { "epoch": 0.9463081088522785, "grad_norm": 0.45603933930397034, "learning_rate": 7.920348664256203e-06, "loss": 0.4117, "step": 20621 }, { "epoch": 0.946353999357533, "grad_norm": 0.42456790804862976, "learning_rate": 7.920149640866415e-06, "loss": 0.3332, "step": 20622 }, { "epoch": 0.9463998898627873, "grad_norm": 0.4077037274837494, "learning_rate": 7.91995061045459e-06, "loss": 0.3086, "step": 20623 }, { "epoch": 0.9464457803680418, "grad_norm": 0.45619267225265503, "learning_rate": 7.919751573021207e-06, "loss": 0.4484, "step": 20624 }, { "epoch": 0.9464916708732963, "grad_norm": 0.4805120527744293, "learning_rate": 7.919552528566741e-06, "loss": 0.484, "step": 20625 }, { "epoch": 0.9465375613785508, "grad_norm": 0.4972884953022003, "learning_rate": 7.91935347709167e-06, "loss": 0.4779, "step": 20626 }, { "epoch": 0.9465834518838052, "grad_norm": 0.46159547567367554, "learning_rate": 7.919154418596479e-06, "loss": 0.4523, "step": 20627 }, { "epoch": 0.9466293423890597, "grad_norm": 0.4503284990787506, "learning_rate": 7.918955353081644e-06, "loss": 0.391, "step": 20628 }, { "epoch": 0.9466752328943142, "grad_norm": 0.45481449365615845, "learning_rate": 7.91875628054764e-06, "loss": 0.3924, "step": 20629 }, { "epoch": 0.9467211233995686, "grad_norm": 0.40619122982025146, "learning_rate": 7.91855720099495e-06, "loss": 0.2942, "step": 20630 }, { "epoch": 0.9467670139048231, "grad_norm": 0.4413028955459595, "learning_rate": 7.918358114424048e-06, "loss": 0.3321, "step": 20631 }, { "epoch": 0.9468129044100776, "grad_norm": 0.45986267924308777, "learning_rate": 7.918159020835419e-06, "loss": 0.408, "step": 20632 }, { "epoch": 0.946858794915332, "grad_norm": 0.6788783073425293, "learning_rate": 7.917959920229537e-06, "loss": 0.4702, "step": 20633 }, { "epoch": 0.9469046854205865, "grad_norm": 0.44639572501182556, "learning_rate": 7.917760812606883e-06, "loss": 0.3881, "step": 20634 }, { "epoch": 0.946950575925841, "grad_norm": 0.43014928698539734, "learning_rate": 7.917561697967936e-06, "loss": 0.3422, "step": 20635 }, { "epoch": 0.9469964664310954, "grad_norm": 0.43558913469314575, "learning_rate": 7.917362576313171e-06, "loss": 0.3342, "step": 20636 }, { "epoch": 0.9470423569363499, "grad_norm": 0.49919962882995605, "learning_rate": 7.917163447643072e-06, "loss": 0.4866, "step": 20637 }, { "epoch": 0.9470882474416044, "grad_norm": 0.4474511444568634, "learning_rate": 7.916964311958115e-06, "loss": 0.4468, "step": 20638 }, { "epoch": 0.9471341379468587, "grad_norm": 0.442472904920578, "learning_rate": 7.91676516925878e-06, "loss": 0.3878, "step": 20639 }, { "epoch": 0.9471800284521132, "grad_norm": 0.4569697678089142, "learning_rate": 7.916566019545545e-06, "loss": 0.4273, "step": 20640 }, { "epoch": 0.9472259189573677, "grad_norm": 0.42616939544677734, "learning_rate": 7.916366862818886e-06, "loss": 0.3539, "step": 20641 }, { "epoch": 0.9472718094626222, "grad_norm": 0.49635106325149536, "learning_rate": 7.91616769907929e-06, "loss": 0.4673, "step": 20642 }, { "epoch": 0.9473176999678766, "grad_norm": 0.48511362075805664, "learning_rate": 7.915968528327229e-06, "loss": 0.4588, "step": 20643 }, { "epoch": 0.9473635904731311, "grad_norm": 0.4555867612361908, "learning_rate": 7.915769350563184e-06, "loss": 0.3828, "step": 20644 }, { "epoch": 0.9474094809783856, "grad_norm": 0.48230454325675964, "learning_rate": 7.915570165787637e-06, "loss": 0.3997, "step": 20645 }, { "epoch": 0.94745537148364, "grad_norm": 0.4838423430919647, "learning_rate": 7.91537097400106e-06, "loss": 0.4573, "step": 20646 }, { "epoch": 0.9475012619888945, "grad_norm": 0.4689597189426422, "learning_rate": 7.915171775203936e-06, "loss": 0.3862, "step": 20647 }, { "epoch": 0.947547152494149, "grad_norm": 0.47464242577552795, "learning_rate": 7.914972569396746e-06, "loss": 0.4179, "step": 20648 }, { "epoch": 0.9475930429994034, "grad_norm": 0.430608332157135, "learning_rate": 7.914773356579966e-06, "loss": 0.3228, "step": 20649 }, { "epoch": 0.9476389335046579, "grad_norm": 0.4151085317134857, "learning_rate": 7.914574136754075e-06, "loss": 0.313, "step": 20650 }, { "epoch": 0.9476848240099124, "grad_norm": 0.4168482720851898, "learning_rate": 7.914374909919554e-06, "loss": 0.3585, "step": 20651 }, { "epoch": 0.9477307145151668, "grad_norm": 0.5066651105880737, "learning_rate": 7.91417567607688e-06, "loss": 0.4477, "step": 20652 }, { "epoch": 0.9477766050204213, "grad_norm": 0.4801095128059387, "learning_rate": 7.913976435226537e-06, "loss": 0.4523, "step": 20653 }, { "epoch": 0.9478224955256758, "grad_norm": 0.42309999465942383, "learning_rate": 7.913777187368996e-06, "loss": 0.3095, "step": 20654 }, { "epoch": 0.9478683860309302, "grad_norm": 0.490983247756958, "learning_rate": 7.913577932504742e-06, "loss": 0.4194, "step": 20655 }, { "epoch": 0.9479142765361847, "grad_norm": 0.4292239546775818, "learning_rate": 7.913378670634254e-06, "loss": 0.3263, "step": 20656 }, { "epoch": 0.9479601670414392, "grad_norm": 0.49975183606147766, "learning_rate": 7.913179401758008e-06, "loss": 0.469, "step": 20657 }, { "epoch": 0.9480060575466935, "grad_norm": 0.4253979027271271, "learning_rate": 7.912980125876486e-06, "loss": 0.3346, "step": 20658 }, { "epoch": 0.948051948051948, "grad_norm": 0.4483835697174072, "learning_rate": 7.912780842990166e-06, "loss": 0.3816, "step": 20659 }, { "epoch": 0.9480978385572025, "grad_norm": 0.435971736907959, "learning_rate": 7.912581553099527e-06, "loss": 0.4119, "step": 20660 }, { "epoch": 0.948143729062457, "grad_norm": 0.4344462454319, "learning_rate": 7.912382256205049e-06, "loss": 0.3594, "step": 20661 }, { "epoch": 0.9481896195677114, "grad_norm": 0.4509414732456207, "learning_rate": 7.91218295230721e-06, "loss": 0.3616, "step": 20662 }, { "epoch": 0.9482355100729659, "grad_norm": 0.4775043725967407, "learning_rate": 7.91198364140649e-06, "loss": 0.4025, "step": 20663 }, { "epoch": 0.9482814005782204, "grad_norm": 0.483792781829834, "learning_rate": 7.91178432350337e-06, "loss": 0.4983, "step": 20664 }, { "epoch": 0.9483272910834748, "grad_norm": 0.5223774313926697, "learning_rate": 7.911584998598328e-06, "loss": 0.5122, "step": 20665 }, { "epoch": 0.9483731815887293, "grad_norm": 0.4326172173023224, "learning_rate": 7.91138566669184e-06, "loss": 0.3483, "step": 20666 }, { "epoch": 0.9484190720939838, "grad_norm": 0.4393482804298401, "learning_rate": 7.911186327784391e-06, "loss": 0.3757, "step": 20667 }, { "epoch": 0.9484649625992382, "grad_norm": 0.45733290910720825, "learning_rate": 7.910986981876457e-06, "loss": 0.3374, "step": 20668 }, { "epoch": 0.9485108531044927, "grad_norm": 0.4015486240386963, "learning_rate": 7.910787628968516e-06, "loss": 0.296, "step": 20669 }, { "epoch": 0.9485567436097472, "grad_norm": 0.4993155300617218, "learning_rate": 7.910588269061053e-06, "loss": 0.4599, "step": 20670 }, { "epoch": 0.9486026341150016, "grad_norm": 0.4317129850387573, "learning_rate": 7.910388902154541e-06, "loss": 0.3018, "step": 20671 }, { "epoch": 0.9486485246202561, "grad_norm": 0.48533621430397034, "learning_rate": 7.910189528249464e-06, "loss": 0.4709, "step": 20672 }, { "epoch": 0.9486944151255106, "grad_norm": 0.48693662881851196, "learning_rate": 7.909990147346299e-06, "loss": 0.457, "step": 20673 }, { "epoch": 0.948740305630765, "grad_norm": 0.4365951716899872, "learning_rate": 7.909790759445525e-06, "loss": 0.4013, "step": 20674 }, { "epoch": 0.9487861961360194, "grad_norm": 0.43023934960365295, "learning_rate": 7.909591364547624e-06, "loss": 0.3515, "step": 20675 }, { "epoch": 0.9488320866412739, "grad_norm": 0.497045636177063, "learning_rate": 7.909391962653075e-06, "loss": 0.4461, "step": 20676 }, { "epoch": 0.9488779771465283, "grad_norm": 0.5104749798774719, "learning_rate": 7.909192553762355e-06, "loss": 0.4884, "step": 20677 }, { "epoch": 0.9489238676517828, "grad_norm": 0.43133291602134705, "learning_rate": 7.908993137875946e-06, "loss": 0.3429, "step": 20678 }, { "epoch": 0.9489697581570373, "grad_norm": 0.46663662791252136, "learning_rate": 7.908793714994325e-06, "loss": 0.3907, "step": 20679 }, { "epoch": 0.9490156486622918, "grad_norm": 0.43567994236946106, "learning_rate": 7.908594285117974e-06, "loss": 0.3703, "step": 20680 }, { "epoch": 0.9490615391675462, "grad_norm": 0.43265992403030396, "learning_rate": 7.908394848247373e-06, "loss": 0.3345, "step": 20681 }, { "epoch": 0.9491074296728007, "grad_norm": 0.41491079330444336, "learning_rate": 7.908195404382998e-06, "loss": 0.3619, "step": 20682 }, { "epoch": 0.9491533201780552, "grad_norm": 0.47136786580085754, "learning_rate": 7.907995953525332e-06, "loss": 0.4477, "step": 20683 }, { "epoch": 0.9491992106833096, "grad_norm": 0.48946306109428406, "learning_rate": 7.907796495674856e-06, "loss": 0.4438, "step": 20684 }, { "epoch": 0.9492451011885641, "grad_norm": 0.5118829011917114, "learning_rate": 7.907597030832042e-06, "loss": 0.4825, "step": 20685 }, { "epoch": 0.9492909916938186, "grad_norm": 0.48146259784698486, "learning_rate": 7.907397558997378e-06, "loss": 0.4707, "step": 20686 }, { "epoch": 0.949336882199073, "grad_norm": 0.4402076005935669, "learning_rate": 7.90719808017134e-06, "loss": 0.3964, "step": 20687 }, { "epoch": 0.9493827727043275, "grad_norm": 0.4592094123363495, "learning_rate": 7.906998594354408e-06, "loss": 0.4473, "step": 20688 }, { "epoch": 0.949428663209582, "grad_norm": 0.46768084168434143, "learning_rate": 7.906799101547062e-06, "loss": 0.3994, "step": 20689 }, { "epoch": 0.9494745537148364, "grad_norm": 0.41256946325302124, "learning_rate": 7.906599601749782e-06, "loss": 0.3389, "step": 20690 }, { "epoch": 0.9495204442200909, "grad_norm": 0.4697292149066925, "learning_rate": 7.906400094963047e-06, "loss": 0.4382, "step": 20691 }, { "epoch": 0.9495663347253454, "grad_norm": 0.4967111647129059, "learning_rate": 7.906200581187338e-06, "loss": 0.5058, "step": 20692 }, { "epoch": 0.9496122252305997, "grad_norm": 0.4405536949634552, "learning_rate": 7.906001060423132e-06, "loss": 0.3582, "step": 20693 }, { "epoch": 0.9496581157358542, "grad_norm": 0.4394819438457489, "learning_rate": 7.905801532670912e-06, "loss": 0.4288, "step": 20694 }, { "epoch": 0.9497040062411087, "grad_norm": 0.9397099614143372, "learning_rate": 7.905601997931156e-06, "loss": 0.3832, "step": 20695 }, { "epoch": 0.9497498967463632, "grad_norm": 0.48197418451309204, "learning_rate": 7.905402456204343e-06, "loss": 0.434, "step": 20696 }, { "epoch": 0.9497957872516176, "grad_norm": 0.48722895979881287, "learning_rate": 7.905202907490956e-06, "loss": 0.4268, "step": 20697 }, { "epoch": 0.9498416777568721, "grad_norm": 0.440244197845459, "learning_rate": 7.905003351791472e-06, "loss": 0.3725, "step": 20698 }, { "epoch": 0.9498875682621266, "grad_norm": 0.41337862610816956, "learning_rate": 7.90480378910637e-06, "loss": 0.3324, "step": 20699 }, { "epoch": 0.949933458767381, "grad_norm": 0.5012292265892029, "learning_rate": 7.904604219436135e-06, "loss": 0.4619, "step": 20700 }, { "epoch": 0.9499793492726355, "grad_norm": 0.48096033930778503, "learning_rate": 7.904404642781243e-06, "loss": 0.4099, "step": 20701 }, { "epoch": 0.95002523977789, "grad_norm": 0.6364090442657471, "learning_rate": 7.904205059142173e-06, "loss": 0.3482, "step": 20702 }, { "epoch": 0.9500711302831444, "grad_norm": 0.4528335630893707, "learning_rate": 7.904005468519407e-06, "loss": 0.3697, "step": 20703 }, { "epoch": 0.9501170207883989, "grad_norm": 0.4561319947242737, "learning_rate": 7.903805870913424e-06, "loss": 0.3253, "step": 20704 }, { "epoch": 0.9501629112936534, "grad_norm": 0.4610041677951813, "learning_rate": 7.903606266324706e-06, "loss": 0.428, "step": 20705 }, { "epoch": 0.9502088017989078, "grad_norm": 0.47606360912323, "learning_rate": 7.90340665475373e-06, "loss": 0.4612, "step": 20706 }, { "epoch": 0.9502546923041623, "grad_norm": 0.41068035364151, "learning_rate": 7.903207036200977e-06, "loss": 0.3229, "step": 20707 }, { "epoch": 0.9503005828094168, "grad_norm": 0.5289531350135803, "learning_rate": 7.903007410666927e-06, "loss": 0.4248, "step": 20708 }, { "epoch": 0.9503464733146711, "grad_norm": 0.4537547826766968, "learning_rate": 7.902807778152061e-06, "loss": 0.4066, "step": 20709 }, { "epoch": 0.9503923638199256, "grad_norm": 0.4345776438713074, "learning_rate": 7.902608138656859e-06, "loss": 0.3457, "step": 20710 }, { "epoch": 0.9504382543251801, "grad_norm": 0.44283393025398254, "learning_rate": 7.902408492181798e-06, "loss": 0.3896, "step": 20711 }, { "epoch": 0.9504841448304345, "grad_norm": 0.4521791636943817, "learning_rate": 7.902208838727366e-06, "loss": 0.3989, "step": 20712 }, { "epoch": 0.950530035335689, "grad_norm": 0.454632967710495, "learning_rate": 7.902009178294033e-06, "loss": 0.3949, "step": 20713 }, { "epoch": 0.9505759258409435, "grad_norm": 0.41678258776664734, "learning_rate": 7.901809510882285e-06, "loss": 0.3321, "step": 20714 }, { "epoch": 0.950621816346198, "grad_norm": 0.4800679385662079, "learning_rate": 7.9016098364926e-06, "loss": 0.4598, "step": 20715 }, { "epoch": 0.9506677068514524, "grad_norm": 0.45400476455688477, "learning_rate": 7.901410155125462e-06, "loss": 0.38, "step": 20716 }, { "epoch": 0.9507135973567069, "grad_norm": 0.44501349329948425, "learning_rate": 7.901210466781347e-06, "loss": 0.3791, "step": 20717 }, { "epoch": 0.9507594878619614, "grad_norm": 0.43400585651397705, "learning_rate": 7.901010771460736e-06, "loss": 0.3862, "step": 20718 }, { "epoch": 0.9508053783672158, "grad_norm": 0.43263059854507446, "learning_rate": 7.90081106916411e-06, "loss": 0.3346, "step": 20719 }, { "epoch": 0.9508512688724703, "grad_norm": 0.4371837377548218, "learning_rate": 7.900611359891948e-06, "loss": 0.3623, "step": 20720 }, { "epoch": 0.9508971593777248, "grad_norm": 0.4345303773880005, "learning_rate": 7.900411643644732e-06, "loss": 0.367, "step": 20721 }, { "epoch": 0.9509430498829792, "grad_norm": 0.4456794857978821, "learning_rate": 7.90021192042294e-06, "loss": 0.3879, "step": 20722 }, { "epoch": 0.9509889403882337, "grad_norm": 0.4169251322746277, "learning_rate": 7.900012190227056e-06, "loss": 0.3399, "step": 20723 }, { "epoch": 0.9510348308934882, "grad_norm": 0.5187283754348755, "learning_rate": 7.899812453057555e-06, "loss": 0.409, "step": 20724 }, { "epoch": 0.9510807213987426, "grad_norm": 0.4520646929740906, "learning_rate": 7.899612708914924e-06, "loss": 0.3881, "step": 20725 }, { "epoch": 0.951126611903997, "grad_norm": 0.5836667418479919, "learning_rate": 7.899412957799638e-06, "loss": 0.372, "step": 20726 }, { "epoch": 0.9511725024092516, "grad_norm": 0.4666779935359955, "learning_rate": 7.899213199712177e-06, "loss": 0.359, "step": 20727 }, { "epoch": 0.9512183929145059, "grad_norm": 0.46841078996658325, "learning_rate": 7.899013434653025e-06, "loss": 0.4067, "step": 20728 }, { "epoch": 0.9512642834197604, "grad_norm": 0.4843532145023346, "learning_rate": 7.89881366262266e-06, "loss": 0.4617, "step": 20729 }, { "epoch": 0.9513101739250149, "grad_norm": 0.47399812936782837, "learning_rate": 7.898613883621563e-06, "loss": 0.4484, "step": 20730 }, { "epoch": 0.9513560644302694, "grad_norm": 0.46369126439094543, "learning_rate": 7.898414097650216e-06, "loss": 0.4273, "step": 20731 }, { "epoch": 0.9514019549355238, "grad_norm": 0.5030677318572998, "learning_rate": 7.898214304709098e-06, "loss": 0.5421, "step": 20732 }, { "epoch": 0.9514478454407783, "grad_norm": 0.4228552579879761, "learning_rate": 7.898014504798688e-06, "loss": 0.3583, "step": 20733 }, { "epoch": 0.9514937359460328, "grad_norm": 0.4302417039871216, "learning_rate": 7.897814697919467e-06, "loss": 0.3899, "step": 20734 }, { "epoch": 0.9515396264512872, "grad_norm": 0.43733251094818115, "learning_rate": 7.897614884071918e-06, "loss": 0.4149, "step": 20735 }, { "epoch": 0.9515855169565417, "grad_norm": 0.4229368567466736, "learning_rate": 7.897415063256516e-06, "loss": 0.3587, "step": 20736 }, { "epoch": 0.9516314074617962, "grad_norm": 0.4426785409450531, "learning_rate": 7.89721523547375e-06, "loss": 0.3554, "step": 20737 }, { "epoch": 0.9516772979670506, "grad_norm": 0.43730103969573975, "learning_rate": 7.897015400724094e-06, "loss": 0.403, "step": 20738 }, { "epoch": 0.9517231884723051, "grad_norm": 0.46226879954338074, "learning_rate": 7.89681555900803e-06, "loss": 0.3828, "step": 20739 }, { "epoch": 0.9517690789775596, "grad_norm": 0.49004727602005005, "learning_rate": 7.896615710326039e-06, "loss": 0.4405, "step": 20740 }, { "epoch": 0.951814969482814, "grad_norm": 0.41702762246131897, "learning_rate": 7.896415854678601e-06, "loss": 0.3253, "step": 20741 }, { "epoch": 0.9518608599880685, "grad_norm": 0.4566820561885834, "learning_rate": 7.896215992066198e-06, "loss": 0.4242, "step": 20742 }, { "epoch": 0.951906750493323, "grad_norm": 0.42468541860580444, "learning_rate": 7.896016122489308e-06, "loss": 0.3775, "step": 20743 }, { "epoch": 0.9519526409985773, "grad_norm": 1.297866940498352, "learning_rate": 7.895816245948415e-06, "loss": 0.4291, "step": 20744 }, { "epoch": 0.9519985315038318, "grad_norm": 0.43348339200019836, "learning_rate": 7.895616362443997e-06, "loss": 0.335, "step": 20745 }, { "epoch": 0.9520444220090863, "grad_norm": 0.49392303824424744, "learning_rate": 7.895416471976536e-06, "loss": 0.3662, "step": 20746 }, { "epoch": 0.9520903125143407, "grad_norm": 0.44551748037338257, "learning_rate": 7.895216574546511e-06, "loss": 0.3639, "step": 20747 }, { "epoch": 0.9521362030195952, "grad_norm": 0.521085798740387, "learning_rate": 7.895016670154404e-06, "loss": 0.4802, "step": 20748 }, { "epoch": 0.9521820935248497, "grad_norm": 0.4230537414550781, "learning_rate": 7.894816758800698e-06, "loss": 0.3342, "step": 20749 }, { "epoch": 0.9522279840301042, "grad_norm": 0.4655434191226959, "learning_rate": 7.89461684048587e-06, "loss": 0.4184, "step": 20750 }, { "epoch": 0.9522738745353586, "grad_norm": 0.4300721287727356, "learning_rate": 7.8944169152104e-06, "loss": 0.3741, "step": 20751 }, { "epoch": 0.9523197650406131, "grad_norm": 0.45191511511802673, "learning_rate": 7.894216982974775e-06, "loss": 0.4049, "step": 20752 }, { "epoch": 0.9523656555458676, "grad_norm": 0.46399304270744324, "learning_rate": 7.894017043779468e-06, "loss": 0.3554, "step": 20753 }, { "epoch": 0.952411546051122, "grad_norm": 0.49666401743888855, "learning_rate": 7.893817097624962e-06, "loss": 0.4969, "step": 20754 }, { "epoch": 0.9524574365563765, "grad_norm": 0.44455644488334656, "learning_rate": 7.893617144511742e-06, "loss": 0.3802, "step": 20755 }, { "epoch": 0.952503327061631, "grad_norm": 0.43826672434806824, "learning_rate": 7.893417184440287e-06, "loss": 0.3319, "step": 20756 }, { "epoch": 0.9525492175668854, "grad_norm": 0.46469804644584656, "learning_rate": 7.893217217411076e-06, "loss": 0.4246, "step": 20757 }, { "epoch": 0.9525951080721399, "grad_norm": 0.612488865852356, "learning_rate": 7.89301724342459e-06, "loss": 0.3574, "step": 20758 }, { "epoch": 0.9526409985773944, "grad_norm": 0.4565034806728363, "learning_rate": 7.89281726248131e-06, "loss": 0.3851, "step": 20759 }, { "epoch": 0.9526868890826488, "grad_norm": 0.4787023365497589, "learning_rate": 7.89261727458172e-06, "loss": 0.4298, "step": 20760 }, { "epoch": 0.9527327795879033, "grad_norm": 0.4315487742424011, "learning_rate": 7.892417279726296e-06, "loss": 0.3318, "step": 20761 }, { "epoch": 0.9527786700931578, "grad_norm": 0.4546329975128174, "learning_rate": 7.892217277915522e-06, "loss": 0.3912, "step": 20762 }, { "epoch": 0.9528245605984121, "grad_norm": 0.4488099217414856, "learning_rate": 7.892017269149879e-06, "loss": 0.4382, "step": 20763 }, { "epoch": 0.9528704511036666, "grad_norm": 0.47105345129966736, "learning_rate": 7.891817253429845e-06, "loss": 0.4058, "step": 20764 }, { "epoch": 0.9529163416089211, "grad_norm": 0.45920976996421814, "learning_rate": 7.891617230755906e-06, "loss": 0.4499, "step": 20765 }, { "epoch": 0.9529622321141755, "grad_norm": 0.44265061616897583, "learning_rate": 7.891417201128538e-06, "loss": 0.3386, "step": 20766 }, { "epoch": 0.95300812261943, "grad_norm": 0.42424607276916504, "learning_rate": 7.891217164548227e-06, "loss": 0.3487, "step": 20767 }, { "epoch": 0.9530540131246845, "grad_norm": 0.4415209889411926, "learning_rate": 7.891017121015447e-06, "loss": 0.3918, "step": 20768 }, { "epoch": 0.953099903629939, "grad_norm": 0.44983819127082825, "learning_rate": 7.890817070530686e-06, "loss": 0.4442, "step": 20769 }, { "epoch": 0.9531457941351934, "grad_norm": 0.46973806619644165, "learning_rate": 7.890617013094421e-06, "loss": 0.3998, "step": 20770 }, { "epoch": 0.9531916846404479, "grad_norm": 0.4453645944595337, "learning_rate": 7.890416948707135e-06, "loss": 0.3927, "step": 20771 }, { "epoch": 0.9532375751457024, "grad_norm": 0.4991135597229004, "learning_rate": 7.89021687736931e-06, "loss": 0.497, "step": 20772 }, { "epoch": 0.9532834656509568, "grad_norm": 0.5002502202987671, "learning_rate": 7.890016799081424e-06, "loss": 0.4351, "step": 20773 }, { "epoch": 0.9533293561562113, "grad_norm": 0.45086973905563354, "learning_rate": 7.88981671384396e-06, "loss": 0.3636, "step": 20774 }, { "epoch": 0.9533752466614658, "grad_norm": 0.44091665744781494, "learning_rate": 7.889616621657399e-06, "loss": 0.3162, "step": 20775 }, { "epoch": 0.9534211371667202, "grad_norm": 0.4561329782009125, "learning_rate": 7.889416522522223e-06, "loss": 0.4344, "step": 20776 }, { "epoch": 0.9534670276719747, "grad_norm": 0.424672394990921, "learning_rate": 7.889216416438911e-06, "loss": 0.3273, "step": 20777 }, { "epoch": 0.9535129181772292, "grad_norm": 0.4568372070789337, "learning_rate": 7.889016303407944e-06, "loss": 0.3587, "step": 20778 }, { "epoch": 0.9535588086824835, "grad_norm": 0.4501543343067169, "learning_rate": 7.888816183429806e-06, "loss": 0.3833, "step": 20779 }, { "epoch": 0.953604699187738, "grad_norm": 0.4728638231754303, "learning_rate": 7.888616056504978e-06, "loss": 0.4522, "step": 20780 }, { "epoch": 0.9536505896929925, "grad_norm": 0.4682685136795044, "learning_rate": 7.888415922633938e-06, "loss": 0.3971, "step": 20781 }, { "epoch": 0.9536964801982469, "grad_norm": 0.42580798268318176, "learning_rate": 7.88821578181717e-06, "loss": 0.4002, "step": 20782 }, { "epoch": 0.9537423707035014, "grad_norm": 0.45228299498558044, "learning_rate": 7.888015634055155e-06, "loss": 0.4241, "step": 20783 }, { "epoch": 0.9537882612087559, "grad_norm": 0.4737030565738678, "learning_rate": 7.887815479348375e-06, "loss": 0.4563, "step": 20784 }, { "epoch": 0.9538341517140104, "grad_norm": 0.4778626561164856, "learning_rate": 7.887615317697308e-06, "loss": 0.462, "step": 20785 }, { "epoch": 0.9538800422192648, "grad_norm": 0.45809170603752136, "learning_rate": 7.887415149102438e-06, "loss": 0.4037, "step": 20786 }, { "epoch": 0.9539259327245193, "grad_norm": 0.4994875490665436, "learning_rate": 7.887214973564248e-06, "loss": 0.422, "step": 20787 }, { "epoch": 0.9539718232297738, "grad_norm": 0.4918251633644104, "learning_rate": 7.887014791083213e-06, "loss": 0.5085, "step": 20788 }, { "epoch": 0.9540177137350282, "grad_norm": 0.5089061260223389, "learning_rate": 7.886814601659822e-06, "loss": 0.4307, "step": 20789 }, { "epoch": 0.9540636042402827, "grad_norm": 0.4239089787006378, "learning_rate": 7.886614405294552e-06, "loss": 0.3622, "step": 20790 }, { "epoch": 0.9541094947455372, "grad_norm": 0.5597901344299316, "learning_rate": 7.886414201987886e-06, "loss": 0.5794, "step": 20791 }, { "epoch": 0.9541553852507916, "grad_norm": 0.45320701599121094, "learning_rate": 7.886213991740305e-06, "loss": 0.4071, "step": 20792 }, { "epoch": 0.9542012757560461, "grad_norm": 0.474230021238327, "learning_rate": 7.88601377455229e-06, "loss": 0.4685, "step": 20793 }, { "epoch": 0.9542471662613006, "grad_norm": 0.4227583110332489, "learning_rate": 7.885813550424322e-06, "loss": 0.3669, "step": 20794 }, { "epoch": 0.954293056766555, "grad_norm": 0.46631181240081787, "learning_rate": 7.885613319356885e-06, "loss": 0.4262, "step": 20795 }, { "epoch": 0.9543389472718095, "grad_norm": 0.45406773686408997, "learning_rate": 7.885413081350455e-06, "loss": 0.4159, "step": 20796 }, { "epoch": 0.954384837777064, "grad_norm": 0.45712265372276306, "learning_rate": 7.88521283640552e-06, "loss": 0.4107, "step": 20797 }, { "epoch": 0.9544307282823183, "grad_norm": 0.7764745354652405, "learning_rate": 7.885012584522557e-06, "loss": 0.4635, "step": 20798 }, { "epoch": 0.9544766187875728, "grad_norm": 0.44126757979393005, "learning_rate": 7.884812325702051e-06, "loss": 0.4059, "step": 20799 }, { "epoch": 0.9545225092928273, "grad_norm": 0.449358195066452, "learning_rate": 7.884612059944481e-06, "loss": 0.3043, "step": 20800 }, { "epoch": 0.9545683997980817, "grad_norm": 0.437394380569458, "learning_rate": 7.88441178725033e-06, "loss": 0.4103, "step": 20801 }, { "epoch": 0.9546142903033362, "grad_norm": 0.4676348865032196, "learning_rate": 7.884211507620079e-06, "loss": 0.3948, "step": 20802 }, { "epoch": 0.9546601808085907, "grad_norm": 0.4307887852191925, "learning_rate": 7.884011221054208e-06, "loss": 0.3514, "step": 20803 }, { "epoch": 0.9547060713138452, "grad_norm": 0.5165539979934692, "learning_rate": 7.883810927553203e-06, "loss": 0.4866, "step": 20804 }, { "epoch": 0.9547519618190996, "grad_norm": 0.44806474447250366, "learning_rate": 7.88361062711754e-06, "loss": 0.3778, "step": 20805 }, { "epoch": 0.9547978523243541, "grad_norm": 0.47774726152420044, "learning_rate": 7.883410319747706e-06, "loss": 0.4258, "step": 20806 }, { "epoch": 0.9548437428296086, "grad_norm": 0.49291083216667175, "learning_rate": 7.883210005444179e-06, "loss": 0.4453, "step": 20807 }, { "epoch": 0.954889633334863, "grad_norm": 0.4454309940338135, "learning_rate": 7.883009684207441e-06, "loss": 0.3518, "step": 20808 }, { "epoch": 0.9549355238401175, "grad_norm": 0.46012288331985474, "learning_rate": 7.882809356037977e-06, "loss": 0.4144, "step": 20809 }, { "epoch": 0.954981414345372, "grad_norm": 0.4394964873790741, "learning_rate": 7.882609020936265e-06, "loss": 0.3166, "step": 20810 }, { "epoch": 0.9550273048506264, "grad_norm": 0.43290844559669495, "learning_rate": 7.882408678902789e-06, "loss": 0.3412, "step": 20811 }, { "epoch": 0.9550731953558809, "grad_norm": 0.4824354648590088, "learning_rate": 7.882208329938028e-06, "loss": 0.4184, "step": 20812 }, { "epoch": 0.9551190858611354, "grad_norm": 0.45939159393310547, "learning_rate": 7.882007974042466e-06, "loss": 0.3937, "step": 20813 }, { "epoch": 0.9551649763663898, "grad_norm": 0.4506071209907532, "learning_rate": 7.881807611216585e-06, "loss": 0.4092, "step": 20814 }, { "epoch": 0.9552108668716442, "grad_norm": 0.48458293080329895, "learning_rate": 7.881607241460865e-06, "loss": 0.4812, "step": 20815 }, { "epoch": 0.9552567573768987, "grad_norm": 0.4451203942298889, "learning_rate": 7.88140686477579e-06, "loss": 0.4007, "step": 20816 }, { "epoch": 0.9553026478821531, "grad_norm": 0.44466251134872437, "learning_rate": 7.881206481161843e-06, "loss": 0.3636, "step": 20817 }, { "epoch": 0.9553485383874076, "grad_norm": 0.4523621201515198, "learning_rate": 7.8810060906195e-06, "loss": 0.3544, "step": 20818 }, { "epoch": 0.9553944288926621, "grad_norm": 0.438016414642334, "learning_rate": 7.880805693149249e-06, "loss": 0.3771, "step": 20819 }, { "epoch": 0.9554403193979166, "grad_norm": 0.4882332384586334, "learning_rate": 7.88060528875157e-06, "loss": 0.4456, "step": 20820 }, { "epoch": 0.955486209903171, "grad_norm": 0.4875311255455017, "learning_rate": 7.880404877426943e-06, "loss": 0.4042, "step": 20821 }, { "epoch": 0.9555321004084255, "grad_norm": 0.46924158930778503, "learning_rate": 7.880204459175851e-06, "loss": 0.4049, "step": 20822 }, { "epoch": 0.95557799091368, "grad_norm": 0.4193507432937622, "learning_rate": 7.880004033998778e-06, "loss": 0.3298, "step": 20823 }, { "epoch": 0.9556238814189344, "grad_norm": 0.5255727171897888, "learning_rate": 7.879803601896203e-06, "loss": 0.4959, "step": 20824 }, { "epoch": 0.9556697719241889, "grad_norm": 0.44380924105644226, "learning_rate": 7.87960316286861e-06, "loss": 0.3631, "step": 20825 }, { "epoch": 0.9557156624294434, "grad_norm": 0.4121224880218506, "learning_rate": 7.87940271691648e-06, "loss": 0.3359, "step": 20826 }, { "epoch": 0.9557615529346978, "grad_norm": 0.48192572593688965, "learning_rate": 7.879202264040295e-06, "loss": 0.4581, "step": 20827 }, { "epoch": 0.9558074434399523, "grad_norm": 0.4743208587169647, "learning_rate": 7.879001804240538e-06, "loss": 0.4369, "step": 20828 }, { "epoch": 0.9558533339452068, "grad_norm": 0.48124873638153076, "learning_rate": 7.878801337517687e-06, "loss": 0.4854, "step": 20829 }, { "epoch": 0.9558992244504612, "grad_norm": 0.46403101086616516, "learning_rate": 7.87860086387223e-06, "loss": 0.3861, "step": 20830 }, { "epoch": 0.9559451149557157, "grad_norm": 0.4555166959762573, "learning_rate": 7.878400383304647e-06, "loss": 0.3918, "step": 20831 }, { "epoch": 0.9559910054609702, "grad_norm": 0.43670785427093506, "learning_rate": 7.878199895815419e-06, "loss": 0.4028, "step": 20832 }, { "epoch": 0.9560368959662245, "grad_norm": 0.4959140419960022, "learning_rate": 7.877999401405028e-06, "loss": 0.459, "step": 20833 }, { "epoch": 0.956082786471479, "grad_norm": 0.4522519111633301, "learning_rate": 7.877798900073958e-06, "loss": 0.3892, "step": 20834 }, { "epoch": 0.9561286769767335, "grad_norm": 0.43764665722846985, "learning_rate": 7.87759839182269e-06, "loss": 0.3898, "step": 20835 }, { "epoch": 0.9561745674819879, "grad_norm": 0.46248313784599304, "learning_rate": 7.877397876651704e-06, "loss": 0.4378, "step": 20836 }, { "epoch": 0.9562204579872424, "grad_norm": 0.45338696241378784, "learning_rate": 7.877197354561485e-06, "loss": 0.3937, "step": 20837 }, { "epoch": 0.9562663484924969, "grad_norm": 0.4152873754501343, "learning_rate": 7.876996825552515e-06, "loss": 0.3622, "step": 20838 }, { "epoch": 0.9563122389977514, "grad_norm": 0.4421502947807312, "learning_rate": 7.876796289625276e-06, "loss": 0.3506, "step": 20839 }, { "epoch": 0.9563581295030058, "grad_norm": 0.4926114082336426, "learning_rate": 7.876595746780248e-06, "loss": 0.4249, "step": 20840 }, { "epoch": 0.9564040200082603, "grad_norm": 0.4404122233390808, "learning_rate": 7.876395197017918e-06, "loss": 0.3656, "step": 20841 }, { "epoch": 0.9564499105135148, "grad_norm": 0.44246649742126465, "learning_rate": 7.876194640338764e-06, "loss": 0.348, "step": 20842 }, { "epoch": 0.9564958010187692, "grad_norm": 0.4617062211036682, "learning_rate": 7.87599407674327e-06, "loss": 0.3688, "step": 20843 }, { "epoch": 0.9565416915240237, "grad_norm": 0.5105952024459839, "learning_rate": 7.875793506231917e-06, "loss": 0.4995, "step": 20844 }, { "epoch": 0.9565875820292782, "grad_norm": 0.4528931677341461, "learning_rate": 7.875592928805189e-06, "loss": 0.3884, "step": 20845 }, { "epoch": 0.9566334725345326, "grad_norm": 0.43256860971450806, "learning_rate": 7.875392344463567e-06, "loss": 0.3758, "step": 20846 }, { "epoch": 0.9566793630397871, "grad_norm": 0.4371575117111206, "learning_rate": 7.875191753207534e-06, "loss": 0.3407, "step": 20847 }, { "epoch": 0.9567252535450416, "grad_norm": 0.4334792196750641, "learning_rate": 7.874991155037572e-06, "loss": 0.3409, "step": 20848 }, { "epoch": 0.956771144050296, "grad_norm": 0.48748162388801575, "learning_rate": 7.874790549954165e-06, "loss": 0.4891, "step": 20849 }, { "epoch": 0.9568170345555504, "grad_norm": 0.46133026480674744, "learning_rate": 7.874589937957793e-06, "loss": 0.4194, "step": 20850 }, { "epoch": 0.956862925060805, "grad_norm": 0.43453001976013184, "learning_rate": 7.874389319048938e-06, "loss": 0.3542, "step": 20851 }, { "epoch": 0.9569088155660593, "grad_norm": 0.4716585576534271, "learning_rate": 7.874188693228087e-06, "loss": 0.4297, "step": 20852 }, { "epoch": 0.9569547060713138, "grad_norm": 0.44578486680984497, "learning_rate": 7.873988060495717e-06, "loss": 0.3966, "step": 20853 }, { "epoch": 0.9570005965765683, "grad_norm": 0.4477722644805908, "learning_rate": 7.873787420852315e-06, "loss": 0.4122, "step": 20854 }, { "epoch": 0.9570464870818227, "grad_norm": 0.43393674492836, "learning_rate": 7.873586774298359e-06, "loss": 0.3834, "step": 20855 }, { "epoch": 0.9570923775870772, "grad_norm": 0.47393274307250977, "learning_rate": 7.873386120834334e-06, "loss": 0.3582, "step": 20856 }, { "epoch": 0.9571382680923317, "grad_norm": 0.4155539572238922, "learning_rate": 7.873185460460724e-06, "loss": 0.3437, "step": 20857 }, { "epoch": 0.9571841585975862, "grad_norm": 0.47553786635398865, "learning_rate": 7.872984793178007e-06, "loss": 0.4073, "step": 20858 }, { "epoch": 0.9572300491028406, "grad_norm": 0.4622083604335785, "learning_rate": 7.87278411898667e-06, "loss": 0.3941, "step": 20859 }, { "epoch": 0.9572759396080951, "grad_norm": 0.406045138835907, "learning_rate": 7.872583437887194e-06, "loss": 0.3177, "step": 20860 }, { "epoch": 0.9573218301133496, "grad_norm": 0.47201257944107056, "learning_rate": 7.87238274988006e-06, "loss": 0.4067, "step": 20861 }, { "epoch": 0.957367720618604, "grad_norm": 0.4298066794872284, "learning_rate": 7.872182054965755e-06, "loss": 0.3962, "step": 20862 }, { "epoch": 0.9574136111238585, "grad_norm": 0.468861848115921, "learning_rate": 7.871981353144756e-06, "loss": 0.4244, "step": 20863 }, { "epoch": 0.957459501629113, "grad_norm": 0.5078380107879639, "learning_rate": 7.871780644417549e-06, "loss": 0.4262, "step": 20864 }, { "epoch": 0.9575053921343674, "grad_norm": 0.4903586506843567, "learning_rate": 7.871579928784617e-06, "loss": 0.4362, "step": 20865 }, { "epoch": 0.9575512826396219, "grad_norm": 0.47607383131980896, "learning_rate": 7.87137920624644e-06, "loss": 0.4865, "step": 20866 }, { "epoch": 0.9575971731448764, "grad_norm": 0.4929946959018707, "learning_rate": 7.871178476803503e-06, "loss": 0.4602, "step": 20867 }, { "epoch": 0.9576430636501307, "grad_norm": 0.4569588303565979, "learning_rate": 7.870977740456289e-06, "loss": 0.3949, "step": 20868 }, { "epoch": 0.9576889541553852, "grad_norm": 0.43647006154060364, "learning_rate": 7.870776997205275e-06, "loss": 0.3603, "step": 20869 }, { "epoch": 0.9577348446606397, "grad_norm": 0.44699007272720337, "learning_rate": 7.870576247050954e-06, "loss": 0.4012, "step": 20870 }, { "epoch": 0.9577807351658941, "grad_norm": 0.45056474208831787, "learning_rate": 7.8703754899938e-06, "loss": 0.4085, "step": 20871 }, { "epoch": 0.9578266256711486, "grad_norm": 0.4675581753253937, "learning_rate": 7.870174726034301e-06, "loss": 0.4308, "step": 20872 }, { "epoch": 0.9578725161764031, "grad_norm": 0.4479956030845642, "learning_rate": 7.869973955172936e-06, "loss": 0.3491, "step": 20873 }, { "epoch": 0.9579184066816576, "grad_norm": 0.45006901025772095, "learning_rate": 7.86977317741019e-06, "loss": 0.3839, "step": 20874 }, { "epoch": 0.957964297186912, "grad_norm": 0.4226112961769104, "learning_rate": 7.869572392746543e-06, "loss": 0.3572, "step": 20875 }, { "epoch": 0.9580101876921665, "grad_norm": 0.4571353495121002, "learning_rate": 7.869371601182483e-06, "loss": 0.4363, "step": 20876 }, { "epoch": 0.958056078197421, "grad_norm": 0.4391780495643616, "learning_rate": 7.86917080271849e-06, "loss": 0.3488, "step": 20877 }, { "epoch": 0.9581019687026754, "grad_norm": 0.43637895584106445, "learning_rate": 7.868969997355045e-06, "loss": 0.4066, "step": 20878 }, { "epoch": 0.9581478592079299, "grad_norm": 0.479347288608551, "learning_rate": 7.868769185092634e-06, "loss": 0.4566, "step": 20879 }, { "epoch": 0.9581937497131844, "grad_norm": 0.4303530752658844, "learning_rate": 7.868568365931737e-06, "loss": 0.2995, "step": 20880 }, { "epoch": 0.9582396402184388, "grad_norm": 0.45962753891944885, "learning_rate": 7.86836753987284e-06, "loss": 0.4237, "step": 20881 }, { "epoch": 0.9582855307236933, "grad_norm": 0.4313022494316101, "learning_rate": 7.868166706916424e-06, "loss": 0.3594, "step": 20882 }, { "epoch": 0.9583314212289478, "grad_norm": 0.42465588450431824, "learning_rate": 7.867965867062972e-06, "loss": 0.3177, "step": 20883 }, { "epoch": 0.9583773117342022, "grad_norm": 0.4715488851070404, "learning_rate": 7.867765020312967e-06, "loss": 0.4126, "step": 20884 }, { "epoch": 0.9584232022394567, "grad_norm": 0.42767980694770813, "learning_rate": 7.867564166666892e-06, "loss": 0.3868, "step": 20885 }, { "epoch": 0.9584690927447111, "grad_norm": 0.44922980666160583, "learning_rate": 7.867363306125231e-06, "loss": 0.3999, "step": 20886 }, { "epoch": 0.9585149832499655, "grad_norm": 0.44685885310173035, "learning_rate": 7.867162438688464e-06, "loss": 0.3643, "step": 20887 }, { "epoch": 0.95856087375522, "grad_norm": 0.4425986111164093, "learning_rate": 7.866961564357081e-06, "loss": 0.4006, "step": 20888 }, { "epoch": 0.9586067642604745, "grad_norm": 0.45024463534355164, "learning_rate": 7.866760683131557e-06, "loss": 0.3878, "step": 20889 }, { "epoch": 0.9586526547657289, "grad_norm": 0.4349338710308075, "learning_rate": 7.866559795012378e-06, "loss": 0.3493, "step": 20890 }, { "epoch": 0.9586985452709834, "grad_norm": 0.42824018001556396, "learning_rate": 7.866358900000028e-06, "loss": 0.3583, "step": 20891 }, { "epoch": 0.9587444357762379, "grad_norm": 0.468671590089798, "learning_rate": 7.86615799809499e-06, "loss": 0.4025, "step": 20892 }, { "epoch": 0.9587903262814924, "grad_norm": 0.44965383410453796, "learning_rate": 7.865957089297747e-06, "loss": 0.371, "step": 20893 }, { "epoch": 0.9588362167867468, "grad_norm": 0.4732389450073242, "learning_rate": 7.865756173608779e-06, "loss": 0.4197, "step": 20894 }, { "epoch": 0.9588821072920013, "grad_norm": 0.4478726387023926, "learning_rate": 7.865555251028573e-06, "loss": 0.34, "step": 20895 }, { "epoch": 0.9589279977972558, "grad_norm": 0.41316279768943787, "learning_rate": 7.865354321557614e-06, "loss": 0.3472, "step": 20896 }, { "epoch": 0.9589738883025102, "grad_norm": 0.4628801941871643, "learning_rate": 7.86515338519638e-06, "loss": 0.4314, "step": 20897 }, { "epoch": 0.9590197788077647, "grad_norm": 0.4545949697494507, "learning_rate": 7.864952441945355e-06, "loss": 0.3971, "step": 20898 }, { "epoch": 0.9590656693130192, "grad_norm": 0.4226926565170288, "learning_rate": 7.864751491805026e-06, "loss": 0.352, "step": 20899 }, { "epoch": 0.9591115598182736, "grad_norm": 0.4720945656299591, "learning_rate": 7.864550534775872e-06, "loss": 0.476, "step": 20900 }, { "epoch": 0.9591574503235281, "grad_norm": 0.6004335284233093, "learning_rate": 7.864349570858378e-06, "loss": 0.346, "step": 20901 }, { "epoch": 0.9592033408287826, "grad_norm": 0.4516894817352295, "learning_rate": 7.86414860005303e-06, "loss": 0.4036, "step": 20902 }, { "epoch": 0.959249231334037, "grad_norm": 0.43953096866607666, "learning_rate": 7.863947622360307e-06, "loss": 0.3593, "step": 20903 }, { "epoch": 0.9592951218392914, "grad_norm": 0.45459914207458496, "learning_rate": 7.863746637780691e-06, "loss": 0.4401, "step": 20904 }, { "epoch": 0.9593410123445459, "grad_norm": 0.6540465354919434, "learning_rate": 7.863545646314672e-06, "loss": 0.4282, "step": 20905 }, { "epoch": 0.9593869028498003, "grad_norm": 0.4416063725948334, "learning_rate": 7.863344647962729e-06, "loss": 0.3214, "step": 20906 }, { "epoch": 0.9594327933550548, "grad_norm": 0.44792690873146057, "learning_rate": 7.863143642725343e-06, "loss": 0.3353, "step": 20907 }, { "epoch": 0.9594786838603093, "grad_norm": 0.4199868142604828, "learning_rate": 7.862942630603004e-06, "loss": 0.3419, "step": 20908 }, { "epoch": 0.9595245743655637, "grad_norm": 0.41641104221343994, "learning_rate": 7.86274161159619e-06, "loss": 0.3035, "step": 20909 }, { "epoch": 0.9595704648708182, "grad_norm": 0.4729260206222534, "learning_rate": 7.862540585705384e-06, "loss": 0.4102, "step": 20910 }, { "epoch": 0.9596163553760727, "grad_norm": 0.45794007182121277, "learning_rate": 7.862339552931073e-06, "loss": 0.3912, "step": 20911 }, { "epoch": 0.9596622458813272, "grad_norm": 0.4144395589828491, "learning_rate": 7.862138513273739e-06, "loss": 0.3077, "step": 20912 }, { "epoch": 0.9597081363865816, "grad_norm": 0.4522329270839691, "learning_rate": 7.861937466733866e-06, "loss": 0.3491, "step": 20913 }, { "epoch": 0.9597540268918361, "grad_norm": 0.4080905020236969, "learning_rate": 7.861736413311935e-06, "loss": 0.3622, "step": 20914 }, { "epoch": 0.9597999173970906, "grad_norm": 0.4383505582809448, "learning_rate": 7.861535353008431e-06, "loss": 0.3965, "step": 20915 }, { "epoch": 0.959845807902345, "grad_norm": 0.4504050612449646, "learning_rate": 7.861334285823838e-06, "loss": 0.3965, "step": 20916 }, { "epoch": 0.9598916984075995, "grad_norm": 0.48084089159965515, "learning_rate": 7.86113321175864e-06, "loss": 0.4578, "step": 20917 }, { "epoch": 0.959937588912854, "grad_norm": 0.49057522416114807, "learning_rate": 7.86093213081332e-06, "loss": 0.4462, "step": 20918 }, { "epoch": 0.9599834794181084, "grad_norm": 0.45509740710258484, "learning_rate": 7.86073104298836e-06, "loss": 0.4448, "step": 20919 }, { "epoch": 0.9600293699233629, "grad_norm": 0.4939304292201996, "learning_rate": 7.860529948284243e-06, "loss": 0.5069, "step": 20920 }, { "epoch": 0.9600752604286173, "grad_norm": 0.4295014441013336, "learning_rate": 7.860328846701457e-06, "loss": 0.3644, "step": 20921 }, { "epoch": 0.9601211509338717, "grad_norm": 0.4555025100708008, "learning_rate": 7.860127738240481e-06, "loss": 0.4255, "step": 20922 }, { "epoch": 0.9601670414391262, "grad_norm": 0.4333624243736267, "learning_rate": 7.859926622901804e-06, "loss": 0.3702, "step": 20923 }, { "epoch": 0.9602129319443807, "grad_norm": 0.4415133595466614, "learning_rate": 7.859725500685904e-06, "loss": 0.3791, "step": 20924 }, { "epoch": 0.9602588224496351, "grad_norm": 0.4623734951019287, "learning_rate": 7.859524371593266e-06, "loss": 0.4285, "step": 20925 }, { "epoch": 0.9603047129548896, "grad_norm": 0.5063101649284363, "learning_rate": 7.859323235624373e-06, "loss": 0.4574, "step": 20926 }, { "epoch": 0.9603506034601441, "grad_norm": 0.45520374178886414, "learning_rate": 7.859122092779712e-06, "loss": 0.4017, "step": 20927 }, { "epoch": 0.9603964939653986, "grad_norm": 0.43664202094078064, "learning_rate": 7.858920943059765e-06, "loss": 0.3731, "step": 20928 }, { "epoch": 0.960442384470653, "grad_norm": 0.45038917660713196, "learning_rate": 7.858719786465015e-06, "loss": 0.3485, "step": 20929 }, { "epoch": 0.9604882749759075, "grad_norm": 0.46419450640678406, "learning_rate": 7.858518622995945e-06, "loss": 0.3368, "step": 20930 }, { "epoch": 0.960534165481162, "grad_norm": 0.46753352880477905, "learning_rate": 7.858317452653042e-06, "loss": 0.3796, "step": 20931 }, { "epoch": 0.9605800559864164, "grad_norm": 0.4680713415145874, "learning_rate": 7.858116275436785e-06, "loss": 0.4106, "step": 20932 }, { "epoch": 0.9606259464916709, "grad_norm": 0.46388867497444153, "learning_rate": 7.857915091347663e-06, "loss": 0.3925, "step": 20933 }, { "epoch": 0.9606718369969254, "grad_norm": 0.4984924793243408, "learning_rate": 7.857713900386157e-06, "loss": 0.4396, "step": 20934 }, { "epoch": 0.9607177275021798, "grad_norm": 0.43912291526794434, "learning_rate": 7.85751270255275e-06, "loss": 0.3252, "step": 20935 }, { "epoch": 0.9607636180074343, "grad_norm": 0.48950526118278503, "learning_rate": 7.857311497847926e-06, "loss": 0.4279, "step": 20936 }, { "epoch": 0.9608095085126888, "grad_norm": 0.4625595211982727, "learning_rate": 7.857110286272171e-06, "loss": 0.3978, "step": 20937 }, { "epoch": 0.9608553990179431, "grad_norm": 0.457743763923645, "learning_rate": 7.856909067825967e-06, "loss": 0.3734, "step": 20938 }, { "epoch": 0.9609012895231976, "grad_norm": 0.44777795672416687, "learning_rate": 7.8567078425098e-06, "loss": 0.3738, "step": 20939 }, { "epoch": 0.9609471800284521, "grad_norm": 0.43666982650756836, "learning_rate": 7.856506610324149e-06, "loss": 0.3561, "step": 20940 }, { "epoch": 0.9609930705337065, "grad_norm": 0.43774130940437317, "learning_rate": 7.856305371269504e-06, "loss": 0.3882, "step": 20941 }, { "epoch": 0.961038961038961, "grad_norm": 0.4220998287200928, "learning_rate": 7.856104125346345e-06, "loss": 0.3022, "step": 20942 }, { "epoch": 0.9610848515442155, "grad_norm": 0.4717370569705963, "learning_rate": 7.855902872555155e-06, "loss": 0.3858, "step": 20943 }, { "epoch": 0.9611307420494699, "grad_norm": 0.4620283842086792, "learning_rate": 7.855701612896423e-06, "loss": 0.4077, "step": 20944 }, { "epoch": 0.9611766325547244, "grad_norm": 0.4665840268135071, "learning_rate": 7.855500346370628e-06, "loss": 0.3592, "step": 20945 }, { "epoch": 0.9612225230599789, "grad_norm": 0.4513375163078308, "learning_rate": 7.855299072978258e-06, "loss": 0.3693, "step": 20946 }, { "epoch": 0.9612684135652334, "grad_norm": 0.42944517731666565, "learning_rate": 7.855097792719794e-06, "loss": 0.3375, "step": 20947 }, { "epoch": 0.9613143040704878, "grad_norm": 0.45802319049835205, "learning_rate": 7.854896505595719e-06, "loss": 0.419, "step": 20948 }, { "epoch": 0.9613601945757423, "grad_norm": 0.44568687677383423, "learning_rate": 7.85469521160652e-06, "loss": 0.3452, "step": 20949 }, { "epoch": 0.9614060850809968, "grad_norm": 0.43978944420814514, "learning_rate": 7.854493910752681e-06, "loss": 0.3525, "step": 20950 }, { "epoch": 0.9614519755862512, "grad_norm": 0.44539856910705566, "learning_rate": 7.854292603034685e-06, "loss": 0.3518, "step": 20951 }, { "epoch": 0.9614978660915057, "grad_norm": 0.43859735131263733, "learning_rate": 7.854091288453015e-06, "loss": 0.3841, "step": 20952 }, { "epoch": 0.9615437565967602, "grad_norm": 0.4652654528617859, "learning_rate": 7.853889967008157e-06, "loss": 0.4054, "step": 20953 }, { "epoch": 0.9615896471020146, "grad_norm": 0.4313921630382538, "learning_rate": 7.853688638700594e-06, "loss": 0.369, "step": 20954 }, { "epoch": 0.961635537607269, "grad_norm": 0.4485391676425934, "learning_rate": 7.85348730353081e-06, "loss": 0.3667, "step": 20955 }, { "epoch": 0.9616814281125236, "grad_norm": 0.46054553985595703, "learning_rate": 7.853285961499291e-06, "loss": 0.4359, "step": 20956 }, { "epoch": 0.9617273186177779, "grad_norm": 0.4694073498249054, "learning_rate": 7.853084612606519e-06, "loss": 0.3659, "step": 20957 }, { "epoch": 0.9617732091230324, "grad_norm": 0.47876498103141785, "learning_rate": 7.85288325685298e-06, "loss": 0.4051, "step": 20958 }, { "epoch": 0.9618190996282869, "grad_norm": 0.44158291816711426, "learning_rate": 7.852681894239155e-06, "loss": 0.395, "step": 20959 }, { "epoch": 0.9618649901335413, "grad_norm": 0.4468650817871094, "learning_rate": 7.85248052476553e-06, "loss": 0.3955, "step": 20960 }, { "epoch": 0.9619108806387958, "grad_norm": 0.42960789799690247, "learning_rate": 7.852279148432594e-06, "loss": 0.3786, "step": 20961 }, { "epoch": 0.9619567711440503, "grad_norm": 0.4395516812801361, "learning_rate": 7.852077765240823e-06, "loss": 0.375, "step": 20962 }, { "epoch": 0.9620026616493048, "grad_norm": 0.45655351877212524, "learning_rate": 7.851876375190706e-06, "loss": 0.3913, "step": 20963 }, { "epoch": 0.9620485521545592, "grad_norm": 0.44486233592033386, "learning_rate": 7.851674978282725e-06, "loss": 0.3391, "step": 20964 }, { "epoch": 0.9620944426598137, "grad_norm": 0.44822797179222107, "learning_rate": 7.851473574517368e-06, "loss": 0.381, "step": 20965 }, { "epoch": 0.9621403331650682, "grad_norm": 0.44449833035469055, "learning_rate": 7.851272163895116e-06, "loss": 0.4055, "step": 20966 }, { "epoch": 0.9621862236703226, "grad_norm": 0.43741005659103394, "learning_rate": 7.851070746416454e-06, "loss": 0.3562, "step": 20967 }, { "epoch": 0.9622321141755771, "grad_norm": 0.4560190439224243, "learning_rate": 7.850869322081867e-06, "loss": 0.3746, "step": 20968 }, { "epoch": 0.9622780046808316, "grad_norm": 0.43159958720207214, "learning_rate": 7.85066789089184e-06, "loss": 0.3675, "step": 20969 }, { "epoch": 0.962323895186086, "grad_norm": 0.44962483644485474, "learning_rate": 7.850466452846855e-06, "loss": 0.4299, "step": 20970 }, { "epoch": 0.9623697856913405, "grad_norm": 0.3931387960910797, "learning_rate": 7.8502650079474e-06, "loss": 0.2903, "step": 20971 }, { "epoch": 0.962415676196595, "grad_norm": 0.43479329347610474, "learning_rate": 7.850063556193954e-06, "loss": 0.3648, "step": 20972 }, { "epoch": 0.9624615667018493, "grad_norm": 0.46950218081474304, "learning_rate": 7.849862097587007e-06, "loss": 0.4388, "step": 20973 }, { "epoch": 0.9625074572071038, "grad_norm": 0.44674310088157654, "learning_rate": 7.84966063212704e-06, "loss": 0.3559, "step": 20974 }, { "epoch": 0.9625533477123583, "grad_norm": 0.4506414234638214, "learning_rate": 7.84945915981454e-06, "loss": 0.3176, "step": 20975 }, { "epoch": 0.9625992382176127, "grad_norm": 0.43529245257377625, "learning_rate": 7.84925768064999e-06, "loss": 0.3358, "step": 20976 }, { "epoch": 0.9626451287228672, "grad_norm": 0.4200156629085541, "learning_rate": 7.849056194633872e-06, "loss": 0.3879, "step": 20977 }, { "epoch": 0.9626910192281217, "grad_norm": 0.47084841132164, "learning_rate": 7.848854701766674e-06, "loss": 0.4465, "step": 20978 }, { "epoch": 0.9627369097333761, "grad_norm": 0.45223331451416016, "learning_rate": 7.84865320204888e-06, "loss": 0.4225, "step": 20979 }, { "epoch": 0.9627828002386306, "grad_norm": 0.48021236062049866, "learning_rate": 7.848451695480974e-06, "loss": 0.4146, "step": 20980 }, { "epoch": 0.9628286907438851, "grad_norm": 0.4375455379486084, "learning_rate": 7.848250182063441e-06, "loss": 0.365, "step": 20981 }, { "epoch": 0.9628745812491396, "grad_norm": 0.4971928298473358, "learning_rate": 7.848048661796765e-06, "loss": 0.4547, "step": 20982 }, { "epoch": 0.962920471754394, "grad_norm": 0.46941232681274414, "learning_rate": 7.847847134681432e-06, "loss": 0.4001, "step": 20983 }, { "epoch": 0.9629663622596485, "grad_norm": 0.46193164587020874, "learning_rate": 7.847645600717923e-06, "loss": 0.4513, "step": 20984 }, { "epoch": 0.963012252764903, "grad_norm": 0.43983450531959534, "learning_rate": 7.847444059906728e-06, "loss": 0.366, "step": 20985 }, { "epoch": 0.9630581432701574, "grad_norm": 0.4574683904647827, "learning_rate": 7.847242512248326e-06, "loss": 0.3674, "step": 20986 }, { "epoch": 0.9631040337754119, "grad_norm": 0.43494752049446106, "learning_rate": 7.847040957743207e-06, "loss": 0.3434, "step": 20987 }, { "epoch": 0.9631499242806664, "grad_norm": 0.40936917066574097, "learning_rate": 7.846839396391852e-06, "loss": 0.3169, "step": 20988 }, { "epoch": 0.9631958147859208, "grad_norm": 0.43817195296287537, "learning_rate": 7.846637828194746e-06, "loss": 0.3793, "step": 20989 }, { "epoch": 0.9632417052911753, "grad_norm": 0.4614676535129547, "learning_rate": 7.846436253152375e-06, "loss": 0.4012, "step": 20990 }, { "epoch": 0.9632875957964298, "grad_norm": 0.46629729866981506, "learning_rate": 7.846234671265223e-06, "loss": 0.4019, "step": 20991 }, { "epoch": 0.9633334863016841, "grad_norm": 0.4600580632686615, "learning_rate": 7.846033082533775e-06, "loss": 0.4099, "step": 20992 }, { "epoch": 0.9633793768069386, "grad_norm": 0.4633762836456299, "learning_rate": 7.845831486958515e-06, "loss": 0.415, "step": 20993 }, { "epoch": 0.9634252673121931, "grad_norm": 0.5003665089607239, "learning_rate": 7.84562988453993e-06, "loss": 0.4958, "step": 20994 }, { "epoch": 0.9634711578174475, "grad_norm": 0.422722727060318, "learning_rate": 7.845428275278502e-06, "loss": 0.3444, "step": 20995 }, { "epoch": 0.963517048322702, "grad_norm": 0.48283904790878296, "learning_rate": 7.845226659174718e-06, "loss": 0.4693, "step": 20996 }, { "epoch": 0.9635629388279565, "grad_norm": 0.49916785955429077, "learning_rate": 7.84502503622906e-06, "loss": 0.4765, "step": 20997 }, { "epoch": 0.9636088293332109, "grad_norm": 0.44016388058662415, "learning_rate": 7.844823406442015e-06, "loss": 0.4169, "step": 20998 }, { "epoch": 0.9636547198384654, "grad_norm": 0.4360785484313965, "learning_rate": 7.84462176981407e-06, "loss": 0.3507, "step": 20999 }, { "epoch": 0.9637006103437199, "grad_norm": 0.42157822847366333, "learning_rate": 7.844420126345705e-06, "loss": 0.3367, "step": 21000 }, { "epoch": 0.9637465008489744, "grad_norm": 0.46879708766937256, "learning_rate": 7.844218476037408e-06, "loss": 0.4685, "step": 21001 }, { "epoch": 0.9637923913542288, "grad_norm": 0.41719454526901245, "learning_rate": 7.844016818889665e-06, "loss": 0.3618, "step": 21002 }, { "epoch": 0.9638382818594833, "grad_norm": 0.4576605260372162, "learning_rate": 7.843815154902956e-06, "loss": 0.413, "step": 21003 }, { "epoch": 0.9638841723647378, "grad_norm": 0.46193671226501465, "learning_rate": 7.843613484077772e-06, "loss": 0.3368, "step": 21004 }, { "epoch": 0.9639300628699922, "grad_norm": 0.5043448209762573, "learning_rate": 7.843411806414592e-06, "loss": 0.4139, "step": 21005 }, { "epoch": 0.9639759533752467, "grad_norm": 0.451667845249176, "learning_rate": 7.843210121913907e-06, "loss": 0.4207, "step": 21006 }, { "epoch": 0.9640218438805012, "grad_norm": 0.4618699252605438, "learning_rate": 7.843008430576198e-06, "loss": 0.4121, "step": 21007 }, { "epoch": 0.9640677343857555, "grad_norm": 0.4833938777446747, "learning_rate": 7.84280673240195e-06, "loss": 0.4302, "step": 21008 }, { "epoch": 0.96411362489101, "grad_norm": 0.4793543815612793, "learning_rate": 7.84260502739165e-06, "loss": 0.374, "step": 21009 }, { "epoch": 0.9641595153962645, "grad_norm": 0.44299066066741943, "learning_rate": 7.842403315545781e-06, "loss": 0.3669, "step": 21010 }, { "epoch": 0.9642054059015189, "grad_norm": 0.45445573329925537, "learning_rate": 7.84220159686483e-06, "loss": 0.3856, "step": 21011 }, { "epoch": 0.9642512964067734, "grad_norm": 0.438190758228302, "learning_rate": 7.841999871349282e-06, "loss": 0.3919, "step": 21012 }, { "epoch": 0.9642971869120279, "grad_norm": 0.4598987102508545, "learning_rate": 7.84179813899962e-06, "loss": 0.4253, "step": 21013 }, { "epoch": 0.9643430774172823, "grad_norm": 0.45900970697402954, "learning_rate": 7.841596399816331e-06, "loss": 0.4082, "step": 21014 }, { "epoch": 0.9643889679225368, "grad_norm": 0.4682950973510742, "learning_rate": 7.8413946537999e-06, "loss": 0.3842, "step": 21015 }, { "epoch": 0.9644348584277913, "grad_norm": 0.4462374150753021, "learning_rate": 7.84119290095081e-06, "loss": 0.3508, "step": 21016 }, { "epoch": 0.9644807489330458, "grad_norm": 0.4466807246208191, "learning_rate": 7.840991141269548e-06, "loss": 0.3515, "step": 21017 }, { "epoch": 0.9645266394383002, "grad_norm": 0.4427233040332794, "learning_rate": 7.840789374756601e-06, "loss": 0.3656, "step": 21018 }, { "epoch": 0.9645725299435547, "grad_norm": 0.44723081588745117, "learning_rate": 7.840587601412451e-06, "loss": 0.3239, "step": 21019 }, { "epoch": 0.9646184204488092, "grad_norm": 0.46869444847106934, "learning_rate": 7.840385821237583e-06, "loss": 0.4478, "step": 21020 }, { "epoch": 0.9646643109540636, "grad_norm": 0.46258923411369324, "learning_rate": 7.840184034232485e-06, "loss": 0.4799, "step": 21021 }, { "epoch": 0.9647102014593181, "grad_norm": 0.46032845973968506, "learning_rate": 7.83998224039764e-06, "loss": 0.3959, "step": 21022 }, { "epoch": 0.9647560919645726, "grad_norm": 0.43300849199295044, "learning_rate": 7.839780439733535e-06, "loss": 0.3844, "step": 21023 }, { "epoch": 0.964801982469827, "grad_norm": 0.46978551149368286, "learning_rate": 7.839578632240654e-06, "loss": 0.437, "step": 21024 }, { "epoch": 0.9648478729750815, "grad_norm": 0.4499961733818054, "learning_rate": 7.839376817919482e-06, "loss": 0.4128, "step": 21025 }, { "epoch": 0.964893763480336, "grad_norm": 0.5136227607727051, "learning_rate": 7.839174996770505e-06, "loss": 0.5116, "step": 21026 }, { "epoch": 0.9649396539855903, "grad_norm": 0.4140956997871399, "learning_rate": 7.838973168794208e-06, "loss": 0.3553, "step": 21027 }, { "epoch": 0.9649855444908448, "grad_norm": 0.47263965010643005, "learning_rate": 7.838771333991077e-06, "loss": 0.4575, "step": 21028 }, { "epoch": 0.9650314349960993, "grad_norm": 0.4347728192806244, "learning_rate": 7.838569492361597e-06, "loss": 0.4071, "step": 21029 }, { "epoch": 0.9650773255013537, "grad_norm": 0.4320104718208313, "learning_rate": 7.838367643906253e-06, "loss": 0.3186, "step": 21030 }, { "epoch": 0.9651232160066082, "grad_norm": 0.42781323194503784, "learning_rate": 7.838165788625531e-06, "loss": 0.3421, "step": 21031 }, { "epoch": 0.9651691065118627, "grad_norm": 0.45310214161872864, "learning_rate": 7.837963926519916e-06, "loss": 0.3606, "step": 21032 }, { "epoch": 0.9652149970171171, "grad_norm": 0.44083476066589355, "learning_rate": 7.837762057589893e-06, "loss": 0.3582, "step": 21033 }, { "epoch": 0.9652608875223716, "grad_norm": 0.4155220687389374, "learning_rate": 7.837560181835947e-06, "loss": 0.3218, "step": 21034 }, { "epoch": 0.9653067780276261, "grad_norm": 0.4668925106525421, "learning_rate": 7.837358299258565e-06, "loss": 0.3452, "step": 21035 }, { "epoch": 0.9653526685328806, "grad_norm": 0.4866597056388855, "learning_rate": 7.837156409858233e-06, "loss": 0.4141, "step": 21036 }, { "epoch": 0.965398559038135, "grad_norm": 0.4267602264881134, "learning_rate": 7.836954513635435e-06, "loss": 0.3619, "step": 21037 }, { "epoch": 0.9654444495433895, "grad_norm": 0.4759226441383362, "learning_rate": 7.836752610590657e-06, "loss": 0.4767, "step": 21038 }, { "epoch": 0.965490340048644, "grad_norm": 0.42197442054748535, "learning_rate": 7.836550700724383e-06, "loss": 0.3296, "step": 21039 }, { "epoch": 0.9655362305538984, "grad_norm": 0.44642555713653564, "learning_rate": 7.8363487840371e-06, "loss": 0.4062, "step": 21040 }, { "epoch": 0.9655821210591529, "grad_norm": 0.571806013584137, "learning_rate": 7.836146860529293e-06, "loss": 0.4227, "step": 21041 }, { "epoch": 0.9656280115644074, "grad_norm": 0.4606640934944153, "learning_rate": 7.83594493020145e-06, "loss": 0.3871, "step": 21042 }, { "epoch": 0.9656739020696617, "grad_norm": 0.4591871201992035, "learning_rate": 7.835742993054051e-06, "loss": 0.3578, "step": 21043 }, { "epoch": 0.9657197925749162, "grad_norm": 0.4183909296989441, "learning_rate": 7.835541049087589e-06, "loss": 0.3201, "step": 21044 }, { "epoch": 0.9657656830801707, "grad_norm": 0.49177372455596924, "learning_rate": 7.835339098302544e-06, "loss": 0.4626, "step": 21045 }, { "epoch": 0.9658115735854251, "grad_norm": 0.5221524238586426, "learning_rate": 7.835137140699403e-06, "loss": 0.4181, "step": 21046 }, { "epoch": 0.9658574640906796, "grad_norm": 0.45898792147636414, "learning_rate": 7.83493517627865e-06, "loss": 0.394, "step": 21047 }, { "epoch": 0.9659033545959341, "grad_norm": 0.4413659870624542, "learning_rate": 7.834733205040776e-06, "loss": 0.3642, "step": 21048 }, { "epoch": 0.9659492451011885, "grad_norm": 0.4048059284687042, "learning_rate": 7.834531226986262e-06, "loss": 0.3191, "step": 21049 }, { "epoch": 0.965995135606443, "grad_norm": 0.486472487449646, "learning_rate": 7.834329242115594e-06, "loss": 0.4802, "step": 21050 }, { "epoch": 0.9660410261116975, "grad_norm": 0.4253324270248413, "learning_rate": 7.83412725042926e-06, "loss": 0.3773, "step": 21051 }, { "epoch": 0.966086916616952, "grad_norm": 0.45402684807777405, "learning_rate": 7.833925251927746e-06, "loss": 0.3646, "step": 21052 }, { "epoch": 0.9661328071222064, "grad_norm": 0.46813470125198364, "learning_rate": 7.833723246611534e-06, "loss": 0.4363, "step": 21053 }, { "epoch": 0.9661786976274609, "grad_norm": 0.48408305644989014, "learning_rate": 7.833521234481112e-06, "loss": 0.3983, "step": 21054 }, { "epoch": 0.9662245881327154, "grad_norm": 0.42531606554985046, "learning_rate": 7.833319215536965e-06, "loss": 0.3695, "step": 21055 }, { "epoch": 0.9662704786379698, "grad_norm": 0.4530765414237976, "learning_rate": 7.833117189779583e-06, "loss": 0.4031, "step": 21056 }, { "epoch": 0.9663163691432243, "grad_norm": 0.4948331117630005, "learning_rate": 7.832915157209444e-06, "loss": 0.5189, "step": 21057 }, { "epoch": 0.9663622596484788, "grad_norm": 0.4198783040046692, "learning_rate": 7.832713117827041e-06, "loss": 0.3443, "step": 21058 }, { "epoch": 0.9664081501537332, "grad_norm": 0.4507180154323578, "learning_rate": 7.832511071632856e-06, "loss": 0.4008, "step": 21059 }, { "epoch": 0.9664540406589877, "grad_norm": 0.47342008352279663, "learning_rate": 7.832309018627375e-06, "loss": 0.4691, "step": 21060 }, { "epoch": 0.9664999311642422, "grad_norm": 0.4678534269332886, "learning_rate": 7.832106958811087e-06, "loss": 0.392, "step": 21061 }, { "epoch": 0.9665458216694965, "grad_norm": 0.4501871168613434, "learning_rate": 7.831904892184473e-06, "loss": 0.417, "step": 21062 }, { "epoch": 0.966591712174751, "grad_norm": 0.4435878396034241, "learning_rate": 7.831702818748024e-06, "loss": 0.3537, "step": 21063 }, { "epoch": 0.9666376026800055, "grad_norm": 0.42602279782295227, "learning_rate": 7.83150073850222e-06, "loss": 0.375, "step": 21064 }, { "epoch": 0.9666834931852599, "grad_norm": 0.44667181372642517, "learning_rate": 7.831298651447553e-06, "loss": 0.4209, "step": 21065 }, { "epoch": 0.9667293836905144, "grad_norm": 0.427642822265625, "learning_rate": 7.831096557584506e-06, "loss": 0.3737, "step": 21066 }, { "epoch": 0.9667752741957689, "grad_norm": 0.4266265332698822, "learning_rate": 7.830894456913565e-06, "loss": 0.3259, "step": 21067 }, { "epoch": 0.9668211647010233, "grad_norm": 0.4106382429599762, "learning_rate": 7.830692349435217e-06, "loss": 0.3306, "step": 21068 }, { "epoch": 0.9668670552062778, "grad_norm": 0.43538612127304077, "learning_rate": 7.830490235149946e-06, "loss": 0.3844, "step": 21069 }, { "epoch": 0.9669129457115323, "grad_norm": 0.504745364189148, "learning_rate": 7.83028811405824e-06, "loss": 0.5082, "step": 21070 }, { "epoch": 0.9669588362167868, "grad_norm": 0.438167542219162, "learning_rate": 7.830085986160584e-06, "loss": 0.3666, "step": 21071 }, { "epoch": 0.9670047267220412, "grad_norm": 0.47544726729393005, "learning_rate": 7.829883851457466e-06, "loss": 0.4385, "step": 21072 }, { "epoch": 0.9670506172272957, "grad_norm": 0.47371727228164673, "learning_rate": 7.829681709949367e-06, "loss": 0.4712, "step": 21073 }, { "epoch": 0.9670965077325502, "grad_norm": 0.43370920419692993, "learning_rate": 7.82947956163678e-06, "loss": 0.3615, "step": 21074 }, { "epoch": 0.9671423982378046, "grad_norm": 0.410230815410614, "learning_rate": 7.829277406520187e-06, "loss": 0.3019, "step": 21075 }, { "epoch": 0.9671882887430591, "grad_norm": 0.5153003334999084, "learning_rate": 7.829075244600073e-06, "loss": 0.4857, "step": 21076 }, { "epoch": 0.9672341792483136, "grad_norm": 0.48740634322166443, "learning_rate": 7.828873075876929e-06, "loss": 0.4853, "step": 21077 }, { "epoch": 0.967280069753568, "grad_norm": 0.4406638443470001, "learning_rate": 7.828670900351237e-06, "loss": 0.3795, "step": 21078 }, { "epoch": 0.9673259602588224, "grad_norm": 0.45234760642051697, "learning_rate": 7.828468718023482e-06, "loss": 0.4253, "step": 21079 }, { "epoch": 0.967371850764077, "grad_norm": 0.44228604435920715, "learning_rate": 7.828266528894155e-06, "loss": 0.374, "step": 21080 }, { "epoch": 0.9674177412693313, "grad_norm": 0.44766926765441895, "learning_rate": 7.828064332963737e-06, "loss": 0.3742, "step": 21081 }, { "epoch": 0.9674636317745858, "grad_norm": 0.46373316645622253, "learning_rate": 7.82786213023272e-06, "loss": 0.4184, "step": 21082 }, { "epoch": 0.9675095222798403, "grad_norm": 0.442690908908844, "learning_rate": 7.827659920701585e-06, "loss": 0.4045, "step": 21083 }, { "epoch": 0.9675554127850947, "grad_norm": 0.44209998846054077, "learning_rate": 7.82745770437082e-06, "loss": 0.3812, "step": 21084 }, { "epoch": 0.9676013032903492, "grad_norm": 0.47506585717201233, "learning_rate": 7.827255481240912e-06, "loss": 0.4477, "step": 21085 }, { "epoch": 0.9676471937956037, "grad_norm": 0.4498728811740875, "learning_rate": 7.827053251312348e-06, "loss": 0.348, "step": 21086 }, { "epoch": 0.9676930843008581, "grad_norm": 0.43462246656417847, "learning_rate": 7.826851014585613e-06, "loss": 0.3296, "step": 21087 }, { "epoch": 0.9677389748061126, "grad_norm": 0.4444442391395569, "learning_rate": 7.826648771061192e-06, "loss": 0.4297, "step": 21088 }, { "epoch": 0.9677848653113671, "grad_norm": 0.4629170596599579, "learning_rate": 7.826446520739575e-06, "loss": 0.3847, "step": 21089 }, { "epoch": 0.9678307558166216, "grad_norm": 0.47325995564460754, "learning_rate": 7.826244263621243e-06, "loss": 0.376, "step": 21090 }, { "epoch": 0.967876646321876, "grad_norm": 0.3935380280017853, "learning_rate": 7.826041999706689e-06, "loss": 0.2765, "step": 21091 }, { "epoch": 0.9679225368271305, "grad_norm": 0.4389955401420593, "learning_rate": 7.825839728996394e-06, "loss": 0.3521, "step": 21092 }, { "epoch": 0.967968427332385, "grad_norm": 0.4877501130104065, "learning_rate": 7.825637451490846e-06, "loss": 0.4147, "step": 21093 }, { "epoch": 0.9680143178376394, "grad_norm": 0.47158002853393555, "learning_rate": 7.825435167190533e-06, "loss": 0.4302, "step": 21094 }, { "epoch": 0.9680602083428939, "grad_norm": 0.4394701421260834, "learning_rate": 7.825232876095939e-06, "loss": 0.4122, "step": 21095 }, { "epoch": 0.9681060988481484, "grad_norm": 0.4515072703361511, "learning_rate": 7.825030578207552e-06, "loss": 0.3639, "step": 21096 }, { "epoch": 0.9681519893534027, "grad_norm": 0.4458642899990082, "learning_rate": 7.824828273525858e-06, "loss": 0.4163, "step": 21097 }, { "epoch": 0.9681978798586572, "grad_norm": 0.44289305806159973, "learning_rate": 7.824625962051344e-06, "loss": 0.3678, "step": 21098 }, { "epoch": 0.9682437703639117, "grad_norm": 0.5296807289123535, "learning_rate": 7.824423643784495e-06, "loss": 0.5281, "step": 21099 }, { "epoch": 0.9682896608691661, "grad_norm": 0.45049849152565, "learning_rate": 7.8242213187258e-06, "loss": 0.375, "step": 21100 }, { "epoch": 0.9683355513744206, "grad_norm": 0.43462711572647095, "learning_rate": 7.824018986875743e-06, "loss": 0.3342, "step": 21101 }, { "epoch": 0.9683814418796751, "grad_norm": 0.46062541007995605, "learning_rate": 7.823816648234813e-06, "loss": 0.3784, "step": 21102 }, { "epoch": 0.9684273323849295, "grad_norm": 0.453469455242157, "learning_rate": 7.823614302803493e-06, "loss": 0.4211, "step": 21103 }, { "epoch": 0.968473222890184, "grad_norm": 0.43442052602767944, "learning_rate": 7.823411950582273e-06, "loss": 0.3566, "step": 21104 }, { "epoch": 0.9685191133954385, "grad_norm": 0.4289034605026245, "learning_rate": 7.823209591571638e-06, "loss": 0.3398, "step": 21105 }, { "epoch": 0.968565003900693, "grad_norm": 0.43402358889579773, "learning_rate": 7.823007225772077e-06, "loss": 0.3721, "step": 21106 }, { "epoch": 0.9686108944059474, "grad_norm": 0.4767535328865051, "learning_rate": 7.822804853184072e-06, "loss": 0.4797, "step": 21107 }, { "epoch": 0.9686567849112019, "grad_norm": 0.4255218505859375, "learning_rate": 7.822602473808114e-06, "loss": 0.3872, "step": 21108 }, { "epoch": 0.9687026754164564, "grad_norm": 0.4691382348537445, "learning_rate": 7.822400087644686e-06, "loss": 0.3923, "step": 21109 }, { "epoch": 0.9687485659217108, "grad_norm": 0.42823004722595215, "learning_rate": 7.822197694694278e-06, "loss": 0.3972, "step": 21110 }, { "epoch": 0.9687944564269653, "grad_norm": 0.4656471014022827, "learning_rate": 7.821995294957374e-06, "loss": 0.3782, "step": 21111 }, { "epoch": 0.9688403469322198, "grad_norm": 0.4245080351829529, "learning_rate": 7.821792888434463e-06, "loss": 0.3502, "step": 21112 }, { "epoch": 0.9688862374374742, "grad_norm": 0.41375812888145447, "learning_rate": 7.821590475126031e-06, "loss": 0.297, "step": 21113 }, { "epoch": 0.9689321279427286, "grad_norm": 0.5158706903457642, "learning_rate": 7.821388055032564e-06, "loss": 0.4769, "step": 21114 }, { "epoch": 0.9689780184479831, "grad_norm": 0.4364645779132843, "learning_rate": 7.821185628154548e-06, "loss": 0.3315, "step": 21115 }, { "epoch": 0.9690239089532375, "grad_norm": 0.6473841667175293, "learning_rate": 7.820983194492473e-06, "loss": 0.35, "step": 21116 }, { "epoch": 0.969069799458492, "grad_norm": 0.4310930371284485, "learning_rate": 7.820780754046822e-06, "loss": 0.3448, "step": 21117 }, { "epoch": 0.9691156899637465, "grad_norm": 0.4209231734275818, "learning_rate": 7.820578306818086e-06, "loss": 0.3445, "step": 21118 }, { "epoch": 0.9691615804690009, "grad_norm": 0.44525182247161865, "learning_rate": 7.820375852806748e-06, "loss": 0.3936, "step": 21119 }, { "epoch": 0.9692074709742554, "grad_norm": 0.4654008150100708, "learning_rate": 7.820173392013295e-06, "loss": 0.4431, "step": 21120 }, { "epoch": 0.9692533614795099, "grad_norm": 0.4261218309402466, "learning_rate": 7.819970924438216e-06, "loss": 0.3627, "step": 21121 }, { "epoch": 0.9692992519847643, "grad_norm": 0.4572601318359375, "learning_rate": 7.819768450081997e-06, "loss": 0.3545, "step": 21122 }, { "epoch": 0.9693451424900188, "grad_norm": 0.43420615792274475, "learning_rate": 7.819565968945123e-06, "loss": 0.3455, "step": 21123 }, { "epoch": 0.9693910329952733, "grad_norm": 0.4477049708366394, "learning_rate": 7.819363481028087e-06, "loss": 0.3986, "step": 21124 }, { "epoch": 0.9694369235005278, "grad_norm": 0.4257064461708069, "learning_rate": 7.819160986331368e-06, "loss": 0.3416, "step": 21125 }, { "epoch": 0.9694828140057822, "grad_norm": 0.4889322519302368, "learning_rate": 7.818958484855456e-06, "loss": 0.4409, "step": 21126 }, { "epoch": 0.9695287045110367, "grad_norm": 0.45275017619132996, "learning_rate": 7.818755976600842e-06, "loss": 0.3871, "step": 21127 }, { "epoch": 0.9695745950162912, "grad_norm": 0.4485742747783661, "learning_rate": 7.818553461568009e-06, "loss": 0.4102, "step": 21128 }, { "epoch": 0.9696204855215456, "grad_norm": 0.4430753290653229, "learning_rate": 7.818350939757442e-06, "loss": 0.4007, "step": 21129 }, { "epoch": 0.9696663760268001, "grad_norm": 0.4442596137523651, "learning_rate": 7.81814841116963e-06, "loss": 0.3871, "step": 21130 }, { "epoch": 0.9697122665320546, "grad_norm": 0.4851895868778229, "learning_rate": 7.817945875805062e-06, "loss": 0.4121, "step": 21131 }, { "epoch": 0.9697581570373089, "grad_norm": 0.44421815872192383, "learning_rate": 7.817743333664226e-06, "loss": 0.3984, "step": 21132 }, { "epoch": 0.9698040475425634, "grad_norm": 0.4512902498245239, "learning_rate": 7.817540784747603e-06, "loss": 0.4218, "step": 21133 }, { "epoch": 0.9698499380478179, "grad_norm": 0.4349035918712616, "learning_rate": 7.817338229055686e-06, "loss": 0.3482, "step": 21134 }, { "epoch": 0.9698958285530723, "grad_norm": 0.44838371872901917, "learning_rate": 7.817135666588957e-06, "loss": 0.3546, "step": 21135 }, { "epoch": 0.9699417190583268, "grad_norm": 0.47474604845046997, "learning_rate": 7.81693309734791e-06, "loss": 0.4221, "step": 21136 }, { "epoch": 0.9699876095635813, "grad_norm": 0.45858582854270935, "learning_rate": 7.816730521333024e-06, "loss": 0.3988, "step": 21137 }, { "epoch": 0.9700335000688357, "grad_norm": 0.4085386395454407, "learning_rate": 7.81652793854479e-06, "loss": 0.3196, "step": 21138 }, { "epoch": 0.9700793905740902, "grad_norm": 0.5764844417572021, "learning_rate": 7.816325348983698e-06, "loss": 0.3993, "step": 21139 }, { "epoch": 0.9701252810793447, "grad_norm": 0.4346778988838196, "learning_rate": 7.816122752650232e-06, "loss": 0.3432, "step": 21140 }, { "epoch": 0.9701711715845992, "grad_norm": 0.5452931523323059, "learning_rate": 7.815920149544877e-06, "loss": 0.4214, "step": 21141 }, { "epoch": 0.9702170620898536, "grad_norm": 0.4635372757911682, "learning_rate": 7.815717539668127e-06, "loss": 0.4394, "step": 21142 }, { "epoch": 0.9702629525951081, "grad_norm": 0.47558003664016724, "learning_rate": 7.815514923020463e-06, "loss": 0.4838, "step": 21143 }, { "epoch": 0.9703088431003626, "grad_norm": 0.4808276295661926, "learning_rate": 7.815312299602371e-06, "loss": 0.4896, "step": 21144 }, { "epoch": 0.970354733605617, "grad_norm": 0.418119341135025, "learning_rate": 7.815109669414345e-06, "loss": 0.3234, "step": 21145 }, { "epoch": 0.9704006241108715, "grad_norm": 0.4798365831375122, "learning_rate": 7.814907032456869e-06, "loss": 0.4698, "step": 21146 }, { "epoch": 0.970446514616126, "grad_norm": 0.4244768023490906, "learning_rate": 7.814704388730427e-06, "loss": 0.3539, "step": 21147 }, { "epoch": 0.9704924051213804, "grad_norm": 0.43037888407707214, "learning_rate": 7.814501738235513e-06, "loss": 0.3373, "step": 21148 }, { "epoch": 0.9705382956266349, "grad_norm": 0.4320886731147766, "learning_rate": 7.814299080972609e-06, "loss": 0.3163, "step": 21149 }, { "epoch": 0.9705841861318893, "grad_norm": 0.43833795189857483, "learning_rate": 7.814096416942203e-06, "loss": 0.3364, "step": 21150 }, { "epoch": 0.9706300766371437, "grad_norm": 0.4364391267299652, "learning_rate": 7.813893746144783e-06, "loss": 0.376, "step": 21151 }, { "epoch": 0.9706759671423982, "grad_norm": 0.4638986885547638, "learning_rate": 7.813691068580837e-06, "loss": 0.3938, "step": 21152 }, { "epoch": 0.9707218576476527, "grad_norm": 0.4367411434650421, "learning_rate": 7.813488384250853e-06, "loss": 0.3725, "step": 21153 }, { "epoch": 0.9707677481529071, "grad_norm": 0.5896845459938049, "learning_rate": 7.813285693155319e-06, "loss": 0.3863, "step": 21154 }, { "epoch": 0.9708136386581616, "grad_norm": 0.4394785165786743, "learning_rate": 7.813082995294718e-06, "loss": 0.386, "step": 21155 }, { "epoch": 0.9708595291634161, "grad_norm": 0.47244152426719666, "learning_rate": 7.812880290669542e-06, "loss": 0.4755, "step": 21156 }, { "epoch": 0.9709054196686705, "grad_norm": 0.4761359989643097, "learning_rate": 7.812677579280274e-06, "loss": 0.394, "step": 21157 }, { "epoch": 0.970951310173925, "grad_norm": 0.5197685956954956, "learning_rate": 7.812474861127405e-06, "loss": 0.5072, "step": 21158 }, { "epoch": 0.9709972006791795, "grad_norm": 0.45970508456230164, "learning_rate": 7.812272136211426e-06, "loss": 0.3974, "step": 21159 }, { "epoch": 0.971043091184434, "grad_norm": 0.487955778837204, "learning_rate": 7.812069404532815e-06, "loss": 0.4804, "step": 21160 }, { "epoch": 0.9710889816896884, "grad_norm": 0.4408271610736847, "learning_rate": 7.811866666092065e-06, "loss": 0.3387, "step": 21161 }, { "epoch": 0.9711348721949429, "grad_norm": 0.4879717528820038, "learning_rate": 7.811663920889665e-06, "loss": 0.5086, "step": 21162 }, { "epoch": 0.9711807627001974, "grad_norm": 0.44969722628593445, "learning_rate": 7.8114611689261e-06, "loss": 0.3655, "step": 21163 }, { "epoch": 0.9712266532054518, "grad_norm": 0.46268007159233093, "learning_rate": 7.811258410201859e-06, "loss": 0.3541, "step": 21164 }, { "epoch": 0.9712725437107063, "grad_norm": 0.39891159534454346, "learning_rate": 7.811055644717426e-06, "loss": 0.2671, "step": 21165 }, { "epoch": 0.9713184342159608, "grad_norm": 0.5018169283866882, "learning_rate": 7.810852872473294e-06, "loss": 0.4653, "step": 21166 }, { "epoch": 0.9713643247212151, "grad_norm": 0.4044756293296814, "learning_rate": 7.810650093469948e-06, "loss": 0.291, "step": 21167 }, { "epoch": 0.9714102152264696, "grad_norm": 0.4241901934146881, "learning_rate": 7.810447307707875e-06, "loss": 0.2995, "step": 21168 }, { "epoch": 0.9714561057317241, "grad_norm": 0.45387449860572815, "learning_rate": 7.810244515187564e-06, "loss": 0.4064, "step": 21169 }, { "epoch": 0.9715019962369785, "grad_norm": 0.44848009943962097, "learning_rate": 7.8100417159095e-06, "loss": 0.4305, "step": 21170 }, { "epoch": 0.971547886742233, "grad_norm": 0.43236395716667175, "learning_rate": 7.809838909874174e-06, "loss": 0.3945, "step": 21171 }, { "epoch": 0.9715937772474875, "grad_norm": 0.47296348214149475, "learning_rate": 7.809636097082072e-06, "loss": 0.3897, "step": 21172 }, { "epoch": 0.9716396677527419, "grad_norm": 0.45015496015548706, "learning_rate": 7.809433277533681e-06, "loss": 0.3663, "step": 21173 }, { "epoch": 0.9716855582579964, "grad_norm": 0.4243199825286865, "learning_rate": 7.809230451229491e-06, "loss": 0.3466, "step": 21174 }, { "epoch": 0.9717314487632509, "grad_norm": 0.46948209404945374, "learning_rate": 7.809027618169986e-06, "loss": 0.4214, "step": 21175 }, { "epoch": 0.9717773392685053, "grad_norm": 0.421957790851593, "learning_rate": 7.80882477835566e-06, "loss": 0.3309, "step": 21176 }, { "epoch": 0.9718232297737598, "grad_norm": 0.42971280217170715, "learning_rate": 7.808621931786992e-06, "loss": 0.352, "step": 21177 }, { "epoch": 0.9718691202790143, "grad_norm": 0.4489957094192505, "learning_rate": 7.808419078464479e-06, "loss": 0.4202, "step": 21178 }, { "epoch": 0.9719150107842688, "grad_norm": 0.42000627517700195, "learning_rate": 7.808216218388602e-06, "loss": 0.3301, "step": 21179 }, { "epoch": 0.9719609012895232, "grad_norm": 0.43955910205841064, "learning_rate": 7.808013351559853e-06, "loss": 0.3903, "step": 21180 }, { "epoch": 0.9720067917947777, "grad_norm": 0.4894885718822479, "learning_rate": 7.807810477978716e-06, "loss": 0.4783, "step": 21181 }, { "epoch": 0.9720526823000322, "grad_norm": 0.4848614037036896, "learning_rate": 7.807607597645684e-06, "loss": 0.3969, "step": 21182 }, { "epoch": 0.9720985728052866, "grad_norm": 0.4755922853946686, "learning_rate": 7.80740471056124e-06, "loss": 0.333, "step": 21183 }, { "epoch": 0.972144463310541, "grad_norm": 0.4566933214664459, "learning_rate": 7.807201816725873e-06, "loss": 0.3954, "step": 21184 }, { "epoch": 0.9721903538157955, "grad_norm": 0.41520270705223083, "learning_rate": 7.806998916140075e-06, "loss": 0.3111, "step": 21185 }, { "epoch": 0.9722362443210499, "grad_norm": 0.45734426379203796, "learning_rate": 7.806796008804327e-06, "loss": 0.3766, "step": 21186 }, { "epoch": 0.9722821348263044, "grad_norm": 0.4437125623226166, "learning_rate": 7.806593094719122e-06, "loss": 0.3519, "step": 21187 }, { "epoch": 0.9723280253315589, "grad_norm": 0.4472242295742035, "learning_rate": 7.806390173884944e-06, "loss": 0.3862, "step": 21188 }, { "epoch": 0.9723739158368133, "grad_norm": 0.4509631395339966, "learning_rate": 7.806187246302286e-06, "loss": 0.3667, "step": 21189 }, { "epoch": 0.9724198063420678, "grad_norm": 0.44084715843200684, "learning_rate": 7.805984311971632e-06, "loss": 0.3625, "step": 21190 }, { "epoch": 0.9724656968473223, "grad_norm": 0.48754870891571045, "learning_rate": 7.805781370893471e-06, "loss": 0.4501, "step": 21191 }, { "epoch": 0.9725115873525767, "grad_norm": 0.44010522961616516, "learning_rate": 7.805578423068293e-06, "loss": 0.3676, "step": 21192 }, { "epoch": 0.9725574778578312, "grad_norm": 0.4474983513355255, "learning_rate": 7.805375468496583e-06, "loss": 0.4333, "step": 21193 }, { "epoch": 0.9726033683630857, "grad_norm": 0.43101781606674194, "learning_rate": 7.80517250717883e-06, "loss": 0.3148, "step": 21194 }, { "epoch": 0.9726492588683402, "grad_norm": 0.4258679151535034, "learning_rate": 7.804969539115525e-06, "loss": 0.3564, "step": 21195 }, { "epoch": 0.9726951493735946, "grad_norm": 0.4388556480407715, "learning_rate": 7.804766564307152e-06, "loss": 0.3925, "step": 21196 }, { "epoch": 0.9727410398788491, "grad_norm": 0.45231202244758606, "learning_rate": 7.8045635827542e-06, "loss": 0.3976, "step": 21197 }, { "epoch": 0.9727869303841036, "grad_norm": 0.47787415981292725, "learning_rate": 7.804360594457158e-06, "loss": 0.4625, "step": 21198 }, { "epoch": 0.972832820889358, "grad_norm": 0.45963776111602783, "learning_rate": 7.804157599416516e-06, "loss": 0.3989, "step": 21199 }, { "epoch": 0.9728787113946125, "grad_norm": 0.4579993784427643, "learning_rate": 7.803954597632757e-06, "loss": 0.4139, "step": 21200 }, { "epoch": 0.972924601899867, "grad_norm": 0.44423502683639526, "learning_rate": 7.803751589106374e-06, "loss": 0.345, "step": 21201 }, { "epoch": 0.9729704924051213, "grad_norm": 0.38857337832450867, "learning_rate": 7.803548573837851e-06, "loss": 0.3109, "step": 21202 }, { "epoch": 0.9730163829103758, "grad_norm": 0.4532497823238373, "learning_rate": 7.803345551827681e-06, "loss": 0.4056, "step": 21203 }, { "epoch": 0.9730622734156303, "grad_norm": 0.4809609353542328, "learning_rate": 7.803142523076348e-06, "loss": 0.4587, "step": 21204 }, { "epoch": 0.9731081639208847, "grad_norm": 0.40073466300964355, "learning_rate": 7.802939487584343e-06, "loss": 0.3098, "step": 21205 }, { "epoch": 0.9731540544261392, "grad_norm": 0.4464949369430542, "learning_rate": 7.802736445352152e-06, "loss": 0.3664, "step": 21206 }, { "epoch": 0.9731999449313937, "grad_norm": 0.4136262536048889, "learning_rate": 7.802533396380268e-06, "loss": 0.3525, "step": 21207 }, { "epoch": 0.9732458354366481, "grad_norm": 0.44977107644081116, "learning_rate": 7.80233034066917e-06, "loss": 0.3864, "step": 21208 }, { "epoch": 0.9732917259419026, "grad_norm": 0.44609615206718445, "learning_rate": 7.802127278219356e-06, "loss": 0.3833, "step": 21209 }, { "epoch": 0.9733376164471571, "grad_norm": 0.47108766436576843, "learning_rate": 7.80192420903131e-06, "loss": 0.4076, "step": 21210 }, { "epoch": 0.9733835069524115, "grad_norm": 0.46633467078208923, "learning_rate": 7.80172113310552e-06, "loss": 0.3995, "step": 21211 }, { "epoch": 0.973429397457666, "grad_norm": 0.4224914312362671, "learning_rate": 7.801518050442473e-06, "loss": 0.3394, "step": 21212 }, { "epoch": 0.9734752879629205, "grad_norm": 0.44341763854026794, "learning_rate": 7.801314961042661e-06, "loss": 0.3569, "step": 21213 }, { "epoch": 0.973521178468175, "grad_norm": 0.45019346475601196, "learning_rate": 7.801111864906571e-06, "loss": 0.3801, "step": 21214 }, { "epoch": 0.9735670689734294, "grad_norm": 0.44327664375305176, "learning_rate": 7.80090876203469e-06, "loss": 0.3898, "step": 21215 }, { "epoch": 0.9736129594786839, "grad_norm": 0.45129847526550293, "learning_rate": 7.800705652427509e-06, "loss": 0.4004, "step": 21216 }, { "epoch": 0.9736588499839384, "grad_norm": 0.4433116018772125, "learning_rate": 7.800502536085514e-06, "loss": 0.3998, "step": 21217 }, { "epoch": 0.9737047404891928, "grad_norm": 0.4202738404273987, "learning_rate": 7.800299413009195e-06, "loss": 0.3308, "step": 21218 }, { "epoch": 0.9737506309944473, "grad_norm": 0.44121769070625305, "learning_rate": 7.80009628319904e-06, "loss": 0.3602, "step": 21219 }, { "epoch": 0.9737965214997018, "grad_norm": 0.4185062646865845, "learning_rate": 7.799893146655536e-06, "loss": 0.3001, "step": 21220 }, { "epoch": 0.9738424120049561, "grad_norm": 0.46005144715309143, "learning_rate": 7.799690003379172e-06, "loss": 0.388, "step": 21221 }, { "epoch": 0.9738883025102106, "grad_norm": 0.44479671120643616, "learning_rate": 7.799486853370436e-06, "loss": 0.3703, "step": 21222 }, { "epoch": 0.9739341930154651, "grad_norm": 0.5487539172172546, "learning_rate": 7.79928369662982e-06, "loss": 0.3823, "step": 21223 }, { "epoch": 0.9739800835207195, "grad_norm": 0.42862531542778015, "learning_rate": 7.79908053315781e-06, "loss": 0.4007, "step": 21224 }, { "epoch": 0.974025974025974, "grad_norm": 0.45887845754623413, "learning_rate": 7.798877362954895e-06, "loss": 0.3923, "step": 21225 }, { "epoch": 0.9740718645312285, "grad_norm": 0.4348030090332031, "learning_rate": 7.798674186021563e-06, "loss": 0.3599, "step": 21226 }, { "epoch": 0.9741177550364829, "grad_norm": 0.45813387632369995, "learning_rate": 7.798471002358303e-06, "loss": 0.4275, "step": 21227 }, { "epoch": 0.9741636455417374, "grad_norm": 0.4671647548675537, "learning_rate": 7.798267811965603e-06, "loss": 0.3307, "step": 21228 }, { "epoch": 0.9742095360469919, "grad_norm": 0.4675450026988983, "learning_rate": 7.798064614843953e-06, "loss": 0.3933, "step": 21229 }, { "epoch": 0.9742554265522464, "grad_norm": 0.4739743173122406, "learning_rate": 7.797861410993839e-06, "loss": 0.4181, "step": 21230 }, { "epoch": 0.9743013170575008, "grad_norm": 0.4238833785057068, "learning_rate": 7.797658200415752e-06, "loss": 0.3638, "step": 21231 }, { "epoch": 0.9743472075627553, "grad_norm": 0.46204468607902527, "learning_rate": 7.79745498311018e-06, "loss": 0.4128, "step": 21232 }, { "epoch": 0.9743930980680098, "grad_norm": 0.5130534768104553, "learning_rate": 7.797251759077612e-06, "loss": 0.5088, "step": 21233 }, { "epoch": 0.9744389885732642, "grad_norm": 0.43905895948410034, "learning_rate": 7.797048528318537e-06, "loss": 0.3916, "step": 21234 }, { "epoch": 0.9744848790785187, "grad_norm": 0.4126501679420471, "learning_rate": 7.79684529083344e-06, "loss": 0.324, "step": 21235 }, { "epoch": 0.9745307695837732, "grad_norm": 0.44259434938430786, "learning_rate": 7.796642046622817e-06, "loss": 0.3917, "step": 21236 }, { "epoch": 0.9745766600890275, "grad_norm": 0.4427030086517334, "learning_rate": 7.79643879568715e-06, "loss": 0.3769, "step": 21237 }, { "epoch": 0.974622550594282, "grad_norm": 0.44074147939682007, "learning_rate": 7.796235538026931e-06, "loss": 0.389, "step": 21238 }, { "epoch": 0.9746684410995365, "grad_norm": 0.48109227418899536, "learning_rate": 7.796032273642649e-06, "loss": 0.4461, "step": 21239 }, { "epoch": 0.9747143316047909, "grad_norm": 0.4920327663421631, "learning_rate": 7.795829002534789e-06, "loss": 0.476, "step": 21240 }, { "epoch": 0.9747602221100454, "grad_norm": 0.4298250675201416, "learning_rate": 7.795625724703846e-06, "loss": 0.3411, "step": 21241 }, { "epoch": 0.9748061126152999, "grad_norm": 0.5079853534698486, "learning_rate": 7.795422440150302e-06, "loss": 0.4579, "step": 21242 }, { "epoch": 0.9748520031205543, "grad_norm": 0.4972611963748932, "learning_rate": 7.795219148874651e-06, "loss": 0.4889, "step": 21243 }, { "epoch": 0.9748978936258088, "grad_norm": 0.44802266359329224, "learning_rate": 7.795015850877381e-06, "loss": 0.3993, "step": 21244 }, { "epoch": 0.9749437841310633, "grad_norm": 0.4328779876232147, "learning_rate": 7.79481254615898e-06, "loss": 0.3291, "step": 21245 }, { "epoch": 0.9749896746363177, "grad_norm": 0.456595242023468, "learning_rate": 7.794609234719936e-06, "loss": 0.3895, "step": 21246 }, { "epoch": 0.9750355651415722, "grad_norm": 0.46625617146492004, "learning_rate": 7.794405916560738e-06, "loss": 0.3816, "step": 21247 }, { "epoch": 0.9750814556468267, "grad_norm": 0.5095096230506897, "learning_rate": 7.794202591681878e-06, "loss": 0.4663, "step": 21248 }, { "epoch": 0.9751273461520812, "grad_norm": 0.4543043076992035, "learning_rate": 7.793999260083842e-06, "loss": 0.3773, "step": 21249 }, { "epoch": 0.9751732366573356, "grad_norm": 0.41282159090042114, "learning_rate": 7.793795921767118e-06, "loss": 0.3197, "step": 21250 }, { "epoch": 0.9752191271625901, "grad_norm": 0.4473336935043335, "learning_rate": 7.793592576732197e-06, "loss": 0.3975, "step": 21251 }, { "epoch": 0.9752650176678446, "grad_norm": 0.4673854410648346, "learning_rate": 7.793389224979568e-06, "loss": 0.4064, "step": 21252 }, { "epoch": 0.975310908173099, "grad_norm": 0.4323561489582062, "learning_rate": 7.79318586650972e-06, "loss": 0.3272, "step": 21253 }, { "epoch": 0.9753567986783535, "grad_norm": 0.43160274624824524, "learning_rate": 7.79298250132314e-06, "loss": 0.3748, "step": 21254 }, { "epoch": 0.975402689183608, "grad_norm": 0.4504204988479614, "learning_rate": 7.79277912942032e-06, "loss": 0.3937, "step": 21255 }, { "epoch": 0.9754485796888623, "grad_norm": 0.466084361076355, "learning_rate": 7.792575750801748e-06, "loss": 0.4584, "step": 21256 }, { "epoch": 0.9754944701941168, "grad_norm": 0.44342315196990967, "learning_rate": 7.792372365467912e-06, "loss": 0.354, "step": 21257 }, { "epoch": 0.9755403606993713, "grad_norm": 0.4270223379135132, "learning_rate": 7.792168973419301e-06, "loss": 0.3491, "step": 21258 }, { "epoch": 0.9755862512046257, "grad_norm": 0.46173208951950073, "learning_rate": 7.791965574656406e-06, "loss": 0.361, "step": 21259 }, { "epoch": 0.9756321417098802, "grad_norm": 0.5064407587051392, "learning_rate": 7.791762169179715e-06, "loss": 0.425, "step": 21260 }, { "epoch": 0.9756780322151347, "grad_norm": 0.42244037985801697, "learning_rate": 7.791558756989716e-06, "loss": 0.3744, "step": 21261 }, { "epoch": 0.9757239227203891, "grad_norm": 0.5055184364318848, "learning_rate": 7.7913553380869e-06, "loss": 0.4271, "step": 21262 }, { "epoch": 0.9757698132256436, "grad_norm": 0.40691080689430237, "learning_rate": 7.791151912471755e-06, "loss": 0.3124, "step": 21263 }, { "epoch": 0.9758157037308981, "grad_norm": 0.41500139236450195, "learning_rate": 7.790948480144771e-06, "loss": 0.3222, "step": 21264 }, { "epoch": 0.9758615942361525, "grad_norm": 0.4799208641052246, "learning_rate": 7.790745041106437e-06, "loss": 0.4426, "step": 21265 }, { "epoch": 0.975907484741407, "grad_norm": 0.4495140314102173, "learning_rate": 7.79054159535724e-06, "loss": 0.3828, "step": 21266 }, { "epoch": 0.9759533752466615, "grad_norm": 0.40395477414131165, "learning_rate": 7.790338142897675e-06, "loss": 0.3175, "step": 21267 }, { "epoch": 0.975999265751916, "grad_norm": 0.4843035936355591, "learning_rate": 7.790134683728224e-06, "loss": 0.4215, "step": 21268 }, { "epoch": 0.9760451562571704, "grad_norm": 0.45031601190567017, "learning_rate": 7.78993121784938e-06, "loss": 0.3719, "step": 21269 }, { "epoch": 0.9760910467624249, "grad_norm": 0.4547281265258789, "learning_rate": 7.789727745261635e-06, "loss": 0.4158, "step": 21270 }, { "epoch": 0.9761369372676794, "grad_norm": 0.4601418375968933, "learning_rate": 7.789524265965472e-06, "loss": 0.396, "step": 21271 }, { "epoch": 0.9761828277729337, "grad_norm": 0.4114876985549927, "learning_rate": 7.789320779961385e-06, "loss": 0.317, "step": 21272 }, { "epoch": 0.9762287182781882, "grad_norm": 0.43526938557624817, "learning_rate": 7.789117287249861e-06, "loss": 0.3713, "step": 21273 }, { "epoch": 0.9762746087834427, "grad_norm": 0.46151474118232727, "learning_rate": 7.78891378783139e-06, "loss": 0.4576, "step": 21274 }, { "epoch": 0.9763204992886971, "grad_norm": 0.4807446599006653, "learning_rate": 7.788710281706463e-06, "loss": 0.4393, "step": 21275 }, { "epoch": 0.9763663897939516, "grad_norm": 0.450620174407959, "learning_rate": 7.788506768875567e-06, "loss": 0.3955, "step": 21276 }, { "epoch": 0.9764122802992061, "grad_norm": 0.47831541299819946, "learning_rate": 7.78830324933919e-06, "loss": 0.473, "step": 21277 }, { "epoch": 0.9764581708044605, "grad_norm": 0.49323272705078125, "learning_rate": 7.788099723097829e-06, "loss": 0.4659, "step": 21278 }, { "epoch": 0.976504061309715, "grad_norm": 0.46170616149902344, "learning_rate": 7.787896190151964e-06, "loss": 0.437, "step": 21279 }, { "epoch": 0.9765499518149695, "grad_norm": 0.4136543273925781, "learning_rate": 7.78769265050209e-06, "loss": 0.3737, "step": 21280 }, { "epoch": 0.9765958423202239, "grad_norm": 0.4596693813800812, "learning_rate": 7.787489104148696e-06, "loss": 0.374, "step": 21281 }, { "epoch": 0.9766417328254784, "grad_norm": 0.4372900426387787, "learning_rate": 7.787285551092268e-06, "loss": 0.3675, "step": 21282 }, { "epoch": 0.9766876233307329, "grad_norm": 0.48668229579925537, "learning_rate": 7.787081991333299e-06, "loss": 0.4074, "step": 21283 }, { "epoch": 0.9767335138359874, "grad_norm": 0.3620638847351074, "learning_rate": 7.786878424872277e-06, "loss": 0.2345, "step": 21284 }, { "epoch": 0.9767794043412418, "grad_norm": 0.4575035274028778, "learning_rate": 7.786674851709692e-06, "loss": 0.4152, "step": 21285 }, { "epoch": 0.9768252948464963, "grad_norm": 0.49209102988243103, "learning_rate": 7.786471271846034e-06, "loss": 0.4525, "step": 21286 }, { "epoch": 0.9768711853517508, "grad_norm": 0.4940110743045807, "learning_rate": 7.786267685281792e-06, "loss": 0.3959, "step": 21287 }, { "epoch": 0.9769170758570052, "grad_norm": 0.4221439063549042, "learning_rate": 7.786064092017456e-06, "loss": 0.3466, "step": 21288 }, { "epoch": 0.9769629663622597, "grad_norm": 0.4160853326320648, "learning_rate": 7.785860492053516e-06, "loss": 0.316, "step": 21289 }, { "epoch": 0.9770088568675142, "grad_norm": 0.5121729373931885, "learning_rate": 7.785656885390459e-06, "loss": 0.575, "step": 21290 }, { "epoch": 0.9770547473727685, "grad_norm": 0.45629557967185974, "learning_rate": 7.785453272028776e-06, "loss": 0.4339, "step": 21291 }, { "epoch": 0.977100637878023, "grad_norm": 0.4879673421382904, "learning_rate": 7.78524965196896e-06, "loss": 0.3919, "step": 21292 }, { "epoch": 0.9771465283832775, "grad_norm": 0.43076443672180176, "learning_rate": 7.785046025211495e-06, "loss": 0.3607, "step": 21293 }, { "epoch": 0.9771924188885319, "grad_norm": 0.4592038094997406, "learning_rate": 7.784842391756872e-06, "loss": 0.3992, "step": 21294 }, { "epoch": 0.9772383093937864, "grad_norm": 0.5081127285957336, "learning_rate": 7.784638751605586e-06, "loss": 0.4755, "step": 21295 }, { "epoch": 0.9772841998990409, "grad_norm": 0.4801236093044281, "learning_rate": 7.78443510475812e-06, "loss": 0.3923, "step": 21296 }, { "epoch": 0.9773300904042953, "grad_norm": 0.4494534730911255, "learning_rate": 7.784231451214965e-06, "loss": 0.3865, "step": 21297 }, { "epoch": 0.9773759809095498, "grad_norm": 0.4855068325996399, "learning_rate": 7.784027790976616e-06, "loss": 0.4493, "step": 21298 }, { "epoch": 0.9774218714148043, "grad_norm": 0.4874282479286194, "learning_rate": 7.783824124043556e-06, "loss": 0.4962, "step": 21299 }, { "epoch": 0.9774677619200587, "grad_norm": 0.4335455000400543, "learning_rate": 7.783620450416277e-06, "loss": 0.3479, "step": 21300 }, { "epoch": 0.9775136524253132, "grad_norm": 0.44533759355545044, "learning_rate": 7.783416770095272e-06, "loss": 0.3868, "step": 21301 }, { "epoch": 0.9775595429305677, "grad_norm": 0.4257276654243469, "learning_rate": 7.783213083081028e-06, "loss": 0.3363, "step": 21302 }, { "epoch": 0.9776054334358222, "grad_norm": 0.4454062581062317, "learning_rate": 7.783009389374034e-06, "loss": 0.3675, "step": 21303 }, { "epoch": 0.9776513239410766, "grad_norm": 0.5463020205497742, "learning_rate": 7.78280568897478e-06, "loss": 0.4869, "step": 21304 }, { "epoch": 0.9776972144463311, "grad_norm": 0.44738584756851196, "learning_rate": 7.782601981883755e-06, "loss": 0.3246, "step": 21305 }, { "epoch": 0.9777431049515856, "grad_norm": 0.4820520579814911, "learning_rate": 7.782398268101454e-06, "loss": 0.4739, "step": 21306 }, { "epoch": 0.97778899545684, "grad_norm": 0.46542009711265564, "learning_rate": 7.78219454762836e-06, "loss": 0.4137, "step": 21307 }, { "epoch": 0.9778348859620944, "grad_norm": 0.4879249930381775, "learning_rate": 7.78199082046497e-06, "loss": 0.4728, "step": 21308 }, { "epoch": 0.9778807764673489, "grad_norm": 0.47857338190078735, "learning_rate": 7.781787086611768e-06, "loss": 0.422, "step": 21309 }, { "epoch": 0.9779266669726033, "grad_norm": 0.43803471326828003, "learning_rate": 7.781583346069246e-06, "loss": 0.3979, "step": 21310 }, { "epoch": 0.9779725574778578, "grad_norm": 0.4761965572834015, "learning_rate": 7.781379598837893e-06, "loss": 0.4081, "step": 21311 }, { "epoch": 0.9780184479831123, "grad_norm": 0.4983188211917877, "learning_rate": 7.781175844918203e-06, "loss": 0.4873, "step": 21312 }, { "epoch": 0.9780643384883667, "grad_norm": 0.47174206376075745, "learning_rate": 7.78097208431066e-06, "loss": 0.4497, "step": 21313 }, { "epoch": 0.9781102289936212, "grad_norm": 0.4350973069667816, "learning_rate": 7.780768317015758e-06, "loss": 0.4037, "step": 21314 }, { "epoch": 0.9781561194988757, "grad_norm": 0.46594348549842834, "learning_rate": 7.780564543033986e-06, "loss": 0.4302, "step": 21315 }, { "epoch": 0.9782020100041301, "grad_norm": 0.42700326442718506, "learning_rate": 7.780360762365833e-06, "loss": 0.3752, "step": 21316 }, { "epoch": 0.9782479005093846, "grad_norm": 0.43529656529426575, "learning_rate": 7.78015697501179e-06, "loss": 0.3707, "step": 21317 }, { "epoch": 0.9782937910146391, "grad_norm": 0.45831698179244995, "learning_rate": 7.779953180972346e-06, "loss": 0.4508, "step": 21318 }, { "epoch": 0.9783396815198936, "grad_norm": 0.5671592950820923, "learning_rate": 7.779749380247995e-06, "loss": 0.3461, "step": 21319 }, { "epoch": 0.978385572025148, "grad_norm": 0.4470706880092621, "learning_rate": 7.779545572839223e-06, "loss": 0.3537, "step": 21320 }, { "epoch": 0.9784314625304025, "grad_norm": 0.45512986183166504, "learning_rate": 7.77934175874652e-06, "loss": 0.4098, "step": 21321 }, { "epoch": 0.978477353035657, "grad_norm": 0.4430018961429596, "learning_rate": 7.779137937970375e-06, "loss": 0.3799, "step": 21322 }, { "epoch": 0.9785232435409114, "grad_norm": 0.4226023256778717, "learning_rate": 7.778934110511285e-06, "loss": 0.344, "step": 21323 }, { "epoch": 0.9785691340461659, "grad_norm": 0.4300136864185333, "learning_rate": 7.778730276369733e-06, "loss": 0.3708, "step": 21324 }, { "epoch": 0.9786150245514204, "grad_norm": 0.43972426652908325, "learning_rate": 7.778526435546211e-06, "loss": 0.3872, "step": 21325 }, { "epoch": 0.9786609150566747, "grad_norm": 0.45692771673202515, "learning_rate": 7.778322588041212e-06, "loss": 0.3705, "step": 21326 }, { "epoch": 0.9787068055619292, "grad_norm": 0.4309283494949341, "learning_rate": 7.778118733855224e-06, "loss": 0.369, "step": 21327 }, { "epoch": 0.9787526960671837, "grad_norm": 0.4978989064693451, "learning_rate": 7.777914872988736e-06, "loss": 0.5149, "step": 21328 }, { "epoch": 0.9787985865724381, "grad_norm": 0.4511111080646515, "learning_rate": 7.77771100544224e-06, "loss": 0.3991, "step": 21329 }, { "epoch": 0.9788444770776926, "grad_norm": 0.4558192193508148, "learning_rate": 7.777507131216226e-06, "loss": 0.4615, "step": 21330 }, { "epoch": 0.9788903675829471, "grad_norm": 0.4370228350162506, "learning_rate": 7.777303250311183e-06, "loss": 0.3271, "step": 21331 }, { "epoch": 0.9789362580882015, "grad_norm": 0.4747064411640167, "learning_rate": 7.777099362727602e-06, "loss": 0.4273, "step": 21332 }, { "epoch": 0.978982148593456, "grad_norm": 0.4303992986679077, "learning_rate": 7.776895468465974e-06, "loss": 0.344, "step": 21333 }, { "epoch": 0.9790280390987105, "grad_norm": 0.4882585406303406, "learning_rate": 7.776691567526791e-06, "loss": 0.3492, "step": 21334 }, { "epoch": 0.9790739296039649, "grad_norm": 0.402586966753006, "learning_rate": 7.776487659910537e-06, "loss": 0.3432, "step": 21335 }, { "epoch": 0.9791198201092194, "grad_norm": 0.4301648437976837, "learning_rate": 7.77628374561771e-06, "loss": 0.3524, "step": 21336 }, { "epoch": 0.9791657106144739, "grad_norm": 0.44376108050346375, "learning_rate": 7.776079824648795e-06, "loss": 0.3596, "step": 21337 }, { "epoch": 0.9792116011197284, "grad_norm": 0.44621798396110535, "learning_rate": 7.775875897004284e-06, "loss": 0.4547, "step": 21338 }, { "epoch": 0.9792574916249828, "grad_norm": 0.44577354192733765, "learning_rate": 7.775671962684667e-06, "loss": 0.3892, "step": 21339 }, { "epoch": 0.9793033821302373, "grad_norm": 0.468477725982666, "learning_rate": 7.775468021690438e-06, "loss": 0.4118, "step": 21340 }, { "epoch": 0.9793492726354918, "grad_norm": 0.44831007719039917, "learning_rate": 7.775264074022082e-06, "loss": 0.3343, "step": 21341 }, { "epoch": 0.9793951631407461, "grad_norm": 0.4985017478466034, "learning_rate": 7.775060119680092e-06, "loss": 0.4381, "step": 21342 }, { "epoch": 0.9794410536460006, "grad_norm": 0.5300086140632629, "learning_rate": 7.774856158664958e-06, "loss": 0.4295, "step": 21343 }, { "epoch": 0.9794869441512551, "grad_norm": 0.47846633195877075, "learning_rate": 7.774652190977172e-06, "loss": 0.4111, "step": 21344 }, { "epoch": 0.9795328346565095, "grad_norm": 0.44218578934669495, "learning_rate": 7.774448216617224e-06, "loss": 0.3795, "step": 21345 }, { "epoch": 0.979578725161764, "grad_norm": 0.4750552773475647, "learning_rate": 7.7742442355856e-06, "loss": 0.4566, "step": 21346 }, { "epoch": 0.9796246156670185, "grad_norm": 0.4614761471748352, "learning_rate": 7.774040247882798e-06, "loss": 0.3875, "step": 21347 }, { "epoch": 0.9796705061722729, "grad_norm": 0.4379121959209442, "learning_rate": 7.773836253509302e-06, "loss": 0.3631, "step": 21348 }, { "epoch": 0.9797163966775274, "grad_norm": 0.44099926948547363, "learning_rate": 7.773632252465606e-06, "loss": 0.3951, "step": 21349 }, { "epoch": 0.9797622871827819, "grad_norm": 0.46316298842430115, "learning_rate": 7.7734282447522e-06, "loss": 0.4105, "step": 21350 }, { "epoch": 0.9798081776880363, "grad_norm": 0.4682682454586029, "learning_rate": 7.773224230369576e-06, "loss": 0.4671, "step": 21351 }, { "epoch": 0.9798540681932908, "grad_norm": 0.4232224225997925, "learning_rate": 7.77302020931822e-06, "loss": 0.3117, "step": 21352 }, { "epoch": 0.9798999586985453, "grad_norm": 0.4490441381931305, "learning_rate": 7.772816181598627e-06, "loss": 0.3763, "step": 21353 }, { "epoch": 0.9799458492037997, "grad_norm": 0.471253365278244, "learning_rate": 7.772612147211287e-06, "loss": 0.4326, "step": 21354 }, { "epoch": 0.9799917397090542, "grad_norm": 0.43836891651153564, "learning_rate": 7.77240810615669e-06, "loss": 0.336, "step": 21355 }, { "epoch": 0.9800376302143087, "grad_norm": 0.47028762102127075, "learning_rate": 7.772204058435325e-06, "loss": 0.419, "step": 21356 }, { "epoch": 0.9800835207195632, "grad_norm": 0.4634118974208832, "learning_rate": 7.772000004047684e-06, "loss": 0.445, "step": 21357 }, { "epoch": 0.9801294112248176, "grad_norm": 0.4604529142379761, "learning_rate": 7.771795942994259e-06, "loss": 0.4378, "step": 21358 }, { "epoch": 0.9801753017300721, "grad_norm": 0.4447031319141388, "learning_rate": 7.771591875275539e-06, "loss": 0.372, "step": 21359 }, { "epoch": 0.9802211922353266, "grad_norm": 0.46683269739151, "learning_rate": 7.771387800892016e-06, "loss": 0.4355, "step": 21360 }, { "epoch": 0.9802670827405809, "grad_norm": 0.43625301122665405, "learning_rate": 7.77118371984418e-06, "loss": 0.351, "step": 21361 }, { "epoch": 0.9803129732458354, "grad_norm": 0.44543755054473877, "learning_rate": 7.77097963213252e-06, "loss": 0.3932, "step": 21362 }, { "epoch": 0.9803588637510899, "grad_norm": 0.5127671360969543, "learning_rate": 7.770775537757532e-06, "loss": 0.4604, "step": 21363 }, { "epoch": 0.9804047542563443, "grad_norm": 0.43850618600845337, "learning_rate": 7.770571436719698e-06, "loss": 0.3897, "step": 21364 }, { "epoch": 0.9804506447615988, "grad_norm": 0.43809282779693604, "learning_rate": 7.770367329019519e-06, "loss": 0.3509, "step": 21365 }, { "epoch": 0.9804965352668533, "grad_norm": 0.4692409932613373, "learning_rate": 7.770163214657477e-06, "loss": 0.4612, "step": 21366 }, { "epoch": 0.9805424257721077, "grad_norm": 0.44617512822151184, "learning_rate": 7.76995909363407e-06, "loss": 0.3635, "step": 21367 }, { "epoch": 0.9805883162773622, "grad_norm": 0.42949068546295166, "learning_rate": 7.769754965949785e-06, "loss": 0.3733, "step": 21368 }, { "epoch": 0.9806342067826167, "grad_norm": 0.47484028339385986, "learning_rate": 7.769550831605112e-06, "loss": 0.4098, "step": 21369 }, { "epoch": 0.9806800972878711, "grad_norm": 0.5011220574378967, "learning_rate": 7.769346690600546e-06, "loss": 0.4773, "step": 21370 }, { "epoch": 0.9807259877931256, "grad_norm": 0.43971386551856995, "learning_rate": 7.769142542936572e-06, "loss": 0.3832, "step": 21371 }, { "epoch": 0.9807718782983801, "grad_norm": 0.442545086145401, "learning_rate": 7.768938388613687e-06, "loss": 0.3766, "step": 21372 }, { "epoch": 0.9808177688036346, "grad_norm": 0.4600960910320282, "learning_rate": 7.768734227632377e-06, "loss": 0.3953, "step": 21373 }, { "epoch": 0.980863659308889, "grad_norm": 0.4301464855670929, "learning_rate": 7.768530059993135e-06, "loss": 0.3698, "step": 21374 }, { "epoch": 0.9809095498141435, "grad_norm": 0.45661431550979614, "learning_rate": 7.768325885696454e-06, "loss": 0.4081, "step": 21375 }, { "epoch": 0.980955440319398, "grad_norm": 0.4460228383541107, "learning_rate": 7.768121704742822e-06, "loss": 0.3747, "step": 21376 }, { "epoch": 0.9810013308246524, "grad_norm": 0.5190106630325317, "learning_rate": 7.76791751713273e-06, "loss": 0.4475, "step": 21377 }, { "epoch": 0.9810472213299068, "grad_norm": 0.4498862326145172, "learning_rate": 7.76771332286667e-06, "loss": 0.3375, "step": 21378 }, { "epoch": 0.9810931118351613, "grad_norm": 0.4673357903957367, "learning_rate": 7.767509121945134e-06, "loss": 0.3915, "step": 21379 }, { "epoch": 0.9811390023404157, "grad_norm": 0.4895716607570648, "learning_rate": 7.767304914368613e-06, "loss": 0.5354, "step": 21380 }, { "epoch": 0.9811848928456702, "grad_norm": 0.43378815054893494, "learning_rate": 7.767100700137594e-06, "loss": 0.325, "step": 21381 }, { "epoch": 0.9812307833509247, "grad_norm": 0.4778186082839966, "learning_rate": 7.766896479252574e-06, "loss": 0.4569, "step": 21382 }, { "epoch": 0.9812766738561791, "grad_norm": 0.46774858236312866, "learning_rate": 7.766692251714039e-06, "loss": 0.3482, "step": 21383 }, { "epoch": 0.9813225643614336, "grad_norm": 0.4294929802417755, "learning_rate": 7.766488017522484e-06, "loss": 0.3832, "step": 21384 }, { "epoch": 0.9813684548666881, "grad_norm": 0.43981316685676575, "learning_rate": 7.766283776678398e-06, "loss": 0.3742, "step": 21385 }, { "epoch": 0.9814143453719425, "grad_norm": 0.45869478583335876, "learning_rate": 7.766079529182273e-06, "loss": 0.4192, "step": 21386 }, { "epoch": 0.981460235877197, "grad_norm": 0.4419727921485901, "learning_rate": 7.765875275034598e-06, "loss": 0.3874, "step": 21387 }, { "epoch": 0.9815061263824515, "grad_norm": 0.43578439950942993, "learning_rate": 7.765671014235867e-06, "loss": 0.3907, "step": 21388 }, { "epoch": 0.9815520168877059, "grad_norm": 0.45674291253089905, "learning_rate": 7.76546674678657e-06, "loss": 0.3853, "step": 21389 }, { "epoch": 0.9815979073929604, "grad_norm": 0.4652651250362396, "learning_rate": 7.765262472687198e-06, "loss": 0.4219, "step": 21390 }, { "epoch": 0.9816437978982149, "grad_norm": 0.4809570014476776, "learning_rate": 7.765058191938243e-06, "loss": 0.4897, "step": 21391 }, { "epoch": 0.9816896884034694, "grad_norm": 0.4326867163181305, "learning_rate": 7.764853904540197e-06, "loss": 0.4157, "step": 21392 }, { "epoch": 0.9817355789087238, "grad_norm": 0.43645909428596497, "learning_rate": 7.764649610493548e-06, "loss": 0.3618, "step": 21393 }, { "epoch": 0.9817814694139783, "grad_norm": 0.42483335733413696, "learning_rate": 7.76444530979879e-06, "loss": 0.319, "step": 21394 }, { "epoch": 0.9818273599192328, "grad_norm": 0.4879879951477051, "learning_rate": 7.764241002456414e-06, "loss": 0.4356, "step": 21395 }, { "epoch": 0.9818732504244871, "grad_norm": 0.4646546542644501, "learning_rate": 7.76403668846691e-06, "loss": 0.4412, "step": 21396 }, { "epoch": 0.9819191409297416, "grad_norm": 0.43390581011772156, "learning_rate": 7.76383236783077e-06, "loss": 0.3747, "step": 21397 }, { "epoch": 0.9819650314349961, "grad_norm": 0.46593526005744934, "learning_rate": 7.763628040548485e-06, "loss": 0.3867, "step": 21398 }, { "epoch": 0.9820109219402505, "grad_norm": 0.44699764251708984, "learning_rate": 7.763423706620547e-06, "loss": 0.3927, "step": 21399 }, { "epoch": 0.982056812445505, "grad_norm": 0.4481673240661621, "learning_rate": 7.763219366047449e-06, "loss": 0.3931, "step": 21400 }, { "epoch": 0.9821027029507595, "grad_norm": 0.46811288595199585, "learning_rate": 7.763015018829679e-06, "loss": 0.3982, "step": 21401 }, { "epoch": 0.9821485934560139, "grad_norm": 0.5100700855255127, "learning_rate": 7.76281066496773e-06, "loss": 0.448, "step": 21402 }, { "epoch": 0.9821944839612684, "grad_norm": 0.4577378034591675, "learning_rate": 7.762606304462092e-06, "loss": 0.4182, "step": 21403 }, { "epoch": 0.9822403744665229, "grad_norm": 0.4335230588912964, "learning_rate": 7.76240193731326e-06, "loss": 0.3404, "step": 21404 }, { "epoch": 0.9822862649717773, "grad_norm": 0.5428340435028076, "learning_rate": 7.762197563521723e-06, "loss": 0.4815, "step": 21405 }, { "epoch": 0.9823321554770318, "grad_norm": 0.4262878894805908, "learning_rate": 7.761993183087973e-06, "loss": 0.3217, "step": 21406 }, { "epoch": 0.9823780459822863, "grad_norm": 0.4616797864437103, "learning_rate": 7.761788796012498e-06, "loss": 0.3745, "step": 21407 }, { "epoch": 0.9824239364875408, "grad_norm": 0.49222397804260254, "learning_rate": 7.761584402295796e-06, "loss": 0.4363, "step": 21408 }, { "epoch": 0.9824698269927952, "grad_norm": 0.46838656067848206, "learning_rate": 7.761380001938354e-06, "loss": 0.458, "step": 21409 }, { "epoch": 0.9825157174980497, "grad_norm": 0.44366154074668884, "learning_rate": 7.761175594940663e-06, "loss": 0.3787, "step": 21410 }, { "epoch": 0.9825616080033042, "grad_norm": 0.4549409747123718, "learning_rate": 7.760971181303218e-06, "loss": 0.4404, "step": 21411 }, { "epoch": 0.9826074985085586, "grad_norm": 0.43890735507011414, "learning_rate": 7.760766761026508e-06, "loss": 0.3638, "step": 21412 }, { "epoch": 0.982653389013813, "grad_norm": 0.4766365885734558, "learning_rate": 7.760562334111026e-06, "loss": 0.4074, "step": 21413 }, { "epoch": 0.9826992795190675, "grad_norm": 0.47251564264297485, "learning_rate": 7.760357900557261e-06, "loss": 0.4745, "step": 21414 }, { "epoch": 0.9827451700243219, "grad_norm": 0.41670334339141846, "learning_rate": 7.760153460365709e-06, "loss": 0.3315, "step": 21415 }, { "epoch": 0.9827910605295764, "grad_norm": 0.40815308690071106, "learning_rate": 7.759949013536858e-06, "loss": 0.32, "step": 21416 }, { "epoch": 0.9828369510348309, "grad_norm": 0.4602589011192322, "learning_rate": 7.7597445600712e-06, "loss": 0.4036, "step": 21417 }, { "epoch": 0.9828828415400853, "grad_norm": 0.4398094713687897, "learning_rate": 7.759540099969227e-06, "loss": 0.36, "step": 21418 }, { "epoch": 0.9829287320453398, "grad_norm": 0.4365086257457733, "learning_rate": 7.759335633231433e-06, "loss": 0.3931, "step": 21419 }, { "epoch": 0.9829746225505943, "grad_norm": 0.508642852306366, "learning_rate": 7.759131159858307e-06, "loss": 0.4806, "step": 21420 }, { "epoch": 0.9830205130558487, "grad_norm": 0.4520360827445984, "learning_rate": 7.75892667985034e-06, "loss": 0.3607, "step": 21421 }, { "epoch": 0.9830664035611032, "grad_norm": 0.4412452280521393, "learning_rate": 7.758722193208026e-06, "loss": 0.3806, "step": 21422 }, { "epoch": 0.9831122940663577, "grad_norm": 0.4269466698169708, "learning_rate": 7.758517699931856e-06, "loss": 0.3095, "step": 21423 }, { "epoch": 0.9831581845716121, "grad_norm": 0.47109657526016235, "learning_rate": 7.75831320002232e-06, "loss": 0.4583, "step": 21424 }, { "epoch": 0.9832040750768666, "grad_norm": 0.4524524211883545, "learning_rate": 7.758108693479914e-06, "loss": 0.4248, "step": 21425 }, { "epoch": 0.9832499655821211, "grad_norm": 0.43599799275398254, "learning_rate": 7.757904180305125e-06, "loss": 0.3326, "step": 21426 }, { "epoch": 0.9832958560873756, "grad_norm": 0.41575875878334045, "learning_rate": 7.757699660498448e-06, "loss": 0.3457, "step": 21427 }, { "epoch": 0.98334174659263, "grad_norm": 0.707442045211792, "learning_rate": 7.757495134060373e-06, "loss": 0.4842, "step": 21428 }, { "epoch": 0.9833876370978845, "grad_norm": 0.4459569454193115, "learning_rate": 7.757290600991391e-06, "loss": 0.3772, "step": 21429 }, { "epoch": 0.983433527603139, "grad_norm": 0.45639532804489136, "learning_rate": 7.757086061292e-06, "loss": 0.4159, "step": 21430 }, { "epoch": 0.9834794181083933, "grad_norm": 0.4375492334365845, "learning_rate": 7.756881514962682e-06, "loss": 0.3439, "step": 21431 }, { "epoch": 0.9835253086136478, "grad_norm": 0.41172361373901367, "learning_rate": 7.756676962003937e-06, "loss": 0.2978, "step": 21432 }, { "epoch": 0.9835711991189023, "grad_norm": 0.4495747685432434, "learning_rate": 7.756472402416254e-06, "loss": 0.4454, "step": 21433 }, { "epoch": 0.9836170896241567, "grad_norm": 0.4470408260822296, "learning_rate": 7.756267836200125e-06, "loss": 0.4074, "step": 21434 }, { "epoch": 0.9836629801294112, "grad_norm": 0.44430989027023315, "learning_rate": 7.756063263356039e-06, "loss": 0.4113, "step": 21435 }, { "epoch": 0.9837088706346657, "grad_norm": 0.4442964196205139, "learning_rate": 7.755858683884493e-06, "loss": 0.3907, "step": 21436 }, { "epoch": 0.9837547611399201, "grad_norm": 0.48289164900779724, "learning_rate": 7.755654097785976e-06, "loss": 0.4799, "step": 21437 }, { "epoch": 0.9838006516451746, "grad_norm": 0.4302290976047516, "learning_rate": 7.755449505060982e-06, "loss": 0.3702, "step": 21438 }, { "epoch": 0.9838465421504291, "grad_norm": 0.46389952301979065, "learning_rate": 7.75524490571e-06, "loss": 0.389, "step": 21439 }, { "epoch": 0.9838924326556835, "grad_norm": 0.5733098387718201, "learning_rate": 7.755040299733522e-06, "loss": 0.5442, "step": 21440 }, { "epoch": 0.983938323160938, "grad_norm": 0.4466260075569153, "learning_rate": 7.754835687132044e-06, "loss": 0.3657, "step": 21441 }, { "epoch": 0.9839842136661925, "grad_norm": 0.4355860650539398, "learning_rate": 7.754631067906055e-06, "loss": 0.3725, "step": 21442 }, { "epoch": 0.9840301041714469, "grad_norm": 0.4843028485774994, "learning_rate": 7.754426442056049e-06, "loss": 0.4774, "step": 21443 }, { "epoch": 0.9840759946767014, "grad_norm": 0.4177616834640503, "learning_rate": 7.754221809582514e-06, "loss": 0.3237, "step": 21444 }, { "epoch": 0.9841218851819559, "grad_norm": 0.4778716266155243, "learning_rate": 7.754017170485949e-06, "loss": 0.3935, "step": 21445 }, { "epoch": 0.9841677756872104, "grad_norm": 0.4714200496673584, "learning_rate": 7.753812524766838e-06, "loss": 0.4357, "step": 21446 }, { "epoch": 0.9842136661924648, "grad_norm": 0.4577895402908325, "learning_rate": 7.753607872425678e-06, "loss": 0.4082, "step": 21447 }, { "epoch": 0.9842595566977193, "grad_norm": 0.4341447949409485, "learning_rate": 7.75340321346296e-06, "loss": 0.3996, "step": 21448 }, { "epoch": 0.9843054472029737, "grad_norm": 0.4190983474254608, "learning_rate": 7.753198547879177e-06, "loss": 0.3264, "step": 21449 }, { "epoch": 0.9843513377082281, "grad_norm": 0.43608057498931885, "learning_rate": 7.75299387567482e-06, "loss": 0.3553, "step": 21450 }, { "epoch": 0.9843972282134826, "grad_norm": 0.4212685823440552, "learning_rate": 7.75278919685038e-06, "loss": 0.3445, "step": 21451 }, { "epoch": 0.9844431187187371, "grad_norm": 0.43053552508354187, "learning_rate": 7.752584511406354e-06, "loss": 0.3289, "step": 21452 }, { "epoch": 0.9844890092239915, "grad_norm": 0.46430766582489014, "learning_rate": 7.75237981934323e-06, "loss": 0.4367, "step": 21453 }, { "epoch": 0.984534899729246, "grad_norm": 0.44362694025039673, "learning_rate": 7.7521751206615e-06, "loss": 0.3871, "step": 21454 }, { "epoch": 0.9845807902345005, "grad_norm": 0.461493581533432, "learning_rate": 7.751970415361658e-06, "loss": 0.4497, "step": 21455 }, { "epoch": 0.9846266807397549, "grad_norm": 0.4793708622455597, "learning_rate": 7.751765703444197e-06, "loss": 0.4265, "step": 21456 }, { "epoch": 0.9846725712450094, "grad_norm": 0.49860772490501404, "learning_rate": 7.751560984909605e-06, "loss": 0.485, "step": 21457 }, { "epoch": 0.9847184617502639, "grad_norm": 0.4721015989780426, "learning_rate": 7.751356259758379e-06, "loss": 0.3945, "step": 21458 }, { "epoch": 0.9847643522555183, "grad_norm": 0.46541470289230347, "learning_rate": 7.75115152799101e-06, "loss": 0.4013, "step": 21459 }, { "epoch": 0.9848102427607728, "grad_norm": 0.44064638018608093, "learning_rate": 7.75094678960799e-06, "loss": 0.3588, "step": 21460 }, { "epoch": 0.9848561332660273, "grad_norm": 0.440988689661026, "learning_rate": 7.750742044609808e-06, "loss": 0.3489, "step": 21461 }, { "epoch": 0.9849020237712818, "grad_norm": 0.43416938185691833, "learning_rate": 7.750537292996964e-06, "loss": 0.3679, "step": 21462 }, { "epoch": 0.9849479142765362, "grad_norm": 0.45450904965400696, "learning_rate": 7.750332534769945e-06, "loss": 0.38, "step": 21463 }, { "epoch": 0.9849938047817907, "grad_norm": 0.4444591701030731, "learning_rate": 7.750127769929243e-06, "loss": 0.351, "step": 21464 }, { "epoch": 0.9850396952870452, "grad_norm": 0.4505695402622223, "learning_rate": 7.749922998475352e-06, "loss": 0.3247, "step": 21465 }, { "epoch": 0.9850855857922995, "grad_norm": 0.44429805874824524, "learning_rate": 7.749718220408765e-06, "loss": 0.3575, "step": 21466 }, { "epoch": 0.985131476297554, "grad_norm": 0.49736854434013367, "learning_rate": 7.749513435729972e-06, "loss": 0.5173, "step": 21467 }, { "epoch": 0.9851773668028085, "grad_norm": 0.4590285122394562, "learning_rate": 7.749308644439467e-06, "loss": 0.4094, "step": 21468 }, { "epoch": 0.9852232573080629, "grad_norm": 0.48910629749298096, "learning_rate": 7.749103846537743e-06, "loss": 0.4398, "step": 21469 }, { "epoch": 0.9852691478133174, "grad_norm": 0.47019514441490173, "learning_rate": 7.748899042025293e-06, "loss": 0.4568, "step": 21470 }, { "epoch": 0.9853150383185719, "grad_norm": 0.44276532530784607, "learning_rate": 7.748694230902607e-06, "loss": 0.3539, "step": 21471 }, { "epoch": 0.9853609288238263, "grad_norm": 0.4677266478538513, "learning_rate": 7.748489413170178e-06, "loss": 0.3014, "step": 21472 }, { "epoch": 0.9854068193290808, "grad_norm": 0.4319685697555542, "learning_rate": 7.7482845888285e-06, "loss": 0.3548, "step": 21473 }, { "epoch": 0.9854527098343353, "grad_norm": 0.4747561812400818, "learning_rate": 7.748079757878066e-06, "loss": 0.4033, "step": 21474 }, { "epoch": 0.9854986003395897, "grad_norm": 0.41377580165863037, "learning_rate": 7.747874920319367e-06, "loss": 0.3112, "step": 21475 }, { "epoch": 0.9855444908448442, "grad_norm": 0.4243742525577545, "learning_rate": 7.747670076152897e-06, "loss": 0.331, "step": 21476 }, { "epoch": 0.9855903813500987, "grad_norm": 0.483267217874527, "learning_rate": 7.747465225379148e-06, "loss": 0.4284, "step": 21477 }, { "epoch": 0.9856362718553531, "grad_norm": 0.44622886180877686, "learning_rate": 7.74726036799861e-06, "loss": 0.3535, "step": 21478 }, { "epoch": 0.9856821623606076, "grad_norm": 0.4148627817630768, "learning_rate": 7.747055504011777e-06, "loss": 0.2955, "step": 21479 }, { "epoch": 0.9857280528658621, "grad_norm": 0.5059966444969177, "learning_rate": 7.746850633419144e-06, "loss": 0.4539, "step": 21480 }, { "epoch": 0.9857739433711166, "grad_norm": 0.45976540446281433, "learning_rate": 7.746645756221204e-06, "loss": 0.4188, "step": 21481 }, { "epoch": 0.985819833876371, "grad_norm": 0.42554357647895813, "learning_rate": 7.746440872418447e-06, "loss": 0.3676, "step": 21482 }, { "epoch": 0.9858657243816255, "grad_norm": 0.45818156003952026, "learning_rate": 7.746235982011365e-06, "loss": 0.3794, "step": 21483 }, { "epoch": 0.98591161488688, "grad_norm": 0.4491177201271057, "learning_rate": 7.746031085000454e-06, "loss": 0.3697, "step": 21484 }, { "epoch": 0.9859575053921343, "grad_norm": 0.4614400267601013, "learning_rate": 7.745826181386204e-06, "loss": 0.3853, "step": 21485 }, { "epoch": 0.9860033958973888, "grad_norm": 0.49437251687049866, "learning_rate": 7.745621271169107e-06, "loss": 0.4828, "step": 21486 }, { "epoch": 0.9860492864026433, "grad_norm": 0.5032725930213928, "learning_rate": 7.74541635434966e-06, "loss": 0.4222, "step": 21487 }, { "epoch": 0.9860951769078977, "grad_norm": 0.45090243220329285, "learning_rate": 7.745211430928352e-06, "loss": 0.3947, "step": 21488 }, { "epoch": 0.9861410674131522, "grad_norm": 0.47474828362464905, "learning_rate": 7.745006500905677e-06, "loss": 0.4096, "step": 21489 }, { "epoch": 0.9861869579184067, "grad_norm": 0.4694094955921173, "learning_rate": 7.744801564282128e-06, "loss": 0.425, "step": 21490 }, { "epoch": 0.9862328484236611, "grad_norm": 0.48593080043792725, "learning_rate": 7.744596621058199e-06, "loss": 0.4749, "step": 21491 }, { "epoch": 0.9862787389289156, "grad_norm": 0.3990936577320099, "learning_rate": 7.744391671234376e-06, "loss": 0.3063, "step": 21492 }, { "epoch": 0.9863246294341701, "grad_norm": 0.4184863269329071, "learning_rate": 7.744186714811163e-06, "loss": 0.3824, "step": 21493 }, { "epoch": 0.9863705199394245, "grad_norm": 0.4958454966545105, "learning_rate": 7.743981751789045e-06, "loss": 0.4769, "step": 21494 }, { "epoch": 0.986416410444679, "grad_norm": 0.43260058760643005, "learning_rate": 7.743776782168517e-06, "loss": 0.3175, "step": 21495 }, { "epoch": 0.9864623009499335, "grad_norm": 0.48893362283706665, "learning_rate": 7.743571805950073e-06, "loss": 0.4516, "step": 21496 }, { "epoch": 0.9865081914551879, "grad_norm": 0.4828796088695526, "learning_rate": 7.743366823134202e-06, "loss": 0.3794, "step": 21497 }, { "epoch": 0.9865540819604424, "grad_norm": 0.46408811211586, "learning_rate": 7.743161833721402e-06, "loss": 0.4557, "step": 21498 }, { "epoch": 0.9865999724656969, "grad_norm": 0.4445958435535431, "learning_rate": 7.742956837712161e-06, "loss": 0.328, "step": 21499 }, { "epoch": 0.9866458629709514, "grad_norm": 0.43978598713874817, "learning_rate": 7.742751835106977e-06, "loss": 0.3646, "step": 21500 }, { "epoch": 0.9866917534762057, "grad_norm": 0.4201546609401703, "learning_rate": 7.742546825906338e-06, "loss": 0.3474, "step": 21501 }, { "epoch": 0.9867376439814602, "grad_norm": 0.4824674427509308, "learning_rate": 7.742341810110741e-06, "loss": 0.4708, "step": 21502 }, { "epoch": 0.9867835344867147, "grad_norm": 0.4747612476348877, "learning_rate": 7.742136787720678e-06, "loss": 0.4049, "step": 21503 }, { "epoch": 0.9868294249919691, "grad_norm": 0.7261636853218079, "learning_rate": 7.74193175873664e-06, "loss": 0.4409, "step": 21504 }, { "epoch": 0.9868753154972236, "grad_norm": 0.44836804270744324, "learning_rate": 7.741726723159122e-06, "loss": 0.4114, "step": 21505 }, { "epoch": 0.9869212060024781, "grad_norm": 0.45542311668395996, "learning_rate": 7.741521680988617e-06, "loss": 0.3968, "step": 21506 }, { "epoch": 0.9869670965077325, "grad_norm": 0.4229353666305542, "learning_rate": 7.74131663222562e-06, "loss": 0.3489, "step": 21507 }, { "epoch": 0.987012987012987, "grad_norm": 0.4770808517932892, "learning_rate": 7.741111576870618e-06, "loss": 0.3872, "step": 21508 }, { "epoch": 0.9870588775182415, "grad_norm": 0.432889848947525, "learning_rate": 7.740906514924109e-06, "loss": 0.3563, "step": 21509 }, { "epoch": 0.9871047680234959, "grad_norm": 0.47834312915802, "learning_rate": 7.740701446386585e-06, "loss": 0.4296, "step": 21510 }, { "epoch": 0.9871506585287504, "grad_norm": 0.4557517170906067, "learning_rate": 7.740496371258538e-06, "loss": 0.4572, "step": 21511 }, { "epoch": 0.9871965490340049, "grad_norm": 0.4850899875164032, "learning_rate": 7.740291289540464e-06, "loss": 0.4527, "step": 21512 }, { "epoch": 0.9872424395392593, "grad_norm": 0.47328728437423706, "learning_rate": 7.740086201232852e-06, "loss": 0.4522, "step": 21513 }, { "epoch": 0.9872883300445138, "grad_norm": 0.45360296964645386, "learning_rate": 7.7398811063362e-06, "loss": 0.3582, "step": 21514 }, { "epoch": 0.9873342205497683, "grad_norm": 0.4558408558368683, "learning_rate": 7.739676004850997e-06, "loss": 0.4076, "step": 21515 }, { "epoch": 0.9873801110550228, "grad_norm": 0.4704846739768982, "learning_rate": 7.73947089677774e-06, "loss": 0.3849, "step": 21516 }, { "epoch": 0.9874260015602772, "grad_norm": 0.4336792230606079, "learning_rate": 7.739265782116916e-06, "loss": 0.3325, "step": 21517 }, { "epoch": 0.9874718920655317, "grad_norm": 0.4466922879219055, "learning_rate": 7.739060660869026e-06, "loss": 0.3458, "step": 21518 }, { "epoch": 0.9875177825707862, "grad_norm": 0.4665732681751251, "learning_rate": 7.738855533034557e-06, "loss": 0.3661, "step": 21519 }, { "epoch": 0.9875636730760405, "grad_norm": 0.721197783946991, "learning_rate": 7.738650398614007e-06, "loss": 0.3462, "step": 21520 }, { "epoch": 0.987609563581295, "grad_norm": 0.4455837905406952, "learning_rate": 7.738445257607868e-06, "loss": 0.4257, "step": 21521 }, { "epoch": 0.9876554540865495, "grad_norm": 0.4395209550857544, "learning_rate": 7.738240110016628e-06, "loss": 0.3597, "step": 21522 }, { "epoch": 0.9877013445918039, "grad_norm": 0.44374555349349976, "learning_rate": 7.738034955840788e-06, "loss": 0.3592, "step": 21523 }, { "epoch": 0.9877472350970584, "grad_norm": 0.49391478300094604, "learning_rate": 7.737829795080837e-06, "loss": 0.4479, "step": 21524 }, { "epoch": 0.9877931256023129, "grad_norm": 0.4366222620010376, "learning_rate": 7.737624627737272e-06, "loss": 0.3704, "step": 21525 }, { "epoch": 0.9878390161075673, "grad_norm": 0.47419601678848267, "learning_rate": 7.73741945381058e-06, "loss": 0.4302, "step": 21526 }, { "epoch": 0.9878849066128218, "grad_norm": 0.44391629099845886, "learning_rate": 7.737214273301261e-06, "loss": 0.3577, "step": 21527 }, { "epoch": 0.9879307971180763, "grad_norm": 0.4660033583641052, "learning_rate": 7.737009086209804e-06, "loss": 0.4041, "step": 21528 }, { "epoch": 0.9879766876233307, "grad_norm": 0.511532187461853, "learning_rate": 7.736803892536706e-06, "loss": 0.5074, "step": 21529 }, { "epoch": 0.9880225781285852, "grad_norm": 0.4907664954662323, "learning_rate": 7.736598692282455e-06, "loss": 0.4506, "step": 21530 }, { "epoch": 0.9880684686338397, "grad_norm": 0.43260014057159424, "learning_rate": 7.73639348544755e-06, "loss": 0.3777, "step": 21531 }, { "epoch": 0.9881143591390941, "grad_norm": 0.42581993341445923, "learning_rate": 7.736188272032482e-06, "loss": 0.3603, "step": 21532 }, { "epoch": 0.9881602496443486, "grad_norm": 0.41736534237861633, "learning_rate": 7.735983052037746e-06, "loss": 0.3488, "step": 21533 }, { "epoch": 0.9882061401496031, "grad_norm": 0.4813328683376312, "learning_rate": 7.735777825463833e-06, "loss": 0.3712, "step": 21534 }, { "epoch": 0.9882520306548576, "grad_norm": 0.5098647475242615, "learning_rate": 7.735572592311239e-06, "loss": 0.4511, "step": 21535 }, { "epoch": 0.988297921160112, "grad_norm": 0.4447896480560303, "learning_rate": 7.735367352580455e-06, "loss": 0.3805, "step": 21536 }, { "epoch": 0.9883438116653664, "grad_norm": 0.42714807391166687, "learning_rate": 7.735162106271973e-06, "loss": 0.3678, "step": 21537 }, { "epoch": 0.9883897021706209, "grad_norm": 0.4847141206264496, "learning_rate": 7.734956853386295e-06, "loss": 0.4429, "step": 21538 }, { "epoch": 0.9884355926758753, "grad_norm": 0.4179786443710327, "learning_rate": 7.734751593923906e-06, "loss": 0.3308, "step": 21539 }, { "epoch": 0.9884814831811298, "grad_norm": 0.45779165625572205, "learning_rate": 7.734546327885301e-06, "loss": 0.4118, "step": 21540 }, { "epoch": 0.9885273736863843, "grad_norm": 0.45173683762550354, "learning_rate": 7.734341055270979e-06, "loss": 0.3948, "step": 21541 }, { "epoch": 0.9885732641916387, "grad_norm": 0.4324975907802582, "learning_rate": 7.734135776081426e-06, "loss": 0.3483, "step": 21542 }, { "epoch": 0.9886191546968932, "grad_norm": 0.4419863224029541, "learning_rate": 7.733930490317144e-06, "loss": 0.3175, "step": 21543 }, { "epoch": 0.9886650452021477, "grad_norm": 0.8894885182380676, "learning_rate": 7.733725197978617e-06, "loss": 0.4889, "step": 21544 }, { "epoch": 0.9887109357074021, "grad_norm": 0.45484885573387146, "learning_rate": 7.733519899066346e-06, "loss": 0.4199, "step": 21545 }, { "epoch": 0.9887568262126566, "grad_norm": 0.47910836338996887, "learning_rate": 7.733314593580822e-06, "loss": 0.4079, "step": 21546 }, { "epoch": 0.9888027167179111, "grad_norm": 0.5267349481582642, "learning_rate": 7.733109281522539e-06, "loss": 0.469, "step": 21547 }, { "epoch": 0.9888486072231655, "grad_norm": 0.47669488191604614, "learning_rate": 7.73290396289199e-06, "loss": 0.4661, "step": 21548 }, { "epoch": 0.98889449772842, "grad_norm": 0.49703556299209595, "learning_rate": 7.732698637689672e-06, "loss": 0.4459, "step": 21549 }, { "epoch": 0.9889403882336745, "grad_norm": 0.443128764629364, "learning_rate": 7.732493305916074e-06, "loss": 0.3163, "step": 21550 }, { "epoch": 0.988986278738929, "grad_norm": 0.4509771466255188, "learning_rate": 7.732287967571692e-06, "loss": 0.35, "step": 21551 }, { "epoch": 0.9890321692441834, "grad_norm": 0.4455101788043976, "learning_rate": 7.732082622657022e-06, "loss": 0.398, "step": 21552 }, { "epoch": 0.9890780597494379, "grad_norm": 0.45555704832077026, "learning_rate": 7.731877271172555e-06, "loss": 0.3506, "step": 21553 }, { "epoch": 0.9891239502546924, "grad_norm": 0.6252763867378235, "learning_rate": 7.731671913118783e-06, "loss": 0.3779, "step": 21554 }, { "epoch": 0.9891698407599467, "grad_norm": 0.5046229362487793, "learning_rate": 7.731466548496204e-06, "loss": 0.3319, "step": 21555 }, { "epoch": 0.9892157312652012, "grad_norm": 0.4692378044128418, "learning_rate": 7.731261177305309e-06, "loss": 0.4181, "step": 21556 }, { "epoch": 0.9892616217704557, "grad_norm": 0.46070632338523865, "learning_rate": 7.731055799546594e-06, "loss": 0.3578, "step": 21557 }, { "epoch": 0.9893075122757101, "grad_norm": 0.506351113319397, "learning_rate": 7.73085041522055e-06, "loss": 0.4795, "step": 21558 }, { "epoch": 0.9893534027809646, "grad_norm": 0.46631091833114624, "learning_rate": 7.730645024327674e-06, "loss": 0.3966, "step": 21559 }, { "epoch": 0.9893992932862191, "grad_norm": 0.45831891894340515, "learning_rate": 7.73043962686846e-06, "loss": 0.3728, "step": 21560 }, { "epoch": 0.9894451837914735, "grad_norm": 0.4572312831878662, "learning_rate": 7.730234222843398e-06, "loss": 0.3211, "step": 21561 }, { "epoch": 0.989491074296728, "grad_norm": 0.46948060393333435, "learning_rate": 7.730028812252985e-06, "loss": 0.4537, "step": 21562 }, { "epoch": 0.9895369648019825, "grad_norm": 0.4926060438156128, "learning_rate": 7.729823395097715e-06, "loss": 0.4461, "step": 21563 }, { "epoch": 0.9895828553072369, "grad_norm": 0.4430238902568817, "learning_rate": 7.729617971378081e-06, "loss": 0.3096, "step": 21564 }, { "epoch": 0.9896287458124914, "grad_norm": 0.44670823216438293, "learning_rate": 7.729412541094576e-06, "loss": 0.3575, "step": 21565 }, { "epoch": 0.9896746363177459, "grad_norm": 0.4326602518558502, "learning_rate": 7.729207104247699e-06, "loss": 0.3602, "step": 21566 }, { "epoch": 0.9897205268230003, "grad_norm": 0.4646248519420624, "learning_rate": 7.729001660837936e-06, "loss": 0.388, "step": 21567 }, { "epoch": 0.9897664173282548, "grad_norm": 0.48971086740493774, "learning_rate": 7.728796210865788e-06, "loss": 0.4093, "step": 21568 }, { "epoch": 0.9898123078335093, "grad_norm": 0.4400186538696289, "learning_rate": 7.728590754331744e-06, "loss": 0.3357, "step": 21569 }, { "epoch": 0.9898581983387638, "grad_norm": 0.44209638237953186, "learning_rate": 7.728385291236302e-06, "loss": 0.3822, "step": 21570 }, { "epoch": 0.9899040888440181, "grad_norm": 0.4958653450012207, "learning_rate": 7.728179821579954e-06, "loss": 0.3935, "step": 21571 }, { "epoch": 0.9899499793492726, "grad_norm": 0.4243580996990204, "learning_rate": 7.727974345363197e-06, "loss": 0.3558, "step": 21572 }, { "epoch": 0.9899958698545271, "grad_norm": 0.4566342234611511, "learning_rate": 7.727768862586519e-06, "loss": 0.4048, "step": 21573 }, { "epoch": 0.9900417603597815, "grad_norm": 0.4585874676704407, "learning_rate": 7.727563373250419e-06, "loss": 0.3752, "step": 21574 }, { "epoch": 0.990087650865036, "grad_norm": 0.4663224518299103, "learning_rate": 7.72735787735539e-06, "loss": 0.3834, "step": 21575 }, { "epoch": 0.9901335413702905, "grad_norm": 0.44143539667129517, "learning_rate": 7.727152374901927e-06, "loss": 0.3782, "step": 21576 }, { "epoch": 0.9901794318755449, "grad_norm": 0.43273141980171204, "learning_rate": 7.726946865890523e-06, "loss": 0.3531, "step": 21577 }, { "epoch": 0.9902253223807994, "grad_norm": 0.43265241384506226, "learning_rate": 7.726741350321672e-06, "loss": 0.3315, "step": 21578 }, { "epoch": 0.9902712128860539, "grad_norm": 0.42842116951942444, "learning_rate": 7.726535828195867e-06, "loss": 0.3578, "step": 21579 }, { "epoch": 0.9903171033913083, "grad_norm": 0.43500638008117676, "learning_rate": 7.726330299513608e-06, "loss": 0.3605, "step": 21580 }, { "epoch": 0.9903629938965628, "grad_norm": 0.4316575229167938, "learning_rate": 7.726124764275381e-06, "loss": 0.3555, "step": 21581 }, { "epoch": 0.9904088844018173, "grad_norm": 0.47060224413871765, "learning_rate": 7.725919222481686e-06, "loss": 0.4559, "step": 21582 }, { "epoch": 0.9904547749070717, "grad_norm": 0.43209201097488403, "learning_rate": 7.725713674133014e-06, "loss": 0.3684, "step": 21583 }, { "epoch": 0.9905006654123262, "grad_norm": 0.46089810132980347, "learning_rate": 7.725508119229863e-06, "loss": 0.4388, "step": 21584 }, { "epoch": 0.9905465559175807, "grad_norm": 0.39437952637672424, "learning_rate": 7.725302557772725e-06, "loss": 0.2912, "step": 21585 }, { "epoch": 0.9905924464228351, "grad_norm": 0.4594399631023407, "learning_rate": 7.725096989762094e-06, "loss": 0.4131, "step": 21586 }, { "epoch": 0.9906383369280896, "grad_norm": 0.5201370120048523, "learning_rate": 7.724891415198466e-06, "loss": 0.3625, "step": 21587 }, { "epoch": 0.9906842274333441, "grad_norm": 0.4138689935207367, "learning_rate": 7.724685834082332e-06, "loss": 0.3017, "step": 21588 }, { "epoch": 0.9907301179385986, "grad_norm": 0.44101497530937195, "learning_rate": 7.72448024641419e-06, "loss": 0.3345, "step": 21589 }, { "epoch": 0.9907760084438529, "grad_norm": 0.4594927728176117, "learning_rate": 7.724274652194531e-06, "loss": 0.4434, "step": 21590 }, { "epoch": 0.9908218989491074, "grad_norm": 0.4490070343017578, "learning_rate": 7.724069051423853e-06, "loss": 0.4037, "step": 21591 }, { "epoch": 0.9908677894543619, "grad_norm": 0.4688624143600464, "learning_rate": 7.723863444102648e-06, "loss": 0.4123, "step": 21592 }, { "epoch": 0.9909136799596163, "grad_norm": 0.44659847021102905, "learning_rate": 7.723657830231411e-06, "loss": 0.3959, "step": 21593 }, { "epoch": 0.9909595704648708, "grad_norm": 0.44296732544898987, "learning_rate": 7.723452209810637e-06, "loss": 0.3793, "step": 21594 }, { "epoch": 0.9910054609701253, "grad_norm": 0.46276307106018066, "learning_rate": 7.72324658284082e-06, "loss": 0.4222, "step": 21595 }, { "epoch": 0.9910513514753797, "grad_norm": 0.387199342250824, "learning_rate": 7.723040949322455e-06, "loss": 0.2598, "step": 21596 }, { "epoch": 0.9910972419806342, "grad_norm": 0.47061440348625183, "learning_rate": 7.722835309256036e-06, "loss": 0.3929, "step": 21597 }, { "epoch": 0.9911431324858887, "grad_norm": 0.4351453483104706, "learning_rate": 7.722629662642056e-06, "loss": 0.3659, "step": 21598 }, { "epoch": 0.9911890229911431, "grad_norm": 0.4130133092403412, "learning_rate": 7.72242400948101e-06, "loss": 0.3239, "step": 21599 }, { "epoch": 0.9912349134963976, "grad_norm": 0.41191717982292175, "learning_rate": 7.722218349773395e-06, "loss": 0.296, "step": 21600 }, { "epoch": 0.9912808040016521, "grad_norm": 0.44369596242904663, "learning_rate": 7.722012683519706e-06, "loss": 0.3722, "step": 21601 }, { "epoch": 0.9913266945069065, "grad_norm": 0.49208709597587585, "learning_rate": 7.721807010720432e-06, "loss": 0.417, "step": 21602 }, { "epoch": 0.991372585012161, "grad_norm": 0.39060091972351074, "learning_rate": 7.721601331376074e-06, "loss": 0.2928, "step": 21603 }, { "epoch": 0.9914184755174155, "grad_norm": 0.4794404208660126, "learning_rate": 7.721395645487122e-06, "loss": 0.4176, "step": 21604 }, { "epoch": 0.99146436602267, "grad_norm": 0.5412167906761169, "learning_rate": 7.721189953054074e-06, "loss": 0.4544, "step": 21605 }, { "epoch": 0.9915102565279243, "grad_norm": 0.4557233452796936, "learning_rate": 7.720984254077421e-06, "loss": 0.421, "step": 21606 }, { "epoch": 0.9915561470331788, "grad_norm": 0.4553992748260498, "learning_rate": 7.720778548557661e-06, "loss": 0.4069, "step": 21607 }, { "epoch": 0.9916020375384333, "grad_norm": 0.4262290298938751, "learning_rate": 7.720572836495287e-06, "loss": 0.3762, "step": 21608 }, { "epoch": 0.9916479280436877, "grad_norm": 0.45328226685523987, "learning_rate": 7.720367117890794e-06, "loss": 0.4046, "step": 21609 }, { "epoch": 0.9916938185489422, "grad_norm": 0.4489854574203491, "learning_rate": 7.720161392744677e-06, "loss": 0.4014, "step": 21610 }, { "epoch": 0.9917397090541967, "grad_norm": 0.4762423634529114, "learning_rate": 7.71995566105743e-06, "loss": 0.4318, "step": 21611 }, { "epoch": 0.9917855995594511, "grad_norm": 0.4956006705760956, "learning_rate": 7.719749922829547e-06, "loss": 0.4896, "step": 21612 }, { "epoch": 0.9918314900647056, "grad_norm": 0.44897791743278503, "learning_rate": 7.719544178061526e-06, "loss": 0.4264, "step": 21613 }, { "epoch": 0.9918773805699601, "grad_norm": 0.42185404896736145, "learning_rate": 7.719338426753859e-06, "loss": 0.3064, "step": 21614 }, { "epoch": 0.9919232710752145, "grad_norm": 0.4731590449810028, "learning_rate": 7.719132668907039e-06, "loss": 0.3953, "step": 21615 }, { "epoch": 0.991969161580469, "grad_norm": 0.5093813538551331, "learning_rate": 7.718926904521566e-06, "loss": 0.4764, "step": 21616 }, { "epoch": 0.9920150520857235, "grad_norm": 0.430996298789978, "learning_rate": 7.718721133597931e-06, "loss": 0.365, "step": 21617 }, { "epoch": 0.9920609425909779, "grad_norm": 0.5503207445144653, "learning_rate": 7.718515356136629e-06, "loss": 0.4755, "step": 21618 }, { "epoch": 0.9921068330962324, "grad_norm": 0.5031556487083435, "learning_rate": 7.718309572138157e-06, "loss": 0.4633, "step": 21619 }, { "epoch": 0.9921527236014869, "grad_norm": 0.4741862714290619, "learning_rate": 7.718103781603006e-06, "loss": 0.469, "step": 21620 }, { "epoch": 0.9921986141067413, "grad_norm": 0.4880513548851013, "learning_rate": 7.717897984531675e-06, "loss": 0.4062, "step": 21621 }, { "epoch": 0.9922445046119958, "grad_norm": 0.466846764087677, "learning_rate": 7.717692180924656e-06, "loss": 0.4579, "step": 21622 }, { "epoch": 0.9922903951172503, "grad_norm": 0.4392637014389038, "learning_rate": 7.717486370782447e-06, "loss": 0.3605, "step": 21623 }, { "epoch": 0.9923362856225048, "grad_norm": 0.4225007891654968, "learning_rate": 7.717280554105539e-06, "loss": 0.3875, "step": 21624 }, { "epoch": 0.9923821761277591, "grad_norm": 0.5165314674377441, "learning_rate": 7.71707473089443e-06, "loss": 0.5394, "step": 21625 }, { "epoch": 0.9924280666330136, "grad_norm": 0.47258275747299194, "learning_rate": 7.716868901149613e-06, "loss": 0.413, "step": 21626 }, { "epoch": 0.9924739571382681, "grad_norm": 0.45230138301849365, "learning_rate": 7.716663064871584e-06, "loss": 0.3946, "step": 21627 }, { "epoch": 0.9925198476435225, "grad_norm": 0.43344438076019287, "learning_rate": 7.71645722206084e-06, "loss": 0.343, "step": 21628 }, { "epoch": 0.992565738148777, "grad_norm": 0.42270031571388245, "learning_rate": 7.716251372717872e-06, "loss": 0.3445, "step": 21629 }, { "epoch": 0.9926116286540315, "grad_norm": 0.42478659749031067, "learning_rate": 7.716045516843175e-06, "loss": 0.2986, "step": 21630 }, { "epoch": 0.9926575191592859, "grad_norm": 0.4442451596260071, "learning_rate": 7.715839654437247e-06, "loss": 0.37, "step": 21631 }, { "epoch": 0.9927034096645404, "grad_norm": 0.4787028431892395, "learning_rate": 7.715633785500582e-06, "loss": 0.4284, "step": 21632 }, { "epoch": 0.9927493001697949, "grad_norm": 0.4370770752429962, "learning_rate": 7.715427910033675e-06, "loss": 0.3681, "step": 21633 }, { "epoch": 0.9927951906750493, "grad_norm": 0.44050177931785583, "learning_rate": 7.71522202803702e-06, "loss": 0.3469, "step": 21634 }, { "epoch": 0.9928410811803038, "grad_norm": 0.4654043912887573, "learning_rate": 7.715016139511114e-06, "loss": 0.3574, "step": 21635 }, { "epoch": 0.9928869716855583, "grad_norm": 0.47407266497612, "learning_rate": 7.71481024445645e-06, "loss": 0.4247, "step": 21636 }, { "epoch": 0.9929328621908127, "grad_norm": 0.4280468225479126, "learning_rate": 7.714604342873526e-06, "loss": 0.3264, "step": 21637 }, { "epoch": 0.9929787526960672, "grad_norm": 0.4646967947483063, "learning_rate": 7.714398434762833e-06, "loss": 0.4172, "step": 21638 }, { "epoch": 0.9930246432013217, "grad_norm": 0.4821687936782837, "learning_rate": 7.71419252012487e-06, "loss": 0.4618, "step": 21639 }, { "epoch": 0.9930705337065762, "grad_norm": 0.4772447347640991, "learning_rate": 7.71398659896013e-06, "loss": 0.4414, "step": 21640 }, { "epoch": 0.9931164242118306, "grad_norm": 0.4389433264732361, "learning_rate": 7.713780671269109e-06, "loss": 0.296, "step": 21641 }, { "epoch": 0.993162314717085, "grad_norm": 0.4953770041465759, "learning_rate": 7.713574737052303e-06, "loss": 0.4707, "step": 21642 }, { "epoch": 0.9932082052223395, "grad_norm": 0.41828301548957825, "learning_rate": 7.713368796310206e-06, "loss": 0.3651, "step": 21643 }, { "epoch": 0.9932540957275939, "grad_norm": 0.471941739320755, "learning_rate": 7.71316284904331e-06, "loss": 0.4027, "step": 21644 }, { "epoch": 0.9932999862328484, "grad_norm": 0.4663860499858856, "learning_rate": 7.712956895252117e-06, "loss": 0.4798, "step": 21645 }, { "epoch": 0.9933458767381029, "grad_norm": 0.4490600526332855, "learning_rate": 7.712750934937118e-06, "loss": 0.4023, "step": 21646 }, { "epoch": 0.9933917672433573, "grad_norm": 0.47742733359336853, "learning_rate": 7.712544968098809e-06, "loss": 0.4088, "step": 21647 }, { "epoch": 0.9934376577486118, "grad_norm": 0.4789804518222809, "learning_rate": 7.712338994737687e-06, "loss": 0.4748, "step": 21648 }, { "epoch": 0.9934835482538663, "grad_norm": 0.41957616806030273, "learning_rate": 7.712133014854245e-06, "loss": 0.3423, "step": 21649 }, { "epoch": 0.9935294387591207, "grad_norm": 0.4999156594276428, "learning_rate": 7.711927028448977e-06, "loss": 0.4646, "step": 21650 }, { "epoch": 0.9935753292643752, "grad_norm": 0.4107596278190613, "learning_rate": 7.711721035522383e-06, "loss": 0.3235, "step": 21651 }, { "epoch": 0.9936212197696297, "grad_norm": 0.3989337384700775, "learning_rate": 7.711515036074955e-06, "loss": 0.3136, "step": 21652 }, { "epoch": 0.9936671102748841, "grad_norm": 0.4458697736263275, "learning_rate": 7.711309030107188e-06, "loss": 0.3512, "step": 21653 }, { "epoch": 0.9937130007801386, "grad_norm": 0.4967235326766968, "learning_rate": 7.71110301761958e-06, "loss": 0.3707, "step": 21654 }, { "epoch": 0.9937588912853931, "grad_norm": 0.5026987195014954, "learning_rate": 7.710896998612626e-06, "loss": 0.5037, "step": 21655 }, { "epoch": 0.9938047817906475, "grad_norm": 0.4506838023662567, "learning_rate": 7.710690973086817e-06, "loss": 0.3704, "step": 21656 }, { "epoch": 0.993850672295902, "grad_norm": 0.4644213318824768, "learning_rate": 7.710484941042654e-06, "loss": 0.4294, "step": 21657 }, { "epoch": 0.9938965628011565, "grad_norm": 0.456689715385437, "learning_rate": 7.710278902480628e-06, "loss": 0.4328, "step": 21658 }, { "epoch": 0.993942453306411, "grad_norm": 0.4821111559867859, "learning_rate": 7.710072857401238e-06, "loss": 0.3946, "step": 21659 }, { "epoch": 0.9939883438116653, "grad_norm": 0.4732898473739624, "learning_rate": 7.70986680580498e-06, "loss": 0.4369, "step": 21660 }, { "epoch": 0.9940342343169198, "grad_norm": 0.4998220205307007, "learning_rate": 7.709660747692344e-06, "loss": 0.4792, "step": 21661 }, { "epoch": 0.9940801248221743, "grad_norm": 0.4651615619659424, "learning_rate": 7.709454683063832e-06, "loss": 0.4424, "step": 21662 }, { "epoch": 0.9941260153274287, "grad_norm": 0.4596205949783325, "learning_rate": 7.709248611919935e-06, "loss": 0.4252, "step": 21663 }, { "epoch": 0.9941719058326832, "grad_norm": 0.45824873447418213, "learning_rate": 7.709042534261151e-06, "loss": 0.3765, "step": 21664 }, { "epoch": 0.9942177963379377, "grad_norm": 0.44609710574150085, "learning_rate": 7.708836450087975e-06, "loss": 0.4265, "step": 21665 }, { "epoch": 0.9942636868431921, "grad_norm": 0.436423122882843, "learning_rate": 7.7086303594009e-06, "loss": 0.3078, "step": 21666 }, { "epoch": 0.9943095773484466, "grad_norm": 0.46959188580513, "learning_rate": 7.708424262200426e-06, "loss": 0.439, "step": 21667 }, { "epoch": 0.9943554678537011, "grad_norm": 0.43696752190589905, "learning_rate": 7.708218158487047e-06, "loss": 0.3468, "step": 21668 }, { "epoch": 0.9944013583589555, "grad_norm": 0.4352855086326599, "learning_rate": 7.708012048261255e-06, "loss": 0.3332, "step": 21669 }, { "epoch": 0.99444724886421, "grad_norm": 0.4379773437976837, "learning_rate": 7.707805931523552e-06, "loss": 0.3393, "step": 21670 }, { "epoch": 0.9944931393694645, "grad_norm": 0.4358983337879181, "learning_rate": 7.707599808274428e-06, "loss": 0.3656, "step": 21671 }, { "epoch": 0.9945390298747189, "grad_norm": 0.41573044657707214, "learning_rate": 7.707393678514383e-06, "loss": 0.3611, "step": 21672 }, { "epoch": 0.9945849203799734, "grad_norm": 0.434416264295578, "learning_rate": 7.707187542243909e-06, "loss": 0.3348, "step": 21673 }, { "epoch": 0.9946308108852279, "grad_norm": 0.5044904947280884, "learning_rate": 7.706981399463505e-06, "loss": 0.4103, "step": 21674 }, { "epoch": 0.9946767013904823, "grad_norm": 0.46483391523361206, "learning_rate": 7.706775250173663e-06, "loss": 0.4008, "step": 21675 }, { "epoch": 0.9947225918957368, "grad_norm": 0.40895169973373413, "learning_rate": 7.706569094374883e-06, "loss": 0.3169, "step": 21676 }, { "epoch": 0.9947684824009912, "grad_norm": 0.4912654459476471, "learning_rate": 7.706362932067656e-06, "loss": 0.4795, "step": 21677 }, { "epoch": 0.9948143729062457, "grad_norm": 0.44605332612991333, "learning_rate": 7.706156763252484e-06, "loss": 0.3415, "step": 21678 }, { "epoch": 0.9948602634115001, "grad_norm": 0.4418478012084961, "learning_rate": 7.705950587929855e-06, "loss": 0.3813, "step": 21679 }, { "epoch": 0.9949061539167546, "grad_norm": 0.5477805137634277, "learning_rate": 7.705744406100272e-06, "loss": 0.4568, "step": 21680 }, { "epoch": 0.9949520444220091, "grad_norm": 0.47867539525032043, "learning_rate": 7.705538217764226e-06, "loss": 0.4426, "step": 21681 }, { "epoch": 0.9949979349272635, "grad_norm": 0.4280222952365875, "learning_rate": 7.705332022922216e-06, "loss": 0.308, "step": 21682 }, { "epoch": 0.995043825432518, "grad_norm": 0.45183125138282776, "learning_rate": 7.705125821574734e-06, "loss": 0.3823, "step": 21683 }, { "epoch": 0.9950897159377725, "grad_norm": 0.4185204803943634, "learning_rate": 7.704919613722279e-06, "loss": 0.3081, "step": 21684 }, { "epoch": 0.9951356064430269, "grad_norm": 0.43980467319488525, "learning_rate": 7.704713399365347e-06, "loss": 0.3804, "step": 21685 }, { "epoch": 0.9951814969482814, "grad_norm": 0.44416001439094543, "learning_rate": 7.704507178504433e-06, "loss": 0.4303, "step": 21686 }, { "epoch": 0.9952273874535359, "grad_norm": 0.42041924595832825, "learning_rate": 7.70430095114003e-06, "loss": 0.3332, "step": 21687 }, { "epoch": 0.9952732779587903, "grad_norm": 0.9331541061401367, "learning_rate": 7.704094717272641e-06, "loss": 0.3972, "step": 21688 }, { "epoch": 0.9953191684640448, "grad_norm": 0.47611746191978455, "learning_rate": 7.703888476902754e-06, "loss": 0.4071, "step": 21689 }, { "epoch": 0.9953650589692993, "grad_norm": 0.4737643599510193, "learning_rate": 7.703682230030872e-06, "loss": 0.461, "step": 21690 }, { "epoch": 0.9954109494745537, "grad_norm": 0.427826464176178, "learning_rate": 7.703475976657484e-06, "loss": 0.3398, "step": 21691 }, { "epoch": 0.9954568399798082, "grad_norm": 0.4618673622608185, "learning_rate": 7.703269716783092e-06, "loss": 0.4115, "step": 21692 }, { "epoch": 0.9955027304850627, "grad_norm": 0.46752244234085083, "learning_rate": 7.703063450408189e-06, "loss": 0.4221, "step": 21693 }, { "epoch": 0.9955486209903172, "grad_norm": 0.42925283312797546, "learning_rate": 7.702857177533269e-06, "loss": 0.3596, "step": 21694 }, { "epoch": 0.9955945114955715, "grad_norm": 0.45623883605003357, "learning_rate": 7.702650898158834e-06, "loss": 0.384, "step": 21695 }, { "epoch": 0.995640402000826, "grad_norm": 0.4588542580604553, "learning_rate": 7.702444612285375e-06, "loss": 0.3704, "step": 21696 }, { "epoch": 0.9956862925060805, "grad_norm": 0.4644734859466553, "learning_rate": 7.702238319913392e-06, "loss": 0.4068, "step": 21697 }, { "epoch": 0.9957321830113349, "grad_norm": 0.42213189601898193, "learning_rate": 7.702032021043374e-06, "loss": 0.3388, "step": 21698 }, { "epoch": 0.9957780735165894, "grad_norm": 0.4811408519744873, "learning_rate": 7.701825715675826e-06, "loss": 0.4573, "step": 21699 }, { "epoch": 0.9958239640218439, "grad_norm": 0.44757407903671265, "learning_rate": 7.701619403811238e-06, "loss": 0.4053, "step": 21700 }, { "epoch": 0.9958698545270983, "grad_norm": 0.4473450481891632, "learning_rate": 7.701413085450106e-06, "loss": 0.3848, "step": 21701 }, { "epoch": 0.9959157450323528, "grad_norm": 0.4346800148487091, "learning_rate": 7.701206760592932e-06, "loss": 0.3787, "step": 21702 }, { "epoch": 0.9959616355376073, "grad_norm": 0.5073715448379517, "learning_rate": 7.701000429240207e-06, "loss": 0.3921, "step": 21703 }, { "epoch": 0.9960075260428617, "grad_norm": 0.5634883046150208, "learning_rate": 7.700794091392427e-06, "loss": 0.409, "step": 21704 }, { "epoch": 0.9960534165481162, "grad_norm": 0.42938217520713806, "learning_rate": 7.700587747050091e-06, "loss": 0.323, "step": 21705 }, { "epoch": 0.9960993070533707, "grad_norm": 0.47685715556144714, "learning_rate": 7.700381396213693e-06, "loss": 0.3688, "step": 21706 }, { "epoch": 0.9961451975586251, "grad_norm": 0.4331689178943634, "learning_rate": 7.70017503888373e-06, "loss": 0.3075, "step": 21707 }, { "epoch": 0.9961910880638796, "grad_norm": 0.4987686574459076, "learning_rate": 7.699968675060698e-06, "loss": 0.4481, "step": 21708 }, { "epoch": 0.9962369785691341, "grad_norm": 0.4822427034378052, "learning_rate": 7.699762304745095e-06, "loss": 0.4288, "step": 21709 }, { "epoch": 0.9962828690743885, "grad_norm": 0.47968757152557373, "learning_rate": 7.699555927937416e-06, "loss": 0.4152, "step": 21710 }, { "epoch": 0.996328759579643, "grad_norm": 0.4196031391620636, "learning_rate": 7.699349544638155e-06, "loss": 0.3659, "step": 21711 }, { "epoch": 0.9963746500848975, "grad_norm": 0.4223105013370514, "learning_rate": 7.69914315484781e-06, "loss": 0.3705, "step": 21712 }, { "epoch": 0.996420540590152, "grad_norm": 0.46879667043685913, "learning_rate": 7.69893675856688e-06, "loss": 0.4012, "step": 21713 }, { "epoch": 0.9964664310954063, "grad_norm": 0.4587671458721161, "learning_rate": 7.698730355795858e-06, "loss": 0.3977, "step": 21714 }, { "epoch": 0.9965123216006608, "grad_norm": 0.4594936966896057, "learning_rate": 7.698523946535241e-06, "loss": 0.381, "step": 21715 }, { "epoch": 0.9965582121059153, "grad_norm": 0.4323621094226837, "learning_rate": 7.698317530785524e-06, "loss": 0.3582, "step": 21716 }, { "epoch": 0.9966041026111697, "grad_norm": 0.43070316314697266, "learning_rate": 7.698111108547208e-06, "loss": 0.3657, "step": 21717 }, { "epoch": 0.9966499931164242, "grad_norm": 0.4744878113269806, "learning_rate": 7.697904679820784e-06, "loss": 0.4306, "step": 21718 }, { "epoch": 0.9966958836216787, "grad_norm": 0.5006880164146423, "learning_rate": 7.697698244606751e-06, "loss": 0.5074, "step": 21719 }, { "epoch": 0.9967417741269331, "grad_norm": 0.44790035486221313, "learning_rate": 7.697491802905606e-06, "loss": 0.3931, "step": 21720 }, { "epoch": 0.9967876646321876, "grad_norm": 0.44855648279190063, "learning_rate": 7.697285354717844e-06, "loss": 0.3575, "step": 21721 }, { "epoch": 0.9968335551374421, "grad_norm": 0.44482141733169556, "learning_rate": 7.697078900043963e-06, "loss": 0.4326, "step": 21722 }, { "epoch": 0.9968794456426965, "grad_norm": 0.4320143759250641, "learning_rate": 7.696872438884458e-06, "loss": 0.3614, "step": 21723 }, { "epoch": 0.996925336147951, "grad_norm": 0.5439617037773132, "learning_rate": 7.696665971239825e-06, "loss": 0.4861, "step": 21724 }, { "epoch": 0.9969712266532055, "grad_norm": 0.46905937790870667, "learning_rate": 7.696459497110561e-06, "loss": 0.4173, "step": 21725 }, { "epoch": 0.9970171171584599, "grad_norm": 0.44150659441947937, "learning_rate": 7.696253016497165e-06, "loss": 0.3674, "step": 21726 }, { "epoch": 0.9970630076637144, "grad_norm": 0.4883023202419281, "learning_rate": 7.69604652940013e-06, "loss": 0.4249, "step": 21727 }, { "epoch": 0.9971088981689689, "grad_norm": 0.47890734672546387, "learning_rate": 7.695840035819956e-06, "loss": 0.4406, "step": 21728 }, { "epoch": 0.9971547886742234, "grad_norm": 0.5204247832298279, "learning_rate": 7.695633535757135e-06, "loss": 0.4554, "step": 21729 }, { "epoch": 0.9972006791794777, "grad_norm": 0.5043074488639832, "learning_rate": 7.695427029212165e-06, "loss": 0.5133, "step": 21730 }, { "epoch": 0.9972465696847322, "grad_norm": 0.44892171025276184, "learning_rate": 7.695220516185548e-06, "loss": 0.422, "step": 21731 }, { "epoch": 0.9972924601899867, "grad_norm": 0.44195860624313354, "learning_rate": 7.695013996677774e-06, "loss": 0.4242, "step": 21732 }, { "epoch": 0.9973383506952411, "grad_norm": 0.48590004444122314, "learning_rate": 7.694807470689341e-06, "loss": 0.4726, "step": 21733 }, { "epoch": 0.9973842412004956, "grad_norm": 0.4636612832546234, "learning_rate": 7.694600938220748e-06, "loss": 0.3646, "step": 21734 }, { "epoch": 0.9974301317057501, "grad_norm": 0.48089244961738586, "learning_rate": 7.694394399272489e-06, "loss": 0.4993, "step": 21735 }, { "epoch": 0.9974760222110045, "grad_norm": 0.4314274787902832, "learning_rate": 7.694187853845063e-06, "loss": 0.3645, "step": 21736 }, { "epoch": 0.997521912716259, "grad_norm": 0.6634402871131897, "learning_rate": 7.693981301938966e-06, "loss": 0.3954, "step": 21737 }, { "epoch": 0.9975678032215135, "grad_norm": 0.47214239835739136, "learning_rate": 7.693774743554691e-06, "loss": 0.3893, "step": 21738 }, { "epoch": 0.9976136937267679, "grad_norm": 0.46950823068618774, "learning_rate": 7.693568178692742e-06, "loss": 0.422, "step": 21739 }, { "epoch": 0.9976595842320224, "grad_norm": 0.46035197377204895, "learning_rate": 7.69336160735361e-06, "loss": 0.3882, "step": 21740 }, { "epoch": 0.9977054747372769, "grad_norm": 0.474260151386261, "learning_rate": 7.693155029537794e-06, "loss": 0.4222, "step": 21741 }, { "epoch": 0.9977513652425313, "grad_norm": 0.45628631114959717, "learning_rate": 7.692948445245788e-06, "loss": 0.3784, "step": 21742 }, { "epoch": 0.9977972557477858, "grad_norm": 0.41917580366134644, "learning_rate": 7.692741854478091e-06, "loss": 0.3801, "step": 21743 }, { "epoch": 0.9978431462530403, "grad_norm": 0.4488079845905304, "learning_rate": 7.692535257235202e-06, "loss": 0.4226, "step": 21744 }, { "epoch": 0.9978890367582947, "grad_norm": 0.39841407537460327, "learning_rate": 7.692328653517614e-06, "loss": 0.2944, "step": 21745 }, { "epoch": 0.9979349272635492, "grad_norm": 0.4876246452331543, "learning_rate": 7.692122043325824e-06, "loss": 0.4891, "step": 21746 }, { "epoch": 0.9979808177688037, "grad_norm": 0.46109312772750854, "learning_rate": 7.691915426660334e-06, "loss": 0.3978, "step": 21747 }, { "epoch": 0.9980267082740581, "grad_norm": 0.5009638071060181, "learning_rate": 7.691708803521635e-06, "loss": 0.4096, "step": 21748 }, { "epoch": 0.9980725987793125, "grad_norm": 0.4462440609931946, "learning_rate": 7.691502173910226e-06, "loss": 0.3927, "step": 21749 }, { "epoch": 0.998118489284567, "grad_norm": 0.4259359836578369, "learning_rate": 7.691295537826604e-06, "loss": 0.3148, "step": 21750 }, { "epoch": 0.9981643797898215, "grad_norm": 0.46323710680007935, "learning_rate": 7.691088895271264e-06, "loss": 0.4048, "step": 21751 }, { "epoch": 0.9982102702950759, "grad_norm": 0.4062822461128235, "learning_rate": 7.690882246244706e-06, "loss": 0.3231, "step": 21752 }, { "epoch": 0.9982561608003304, "grad_norm": 0.45418989658355713, "learning_rate": 7.690675590747426e-06, "loss": 0.3882, "step": 21753 }, { "epoch": 0.9983020513055849, "grad_norm": 0.48705634474754333, "learning_rate": 7.69046892877992e-06, "loss": 0.4736, "step": 21754 }, { "epoch": 0.9983479418108393, "grad_norm": 0.462112694978714, "learning_rate": 7.690262260342682e-06, "loss": 0.366, "step": 21755 }, { "epoch": 0.9983938323160938, "grad_norm": 0.4815134108066559, "learning_rate": 7.690055585436216e-06, "loss": 0.4584, "step": 21756 }, { "epoch": 0.9984397228213483, "grad_norm": 0.4438479542732239, "learning_rate": 7.689848904061014e-06, "loss": 0.4083, "step": 21757 }, { "epoch": 0.9984856133266027, "grad_norm": 0.4772718548774719, "learning_rate": 7.689642216217577e-06, "loss": 0.4697, "step": 21758 }, { "epoch": 0.9985315038318572, "grad_norm": 0.45480525493621826, "learning_rate": 7.689435521906396e-06, "loss": 0.3954, "step": 21759 }, { "epoch": 0.9985773943371117, "grad_norm": 0.430342435836792, "learning_rate": 7.689228821127973e-06, "loss": 0.3789, "step": 21760 }, { "epoch": 0.9986232848423661, "grad_norm": 0.47365885972976685, "learning_rate": 7.689022113882803e-06, "loss": 0.4528, "step": 21761 }, { "epoch": 0.9986691753476206, "grad_norm": 0.46474674344062805, "learning_rate": 7.688815400171383e-06, "loss": 0.4082, "step": 21762 }, { "epoch": 0.9987150658528751, "grad_norm": 0.5705292224884033, "learning_rate": 7.688608679994211e-06, "loss": 0.4184, "step": 21763 }, { "epoch": 0.9987609563581294, "grad_norm": 0.46210139989852905, "learning_rate": 7.688401953351784e-06, "loss": 0.407, "step": 21764 }, { "epoch": 0.998806846863384, "grad_norm": 0.456602543592453, "learning_rate": 7.688195220244599e-06, "loss": 0.3848, "step": 21765 }, { "epoch": 0.9988527373686384, "grad_norm": 0.45093870162963867, "learning_rate": 7.687988480673151e-06, "loss": 0.3776, "step": 21766 }, { "epoch": 0.9988986278738929, "grad_norm": 0.446113646030426, "learning_rate": 7.687781734637942e-06, "loss": 0.3999, "step": 21767 }, { "epoch": 0.9989445183791473, "grad_norm": 0.47175708413124084, "learning_rate": 7.687574982139464e-06, "loss": 0.4173, "step": 21768 }, { "epoch": 0.9989904088844018, "grad_norm": 0.4484696090221405, "learning_rate": 7.687368223178217e-06, "loss": 0.3678, "step": 21769 }, { "epoch": 0.9990362993896563, "grad_norm": 0.4323655366897583, "learning_rate": 7.687161457754699e-06, "loss": 0.3768, "step": 21770 }, { "epoch": 0.9990821898949107, "grad_norm": 0.4674985408782959, "learning_rate": 7.686954685869403e-06, "loss": 0.4292, "step": 21771 }, { "epoch": 0.9991280804001652, "grad_norm": 0.4495973289012909, "learning_rate": 7.686747907522832e-06, "loss": 0.4136, "step": 21772 }, { "epoch": 0.9991739709054197, "grad_norm": 0.4574570953845978, "learning_rate": 7.68654112271548e-06, "loss": 0.3973, "step": 21773 }, { "epoch": 0.9992198614106741, "grad_norm": 0.4700450003147125, "learning_rate": 7.686334331447842e-06, "loss": 0.3816, "step": 21774 }, { "epoch": 0.9992657519159286, "grad_norm": 0.46827003359794617, "learning_rate": 7.68612753372042e-06, "loss": 0.437, "step": 21775 }, { "epoch": 0.9993116424211831, "grad_norm": 0.4525164067745209, "learning_rate": 7.685920729533708e-06, "loss": 0.3627, "step": 21776 }, { "epoch": 0.9993575329264375, "grad_norm": 0.4704478085041046, "learning_rate": 7.685713918888203e-06, "loss": 0.3883, "step": 21777 }, { "epoch": 0.999403423431692, "grad_norm": 0.5542486310005188, "learning_rate": 7.685507101784406e-06, "loss": 0.5781, "step": 21778 }, { "epoch": 0.9994493139369465, "grad_norm": 0.4475703835487366, "learning_rate": 7.685300278222809e-06, "loss": 0.3699, "step": 21779 }, { "epoch": 0.9994952044422009, "grad_norm": 0.42029353976249695, "learning_rate": 7.685093448203916e-06, "loss": 0.3214, "step": 21780 }, { "epoch": 0.9995410949474554, "grad_norm": 0.48289719223976135, "learning_rate": 7.68488661172822e-06, "loss": 0.4012, "step": 21781 }, { "epoch": 0.9995869854527099, "grad_norm": 0.5010626316070557, "learning_rate": 7.684679768796218e-06, "loss": 0.486, "step": 21782 }, { "epoch": 0.9996328759579644, "grad_norm": 0.4751811623573303, "learning_rate": 7.684472919408408e-06, "loss": 0.4817, "step": 21783 }, { "epoch": 0.9996787664632187, "grad_norm": 0.428971529006958, "learning_rate": 7.684266063565288e-06, "loss": 0.3478, "step": 21784 }, { "epoch": 0.9997246569684732, "grad_norm": 0.5563684105873108, "learning_rate": 7.684059201267357e-06, "loss": 0.478, "step": 21785 }, { "epoch": 0.9997705474737277, "grad_norm": 0.4718800485134125, "learning_rate": 7.683852332515108e-06, "loss": 0.4161, "step": 21786 }, { "epoch": 0.9998164379789821, "grad_norm": 0.4454321563243866, "learning_rate": 7.683645457309043e-06, "loss": 0.3814, "step": 21787 }, { "epoch": 0.9998623284842366, "grad_norm": 0.5012182593345642, "learning_rate": 7.683438575649658e-06, "loss": 0.5173, "step": 21788 }, { "epoch": 0.9999082189894911, "grad_norm": 0.5023024678230286, "learning_rate": 7.683231687537449e-06, "loss": 0.4572, "step": 21789 }, { "epoch": 0.9999541094947455, "grad_norm": 0.40421074628829956, "learning_rate": 7.683024792972916e-06, "loss": 0.3074, "step": 21790 }, { "epoch": 1.0, "grad_norm": 0.4496239125728607, "learning_rate": 7.682817891956553e-06, "loss": 0.3598, "step": 21791 }, { "epoch": 1.0000458905052545, "grad_norm": 0.42824703454971313, "learning_rate": 7.682610984488863e-06, "loss": 0.2908, "step": 21792 }, { "epoch": 1.000091781010509, "grad_norm": 0.47233837842941284, "learning_rate": 7.682404070570337e-06, "loss": 0.352, "step": 21793 }, { "epoch": 1.0001376715157635, "grad_norm": 0.509062647819519, "learning_rate": 7.682197150201475e-06, "loss": 0.3996, "step": 21794 }, { "epoch": 1.0001835620210178, "grad_norm": 0.4523948132991791, "learning_rate": 7.68199022338278e-06, "loss": 0.3485, "step": 21795 }, { "epoch": 1.0002294525262723, "grad_norm": 0.47760623693466187, "learning_rate": 7.68178329011474e-06, "loss": 0.3841, "step": 21796 }, { "epoch": 1.0002753430315268, "grad_norm": 0.44862669706344604, "learning_rate": 7.681576350397861e-06, "loss": 0.3152, "step": 21797 }, { "epoch": 1.0003212335367813, "grad_norm": 0.45125558972358704, "learning_rate": 7.681369404232638e-06, "loss": 0.3284, "step": 21798 }, { "epoch": 1.0003671240420358, "grad_norm": 0.457284539937973, "learning_rate": 7.681162451619564e-06, "loss": 0.3306, "step": 21799 }, { "epoch": 1.0004130145472903, "grad_norm": 0.48717260360717773, "learning_rate": 7.680955492559144e-06, "loss": 0.3977, "step": 21800 }, { "epoch": 1.0004589050525445, "grad_norm": 0.4457889795303345, "learning_rate": 7.68074852705187e-06, "loss": 0.3105, "step": 21801 }, { "epoch": 1.000504795557799, "grad_norm": 0.4630339443683624, "learning_rate": 7.680541555098244e-06, "loss": 0.2963, "step": 21802 }, { "epoch": 1.0005506860630535, "grad_norm": 0.5100385546684265, "learning_rate": 7.68033457669876e-06, "loss": 0.3438, "step": 21803 }, { "epoch": 1.000596576568308, "grad_norm": 0.5151930451393127, "learning_rate": 7.680127591853916e-06, "loss": 0.4002, "step": 21804 }, { "epoch": 1.0006424670735625, "grad_norm": 0.4654126465320587, "learning_rate": 7.679920600564212e-06, "loss": 0.3522, "step": 21805 }, { "epoch": 1.000688357578817, "grad_norm": 0.48202213644981384, "learning_rate": 7.679713602830146e-06, "loss": 0.3588, "step": 21806 }, { "epoch": 1.0007342480840713, "grad_norm": 0.47520971298217773, "learning_rate": 7.679506598652214e-06, "loss": 0.389, "step": 21807 }, { "epoch": 1.0007801385893258, "grad_norm": 0.44335997104644775, "learning_rate": 7.679299588030914e-06, "loss": 0.2935, "step": 21808 }, { "epoch": 1.0008260290945803, "grad_norm": 0.4476031959056854, "learning_rate": 7.679092570966745e-06, "loss": 0.2821, "step": 21809 }, { "epoch": 1.0008719195998348, "grad_norm": 0.4472014009952545, "learning_rate": 7.678885547460202e-06, "loss": 0.3017, "step": 21810 }, { "epoch": 1.0009178101050893, "grad_norm": 0.45240941643714905, "learning_rate": 7.678678517511786e-06, "loss": 0.3034, "step": 21811 }, { "epoch": 1.0009637006103438, "grad_norm": 0.4844726026058197, "learning_rate": 7.678471481121994e-06, "loss": 0.3801, "step": 21812 }, { "epoch": 1.0010095911155983, "grad_norm": 0.4395175576210022, "learning_rate": 7.678264438291322e-06, "loss": 0.3341, "step": 21813 }, { "epoch": 1.0010554816208526, "grad_norm": 0.480497270822525, "learning_rate": 7.67805738902027e-06, "loss": 0.3875, "step": 21814 }, { "epoch": 1.001101372126107, "grad_norm": 0.724362313747406, "learning_rate": 7.677850333309336e-06, "loss": 0.3151, "step": 21815 }, { "epoch": 1.0011472626313616, "grad_norm": 0.436196893453598, "learning_rate": 7.677643271159016e-06, "loss": 0.2892, "step": 21816 }, { "epoch": 1.001193153136616, "grad_norm": 0.4740312695503235, "learning_rate": 7.677436202569809e-06, "loss": 0.3879, "step": 21817 }, { "epoch": 1.0012390436418706, "grad_norm": 0.431088924407959, "learning_rate": 7.677229127542214e-06, "loss": 0.2956, "step": 21818 }, { "epoch": 1.001284934147125, "grad_norm": 0.5393555164337158, "learning_rate": 7.677022046076727e-06, "loss": 0.449, "step": 21819 }, { "epoch": 1.0013308246523793, "grad_norm": 0.44642388820648193, "learning_rate": 7.676814958173847e-06, "loss": 0.302, "step": 21820 }, { "epoch": 1.0013767151576338, "grad_norm": 0.4677377939224243, "learning_rate": 7.676607863834072e-06, "loss": 0.349, "step": 21821 }, { "epoch": 1.0014226056628883, "grad_norm": 0.5177029371261597, "learning_rate": 7.676400763057901e-06, "loss": 0.3671, "step": 21822 }, { "epoch": 1.0014684961681428, "grad_norm": 0.503667414188385, "learning_rate": 7.676193655845829e-06, "loss": 0.435, "step": 21823 }, { "epoch": 1.0015143866733973, "grad_norm": 0.46695178747177124, "learning_rate": 7.675986542198357e-06, "loss": 0.3465, "step": 21824 }, { "epoch": 1.0015602771786518, "grad_norm": 0.46708744764328003, "learning_rate": 7.67577942211598e-06, "loss": 0.358, "step": 21825 }, { "epoch": 1.001606167683906, "grad_norm": 0.4863606095314026, "learning_rate": 7.675572295599201e-06, "loss": 0.407, "step": 21826 }, { "epoch": 1.0016520581891606, "grad_norm": 0.44961273670196533, "learning_rate": 7.675365162648514e-06, "loss": 0.3472, "step": 21827 }, { "epoch": 1.001697948694415, "grad_norm": 0.47937706112861633, "learning_rate": 7.675158023264418e-06, "loss": 0.4133, "step": 21828 }, { "epoch": 1.0017438391996696, "grad_norm": 0.46879705786705017, "learning_rate": 7.674950877447412e-06, "loss": 0.3555, "step": 21829 }, { "epoch": 1.001789729704924, "grad_norm": 0.4375566840171814, "learning_rate": 7.67474372519799e-06, "loss": 0.3088, "step": 21830 }, { "epoch": 1.0018356202101786, "grad_norm": 0.4670071601867676, "learning_rate": 7.674536566516658e-06, "loss": 0.3923, "step": 21831 }, { "epoch": 1.001881510715433, "grad_norm": 0.5112287402153015, "learning_rate": 7.674329401403906e-06, "loss": 0.4083, "step": 21832 }, { "epoch": 1.0019274012206874, "grad_norm": 0.4465145170688629, "learning_rate": 7.674122229860237e-06, "loss": 0.327, "step": 21833 }, { "epoch": 1.0019732917259419, "grad_norm": 0.4728211760520935, "learning_rate": 7.673915051886149e-06, "loss": 0.3607, "step": 21834 }, { "epoch": 1.0020191822311963, "grad_norm": 0.4644700288772583, "learning_rate": 7.67370786748214e-06, "loss": 0.3703, "step": 21835 }, { "epoch": 1.0020650727364508, "grad_norm": 0.4294247329235077, "learning_rate": 7.673500676648706e-06, "loss": 0.3106, "step": 21836 }, { "epoch": 1.0021109632417053, "grad_norm": 0.46702876687049866, "learning_rate": 7.673293479386346e-06, "loss": 0.383, "step": 21837 }, { "epoch": 1.0021568537469598, "grad_norm": 0.4914146363735199, "learning_rate": 7.67308627569556e-06, "loss": 0.3599, "step": 21838 }, { "epoch": 1.0022027442522141, "grad_norm": 0.4601387083530426, "learning_rate": 7.672879065576843e-06, "loss": 0.3416, "step": 21839 }, { "epoch": 1.0022486347574686, "grad_norm": 0.547964870929718, "learning_rate": 7.672671849030699e-06, "loss": 0.3251, "step": 21840 }, { "epoch": 1.0022945252627231, "grad_norm": 0.49092257022857666, "learning_rate": 7.672464626057621e-06, "loss": 0.3638, "step": 21841 }, { "epoch": 1.0023404157679776, "grad_norm": 0.48189881443977356, "learning_rate": 7.672257396658108e-06, "loss": 0.3993, "step": 21842 }, { "epoch": 1.002386306273232, "grad_norm": 0.5036380290985107, "learning_rate": 7.67205016083266e-06, "loss": 0.4284, "step": 21843 }, { "epoch": 1.0024321967784866, "grad_norm": 0.49139124155044556, "learning_rate": 7.671842918581776e-06, "loss": 0.3726, "step": 21844 }, { "epoch": 1.002478087283741, "grad_norm": 0.4567021429538727, "learning_rate": 7.671635669905953e-06, "loss": 0.3115, "step": 21845 }, { "epoch": 1.0025239777889954, "grad_norm": 0.4727135896682739, "learning_rate": 7.671428414805689e-06, "loss": 0.31, "step": 21846 }, { "epoch": 1.0025698682942499, "grad_norm": 0.44256845116615295, "learning_rate": 7.67122115328148e-06, "loss": 0.2916, "step": 21847 }, { "epoch": 1.0026157587995044, "grad_norm": 0.5284841060638428, "learning_rate": 7.67101388533383e-06, "loss": 0.4183, "step": 21848 }, { "epoch": 1.0026616493047589, "grad_norm": 0.4531751573085785, "learning_rate": 7.670806610963234e-06, "loss": 0.3172, "step": 21849 }, { "epoch": 1.0027075398100134, "grad_norm": 0.47695717215538025, "learning_rate": 7.67059933017019e-06, "loss": 0.3648, "step": 21850 }, { "epoch": 1.0027534303152679, "grad_norm": 0.46809929609298706, "learning_rate": 7.670392042955199e-06, "loss": 0.3632, "step": 21851 }, { "epoch": 1.0027993208205221, "grad_norm": 0.4728301167488098, "learning_rate": 7.670184749318758e-06, "loss": 0.3123, "step": 21852 }, { "epoch": 1.0028452113257766, "grad_norm": 0.461937814950943, "learning_rate": 7.669977449261366e-06, "loss": 0.3438, "step": 21853 }, { "epoch": 1.0028911018310311, "grad_norm": 0.5011401772499084, "learning_rate": 7.66977014278352e-06, "loss": 0.4198, "step": 21854 }, { "epoch": 1.0029369923362856, "grad_norm": 0.5030772686004639, "learning_rate": 7.669562829885717e-06, "loss": 0.4005, "step": 21855 }, { "epoch": 1.0029828828415401, "grad_norm": 0.5199286341667175, "learning_rate": 7.669355510568462e-06, "loss": 0.3862, "step": 21856 }, { "epoch": 1.0030287733467946, "grad_norm": 0.4653865098953247, "learning_rate": 7.669148184832247e-06, "loss": 0.3161, "step": 21857 }, { "epoch": 1.003074663852049, "grad_norm": 0.47198596596717834, "learning_rate": 7.668940852677573e-06, "loss": 0.3896, "step": 21858 }, { "epoch": 1.0031205543573034, "grad_norm": 0.5021759271621704, "learning_rate": 7.668733514104938e-06, "loss": 0.4297, "step": 21859 }, { "epoch": 1.003166444862558, "grad_norm": 0.5020312666893005, "learning_rate": 7.668526169114843e-06, "loss": 0.3891, "step": 21860 }, { "epoch": 1.0032123353678124, "grad_norm": 0.46755024790763855, "learning_rate": 7.668318817707786e-06, "loss": 0.3875, "step": 21861 }, { "epoch": 1.003258225873067, "grad_norm": 0.4605557322502136, "learning_rate": 7.668111459884262e-06, "loss": 0.3456, "step": 21862 }, { "epoch": 1.0033041163783214, "grad_norm": 0.44801077246665955, "learning_rate": 7.667904095644773e-06, "loss": 0.3866, "step": 21863 }, { "epoch": 1.003350006883576, "grad_norm": 0.48443830013275146, "learning_rate": 7.667696724989815e-06, "loss": 0.2886, "step": 21864 }, { "epoch": 1.0033958973888302, "grad_norm": 0.4684072732925415, "learning_rate": 7.66748934791989e-06, "loss": 0.3328, "step": 21865 }, { "epoch": 1.0034417878940847, "grad_norm": 0.4466891884803772, "learning_rate": 7.667281964435495e-06, "loss": 0.3197, "step": 21866 }, { "epoch": 1.0034876783993392, "grad_norm": 0.469329297542572, "learning_rate": 7.667074574537127e-06, "loss": 0.3854, "step": 21867 }, { "epoch": 1.0035335689045937, "grad_norm": 0.4909505546092987, "learning_rate": 7.666867178225287e-06, "loss": 0.3736, "step": 21868 }, { "epoch": 1.0035794594098482, "grad_norm": 0.41680142283439636, "learning_rate": 7.666659775500476e-06, "loss": 0.275, "step": 21869 }, { "epoch": 1.0036253499151027, "grad_norm": 0.45755481719970703, "learning_rate": 7.666452366363187e-06, "loss": 0.3215, "step": 21870 }, { "epoch": 1.003671240420357, "grad_norm": 0.47610172629356384, "learning_rate": 7.666244950813923e-06, "loss": 0.4085, "step": 21871 }, { "epoch": 1.0037171309256114, "grad_norm": 0.46707797050476074, "learning_rate": 7.66603752885318e-06, "loss": 0.3401, "step": 21872 }, { "epoch": 1.003763021430866, "grad_norm": 0.4567943513393402, "learning_rate": 7.66583010048146e-06, "loss": 0.3355, "step": 21873 }, { "epoch": 1.0038089119361204, "grad_norm": 0.4661584794521332, "learning_rate": 7.665622665699256e-06, "loss": 0.3766, "step": 21874 }, { "epoch": 1.003854802441375, "grad_norm": 0.44932228326797485, "learning_rate": 7.665415224507076e-06, "loss": 0.2956, "step": 21875 }, { "epoch": 1.0039006929466294, "grad_norm": 0.46109265089035034, "learning_rate": 7.66520777690541e-06, "loss": 0.336, "step": 21876 }, { "epoch": 1.0039465834518837, "grad_norm": 0.4688689112663269, "learning_rate": 7.665000322894762e-06, "loss": 0.3868, "step": 21877 }, { "epoch": 1.0039924739571382, "grad_norm": 0.42947739362716675, "learning_rate": 7.66479286247563e-06, "loss": 0.3205, "step": 21878 }, { "epoch": 1.0040383644623927, "grad_norm": 0.46897774934768677, "learning_rate": 7.664585395648509e-06, "loss": 0.3143, "step": 21879 }, { "epoch": 1.0040842549676472, "grad_norm": 0.45936164259910583, "learning_rate": 7.664377922413905e-06, "loss": 0.3573, "step": 21880 }, { "epoch": 1.0041301454729017, "grad_norm": 0.511625349521637, "learning_rate": 7.664170442772309e-06, "loss": 0.4068, "step": 21881 }, { "epoch": 1.0041760359781562, "grad_norm": 0.4670616388320923, "learning_rate": 7.663962956724226e-06, "loss": 0.3756, "step": 21882 }, { "epoch": 1.0042219264834107, "grad_norm": 0.4762481451034546, "learning_rate": 7.663755464270156e-06, "loss": 0.3653, "step": 21883 }, { "epoch": 1.004267816988665, "grad_norm": 0.49966031312942505, "learning_rate": 7.663547965410591e-06, "loss": 0.3737, "step": 21884 }, { "epoch": 1.0043137074939195, "grad_norm": 0.4603038430213928, "learning_rate": 7.663340460146033e-06, "loss": 0.3465, "step": 21885 }, { "epoch": 1.004359597999174, "grad_norm": 0.48679646849632263, "learning_rate": 7.663132948476985e-06, "loss": 0.3836, "step": 21886 }, { "epoch": 1.0044054885044285, "grad_norm": 0.47332707047462463, "learning_rate": 7.66292543040394e-06, "loss": 0.3929, "step": 21887 }, { "epoch": 1.004451379009683, "grad_norm": 0.48933321237564087, "learning_rate": 7.6627179059274e-06, "loss": 0.4085, "step": 21888 }, { "epoch": 1.0044972695149375, "grad_norm": 0.4848455786705017, "learning_rate": 7.662510375047867e-06, "loss": 0.3665, "step": 21889 }, { "epoch": 1.0045431600201917, "grad_norm": 0.4562510848045349, "learning_rate": 7.662302837765834e-06, "loss": 0.3861, "step": 21890 }, { "epoch": 1.0045890505254462, "grad_norm": 0.49389615654945374, "learning_rate": 7.662095294081805e-06, "loss": 0.3625, "step": 21891 }, { "epoch": 1.0046349410307007, "grad_norm": 0.46587130427360535, "learning_rate": 7.661887743996275e-06, "loss": 0.3115, "step": 21892 }, { "epoch": 1.0046808315359552, "grad_norm": 0.4521610140800476, "learning_rate": 7.661680187509744e-06, "loss": 0.348, "step": 21893 }, { "epoch": 1.0047267220412097, "grad_norm": 0.46794214844703674, "learning_rate": 7.661472624622715e-06, "loss": 0.3497, "step": 21894 }, { "epoch": 1.0047726125464642, "grad_norm": 0.47309887409210205, "learning_rate": 7.661265055335683e-06, "loss": 0.3306, "step": 21895 }, { "epoch": 1.0048185030517185, "grad_norm": 0.4100973308086395, "learning_rate": 7.661057479649148e-06, "loss": 0.2625, "step": 21896 }, { "epoch": 1.004864393556973, "grad_norm": 0.5007053017616272, "learning_rate": 7.660849897563611e-06, "loss": 0.4348, "step": 21897 }, { "epoch": 1.0049102840622275, "grad_norm": 0.43340030312538147, "learning_rate": 7.660642309079569e-06, "loss": 0.3068, "step": 21898 }, { "epoch": 1.004956174567482, "grad_norm": 0.51333087682724, "learning_rate": 7.660434714197522e-06, "loss": 0.4471, "step": 21899 }, { "epoch": 1.0050020650727365, "grad_norm": 0.43661823868751526, "learning_rate": 7.66022711291797e-06, "loss": 0.3365, "step": 21900 }, { "epoch": 1.005047955577991, "grad_norm": 0.44621703028678894, "learning_rate": 7.660019505241409e-06, "loss": 0.333, "step": 21901 }, { "epoch": 1.0050938460832455, "grad_norm": 0.46153634786605835, "learning_rate": 7.659811891168344e-06, "loss": 0.3652, "step": 21902 }, { "epoch": 1.0051397365884998, "grad_norm": 0.4785188138484955, "learning_rate": 7.659604270699268e-06, "loss": 0.3722, "step": 21903 }, { "epoch": 1.0051856270937543, "grad_norm": 0.44658544659614563, "learning_rate": 7.659396643834684e-06, "loss": 0.3468, "step": 21904 }, { "epoch": 1.0052315175990088, "grad_norm": 0.4938446581363678, "learning_rate": 7.65918901057509e-06, "loss": 0.3762, "step": 21905 }, { "epoch": 1.0052774081042632, "grad_norm": 0.4482554793357849, "learning_rate": 7.658981370920986e-06, "loss": 0.3194, "step": 21906 }, { "epoch": 1.0053232986095177, "grad_norm": 0.4671158790588379, "learning_rate": 7.658773724872873e-06, "loss": 0.3418, "step": 21907 }, { "epoch": 1.0053691891147722, "grad_norm": 0.4733157455921173, "learning_rate": 7.658566072431246e-06, "loss": 0.3091, "step": 21908 }, { "epoch": 1.0054150796200265, "grad_norm": 0.4278928339481354, "learning_rate": 7.658358413596608e-06, "loss": 0.3066, "step": 21909 }, { "epoch": 1.005460970125281, "grad_norm": 0.45247340202331543, "learning_rate": 7.658150748369456e-06, "loss": 0.3476, "step": 21910 }, { "epoch": 1.0055068606305355, "grad_norm": 0.494045615196228, "learning_rate": 7.65794307675029e-06, "loss": 0.3504, "step": 21911 }, { "epoch": 1.00555275113579, "grad_norm": 0.46854332089424133, "learning_rate": 7.65773539873961e-06, "loss": 0.3548, "step": 21912 }, { "epoch": 1.0055986416410445, "grad_norm": 0.5320063233375549, "learning_rate": 7.657527714337916e-06, "loss": 0.4251, "step": 21913 }, { "epoch": 1.005644532146299, "grad_norm": 0.4261787235736847, "learning_rate": 7.657320023545705e-06, "loss": 0.2969, "step": 21914 }, { "epoch": 1.0056904226515533, "grad_norm": 0.4555548131465912, "learning_rate": 7.65711232636348e-06, "loss": 0.3174, "step": 21915 }, { "epoch": 1.0057363131568078, "grad_norm": 0.4771327078342438, "learning_rate": 7.656904622791737e-06, "loss": 0.3838, "step": 21916 }, { "epoch": 1.0057822036620623, "grad_norm": 0.4637584984302521, "learning_rate": 7.656696912830978e-06, "loss": 0.3391, "step": 21917 }, { "epoch": 1.0058280941673168, "grad_norm": 0.4800531268119812, "learning_rate": 7.656489196481701e-06, "loss": 0.412, "step": 21918 }, { "epoch": 1.0058739846725713, "grad_norm": 0.4903578758239746, "learning_rate": 7.656281473744405e-06, "loss": 0.4026, "step": 21919 }, { "epoch": 1.0059198751778258, "grad_norm": 0.5070230960845947, "learning_rate": 7.656073744619592e-06, "loss": 0.4128, "step": 21920 }, { "epoch": 1.0059657656830803, "grad_norm": 0.46659526228904724, "learning_rate": 7.65586600910776e-06, "loss": 0.3424, "step": 21921 }, { "epoch": 1.0060116561883345, "grad_norm": 0.42623820900917053, "learning_rate": 7.655658267209408e-06, "loss": 0.3171, "step": 21922 }, { "epoch": 1.006057546693589, "grad_norm": 0.4843745827674866, "learning_rate": 7.655450518925035e-06, "loss": 0.3687, "step": 21923 }, { "epoch": 1.0061034371988435, "grad_norm": 0.5140113830566406, "learning_rate": 7.655242764255142e-06, "loss": 0.4488, "step": 21924 }, { "epoch": 1.006149327704098, "grad_norm": 0.49371007084846497, "learning_rate": 7.655035003200229e-06, "loss": 0.3789, "step": 21925 }, { "epoch": 1.0061952182093525, "grad_norm": 0.4540064036846161, "learning_rate": 7.654827235760793e-06, "loss": 0.3261, "step": 21926 }, { "epoch": 1.006241108714607, "grad_norm": 0.4493793249130249, "learning_rate": 7.654619461937337e-06, "loss": 0.3361, "step": 21927 }, { "epoch": 1.0062869992198613, "grad_norm": 0.48265212774276733, "learning_rate": 7.654411681730359e-06, "loss": 0.4193, "step": 21928 }, { "epoch": 1.0063328897251158, "grad_norm": 0.49837997555732727, "learning_rate": 7.654203895140358e-06, "loss": 0.4, "step": 21929 }, { "epoch": 1.0063787802303703, "grad_norm": 0.45213600993156433, "learning_rate": 7.653996102167834e-06, "loss": 0.344, "step": 21930 }, { "epoch": 1.0064246707356248, "grad_norm": 0.488018661737442, "learning_rate": 7.65378830281329e-06, "loss": 0.4059, "step": 21931 }, { "epoch": 1.0064705612408793, "grad_norm": 0.4590126574039459, "learning_rate": 7.65358049707722e-06, "loss": 0.3626, "step": 21932 }, { "epoch": 1.0065164517461338, "grad_norm": 0.48965781927108765, "learning_rate": 7.653372684960125e-06, "loss": 0.3723, "step": 21933 }, { "epoch": 1.0065623422513883, "grad_norm": 0.5480174422264099, "learning_rate": 7.653164866462507e-06, "loss": 0.3568, "step": 21934 }, { "epoch": 1.0066082327566426, "grad_norm": 0.4429236948490143, "learning_rate": 7.652957041584868e-06, "loss": 0.4014, "step": 21935 }, { "epoch": 1.006654123261897, "grad_norm": 0.4574386477470398, "learning_rate": 7.652749210327701e-06, "loss": 0.3494, "step": 21936 }, { "epoch": 1.0067000137671516, "grad_norm": 0.4678099453449249, "learning_rate": 7.65254137269151e-06, "loss": 0.3413, "step": 21937 }, { "epoch": 1.006745904272406, "grad_norm": 0.4735844135284424, "learning_rate": 7.652333528676796e-06, "loss": 0.3703, "step": 21938 }, { "epoch": 1.0067917947776606, "grad_norm": 0.5432665348052979, "learning_rate": 7.652125678284058e-06, "loss": 0.4741, "step": 21939 }, { "epoch": 1.006837685282915, "grad_norm": 0.47513774037361145, "learning_rate": 7.65191782151379e-06, "loss": 0.3884, "step": 21940 }, { "epoch": 1.0068835757881693, "grad_norm": 0.45072948932647705, "learning_rate": 7.651709958366498e-06, "loss": 0.3198, "step": 21941 }, { "epoch": 1.0069294662934238, "grad_norm": 0.45452946424484253, "learning_rate": 7.651502088842682e-06, "loss": 0.3392, "step": 21942 }, { "epoch": 1.0069753567986783, "grad_norm": 0.4512861967086792, "learning_rate": 7.651294212942842e-06, "loss": 0.3279, "step": 21943 }, { "epoch": 1.0070212473039328, "grad_norm": 0.42003974318504333, "learning_rate": 7.651086330667473e-06, "loss": 0.329, "step": 21944 }, { "epoch": 1.0070671378091873, "grad_norm": 0.5066702961921692, "learning_rate": 7.65087844201708e-06, "loss": 0.4268, "step": 21945 }, { "epoch": 1.0071130283144418, "grad_norm": 0.430603563785553, "learning_rate": 7.65067054699216e-06, "loss": 0.2816, "step": 21946 }, { "epoch": 1.007158918819696, "grad_norm": 0.4638647139072418, "learning_rate": 7.650462645593213e-06, "loss": 0.37, "step": 21947 }, { "epoch": 1.0072048093249506, "grad_norm": 0.4421878159046173, "learning_rate": 7.650254737820742e-06, "loss": 0.3056, "step": 21948 }, { "epoch": 1.007250699830205, "grad_norm": 0.4775584936141968, "learning_rate": 7.650046823675243e-06, "loss": 0.4191, "step": 21949 }, { "epoch": 1.0072965903354596, "grad_norm": 0.48896414041519165, "learning_rate": 7.649838903157218e-06, "loss": 0.3634, "step": 21950 }, { "epoch": 1.007342480840714, "grad_norm": 0.4620627760887146, "learning_rate": 7.649630976267169e-06, "loss": 0.3515, "step": 21951 }, { "epoch": 1.0073883713459686, "grad_norm": 0.44375136494636536, "learning_rate": 7.64942304300559e-06, "loss": 0.314, "step": 21952 }, { "epoch": 1.007434261851223, "grad_norm": 0.44201672077178955, "learning_rate": 7.649215103372987e-06, "loss": 0.3086, "step": 21953 }, { "epoch": 1.0074801523564774, "grad_norm": 0.45628949999809265, "learning_rate": 7.649007157369858e-06, "loss": 0.3484, "step": 21954 }, { "epoch": 1.0075260428617319, "grad_norm": 0.45369455218315125, "learning_rate": 7.648799204996702e-06, "loss": 0.3219, "step": 21955 }, { "epoch": 1.0075719333669864, "grad_norm": 0.4913589060306549, "learning_rate": 7.648591246254023e-06, "loss": 0.4055, "step": 21956 }, { "epoch": 1.0076178238722409, "grad_norm": 0.42942869663238525, "learning_rate": 7.648383281142314e-06, "loss": 0.2915, "step": 21957 }, { "epoch": 1.0076637143774954, "grad_norm": 0.4813835918903351, "learning_rate": 7.64817530966208e-06, "loss": 0.3754, "step": 21958 }, { "epoch": 1.0077096048827499, "grad_norm": 0.5294176936149597, "learning_rate": 7.64796733181382e-06, "loss": 0.4681, "step": 21959 }, { "epoch": 1.0077554953880041, "grad_norm": 0.46044209599494934, "learning_rate": 7.647759347598036e-06, "loss": 0.3876, "step": 21960 }, { "epoch": 1.0078013858932586, "grad_norm": 0.48537254333496094, "learning_rate": 7.647551357015224e-06, "loss": 0.3688, "step": 21961 }, { "epoch": 1.0078472763985131, "grad_norm": 0.44438159465789795, "learning_rate": 7.64734336006589e-06, "loss": 0.3267, "step": 21962 }, { "epoch": 1.0078931669037676, "grad_norm": 0.4591151177883148, "learning_rate": 7.647135356750527e-06, "loss": 0.3632, "step": 21963 }, { "epoch": 1.0079390574090221, "grad_norm": 0.4508124589920044, "learning_rate": 7.646927347069641e-06, "loss": 0.3289, "step": 21964 }, { "epoch": 1.0079849479142766, "grad_norm": 0.42664802074432373, "learning_rate": 7.64671933102373e-06, "loss": 0.2926, "step": 21965 }, { "epoch": 1.008030838419531, "grad_norm": 0.5398070216178894, "learning_rate": 7.646511308613294e-06, "loss": 0.5411, "step": 21966 }, { "epoch": 1.0080767289247854, "grad_norm": 0.47213688492774963, "learning_rate": 7.646303279838832e-06, "loss": 0.3601, "step": 21967 }, { "epoch": 1.00812261943004, "grad_norm": 0.4734417498111725, "learning_rate": 7.646095244700848e-06, "loss": 0.3695, "step": 21968 }, { "epoch": 1.0081685099352944, "grad_norm": 0.4657507538795471, "learning_rate": 7.645887203199838e-06, "loss": 0.3911, "step": 21969 }, { "epoch": 1.0082144004405489, "grad_norm": 0.4774101674556732, "learning_rate": 7.645679155336308e-06, "loss": 0.3264, "step": 21970 }, { "epoch": 1.0082602909458034, "grad_norm": 0.4606582522392273, "learning_rate": 7.645471101110752e-06, "loss": 0.3165, "step": 21971 }, { "epoch": 1.0083061814510579, "grad_norm": 0.4135957360267639, "learning_rate": 7.645263040523672e-06, "loss": 0.2728, "step": 21972 }, { "epoch": 1.0083520719563122, "grad_norm": 0.4791407585144043, "learning_rate": 7.645054973575571e-06, "loss": 0.3807, "step": 21973 }, { "epoch": 1.0083979624615667, "grad_norm": 0.4825863242149353, "learning_rate": 7.644846900266946e-06, "loss": 0.3287, "step": 21974 }, { "epoch": 1.0084438529668212, "grad_norm": 0.4641238749027252, "learning_rate": 7.644638820598303e-06, "loss": 0.3536, "step": 21975 }, { "epoch": 1.0084897434720757, "grad_norm": 0.4588776230812073, "learning_rate": 7.644430734570134e-06, "loss": 0.3181, "step": 21976 }, { "epoch": 1.0085356339773301, "grad_norm": 0.476427286863327, "learning_rate": 7.644222642182946e-06, "loss": 0.3512, "step": 21977 }, { "epoch": 1.0085815244825846, "grad_norm": 0.4788590669631958, "learning_rate": 7.644014543437235e-06, "loss": 0.3801, "step": 21978 }, { "epoch": 1.008627414987839, "grad_norm": 0.5241381525993347, "learning_rate": 7.643806438333505e-06, "loss": 0.4341, "step": 21979 }, { "epoch": 1.0086733054930934, "grad_norm": 0.45544761419296265, "learning_rate": 7.643598326872254e-06, "loss": 0.3528, "step": 21980 }, { "epoch": 1.008719195998348, "grad_norm": 0.45860937237739563, "learning_rate": 7.643390209053984e-06, "loss": 0.3573, "step": 21981 }, { "epoch": 1.0087650865036024, "grad_norm": 0.5526385307312012, "learning_rate": 7.643182084879195e-06, "loss": 0.4955, "step": 21982 }, { "epoch": 1.008810977008857, "grad_norm": 0.4804658591747284, "learning_rate": 7.642973954348387e-06, "loss": 0.3936, "step": 21983 }, { "epoch": 1.0088568675141114, "grad_norm": 0.4508894979953766, "learning_rate": 7.642765817462061e-06, "loss": 0.3517, "step": 21984 }, { "epoch": 1.0089027580193657, "grad_norm": 0.4676394462585449, "learning_rate": 7.642557674220717e-06, "loss": 0.3206, "step": 21985 }, { "epoch": 1.0089486485246202, "grad_norm": 0.4926576316356659, "learning_rate": 7.642349524624857e-06, "loss": 0.4203, "step": 21986 }, { "epoch": 1.0089945390298747, "grad_norm": 0.4182214140892029, "learning_rate": 7.64214136867498e-06, "loss": 0.2693, "step": 21987 }, { "epoch": 1.0090404295351292, "grad_norm": 0.4686330556869507, "learning_rate": 7.641933206371586e-06, "loss": 0.3717, "step": 21988 }, { "epoch": 1.0090863200403837, "grad_norm": 0.46840035915374756, "learning_rate": 7.641725037715175e-06, "loss": 0.3866, "step": 21989 }, { "epoch": 1.0091322105456382, "grad_norm": 0.4498589336872101, "learning_rate": 7.64151686270625e-06, "loss": 0.3249, "step": 21990 }, { "epoch": 1.0091781010508927, "grad_norm": 0.4583662450313568, "learning_rate": 7.641308681345311e-06, "loss": 0.3168, "step": 21991 }, { "epoch": 1.009223991556147, "grad_norm": 0.47554442286491394, "learning_rate": 7.64110049363286e-06, "loss": 0.4262, "step": 21992 }, { "epoch": 1.0092698820614014, "grad_norm": 0.4735945165157318, "learning_rate": 7.640892299569395e-06, "loss": 0.3788, "step": 21993 }, { "epoch": 1.009315772566656, "grad_norm": 0.4630342125892639, "learning_rate": 7.640684099155416e-06, "loss": 0.3648, "step": 21994 }, { "epoch": 1.0093616630719104, "grad_norm": 0.4587978422641754, "learning_rate": 7.640475892391425e-06, "loss": 0.3358, "step": 21995 }, { "epoch": 1.009407553577165, "grad_norm": 0.4704074561595917, "learning_rate": 7.640267679277926e-06, "loss": 0.3771, "step": 21996 }, { "epoch": 1.0094534440824194, "grad_norm": 0.4564983546733856, "learning_rate": 7.640059459815414e-06, "loss": 0.3881, "step": 21997 }, { "epoch": 1.0094993345876737, "grad_norm": 0.5114113092422485, "learning_rate": 7.639851234004392e-06, "loss": 0.424, "step": 21998 }, { "epoch": 1.0095452250929282, "grad_norm": 0.48488306999206543, "learning_rate": 7.639643001845363e-06, "loss": 0.3837, "step": 21999 }, { "epoch": 1.0095911155981827, "grad_norm": 0.5178372859954834, "learning_rate": 7.639434763338824e-06, "loss": 0.44, "step": 22000 }, { "epoch": 1.0096370061034372, "grad_norm": 0.42639902234077454, "learning_rate": 7.639226518485278e-06, "loss": 0.2889, "step": 22001 }, { "epoch": 1.0096828966086917, "grad_norm": 0.4664183557033539, "learning_rate": 7.639018267285224e-06, "loss": 0.3595, "step": 22002 }, { "epoch": 1.0097287871139462, "grad_norm": 0.5068734288215637, "learning_rate": 7.638810009739166e-06, "loss": 0.388, "step": 22003 }, { "epoch": 1.0097746776192005, "grad_norm": 0.4730684459209442, "learning_rate": 7.638601745847602e-06, "loss": 0.319, "step": 22004 }, { "epoch": 1.009820568124455, "grad_norm": 0.5206175446510315, "learning_rate": 7.638393475611032e-06, "loss": 0.3928, "step": 22005 }, { "epoch": 1.0098664586297095, "grad_norm": 0.474385529756546, "learning_rate": 7.638185199029959e-06, "loss": 0.3293, "step": 22006 }, { "epoch": 1.009912349134964, "grad_norm": 0.47156351804733276, "learning_rate": 7.637976916104886e-06, "loss": 0.3725, "step": 22007 }, { "epoch": 1.0099582396402185, "grad_norm": 0.44430676102638245, "learning_rate": 7.637768626836308e-06, "loss": 0.3003, "step": 22008 }, { "epoch": 1.010004130145473, "grad_norm": 0.4669210612773895, "learning_rate": 7.63756033122473e-06, "loss": 0.3869, "step": 22009 }, { "epoch": 1.0100500206507275, "grad_norm": 0.4672679901123047, "learning_rate": 7.637352029270653e-06, "loss": 0.3233, "step": 22010 }, { "epoch": 1.0100959111559817, "grad_norm": 0.4440314769744873, "learning_rate": 7.637143720974574e-06, "loss": 0.3169, "step": 22011 }, { "epoch": 1.0101418016612362, "grad_norm": 0.45189571380615234, "learning_rate": 7.636935406336998e-06, "loss": 0.2825, "step": 22012 }, { "epoch": 1.0101876921664907, "grad_norm": 0.4757099151611328, "learning_rate": 7.636727085358424e-06, "loss": 0.3569, "step": 22013 }, { "epoch": 1.0102335826717452, "grad_norm": 0.44854632019996643, "learning_rate": 7.636518758039356e-06, "loss": 0.3316, "step": 22014 }, { "epoch": 1.0102794731769997, "grad_norm": 0.4765455722808838, "learning_rate": 7.63631042438029e-06, "loss": 0.3827, "step": 22015 }, { "epoch": 1.0103253636822542, "grad_norm": 0.441520631313324, "learning_rate": 7.63610208438173e-06, "loss": 0.3495, "step": 22016 }, { "epoch": 1.0103712541875085, "grad_norm": 0.4542381465435028, "learning_rate": 7.635893738044176e-06, "loss": 0.3717, "step": 22017 }, { "epoch": 1.010417144692763, "grad_norm": 0.45801815390586853, "learning_rate": 7.635685385368132e-06, "loss": 0.3461, "step": 22018 }, { "epoch": 1.0104630351980175, "grad_norm": 0.46151214838027954, "learning_rate": 7.635477026354094e-06, "loss": 0.3385, "step": 22019 }, { "epoch": 1.010508925703272, "grad_norm": 0.4726400077342987, "learning_rate": 7.635268661002566e-06, "loss": 0.3682, "step": 22020 }, { "epoch": 1.0105548162085265, "grad_norm": 0.488136351108551, "learning_rate": 7.635060289314049e-06, "loss": 0.3808, "step": 22021 }, { "epoch": 1.010600706713781, "grad_norm": 0.4829421937465668, "learning_rate": 7.634851911289042e-06, "loss": 0.4273, "step": 22022 }, { "epoch": 1.0106465972190355, "grad_norm": 0.4190484583377838, "learning_rate": 7.63464352692805e-06, "loss": 0.2723, "step": 22023 }, { "epoch": 1.0106924877242898, "grad_norm": 0.47775572538375854, "learning_rate": 7.63443513623157e-06, "loss": 0.3372, "step": 22024 }, { "epoch": 1.0107383782295443, "grad_norm": 0.4634079933166504, "learning_rate": 7.634226739200108e-06, "loss": 0.3329, "step": 22025 }, { "epoch": 1.0107842687347988, "grad_norm": 0.459500253200531, "learning_rate": 7.634018335834158e-06, "loss": 0.3685, "step": 22026 }, { "epoch": 1.0108301592400533, "grad_norm": 0.44724348187446594, "learning_rate": 7.63380992613423e-06, "loss": 0.2979, "step": 22027 }, { "epoch": 1.0108760497453078, "grad_norm": 0.4745018184185028, "learning_rate": 7.633601510100817e-06, "loss": 0.3483, "step": 22028 }, { "epoch": 1.0109219402505623, "grad_norm": 0.5010080337524414, "learning_rate": 7.633393087734422e-06, "loss": 0.4189, "step": 22029 }, { "epoch": 1.0109678307558165, "grad_norm": 0.634324848651886, "learning_rate": 7.633184659035551e-06, "loss": 0.4415, "step": 22030 }, { "epoch": 1.011013721261071, "grad_norm": 0.45068231225013733, "learning_rate": 7.6329762240047e-06, "loss": 0.3038, "step": 22031 }, { "epoch": 1.0110596117663255, "grad_norm": 0.4984112083911896, "learning_rate": 7.632767782642374e-06, "loss": 0.4408, "step": 22032 }, { "epoch": 1.01110550227158, "grad_norm": 0.4544280171394348, "learning_rate": 7.632559334949072e-06, "loss": 0.3284, "step": 22033 }, { "epoch": 1.0111513927768345, "grad_norm": 0.4905652105808258, "learning_rate": 7.632350880925295e-06, "loss": 0.4332, "step": 22034 }, { "epoch": 1.011197283282089, "grad_norm": 0.46233922243118286, "learning_rate": 7.632142420571544e-06, "loss": 0.3227, "step": 22035 }, { "epoch": 1.0112431737873433, "grad_norm": 0.4680502712726593, "learning_rate": 7.631933953888324e-06, "loss": 0.3367, "step": 22036 }, { "epoch": 1.0112890642925978, "grad_norm": 0.47527310252189636, "learning_rate": 7.63172548087613e-06, "loss": 0.3461, "step": 22037 }, { "epoch": 1.0113349547978523, "grad_norm": 0.5120348334312439, "learning_rate": 7.63151700153547e-06, "loss": 0.4237, "step": 22038 }, { "epoch": 1.0113808453031068, "grad_norm": 0.44648757576942444, "learning_rate": 7.63130851586684e-06, "loss": 0.3164, "step": 22039 }, { "epoch": 1.0114267358083613, "grad_norm": 0.46843788027763367, "learning_rate": 7.631100023870743e-06, "loss": 0.3391, "step": 22040 }, { "epoch": 1.0114726263136158, "grad_norm": 0.4264986217021942, "learning_rate": 7.630891525547684e-06, "loss": 0.2909, "step": 22041 }, { "epoch": 1.0115185168188703, "grad_norm": 0.4607801139354706, "learning_rate": 7.63068302089816e-06, "loss": 0.3275, "step": 22042 }, { "epoch": 1.0115644073241246, "grad_norm": 0.4534933567047119, "learning_rate": 7.630474509922672e-06, "loss": 0.3536, "step": 22043 }, { "epoch": 1.011610297829379, "grad_norm": 0.4521688222885132, "learning_rate": 7.630265992621724e-06, "loss": 0.3343, "step": 22044 }, { "epoch": 1.0116561883346336, "grad_norm": 0.5108500123023987, "learning_rate": 7.630057468995815e-06, "loss": 0.3754, "step": 22045 }, { "epoch": 1.011702078839888, "grad_norm": 0.46493223309516907, "learning_rate": 7.62984893904545e-06, "loss": 0.3568, "step": 22046 }, { "epoch": 1.0117479693451426, "grad_norm": 0.47341030836105347, "learning_rate": 7.629640402771127e-06, "loss": 0.3782, "step": 22047 }, { "epoch": 1.011793859850397, "grad_norm": 0.4748906195163727, "learning_rate": 7.62943186017335e-06, "loss": 0.365, "step": 22048 }, { "epoch": 1.0118397503556513, "grad_norm": 0.4835397005081177, "learning_rate": 7.629223311252618e-06, "loss": 0.3636, "step": 22049 }, { "epoch": 1.0118856408609058, "grad_norm": 0.4603281617164612, "learning_rate": 7.629014756009434e-06, "loss": 0.3733, "step": 22050 }, { "epoch": 1.0119315313661603, "grad_norm": 0.44926467537879944, "learning_rate": 7.628806194444299e-06, "loss": 0.3839, "step": 22051 }, { "epoch": 1.0119774218714148, "grad_norm": 0.5048958659172058, "learning_rate": 7.628597626557717e-06, "loss": 0.382, "step": 22052 }, { "epoch": 1.0120233123766693, "grad_norm": 0.4764832854270935, "learning_rate": 7.628389052350185e-06, "loss": 0.3755, "step": 22053 }, { "epoch": 1.0120692028819238, "grad_norm": 0.4480188488960266, "learning_rate": 7.6281804718222065e-06, "loss": 0.3114, "step": 22054 }, { "epoch": 1.012115093387178, "grad_norm": 0.4723823666572571, "learning_rate": 7.627971884974286e-06, "loss": 0.3541, "step": 22055 }, { "epoch": 1.0121609838924326, "grad_norm": 0.4636901319026947, "learning_rate": 7.62776329180692e-06, "loss": 0.3137, "step": 22056 }, { "epoch": 1.012206874397687, "grad_norm": 0.4176749587059021, "learning_rate": 7.627554692320613e-06, "loss": 0.2731, "step": 22057 }, { "epoch": 1.0122527649029416, "grad_norm": 0.47738325595855713, "learning_rate": 7.627346086515868e-06, "loss": 0.3945, "step": 22058 }, { "epoch": 1.012298655408196, "grad_norm": 0.48821792006492615, "learning_rate": 7.627137474393183e-06, "loss": 0.4184, "step": 22059 }, { "epoch": 1.0123445459134506, "grad_norm": 0.49111419916152954, "learning_rate": 7.626928855953062e-06, "loss": 0.402, "step": 22060 }, { "epoch": 1.012390436418705, "grad_norm": 0.5020433068275452, "learning_rate": 7.626720231196006e-06, "loss": 0.4581, "step": 22061 }, { "epoch": 1.0124363269239594, "grad_norm": 0.49074047803878784, "learning_rate": 7.626511600122517e-06, "loss": 0.4346, "step": 22062 }, { "epoch": 1.0124822174292138, "grad_norm": 0.476683646440506, "learning_rate": 7.626302962733097e-06, "loss": 0.3367, "step": 22063 }, { "epoch": 1.0125281079344683, "grad_norm": 0.4653279185295105, "learning_rate": 7.626094319028248e-06, "loss": 0.3068, "step": 22064 }, { "epoch": 1.0125739984397228, "grad_norm": 0.4887130856513977, "learning_rate": 7.62588566900847e-06, "loss": 0.3564, "step": 22065 }, { "epoch": 1.0126198889449773, "grad_norm": 0.5384306907653809, "learning_rate": 7.625677012674264e-06, "loss": 0.4069, "step": 22066 }, { "epoch": 1.0126657794502318, "grad_norm": 0.43069005012512207, "learning_rate": 7.625468350026138e-06, "loss": 0.2997, "step": 22067 }, { "epoch": 1.0127116699554861, "grad_norm": 0.4755381643772125, "learning_rate": 7.625259681064585e-06, "loss": 0.3479, "step": 22068 }, { "epoch": 1.0127575604607406, "grad_norm": 0.47719621658325195, "learning_rate": 7.625051005790114e-06, "loss": 0.3845, "step": 22069 }, { "epoch": 1.0128034509659951, "grad_norm": 0.456089049577713, "learning_rate": 7.624842324203221e-06, "loss": 0.3396, "step": 22070 }, { "epoch": 1.0128493414712496, "grad_norm": 0.4463846683502197, "learning_rate": 7.624633636304411e-06, "loss": 0.3491, "step": 22071 }, { "epoch": 1.012895231976504, "grad_norm": 0.4942513108253479, "learning_rate": 7.624424942094188e-06, "loss": 0.3959, "step": 22072 }, { "epoch": 1.0129411224817586, "grad_norm": 0.5073105692863464, "learning_rate": 7.624216241573048e-06, "loss": 0.3661, "step": 22073 }, { "epoch": 1.0129870129870129, "grad_norm": 0.45593196153640747, "learning_rate": 7.624007534741499e-06, "loss": 0.3714, "step": 22074 }, { "epoch": 1.0130329034922674, "grad_norm": 0.45498326420783997, "learning_rate": 7.6237988216000394e-06, "loss": 0.3553, "step": 22075 }, { "epoch": 1.0130787939975219, "grad_norm": 0.4758533835411072, "learning_rate": 7.62359010214917e-06, "loss": 0.3803, "step": 22076 }, { "epoch": 1.0131246845027764, "grad_norm": 0.49344056844711304, "learning_rate": 7.623381376389396e-06, "loss": 0.4449, "step": 22077 }, { "epoch": 1.0131705750080309, "grad_norm": 0.42184823751449585, "learning_rate": 7.623172644321217e-06, "loss": 0.2739, "step": 22078 }, { "epoch": 1.0132164655132854, "grad_norm": 0.4517514407634735, "learning_rate": 7.6229639059451375e-06, "loss": 0.2948, "step": 22079 }, { "epoch": 1.0132623560185399, "grad_norm": 0.4632375240325928, "learning_rate": 7.622755161261655e-06, "loss": 0.4019, "step": 22080 }, { "epoch": 1.0133082465237941, "grad_norm": 0.38979053497314453, "learning_rate": 7.6225464102712766e-06, "loss": 0.2445, "step": 22081 }, { "epoch": 1.0133541370290486, "grad_norm": 0.50668865442276, "learning_rate": 7.6223376529745e-06, "loss": 0.4549, "step": 22082 }, { "epoch": 1.0134000275343031, "grad_norm": 0.477242648601532, "learning_rate": 7.622128889371829e-06, "loss": 0.3815, "step": 22083 }, { "epoch": 1.0134459180395576, "grad_norm": 0.4867419898509979, "learning_rate": 7.621920119463766e-06, "loss": 0.3795, "step": 22084 }, { "epoch": 1.0134918085448121, "grad_norm": 0.4102017879486084, "learning_rate": 7.621711343250813e-06, "loss": 0.2787, "step": 22085 }, { "epoch": 1.0135376990500666, "grad_norm": 0.4611830413341522, "learning_rate": 7.621502560733471e-06, "loss": 0.3307, "step": 22086 }, { "epoch": 1.013583589555321, "grad_norm": 0.4710094630718231, "learning_rate": 7.621293771912244e-06, "loss": 0.3803, "step": 22087 }, { "epoch": 1.0136294800605754, "grad_norm": 0.4376713037490845, "learning_rate": 7.621084976787632e-06, "loss": 0.3315, "step": 22088 }, { "epoch": 1.01367537056583, "grad_norm": 0.49071240425109863, "learning_rate": 7.620876175360137e-06, "loss": 0.3488, "step": 22089 }, { "epoch": 1.0137212610710844, "grad_norm": 0.46005964279174805, "learning_rate": 7.620667367630263e-06, "loss": 0.3383, "step": 22090 }, { "epoch": 1.013767151576339, "grad_norm": 0.44709059596061707, "learning_rate": 7.620458553598511e-06, "loss": 0.3598, "step": 22091 }, { "epoch": 1.0138130420815934, "grad_norm": 0.48072659969329834, "learning_rate": 7.620249733265383e-06, "loss": 0.3431, "step": 22092 }, { "epoch": 1.0138589325868477, "grad_norm": 0.4581223428249359, "learning_rate": 7.620040906631381e-06, "loss": 0.3581, "step": 22093 }, { "epoch": 1.0139048230921022, "grad_norm": 0.4809369146823883, "learning_rate": 7.6198320736970065e-06, "loss": 0.35, "step": 22094 }, { "epoch": 1.0139507135973567, "grad_norm": 0.5036215782165527, "learning_rate": 7.6196232344627655e-06, "loss": 0.4093, "step": 22095 }, { "epoch": 1.0139966041026112, "grad_norm": 0.5208719968795776, "learning_rate": 7.619414388929156e-06, "loss": 0.3769, "step": 22096 }, { "epoch": 1.0140424946078657, "grad_norm": 0.4754723906517029, "learning_rate": 7.619205537096681e-06, "loss": 0.3392, "step": 22097 }, { "epoch": 1.0140883851131202, "grad_norm": 0.4784787893295288, "learning_rate": 7.618996678965844e-06, "loss": 0.3603, "step": 22098 }, { "epoch": 1.0141342756183747, "grad_norm": 0.404583215713501, "learning_rate": 7.6187878145371475e-06, "loss": 0.2388, "step": 22099 }, { "epoch": 1.014180166123629, "grad_norm": 0.48957884311676025, "learning_rate": 7.618578943811092e-06, "loss": 0.4256, "step": 22100 }, { "epoch": 1.0142260566288834, "grad_norm": 0.46243757009506226, "learning_rate": 7.618370066788181e-06, "loss": 0.3476, "step": 22101 }, { "epoch": 1.014271947134138, "grad_norm": 0.47779539227485657, "learning_rate": 7.618161183468915e-06, "loss": 0.3823, "step": 22102 }, { "epoch": 1.0143178376393924, "grad_norm": 0.4467626214027405, "learning_rate": 7.617952293853798e-06, "loss": 0.3037, "step": 22103 }, { "epoch": 1.014363728144647, "grad_norm": 0.46788108348846436, "learning_rate": 7.617743397943333e-06, "loss": 0.3359, "step": 22104 }, { "epoch": 1.0144096186499014, "grad_norm": 0.46843335032463074, "learning_rate": 7.61753449573802e-06, "loss": 0.4208, "step": 22105 }, { "epoch": 1.0144555091551557, "grad_norm": 0.47303012013435364, "learning_rate": 7.617325587238365e-06, "loss": 0.3788, "step": 22106 }, { "epoch": 1.0145013996604102, "grad_norm": 0.5298566222190857, "learning_rate": 7.617116672444867e-06, "loss": 0.4689, "step": 22107 }, { "epoch": 1.0145472901656647, "grad_norm": 0.4463147819042206, "learning_rate": 7.616907751358027e-06, "loss": 0.3538, "step": 22108 }, { "epoch": 1.0145931806709192, "grad_norm": 0.4658949375152588, "learning_rate": 7.616698823978353e-06, "loss": 0.3709, "step": 22109 }, { "epoch": 1.0146390711761737, "grad_norm": 0.4489476978778839, "learning_rate": 7.616489890306341e-06, "loss": 0.3157, "step": 22110 }, { "epoch": 1.0146849616814282, "grad_norm": 0.4422892928123474, "learning_rate": 7.616280950342499e-06, "loss": 0.2934, "step": 22111 }, { "epoch": 1.0147308521866827, "grad_norm": 0.470743864774704, "learning_rate": 7.616072004087327e-06, "loss": 0.3352, "step": 22112 }, { "epoch": 1.014776742691937, "grad_norm": 0.493049681186676, "learning_rate": 7.615863051541325e-06, "loss": 0.4096, "step": 22113 }, { "epoch": 1.0148226331971915, "grad_norm": 0.44023841619491577, "learning_rate": 7.6156540927050005e-06, "loss": 0.2967, "step": 22114 }, { "epoch": 1.014868523702446, "grad_norm": 0.48853105306625366, "learning_rate": 7.615445127578853e-06, "loss": 0.3974, "step": 22115 }, { "epoch": 1.0149144142077005, "grad_norm": 0.4452958405017853, "learning_rate": 7.615236156163385e-06, "loss": 0.3108, "step": 22116 }, { "epoch": 1.014960304712955, "grad_norm": 0.5077585577964783, "learning_rate": 7.6150271784591e-06, "loss": 0.4559, "step": 22117 }, { "epoch": 1.0150061952182095, "grad_norm": 0.4653472602367401, "learning_rate": 7.6148181944665e-06, "loss": 0.3645, "step": 22118 }, { "epoch": 1.0150520857234637, "grad_norm": 0.4486215114593506, "learning_rate": 7.614609204186088e-06, "loss": 0.3269, "step": 22119 }, { "epoch": 1.0150979762287182, "grad_norm": 0.4825180470943451, "learning_rate": 7.6144002076183644e-06, "loss": 0.3551, "step": 22120 }, { "epoch": 1.0151438667339727, "grad_norm": 0.45526447892189026, "learning_rate": 7.614191204763834e-06, "loss": 0.3062, "step": 22121 }, { "epoch": 1.0151897572392272, "grad_norm": 0.4712159335613251, "learning_rate": 7.613982195623e-06, "loss": 0.3644, "step": 22122 }, { "epoch": 1.0152356477444817, "grad_norm": 0.45140278339385986, "learning_rate": 7.613773180196362e-06, "loss": 0.3637, "step": 22123 }, { "epoch": 1.0152815382497362, "grad_norm": 0.4519461989402771, "learning_rate": 7.613564158484426e-06, "loss": 0.3644, "step": 22124 }, { "epoch": 1.0153274287549905, "grad_norm": 0.4816216826438904, "learning_rate": 7.6133551304876905e-06, "loss": 0.3745, "step": 22125 }, { "epoch": 1.015373319260245, "grad_norm": 0.5087295174598694, "learning_rate": 7.613146096206663e-06, "loss": 0.4226, "step": 22126 }, { "epoch": 1.0154192097654995, "grad_norm": 0.4526520073413849, "learning_rate": 7.6129370556418424e-06, "loss": 0.3111, "step": 22127 }, { "epoch": 1.015465100270754, "grad_norm": 0.42599615454673767, "learning_rate": 7.612728008793735e-06, "loss": 0.2775, "step": 22128 }, { "epoch": 1.0155109907760085, "grad_norm": 0.4877137839794159, "learning_rate": 7.612518955662841e-06, "loss": 0.3461, "step": 22129 }, { "epoch": 1.015556881281263, "grad_norm": 0.47664228081703186, "learning_rate": 7.612309896249661e-06, "loss": 0.3419, "step": 22130 }, { "epoch": 1.0156027717865175, "grad_norm": 0.4753614664077759, "learning_rate": 7.612100830554703e-06, "loss": 0.3501, "step": 22131 }, { "epoch": 1.0156486622917718, "grad_norm": 0.5116774439811707, "learning_rate": 7.611891758578465e-06, "loss": 0.4237, "step": 22132 }, { "epoch": 1.0156945527970263, "grad_norm": 0.4641915261745453, "learning_rate": 7.6116826803214526e-06, "loss": 0.3205, "step": 22133 }, { "epoch": 1.0157404433022807, "grad_norm": 0.494960218667984, "learning_rate": 7.611473595784169e-06, "loss": 0.3826, "step": 22134 }, { "epoch": 1.0157863338075352, "grad_norm": 0.4413314461708069, "learning_rate": 7.611264504967113e-06, "loss": 0.2815, "step": 22135 }, { "epoch": 1.0158322243127897, "grad_norm": 0.48673513531684875, "learning_rate": 7.611055407870791e-06, "loss": 0.4043, "step": 22136 }, { "epoch": 1.0158781148180442, "grad_norm": 0.44349953532218933, "learning_rate": 7.610846304495705e-06, "loss": 0.3262, "step": 22137 }, { "epoch": 1.0159240053232985, "grad_norm": 0.4706258475780487, "learning_rate": 7.610637194842356e-06, "loss": 0.3706, "step": 22138 }, { "epoch": 1.015969895828553, "grad_norm": 0.4683552384376526, "learning_rate": 7.610428078911252e-06, "loss": 0.4074, "step": 22139 }, { "epoch": 1.0160157863338075, "grad_norm": 0.40438202023506165, "learning_rate": 7.610218956702889e-06, "loss": 0.2679, "step": 22140 }, { "epoch": 1.016061676839062, "grad_norm": 0.42577850818634033, "learning_rate": 7.610009828217775e-06, "loss": 0.306, "step": 22141 }, { "epoch": 1.0161075673443165, "grad_norm": 0.4815780818462372, "learning_rate": 7.60980069345641e-06, "loss": 0.3597, "step": 22142 }, { "epoch": 1.016153457849571, "grad_norm": 0.4786836802959442, "learning_rate": 7.609591552419299e-06, "loss": 0.3653, "step": 22143 }, { "epoch": 1.0161993483548253, "grad_norm": 0.453630268573761, "learning_rate": 7.609382405106945e-06, "loss": 0.3442, "step": 22144 }, { "epoch": 1.0162452388600798, "grad_norm": 0.4582040309906006, "learning_rate": 7.6091732515198465e-06, "loss": 0.3685, "step": 22145 }, { "epoch": 1.0162911293653343, "grad_norm": 0.45196786522865295, "learning_rate": 7.608964091658513e-06, "loss": 0.3028, "step": 22146 }, { "epoch": 1.0163370198705888, "grad_norm": 0.488829106092453, "learning_rate": 7.608754925523442e-06, "loss": 0.3768, "step": 22147 }, { "epoch": 1.0163829103758433, "grad_norm": 0.4882957935333252, "learning_rate": 7.608545753115139e-06, "loss": 0.3666, "step": 22148 }, { "epoch": 1.0164288008810978, "grad_norm": 0.5124367475509644, "learning_rate": 7.608336574434107e-06, "loss": 0.3818, "step": 22149 }, { "epoch": 1.0164746913863523, "grad_norm": 0.46642589569091797, "learning_rate": 7.608127389480851e-06, "loss": 0.3997, "step": 22150 }, { "epoch": 1.0165205818916065, "grad_norm": 0.5109196305274963, "learning_rate": 7.607918198255871e-06, "loss": 0.386, "step": 22151 }, { "epoch": 1.016566472396861, "grad_norm": 0.473605215549469, "learning_rate": 7.607709000759669e-06, "loss": 0.3946, "step": 22152 }, { "epoch": 1.0166123629021155, "grad_norm": 0.47982141375541687, "learning_rate": 7.60749979699275e-06, "loss": 0.3781, "step": 22153 }, { "epoch": 1.01665825340737, "grad_norm": 0.44807589054107666, "learning_rate": 7.607290586955618e-06, "loss": 0.3205, "step": 22154 }, { "epoch": 1.0167041439126245, "grad_norm": 0.47111403942108154, "learning_rate": 7.607081370648774e-06, "loss": 0.3551, "step": 22155 }, { "epoch": 1.016750034417879, "grad_norm": 0.4593442678451538, "learning_rate": 7.606872148072724e-06, "loss": 0.3392, "step": 22156 }, { "epoch": 1.0167959249231333, "grad_norm": 0.5090833306312561, "learning_rate": 7.606662919227967e-06, "loss": 0.3956, "step": 22157 }, { "epoch": 1.0168418154283878, "grad_norm": 0.4910643994808197, "learning_rate": 7.606453684115009e-06, "loss": 0.3442, "step": 22158 }, { "epoch": 1.0168877059336423, "grad_norm": 0.44392526149749756, "learning_rate": 7.606244442734353e-06, "loss": 0.3184, "step": 22159 }, { "epoch": 1.0169335964388968, "grad_norm": 0.42901718616485596, "learning_rate": 7.606035195086502e-06, "loss": 0.319, "step": 22160 }, { "epoch": 1.0169794869441513, "grad_norm": 0.4865281879901886, "learning_rate": 7.605825941171958e-06, "loss": 0.4078, "step": 22161 }, { "epoch": 1.0170253774494058, "grad_norm": 0.4496835470199585, "learning_rate": 7.605616680991226e-06, "loss": 0.3339, "step": 22162 }, { "epoch": 1.01707126795466, "grad_norm": 0.4560079574584961, "learning_rate": 7.605407414544807e-06, "loss": 0.3647, "step": 22163 }, { "epoch": 1.0171171584599146, "grad_norm": 0.4802283048629761, "learning_rate": 7.605198141833206e-06, "loss": 0.3636, "step": 22164 }, { "epoch": 1.017163048965169, "grad_norm": 0.47219231724739075, "learning_rate": 7.604988862856927e-06, "loss": 0.4009, "step": 22165 }, { "epoch": 1.0172089394704236, "grad_norm": 0.5257166028022766, "learning_rate": 7.604779577616471e-06, "loss": 0.4435, "step": 22166 }, { "epoch": 1.017254829975678, "grad_norm": 0.4396454989910126, "learning_rate": 7.604570286112342e-06, "loss": 0.3027, "step": 22167 }, { "epoch": 1.0173007204809326, "grad_norm": 0.4743387699127197, "learning_rate": 7.604360988345043e-06, "loss": 0.3754, "step": 22168 }, { "epoch": 1.017346610986187, "grad_norm": 0.49622318148612976, "learning_rate": 7.604151684315077e-06, "loss": 0.37, "step": 22169 }, { "epoch": 1.0173925014914413, "grad_norm": 0.4840548038482666, "learning_rate": 7.60394237402295e-06, "loss": 0.3448, "step": 22170 }, { "epoch": 1.0174383919966958, "grad_norm": 0.5096202492713928, "learning_rate": 7.603733057469163e-06, "loss": 0.4565, "step": 22171 }, { "epoch": 1.0174842825019503, "grad_norm": 0.4843490719795227, "learning_rate": 7.60352373465422e-06, "loss": 0.3535, "step": 22172 }, { "epoch": 1.0175301730072048, "grad_norm": 0.4585023820400238, "learning_rate": 7.603314405578624e-06, "loss": 0.342, "step": 22173 }, { "epoch": 1.0175760635124593, "grad_norm": 0.4567861258983612, "learning_rate": 7.6031050702428774e-06, "loss": 0.3399, "step": 22174 }, { "epoch": 1.0176219540177138, "grad_norm": 0.5014970302581787, "learning_rate": 7.602895728647486e-06, "loss": 0.4148, "step": 22175 }, { "epoch": 1.017667844522968, "grad_norm": 0.454193115234375, "learning_rate": 7.602686380792952e-06, "loss": 0.3307, "step": 22176 }, { "epoch": 1.0177137350282226, "grad_norm": 0.46800950169563293, "learning_rate": 7.602477026679778e-06, "loss": 0.3302, "step": 22177 }, { "epoch": 1.017759625533477, "grad_norm": 0.4763777256011963, "learning_rate": 7.602267666308467e-06, "loss": 0.3201, "step": 22178 }, { "epoch": 1.0178055160387316, "grad_norm": 0.49628642201423645, "learning_rate": 7.602058299679525e-06, "loss": 0.4226, "step": 22179 }, { "epoch": 1.017851406543986, "grad_norm": 0.4462299048900604, "learning_rate": 7.6018489267934535e-06, "loss": 0.3075, "step": 22180 }, { "epoch": 1.0178972970492406, "grad_norm": 0.4279725253582001, "learning_rate": 7.6016395476507565e-06, "loss": 0.3319, "step": 22181 }, { "epoch": 1.0179431875544949, "grad_norm": 0.47382137179374695, "learning_rate": 7.601430162251937e-06, "loss": 0.4164, "step": 22182 }, { "epoch": 1.0179890780597494, "grad_norm": 0.4706905484199524, "learning_rate": 7.6012207705975e-06, "loss": 0.3965, "step": 22183 }, { "epoch": 1.0180349685650039, "grad_norm": 0.4728710353374481, "learning_rate": 7.601011372687947e-06, "loss": 0.3989, "step": 22184 }, { "epoch": 1.0180808590702584, "grad_norm": 0.4665845036506653, "learning_rate": 7.6008019685237845e-06, "loss": 0.3428, "step": 22185 }, { "epoch": 1.0181267495755129, "grad_norm": 0.49987250566482544, "learning_rate": 7.600592558105513e-06, "loss": 0.3552, "step": 22186 }, { "epoch": 1.0181726400807674, "grad_norm": 0.4747629761695862, "learning_rate": 7.600383141433636e-06, "loss": 0.3458, "step": 22187 }, { "epoch": 1.0182185305860219, "grad_norm": 0.4755016565322876, "learning_rate": 7.600173718508661e-06, "loss": 0.3747, "step": 22188 }, { "epoch": 1.0182644210912761, "grad_norm": 0.4548684358596802, "learning_rate": 7.599964289331085e-06, "loss": 0.3263, "step": 22189 }, { "epoch": 1.0183103115965306, "grad_norm": 0.48045143485069275, "learning_rate": 7.599754853901418e-06, "loss": 0.3708, "step": 22190 }, { "epoch": 1.0183562021017851, "grad_norm": 0.4917643964290619, "learning_rate": 7.5995454122201615e-06, "loss": 0.3801, "step": 22191 }, { "epoch": 1.0184020926070396, "grad_norm": 0.4784308671951294, "learning_rate": 7.599335964287817e-06, "loss": 0.3704, "step": 22192 }, { "epoch": 1.0184479831122941, "grad_norm": 0.5150562524795532, "learning_rate": 7.59912651010489e-06, "loss": 0.3754, "step": 22193 }, { "epoch": 1.0184938736175486, "grad_norm": 0.47991785407066345, "learning_rate": 7.598917049671886e-06, "loss": 0.3675, "step": 22194 }, { "epoch": 1.018539764122803, "grad_norm": 0.45261263847351074, "learning_rate": 7.598707582989305e-06, "loss": 0.3431, "step": 22195 }, { "epoch": 1.0185856546280574, "grad_norm": 0.45501869916915894, "learning_rate": 7.598498110057653e-06, "loss": 0.3046, "step": 22196 }, { "epoch": 1.0186315451333119, "grad_norm": 0.5878657698631287, "learning_rate": 7.598288630877433e-06, "loss": 0.4706, "step": 22197 }, { "epoch": 1.0186774356385664, "grad_norm": 0.4284994602203369, "learning_rate": 7.59807914544915e-06, "loss": 0.2728, "step": 22198 }, { "epoch": 1.0187233261438209, "grad_norm": 0.48287561535835266, "learning_rate": 7.597869653773305e-06, "loss": 0.3283, "step": 22199 }, { "epoch": 1.0187692166490754, "grad_norm": 0.5041671395301819, "learning_rate": 7.597660155850404e-06, "loss": 0.422, "step": 22200 }, { "epoch": 1.0188151071543299, "grad_norm": 0.4774300754070282, "learning_rate": 7.59745065168095e-06, "loss": 0.3642, "step": 22201 }, { "epoch": 1.0188609976595842, "grad_norm": 0.4760906398296356, "learning_rate": 7.597241141265448e-06, "loss": 0.3888, "step": 22202 }, { "epoch": 1.0189068881648387, "grad_norm": 0.471473753452301, "learning_rate": 7.5970316246043985e-06, "loss": 0.4215, "step": 22203 }, { "epoch": 1.0189527786700932, "grad_norm": 0.45234355330467224, "learning_rate": 7.596822101698309e-06, "loss": 0.333, "step": 22204 }, { "epoch": 1.0189986691753476, "grad_norm": 0.49176210165023804, "learning_rate": 7.5966125725476836e-06, "loss": 0.4209, "step": 22205 }, { "epoch": 1.0190445596806021, "grad_norm": 0.5233871340751648, "learning_rate": 7.596403037153022e-06, "loss": 0.4668, "step": 22206 }, { "epoch": 1.0190904501858566, "grad_norm": 0.4806680977344513, "learning_rate": 7.596193495514831e-06, "loss": 0.3374, "step": 22207 }, { "epoch": 1.019136340691111, "grad_norm": 0.4347662031650543, "learning_rate": 7.595983947633614e-06, "loss": 0.3319, "step": 22208 }, { "epoch": 1.0191822311963654, "grad_norm": 0.44121137261390686, "learning_rate": 7.595774393509876e-06, "loss": 0.3411, "step": 22209 }, { "epoch": 1.01922812170162, "grad_norm": 0.447936087846756, "learning_rate": 7.595564833144119e-06, "loss": 0.3346, "step": 22210 }, { "epoch": 1.0192740122068744, "grad_norm": 0.4689260721206665, "learning_rate": 7.595355266536848e-06, "loss": 0.3576, "step": 22211 }, { "epoch": 1.019319902712129, "grad_norm": 0.5022258758544922, "learning_rate": 7.595145693688566e-06, "loss": 0.3895, "step": 22212 }, { "epoch": 1.0193657932173834, "grad_norm": 0.4731845557689667, "learning_rate": 7.594936114599778e-06, "loss": 0.3454, "step": 22213 }, { "epoch": 1.0194116837226377, "grad_norm": 0.4816189706325531, "learning_rate": 7.594726529270988e-06, "loss": 0.3797, "step": 22214 }, { "epoch": 1.0194575742278922, "grad_norm": 0.47558578848838806, "learning_rate": 7.594516937702701e-06, "loss": 0.3788, "step": 22215 }, { "epoch": 1.0195034647331467, "grad_norm": 0.7088468074798584, "learning_rate": 7.594307339895417e-06, "loss": 0.3268, "step": 22216 }, { "epoch": 1.0195493552384012, "grad_norm": 0.5633447170257568, "learning_rate": 7.594097735849644e-06, "loss": 0.4885, "step": 22217 }, { "epoch": 1.0195952457436557, "grad_norm": 0.4830838143825531, "learning_rate": 7.593888125565884e-06, "loss": 0.3362, "step": 22218 }, { "epoch": 1.0196411362489102, "grad_norm": 0.47700080275535583, "learning_rate": 7.593678509044642e-06, "loss": 0.3747, "step": 22219 }, { "epoch": 1.0196870267541647, "grad_norm": 0.4795612692832947, "learning_rate": 7.5934688862864236e-06, "loss": 0.3506, "step": 22220 }, { "epoch": 1.019732917259419, "grad_norm": 0.4981966018676758, "learning_rate": 7.593259257291727e-06, "loss": 0.4436, "step": 22221 }, { "epoch": 1.0197788077646734, "grad_norm": 0.5193784236907959, "learning_rate": 7.593049622061064e-06, "loss": 0.4309, "step": 22222 }, { "epoch": 1.019824698269928, "grad_norm": 0.458997905254364, "learning_rate": 7.592839980594934e-06, "loss": 0.3287, "step": 22223 }, { "epoch": 1.0198705887751824, "grad_norm": 0.4828920066356659, "learning_rate": 7.592630332893841e-06, "loss": 0.3525, "step": 22224 }, { "epoch": 1.019916479280437, "grad_norm": 0.4917052686214447, "learning_rate": 7.592420678958292e-06, "loss": 0.4105, "step": 22225 }, { "epoch": 1.0199623697856914, "grad_norm": 0.47202107310295105, "learning_rate": 7.592211018788789e-06, "loss": 0.3447, "step": 22226 }, { "epoch": 1.0200082602909457, "grad_norm": 0.4994295835494995, "learning_rate": 7.592001352385836e-06, "loss": 0.4273, "step": 22227 }, { "epoch": 1.0200541507962002, "grad_norm": 0.47072696685791016, "learning_rate": 7.591791679749938e-06, "loss": 0.3622, "step": 22228 }, { "epoch": 1.0201000413014547, "grad_norm": 0.46992817521095276, "learning_rate": 7.591582000881599e-06, "loss": 0.3538, "step": 22229 }, { "epoch": 1.0201459318067092, "grad_norm": 0.44903314113616943, "learning_rate": 7.591372315781324e-06, "loss": 0.3373, "step": 22230 }, { "epoch": 1.0201918223119637, "grad_norm": 0.4431670904159546, "learning_rate": 7.5911626244496155e-06, "loss": 0.3132, "step": 22231 }, { "epoch": 1.0202377128172182, "grad_norm": 0.463265061378479, "learning_rate": 7.5909529268869786e-06, "loss": 0.3407, "step": 22232 }, { "epoch": 1.0202836033224725, "grad_norm": 0.45582839846611023, "learning_rate": 7.590743223093918e-06, "loss": 0.3596, "step": 22233 }, { "epoch": 1.020329493827727, "grad_norm": 0.48671987652778625, "learning_rate": 7.590533513070937e-06, "loss": 0.3776, "step": 22234 }, { "epoch": 1.0203753843329815, "grad_norm": 0.49230876564979553, "learning_rate": 7.590323796818542e-06, "loss": 0.4038, "step": 22235 }, { "epoch": 1.020421274838236, "grad_norm": 0.4757263660430908, "learning_rate": 7.590114074337234e-06, "loss": 0.3632, "step": 22236 }, { "epoch": 1.0204671653434905, "grad_norm": 0.46610596776008606, "learning_rate": 7.589904345627521e-06, "loss": 0.3761, "step": 22237 }, { "epoch": 1.020513055848745, "grad_norm": 0.4523918926715851, "learning_rate": 7.589694610689904e-06, "loss": 0.3525, "step": 22238 }, { "epoch": 1.0205589463539995, "grad_norm": 0.48547810316085815, "learning_rate": 7.58948486952489e-06, "loss": 0.4132, "step": 22239 }, { "epoch": 1.0206048368592537, "grad_norm": 0.44026634097099304, "learning_rate": 7.58927512213298e-06, "loss": 0.3454, "step": 22240 }, { "epoch": 1.0206507273645082, "grad_norm": 0.46190929412841797, "learning_rate": 7.589065368514682e-06, "loss": 0.3051, "step": 22241 }, { "epoch": 1.0206966178697627, "grad_norm": 0.4953574240207672, "learning_rate": 7.5888556086705005e-06, "loss": 0.3709, "step": 22242 }, { "epoch": 1.0207425083750172, "grad_norm": 0.46077170968055725, "learning_rate": 7.588645842600936e-06, "loss": 0.3588, "step": 22243 }, { "epoch": 1.0207883988802717, "grad_norm": 0.5069426894187927, "learning_rate": 7.588436070306495e-06, "loss": 0.4364, "step": 22244 }, { "epoch": 1.0208342893855262, "grad_norm": 0.4488179683685303, "learning_rate": 7.588226291787683e-06, "loss": 0.3428, "step": 22245 }, { "epoch": 1.0208801798907805, "grad_norm": 0.4825003147125244, "learning_rate": 7.5880165070450044e-06, "loss": 0.356, "step": 22246 }, { "epoch": 1.020926070396035, "grad_norm": 0.432256817817688, "learning_rate": 7.587806716078963e-06, "loss": 0.2962, "step": 22247 }, { "epoch": 1.0209719609012895, "grad_norm": 0.4861150085926056, "learning_rate": 7.587596918890062e-06, "loss": 0.3505, "step": 22248 }, { "epoch": 1.021017851406544, "grad_norm": 0.4912828505039215, "learning_rate": 7.587387115478807e-06, "loss": 0.4064, "step": 22249 }, { "epoch": 1.0210637419117985, "grad_norm": 0.4781213104724884, "learning_rate": 7.587177305845703e-06, "loss": 0.3889, "step": 22250 }, { "epoch": 1.021109632417053, "grad_norm": 0.44183406233787537, "learning_rate": 7.586967489991255e-06, "loss": 0.2849, "step": 22251 }, { "epoch": 1.0211555229223073, "grad_norm": 0.45234328508377075, "learning_rate": 7.586757667915965e-06, "loss": 0.3247, "step": 22252 }, { "epoch": 1.0212014134275618, "grad_norm": 0.4553004503250122, "learning_rate": 7.586547839620342e-06, "loss": 0.3461, "step": 22253 }, { "epoch": 1.0212473039328163, "grad_norm": 0.4535216689109802, "learning_rate": 7.586338005104885e-06, "loss": 0.3283, "step": 22254 }, { "epoch": 1.0212931944380708, "grad_norm": 0.4335271120071411, "learning_rate": 7.586128164370101e-06, "loss": 0.3036, "step": 22255 }, { "epoch": 1.0213390849433253, "grad_norm": 0.4839659631252289, "learning_rate": 7.585918317416496e-06, "loss": 0.3481, "step": 22256 }, { "epoch": 1.0213849754485798, "grad_norm": 0.48041415214538574, "learning_rate": 7.585708464244574e-06, "loss": 0.3667, "step": 22257 }, { "epoch": 1.0214308659538343, "grad_norm": 0.45650431513786316, "learning_rate": 7.585498604854839e-06, "loss": 0.3721, "step": 22258 }, { "epoch": 1.0214767564590885, "grad_norm": 0.4898580014705658, "learning_rate": 7.585288739247797e-06, "loss": 0.3974, "step": 22259 }, { "epoch": 1.021522646964343, "grad_norm": 0.4334639310836792, "learning_rate": 7.5850788674239494e-06, "loss": 0.321, "step": 22260 }, { "epoch": 1.0215685374695975, "grad_norm": 0.531714916229248, "learning_rate": 7.584868989383804e-06, "loss": 0.4067, "step": 22261 }, { "epoch": 1.021614427974852, "grad_norm": 0.4242710769176483, "learning_rate": 7.584659105127865e-06, "loss": 0.2899, "step": 22262 }, { "epoch": 1.0216603184801065, "grad_norm": 0.42120546102523804, "learning_rate": 7.584449214656635e-06, "loss": 0.2983, "step": 22263 }, { "epoch": 1.021706208985361, "grad_norm": 0.4590929448604584, "learning_rate": 7.584239317970623e-06, "loss": 0.3597, "step": 22264 }, { "epoch": 1.0217520994906153, "grad_norm": 0.45939624309539795, "learning_rate": 7.5840294150703285e-06, "loss": 0.3646, "step": 22265 }, { "epoch": 1.0217979899958698, "grad_norm": 0.43377286195755005, "learning_rate": 7.5838195059562604e-06, "loss": 0.2648, "step": 22266 }, { "epoch": 1.0218438805011243, "grad_norm": 0.46497035026550293, "learning_rate": 7.583609590628922e-06, "loss": 0.3401, "step": 22267 }, { "epoch": 1.0218897710063788, "grad_norm": 0.45689284801483154, "learning_rate": 7.583399669088815e-06, "loss": 0.3344, "step": 22268 }, { "epoch": 1.0219356615116333, "grad_norm": 0.46664556860923767, "learning_rate": 7.58318974133645e-06, "loss": 0.3344, "step": 22269 }, { "epoch": 1.0219815520168878, "grad_norm": 0.4780256152153015, "learning_rate": 7.58297980737233e-06, "loss": 0.3253, "step": 22270 }, { "epoch": 1.022027442522142, "grad_norm": 0.46115368604660034, "learning_rate": 7.582769867196955e-06, "loss": 0.3519, "step": 22271 }, { "epoch": 1.0220733330273966, "grad_norm": 0.4536869525909424, "learning_rate": 7.582559920810837e-06, "loss": 0.3057, "step": 22272 }, { "epoch": 1.022119223532651, "grad_norm": 0.4997319281101227, "learning_rate": 7.582349968214475e-06, "loss": 0.4833, "step": 22273 }, { "epoch": 1.0221651140379056, "grad_norm": 0.4599466323852539, "learning_rate": 7.582140009408378e-06, "loss": 0.3722, "step": 22274 }, { "epoch": 1.02221100454316, "grad_norm": 0.4423424005508423, "learning_rate": 7.581930044393049e-06, "loss": 0.3337, "step": 22275 }, { "epoch": 1.0222568950484145, "grad_norm": 0.45354461669921875, "learning_rate": 7.5817200731689924e-06, "loss": 0.3306, "step": 22276 }, { "epoch": 1.022302785553669, "grad_norm": 0.4293654263019562, "learning_rate": 7.581510095736715e-06, "loss": 0.3303, "step": 22277 }, { "epoch": 1.0223486760589233, "grad_norm": 0.4467923939228058, "learning_rate": 7.581300112096719e-06, "loss": 0.3317, "step": 22278 }, { "epoch": 1.0223945665641778, "grad_norm": 0.42587533593177795, "learning_rate": 7.581090122249512e-06, "loss": 0.2877, "step": 22279 }, { "epoch": 1.0224404570694323, "grad_norm": 0.46021565794944763, "learning_rate": 7.5808801261955975e-06, "loss": 0.3167, "step": 22280 }, { "epoch": 1.0224863475746868, "grad_norm": 0.5043947100639343, "learning_rate": 7.580670123935482e-06, "loss": 0.4324, "step": 22281 }, { "epoch": 1.0225322380799413, "grad_norm": 0.45424848794937134, "learning_rate": 7.580460115469668e-06, "loss": 0.3529, "step": 22282 }, { "epoch": 1.0225781285851958, "grad_norm": 0.4748769998550415, "learning_rate": 7.5802501007986605e-06, "loss": 0.3872, "step": 22283 }, { "epoch": 1.02262401909045, "grad_norm": 0.5129307508468628, "learning_rate": 7.580040079922968e-06, "loss": 0.4047, "step": 22284 }, { "epoch": 1.0226699095957046, "grad_norm": 0.45659831166267395, "learning_rate": 7.579830052843093e-06, "loss": 0.3274, "step": 22285 }, { "epoch": 1.022715800100959, "grad_norm": 0.4234340488910675, "learning_rate": 7.579620019559542e-06, "loss": 0.2857, "step": 22286 }, { "epoch": 1.0227616906062136, "grad_norm": 0.4579523801803589, "learning_rate": 7.579409980072817e-06, "loss": 0.3701, "step": 22287 }, { "epoch": 1.022807581111468, "grad_norm": 0.46205469965934753, "learning_rate": 7.579199934383426e-06, "loss": 0.3143, "step": 22288 }, { "epoch": 1.0228534716167226, "grad_norm": 0.47239184379577637, "learning_rate": 7.578989882491871e-06, "loss": 0.3603, "step": 22289 }, { "epoch": 1.022899362121977, "grad_norm": 0.4593476951122284, "learning_rate": 7.5787798243986635e-06, "loss": 0.3271, "step": 22290 }, { "epoch": 1.0229452526272313, "grad_norm": 0.5322347283363342, "learning_rate": 7.578569760104302e-06, "loss": 0.4463, "step": 22291 }, { "epoch": 1.0229911431324858, "grad_norm": 0.45614251494407654, "learning_rate": 7.578359689609294e-06, "loss": 0.3366, "step": 22292 }, { "epoch": 1.0230370336377403, "grad_norm": 0.4433298408985138, "learning_rate": 7.578149612914145e-06, "loss": 0.3109, "step": 22293 }, { "epoch": 1.0230829241429948, "grad_norm": 0.4425281882286072, "learning_rate": 7.5779395300193606e-06, "loss": 0.2965, "step": 22294 }, { "epoch": 1.0231288146482493, "grad_norm": 0.5146587491035461, "learning_rate": 7.577729440925443e-06, "loss": 0.3712, "step": 22295 }, { "epoch": 1.0231747051535038, "grad_norm": 0.5087910294532776, "learning_rate": 7.577519345632902e-06, "loss": 0.4746, "step": 22296 }, { "epoch": 1.0232205956587581, "grad_norm": 0.48190248012542725, "learning_rate": 7.577309244142239e-06, "loss": 0.3913, "step": 22297 }, { "epoch": 1.0232664861640126, "grad_norm": 0.42928194999694824, "learning_rate": 7.577099136453962e-06, "loss": 0.3275, "step": 22298 }, { "epoch": 1.023312376669267, "grad_norm": 0.44376417994499207, "learning_rate": 7.576889022568575e-06, "loss": 0.3069, "step": 22299 }, { "epoch": 1.0233582671745216, "grad_norm": 0.46500691771507263, "learning_rate": 7.57667890248658e-06, "loss": 0.4016, "step": 22300 }, { "epoch": 1.023404157679776, "grad_norm": 0.45745527744293213, "learning_rate": 7.57646877620849e-06, "loss": 0.3742, "step": 22301 }, { "epoch": 1.0234500481850306, "grad_norm": 0.41581276059150696, "learning_rate": 7.576258643734804e-06, "loss": 0.2956, "step": 22302 }, { "epoch": 1.0234959386902849, "grad_norm": 0.46407440304756165, "learning_rate": 7.576048505066028e-06, "loss": 0.3286, "step": 22303 }, { "epoch": 1.0235418291955394, "grad_norm": 0.46160003542900085, "learning_rate": 7.575838360202669e-06, "loss": 0.3865, "step": 22304 }, { "epoch": 1.0235877197007939, "grad_norm": 0.46395233273506165, "learning_rate": 7.5756282091452315e-06, "loss": 0.3657, "step": 22305 }, { "epoch": 1.0236336102060484, "grad_norm": 0.470740407705307, "learning_rate": 7.575418051894221e-06, "loss": 0.3761, "step": 22306 }, { "epoch": 1.0236795007113029, "grad_norm": 0.4814998209476471, "learning_rate": 7.575207888450144e-06, "loss": 0.3611, "step": 22307 }, { "epoch": 1.0237253912165574, "grad_norm": 0.45941299200057983, "learning_rate": 7.574997718813504e-06, "loss": 0.3401, "step": 22308 }, { "epoch": 1.0237712817218119, "grad_norm": 0.47474756836891174, "learning_rate": 7.574787542984807e-06, "loss": 0.3656, "step": 22309 }, { "epoch": 1.0238171722270661, "grad_norm": 0.4592050015926361, "learning_rate": 7.574577360964559e-06, "loss": 0.3506, "step": 22310 }, { "epoch": 1.0238630627323206, "grad_norm": 0.4951978623867035, "learning_rate": 7.574367172753265e-06, "loss": 0.4082, "step": 22311 }, { "epoch": 1.0239089532375751, "grad_norm": 0.4819735288619995, "learning_rate": 7.57415697835143e-06, "loss": 0.3606, "step": 22312 }, { "epoch": 1.0239548437428296, "grad_norm": 0.5496799945831299, "learning_rate": 7.573946777759561e-06, "loss": 0.4771, "step": 22313 }, { "epoch": 1.0240007342480841, "grad_norm": 0.5177422761917114, "learning_rate": 7.573736570978161e-06, "loss": 0.414, "step": 22314 }, { "epoch": 1.0240466247533386, "grad_norm": 0.477649986743927, "learning_rate": 7.5735263580077365e-06, "loss": 0.4393, "step": 22315 }, { "epoch": 1.024092515258593, "grad_norm": 0.4308584928512573, "learning_rate": 7.573316138848794e-06, "loss": 0.2752, "step": 22316 }, { "epoch": 1.0241384057638474, "grad_norm": 0.45932307839393616, "learning_rate": 7.573105913501839e-06, "loss": 0.3971, "step": 22317 }, { "epoch": 1.024184296269102, "grad_norm": 0.46062222123146057, "learning_rate": 7.572895681967376e-06, "loss": 0.3178, "step": 22318 }, { "epoch": 1.0242301867743564, "grad_norm": 0.4273023009300232, "learning_rate": 7.572685444245911e-06, "loss": 0.3436, "step": 22319 }, { "epoch": 1.024276077279611, "grad_norm": 0.4688393175601959, "learning_rate": 7.57247520033795e-06, "loss": 0.4284, "step": 22320 }, { "epoch": 1.0243219677848654, "grad_norm": 0.44319379329681396, "learning_rate": 7.572264950243997e-06, "loss": 0.3157, "step": 22321 }, { "epoch": 1.0243678582901197, "grad_norm": 0.45848655700683594, "learning_rate": 7.5720546939645585e-06, "loss": 0.3344, "step": 22322 }, { "epoch": 1.0244137487953742, "grad_norm": 0.4423292875289917, "learning_rate": 7.571844431500142e-06, "loss": 0.2761, "step": 22323 }, { "epoch": 1.0244596393006287, "grad_norm": 0.44142836332321167, "learning_rate": 7.571634162851249e-06, "loss": 0.3337, "step": 22324 }, { "epoch": 1.0245055298058832, "grad_norm": 0.4918602705001831, "learning_rate": 7.571423888018388e-06, "loss": 0.409, "step": 22325 }, { "epoch": 1.0245514203111377, "grad_norm": 0.4588117003440857, "learning_rate": 7.571213607002064e-06, "loss": 0.3437, "step": 22326 }, { "epoch": 1.0245973108163922, "grad_norm": 0.44028419256210327, "learning_rate": 7.571003319802785e-06, "loss": 0.2856, "step": 22327 }, { "epoch": 1.0246432013216467, "grad_norm": 0.49826285243034363, "learning_rate": 7.570793026421052e-06, "loss": 0.4433, "step": 22328 }, { "epoch": 1.024689091826901, "grad_norm": 0.43680697679519653, "learning_rate": 7.570582726857374e-06, "loss": 0.3065, "step": 22329 }, { "epoch": 1.0247349823321554, "grad_norm": 0.4948607385158539, "learning_rate": 7.570372421112255e-06, "loss": 0.3827, "step": 22330 }, { "epoch": 1.02478087283741, "grad_norm": 0.462436318397522, "learning_rate": 7.570162109186203e-06, "loss": 0.3735, "step": 22331 }, { "epoch": 1.0248267633426644, "grad_norm": 0.5094931125640869, "learning_rate": 7.5699517910797216e-06, "loss": 0.4005, "step": 22332 }, { "epoch": 1.024872653847919, "grad_norm": 0.45026957988739014, "learning_rate": 7.569741466793317e-06, "loss": 0.2676, "step": 22333 }, { "epoch": 1.0249185443531734, "grad_norm": 0.5112370252609253, "learning_rate": 7.569531136327497e-06, "loss": 0.4344, "step": 22334 }, { "epoch": 1.0249644348584277, "grad_norm": 0.4346585273742676, "learning_rate": 7.569320799682765e-06, "loss": 0.328, "step": 22335 }, { "epoch": 1.0250103253636822, "grad_norm": 0.43592745065689087, "learning_rate": 7.569110456859625e-06, "loss": 0.3012, "step": 22336 }, { "epoch": 1.0250562158689367, "grad_norm": 0.4990173280239105, "learning_rate": 7.568900107858586e-06, "loss": 0.3874, "step": 22337 }, { "epoch": 1.0251021063741912, "grad_norm": 0.48806604743003845, "learning_rate": 7.568689752680154e-06, "loss": 0.4017, "step": 22338 }, { "epoch": 1.0251479968794457, "grad_norm": 0.5158664584159851, "learning_rate": 7.568479391324832e-06, "loss": 0.5076, "step": 22339 }, { "epoch": 1.0251938873847002, "grad_norm": 0.47117242217063904, "learning_rate": 7.568269023793131e-06, "loss": 0.3217, "step": 22340 }, { "epoch": 1.0252397778899545, "grad_norm": 0.5159891247749329, "learning_rate": 7.568058650085552e-06, "loss": 0.4878, "step": 22341 }, { "epoch": 1.025285668395209, "grad_norm": 0.47598549723625183, "learning_rate": 7.5678482702026016e-06, "loss": 0.3868, "step": 22342 }, { "epoch": 1.0253315589004635, "grad_norm": 0.4446183741092682, "learning_rate": 7.5676378841447875e-06, "loss": 0.3575, "step": 22343 }, { "epoch": 1.025377449405718, "grad_norm": 0.44090571999549866, "learning_rate": 7.567427491912613e-06, "loss": 0.2941, "step": 22344 }, { "epoch": 1.0254233399109725, "grad_norm": 0.48144838213920593, "learning_rate": 7.567217093506588e-06, "loss": 0.384, "step": 22345 }, { "epoch": 1.025469230416227, "grad_norm": 0.4567750096321106, "learning_rate": 7.567006688927217e-06, "loss": 0.2881, "step": 22346 }, { "epoch": 1.0255151209214814, "grad_norm": 0.511216938495636, "learning_rate": 7.566796278175002e-06, "loss": 0.4982, "step": 22347 }, { "epoch": 1.0255610114267357, "grad_norm": 0.46285387873649597, "learning_rate": 7.566585861250453e-06, "loss": 0.3172, "step": 22348 }, { "epoch": 1.0256069019319902, "grad_norm": 0.43159592151641846, "learning_rate": 7.566375438154075e-06, "loss": 0.2929, "step": 22349 }, { "epoch": 1.0256527924372447, "grad_norm": 0.48541539907455444, "learning_rate": 7.566165008886375e-06, "loss": 0.4359, "step": 22350 }, { "epoch": 1.0256986829424992, "grad_norm": 0.44930824637413025, "learning_rate": 7.565954573447857e-06, "loss": 0.3247, "step": 22351 }, { "epoch": 1.0257445734477537, "grad_norm": 0.4637567698955536, "learning_rate": 7.565744131839029e-06, "loss": 0.3545, "step": 22352 }, { "epoch": 1.0257904639530082, "grad_norm": 0.4864486753940582, "learning_rate": 7.565533684060396e-06, "loss": 0.3706, "step": 22353 }, { "epoch": 1.0258363544582625, "grad_norm": 0.47738155722618103, "learning_rate": 7.565323230112462e-06, "loss": 0.3973, "step": 22354 }, { "epoch": 1.025882244963517, "grad_norm": 0.4547047019004822, "learning_rate": 7.565112769995737e-06, "loss": 0.3622, "step": 22355 }, { "epoch": 1.0259281354687715, "grad_norm": 0.48292824625968933, "learning_rate": 7.5649023037107265e-06, "loss": 0.4028, "step": 22356 }, { "epoch": 1.025974025974026, "grad_norm": 0.4564628601074219, "learning_rate": 7.5646918312579345e-06, "loss": 0.3505, "step": 22357 }, { "epoch": 1.0260199164792805, "grad_norm": 0.4875357151031494, "learning_rate": 7.564481352637868e-06, "loss": 0.3746, "step": 22358 }, { "epoch": 1.026065806984535, "grad_norm": 0.484312504529953, "learning_rate": 7.5642708678510335e-06, "loss": 0.3452, "step": 22359 }, { "epoch": 1.0261116974897893, "grad_norm": 0.5036768913269043, "learning_rate": 7.564060376897935e-06, "loss": 0.3694, "step": 22360 }, { "epoch": 1.0261575879950438, "grad_norm": 0.48736995458602905, "learning_rate": 7.563849879779083e-06, "loss": 0.369, "step": 22361 }, { "epoch": 1.0262034785002982, "grad_norm": 0.4783438444137573, "learning_rate": 7.56363937649498e-06, "loss": 0.3026, "step": 22362 }, { "epoch": 1.0262493690055527, "grad_norm": 0.522567629814148, "learning_rate": 7.563428867046135e-06, "loss": 0.4368, "step": 22363 }, { "epoch": 1.0262952595108072, "grad_norm": 0.4245157539844513, "learning_rate": 7.563218351433051e-06, "loss": 0.2918, "step": 22364 }, { "epoch": 1.0263411500160617, "grad_norm": 0.4428459405899048, "learning_rate": 7.563007829656235e-06, "loss": 0.3266, "step": 22365 }, { "epoch": 1.0263870405213162, "grad_norm": 0.4249100089073181, "learning_rate": 7.562797301716196e-06, "loss": 0.2836, "step": 22366 }, { "epoch": 1.0264329310265705, "grad_norm": 0.4268397092819214, "learning_rate": 7.5625867676134375e-06, "loss": 0.3194, "step": 22367 }, { "epoch": 1.026478821531825, "grad_norm": 0.4480585753917694, "learning_rate": 7.562376227348467e-06, "loss": 0.3298, "step": 22368 }, { "epoch": 1.0265247120370795, "grad_norm": 0.46138545870780945, "learning_rate": 7.5621656809217915e-06, "loss": 0.3741, "step": 22369 }, { "epoch": 1.026570602542334, "grad_norm": 0.46135807037353516, "learning_rate": 7.5619551283339136e-06, "loss": 0.3364, "step": 22370 }, { "epoch": 1.0266164930475885, "grad_norm": 0.4899735748767853, "learning_rate": 7.561744569585343e-06, "loss": 0.3498, "step": 22371 }, { "epoch": 1.026662383552843, "grad_norm": 0.5253533124923706, "learning_rate": 7.561534004676585e-06, "loss": 0.4429, "step": 22372 }, { "epoch": 1.0267082740580973, "grad_norm": 0.44150033593177795, "learning_rate": 7.561323433608147e-06, "loss": 0.3043, "step": 22373 }, { "epoch": 1.0267541645633518, "grad_norm": 0.4299364387989044, "learning_rate": 7.561112856380535e-06, "loss": 0.3034, "step": 22374 }, { "epoch": 1.0268000550686063, "grad_norm": 0.45328256487846375, "learning_rate": 7.5609022729942546e-06, "loss": 0.3018, "step": 22375 }, { "epoch": 1.0268459455738608, "grad_norm": 0.4577149450778961, "learning_rate": 7.560691683449811e-06, "loss": 0.3207, "step": 22376 }, { "epoch": 1.0268918360791153, "grad_norm": 0.49876320362091064, "learning_rate": 7.5604810877477145e-06, "loss": 0.3512, "step": 22377 }, { "epoch": 1.0269377265843698, "grad_norm": 0.4864700734615326, "learning_rate": 7.560270485888468e-06, "loss": 0.3865, "step": 22378 }, { "epoch": 1.0269836170896243, "grad_norm": 0.4928778409957886, "learning_rate": 7.560059877872578e-06, "loss": 0.3884, "step": 22379 }, { "epoch": 1.0270295075948785, "grad_norm": 0.48024070262908936, "learning_rate": 7.5598492637005535e-06, "loss": 0.3936, "step": 22380 }, { "epoch": 1.027075398100133, "grad_norm": 0.5223364233970642, "learning_rate": 7.559638643372897e-06, "loss": 0.4064, "step": 22381 }, { "epoch": 1.0271212886053875, "grad_norm": 0.4410637319087982, "learning_rate": 7.559428016890119e-06, "loss": 0.3369, "step": 22382 }, { "epoch": 1.027167179110642, "grad_norm": 0.44987592101097107, "learning_rate": 7.559217384252725e-06, "loss": 0.3394, "step": 22383 }, { "epoch": 1.0272130696158965, "grad_norm": 0.4607445299625397, "learning_rate": 7.5590067454612215e-06, "loss": 0.2943, "step": 22384 }, { "epoch": 1.027258960121151, "grad_norm": 0.48052191734313965, "learning_rate": 7.558796100516113e-06, "loss": 0.4004, "step": 22385 }, { "epoch": 1.0273048506264053, "grad_norm": 0.45145803689956665, "learning_rate": 7.558585449417908e-06, "loss": 0.3537, "step": 22386 }, { "epoch": 1.0273507411316598, "grad_norm": 0.47235146164894104, "learning_rate": 7.5583747921671115e-06, "loss": 0.3792, "step": 22387 }, { "epoch": 1.0273966316369143, "grad_norm": 0.44858264923095703, "learning_rate": 7.5581641287642315e-06, "loss": 0.3211, "step": 22388 }, { "epoch": 1.0274425221421688, "grad_norm": 0.4441016614437103, "learning_rate": 7.557953459209775e-06, "loss": 0.3091, "step": 22389 }, { "epoch": 1.0274884126474233, "grad_norm": 0.4486677944660187, "learning_rate": 7.5577427835042476e-06, "loss": 0.3193, "step": 22390 }, { "epoch": 1.0275343031526778, "grad_norm": 0.437423437833786, "learning_rate": 7.557532101648155e-06, "loss": 0.3192, "step": 22391 }, { "epoch": 1.027580193657932, "grad_norm": 0.4621531367301941, "learning_rate": 7.557321413642006e-06, "loss": 0.3558, "step": 22392 }, { "epoch": 1.0276260841631866, "grad_norm": 0.44279295206069946, "learning_rate": 7.557110719486305e-06, "loss": 0.278, "step": 22393 }, { "epoch": 1.027671974668441, "grad_norm": 0.4348197877407074, "learning_rate": 7.556900019181561e-06, "loss": 0.307, "step": 22394 }, { "epoch": 1.0277178651736956, "grad_norm": 0.4530927836894989, "learning_rate": 7.556689312728279e-06, "loss": 0.3569, "step": 22395 }, { "epoch": 1.02776375567895, "grad_norm": 0.4591359794139862, "learning_rate": 7.556478600126965e-06, "loss": 0.3062, "step": 22396 }, { "epoch": 1.0278096461842046, "grad_norm": 0.4886361360549927, "learning_rate": 7.556267881378129e-06, "loss": 0.4112, "step": 22397 }, { "epoch": 1.027855536689459, "grad_norm": 0.4790240228176117, "learning_rate": 7.556057156482273e-06, "loss": 0.3501, "step": 22398 }, { "epoch": 1.0279014271947133, "grad_norm": 0.4777625799179077, "learning_rate": 7.555846425439908e-06, "loss": 0.3785, "step": 22399 }, { "epoch": 1.0279473176999678, "grad_norm": 0.4528575539588928, "learning_rate": 7.555635688251539e-06, "loss": 0.327, "step": 22400 }, { "epoch": 1.0279932082052223, "grad_norm": 0.43054521083831787, "learning_rate": 7.555424944917673e-06, "loss": 0.2655, "step": 22401 }, { "epoch": 1.0280390987104768, "grad_norm": 0.47178617119789124, "learning_rate": 7.555214195438815e-06, "loss": 0.3343, "step": 22402 }, { "epoch": 1.0280849892157313, "grad_norm": 0.44585275650024414, "learning_rate": 7.555003439815473e-06, "loss": 0.3348, "step": 22403 }, { "epoch": 1.0281308797209858, "grad_norm": 0.5001834630966187, "learning_rate": 7.554792678048156e-06, "loss": 0.3791, "step": 22404 }, { "epoch": 1.02817677022624, "grad_norm": 0.47962501645088196, "learning_rate": 7.554581910137369e-06, "loss": 0.3706, "step": 22405 }, { "epoch": 1.0282226607314946, "grad_norm": 0.46621567010879517, "learning_rate": 7.554371136083618e-06, "loss": 0.2978, "step": 22406 }, { "epoch": 1.028268551236749, "grad_norm": 0.46204936504364014, "learning_rate": 7.55416035588741e-06, "loss": 0.3623, "step": 22407 }, { "epoch": 1.0283144417420036, "grad_norm": 0.4533815085887909, "learning_rate": 7.553949569549253e-06, "loss": 0.362, "step": 22408 }, { "epoch": 1.028360332247258, "grad_norm": 0.4397274851799011, "learning_rate": 7.553738777069654e-06, "loss": 0.3278, "step": 22409 }, { "epoch": 1.0284062227525126, "grad_norm": 0.4385591745376587, "learning_rate": 7.5535279784491174e-06, "loss": 0.3379, "step": 22410 }, { "epoch": 1.0284521132577669, "grad_norm": 0.4620034098625183, "learning_rate": 7.553317173688155e-06, "loss": 0.3337, "step": 22411 }, { "epoch": 1.0284980037630214, "grad_norm": 0.4451151490211487, "learning_rate": 7.553106362787268e-06, "loss": 0.3408, "step": 22412 }, { "epoch": 1.0285438942682759, "grad_norm": 0.4579235315322876, "learning_rate": 7.552895545746966e-06, "loss": 0.3705, "step": 22413 }, { "epoch": 1.0285897847735304, "grad_norm": 0.45619964599609375, "learning_rate": 7.552684722567757e-06, "loss": 0.3306, "step": 22414 }, { "epoch": 1.0286356752787849, "grad_norm": 0.5159657597541809, "learning_rate": 7.552473893250146e-06, "loss": 0.3547, "step": 22415 }, { "epoch": 1.0286815657840394, "grad_norm": 0.46768614649772644, "learning_rate": 7.552263057794642e-06, "loss": 0.355, "step": 22416 }, { "epoch": 1.0287274562892939, "grad_norm": 0.4567314684391022, "learning_rate": 7.552052216201751e-06, "loss": 0.341, "step": 22417 }, { "epoch": 1.0287733467945481, "grad_norm": 0.49946001172065735, "learning_rate": 7.551841368471978e-06, "loss": 0.4013, "step": 22418 }, { "epoch": 1.0288192372998026, "grad_norm": 0.4695288836956024, "learning_rate": 7.551630514605832e-06, "loss": 0.3745, "step": 22419 }, { "epoch": 1.0288651278050571, "grad_norm": 0.4795888364315033, "learning_rate": 7.5514196546038224e-06, "loss": 0.3683, "step": 22420 }, { "epoch": 1.0289110183103116, "grad_norm": 0.48820048570632935, "learning_rate": 7.551208788466453e-06, "loss": 0.3745, "step": 22421 }, { "epoch": 1.0289569088155661, "grad_norm": 0.505426824092865, "learning_rate": 7.5509979161942304e-06, "loss": 0.386, "step": 22422 }, { "epoch": 1.0290027993208206, "grad_norm": 0.48134180903434753, "learning_rate": 7.550787037787664e-06, "loss": 0.3212, "step": 22423 }, { "epoch": 1.029048689826075, "grad_norm": 0.46590206027030945, "learning_rate": 7.550576153247259e-06, "loss": 0.4346, "step": 22424 }, { "epoch": 1.0290945803313294, "grad_norm": 0.4820304811000824, "learning_rate": 7.550365262573523e-06, "loss": 0.3483, "step": 22425 }, { "epoch": 1.0291404708365839, "grad_norm": 0.45988038182258606, "learning_rate": 7.550154365766964e-06, "loss": 0.3355, "step": 22426 }, { "epoch": 1.0291863613418384, "grad_norm": 0.44796645641326904, "learning_rate": 7.54994346282809e-06, "loss": 0.2933, "step": 22427 }, { "epoch": 1.0292322518470929, "grad_norm": 0.48457062244415283, "learning_rate": 7.549732553757405e-06, "loss": 0.3485, "step": 22428 }, { "epoch": 1.0292781423523474, "grad_norm": 0.4248168170452118, "learning_rate": 7.549521638555419e-06, "loss": 0.3095, "step": 22429 }, { "epoch": 1.0293240328576017, "grad_norm": 0.4504879117012024, "learning_rate": 7.549310717222636e-06, "loss": 0.3436, "step": 22430 }, { "epoch": 1.0293699233628562, "grad_norm": 0.5392913818359375, "learning_rate": 7.549099789759569e-06, "loss": 0.4611, "step": 22431 }, { "epoch": 1.0294158138681107, "grad_norm": 0.42346471548080444, "learning_rate": 7.548888856166718e-06, "loss": 0.2717, "step": 22432 }, { "epoch": 1.0294617043733651, "grad_norm": 0.49569517374038696, "learning_rate": 7.548677916444596e-06, "loss": 0.4101, "step": 22433 }, { "epoch": 1.0295075948786196, "grad_norm": 0.4461812674999237, "learning_rate": 7.548466970593707e-06, "loss": 0.3189, "step": 22434 }, { "epoch": 1.0295534853838741, "grad_norm": 0.4729243218898773, "learning_rate": 7.548256018614559e-06, "loss": 0.3769, "step": 22435 }, { "epoch": 1.0295993758891286, "grad_norm": 0.47125616669654846, "learning_rate": 7.548045060507659e-06, "loss": 0.4048, "step": 22436 }, { "epoch": 1.029645266394383, "grad_norm": 0.4643147587776184, "learning_rate": 7.547834096273516e-06, "loss": 0.3922, "step": 22437 }, { "epoch": 1.0296911568996374, "grad_norm": 0.502774715423584, "learning_rate": 7.547623125912637e-06, "loss": 0.4128, "step": 22438 }, { "epoch": 1.029737047404892, "grad_norm": 0.49359238147735596, "learning_rate": 7.5474121494255255e-06, "loss": 0.3545, "step": 22439 }, { "epoch": 1.0297829379101464, "grad_norm": 0.46166208386421204, "learning_rate": 7.547201166812694e-06, "loss": 0.3654, "step": 22440 }, { "epoch": 1.029828828415401, "grad_norm": 0.4602765738964081, "learning_rate": 7.546990178074648e-06, "loss": 0.3383, "step": 22441 }, { "epoch": 1.0298747189206554, "grad_norm": 0.4695824980735779, "learning_rate": 7.546779183211893e-06, "loss": 0.3435, "step": 22442 }, { "epoch": 1.0299206094259097, "grad_norm": 0.5052186250686646, "learning_rate": 7.5465681822249405e-06, "loss": 0.367, "step": 22443 }, { "epoch": 1.0299664999311642, "grad_norm": 0.4670850336551666, "learning_rate": 7.5463571751142915e-06, "loss": 0.3171, "step": 22444 }, { "epoch": 1.0300123904364187, "grad_norm": 0.48633328080177307, "learning_rate": 7.54614616188046e-06, "loss": 0.3527, "step": 22445 }, { "epoch": 1.0300582809416732, "grad_norm": 0.48769477009773254, "learning_rate": 7.545935142523948e-06, "loss": 0.4101, "step": 22446 }, { "epoch": 1.0301041714469277, "grad_norm": 0.5118875503540039, "learning_rate": 7.545724117045268e-06, "loss": 0.4122, "step": 22447 }, { "epoch": 1.0301500619521822, "grad_norm": 0.45789262652397156, "learning_rate": 7.545513085444924e-06, "loss": 0.3717, "step": 22448 }, { "epoch": 1.0301959524574364, "grad_norm": 0.4542714059352875, "learning_rate": 7.545302047723424e-06, "loss": 0.3339, "step": 22449 }, { "epoch": 1.030241842962691, "grad_norm": 0.4558868408203125, "learning_rate": 7.545091003881276e-06, "loss": 0.3077, "step": 22450 }, { "epoch": 1.0302877334679454, "grad_norm": 0.5119561553001404, "learning_rate": 7.544879953918988e-06, "loss": 0.395, "step": 22451 }, { "epoch": 1.0303336239732, "grad_norm": 1.0092087984085083, "learning_rate": 7.544668897837066e-06, "loss": 0.321, "step": 22452 }, { "epoch": 1.0303795144784544, "grad_norm": 0.5007705092430115, "learning_rate": 7.544457835636018e-06, "loss": 0.4672, "step": 22453 }, { "epoch": 1.030425404983709, "grad_norm": 0.4739861786365509, "learning_rate": 7.5442467673163546e-06, "loss": 0.3628, "step": 22454 }, { "epoch": 1.0304712954889634, "grad_norm": 0.45855095982551575, "learning_rate": 7.5440356928785786e-06, "loss": 0.3249, "step": 22455 }, { "epoch": 1.0305171859942177, "grad_norm": 0.4487762153148651, "learning_rate": 7.543824612323199e-06, "loss": 0.3154, "step": 22456 }, { "epoch": 1.0305630764994722, "grad_norm": 0.4607127606868744, "learning_rate": 7.543613525650726e-06, "loss": 0.3682, "step": 22457 }, { "epoch": 1.0306089670047267, "grad_norm": 0.45102253556251526, "learning_rate": 7.543402432861664e-06, "loss": 0.3564, "step": 22458 }, { "epoch": 1.0306548575099812, "grad_norm": 0.4379313588142395, "learning_rate": 7.543191333956524e-06, "loss": 0.3144, "step": 22459 }, { "epoch": 1.0307007480152357, "grad_norm": 0.4747353196144104, "learning_rate": 7.542980228935809e-06, "loss": 0.4, "step": 22460 }, { "epoch": 1.0307466385204902, "grad_norm": 0.4937050938606262, "learning_rate": 7.54276911780003e-06, "loss": 0.3691, "step": 22461 }, { "epoch": 1.0307925290257445, "grad_norm": 0.474519819021225, "learning_rate": 7.542558000549694e-06, "loss": 0.3733, "step": 22462 }, { "epoch": 1.030838419530999, "grad_norm": 0.49929216504096985, "learning_rate": 7.542346877185307e-06, "loss": 0.386, "step": 22463 }, { "epoch": 1.0308843100362535, "grad_norm": 0.4621559679508209, "learning_rate": 7.54213574770738e-06, "loss": 0.3374, "step": 22464 }, { "epoch": 1.030930200541508, "grad_norm": 0.4684692323207855, "learning_rate": 7.541924612116418e-06, "loss": 0.3907, "step": 22465 }, { "epoch": 1.0309760910467625, "grad_norm": 0.47210416197776794, "learning_rate": 7.54171347041293e-06, "loss": 0.3683, "step": 22466 }, { "epoch": 1.031021981552017, "grad_norm": 0.45705515146255493, "learning_rate": 7.541502322597423e-06, "loss": 0.3523, "step": 22467 }, { "epoch": 1.0310678720572715, "grad_norm": 0.4867582321166992, "learning_rate": 7.541291168670405e-06, "loss": 0.3563, "step": 22468 }, { "epoch": 1.0311137625625257, "grad_norm": 0.4607963562011719, "learning_rate": 7.541080008632383e-06, "loss": 0.3873, "step": 22469 }, { "epoch": 1.0311596530677802, "grad_norm": 0.44386789202690125, "learning_rate": 7.540868842483867e-06, "loss": 0.3271, "step": 22470 }, { "epoch": 1.0312055435730347, "grad_norm": 0.47724539041519165, "learning_rate": 7.540657670225363e-06, "loss": 0.3566, "step": 22471 }, { "epoch": 1.0312514340782892, "grad_norm": 0.4534601867198944, "learning_rate": 7.540446491857379e-06, "loss": 0.3138, "step": 22472 }, { "epoch": 1.0312973245835437, "grad_norm": 0.44626832008361816, "learning_rate": 7.540235307380422e-06, "loss": 0.3465, "step": 22473 }, { "epoch": 1.0313432150887982, "grad_norm": 0.49037685990333557, "learning_rate": 7.540024116795002e-06, "loss": 0.3913, "step": 22474 }, { "epoch": 1.0313891055940525, "grad_norm": 0.43536630272865295, "learning_rate": 7.539812920101626e-06, "loss": 0.3111, "step": 22475 }, { "epoch": 1.031434996099307, "grad_norm": 0.4773887097835541, "learning_rate": 7.539601717300802e-06, "loss": 0.3564, "step": 22476 }, { "epoch": 1.0314808866045615, "grad_norm": 0.41378337144851685, "learning_rate": 7.539390508393035e-06, "loss": 0.3069, "step": 22477 }, { "epoch": 1.031526777109816, "grad_norm": 0.4721636176109314, "learning_rate": 7.539179293378835e-06, "loss": 0.3578, "step": 22478 }, { "epoch": 1.0315726676150705, "grad_norm": 0.44223642349243164, "learning_rate": 7.538968072258713e-06, "loss": 0.3562, "step": 22479 }, { "epoch": 1.031618558120325, "grad_norm": 0.45466575026512146, "learning_rate": 7.538756845033173e-06, "loss": 0.3438, "step": 22480 }, { "epoch": 1.0316644486255793, "grad_norm": 0.44065895676612854, "learning_rate": 7.538545611702723e-06, "loss": 0.3637, "step": 22481 }, { "epoch": 1.0317103391308338, "grad_norm": 0.4750717580318451, "learning_rate": 7.538334372267875e-06, "loss": 0.3592, "step": 22482 }, { "epoch": 1.0317562296360883, "grad_norm": 0.6003902554512024, "learning_rate": 7.538123126729131e-06, "loss": 0.2882, "step": 22483 }, { "epoch": 1.0318021201413428, "grad_norm": 0.49109312891960144, "learning_rate": 7.5379118750870016e-06, "loss": 0.4042, "step": 22484 }, { "epoch": 1.0318480106465973, "grad_norm": 0.48089855909347534, "learning_rate": 7.537700617341996e-06, "loss": 0.3814, "step": 22485 }, { "epoch": 1.0318939011518518, "grad_norm": 0.4734475314617157, "learning_rate": 7.537489353494622e-06, "loss": 0.3347, "step": 22486 }, { "epoch": 1.031939791657106, "grad_norm": 0.4818091094493866, "learning_rate": 7.537278083545388e-06, "loss": 0.331, "step": 22487 }, { "epoch": 1.0319856821623605, "grad_norm": 0.47763901948928833, "learning_rate": 7.537066807494799e-06, "loss": 0.3867, "step": 22488 }, { "epoch": 1.032031572667615, "grad_norm": 0.45094922184944153, "learning_rate": 7.536855525343365e-06, "loss": 0.35, "step": 22489 }, { "epoch": 1.0320774631728695, "grad_norm": 0.4774303436279297, "learning_rate": 7.536644237091595e-06, "loss": 0.3475, "step": 22490 }, { "epoch": 1.032123353678124, "grad_norm": 0.4614737629890442, "learning_rate": 7.536432942739995e-06, "loss": 0.334, "step": 22491 }, { "epoch": 1.0321692441833785, "grad_norm": 0.44377779960632324, "learning_rate": 7.5362216422890766e-06, "loss": 0.3142, "step": 22492 }, { "epoch": 1.032215134688633, "grad_norm": 0.454022079706192, "learning_rate": 7.536010335739346e-06, "loss": 0.3599, "step": 22493 }, { "epoch": 1.0322610251938873, "grad_norm": 0.4996393322944641, "learning_rate": 7.535799023091308e-06, "loss": 0.418, "step": 22494 }, { "epoch": 1.0323069156991418, "grad_norm": 0.48204198479652405, "learning_rate": 7.535587704345474e-06, "loss": 0.4166, "step": 22495 }, { "epoch": 1.0323528062043963, "grad_norm": 0.4685451090335846, "learning_rate": 7.535376379502353e-06, "loss": 0.3339, "step": 22496 }, { "epoch": 1.0323986967096508, "grad_norm": 0.4433494806289673, "learning_rate": 7.535165048562452e-06, "loss": 0.3454, "step": 22497 }, { "epoch": 1.0324445872149053, "grad_norm": 0.4790070056915283, "learning_rate": 7.534953711526279e-06, "loss": 0.371, "step": 22498 }, { "epoch": 1.0324904777201598, "grad_norm": 0.516436755657196, "learning_rate": 7.5347423683943425e-06, "loss": 0.3939, "step": 22499 }, { "epoch": 1.032536368225414, "grad_norm": 0.44650667905807495, "learning_rate": 7.53453101916715e-06, "loss": 0.3283, "step": 22500 }, { "epoch": 1.0325822587306686, "grad_norm": 0.4918571412563324, "learning_rate": 7.534319663845209e-06, "loss": 0.4, "step": 22501 }, { "epoch": 1.032628149235923, "grad_norm": 0.4394564628601074, "learning_rate": 7.534108302429032e-06, "loss": 0.3027, "step": 22502 }, { "epoch": 1.0326740397411776, "grad_norm": 0.4654005765914917, "learning_rate": 7.533896934919122e-06, "loss": 0.3092, "step": 22503 }, { "epoch": 1.032719930246432, "grad_norm": 0.4264509677886963, "learning_rate": 7.533685561315991e-06, "loss": 0.2842, "step": 22504 }, { "epoch": 1.0327658207516865, "grad_norm": 0.43899038434028625, "learning_rate": 7.533474181620146e-06, "loss": 0.3363, "step": 22505 }, { "epoch": 1.032811711256941, "grad_norm": 0.4411478340625763, "learning_rate": 7.533262795832095e-06, "loss": 0.3095, "step": 22506 }, { "epoch": 1.0328576017621953, "grad_norm": 0.5276771187782288, "learning_rate": 7.533051403952345e-06, "loss": 0.4097, "step": 22507 }, { "epoch": 1.0329034922674498, "grad_norm": 0.501353919506073, "learning_rate": 7.532840005981407e-06, "loss": 0.3623, "step": 22508 }, { "epoch": 1.0329493827727043, "grad_norm": 0.4690876603126526, "learning_rate": 7.532628601919789e-06, "loss": 0.3719, "step": 22509 }, { "epoch": 1.0329952732779588, "grad_norm": 0.4469498097896576, "learning_rate": 7.5324171917679974e-06, "loss": 0.3235, "step": 22510 }, { "epoch": 1.0330411637832133, "grad_norm": 0.45745769143104553, "learning_rate": 7.532205775526542e-06, "loss": 0.3924, "step": 22511 }, { "epoch": 1.0330870542884678, "grad_norm": 0.4838915765285492, "learning_rate": 7.531994353195931e-06, "loss": 0.3445, "step": 22512 }, { "epoch": 1.033132944793722, "grad_norm": 0.4959193170070648, "learning_rate": 7.531782924776672e-06, "loss": 0.3854, "step": 22513 }, { "epoch": 1.0331788352989766, "grad_norm": 0.49049198627471924, "learning_rate": 7.531571490269276e-06, "loss": 0.3288, "step": 22514 }, { "epoch": 1.033224725804231, "grad_norm": 0.4525477886199951, "learning_rate": 7.531360049674248e-06, "loss": 0.3098, "step": 22515 }, { "epoch": 1.0332706163094856, "grad_norm": 0.4442325532436371, "learning_rate": 7.531148602992098e-06, "loss": 0.287, "step": 22516 }, { "epoch": 1.03331650681474, "grad_norm": 0.4822787344455719, "learning_rate": 7.530937150223334e-06, "loss": 0.412, "step": 22517 }, { "epoch": 1.0333623973199946, "grad_norm": 0.5001789331436157, "learning_rate": 7.530725691368466e-06, "loss": 0.3966, "step": 22518 }, { "epoch": 1.0334082878252489, "grad_norm": 0.4512426257133484, "learning_rate": 7.5305142264280005e-06, "loss": 0.3156, "step": 22519 }, { "epoch": 1.0334541783305033, "grad_norm": 0.4955967962741852, "learning_rate": 7.530302755402449e-06, "loss": 0.4016, "step": 22520 }, { "epoch": 1.0335000688357578, "grad_norm": 0.48659852147102356, "learning_rate": 7.530091278292315e-06, "loss": 0.404, "step": 22521 }, { "epoch": 1.0335459593410123, "grad_norm": 0.4869542717933655, "learning_rate": 7.529879795098113e-06, "loss": 0.3817, "step": 22522 }, { "epoch": 1.0335918498462668, "grad_norm": 0.510161817073822, "learning_rate": 7.529668305820346e-06, "loss": 0.4427, "step": 22523 }, { "epoch": 1.0336377403515213, "grad_norm": 0.4754716455936432, "learning_rate": 7.5294568104595276e-06, "loss": 0.3747, "step": 22524 }, { "epoch": 1.0336836308567758, "grad_norm": 0.5018302798271179, "learning_rate": 7.529245309016162e-06, "loss": 0.4186, "step": 22525 }, { "epoch": 1.0337295213620301, "grad_norm": 0.48589888215065, "learning_rate": 7.529033801490759e-06, "loss": 0.4025, "step": 22526 }, { "epoch": 1.0337754118672846, "grad_norm": 0.4735371768474579, "learning_rate": 7.5288222878838305e-06, "loss": 0.3753, "step": 22527 }, { "epoch": 1.033821302372539, "grad_norm": 0.46141767501831055, "learning_rate": 7.528610768195881e-06, "loss": 0.3529, "step": 22528 }, { "epoch": 1.0338671928777936, "grad_norm": 0.42864179611206055, "learning_rate": 7.52839924242742e-06, "loss": 0.3034, "step": 22529 }, { "epoch": 1.033913083383048, "grad_norm": 0.4565361738204956, "learning_rate": 7.528187710578958e-06, "loss": 0.313, "step": 22530 }, { "epoch": 1.0339589738883026, "grad_norm": 0.5064765810966492, "learning_rate": 7.527976172651002e-06, "loss": 0.3511, "step": 22531 }, { "epoch": 1.0340048643935569, "grad_norm": 0.4575657546520233, "learning_rate": 7.527764628644061e-06, "loss": 0.3425, "step": 22532 }, { "epoch": 1.0340507548988114, "grad_norm": 0.4638778567314148, "learning_rate": 7.5275530785586445e-06, "loss": 0.3382, "step": 22533 }, { "epoch": 1.0340966454040659, "grad_norm": 0.4642266035079956, "learning_rate": 7.52734152239526e-06, "loss": 0.3906, "step": 22534 }, { "epoch": 1.0341425359093204, "grad_norm": 0.47281980514526367, "learning_rate": 7.527129960154416e-06, "loss": 0.3753, "step": 22535 }, { "epoch": 1.0341884264145749, "grad_norm": 0.4999763071537018, "learning_rate": 7.526918391836624e-06, "loss": 0.4138, "step": 22536 }, { "epoch": 1.0342343169198294, "grad_norm": 0.457609087228775, "learning_rate": 7.526706817442388e-06, "loss": 0.3828, "step": 22537 }, { "epoch": 1.0342802074250836, "grad_norm": 0.4578341543674469, "learning_rate": 7.526495236972222e-06, "loss": 0.3289, "step": 22538 }, { "epoch": 1.0343260979303381, "grad_norm": 0.5044934749603271, "learning_rate": 7.526283650426631e-06, "loss": 0.4268, "step": 22539 }, { "epoch": 1.0343719884355926, "grad_norm": 0.46051138639450073, "learning_rate": 7.526072057806127e-06, "loss": 0.3223, "step": 22540 }, { "epoch": 1.0344178789408471, "grad_norm": 0.49362966418266296, "learning_rate": 7.525860459111215e-06, "loss": 0.3809, "step": 22541 }, { "epoch": 1.0344637694461016, "grad_norm": 0.4496925473213196, "learning_rate": 7.525648854342406e-06, "loss": 0.3493, "step": 22542 }, { "epoch": 1.0345096599513561, "grad_norm": 0.4411328434944153, "learning_rate": 7.525437243500209e-06, "loss": 0.3199, "step": 22543 }, { "epoch": 1.0345555504566106, "grad_norm": 0.4550195634365082, "learning_rate": 7.525225626585133e-06, "loss": 0.3437, "step": 22544 }, { "epoch": 1.034601440961865, "grad_norm": 0.44751280546188354, "learning_rate": 7.525014003597686e-06, "loss": 0.3359, "step": 22545 }, { "epoch": 1.0346473314671194, "grad_norm": 0.4736498296260834, "learning_rate": 7.524802374538378e-06, "loss": 0.4137, "step": 22546 }, { "epoch": 1.034693221972374, "grad_norm": 0.4899050295352936, "learning_rate": 7.524590739407715e-06, "loss": 0.3337, "step": 22547 }, { "epoch": 1.0347391124776284, "grad_norm": 0.47566068172454834, "learning_rate": 7.524379098206209e-06, "loss": 0.368, "step": 22548 }, { "epoch": 1.034785002982883, "grad_norm": 0.496096134185791, "learning_rate": 7.524167450934369e-06, "loss": 0.3804, "step": 22549 }, { "epoch": 1.0348308934881374, "grad_norm": 0.4961836636066437, "learning_rate": 7.523955797592703e-06, "loss": 0.3503, "step": 22550 }, { "epoch": 1.0348767839933917, "grad_norm": 0.4667801260948181, "learning_rate": 7.5237441381817185e-06, "loss": 0.3427, "step": 22551 }, { "epoch": 1.0349226744986462, "grad_norm": 0.522922933101654, "learning_rate": 7.523532472701926e-06, "loss": 0.4887, "step": 22552 }, { "epoch": 1.0349685650039007, "grad_norm": 0.4789958894252777, "learning_rate": 7.5233208011538364e-06, "loss": 0.3663, "step": 22553 }, { "epoch": 1.0350144555091552, "grad_norm": 0.4837660491466522, "learning_rate": 7.523109123537955e-06, "loss": 0.4513, "step": 22554 }, { "epoch": 1.0350603460144097, "grad_norm": 0.47886380553245544, "learning_rate": 7.52289743985479e-06, "loss": 0.3436, "step": 22555 }, { "epoch": 1.0351062365196642, "grad_norm": 0.5638324618339539, "learning_rate": 7.522685750104857e-06, "loss": 0.3796, "step": 22556 }, { "epoch": 1.0351521270249187, "grad_norm": 0.5084211230278015, "learning_rate": 7.52247405428866e-06, "loss": 0.3719, "step": 22557 }, { "epoch": 1.035198017530173, "grad_norm": 0.4882853627204895, "learning_rate": 7.5222623524067086e-06, "loss": 0.4071, "step": 22558 }, { "epoch": 1.0352439080354274, "grad_norm": 0.5179264545440674, "learning_rate": 7.5220506444595125e-06, "loss": 0.352, "step": 22559 }, { "epoch": 1.035289798540682, "grad_norm": 0.5166230797767639, "learning_rate": 7.521838930447579e-06, "loss": 0.3289, "step": 22560 }, { "epoch": 1.0353356890459364, "grad_norm": 0.4785296320915222, "learning_rate": 7.521627210371421e-06, "loss": 0.292, "step": 22561 }, { "epoch": 1.035381579551191, "grad_norm": 0.46852248907089233, "learning_rate": 7.521415484231544e-06, "loss": 0.3439, "step": 22562 }, { "epoch": 1.0354274700564454, "grad_norm": 0.45344555377960205, "learning_rate": 7.521203752028459e-06, "loss": 0.3172, "step": 22563 }, { "epoch": 1.0354733605616997, "grad_norm": 0.48477643728256226, "learning_rate": 7.5209920137626755e-06, "loss": 0.3688, "step": 22564 }, { "epoch": 1.0355192510669542, "grad_norm": 0.4915164113044739, "learning_rate": 7.5207802694347e-06, "loss": 0.3235, "step": 22565 }, { "epoch": 1.0355651415722087, "grad_norm": 0.5388010740280151, "learning_rate": 7.520568519045044e-06, "loss": 0.3831, "step": 22566 }, { "epoch": 1.0356110320774632, "grad_norm": 0.6704277396202087, "learning_rate": 7.520356762594218e-06, "loss": 0.2776, "step": 22567 }, { "epoch": 1.0356569225827177, "grad_norm": 0.48292919993400574, "learning_rate": 7.520145000082728e-06, "loss": 0.3918, "step": 22568 }, { "epoch": 1.0357028130879722, "grad_norm": 0.4321908950805664, "learning_rate": 7.519933231511085e-06, "loss": 0.3141, "step": 22569 }, { "epoch": 1.0357487035932265, "grad_norm": 0.4793359935283661, "learning_rate": 7.519721456879798e-06, "loss": 0.4296, "step": 22570 }, { "epoch": 1.035794594098481, "grad_norm": 0.4791930317878723, "learning_rate": 7.519509676189375e-06, "loss": 0.3902, "step": 22571 }, { "epoch": 1.0358404846037355, "grad_norm": 0.4553140103816986, "learning_rate": 7.519297889440327e-06, "loss": 0.3165, "step": 22572 }, { "epoch": 1.03588637510899, "grad_norm": 0.4684256315231323, "learning_rate": 7.519086096633163e-06, "loss": 0.4146, "step": 22573 }, { "epoch": 1.0359322656142445, "grad_norm": 0.47407516837120056, "learning_rate": 7.518874297768392e-06, "loss": 0.309, "step": 22574 }, { "epoch": 1.035978156119499, "grad_norm": 0.46104568243026733, "learning_rate": 7.518662492846524e-06, "loss": 0.3538, "step": 22575 }, { "epoch": 1.0360240466247532, "grad_norm": 0.455872118473053, "learning_rate": 7.518450681868067e-06, "loss": 0.348, "step": 22576 }, { "epoch": 1.0360699371300077, "grad_norm": 0.4080817997455597, "learning_rate": 7.51823886483353e-06, "loss": 0.2732, "step": 22577 }, { "epoch": 1.0361158276352622, "grad_norm": 0.4523342549800873, "learning_rate": 7.5180270417434256e-06, "loss": 0.371, "step": 22578 }, { "epoch": 1.0361617181405167, "grad_norm": 0.5079509019851685, "learning_rate": 7.5178152125982585e-06, "loss": 0.3771, "step": 22579 }, { "epoch": 1.0362076086457712, "grad_norm": 0.5014461278915405, "learning_rate": 7.517603377398541e-06, "loss": 0.4585, "step": 22580 }, { "epoch": 1.0362534991510257, "grad_norm": 0.4383416473865509, "learning_rate": 7.517391536144783e-06, "loss": 0.3451, "step": 22581 }, { "epoch": 1.0362993896562802, "grad_norm": 0.492683082818985, "learning_rate": 7.517179688837493e-06, "loss": 0.3875, "step": 22582 }, { "epoch": 1.0363452801615345, "grad_norm": 0.4581277668476105, "learning_rate": 7.516967835477178e-06, "loss": 0.323, "step": 22583 }, { "epoch": 1.036391170666789, "grad_norm": 0.4390082359313965, "learning_rate": 7.516755976064352e-06, "loss": 0.3261, "step": 22584 }, { "epoch": 1.0364370611720435, "grad_norm": 0.48436179757118225, "learning_rate": 7.5165441105995215e-06, "loss": 0.3806, "step": 22585 }, { "epoch": 1.036482951677298, "grad_norm": 0.4714542329311371, "learning_rate": 7.516332239083196e-06, "loss": 0.3279, "step": 22586 }, { "epoch": 1.0365288421825525, "grad_norm": 0.4567766785621643, "learning_rate": 7.516120361515886e-06, "loss": 0.3457, "step": 22587 }, { "epoch": 1.036574732687807, "grad_norm": 0.4897473156452179, "learning_rate": 7.515908477898102e-06, "loss": 0.3952, "step": 22588 }, { "epoch": 1.0366206231930613, "grad_norm": 0.4591223895549774, "learning_rate": 7.51569658823035e-06, "loss": 0.3352, "step": 22589 }, { "epoch": 1.0366665136983158, "grad_norm": 0.425460547208786, "learning_rate": 7.515484692513144e-06, "loss": 0.2697, "step": 22590 }, { "epoch": 1.0367124042035702, "grad_norm": 0.4897994101047516, "learning_rate": 7.5152727907469895e-06, "loss": 0.3967, "step": 22591 }, { "epoch": 1.0367582947088247, "grad_norm": 0.45869019627571106, "learning_rate": 7.515060882932399e-06, "loss": 0.3638, "step": 22592 }, { "epoch": 1.0368041852140792, "grad_norm": 0.46208545565605164, "learning_rate": 7.514848969069879e-06, "loss": 0.3789, "step": 22593 }, { "epoch": 1.0368500757193337, "grad_norm": 0.45192182064056396, "learning_rate": 7.514637049159941e-06, "loss": 0.301, "step": 22594 }, { "epoch": 1.0368959662245882, "grad_norm": 0.476268470287323, "learning_rate": 7.5144251232030975e-06, "loss": 0.3269, "step": 22595 }, { "epoch": 1.0369418567298425, "grad_norm": 0.4789285361766815, "learning_rate": 7.5142131911998524e-06, "loss": 0.2842, "step": 22596 }, { "epoch": 1.036987747235097, "grad_norm": 0.533838152885437, "learning_rate": 7.514001253150719e-06, "loss": 0.3816, "step": 22597 }, { "epoch": 1.0370336377403515, "grad_norm": 0.4740827679634094, "learning_rate": 7.513789309056205e-06, "loss": 0.3739, "step": 22598 }, { "epoch": 1.037079528245606, "grad_norm": 0.4522237479686737, "learning_rate": 7.513577358916822e-06, "loss": 0.3388, "step": 22599 }, { "epoch": 1.0371254187508605, "grad_norm": 0.4588858187198639, "learning_rate": 7.5133654027330795e-06, "loss": 0.3045, "step": 22600 }, { "epoch": 1.037171309256115, "grad_norm": 0.5120354294776917, "learning_rate": 7.513153440505486e-06, "loss": 0.4203, "step": 22601 }, { "epoch": 1.0372171997613693, "grad_norm": 0.4492606818675995, "learning_rate": 7.512941472234552e-06, "loss": 0.3582, "step": 22602 }, { "epoch": 1.0372630902666238, "grad_norm": 0.5001031756401062, "learning_rate": 7.512729497920786e-06, "loss": 0.4383, "step": 22603 }, { "epoch": 1.0373089807718783, "grad_norm": 0.4281148910522461, "learning_rate": 7.512517517564698e-06, "loss": 0.301, "step": 22604 }, { "epoch": 1.0373548712771328, "grad_norm": 0.46850812435150146, "learning_rate": 7.512305531166799e-06, "loss": 0.3786, "step": 22605 }, { "epoch": 1.0374007617823873, "grad_norm": 0.44978564977645874, "learning_rate": 7.5120935387275984e-06, "loss": 0.3337, "step": 22606 }, { "epoch": 1.0374466522876418, "grad_norm": 0.4837632179260254, "learning_rate": 7.511881540247607e-06, "loss": 0.4107, "step": 22607 }, { "epoch": 1.037492542792896, "grad_norm": 0.7123207449913025, "learning_rate": 7.511669535727331e-06, "loss": 0.4368, "step": 22608 }, { "epoch": 1.0375384332981505, "grad_norm": 0.4978242516517639, "learning_rate": 7.511457525167283e-06, "loss": 0.3347, "step": 22609 }, { "epoch": 1.037584323803405, "grad_norm": 0.4432408809661865, "learning_rate": 7.5112455085679725e-06, "loss": 0.3154, "step": 22610 }, { "epoch": 1.0376302143086595, "grad_norm": 0.5042496919631958, "learning_rate": 7.511033485929908e-06, "loss": 0.3367, "step": 22611 }, { "epoch": 1.037676104813914, "grad_norm": 0.5159973502159119, "learning_rate": 7.510821457253603e-06, "loss": 0.4577, "step": 22612 }, { "epoch": 1.0377219953191685, "grad_norm": 0.4588887393474579, "learning_rate": 7.510609422539563e-06, "loss": 0.283, "step": 22613 }, { "epoch": 1.037767885824423, "grad_norm": 0.4738307297229767, "learning_rate": 7.5103973817883e-06, "loss": 0.3621, "step": 22614 }, { "epoch": 1.0378137763296773, "grad_norm": 0.4923470914363861, "learning_rate": 7.510185335000324e-06, "loss": 0.3967, "step": 22615 }, { "epoch": 1.0378596668349318, "grad_norm": 0.44362521171569824, "learning_rate": 7.509973282176144e-06, "loss": 0.3437, "step": 22616 }, { "epoch": 1.0379055573401863, "grad_norm": 0.4708520770072937, "learning_rate": 7.509761223316272e-06, "loss": 0.37, "step": 22617 }, { "epoch": 1.0379514478454408, "grad_norm": 0.42257457971572876, "learning_rate": 7.509549158421215e-06, "loss": 0.3076, "step": 22618 }, { "epoch": 1.0379973383506953, "grad_norm": 0.44826650619506836, "learning_rate": 7.509337087491484e-06, "loss": 0.3493, "step": 22619 }, { "epoch": 1.0380432288559498, "grad_norm": 0.5177218317985535, "learning_rate": 7.509125010527589e-06, "loss": 0.4741, "step": 22620 }, { "epoch": 1.038089119361204, "grad_norm": 0.45172804594039917, "learning_rate": 7.508912927530041e-06, "loss": 0.3258, "step": 22621 }, { "epoch": 1.0381350098664586, "grad_norm": 0.47864997386932373, "learning_rate": 7.50870083849935e-06, "loss": 0.366, "step": 22622 }, { "epoch": 1.038180900371713, "grad_norm": 0.4468690752983093, "learning_rate": 7.508488743436025e-06, "loss": 0.3334, "step": 22623 }, { "epoch": 1.0382267908769676, "grad_norm": 0.4550061821937561, "learning_rate": 7.508276642340575e-06, "loss": 0.3672, "step": 22624 }, { "epoch": 1.038272681382222, "grad_norm": 0.4632030129432678, "learning_rate": 7.5080645352135115e-06, "loss": 0.3787, "step": 22625 }, { "epoch": 1.0383185718874766, "grad_norm": 0.5070819854736328, "learning_rate": 7.507852422055346e-06, "loss": 0.4104, "step": 22626 }, { "epoch": 1.0383644623927308, "grad_norm": 0.43967822194099426, "learning_rate": 7.507640302866586e-06, "loss": 0.3264, "step": 22627 }, { "epoch": 1.0384103528979853, "grad_norm": 0.452687531709671, "learning_rate": 7.507428177647742e-06, "loss": 0.2952, "step": 22628 }, { "epoch": 1.0384562434032398, "grad_norm": 0.5061430335044861, "learning_rate": 7.507216046399326e-06, "loss": 0.4171, "step": 22629 }, { "epoch": 1.0385021339084943, "grad_norm": 0.4669508635997772, "learning_rate": 7.507003909121844e-06, "loss": 0.353, "step": 22630 }, { "epoch": 1.0385480244137488, "grad_norm": 0.46823179721832275, "learning_rate": 7.506791765815811e-06, "loss": 0.3541, "step": 22631 }, { "epoch": 1.0385939149190033, "grad_norm": 0.4741915762424469, "learning_rate": 7.506579616481734e-06, "loss": 0.3441, "step": 22632 }, { "epoch": 1.0386398054242578, "grad_norm": 0.4528752863407135, "learning_rate": 7.506367461120124e-06, "loss": 0.3231, "step": 22633 }, { "epoch": 1.038685695929512, "grad_norm": 0.4942726492881775, "learning_rate": 7.5061552997314925e-06, "loss": 0.3764, "step": 22634 }, { "epoch": 1.0387315864347666, "grad_norm": 0.5069977641105652, "learning_rate": 7.505943132316349e-06, "loss": 0.4211, "step": 22635 }, { "epoch": 1.038777476940021, "grad_norm": 0.4757709205150604, "learning_rate": 7.505730958875202e-06, "loss": 0.3672, "step": 22636 }, { "epoch": 1.0388233674452756, "grad_norm": 0.4170849025249481, "learning_rate": 7.50551877940856e-06, "loss": 0.2808, "step": 22637 }, { "epoch": 1.03886925795053, "grad_norm": 0.4478960633277893, "learning_rate": 7.505306593916941e-06, "loss": 0.31, "step": 22638 }, { "epoch": 1.0389151484557846, "grad_norm": 0.48747068643569946, "learning_rate": 7.5050944024008475e-06, "loss": 0.4127, "step": 22639 }, { "epoch": 1.0389610389610389, "grad_norm": 0.4746107757091522, "learning_rate": 7.504882204860795e-06, "loss": 0.3455, "step": 22640 }, { "epoch": 1.0390069294662934, "grad_norm": 0.43102437257766724, "learning_rate": 7.504670001297289e-06, "loss": 0.2999, "step": 22641 }, { "epoch": 1.0390528199715479, "grad_norm": 0.5030134320259094, "learning_rate": 7.504457791710842e-06, "loss": 0.4436, "step": 22642 }, { "epoch": 1.0390987104768024, "grad_norm": 0.5042721629142761, "learning_rate": 7.504245576101966e-06, "loss": 0.387, "step": 22643 }, { "epoch": 1.0391446009820569, "grad_norm": 0.4811035692691803, "learning_rate": 7.504033354471168e-06, "loss": 0.3678, "step": 22644 }, { "epoch": 1.0391904914873114, "grad_norm": 0.4951101243495941, "learning_rate": 7.503821126818962e-06, "loss": 0.3788, "step": 22645 }, { "epoch": 1.0392363819925658, "grad_norm": 0.5301887392997742, "learning_rate": 7.503608893145856e-06, "loss": 0.4489, "step": 22646 }, { "epoch": 1.0392822724978201, "grad_norm": 0.4571898579597473, "learning_rate": 7.503396653452359e-06, "loss": 0.3303, "step": 22647 }, { "epoch": 1.0393281630030746, "grad_norm": 0.4381439685821533, "learning_rate": 7.503184407738985e-06, "loss": 0.336, "step": 22648 }, { "epoch": 1.0393740535083291, "grad_norm": 0.5590527653694153, "learning_rate": 7.502972156006243e-06, "loss": 0.4194, "step": 22649 }, { "epoch": 1.0394199440135836, "grad_norm": 0.4503445327281952, "learning_rate": 7.502759898254641e-06, "loss": 0.3431, "step": 22650 }, { "epoch": 1.0394658345188381, "grad_norm": 0.46255195140838623, "learning_rate": 7.502547634484693e-06, "loss": 0.3629, "step": 22651 }, { "epoch": 1.0395117250240926, "grad_norm": 0.4917968809604645, "learning_rate": 7.502335364696907e-06, "loss": 0.3681, "step": 22652 }, { "epoch": 1.039557615529347, "grad_norm": 0.479777455329895, "learning_rate": 7.502123088891794e-06, "loss": 0.3602, "step": 22653 }, { "epoch": 1.0396035060346014, "grad_norm": 0.45955249667167664, "learning_rate": 7.501910807069864e-06, "loss": 0.3248, "step": 22654 }, { "epoch": 1.0396493965398559, "grad_norm": 0.44344815611839294, "learning_rate": 7.50169851923163e-06, "loss": 0.3435, "step": 22655 }, { "epoch": 1.0396952870451104, "grad_norm": 0.4244428873062134, "learning_rate": 7.5014862253776e-06, "loss": 0.2685, "step": 22656 }, { "epoch": 1.0397411775503649, "grad_norm": 0.4281753897666931, "learning_rate": 7.501273925508286e-06, "loss": 0.29, "step": 22657 }, { "epoch": 1.0397870680556194, "grad_norm": 0.5082271099090576, "learning_rate": 7.5010616196241945e-06, "loss": 0.3853, "step": 22658 }, { "epoch": 1.0398329585608737, "grad_norm": 0.5000761151313782, "learning_rate": 7.500849307725842e-06, "loss": 0.3952, "step": 22659 }, { "epoch": 1.0398788490661282, "grad_norm": 0.46886980533599854, "learning_rate": 7.5006369898137344e-06, "loss": 0.3295, "step": 22660 }, { "epoch": 1.0399247395713827, "grad_norm": 0.45340049266815186, "learning_rate": 7.500424665888386e-06, "loss": 0.3238, "step": 22661 }, { "epoch": 1.0399706300766371, "grad_norm": 0.4961346685886383, "learning_rate": 7.500212335950303e-06, "loss": 0.3405, "step": 22662 }, { "epoch": 1.0400165205818916, "grad_norm": 0.4630239009857178, "learning_rate": 7.500000000000001e-06, "loss": 0.3059, "step": 22663 }, { "epoch": 1.0400624110871461, "grad_norm": 0.4659516513347626, "learning_rate": 7.4997876580379865e-06, "loss": 0.3297, "step": 22664 }, { "epoch": 1.0401083015924004, "grad_norm": 0.44990962743759155, "learning_rate": 7.4995753100647715e-06, "loss": 0.2981, "step": 22665 }, { "epoch": 1.040154192097655, "grad_norm": 0.519379734992981, "learning_rate": 7.4993629560808675e-06, "loss": 0.4338, "step": 22666 }, { "epoch": 1.0402000826029094, "grad_norm": 0.44374313950538635, "learning_rate": 7.499150596086785e-06, "loss": 0.2961, "step": 22667 }, { "epoch": 1.040245973108164, "grad_norm": 0.4624730050563812, "learning_rate": 7.498938230083032e-06, "loss": 0.342, "step": 22668 }, { "epoch": 1.0402918636134184, "grad_norm": 0.4602864682674408, "learning_rate": 7.498725858070124e-06, "loss": 0.3594, "step": 22669 }, { "epoch": 1.040337754118673, "grad_norm": 0.4900275468826294, "learning_rate": 7.4985134800485655e-06, "loss": 0.4081, "step": 22670 }, { "epoch": 1.0403836446239274, "grad_norm": 0.4650912880897522, "learning_rate": 7.498301096018874e-06, "loss": 0.3618, "step": 22671 }, { "epoch": 1.0404295351291817, "grad_norm": 0.5254382491111755, "learning_rate": 7.498088705981556e-06, "loss": 0.3991, "step": 22672 }, { "epoch": 1.0404754256344362, "grad_norm": 0.45846128463745117, "learning_rate": 7.497876309937122e-06, "loss": 0.3639, "step": 22673 }, { "epoch": 1.0405213161396907, "grad_norm": 0.4317995309829712, "learning_rate": 7.497663907886084e-06, "loss": 0.3293, "step": 22674 }, { "epoch": 1.0405672066449452, "grad_norm": 0.47075486183166504, "learning_rate": 7.497451499828952e-06, "loss": 0.3318, "step": 22675 }, { "epoch": 1.0406130971501997, "grad_norm": 0.4526347219944, "learning_rate": 7.497239085766238e-06, "loss": 0.3544, "step": 22676 }, { "epoch": 1.0406589876554542, "grad_norm": 0.4874648153781891, "learning_rate": 7.497026665698453e-06, "loss": 0.3455, "step": 22677 }, { "epoch": 1.0407048781607084, "grad_norm": 0.4680350124835968, "learning_rate": 7.496814239626107e-06, "loss": 0.4068, "step": 22678 }, { "epoch": 1.040750768665963, "grad_norm": 0.46096980571746826, "learning_rate": 7.496601807549709e-06, "loss": 0.3517, "step": 22679 }, { "epoch": 1.0407966591712174, "grad_norm": 0.4481469988822937, "learning_rate": 7.4963893694697744e-06, "loss": 0.3305, "step": 22680 }, { "epoch": 1.040842549676472, "grad_norm": 0.4577075242996216, "learning_rate": 7.496176925386809e-06, "loss": 0.3569, "step": 22681 }, { "epoch": 1.0408884401817264, "grad_norm": 0.47864237427711487, "learning_rate": 7.495964475301328e-06, "loss": 0.3418, "step": 22682 }, { "epoch": 1.040934330686981, "grad_norm": 0.4881422519683838, "learning_rate": 7.495752019213838e-06, "loss": 0.3744, "step": 22683 }, { "epoch": 1.0409802211922354, "grad_norm": 0.46769267320632935, "learning_rate": 7.495539557124851e-06, "loss": 0.3547, "step": 22684 }, { "epoch": 1.0410261116974897, "grad_norm": 0.4983559548854828, "learning_rate": 7.49532708903488e-06, "loss": 0.4137, "step": 22685 }, { "epoch": 1.0410720022027442, "grad_norm": 0.43223729729652405, "learning_rate": 7.495114614944438e-06, "loss": 0.2843, "step": 22686 }, { "epoch": 1.0411178927079987, "grad_norm": 0.45847636461257935, "learning_rate": 7.49490213485403e-06, "loss": 0.3344, "step": 22687 }, { "epoch": 1.0411637832132532, "grad_norm": 0.46122318506240845, "learning_rate": 7.4946896487641696e-06, "loss": 0.3316, "step": 22688 }, { "epoch": 1.0412096737185077, "grad_norm": 0.48886874318122864, "learning_rate": 7.4944771566753696e-06, "loss": 0.4359, "step": 22689 }, { "epoch": 1.0412555642237622, "grad_norm": 0.46576988697052, "learning_rate": 7.494264658588137e-06, "loss": 0.398, "step": 22690 }, { "epoch": 1.0413014547290165, "grad_norm": 0.4574759304523468, "learning_rate": 7.4940521545029875e-06, "loss": 0.3496, "step": 22691 }, { "epoch": 1.041347345234271, "grad_norm": 0.4674251973628998, "learning_rate": 7.493839644420428e-06, "loss": 0.3761, "step": 22692 }, { "epoch": 1.0413932357395255, "grad_norm": 0.47321823239326477, "learning_rate": 7.493627128340972e-06, "loss": 0.3713, "step": 22693 }, { "epoch": 1.04143912624478, "grad_norm": 0.48859670758247375, "learning_rate": 7.493414606265131e-06, "loss": 0.4428, "step": 22694 }, { "epoch": 1.0414850167500345, "grad_norm": 0.4617638885974884, "learning_rate": 7.493202078193412e-06, "loss": 0.3666, "step": 22695 }, { "epoch": 1.041530907255289, "grad_norm": 0.4848746657371521, "learning_rate": 7.49298954412633e-06, "loss": 0.3572, "step": 22696 }, { "epoch": 1.0415767977605432, "grad_norm": 0.4204821288585663, "learning_rate": 7.492777004064395e-06, "loss": 0.2638, "step": 22697 }, { "epoch": 1.0416226882657977, "grad_norm": 0.43197301030158997, "learning_rate": 7.492564458008118e-06, "loss": 0.3001, "step": 22698 }, { "epoch": 1.0416685787710522, "grad_norm": 0.4667450189590454, "learning_rate": 7.492351905958012e-06, "loss": 0.3788, "step": 22699 }, { "epoch": 1.0417144692763067, "grad_norm": 0.4375399947166443, "learning_rate": 7.492139347914584e-06, "loss": 0.2847, "step": 22700 }, { "epoch": 1.0417603597815612, "grad_norm": 0.4769582152366638, "learning_rate": 7.491926783878348e-06, "loss": 0.3806, "step": 22701 }, { "epoch": 1.0418062502868157, "grad_norm": 0.5065696835517883, "learning_rate": 7.491714213849814e-06, "loss": 0.4085, "step": 22702 }, { "epoch": 1.0418521407920702, "grad_norm": 0.5016975998878479, "learning_rate": 7.491501637829496e-06, "loss": 0.279, "step": 22703 }, { "epoch": 1.0418980312973245, "grad_norm": 0.4557572603225708, "learning_rate": 7.4912890558179005e-06, "loss": 0.3287, "step": 22704 }, { "epoch": 1.041943921802579, "grad_norm": 0.444677472114563, "learning_rate": 7.491076467815543e-06, "loss": 0.3626, "step": 22705 }, { "epoch": 1.0419898123078335, "grad_norm": 0.5197951197624207, "learning_rate": 7.490863873822931e-06, "loss": 0.4402, "step": 22706 }, { "epoch": 1.042035702813088, "grad_norm": 0.49283885955810547, "learning_rate": 7.490651273840578e-06, "loss": 0.4022, "step": 22707 }, { "epoch": 1.0420815933183425, "grad_norm": 0.4817564785480499, "learning_rate": 7.490438667868996e-06, "loss": 0.3448, "step": 22708 }, { "epoch": 1.042127483823597, "grad_norm": 0.45132896304130554, "learning_rate": 7.490226055908694e-06, "loss": 0.3499, "step": 22709 }, { "epoch": 1.0421733743288513, "grad_norm": 0.5125287771224976, "learning_rate": 7.4900134379601845e-06, "loss": 0.4471, "step": 22710 }, { "epoch": 1.0422192648341058, "grad_norm": 0.4781976044178009, "learning_rate": 7.489800814023978e-06, "loss": 0.3621, "step": 22711 }, { "epoch": 1.0422651553393603, "grad_norm": 0.46670758724212646, "learning_rate": 7.489588184100587e-06, "loss": 0.382, "step": 22712 }, { "epoch": 1.0423110458446148, "grad_norm": 0.4502623975276947, "learning_rate": 7.48937554819052e-06, "loss": 0.325, "step": 22713 }, { "epoch": 1.0423569363498693, "grad_norm": 0.46638572216033936, "learning_rate": 7.489162906294294e-06, "loss": 0.2733, "step": 22714 }, { "epoch": 1.0424028268551238, "grad_norm": 0.47697123885154724, "learning_rate": 7.488950258412415e-06, "loss": 0.3554, "step": 22715 }, { "epoch": 1.042448717360378, "grad_norm": 0.48570483922958374, "learning_rate": 7.4887376045453965e-06, "loss": 0.3729, "step": 22716 }, { "epoch": 1.0424946078656325, "grad_norm": 0.4322909116744995, "learning_rate": 7.488524944693749e-06, "loss": 0.3461, "step": 22717 }, { "epoch": 1.042540498370887, "grad_norm": 0.47378742694854736, "learning_rate": 7.488312278857984e-06, "loss": 0.3248, "step": 22718 }, { "epoch": 1.0425863888761415, "grad_norm": 0.4562162458896637, "learning_rate": 7.488099607038615e-06, "loss": 0.3533, "step": 22719 }, { "epoch": 1.042632279381396, "grad_norm": 0.47150278091430664, "learning_rate": 7.48788692923615e-06, "loss": 0.3804, "step": 22720 }, { "epoch": 1.0426781698866505, "grad_norm": 0.5105905532836914, "learning_rate": 7.487674245451104e-06, "loss": 0.3983, "step": 22721 }, { "epoch": 1.042724060391905, "grad_norm": 0.5189118981361389, "learning_rate": 7.487461555683986e-06, "loss": 0.4123, "step": 22722 }, { "epoch": 1.0427699508971593, "grad_norm": 0.4761057198047638, "learning_rate": 7.487248859935307e-06, "loss": 0.364, "step": 22723 }, { "epoch": 1.0428158414024138, "grad_norm": 0.4696687161922455, "learning_rate": 7.487036158205578e-06, "loss": 0.3585, "step": 22724 }, { "epoch": 1.0428617319076683, "grad_norm": 0.45252132415771484, "learning_rate": 7.486823450495315e-06, "loss": 0.3239, "step": 22725 }, { "epoch": 1.0429076224129228, "grad_norm": 0.4622134566307068, "learning_rate": 7.4866107368050265e-06, "loss": 0.3533, "step": 22726 }, { "epoch": 1.0429535129181773, "grad_norm": 0.432308554649353, "learning_rate": 7.486398017135223e-06, "loss": 0.2685, "step": 22727 }, { "epoch": 1.0429994034234318, "grad_norm": 0.465487003326416, "learning_rate": 7.486185291486416e-06, "loss": 0.3889, "step": 22728 }, { "epoch": 1.043045293928686, "grad_norm": 0.4786968529224396, "learning_rate": 7.485972559859119e-06, "loss": 0.3988, "step": 22729 }, { "epoch": 1.0430911844339406, "grad_norm": 0.5063127279281616, "learning_rate": 7.485759822253842e-06, "loss": 0.4489, "step": 22730 }, { "epoch": 1.043137074939195, "grad_norm": 0.44440793991088867, "learning_rate": 7.485547078671098e-06, "loss": 0.3213, "step": 22731 }, { "epoch": 1.0431829654444496, "grad_norm": 0.4996184706687927, "learning_rate": 7.485334329111398e-06, "loss": 0.4267, "step": 22732 }, { "epoch": 1.043228855949704, "grad_norm": 0.47585466504096985, "learning_rate": 7.485121573575252e-06, "loss": 0.3567, "step": 22733 }, { "epoch": 1.0432747464549585, "grad_norm": 0.42398712038993835, "learning_rate": 7.484908812063174e-06, "loss": 0.3105, "step": 22734 }, { "epoch": 1.043320636960213, "grad_norm": 0.4860897362232208, "learning_rate": 7.484696044575674e-06, "loss": 0.3428, "step": 22735 }, { "epoch": 1.0433665274654673, "grad_norm": 0.46490931510925293, "learning_rate": 7.484483271113265e-06, "loss": 0.3166, "step": 22736 }, { "epoch": 1.0434124179707218, "grad_norm": 0.5142837166786194, "learning_rate": 7.484270491676458e-06, "loss": 0.4525, "step": 22737 }, { "epoch": 1.0434583084759763, "grad_norm": 0.4415644705295563, "learning_rate": 7.484057706265764e-06, "loss": 0.3148, "step": 22738 }, { "epoch": 1.0435041989812308, "grad_norm": 0.4523180425167084, "learning_rate": 7.483844914881695e-06, "loss": 0.3414, "step": 22739 }, { "epoch": 1.0435500894864853, "grad_norm": 0.4948563277721405, "learning_rate": 7.483632117524762e-06, "loss": 0.3602, "step": 22740 }, { "epoch": 1.0435959799917398, "grad_norm": 0.48705506324768066, "learning_rate": 7.483419314195478e-06, "loss": 0.3965, "step": 22741 }, { "epoch": 1.043641870496994, "grad_norm": 0.4426090717315674, "learning_rate": 7.4832065048943555e-06, "loss": 0.2924, "step": 22742 }, { "epoch": 1.0436877610022486, "grad_norm": 0.43279650807380676, "learning_rate": 7.482993689621905e-06, "loss": 0.2956, "step": 22743 }, { "epoch": 1.043733651507503, "grad_norm": 0.5082806944847107, "learning_rate": 7.4827808683786375e-06, "loss": 0.4397, "step": 22744 }, { "epoch": 1.0437795420127576, "grad_norm": 0.4536983072757721, "learning_rate": 7.482568041165066e-06, "loss": 0.3218, "step": 22745 }, { "epoch": 1.043825432518012, "grad_norm": 0.5104808211326599, "learning_rate": 7.482355207981702e-06, "loss": 0.4038, "step": 22746 }, { "epoch": 1.0438713230232666, "grad_norm": 0.4574693739414215, "learning_rate": 7.4821423688290575e-06, "loss": 0.3663, "step": 22747 }, { "epoch": 1.0439172135285208, "grad_norm": 0.4624350666999817, "learning_rate": 7.481929523707643e-06, "loss": 0.3818, "step": 22748 }, { "epoch": 1.0439631040337753, "grad_norm": 0.45005643367767334, "learning_rate": 7.481716672617971e-06, "loss": 0.3147, "step": 22749 }, { "epoch": 1.0440089945390298, "grad_norm": 0.44013574719429016, "learning_rate": 7.481503815560555e-06, "loss": 0.2867, "step": 22750 }, { "epoch": 1.0440548850442843, "grad_norm": 0.47101548314094543, "learning_rate": 7.481290952535904e-06, "loss": 0.3487, "step": 22751 }, { "epoch": 1.0441007755495388, "grad_norm": 0.4666503369808197, "learning_rate": 7.4810780835445325e-06, "loss": 0.3492, "step": 22752 }, { "epoch": 1.0441466660547933, "grad_norm": 0.4673019051551819, "learning_rate": 7.480865208586951e-06, "loss": 0.3742, "step": 22753 }, { "epoch": 1.0441925565600476, "grad_norm": 0.44033849239349365, "learning_rate": 7.480652327663673e-06, "loss": 0.3144, "step": 22754 }, { "epoch": 1.0442384470653021, "grad_norm": 0.4528561532497406, "learning_rate": 7.480439440775207e-06, "loss": 0.349, "step": 22755 }, { "epoch": 1.0442843375705566, "grad_norm": 0.4471134841442108, "learning_rate": 7.480226547922068e-06, "loss": 0.3679, "step": 22756 }, { "epoch": 1.044330228075811, "grad_norm": 0.47342759370803833, "learning_rate": 7.480013649104767e-06, "loss": 0.3297, "step": 22757 }, { "epoch": 1.0443761185810656, "grad_norm": 0.457973450422287, "learning_rate": 7.479800744323815e-06, "loss": 0.3169, "step": 22758 }, { "epoch": 1.04442200908632, "grad_norm": 0.48210006952285767, "learning_rate": 7.479587833579727e-06, "loss": 0.3944, "step": 22759 }, { "epoch": 1.0444678995915746, "grad_norm": 0.5029793381690979, "learning_rate": 7.47937491687301e-06, "loss": 0.3751, "step": 22760 }, { "epoch": 1.0445137900968289, "grad_norm": 0.47599369287490845, "learning_rate": 7.47916199420418e-06, "loss": 0.3626, "step": 22761 }, { "epoch": 1.0445596806020834, "grad_norm": 0.4540698528289795, "learning_rate": 7.478949065573749e-06, "loss": 0.3531, "step": 22762 }, { "epoch": 1.0446055711073379, "grad_norm": 0.46855083107948303, "learning_rate": 7.478736130982226e-06, "loss": 0.4054, "step": 22763 }, { "epoch": 1.0446514616125924, "grad_norm": 0.4986293911933899, "learning_rate": 7.478523190430127e-06, "loss": 0.3902, "step": 22764 }, { "epoch": 1.0446973521178469, "grad_norm": 0.519072413444519, "learning_rate": 7.478310243917961e-06, "loss": 0.4757, "step": 22765 }, { "epoch": 1.0447432426231014, "grad_norm": 0.4353163540363312, "learning_rate": 7.47809729144624e-06, "loss": 0.298, "step": 22766 }, { "epoch": 1.0447891331283556, "grad_norm": 0.4433232843875885, "learning_rate": 7.477884333015477e-06, "loss": 0.3048, "step": 22767 }, { "epoch": 1.0448350236336101, "grad_norm": 0.44295734167099, "learning_rate": 7.477671368626186e-06, "loss": 0.3256, "step": 22768 }, { "epoch": 1.0448809141388646, "grad_norm": 0.5128352046012878, "learning_rate": 7.477458398278876e-06, "loss": 0.4521, "step": 22769 }, { "epoch": 1.0449268046441191, "grad_norm": 0.4496910870075226, "learning_rate": 7.477245421974062e-06, "loss": 0.3466, "step": 22770 }, { "epoch": 1.0449726951493736, "grad_norm": 0.47334206104278564, "learning_rate": 7.477032439712253e-06, "loss": 0.3779, "step": 22771 }, { "epoch": 1.0450185856546281, "grad_norm": 0.4903627634048462, "learning_rate": 7.476819451493963e-06, "loss": 0.3692, "step": 22772 }, { "epoch": 1.0450644761598826, "grad_norm": 0.4296421706676483, "learning_rate": 7.476606457319705e-06, "loss": 0.2867, "step": 22773 }, { "epoch": 1.045110366665137, "grad_norm": 0.48531919717788696, "learning_rate": 7.476393457189989e-06, "loss": 0.3669, "step": 22774 }, { "epoch": 1.0451562571703914, "grad_norm": 0.4488718807697296, "learning_rate": 7.4761804511053286e-06, "loss": 0.3384, "step": 22775 }, { "epoch": 1.045202147675646, "grad_norm": 0.4254857897758484, "learning_rate": 7.4759674390662365e-06, "loss": 0.2967, "step": 22776 }, { "epoch": 1.0452480381809004, "grad_norm": 0.47261327505111694, "learning_rate": 7.475754421073223e-06, "loss": 0.3181, "step": 22777 }, { "epoch": 1.045293928686155, "grad_norm": 0.4443094730377197, "learning_rate": 7.475541397126801e-06, "loss": 0.3342, "step": 22778 }, { "epoch": 1.0453398191914094, "grad_norm": 0.5133997201919556, "learning_rate": 7.475328367227485e-06, "loss": 0.3832, "step": 22779 }, { "epoch": 1.0453857096966637, "grad_norm": 0.45386335253715515, "learning_rate": 7.475115331375785e-06, "loss": 0.3253, "step": 22780 }, { "epoch": 1.0454316002019182, "grad_norm": 0.4405195713043213, "learning_rate": 7.474902289572213e-06, "loss": 0.3202, "step": 22781 }, { "epoch": 1.0454774907071727, "grad_norm": 0.4836391806602478, "learning_rate": 7.4746892418172835e-06, "loss": 0.4019, "step": 22782 }, { "epoch": 1.0455233812124272, "grad_norm": 0.4710160493850708, "learning_rate": 7.474476188111506e-06, "loss": 0.3867, "step": 22783 }, { "epoch": 1.0455692717176817, "grad_norm": 0.44884902238845825, "learning_rate": 7.4742631284553935e-06, "loss": 0.2971, "step": 22784 }, { "epoch": 1.0456151622229362, "grad_norm": 0.46134212613105774, "learning_rate": 7.47405006284946e-06, "loss": 0.3039, "step": 22785 }, { "epoch": 1.0456610527281904, "grad_norm": 0.4284680187702179, "learning_rate": 7.473836991294219e-06, "loss": 0.3339, "step": 22786 }, { "epoch": 1.045706943233445, "grad_norm": 0.43046730756759644, "learning_rate": 7.473623913790179e-06, "loss": 0.2781, "step": 22787 }, { "epoch": 1.0457528337386994, "grad_norm": 0.455280601978302, "learning_rate": 7.4734108303378535e-06, "loss": 0.3528, "step": 22788 }, { "epoch": 1.045798724243954, "grad_norm": 0.4981512427330017, "learning_rate": 7.473197740937757e-06, "loss": 0.3264, "step": 22789 }, { "epoch": 1.0458446147492084, "grad_norm": 0.4475570023059845, "learning_rate": 7.4729846455904e-06, "loss": 0.3071, "step": 22790 }, { "epoch": 1.045890505254463, "grad_norm": 0.4726376235485077, "learning_rate": 7.472771544296295e-06, "loss": 0.319, "step": 22791 }, { "epoch": 1.0459363957597174, "grad_norm": 0.47917553782463074, "learning_rate": 7.472558437055954e-06, "loss": 0.4013, "step": 22792 }, { "epoch": 1.0459822862649717, "grad_norm": 0.48621171712875366, "learning_rate": 7.472345323869892e-06, "loss": 0.3543, "step": 22793 }, { "epoch": 1.0460281767702262, "grad_norm": 0.45518121123313904, "learning_rate": 7.472132204738619e-06, "loss": 0.3161, "step": 22794 }, { "epoch": 1.0460740672754807, "grad_norm": 0.44455569982528687, "learning_rate": 7.471919079662648e-06, "loss": 0.3172, "step": 22795 }, { "epoch": 1.0461199577807352, "grad_norm": 0.4999527633190155, "learning_rate": 7.471705948642492e-06, "loss": 0.4392, "step": 22796 }, { "epoch": 1.0461658482859897, "grad_norm": 0.4634413719177246, "learning_rate": 7.471492811678665e-06, "loss": 0.3432, "step": 22797 }, { "epoch": 1.0462117387912442, "grad_norm": 0.4599563777446747, "learning_rate": 7.471279668771676e-06, "loss": 0.3406, "step": 22798 }, { "epoch": 1.0462576292964985, "grad_norm": 0.4741176664829254, "learning_rate": 7.471066519922039e-06, "loss": 0.3271, "step": 22799 }, { "epoch": 1.046303519801753, "grad_norm": 0.4608007073402405, "learning_rate": 7.470853365130267e-06, "loss": 0.3471, "step": 22800 }, { "epoch": 1.0463494103070075, "grad_norm": 0.4550667107105255, "learning_rate": 7.470640204396873e-06, "loss": 0.3344, "step": 22801 }, { "epoch": 1.046395300812262, "grad_norm": 0.5201148390769958, "learning_rate": 7.470427037722371e-06, "loss": 0.4416, "step": 22802 }, { "epoch": 1.0464411913175165, "grad_norm": 0.44585612416267395, "learning_rate": 7.470213865107269e-06, "loss": 0.3161, "step": 22803 }, { "epoch": 1.046487081822771, "grad_norm": 0.4689854383468628, "learning_rate": 7.470000686552083e-06, "loss": 0.3622, "step": 22804 }, { "epoch": 1.0465329723280252, "grad_norm": 0.4431607723236084, "learning_rate": 7.469787502057325e-06, "loss": 0.3027, "step": 22805 }, { "epoch": 1.0465788628332797, "grad_norm": 0.4926261901855469, "learning_rate": 7.469574311623506e-06, "loss": 0.3525, "step": 22806 }, { "epoch": 1.0466247533385342, "grad_norm": 0.46701228618621826, "learning_rate": 7.469361115251143e-06, "loss": 0.3846, "step": 22807 }, { "epoch": 1.0466706438437887, "grad_norm": 0.49617844820022583, "learning_rate": 7.469147912940745e-06, "loss": 0.351, "step": 22808 }, { "epoch": 1.0467165343490432, "grad_norm": 0.47484090924263, "learning_rate": 7.468934704692825e-06, "loss": 0.368, "step": 22809 }, { "epoch": 1.0467624248542977, "grad_norm": 0.4885796904563904, "learning_rate": 7.468721490507896e-06, "loss": 0.4141, "step": 22810 }, { "epoch": 1.0468083153595522, "grad_norm": 0.43781065940856934, "learning_rate": 7.468508270386472e-06, "loss": 0.2946, "step": 22811 }, { "epoch": 1.0468542058648065, "grad_norm": 0.47285762429237366, "learning_rate": 7.468295044329063e-06, "loss": 0.3534, "step": 22812 }, { "epoch": 1.046900096370061, "grad_norm": 0.4861748218536377, "learning_rate": 7.468081812336187e-06, "loss": 0.4159, "step": 22813 }, { "epoch": 1.0469459868753155, "grad_norm": 0.46343794465065, "learning_rate": 7.467868574408349e-06, "loss": 0.3294, "step": 22814 }, { "epoch": 1.04699187738057, "grad_norm": 0.4343664348125458, "learning_rate": 7.467655330546068e-06, "loss": 0.2997, "step": 22815 }, { "epoch": 1.0470377678858245, "grad_norm": 0.46754932403564453, "learning_rate": 7.467442080749854e-06, "loss": 0.339, "step": 22816 }, { "epoch": 1.047083658391079, "grad_norm": 0.4328851103782654, "learning_rate": 7.4672288250202205e-06, "loss": 0.3047, "step": 22817 }, { "epoch": 1.0471295488963333, "grad_norm": 0.48496606945991516, "learning_rate": 7.467015563357681e-06, "loss": 0.3726, "step": 22818 }, { "epoch": 1.0471754394015877, "grad_norm": 0.43926775455474854, "learning_rate": 7.466802295762749e-06, "loss": 0.3174, "step": 22819 }, { "epoch": 1.0472213299068422, "grad_norm": 0.5028486847877502, "learning_rate": 7.466589022235934e-06, "loss": 0.4347, "step": 22820 }, { "epoch": 1.0472672204120967, "grad_norm": 0.4662001132965088, "learning_rate": 7.466375742777752e-06, "loss": 0.3813, "step": 22821 }, { "epoch": 1.0473131109173512, "grad_norm": 0.46108874678611755, "learning_rate": 7.466162457388713e-06, "loss": 0.3097, "step": 22822 }, { "epoch": 1.0473590014226057, "grad_norm": 0.48175281286239624, "learning_rate": 7.465949166069333e-06, "loss": 0.4245, "step": 22823 }, { "epoch": 1.0474048919278602, "grad_norm": 0.44968870282173157, "learning_rate": 7.465735868820124e-06, "loss": 0.3365, "step": 22824 }, { "epoch": 1.0474507824331145, "grad_norm": 0.4968424439430237, "learning_rate": 7.465522565641597e-06, "loss": 0.3581, "step": 22825 }, { "epoch": 1.047496672938369, "grad_norm": 0.4804345965385437, "learning_rate": 7.465309256534267e-06, "loss": 0.3417, "step": 22826 }, { "epoch": 1.0475425634436235, "grad_norm": 0.42635828256607056, "learning_rate": 7.465095941498647e-06, "loss": 0.3148, "step": 22827 }, { "epoch": 1.047588453948878, "grad_norm": 0.4219377040863037, "learning_rate": 7.464882620535247e-06, "loss": 0.2871, "step": 22828 }, { "epoch": 1.0476343444541325, "grad_norm": 0.46893975138664246, "learning_rate": 7.4646692936445855e-06, "loss": 0.3269, "step": 22829 }, { "epoch": 1.047680234959387, "grad_norm": 0.4664924740791321, "learning_rate": 7.464455960827171e-06, "loss": 0.3929, "step": 22830 }, { "epoch": 1.0477261254646413, "grad_norm": 0.46971023082733154, "learning_rate": 7.464242622083516e-06, "loss": 0.3883, "step": 22831 }, { "epoch": 1.0477720159698958, "grad_norm": 0.45625361800193787, "learning_rate": 7.464029277414137e-06, "loss": 0.3164, "step": 22832 }, { "epoch": 1.0478179064751503, "grad_norm": 0.4419002830982208, "learning_rate": 7.463815926819545e-06, "loss": 0.3336, "step": 22833 }, { "epoch": 1.0478637969804048, "grad_norm": 0.42522820830345154, "learning_rate": 7.463602570300253e-06, "loss": 0.2759, "step": 22834 }, { "epoch": 1.0479096874856593, "grad_norm": 0.44821813702583313, "learning_rate": 7.463389207856776e-06, "loss": 0.346, "step": 22835 }, { "epoch": 1.0479555779909138, "grad_norm": 0.48051679134368896, "learning_rate": 7.463175839489622e-06, "loss": 0.376, "step": 22836 }, { "epoch": 1.048001468496168, "grad_norm": 0.4657687544822693, "learning_rate": 7.46296246519931e-06, "loss": 0.364, "step": 22837 }, { "epoch": 1.0480473590014225, "grad_norm": 0.5403136610984802, "learning_rate": 7.462749084986351e-06, "loss": 0.3918, "step": 22838 }, { "epoch": 1.048093249506677, "grad_norm": 0.4405789077281952, "learning_rate": 7.462535698851255e-06, "loss": 0.2859, "step": 22839 }, { "epoch": 1.0481391400119315, "grad_norm": 0.48423317074775696, "learning_rate": 7.4623223067945405e-06, "loss": 0.4092, "step": 22840 }, { "epoch": 1.048185030517186, "grad_norm": 0.5156185030937195, "learning_rate": 7.4621089088167174e-06, "loss": 0.4887, "step": 22841 }, { "epoch": 1.0482309210224405, "grad_norm": 0.49452975392341614, "learning_rate": 7.461895504918299e-06, "loss": 0.4065, "step": 22842 }, { "epoch": 1.0482768115276948, "grad_norm": 0.5345056653022766, "learning_rate": 7.461682095099798e-06, "loss": 0.4241, "step": 22843 }, { "epoch": 1.0483227020329493, "grad_norm": 0.46285098791122437, "learning_rate": 7.4614686793617305e-06, "loss": 0.3623, "step": 22844 }, { "epoch": 1.0483685925382038, "grad_norm": 0.4668427109718323, "learning_rate": 7.461255257704606e-06, "loss": 0.3379, "step": 22845 }, { "epoch": 1.0484144830434583, "grad_norm": 0.4942461848258972, "learning_rate": 7.461041830128941e-06, "loss": 0.373, "step": 22846 }, { "epoch": 1.0484603735487128, "grad_norm": 0.48151376843452454, "learning_rate": 7.460828396635247e-06, "loss": 0.3652, "step": 22847 }, { "epoch": 1.0485062640539673, "grad_norm": 0.4418684244155884, "learning_rate": 7.460614957224035e-06, "loss": 0.3245, "step": 22848 }, { "epoch": 1.0485521545592218, "grad_norm": 0.4535737633705139, "learning_rate": 7.460401511895823e-06, "loss": 0.3424, "step": 22849 }, { "epoch": 1.048598045064476, "grad_norm": 0.4610288143157959, "learning_rate": 7.4601880606511215e-06, "loss": 0.3143, "step": 22850 }, { "epoch": 1.0486439355697306, "grad_norm": 0.45131716132164, "learning_rate": 7.459974603490443e-06, "loss": 0.3322, "step": 22851 }, { "epoch": 1.048689826074985, "grad_norm": 0.5182852149009705, "learning_rate": 7.4597611404143035e-06, "loss": 0.3932, "step": 22852 }, { "epoch": 1.0487357165802396, "grad_norm": 0.4973585307598114, "learning_rate": 7.459547671423214e-06, "loss": 0.3939, "step": 22853 }, { "epoch": 1.048781607085494, "grad_norm": 0.48866260051727295, "learning_rate": 7.459334196517688e-06, "loss": 0.3632, "step": 22854 }, { "epoch": 1.0488274975907486, "grad_norm": 0.45171910524368286, "learning_rate": 7.459120715698241e-06, "loss": 0.3212, "step": 22855 }, { "epoch": 1.0488733880960028, "grad_norm": 0.4812033474445343, "learning_rate": 7.458907228965383e-06, "loss": 0.3551, "step": 22856 }, { "epoch": 1.0489192786012573, "grad_norm": 0.4883175790309906, "learning_rate": 7.458693736319631e-06, "loss": 0.3391, "step": 22857 }, { "epoch": 1.0489651691065118, "grad_norm": 0.4464930593967438, "learning_rate": 7.458480237761495e-06, "loss": 0.3321, "step": 22858 }, { "epoch": 1.0490110596117663, "grad_norm": 0.498491108417511, "learning_rate": 7.45826673329149e-06, "loss": 0.4228, "step": 22859 }, { "epoch": 1.0490569501170208, "grad_norm": 0.4749586582183838, "learning_rate": 7.45805322291013e-06, "loss": 0.3776, "step": 22860 }, { "epoch": 1.0491028406222753, "grad_norm": 0.47845861315727234, "learning_rate": 7.457839706617929e-06, "loss": 0.332, "step": 22861 }, { "epoch": 1.0491487311275298, "grad_norm": 0.5348946452140808, "learning_rate": 7.457626184415397e-06, "loss": 0.3735, "step": 22862 }, { "epoch": 1.049194621632784, "grad_norm": 0.448983758687973, "learning_rate": 7.45741265630305e-06, "loss": 0.3239, "step": 22863 }, { "epoch": 1.0492405121380386, "grad_norm": 0.47729814052581787, "learning_rate": 7.4571991222814025e-06, "loss": 0.3937, "step": 22864 }, { "epoch": 1.049286402643293, "grad_norm": 0.46043023467063904, "learning_rate": 7.456985582350965e-06, "loss": 0.3472, "step": 22865 }, { "epoch": 1.0493322931485476, "grad_norm": 0.5012228488922119, "learning_rate": 7.4567720365122535e-06, "loss": 0.414, "step": 22866 }, { "epoch": 1.049378183653802, "grad_norm": 0.5149646401405334, "learning_rate": 7.456558484765781e-06, "loss": 0.4113, "step": 22867 }, { "epoch": 1.0494240741590566, "grad_norm": 0.524141788482666, "learning_rate": 7.45634492711206e-06, "loss": 0.4496, "step": 22868 }, { "epoch": 1.0494699646643109, "grad_norm": 0.4521539807319641, "learning_rate": 7.4561313635516065e-06, "loss": 0.3337, "step": 22869 }, { "epoch": 1.0495158551695654, "grad_norm": 0.45072779059410095, "learning_rate": 7.45591779408493e-06, "loss": 0.3046, "step": 22870 }, { "epoch": 1.0495617456748199, "grad_norm": 0.4728637933731079, "learning_rate": 7.4557042187125464e-06, "loss": 0.3981, "step": 22871 }, { "epoch": 1.0496076361800744, "grad_norm": 0.4781298339366913, "learning_rate": 7.45549063743497e-06, "loss": 0.3435, "step": 22872 }, { "epoch": 1.0496535266853289, "grad_norm": 0.5465521812438965, "learning_rate": 7.455277050252714e-06, "loss": 0.4372, "step": 22873 }, { "epoch": 1.0496994171905834, "grad_norm": 0.4891458749771118, "learning_rate": 7.45506345716629e-06, "loss": 0.3884, "step": 22874 }, { "epoch": 1.0497453076958376, "grad_norm": 0.45702677965164185, "learning_rate": 7.4548498581762155e-06, "loss": 0.319, "step": 22875 }, { "epoch": 1.0497911982010921, "grad_norm": 0.5109661817550659, "learning_rate": 7.4546362532829995e-06, "loss": 0.4011, "step": 22876 }, { "epoch": 1.0498370887063466, "grad_norm": 0.524146556854248, "learning_rate": 7.454422642487159e-06, "loss": 0.4392, "step": 22877 }, { "epoch": 1.0498829792116011, "grad_norm": 0.47927236557006836, "learning_rate": 7.4542090257892074e-06, "loss": 0.4014, "step": 22878 }, { "epoch": 1.0499288697168556, "grad_norm": 0.45382750034332275, "learning_rate": 7.453995403189658e-06, "loss": 0.3632, "step": 22879 }, { "epoch": 1.0499747602221101, "grad_norm": 0.47037824988365173, "learning_rate": 7.453781774689021e-06, "loss": 0.3641, "step": 22880 }, { "epoch": 1.0500206507273646, "grad_norm": 0.44643646478652954, "learning_rate": 7.453568140287817e-06, "loss": 0.3389, "step": 22881 }, { "epoch": 1.0500665412326189, "grad_norm": 0.4922311007976532, "learning_rate": 7.4533544999865535e-06, "loss": 0.3982, "step": 22882 }, { "epoch": 1.0501124317378734, "grad_norm": 0.4400462806224823, "learning_rate": 7.453140853785749e-06, "loss": 0.3512, "step": 22883 }, { "epoch": 1.0501583222431279, "grad_norm": 0.5003165006637573, "learning_rate": 7.452927201685913e-06, "loss": 0.339, "step": 22884 }, { "epoch": 1.0502042127483824, "grad_norm": 0.4642479717731476, "learning_rate": 7.4527135436875615e-06, "loss": 0.382, "step": 22885 }, { "epoch": 1.0502501032536369, "grad_norm": 0.4973624646663666, "learning_rate": 7.4524998797912096e-06, "loss": 0.4472, "step": 22886 }, { "epoch": 1.0502959937588914, "grad_norm": 0.48698046803474426, "learning_rate": 7.452286209997368e-06, "loss": 0.3988, "step": 22887 }, { "epoch": 1.0503418842641457, "grad_norm": 0.44886457920074463, "learning_rate": 7.452072534306551e-06, "loss": 0.3123, "step": 22888 }, { "epoch": 1.0503877747694002, "grad_norm": 0.4723467230796814, "learning_rate": 7.451858852719276e-06, "loss": 0.3452, "step": 22889 }, { "epoch": 1.0504336652746546, "grad_norm": 0.4727070927619934, "learning_rate": 7.451645165236055e-06, "loss": 0.3534, "step": 22890 }, { "epoch": 1.0504795557799091, "grad_norm": 0.4845750629901886, "learning_rate": 7.451431471857398e-06, "loss": 0.4114, "step": 22891 }, { "epoch": 1.0505254462851636, "grad_norm": 0.43944084644317627, "learning_rate": 7.451217772583823e-06, "loss": 0.3232, "step": 22892 }, { "epoch": 1.0505713367904181, "grad_norm": 0.4886188507080078, "learning_rate": 7.451004067415843e-06, "loss": 0.4007, "step": 22893 }, { "epoch": 1.0506172272956724, "grad_norm": 0.5098027586936951, "learning_rate": 7.450790356353972e-06, "loss": 0.4342, "step": 22894 }, { "epoch": 1.050663117800927, "grad_norm": 0.44509491324424744, "learning_rate": 7.450576639398725e-06, "loss": 0.3484, "step": 22895 }, { "epoch": 1.0507090083061814, "grad_norm": 0.5053408145904541, "learning_rate": 7.450362916550612e-06, "loss": 0.4497, "step": 22896 }, { "epoch": 1.050754898811436, "grad_norm": 0.5024242997169495, "learning_rate": 7.4501491878101504e-06, "loss": 0.4456, "step": 22897 }, { "epoch": 1.0508007893166904, "grad_norm": 0.4739820659160614, "learning_rate": 7.449935453177854e-06, "loss": 0.372, "step": 22898 }, { "epoch": 1.050846679821945, "grad_norm": 0.47009769082069397, "learning_rate": 7.4497217126542355e-06, "loss": 0.367, "step": 22899 }, { "epoch": 1.0508925703271994, "grad_norm": 0.48233696818351746, "learning_rate": 7.449507966239811e-06, "loss": 0.3713, "step": 22900 }, { "epoch": 1.0509384608324537, "grad_norm": 0.47898244857788086, "learning_rate": 7.449294213935091e-06, "loss": 0.3429, "step": 22901 }, { "epoch": 1.0509843513377082, "grad_norm": 0.45203670859336853, "learning_rate": 7.449080455740592e-06, "loss": 0.3269, "step": 22902 }, { "epoch": 1.0510302418429627, "grad_norm": 0.47224658727645874, "learning_rate": 7.448866691656828e-06, "loss": 0.3593, "step": 22903 }, { "epoch": 1.0510761323482172, "grad_norm": 0.4399934709072113, "learning_rate": 7.448652921684312e-06, "loss": 0.3358, "step": 22904 }, { "epoch": 1.0511220228534717, "grad_norm": 0.574245274066925, "learning_rate": 7.448439145823558e-06, "loss": 0.4313, "step": 22905 }, { "epoch": 1.0511679133587262, "grad_norm": 0.4937978684902191, "learning_rate": 7.448225364075081e-06, "loss": 0.4129, "step": 22906 }, { "epoch": 1.0512138038639804, "grad_norm": 0.4915756583213806, "learning_rate": 7.448011576439396e-06, "loss": 0.355, "step": 22907 }, { "epoch": 1.051259694369235, "grad_norm": 0.4830446243286133, "learning_rate": 7.447797782917013e-06, "loss": 0.3597, "step": 22908 }, { "epoch": 1.0513055848744894, "grad_norm": 0.5387779474258423, "learning_rate": 7.447583983508451e-06, "loss": 0.4403, "step": 22909 }, { "epoch": 1.051351475379744, "grad_norm": 0.5020200610160828, "learning_rate": 7.4473701782142215e-06, "loss": 0.3967, "step": 22910 }, { "epoch": 1.0513973658849984, "grad_norm": 0.5463634729385376, "learning_rate": 7.447156367034839e-06, "loss": 0.3956, "step": 22911 }, { "epoch": 1.051443256390253, "grad_norm": 0.49161043763160706, "learning_rate": 7.446942549970818e-06, "loss": 0.4165, "step": 22912 }, { "epoch": 1.0514891468955074, "grad_norm": 0.467472106218338, "learning_rate": 7.446728727022672e-06, "loss": 0.306, "step": 22913 }, { "epoch": 1.0515350374007617, "grad_norm": 0.4945918917655945, "learning_rate": 7.4465148981909155e-06, "loss": 0.4093, "step": 22914 }, { "epoch": 1.0515809279060162, "grad_norm": 0.48945873975753784, "learning_rate": 7.446301063476063e-06, "loss": 0.3656, "step": 22915 }, { "epoch": 1.0516268184112707, "grad_norm": 0.4703664481639862, "learning_rate": 7.446087222878631e-06, "loss": 0.3401, "step": 22916 }, { "epoch": 1.0516727089165252, "grad_norm": 0.4353506863117218, "learning_rate": 7.445873376399129e-06, "loss": 0.3216, "step": 22917 }, { "epoch": 1.0517185994217797, "grad_norm": 0.4693823456764221, "learning_rate": 7.445659524038072e-06, "loss": 0.3282, "step": 22918 }, { "epoch": 1.0517644899270342, "grad_norm": 0.46539637446403503, "learning_rate": 7.445445665795977e-06, "loss": 0.3949, "step": 22919 }, { "epoch": 1.0518103804322885, "grad_norm": 0.44187384843826294, "learning_rate": 7.445231801673358e-06, "loss": 0.3067, "step": 22920 }, { "epoch": 1.051856270937543, "grad_norm": 0.4271581470966339, "learning_rate": 7.445017931670726e-06, "loss": 0.2993, "step": 22921 }, { "epoch": 1.0519021614427975, "grad_norm": 0.45700979232788086, "learning_rate": 7.444804055788601e-06, "loss": 0.3276, "step": 22922 }, { "epoch": 1.051948051948052, "grad_norm": 0.4775296747684479, "learning_rate": 7.444590174027492e-06, "loss": 0.3543, "step": 22923 }, { "epoch": 1.0519939424533065, "grad_norm": 0.4667254388332367, "learning_rate": 7.444376286387914e-06, "loss": 0.333, "step": 22924 }, { "epoch": 1.052039832958561, "grad_norm": 0.4507078230381012, "learning_rate": 7.444162392870383e-06, "loss": 0.3373, "step": 22925 }, { "epoch": 1.0520857234638152, "grad_norm": 0.4883470833301544, "learning_rate": 7.443948493475414e-06, "loss": 0.3574, "step": 22926 }, { "epoch": 1.0521316139690697, "grad_norm": 0.46691417694091797, "learning_rate": 7.443734588203519e-06, "loss": 0.3878, "step": 22927 }, { "epoch": 1.0521775044743242, "grad_norm": 0.461717814207077, "learning_rate": 7.443520677055214e-06, "loss": 0.3162, "step": 22928 }, { "epoch": 1.0522233949795787, "grad_norm": 0.43372756242752075, "learning_rate": 7.443306760031014e-06, "loss": 0.3279, "step": 22929 }, { "epoch": 1.0522692854848332, "grad_norm": 0.5015509724617004, "learning_rate": 7.443092837131432e-06, "loss": 0.3504, "step": 22930 }, { "epoch": 1.0523151759900877, "grad_norm": 0.4529511332511902, "learning_rate": 7.442878908356981e-06, "loss": 0.3419, "step": 22931 }, { "epoch": 1.052361066495342, "grad_norm": 0.4755386412143707, "learning_rate": 7.442664973708178e-06, "loss": 0.4048, "step": 22932 }, { "epoch": 1.0524069570005965, "grad_norm": 0.48132649064064026, "learning_rate": 7.442451033185538e-06, "loss": 0.392, "step": 22933 }, { "epoch": 1.052452847505851, "grad_norm": 0.47903576493263245, "learning_rate": 7.442237086789574e-06, "loss": 0.341, "step": 22934 }, { "epoch": 1.0524987380111055, "grad_norm": 0.4602435529232025, "learning_rate": 7.442023134520799e-06, "loss": 0.3279, "step": 22935 }, { "epoch": 1.05254462851636, "grad_norm": 0.4349225163459778, "learning_rate": 7.44180917637973e-06, "loss": 0.2945, "step": 22936 }, { "epoch": 1.0525905190216145, "grad_norm": 0.4564686119556427, "learning_rate": 7.441595212366881e-06, "loss": 0.3155, "step": 22937 }, { "epoch": 1.052636409526869, "grad_norm": 0.4449274241924286, "learning_rate": 7.4413812424827655e-06, "loss": 0.3212, "step": 22938 }, { "epoch": 1.0526823000321233, "grad_norm": 0.48479411005973816, "learning_rate": 7.441167266727897e-06, "loss": 0.3549, "step": 22939 }, { "epoch": 1.0527281905373778, "grad_norm": 0.45903050899505615, "learning_rate": 7.440953285102795e-06, "loss": 0.3488, "step": 22940 }, { "epoch": 1.0527740810426323, "grad_norm": 0.4260769188404083, "learning_rate": 7.440739297607968e-06, "loss": 0.2984, "step": 22941 }, { "epoch": 1.0528199715478868, "grad_norm": 0.486358642578125, "learning_rate": 7.440525304243935e-06, "loss": 0.3592, "step": 22942 }, { "epoch": 1.0528658620531413, "grad_norm": 0.5319312810897827, "learning_rate": 7.440311305011208e-06, "loss": 0.4014, "step": 22943 }, { "epoch": 1.0529117525583958, "grad_norm": 0.45271381735801697, "learning_rate": 7.4400972999103035e-06, "loss": 0.3488, "step": 22944 }, { "epoch": 1.05295764306365, "grad_norm": 0.44694584608078003, "learning_rate": 7.439883288941735e-06, "loss": 0.3234, "step": 22945 }, { "epoch": 1.0530035335689045, "grad_norm": 0.4485241174697876, "learning_rate": 7.439669272106017e-06, "loss": 0.316, "step": 22946 }, { "epoch": 1.053049424074159, "grad_norm": 0.49566006660461426, "learning_rate": 7.439455249403663e-06, "loss": 0.3893, "step": 22947 }, { "epoch": 1.0530953145794135, "grad_norm": 0.4916944205760956, "learning_rate": 7.439241220835191e-06, "loss": 0.3627, "step": 22948 }, { "epoch": 1.053141205084668, "grad_norm": 0.4673306345939636, "learning_rate": 7.439027186401113e-06, "loss": 0.347, "step": 22949 }, { "epoch": 1.0531870955899225, "grad_norm": 0.4862653911113739, "learning_rate": 7.4388131461019445e-06, "loss": 0.4043, "step": 22950 }, { "epoch": 1.053232986095177, "grad_norm": 0.47215336561203003, "learning_rate": 7.4385990999381996e-06, "loss": 0.3541, "step": 22951 }, { "epoch": 1.0532788766004313, "grad_norm": 0.4164862334728241, "learning_rate": 7.438385047910394e-06, "loss": 0.2744, "step": 22952 }, { "epoch": 1.0533247671056858, "grad_norm": 0.467967689037323, "learning_rate": 7.43817099001904e-06, "loss": 0.3867, "step": 22953 }, { "epoch": 1.0533706576109403, "grad_norm": 0.47614115476608276, "learning_rate": 7.4379569262646576e-06, "loss": 0.3649, "step": 22954 }, { "epoch": 1.0534165481161948, "grad_norm": 0.45890775322914124, "learning_rate": 7.437742856647756e-06, "loss": 0.3223, "step": 22955 }, { "epoch": 1.0534624386214493, "grad_norm": 0.533683717250824, "learning_rate": 7.437528781168852e-06, "loss": 0.4444, "step": 22956 }, { "epoch": 1.0535083291267038, "grad_norm": 0.4898475706577301, "learning_rate": 7.437314699828463e-06, "loss": 0.4274, "step": 22957 }, { "epoch": 1.053554219631958, "grad_norm": 0.4400314688682556, "learning_rate": 7.4371006126271e-06, "loss": 0.3304, "step": 22958 }, { "epoch": 1.0536001101372126, "grad_norm": 0.43901070952415466, "learning_rate": 7.436886519565278e-06, "loss": 0.3398, "step": 22959 }, { "epoch": 1.053646000642467, "grad_norm": 0.46425217390060425, "learning_rate": 7.4366724206435144e-06, "loss": 0.3782, "step": 22960 }, { "epoch": 1.0536918911477215, "grad_norm": 0.48289090394973755, "learning_rate": 7.436458315862323e-06, "loss": 0.3424, "step": 22961 }, { "epoch": 1.053737781652976, "grad_norm": 0.5225856304168701, "learning_rate": 7.436244205222216e-06, "loss": 0.4233, "step": 22962 }, { "epoch": 1.0537836721582305, "grad_norm": 0.47003406286239624, "learning_rate": 7.436030088723714e-06, "loss": 0.3803, "step": 22963 }, { "epoch": 1.0538295626634848, "grad_norm": 0.4456920325756073, "learning_rate": 7.435815966367326e-06, "loss": 0.3411, "step": 22964 }, { "epoch": 1.0538754531687393, "grad_norm": 0.4918728768825531, "learning_rate": 7.43560183815357e-06, "loss": 0.3988, "step": 22965 }, { "epoch": 1.0539213436739938, "grad_norm": 0.48221755027770996, "learning_rate": 7.43538770408296e-06, "loss": 0.318, "step": 22966 }, { "epoch": 1.0539672341792483, "grad_norm": 0.5157448649406433, "learning_rate": 7.435173564156012e-06, "loss": 0.4322, "step": 22967 }, { "epoch": 1.0540131246845028, "grad_norm": 0.44441649317741394, "learning_rate": 7.43495941837324e-06, "loss": 0.3144, "step": 22968 }, { "epoch": 1.0540590151897573, "grad_norm": 0.5442938804626465, "learning_rate": 7.4347452667351575e-06, "loss": 0.401, "step": 22969 }, { "epoch": 1.0541049056950118, "grad_norm": 0.4727689027786255, "learning_rate": 7.434531109242283e-06, "loss": 0.3208, "step": 22970 }, { "epoch": 1.054150796200266, "grad_norm": 0.4457760751247406, "learning_rate": 7.434316945895129e-06, "loss": 0.2937, "step": 22971 }, { "epoch": 1.0541966867055206, "grad_norm": 0.5181297063827515, "learning_rate": 7.434102776694211e-06, "loss": 0.4461, "step": 22972 }, { "epoch": 1.054242577210775, "grad_norm": 0.430737167596817, "learning_rate": 7.433888601640043e-06, "loss": 0.2814, "step": 22973 }, { "epoch": 1.0542884677160296, "grad_norm": 0.49610990285873413, "learning_rate": 7.433674420733142e-06, "loss": 0.3536, "step": 22974 }, { "epoch": 1.054334358221284, "grad_norm": 0.48436692357063293, "learning_rate": 7.433460233974023e-06, "loss": 0.3551, "step": 22975 }, { "epoch": 1.0543802487265386, "grad_norm": 0.4693945050239563, "learning_rate": 7.433246041363199e-06, "loss": 0.363, "step": 22976 }, { "epoch": 1.0544261392317928, "grad_norm": 0.4755348861217499, "learning_rate": 7.433031842901187e-06, "loss": 0.3393, "step": 22977 }, { "epoch": 1.0544720297370473, "grad_norm": 0.47202160954475403, "learning_rate": 7.4328176385885e-06, "loss": 0.3716, "step": 22978 }, { "epoch": 1.0545179202423018, "grad_norm": 0.4674060642719269, "learning_rate": 7.432603428425655e-06, "loss": 0.3943, "step": 22979 }, { "epoch": 1.0545638107475563, "grad_norm": 0.4633082449436188, "learning_rate": 7.432389212413167e-06, "loss": 0.3885, "step": 22980 }, { "epoch": 1.0546097012528108, "grad_norm": 0.47608405351638794, "learning_rate": 7.4321749905515505e-06, "loss": 0.3582, "step": 22981 }, { "epoch": 1.0546555917580653, "grad_norm": 0.48729392886161804, "learning_rate": 7.431960762841321e-06, "loss": 0.3607, "step": 22982 }, { "epoch": 1.0547014822633196, "grad_norm": 0.5162984728813171, "learning_rate": 7.431746529282993e-06, "loss": 0.3812, "step": 22983 }, { "epoch": 1.054747372768574, "grad_norm": 0.458340585231781, "learning_rate": 7.431532289877081e-06, "loss": 0.3422, "step": 22984 }, { "epoch": 1.0547932632738286, "grad_norm": 0.45092713832855225, "learning_rate": 7.4313180446241025e-06, "loss": 0.2996, "step": 22985 }, { "epoch": 1.054839153779083, "grad_norm": 0.4842176139354706, "learning_rate": 7.431103793524571e-06, "loss": 0.3624, "step": 22986 }, { "epoch": 1.0548850442843376, "grad_norm": 0.4518870413303375, "learning_rate": 7.430889536579003e-06, "loss": 0.3331, "step": 22987 }, { "epoch": 1.054930934789592, "grad_norm": 0.4805007576942444, "learning_rate": 7.430675273787913e-06, "loss": 0.4237, "step": 22988 }, { "epoch": 1.0549768252948466, "grad_norm": 0.46103641390800476, "learning_rate": 7.430461005151815e-06, "loss": 0.3646, "step": 22989 }, { "epoch": 1.0550227158001009, "grad_norm": 0.4092772901058197, "learning_rate": 7.430246730671226e-06, "loss": 0.2423, "step": 22990 }, { "epoch": 1.0550686063053554, "grad_norm": 0.4759734869003296, "learning_rate": 7.430032450346661e-06, "loss": 0.3269, "step": 22991 }, { "epoch": 1.0551144968106099, "grad_norm": 0.4694686532020569, "learning_rate": 7.429818164178634e-06, "loss": 0.3288, "step": 22992 }, { "epoch": 1.0551603873158644, "grad_norm": 0.5112667083740234, "learning_rate": 7.429603872167661e-06, "loss": 0.5059, "step": 22993 }, { "epoch": 1.0552062778211189, "grad_norm": 0.46050316095352173, "learning_rate": 7.429389574314259e-06, "loss": 0.3812, "step": 22994 }, { "epoch": 1.0552521683263734, "grad_norm": 0.48391494154930115, "learning_rate": 7.42917527061894e-06, "loss": 0.3247, "step": 22995 }, { "epoch": 1.0552980588316276, "grad_norm": 0.5052779316902161, "learning_rate": 7.428960961082221e-06, "loss": 0.4036, "step": 22996 }, { "epoch": 1.0553439493368821, "grad_norm": 0.48068252205848694, "learning_rate": 7.428746645704619e-06, "loss": 0.4004, "step": 22997 }, { "epoch": 1.0553898398421366, "grad_norm": 0.4690626859664917, "learning_rate": 7.4285323244866484e-06, "loss": 0.3736, "step": 22998 }, { "epoch": 1.0554357303473911, "grad_norm": 0.4160265326499939, "learning_rate": 7.428317997428822e-06, "loss": 0.3011, "step": 22999 }, { "epoch": 1.0554816208526456, "grad_norm": 0.4719209671020508, "learning_rate": 7.428103664531659e-06, "loss": 0.338, "step": 23000 }, { "epoch": 1.0555275113579001, "grad_norm": 0.4483465552330017, "learning_rate": 7.427889325795671e-06, "loss": 0.3047, "step": 23001 }, { "epoch": 1.0555734018631546, "grad_norm": 0.45481348037719727, "learning_rate": 7.427674981221377e-06, "loss": 0.3257, "step": 23002 }, { "epoch": 1.055619292368409, "grad_norm": 0.4806005656719208, "learning_rate": 7.427460630809291e-06, "loss": 0.3714, "step": 23003 }, { "epoch": 1.0556651828736634, "grad_norm": 0.4332531690597534, "learning_rate": 7.4272462745599275e-06, "loss": 0.2944, "step": 23004 }, { "epoch": 1.055711073378918, "grad_norm": 0.4874333143234253, "learning_rate": 7.427031912473803e-06, "loss": 0.3985, "step": 23005 }, { "epoch": 1.0557569638841724, "grad_norm": 0.4397573471069336, "learning_rate": 7.426817544551433e-06, "loss": 0.2927, "step": 23006 }, { "epoch": 1.055802854389427, "grad_norm": 0.42347070574760437, "learning_rate": 7.4266031707933315e-06, "loss": 0.3109, "step": 23007 }, { "epoch": 1.0558487448946814, "grad_norm": 0.4873827397823334, "learning_rate": 7.426388791200017e-06, "loss": 0.4254, "step": 23008 }, { "epoch": 1.0558946353999357, "grad_norm": 0.45702075958251953, "learning_rate": 7.426174405772002e-06, "loss": 0.3647, "step": 23009 }, { "epoch": 1.0559405259051902, "grad_norm": 0.41772589087486267, "learning_rate": 7.4259600145098034e-06, "loss": 0.2839, "step": 23010 }, { "epoch": 1.0559864164104447, "grad_norm": 0.48398715257644653, "learning_rate": 7.425745617413938e-06, "loss": 0.2997, "step": 23011 }, { "epoch": 1.0560323069156992, "grad_norm": 0.6297457218170166, "learning_rate": 7.425531214484918e-06, "loss": 0.3963, "step": 23012 }, { "epoch": 1.0560781974209537, "grad_norm": 0.4255613684654236, "learning_rate": 7.425316805723262e-06, "loss": 0.3037, "step": 23013 }, { "epoch": 1.0561240879262082, "grad_norm": 0.46519380807876587, "learning_rate": 7.425102391129485e-06, "loss": 0.3696, "step": 23014 }, { "epoch": 1.0561699784314624, "grad_norm": 0.4854913055896759, "learning_rate": 7.424887970704101e-06, "loss": 0.3303, "step": 23015 }, { "epoch": 1.056215868936717, "grad_norm": 0.48908936977386475, "learning_rate": 7.424673544447628e-06, "loss": 0.3966, "step": 23016 }, { "epoch": 1.0562617594419714, "grad_norm": 0.4920559227466583, "learning_rate": 7.424459112360579e-06, "loss": 0.3564, "step": 23017 }, { "epoch": 1.056307649947226, "grad_norm": 0.464352011680603, "learning_rate": 7.424244674443472e-06, "loss": 0.3172, "step": 23018 }, { "epoch": 1.0563535404524804, "grad_norm": 0.44081026315689087, "learning_rate": 7.424030230696821e-06, "loss": 0.3118, "step": 23019 }, { "epoch": 1.056399430957735, "grad_norm": 0.4807784855365753, "learning_rate": 7.423815781121144e-06, "loss": 0.3553, "step": 23020 }, { "epoch": 1.0564453214629892, "grad_norm": 0.44177550077438354, "learning_rate": 7.423601325716952e-06, "loss": 0.292, "step": 23021 }, { "epoch": 1.0564912119682437, "grad_norm": 0.462263822555542, "learning_rate": 7.423386864484768e-06, "loss": 0.3295, "step": 23022 }, { "epoch": 1.0565371024734982, "grad_norm": 0.5075477957725525, "learning_rate": 7.423172397425099e-06, "loss": 0.3814, "step": 23023 }, { "epoch": 1.0565829929787527, "grad_norm": 0.45924413204193115, "learning_rate": 7.422957924538467e-06, "loss": 0.3774, "step": 23024 }, { "epoch": 1.0566288834840072, "grad_norm": 0.4755311608314514, "learning_rate": 7.422743445825387e-06, "loss": 0.4136, "step": 23025 }, { "epoch": 1.0566747739892617, "grad_norm": 0.4683804214000702, "learning_rate": 7.4225289612863725e-06, "loss": 0.4206, "step": 23026 }, { "epoch": 1.0567206644945162, "grad_norm": 0.48861759901046753, "learning_rate": 7.42231447092194e-06, "loss": 0.4142, "step": 23027 }, { "epoch": 1.0567665549997705, "grad_norm": 0.47811856865882874, "learning_rate": 7.422099974732606e-06, "loss": 0.4095, "step": 23028 }, { "epoch": 1.056812445505025, "grad_norm": 0.4819469153881073, "learning_rate": 7.421885472718886e-06, "loss": 0.3831, "step": 23029 }, { "epoch": 1.0568583360102795, "grad_norm": 0.48023471236228943, "learning_rate": 7.421670964881297e-06, "loss": 0.3944, "step": 23030 }, { "epoch": 1.056904226515534, "grad_norm": 0.45764973759651184, "learning_rate": 7.421456451220353e-06, "loss": 0.3176, "step": 23031 }, { "epoch": 1.0569501170207884, "grad_norm": 0.43197572231292725, "learning_rate": 7.421241931736569e-06, "loss": 0.3015, "step": 23032 }, { "epoch": 1.056996007526043, "grad_norm": 0.4915072023868561, "learning_rate": 7.421027406430464e-06, "loss": 0.4018, "step": 23033 }, { "epoch": 1.0570418980312972, "grad_norm": 0.4470650553703308, "learning_rate": 7.420812875302551e-06, "loss": 0.3619, "step": 23034 }, { "epoch": 1.0570877885365517, "grad_norm": 0.4715549647808075, "learning_rate": 7.420598338353348e-06, "loss": 0.3094, "step": 23035 }, { "epoch": 1.0571336790418062, "grad_norm": 0.4616329073905945, "learning_rate": 7.42038379558337e-06, "loss": 0.3611, "step": 23036 }, { "epoch": 1.0571795695470607, "grad_norm": 0.47509992122650146, "learning_rate": 7.420169246993133e-06, "loss": 0.3676, "step": 23037 }, { "epoch": 1.0572254600523152, "grad_norm": 0.4342285990715027, "learning_rate": 7.419954692583151e-06, "loss": 0.3196, "step": 23038 }, { "epoch": 1.0572713505575697, "grad_norm": 0.4652702212333679, "learning_rate": 7.419740132353944e-06, "loss": 0.3333, "step": 23039 }, { "epoch": 1.0573172410628242, "grad_norm": 0.4997953772544861, "learning_rate": 7.419525566306024e-06, "loss": 0.3756, "step": 23040 }, { "epoch": 1.0573631315680785, "grad_norm": 0.4575677216053009, "learning_rate": 7.41931099443991e-06, "loss": 0.3302, "step": 23041 }, { "epoch": 1.057409022073333, "grad_norm": 0.5354112982749939, "learning_rate": 7.419096416756116e-06, "loss": 0.464, "step": 23042 }, { "epoch": 1.0574549125785875, "grad_norm": 0.4812207818031311, "learning_rate": 7.418881833255158e-06, "loss": 0.4137, "step": 23043 }, { "epoch": 1.057500803083842, "grad_norm": 0.48856064677238464, "learning_rate": 7.4186672439375525e-06, "loss": 0.4105, "step": 23044 }, { "epoch": 1.0575466935890965, "grad_norm": 0.47054198384284973, "learning_rate": 7.418452648803818e-06, "loss": 0.3897, "step": 23045 }, { "epoch": 1.057592584094351, "grad_norm": 0.4522615373134613, "learning_rate": 7.418238047854466e-06, "loss": 0.3388, "step": 23046 }, { "epoch": 1.0576384745996052, "grad_norm": 0.4636993408203125, "learning_rate": 7.418023441090016e-06, "loss": 0.3613, "step": 23047 }, { "epoch": 1.0576843651048597, "grad_norm": 0.5913541913032532, "learning_rate": 7.417808828510982e-06, "loss": 0.3344, "step": 23048 }, { "epoch": 1.0577302556101142, "grad_norm": 0.46286001801490784, "learning_rate": 7.41759421011788e-06, "loss": 0.3555, "step": 23049 }, { "epoch": 1.0577761461153687, "grad_norm": 0.47120001912117004, "learning_rate": 7.417379585911228e-06, "loss": 0.373, "step": 23050 }, { "epoch": 1.0578220366206232, "grad_norm": 0.49049481749534607, "learning_rate": 7.41716495589154e-06, "loss": 0.4402, "step": 23051 }, { "epoch": 1.0578679271258777, "grad_norm": 0.46740633249282837, "learning_rate": 7.416950320059335e-06, "loss": 0.3571, "step": 23052 }, { "epoch": 1.057913817631132, "grad_norm": 0.49441757798194885, "learning_rate": 7.416735678415127e-06, "loss": 0.3559, "step": 23053 }, { "epoch": 1.0579597081363865, "grad_norm": 0.46950918436050415, "learning_rate": 7.41652103095943e-06, "loss": 0.3521, "step": 23054 }, { "epoch": 1.058005598641641, "grad_norm": 0.46083974838256836, "learning_rate": 7.416306377692765e-06, "loss": 0.3477, "step": 23055 }, { "epoch": 1.0580514891468955, "grad_norm": 0.49763017892837524, "learning_rate": 7.416091718615645e-06, "loss": 0.3629, "step": 23056 }, { "epoch": 1.05809737965215, "grad_norm": 0.45558443665504456, "learning_rate": 7.4158770537285865e-06, "loss": 0.3224, "step": 23057 }, { "epoch": 1.0581432701574045, "grad_norm": 0.47926440834999084, "learning_rate": 7.415662383032107e-06, "loss": 0.3268, "step": 23058 }, { "epoch": 1.058189160662659, "grad_norm": 0.4860941767692566, "learning_rate": 7.415447706526722e-06, "loss": 0.3613, "step": 23059 }, { "epoch": 1.0582350511679133, "grad_norm": 0.5255950689315796, "learning_rate": 7.415233024212947e-06, "loss": 0.3531, "step": 23060 }, { "epoch": 1.0582809416731678, "grad_norm": 0.5197268724441528, "learning_rate": 7.415018336091299e-06, "loss": 0.4072, "step": 23061 }, { "epoch": 1.0583268321784223, "grad_norm": 0.4712773859500885, "learning_rate": 7.414803642162294e-06, "loss": 0.3674, "step": 23062 }, { "epoch": 1.0583727226836768, "grad_norm": 0.4689558744430542, "learning_rate": 7.414588942426448e-06, "loss": 0.3308, "step": 23063 }, { "epoch": 1.0584186131889313, "grad_norm": 0.47313669323921204, "learning_rate": 7.414374236884279e-06, "loss": 0.3418, "step": 23064 }, { "epoch": 1.0584645036941858, "grad_norm": 0.4789111912250519, "learning_rate": 7.414159525536301e-06, "loss": 0.3872, "step": 23065 }, { "epoch": 1.05851039419944, "grad_norm": 0.4722191393375397, "learning_rate": 7.413944808383031e-06, "loss": 0.4332, "step": 23066 }, { "epoch": 1.0585562847046945, "grad_norm": 0.48246580362319946, "learning_rate": 7.413730085424986e-06, "loss": 0.3737, "step": 23067 }, { "epoch": 1.058602175209949, "grad_norm": 0.4831930994987488, "learning_rate": 7.413515356662682e-06, "loss": 0.3683, "step": 23068 }, { "epoch": 1.0586480657152035, "grad_norm": 0.4684121906757355, "learning_rate": 7.413300622096635e-06, "loss": 0.3153, "step": 23069 }, { "epoch": 1.058693956220458, "grad_norm": 0.45734912157058716, "learning_rate": 7.413085881727362e-06, "loss": 0.2861, "step": 23070 }, { "epoch": 1.0587398467257125, "grad_norm": 0.45429527759552, "learning_rate": 7.41287113555538e-06, "loss": 0.3648, "step": 23071 }, { "epoch": 1.0587857372309668, "grad_norm": 0.4772087633609772, "learning_rate": 7.412656383581202e-06, "loss": 0.413, "step": 23072 }, { "epoch": 1.0588316277362213, "grad_norm": 0.45778122544288635, "learning_rate": 7.412441625805349e-06, "loss": 0.3621, "step": 23073 }, { "epoch": 1.0588775182414758, "grad_norm": 0.4943199157714844, "learning_rate": 7.412226862228334e-06, "loss": 0.3859, "step": 23074 }, { "epoch": 1.0589234087467303, "grad_norm": 0.46181315183639526, "learning_rate": 7.412012092850676e-06, "loss": 0.389, "step": 23075 }, { "epoch": 1.0589692992519848, "grad_norm": 0.4671902656555176, "learning_rate": 7.411797317672888e-06, "loss": 0.3655, "step": 23076 }, { "epoch": 1.0590151897572393, "grad_norm": 0.5070584416389465, "learning_rate": 7.411582536695491e-06, "loss": 0.3524, "step": 23077 }, { "epoch": 1.0590610802624938, "grad_norm": 0.45965513586997986, "learning_rate": 7.411367749918997e-06, "loss": 0.3852, "step": 23078 }, { "epoch": 1.059106970767748, "grad_norm": 0.47812527418136597, "learning_rate": 7.4111529573439265e-06, "loss": 0.3473, "step": 23079 }, { "epoch": 1.0591528612730026, "grad_norm": 0.4694713056087494, "learning_rate": 7.410938158970794e-06, "loss": 0.3476, "step": 23080 }, { "epoch": 1.059198751778257, "grad_norm": 0.4693313539028168, "learning_rate": 7.410723354800116e-06, "loss": 0.3695, "step": 23081 }, { "epoch": 1.0592446422835116, "grad_norm": 0.48752087354660034, "learning_rate": 7.410508544832407e-06, "loss": 0.3993, "step": 23082 }, { "epoch": 1.059290532788766, "grad_norm": 0.4743998050689697, "learning_rate": 7.4102937290681875e-06, "loss": 0.3366, "step": 23083 }, { "epoch": 1.0593364232940206, "grad_norm": 0.46278688311576843, "learning_rate": 7.410078907507973e-06, "loss": 0.394, "step": 23084 }, { "epoch": 1.0593823137992748, "grad_norm": 0.48054927587509155, "learning_rate": 7.409864080152279e-06, "loss": 0.3576, "step": 23085 }, { "epoch": 1.0594282043045293, "grad_norm": 0.46398183703422546, "learning_rate": 7.409649247001621e-06, "loss": 0.3442, "step": 23086 }, { "epoch": 1.0594740948097838, "grad_norm": 0.4538950026035309, "learning_rate": 7.409434408056519e-06, "loss": 0.3553, "step": 23087 }, { "epoch": 1.0595199853150383, "grad_norm": 0.5015366077423096, "learning_rate": 7.409219563317487e-06, "loss": 0.4608, "step": 23088 }, { "epoch": 1.0595658758202928, "grad_norm": 0.46929410099983215, "learning_rate": 7.409004712785041e-06, "loss": 0.3317, "step": 23089 }, { "epoch": 1.0596117663255473, "grad_norm": 0.4957880973815918, "learning_rate": 7.408789856459701e-06, "loss": 0.3769, "step": 23090 }, { "epoch": 1.0596576568308018, "grad_norm": 0.4621545374393463, "learning_rate": 7.4085749943419805e-06, "loss": 0.3403, "step": 23091 }, { "epoch": 1.059703547336056, "grad_norm": 0.45739486813545227, "learning_rate": 7.408360126432396e-06, "loss": 0.3399, "step": 23092 }, { "epoch": 1.0597494378413106, "grad_norm": 0.4303765296936035, "learning_rate": 7.408145252731469e-06, "loss": 0.309, "step": 23093 }, { "epoch": 1.059795328346565, "grad_norm": 0.48599541187286377, "learning_rate": 7.40793037323971e-06, "loss": 0.409, "step": 23094 }, { "epoch": 1.0598412188518196, "grad_norm": 0.4751538634300232, "learning_rate": 7.4077154879576396e-06, "loss": 0.3821, "step": 23095 }, { "epoch": 1.059887109357074, "grad_norm": 0.48826760053634644, "learning_rate": 7.407500596885773e-06, "loss": 0.3787, "step": 23096 }, { "epoch": 1.0599329998623286, "grad_norm": 0.4446692168712616, "learning_rate": 7.407285700024627e-06, "loss": 0.3352, "step": 23097 }, { "epoch": 1.0599788903675829, "grad_norm": 0.46527180075645447, "learning_rate": 7.407070797374719e-06, "loss": 0.3557, "step": 23098 }, { "epoch": 1.0600247808728374, "grad_norm": 0.4475729465484619, "learning_rate": 7.406855888936565e-06, "loss": 0.3325, "step": 23099 }, { "epoch": 1.0600706713780919, "grad_norm": 0.4595482349395752, "learning_rate": 7.406640974710682e-06, "loss": 0.3718, "step": 23100 }, { "epoch": 1.0601165618833464, "grad_norm": 0.430745005607605, "learning_rate": 7.406426054697587e-06, "loss": 0.2876, "step": 23101 }, { "epoch": 1.0601624523886009, "grad_norm": 0.4403243958950043, "learning_rate": 7.406211128897798e-06, "loss": 0.3052, "step": 23102 }, { "epoch": 1.0602083428938553, "grad_norm": 0.47933998703956604, "learning_rate": 7.40599619731183e-06, "loss": 0.4203, "step": 23103 }, { "epoch": 1.0602542333991096, "grad_norm": 0.5219547748565674, "learning_rate": 7.405781259940201e-06, "loss": 0.4483, "step": 23104 }, { "epoch": 1.0603001239043641, "grad_norm": 0.41271719336509705, "learning_rate": 7.405566316783426e-06, "loss": 0.2698, "step": 23105 }, { "epoch": 1.0603460144096186, "grad_norm": 0.410192608833313, "learning_rate": 7.405351367842024e-06, "loss": 0.2912, "step": 23106 }, { "epoch": 1.0603919049148731, "grad_norm": 0.465892493724823, "learning_rate": 7.405136413116511e-06, "loss": 0.371, "step": 23107 }, { "epoch": 1.0604377954201276, "grad_norm": 0.4614354968070984, "learning_rate": 7.404921452607405e-06, "loss": 0.3421, "step": 23108 }, { "epoch": 1.0604836859253821, "grad_norm": 0.4555666446685791, "learning_rate": 7.40470648631522e-06, "loss": 0.3047, "step": 23109 }, { "epoch": 1.0605295764306364, "grad_norm": 0.5120908617973328, "learning_rate": 7.404491514240477e-06, "loss": 0.4328, "step": 23110 }, { "epoch": 1.0605754669358909, "grad_norm": 0.46374353766441345, "learning_rate": 7.404276536383689e-06, "loss": 0.3572, "step": 23111 }, { "epoch": 1.0606213574411454, "grad_norm": 0.5516995191574097, "learning_rate": 7.4040615527453766e-06, "loss": 0.5163, "step": 23112 }, { "epoch": 1.0606672479463999, "grad_norm": 0.5185773968696594, "learning_rate": 7.403846563326054e-06, "loss": 0.4452, "step": 23113 }, { "epoch": 1.0607131384516544, "grad_norm": 0.48388969898223877, "learning_rate": 7.403631568126238e-06, "loss": 0.3541, "step": 23114 }, { "epoch": 1.0607590289569089, "grad_norm": 0.47464025020599365, "learning_rate": 7.4034165671464475e-06, "loss": 0.3576, "step": 23115 }, { "epoch": 1.0608049194621634, "grad_norm": 0.44310787320137024, "learning_rate": 7.4032015603871985e-06, "loss": 0.3125, "step": 23116 }, { "epoch": 1.0608508099674177, "grad_norm": 0.4825877845287323, "learning_rate": 7.40298654784901e-06, "loss": 0.3718, "step": 23117 }, { "epoch": 1.0608967004726721, "grad_norm": 0.45042884349823, "learning_rate": 7.402771529532396e-06, "loss": 0.3329, "step": 23118 }, { "epoch": 1.0609425909779266, "grad_norm": 0.4516732394695282, "learning_rate": 7.402556505437873e-06, "loss": 0.3462, "step": 23119 }, { "epoch": 1.0609884814831811, "grad_norm": 0.47098278999328613, "learning_rate": 7.402341475565962e-06, "loss": 0.3352, "step": 23120 }, { "epoch": 1.0610343719884356, "grad_norm": 0.4459187984466553, "learning_rate": 7.402126439917176e-06, "loss": 0.3664, "step": 23121 }, { "epoch": 1.0610802624936901, "grad_norm": 0.47348111867904663, "learning_rate": 7.401911398492037e-06, "loss": 0.4467, "step": 23122 }, { "epoch": 1.0611261529989444, "grad_norm": 0.4263926148414612, "learning_rate": 7.401696351291056e-06, "loss": 0.3143, "step": 23123 }, { "epoch": 1.061172043504199, "grad_norm": 0.48775508999824524, "learning_rate": 7.4014812983147565e-06, "loss": 0.4097, "step": 23124 }, { "epoch": 1.0612179340094534, "grad_norm": 0.4734116792678833, "learning_rate": 7.401266239563649e-06, "loss": 0.3436, "step": 23125 }, { "epoch": 1.061263824514708, "grad_norm": 0.44242510199546814, "learning_rate": 7.401051175038256e-06, "loss": 0.3596, "step": 23126 }, { "epoch": 1.0613097150199624, "grad_norm": 0.4346720576286316, "learning_rate": 7.400836104739093e-06, "loss": 0.284, "step": 23127 }, { "epoch": 1.061355605525217, "grad_norm": 0.49245285987854004, "learning_rate": 7.400621028666677e-06, "loss": 0.4474, "step": 23128 }, { "epoch": 1.0614014960304714, "grad_norm": 0.4828285276889801, "learning_rate": 7.400405946821524e-06, "loss": 0.3699, "step": 23129 }, { "epoch": 1.0614473865357257, "grad_norm": 0.48777469992637634, "learning_rate": 7.400190859204153e-06, "loss": 0.3516, "step": 23130 }, { "epoch": 1.0614932770409802, "grad_norm": 0.4609070420265198, "learning_rate": 7.39997576581508e-06, "loss": 0.3779, "step": 23131 }, { "epoch": 1.0615391675462347, "grad_norm": 0.5027076601982117, "learning_rate": 7.399760666654823e-06, "loss": 0.4028, "step": 23132 }, { "epoch": 1.0615850580514892, "grad_norm": 0.48082324862480164, "learning_rate": 7.3995455617238985e-06, "loss": 0.3607, "step": 23133 }, { "epoch": 1.0616309485567437, "grad_norm": 0.4866553843021393, "learning_rate": 7.399330451022826e-06, "loss": 0.3816, "step": 23134 }, { "epoch": 1.0616768390619982, "grad_norm": 0.45592764019966125, "learning_rate": 7.399115334552121e-06, "loss": 0.3465, "step": 23135 }, { "epoch": 1.0617227295672524, "grad_norm": 0.43276214599609375, "learning_rate": 7.398900212312298e-06, "loss": 0.2933, "step": 23136 }, { "epoch": 1.061768620072507, "grad_norm": 0.4474048316478729, "learning_rate": 7.398685084303878e-06, "loss": 0.2897, "step": 23137 }, { "epoch": 1.0618145105777614, "grad_norm": 0.4103580415248871, "learning_rate": 7.398469950527378e-06, "loss": 0.2656, "step": 23138 }, { "epoch": 1.061860401083016, "grad_norm": 0.44662556052207947, "learning_rate": 7.3982548109833165e-06, "loss": 0.3555, "step": 23139 }, { "epoch": 1.0619062915882704, "grad_norm": 0.4791778028011322, "learning_rate": 7.3980396656722066e-06, "loss": 0.3506, "step": 23140 }, { "epoch": 1.061952182093525, "grad_norm": 0.5053661465644836, "learning_rate": 7.397824514594569e-06, "loss": 0.3681, "step": 23141 }, { "epoch": 1.0619980725987792, "grad_norm": 0.460355281829834, "learning_rate": 7.39760935775092e-06, "loss": 0.3924, "step": 23142 }, { "epoch": 1.0620439631040337, "grad_norm": 0.4585028290748596, "learning_rate": 7.397394195141776e-06, "loss": 0.3829, "step": 23143 }, { "epoch": 1.0620898536092882, "grad_norm": 0.48816028237342834, "learning_rate": 7.397179026767658e-06, "loss": 0.4127, "step": 23144 }, { "epoch": 1.0621357441145427, "grad_norm": 0.47319790720939636, "learning_rate": 7.39696385262908e-06, "loss": 0.3609, "step": 23145 }, { "epoch": 1.0621816346197972, "grad_norm": 0.47193384170532227, "learning_rate": 7.396748672726561e-06, "loss": 0.3332, "step": 23146 }, { "epoch": 1.0622275251250517, "grad_norm": 0.4428778290748596, "learning_rate": 7.396533487060617e-06, "loss": 0.3268, "step": 23147 }, { "epoch": 1.0622734156303062, "grad_norm": 0.5135975480079651, "learning_rate": 7.396318295631766e-06, "loss": 0.4228, "step": 23148 }, { "epoch": 1.0623193061355605, "grad_norm": 0.46807822585105896, "learning_rate": 7.396103098440527e-06, "loss": 0.3524, "step": 23149 }, { "epoch": 1.062365196640815, "grad_norm": 0.48073747754096985, "learning_rate": 7.395887895487417e-06, "loss": 0.366, "step": 23150 }, { "epoch": 1.0624110871460695, "grad_norm": 0.48691093921661377, "learning_rate": 7.39567268677295e-06, "loss": 0.3823, "step": 23151 }, { "epoch": 1.062456977651324, "grad_norm": 0.4414513111114502, "learning_rate": 7.395457472297649e-06, "loss": 0.3341, "step": 23152 }, { "epoch": 1.0625028681565785, "grad_norm": 0.4797389507293701, "learning_rate": 7.395242252062027e-06, "loss": 0.3589, "step": 23153 }, { "epoch": 1.062548758661833, "grad_norm": 0.4618520140647888, "learning_rate": 7.395027026066604e-06, "loss": 0.3427, "step": 23154 }, { "epoch": 1.0625946491670872, "grad_norm": 0.4796909689903259, "learning_rate": 7.3948117943118954e-06, "loss": 0.3807, "step": 23155 }, { "epoch": 1.0626405396723417, "grad_norm": 0.4829982817173004, "learning_rate": 7.3945965567984235e-06, "loss": 0.4056, "step": 23156 }, { "epoch": 1.0626864301775962, "grad_norm": 0.42288678884506226, "learning_rate": 7.3943813135267e-06, "loss": 0.2691, "step": 23157 }, { "epoch": 1.0627323206828507, "grad_norm": 0.45509395003318787, "learning_rate": 7.394166064497246e-06, "loss": 0.3733, "step": 23158 }, { "epoch": 1.0627782111881052, "grad_norm": 0.42252933979034424, "learning_rate": 7.393950809710578e-06, "loss": 0.2908, "step": 23159 }, { "epoch": 1.0628241016933597, "grad_norm": 0.476764440536499, "learning_rate": 7.393735549167215e-06, "loss": 0.371, "step": 23160 }, { "epoch": 1.062869992198614, "grad_norm": 0.47859328985214233, "learning_rate": 7.393520282867672e-06, "loss": 0.3996, "step": 23161 }, { "epoch": 1.0629158827038685, "grad_norm": 0.45215800404548645, "learning_rate": 7.393305010812468e-06, "loss": 0.3298, "step": 23162 }, { "epoch": 1.062961773209123, "grad_norm": 0.46105122566223145, "learning_rate": 7.393089733002122e-06, "loss": 0.3205, "step": 23163 }, { "epoch": 1.0630076637143775, "grad_norm": 0.47521859407424927, "learning_rate": 7.392874449437151e-06, "loss": 0.4086, "step": 23164 }, { "epoch": 1.063053554219632, "grad_norm": 0.4428737163543701, "learning_rate": 7.39265916011807e-06, "loss": 0.3338, "step": 23165 }, { "epoch": 1.0630994447248865, "grad_norm": 0.42021840810775757, "learning_rate": 7.3924438650454e-06, "loss": 0.335, "step": 23166 }, { "epoch": 1.063145335230141, "grad_norm": 0.47668012976646423, "learning_rate": 7.392228564219658e-06, "loss": 0.3737, "step": 23167 }, { "epoch": 1.0631912257353953, "grad_norm": 0.46835047006607056, "learning_rate": 7.3920132576413605e-06, "loss": 0.3409, "step": 23168 }, { "epoch": 1.0632371162406498, "grad_norm": 0.44017091393470764, "learning_rate": 7.391797945311027e-06, "loss": 0.3242, "step": 23169 }, { "epoch": 1.0632830067459043, "grad_norm": 0.43188610672950745, "learning_rate": 7.391582627229173e-06, "loss": 0.2766, "step": 23170 }, { "epoch": 1.0633288972511588, "grad_norm": 0.4693223834037781, "learning_rate": 7.391367303396319e-06, "loss": 0.3401, "step": 23171 }, { "epoch": 1.0633747877564133, "grad_norm": 0.466511070728302, "learning_rate": 7.391151973812981e-06, "loss": 0.3639, "step": 23172 }, { "epoch": 1.0634206782616678, "grad_norm": 0.4652596414089203, "learning_rate": 7.390936638479676e-06, "loss": 0.3741, "step": 23173 }, { "epoch": 1.063466568766922, "grad_norm": 0.4702387750148773, "learning_rate": 7.390721297396924e-06, "loss": 0.357, "step": 23174 }, { "epoch": 1.0635124592721765, "grad_norm": 0.4831272065639496, "learning_rate": 7.3905059505652424e-06, "loss": 0.3535, "step": 23175 }, { "epoch": 1.063558349777431, "grad_norm": 0.49053022265434265, "learning_rate": 7.390290597985147e-06, "loss": 0.4237, "step": 23176 }, { "epoch": 1.0636042402826855, "grad_norm": 0.4525579810142517, "learning_rate": 7.390075239657157e-06, "loss": 0.3793, "step": 23177 }, { "epoch": 1.06365013078794, "grad_norm": 0.4613303542137146, "learning_rate": 7.389859875581793e-06, "loss": 0.3543, "step": 23178 }, { "epoch": 1.0636960212931945, "grad_norm": 0.4703523516654968, "learning_rate": 7.389644505759568e-06, "loss": 0.3624, "step": 23179 }, { "epoch": 1.063741911798449, "grad_norm": 0.5201200842857361, "learning_rate": 7.389429130191003e-06, "loss": 0.4358, "step": 23180 }, { "epoch": 1.0637878023037033, "grad_norm": 0.4796191453933716, "learning_rate": 7.389213748876614e-06, "loss": 0.368, "step": 23181 }, { "epoch": 1.0638336928089578, "grad_norm": 0.4687592089176178, "learning_rate": 7.388998361816921e-06, "loss": 0.389, "step": 23182 }, { "epoch": 1.0638795833142123, "grad_norm": 0.4758653938770294, "learning_rate": 7.388782969012441e-06, "loss": 0.3567, "step": 23183 }, { "epoch": 1.0639254738194668, "grad_norm": 0.42405059933662415, "learning_rate": 7.38856757046369e-06, "loss": 0.2867, "step": 23184 }, { "epoch": 1.0639713643247213, "grad_norm": 0.4148804247379303, "learning_rate": 7.38835216617119e-06, "loss": 0.2969, "step": 23185 }, { "epoch": 1.0640172548299758, "grad_norm": 0.462131142616272, "learning_rate": 7.388136756135457e-06, "loss": 0.3495, "step": 23186 }, { "epoch": 1.06406314533523, "grad_norm": 0.4592922329902649, "learning_rate": 7.387921340357007e-06, "loss": 0.3682, "step": 23187 }, { "epoch": 1.0641090358404846, "grad_norm": 0.47476041316986084, "learning_rate": 7.387705918836361e-06, "loss": 0.3651, "step": 23188 }, { "epoch": 1.064154926345739, "grad_norm": 0.4562109112739563, "learning_rate": 7.387490491574036e-06, "loss": 0.3206, "step": 23189 }, { "epoch": 1.0642008168509935, "grad_norm": 0.41860273480415344, "learning_rate": 7.387275058570549e-06, "loss": 0.3276, "step": 23190 }, { "epoch": 1.064246707356248, "grad_norm": 0.513583779335022, "learning_rate": 7.38705961982642e-06, "loss": 0.3918, "step": 23191 }, { "epoch": 1.0642925978615025, "grad_norm": 0.49761083722114563, "learning_rate": 7.386844175342165e-06, "loss": 0.3871, "step": 23192 }, { "epoch": 1.0643384883667568, "grad_norm": 0.48394280672073364, "learning_rate": 7.386628725118305e-06, "loss": 0.3469, "step": 23193 }, { "epoch": 1.0643843788720113, "grad_norm": 0.4566066861152649, "learning_rate": 7.386413269155353e-06, "loss": 0.3136, "step": 23194 }, { "epoch": 1.0644302693772658, "grad_norm": 0.4547611176967621, "learning_rate": 7.386197807453832e-06, "loss": 0.3344, "step": 23195 }, { "epoch": 1.0644761598825203, "grad_norm": 0.46940499544143677, "learning_rate": 7.385982340014258e-06, "loss": 0.3571, "step": 23196 }, { "epoch": 1.0645220503877748, "grad_norm": 0.42680832743644714, "learning_rate": 7.385766866837151e-06, "loss": 0.3315, "step": 23197 }, { "epoch": 1.0645679408930293, "grad_norm": 0.4713243246078491, "learning_rate": 7.385551387923025e-06, "loss": 0.3538, "step": 23198 }, { "epoch": 1.0646138313982836, "grad_norm": 0.46376854181289673, "learning_rate": 7.385335903272404e-06, "loss": 0.3255, "step": 23199 }, { "epoch": 1.064659721903538, "grad_norm": 0.4650602340698242, "learning_rate": 7.385120412885801e-06, "loss": 0.3971, "step": 23200 }, { "epoch": 1.0647056124087926, "grad_norm": 0.5004786252975464, "learning_rate": 7.384904916763736e-06, "loss": 0.3874, "step": 23201 }, { "epoch": 1.064751502914047, "grad_norm": 0.471303790807724, "learning_rate": 7.384689414906727e-06, "loss": 0.3913, "step": 23202 }, { "epoch": 1.0647973934193016, "grad_norm": 0.49919578433036804, "learning_rate": 7.384473907315295e-06, "loss": 0.3896, "step": 23203 }, { "epoch": 1.064843283924556, "grad_norm": 0.4801212251186371, "learning_rate": 7.384258393989954e-06, "loss": 0.4366, "step": 23204 }, { "epoch": 1.0648891744298106, "grad_norm": 0.4881910979747772, "learning_rate": 7.384042874931224e-06, "loss": 0.4146, "step": 23205 }, { "epoch": 1.0649350649350648, "grad_norm": 0.4771069586277008, "learning_rate": 7.383827350139624e-06, "loss": 0.35, "step": 23206 }, { "epoch": 1.0649809554403193, "grad_norm": 0.4326435327529907, "learning_rate": 7.383611819615671e-06, "loss": 0.2743, "step": 23207 }, { "epoch": 1.0650268459455738, "grad_norm": 0.49072134494781494, "learning_rate": 7.383396283359883e-06, "loss": 0.4059, "step": 23208 }, { "epoch": 1.0650727364508283, "grad_norm": 0.4681280255317688, "learning_rate": 7.383180741372781e-06, "loss": 0.3581, "step": 23209 }, { "epoch": 1.0651186269560828, "grad_norm": 0.42797401547431946, "learning_rate": 7.382965193654881e-06, "loss": 0.3102, "step": 23210 }, { "epoch": 1.0651645174613373, "grad_norm": 0.46155017614364624, "learning_rate": 7.3827496402067024e-06, "loss": 0.3426, "step": 23211 }, { "epoch": 1.0652104079665916, "grad_norm": 0.46186041831970215, "learning_rate": 7.3825340810287605e-06, "loss": 0.3193, "step": 23212 }, { "epoch": 1.065256298471846, "grad_norm": 0.4316633343696594, "learning_rate": 7.382318516121578e-06, "loss": 0.3385, "step": 23213 }, { "epoch": 1.0653021889771006, "grad_norm": 0.469899445772171, "learning_rate": 7.382102945485671e-06, "loss": 0.3812, "step": 23214 }, { "epoch": 1.065348079482355, "grad_norm": 0.44728755950927734, "learning_rate": 7.381887369121558e-06, "loss": 0.3416, "step": 23215 }, { "epoch": 1.0653939699876096, "grad_norm": 0.46301066875457764, "learning_rate": 7.381671787029758e-06, "loss": 0.3351, "step": 23216 }, { "epoch": 1.065439860492864, "grad_norm": 0.49562713503837585, "learning_rate": 7.381456199210789e-06, "loss": 0.2947, "step": 23217 }, { "epoch": 1.0654857509981186, "grad_norm": 0.4708433747291565, "learning_rate": 7.381240605665169e-06, "loss": 0.388, "step": 23218 }, { "epoch": 1.0655316415033729, "grad_norm": 0.4784267842769623, "learning_rate": 7.381025006393417e-06, "loss": 0.3432, "step": 23219 }, { "epoch": 1.0655775320086274, "grad_norm": 0.4662189185619354, "learning_rate": 7.380809401396052e-06, "loss": 0.3451, "step": 23220 }, { "epoch": 1.0656234225138819, "grad_norm": 0.5102803111076355, "learning_rate": 7.380593790673592e-06, "loss": 0.4064, "step": 23221 }, { "epoch": 1.0656693130191364, "grad_norm": 0.46173325181007385, "learning_rate": 7.380378174226554e-06, "loss": 0.2801, "step": 23222 }, { "epoch": 1.0657152035243909, "grad_norm": 0.4564485251903534, "learning_rate": 7.380162552055458e-06, "loss": 0.3569, "step": 23223 }, { "epoch": 1.0657610940296454, "grad_norm": 0.44951969385147095, "learning_rate": 7.3799469241608226e-06, "loss": 0.346, "step": 23224 }, { "epoch": 1.0658069845348996, "grad_norm": 0.4897228181362152, "learning_rate": 7.3797312905431654e-06, "loss": 0.3954, "step": 23225 }, { "epoch": 1.0658528750401541, "grad_norm": 0.4838710427284241, "learning_rate": 7.379515651203008e-06, "loss": 0.3318, "step": 23226 }, { "epoch": 1.0658987655454086, "grad_norm": 0.4566514194011688, "learning_rate": 7.379300006140863e-06, "loss": 0.3592, "step": 23227 }, { "epoch": 1.0659446560506631, "grad_norm": 0.5168399810791016, "learning_rate": 7.379084355357254e-06, "loss": 0.4536, "step": 23228 }, { "epoch": 1.0659905465559176, "grad_norm": 0.47856512665748596, "learning_rate": 7.378868698852698e-06, "loss": 0.3697, "step": 23229 }, { "epoch": 1.0660364370611721, "grad_norm": 0.5143166780471802, "learning_rate": 7.378653036627713e-06, "loss": 0.2195, "step": 23230 }, { "epoch": 1.0660823275664266, "grad_norm": 0.4776109755039215, "learning_rate": 7.378437368682819e-06, "loss": 0.3867, "step": 23231 }, { "epoch": 1.066128218071681, "grad_norm": 0.45903000235557556, "learning_rate": 7.3782216950185335e-06, "loss": 0.3569, "step": 23232 }, { "epoch": 1.0661741085769354, "grad_norm": 0.4664532244205475, "learning_rate": 7.378006015635375e-06, "loss": 0.3261, "step": 23233 }, { "epoch": 1.06621999908219, "grad_norm": 0.4476882517337799, "learning_rate": 7.377790330533864e-06, "loss": 0.3163, "step": 23234 }, { "epoch": 1.0662658895874444, "grad_norm": 0.4685955345630646, "learning_rate": 7.377574639714515e-06, "loss": 0.3792, "step": 23235 }, { "epoch": 1.066311780092699, "grad_norm": 0.4703274965286255, "learning_rate": 7.377358943177851e-06, "loss": 0.3535, "step": 23236 }, { "epoch": 1.0663576705979532, "grad_norm": 0.4951554238796234, "learning_rate": 7.377143240924392e-06, "loss": 0.3788, "step": 23237 }, { "epoch": 1.0664035611032077, "grad_norm": 0.5084136724472046, "learning_rate": 7.376927532954649e-06, "loss": 0.3986, "step": 23238 }, { "epoch": 1.0664494516084622, "grad_norm": 0.512022078037262, "learning_rate": 7.376711819269147e-06, "loss": 0.4653, "step": 23239 }, { "epoch": 1.0664953421137167, "grad_norm": 0.4714144766330719, "learning_rate": 7.376496099868404e-06, "loss": 0.371, "step": 23240 }, { "epoch": 1.0665412326189712, "grad_norm": 0.4414292275905609, "learning_rate": 7.376280374752936e-06, "loss": 0.2868, "step": 23241 }, { "epoch": 1.0665871231242257, "grad_norm": 0.4290190041065216, "learning_rate": 7.376064643923266e-06, "loss": 0.2913, "step": 23242 }, { "epoch": 1.0666330136294802, "grad_norm": 0.4632873833179474, "learning_rate": 7.37584890737991e-06, "loss": 0.3501, "step": 23243 }, { "epoch": 1.0666789041347344, "grad_norm": 0.4805276691913605, "learning_rate": 7.375633165123386e-06, "loss": 0.3605, "step": 23244 }, { "epoch": 1.066724794639989, "grad_norm": 0.4731072783470154, "learning_rate": 7.375417417154216e-06, "loss": 0.3668, "step": 23245 }, { "epoch": 1.0667706851452434, "grad_norm": 0.5391817688941956, "learning_rate": 7.375201663472915e-06, "loss": 0.4932, "step": 23246 }, { "epoch": 1.066816575650498, "grad_norm": 0.43861159682273865, "learning_rate": 7.374985904080003e-06, "loss": 0.3121, "step": 23247 }, { "epoch": 1.0668624661557524, "grad_norm": 0.4650154411792755, "learning_rate": 7.374770138976002e-06, "loss": 0.3502, "step": 23248 }, { "epoch": 1.066908356661007, "grad_norm": 0.4767824411392212, "learning_rate": 7.374554368161426e-06, "loss": 0.3588, "step": 23249 }, { "epoch": 1.0669542471662612, "grad_norm": 0.435228556394577, "learning_rate": 7.3743385916367974e-06, "loss": 0.2959, "step": 23250 }, { "epoch": 1.0670001376715157, "grad_norm": 0.44154050946235657, "learning_rate": 7.374122809402634e-06, "loss": 0.3001, "step": 23251 }, { "epoch": 1.0670460281767702, "grad_norm": 0.4635991156101227, "learning_rate": 7.373907021459454e-06, "loss": 0.344, "step": 23252 }, { "epoch": 1.0670919186820247, "grad_norm": 0.44885897636413574, "learning_rate": 7.373691227807778e-06, "loss": 0.3506, "step": 23253 }, { "epoch": 1.0671378091872792, "grad_norm": 0.4561697840690613, "learning_rate": 7.373475428448123e-06, "loss": 0.3225, "step": 23254 }, { "epoch": 1.0671836996925337, "grad_norm": 0.4934261739253998, "learning_rate": 7.373259623381007e-06, "loss": 0.3946, "step": 23255 }, { "epoch": 1.0672295901977882, "grad_norm": 0.5005345940589905, "learning_rate": 7.373043812606951e-06, "loss": 0.3925, "step": 23256 }, { "epoch": 1.0672754807030425, "grad_norm": 0.47204315662384033, "learning_rate": 7.372827996126476e-06, "loss": 0.3793, "step": 23257 }, { "epoch": 1.067321371208297, "grad_norm": 0.4954288601875305, "learning_rate": 7.372612173940097e-06, "loss": 0.3716, "step": 23258 }, { "epoch": 1.0673672617135515, "grad_norm": 0.5067251324653625, "learning_rate": 7.372396346048335e-06, "loss": 0.3958, "step": 23259 }, { "epoch": 1.067413152218806, "grad_norm": 0.48203274607658386, "learning_rate": 7.372180512451709e-06, "loss": 0.2547, "step": 23260 }, { "epoch": 1.0674590427240604, "grad_norm": 0.4858340919017792, "learning_rate": 7.371964673150735e-06, "loss": 0.3537, "step": 23261 }, { "epoch": 1.067504933229315, "grad_norm": 0.47714370489120483, "learning_rate": 7.371748828145937e-06, "loss": 0.3513, "step": 23262 }, { "epoch": 1.0675508237345692, "grad_norm": 0.4616807699203491, "learning_rate": 7.371532977437831e-06, "loss": 0.3211, "step": 23263 }, { "epoch": 1.0675967142398237, "grad_norm": 0.44778233766555786, "learning_rate": 7.371317121026937e-06, "loss": 0.3084, "step": 23264 }, { "epoch": 1.0676426047450782, "grad_norm": 0.4856511652469635, "learning_rate": 7.371101258913775e-06, "loss": 0.3442, "step": 23265 }, { "epoch": 1.0676884952503327, "grad_norm": 0.49679791927337646, "learning_rate": 7.37088539109886e-06, "loss": 0.398, "step": 23266 }, { "epoch": 1.0677343857555872, "grad_norm": 0.4765186011791229, "learning_rate": 7.370669517582714e-06, "loss": 0.357, "step": 23267 }, { "epoch": 1.0677802762608417, "grad_norm": 0.5197485089302063, "learning_rate": 7.370453638365858e-06, "loss": 0.3834, "step": 23268 }, { "epoch": 1.0678261667660962, "grad_norm": 0.5028704404830933, "learning_rate": 7.370237753448808e-06, "loss": 0.4068, "step": 23269 }, { "epoch": 1.0678720572713505, "grad_norm": 0.44178977608680725, "learning_rate": 7.370021862832084e-06, "loss": 0.2785, "step": 23270 }, { "epoch": 1.067917947776605, "grad_norm": 0.44496190547943115, "learning_rate": 7.369805966516207e-06, "loss": 0.3172, "step": 23271 }, { "epoch": 1.0679638382818595, "grad_norm": 0.4807088375091553, "learning_rate": 7.369590064501693e-06, "loss": 0.4275, "step": 23272 }, { "epoch": 1.068009728787114, "grad_norm": 0.4490721821784973, "learning_rate": 7.369374156789063e-06, "loss": 0.3447, "step": 23273 }, { "epoch": 1.0680556192923685, "grad_norm": 0.48562443256378174, "learning_rate": 7.369158243378836e-06, "loss": 0.3905, "step": 23274 }, { "epoch": 1.068101509797623, "grad_norm": 0.4537728428840637, "learning_rate": 7.368942324271532e-06, "loss": 0.3391, "step": 23275 }, { "epoch": 1.0681474003028772, "grad_norm": 0.48548364639282227, "learning_rate": 7.368726399467668e-06, "loss": 0.3217, "step": 23276 }, { "epoch": 1.0681932908081317, "grad_norm": 0.4656364321708679, "learning_rate": 7.368510468967767e-06, "loss": 0.3615, "step": 23277 }, { "epoch": 1.0682391813133862, "grad_norm": 0.4781460464000702, "learning_rate": 7.368294532772344e-06, "loss": 0.3479, "step": 23278 }, { "epoch": 1.0682850718186407, "grad_norm": 0.4770740568637848, "learning_rate": 7.36807859088192e-06, "loss": 0.382, "step": 23279 }, { "epoch": 1.0683309623238952, "grad_norm": 0.5299501419067383, "learning_rate": 7.367862643297015e-06, "loss": 0.4021, "step": 23280 }, { "epoch": 1.0683768528291497, "grad_norm": 0.4940410554409027, "learning_rate": 7.367646690018146e-06, "loss": 0.3752, "step": 23281 }, { "epoch": 1.068422743334404, "grad_norm": 0.4970751106739044, "learning_rate": 7.367430731045837e-06, "loss": 0.405, "step": 23282 }, { "epoch": 1.0684686338396585, "grad_norm": 0.6587731242179871, "learning_rate": 7.3672147663806015e-06, "loss": 0.4599, "step": 23283 }, { "epoch": 1.068514524344913, "grad_norm": 0.46962442994117737, "learning_rate": 7.366998796022962e-06, "loss": 0.3366, "step": 23284 }, { "epoch": 1.0685604148501675, "grad_norm": 0.4870610535144806, "learning_rate": 7.366782819973439e-06, "loss": 0.3511, "step": 23285 }, { "epoch": 1.068606305355422, "grad_norm": 0.4577733278274536, "learning_rate": 7.36656683823255e-06, "loss": 0.3258, "step": 23286 }, { "epoch": 1.0686521958606765, "grad_norm": 0.4981312155723572, "learning_rate": 7.3663508508008134e-06, "loss": 0.4167, "step": 23287 }, { "epoch": 1.0686980863659308, "grad_norm": 0.48805591464042664, "learning_rate": 7.366134857678751e-06, "loss": 0.3602, "step": 23288 }, { "epoch": 1.0687439768711853, "grad_norm": 0.46588394045829773, "learning_rate": 7.36591885886688e-06, "loss": 0.3851, "step": 23289 }, { "epoch": 1.0687898673764398, "grad_norm": 0.4560999274253845, "learning_rate": 7.365702854365721e-06, "loss": 0.3214, "step": 23290 }, { "epoch": 1.0688357578816943, "grad_norm": 0.521303117275238, "learning_rate": 7.365486844175794e-06, "loss": 0.398, "step": 23291 }, { "epoch": 1.0688816483869488, "grad_norm": 0.46699458360671997, "learning_rate": 7.365270828297618e-06, "loss": 0.3693, "step": 23292 }, { "epoch": 1.0689275388922033, "grad_norm": 0.43613696098327637, "learning_rate": 7.365054806731712e-06, "loss": 0.287, "step": 23293 }, { "epoch": 1.0689734293974578, "grad_norm": 0.47623035311698914, "learning_rate": 7.364838779478594e-06, "loss": 0.369, "step": 23294 }, { "epoch": 1.069019319902712, "grad_norm": 0.4262254536151886, "learning_rate": 7.3646227465387865e-06, "loss": 0.3035, "step": 23295 }, { "epoch": 1.0690652104079665, "grad_norm": 0.48829540610313416, "learning_rate": 7.364406707912808e-06, "loss": 0.4084, "step": 23296 }, { "epoch": 1.069111100913221, "grad_norm": 0.4771776795387268, "learning_rate": 7.364190663601178e-06, "loss": 0.371, "step": 23297 }, { "epoch": 1.0691569914184755, "grad_norm": 0.44669926166534424, "learning_rate": 7.363974613604413e-06, "loss": 0.28, "step": 23298 }, { "epoch": 1.06920288192373, "grad_norm": 0.5774834156036377, "learning_rate": 7.363758557923037e-06, "loss": 0.2672, "step": 23299 }, { "epoch": 1.0692487724289845, "grad_norm": 0.48455867171287537, "learning_rate": 7.363542496557568e-06, "loss": 0.4179, "step": 23300 }, { "epoch": 1.0692946629342388, "grad_norm": 0.44213566184043884, "learning_rate": 7.363326429508524e-06, "loss": 0.3204, "step": 23301 }, { "epoch": 1.0693405534394933, "grad_norm": 0.49709513783454895, "learning_rate": 7.3631103567764266e-06, "loss": 0.3919, "step": 23302 }, { "epoch": 1.0693864439447478, "grad_norm": 0.5035470128059387, "learning_rate": 7.362894278361795e-06, "loss": 0.4034, "step": 23303 }, { "epoch": 1.0694323344500023, "grad_norm": 0.46188509464263916, "learning_rate": 7.362678194265147e-06, "loss": 0.2875, "step": 23304 }, { "epoch": 1.0694782249552568, "grad_norm": 0.5251035094261169, "learning_rate": 7.362462104487005e-06, "loss": 0.4345, "step": 23305 }, { "epoch": 1.0695241154605113, "grad_norm": 0.48968422412872314, "learning_rate": 7.362246009027887e-06, "loss": 0.3836, "step": 23306 }, { "epoch": 1.0695700059657658, "grad_norm": 0.46700963377952576, "learning_rate": 7.362029907888313e-06, "loss": 0.3548, "step": 23307 }, { "epoch": 1.06961589647102, "grad_norm": 0.49318668246269226, "learning_rate": 7.361813801068802e-06, "loss": 0.3353, "step": 23308 }, { "epoch": 1.0696617869762746, "grad_norm": 0.4985136091709137, "learning_rate": 7.3615976885698745e-06, "loss": 0.367, "step": 23309 }, { "epoch": 1.069707677481529, "grad_norm": 0.4702286422252655, "learning_rate": 7.36138157039205e-06, "loss": 0.3426, "step": 23310 }, { "epoch": 1.0697535679867836, "grad_norm": 0.45572763681411743, "learning_rate": 7.361165446535848e-06, "loss": 0.3397, "step": 23311 }, { "epoch": 1.069799458492038, "grad_norm": 0.443445086479187, "learning_rate": 7.360949317001787e-06, "loss": 0.3118, "step": 23312 }, { "epoch": 1.0698453489972926, "grad_norm": 0.48508909344673157, "learning_rate": 7.360733181790391e-06, "loss": 0.4216, "step": 23313 }, { "epoch": 1.0698912395025468, "grad_norm": 0.4612192213535309, "learning_rate": 7.360517040902176e-06, "loss": 0.3493, "step": 23314 }, { "epoch": 1.0699371300078013, "grad_norm": 0.47229692339897156, "learning_rate": 7.36030089433766e-06, "loss": 0.3696, "step": 23315 }, { "epoch": 1.0699830205130558, "grad_norm": 0.4459943175315857, "learning_rate": 7.360084742097367e-06, "loss": 0.3022, "step": 23316 }, { "epoch": 1.0700289110183103, "grad_norm": 0.451636403799057, "learning_rate": 7.359868584181815e-06, "loss": 0.3013, "step": 23317 }, { "epoch": 1.0700748015235648, "grad_norm": 0.49632880091667175, "learning_rate": 7.3596524205915236e-06, "loss": 0.3557, "step": 23318 }, { "epoch": 1.0701206920288193, "grad_norm": 0.48757219314575195, "learning_rate": 7.359436251327014e-06, "loss": 0.408, "step": 23319 }, { "epoch": 1.0701665825340738, "grad_norm": 0.4749176800251007, "learning_rate": 7.3592200763888035e-06, "loss": 0.3829, "step": 23320 }, { "epoch": 1.070212473039328, "grad_norm": 0.4757743179798126, "learning_rate": 7.359003895777412e-06, "loss": 0.3764, "step": 23321 }, { "epoch": 1.0702583635445826, "grad_norm": 0.47482824325561523, "learning_rate": 7.358787709493364e-06, "loss": 0.352, "step": 23322 }, { "epoch": 1.070304254049837, "grad_norm": 0.4894445836544037, "learning_rate": 7.358571517537174e-06, "loss": 0.4084, "step": 23323 }, { "epoch": 1.0703501445550916, "grad_norm": 0.44325003027915955, "learning_rate": 7.358355319909365e-06, "loss": 0.3496, "step": 23324 }, { "epoch": 1.070396035060346, "grad_norm": 0.4937640428543091, "learning_rate": 7.358139116610454e-06, "loss": 0.3694, "step": 23325 }, { "epoch": 1.0704419255656004, "grad_norm": 0.4893208146095276, "learning_rate": 7.357922907640964e-06, "loss": 0.3918, "step": 23326 }, { "epoch": 1.0704878160708549, "grad_norm": 0.4667434096336365, "learning_rate": 7.357706693001414e-06, "loss": 0.342, "step": 23327 }, { "epoch": 1.0705337065761094, "grad_norm": 0.4757764935493469, "learning_rate": 7.357490472692323e-06, "loss": 0.3533, "step": 23328 }, { "epoch": 1.0705795970813639, "grad_norm": 0.4753258228302002, "learning_rate": 7.357274246714211e-06, "loss": 0.3798, "step": 23329 }, { "epoch": 1.0706254875866184, "grad_norm": 0.43887829780578613, "learning_rate": 7.357058015067599e-06, "loss": 0.3017, "step": 23330 }, { "epoch": 1.0706713780918728, "grad_norm": 0.49059784412384033, "learning_rate": 7.356841777753005e-06, "loss": 0.3649, "step": 23331 }, { "epoch": 1.0707172685971273, "grad_norm": 0.4883278012275696, "learning_rate": 7.356625534770951e-06, "loss": 0.3915, "step": 23332 }, { "epoch": 1.0707631591023816, "grad_norm": 0.46442916989326477, "learning_rate": 7.356409286121957e-06, "loss": 0.3725, "step": 23333 }, { "epoch": 1.0708090496076361, "grad_norm": 0.47397908568382263, "learning_rate": 7.356193031806542e-06, "loss": 0.3654, "step": 23334 }, { "epoch": 1.0708549401128906, "grad_norm": 0.490227073431015, "learning_rate": 7.355976771825227e-06, "loss": 0.334, "step": 23335 }, { "epoch": 1.0709008306181451, "grad_norm": 0.47585251927375793, "learning_rate": 7.355760506178532e-06, "loss": 0.3221, "step": 23336 }, { "epoch": 1.0709467211233996, "grad_norm": 0.45301082730293274, "learning_rate": 7.355544234866975e-06, "loss": 0.3593, "step": 23337 }, { "epoch": 1.0709926116286541, "grad_norm": 0.4856165945529938, "learning_rate": 7.355327957891077e-06, "loss": 0.374, "step": 23338 }, { "epoch": 1.0710385021339084, "grad_norm": 0.5137026906013489, "learning_rate": 7.355111675251361e-06, "loss": 0.4345, "step": 23339 }, { "epoch": 1.0710843926391629, "grad_norm": 0.4870001971721649, "learning_rate": 7.354895386948343e-06, "loss": 0.4087, "step": 23340 }, { "epoch": 1.0711302831444174, "grad_norm": 0.41522011160850525, "learning_rate": 7.354679092982545e-06, "loss": 0.2561, "step": 23341 }, { "epoch": 1.0711761736496719, "grad_norm": 0.4955638647079468, "learning_rate": 7.354462793354488e-06, "loss": 0.3879, "step": 23342 }, { "epoch": 1.0712220641549264, "grad_norm": 0.44853171706199646, "learning_rate": 7.3542464880646895e-06, "loss": 0.343, "step": 23343 }, { "epoch": 1.0712679546601809, "grad_norm": 0.4849448800086975, "learning_rate": 7.3540301771136726e-06, "loss": 0.3392, "step": 23344 }, { "epoch": 1.0713138451654354, "grad_norm": 0.49431079626083374, "learning_rate": 7.353813860501955e-06, "loss": 0.3212, "step": 23345 }, { "epoch": 1.0713597356706897, "grad_norm": 0.4501889646053314, "learning_rate": 7.353597538230059e-06, "loss": 0.3338, "step": 23346 }, { "epoch": 1.0714056261759441, "grad_norm": 0.46526914834976196, "learning_rate": 7.353381210298503e-06, "loss": 0.3298, "step": 23347 }, { "epoch": 1.0714515166811986, "grad_norm": 0.47608691453933716, "learning_rate": 7.353164876707808e-06, "loss": 0.3106, "step": 23348 }, { "epoch": 1.0714974071864531, "grad_norm": 0.46031177043914795, "learning_rate": 7.352948537458493e-06, "loss": 0.3112, "step": 23349 }, { "epoch": 1.0715432976917076, "grad_norm": 0.539963960647583, "learning_rate": 7.352732192551081e-06, "loss": 0.3646, "step": 23350 }, { "epoch": 1.0715891881969621, "grad_norm": 0.5081068873405457, "learning_rate": 7.352515841986091e-06, "loss": 0.3758, "step": 23351 }, { "epoch": 1.0716350787022164, "grad_norm": 0.4645172357559204, "learning_rate": 7.35229948576404e-06, "loss": 0.3316, "step": 23352 }, { "epoch": 1.071680969207471, "grad_norm": 0.5112524628639221, "learning_rate": 7.352083123885455e-06, "loss": 0.4177, "step": 23353 }, { "epoch": 1.0717268597127254, "grad_norm": 0.4954569339752197, "learning_rate": 7.3518667563508495e-06, "loss": 0.3525, "step": 23354 }, { "epoch": 1.07177275021798, "grad_norm": 0.4343908429145813, "learning_rate": 7.351650383160748e-06, "loss": 0.3266, "step": 23355 }, { "epoch": 1.0718186407232344, "grad_norm": 0.47046366333961487, "learning_rate": 7.351434004315669e-06, "loss": 0.3291, "step": 23356 }, { "epoch": 1.071864531228489, "grad_norm": 0.5239343047142029, "learning_rate": 7.3512176198161345e-06, "loss": 0.387, "step": 23357 }, { "epoch": 1.0719104217337434, "grad_norm": 0.48323768377304077, "learning_rate": 7.351001229662664e-06, "loss": 0.3725, "step": 23358 }, { "epoch": 1.0719563122389977, "grad_norm": 0.5232014656066895, "learning_rate": 7.3507848338557755e-06, "loss": 0.4212, "step": 23359 }, { "epoch": 1.0720022027442522, "grad_norm": 0.42359331250190735, "learning_rate": 7.350568432395992e-06, "loss": 0.3072, "step": 23360 }, { "epoch": 1.0720480932495067, "grad_norm": 0.4872820973396301, "learning_rate": 7.350352025283833e-06, "loss": 0.3852, "step": 23361 }, { "epoch": 1.0720939837547612, "grad_norm": 0.5258241295814514, "learning_rate": 7.350135612519822e-06, "loss": 0.4357, "step": 23362 }, { "epoch": 1.0721398742600157, "grad_norm": 0.5280765891075134, "learning_rate": 7.3499191941044736e-06, "loss": 0.3844, "step": 23363 }, { "epoch": 1.0721857647652702, "grad_norm": 0.4724825620651245, "learning_rate": 7.349702770038312e-06, "loss": 0.3237, "step": 23364 }, { "epoch": 1.0722316552705244, "grad_norm": 0.5144622921943665, "learning_rate": 7.349486340321856e-06, "loss": 0.4142, "step": 23365 }, { "epoch": 1.072277545775779, "grad_norm": 0.4855806231498718, "learning_rate": 7.349269904955627e-06, "loss": 0.3988, "step": 23366 }, { "epoch": 1.0723234362810334, "grad_norm": 0.4775330126285553, "learning_rate": 7.349053463940146e-06, "loss": 0.3406, "step": 23367 }, { "epoch": 1.072369326786288, "grad_norm": 0.4438757300376892, "learning_rate": 7.3488370172759334e-06, "loss": 0.2928, "step": 23368 }, { "epoch": 1.0724152172915424, "grad_norm": 0.44960495829582214, "learning_rate": 7.348620564963509e-06, "loss": 0.3377, "step": 23369 }, { "epoch": 1.072461107796797, "grad_norm": 0.5064049363136292, "learning_rate": 7.348404107003393e-06, "loss": 0.4473, "step": 23370 }, { "epoch": 1.0725069983020512, "grad_norm": 0.4997260570526123, "learning_rate": 7.348187643396105e-06, "loss": 0.4296, "step": 23371 }, { "epoch": 1.0725528888073057, "grad_norm": 0.5003837943077087, "learning_rate": 7.34797117414217e-06, "loss": 0.3817, "step": 23372 }, { "epoch": 1.0725987793125602, "grad_norm": 0.4416740834712982, "learning_rate": 7.3477546992421035e-06, "loss": 0.2908, "step": 23373 }, { "epoch": 1.0726446698178147, "grad_norm": 0.4635377824306488, "learning_rate": 7.347538218696427e-06, "loss": 0.3774, "step": 23374 }, { "epoch": 1.0726905603230692, "grad_norm": 0.507420539855957, "learning_rate": 7.347321732505663e-06, "loss": 0.4512, "step": 23375 }, { "epoch": 1.0727364508283237, "grad_norm": 0.46507561206817627, "learning_rate": 7.34710524067033e-06, "loss": 0.3801, "step": 23376 }, { "epoch": 1.072782341333578, "grad_norm": 0.4488803744316101, "learning_rate": 7.346888743190951e-06, "loss": 0.3262, "step": 23377 }, { "epoch": 1.0728282318388325, "grad_norm": 0.500082790851593, "learning_rate": 7.346672240068044e-06, "loss": 0.4205, "step": 23378 }, { "epoch": 1.072874122344087, "grad_norm": 0.4862593412399292, "learning_rate": 7.346455731302134e-06, "loss": 0.4223, "step": 23379 }, { "epoch": 1.0729200128493415, "grad_norm": 0.4844341576099396, "learning_rate": 7.346239216893735e-06, "loss": 0.409, "step": 23380 }, { "epoch": 1.072965903354596, "grad_norm": 0.46021410822868347, "learning_rate": 7.346022696843373e-06, "loss": 0.3262, "step": 23381 }, { "epoch": 1.0730117938598505, "grad_norm": 0.46575766801834106, "learning_rate": 7.345806171151565e-06, "loss": 0.3295, "step": 23382 }, { "epoch": 1.073057684365105, "grad_norm": 0.4837310016155243, "learning_rate": 7.345589639818835e-06, "loss": 0.3943, "step": 23383 }, { "epoch": 1.0731035748703592, "grad_norm": 0.45006975531578064, "learning_rate": 7.345373102845703e-06, "loss": 0.2927, "step": 23384 }, { "epoch": 1.0731494653756137, "grad_norm": 0.4840408265590668, "learning_rate": 7.345156560232687e-06, "loss": 0.3894, "step": 23385 }, { "epoch": 1.0731953558808682, "grad_norm": 0.4357624053955078, "learning_rate": 7.34494001198031e-06, "loss": 0.3197, "step": 23386 }, { "epoch": 1.0732412463861227, "grad_norm": 0.48195356130599976, "learning_rate": 7.344723458089094e-06, "loss": 0.3612, "step": 23387 }, { "epoch": 1.0732871368913772, "grad_norm": 0.48933321237564087, "learning_rate": 7.344506898559556e-06, "loss": 0.3396, "step": 23388 }, { "epoch": 1.0733330273966317, "grad_norm": 0.4602680802345276, "learning_rate": 7.34429033339222e-06, "loss": 0.3529, "step": 23389 }, { "epoch": 1.073378917901886, "grad_norm": 0.46789172291755676, "learning_rate": 7.344073762587605e-06, "loss": 0.3431, "step": 23390 }, { "epoch": 1.0734248084071405, "grad_norm": 0.4876914918422699, "learning_rate": 7.343857186146234e-06, "loss": 0.3635, "step": 23391 }, { "epoch": 1.073470698912395, "grad_norm": 0.5024553537368774, "learning_rate": 7.343640604068625e-06, "loss": 0.3824, "step": 23392 }, { "epoch": 1.0735165894176495, "grad_norm": 0.48838523030281067, "learning_rate": 7.3434240163552996e-06, "loss": 0.3726, "step": 23393 }, { "epoch": 1.073562479922904, "grad_norm": 0.45308491587638855, "learning_rate": 7.3432074230067794e-06, "loss": 0.3031, "step": 23394 }, { "epoch": 1.0736083704281585, "grad_norm": 0.5249916911125183, "learning_rate": 7.342990824023586e-06, "loss": 0.4444, "step": 23395 }, { "epoch": 1.073654260933413, "grad_norm": 0.5153510570526123, "learning_rate": 7.342774219406238e-06, "loss": 0.4147, "step": 23396 }, { "epoch": 1.0737001514386673, "grad_norm": 0.41731610894203186, "learning_rate": 7.3425576091552566e-06, "loss": 0.3057, "step": 23397 }, { "epoch": 1.0737460419439218, "grad_norm": 0.48786604404449463, "learning_rate": 7.342340993271164e-06, "loss": 0.3932, "step": 23398 }, { "epoch": 1.0737919324491763, "grad_norm": 0.49588921666145325, "learning_rate": 7.342124371754481e-06, "loss": 0.4131, "step": 23399 }, { "epoch": 1.0738378229544308, "grad_norm": 0.4938793480396271, "learning_rate": 7.341907744605729e-06, "loss": 0.3637, "step": 23400 }, { "epoch": 1.0738837134596853, "grad_norm": 0.4338071346282959, "learning_rate": 7.341691111825428e-06, "loss": 0.3108, "step": 23401 }, { "epoch": 1.0739296039649397, "grad_norm": 0.4732199013233185, "learning_rate": 7.341474473414097e-06, "loss": 0.3732, "step": 23402 }, { "epoch": 1.073975494470194, "grad_norm": 0.4547850489616394, "learning_rate": 7.341257829372259e-06, "loss": 0.3491, "step": 23403 }, { "epoch": 1.0740213849754485, "grad_norm": 0.435747891664505, "learning_rate": 7.341041179700436e-06, "loss": 0.3368, "step": 23404 }, { "epoch": 1.074067275480703, "grad_norm": 0.4430619180202484, "learning_rate": 7.340824524399148e-06, "loss": 0.3187, "step": 23405 }, { "epoch": 1.0741131659859575, "grad_norm": 0.4889989197254181, "learning_rate": 7.340607863468916e-06, "loss": 0.3176, "step": 23406 }, { "epoch": 1.074159056491212, "grad_norm": 0.7272564172744751, "learning_rate": 7.3403911969102605e-06, "loss": 0.4506, "step": 23407 }, { "epoch": 1.0742049469964665, "grad_norm": 0.44565701484680176, "learning_rate": 7.340174524723702e-06, "loss": 0.3339, "step": 23408 }, { "epoch": 1.0742508375017208, "grad_norm": 0.4596360921859741, "learning_rate": 7.339957846909763e-06, "loss": 0.3508, "step": 23409 }, { "epoch": 1.0742967280069753, "grad_norm": 0.47095972299575806, "learning_rate": 7.339741163468963e-06, "loss": 0.3483, "step": 23410 }, { "epoch": 1.0743426185122298, "grad_norm": 0.4944157898426056, "learning_rate": 7.339524474401824e-06, "loss": 0.4121, "step": 23411 }, { "epoch": 1.0743885090174843, "grad_norm": 0.47528985142707825, "learning_rate": 7.33930777970887e-06, "loss": 0.3897, "step": 23412 }, { "epoch": 1.0744343995227388, "grad_norm": 0.4616160988807678, "learning_rate": 7.339091079390615e-06, "loss": 0.3756, "step": 23413 }, { "epoch": 1.0744802900279933, "grad_norm": 0.487370103597641, "learning_rate": 7.338874373447587e-06, "loss": 0.3881, "step": 23414 }, { "epoch": 1.0745261805332476, "grad_norm": 0.4908181130886078, "learning_rate": 7.338657661880303e-06, "loss": 0.343, "step": 23415 }, { "epoch": 1.074572071038502, "grad_norm": 0.4783722758293152, "learning_rate": 7.338440944689286e-06, "loss": 0.3586, "step": 23416 }, { "epoch": 1.0746179615437566, "grad_norm": 0.4488353431224823, "learning_rate": 7.338224221875055e-06, "loss": 0.3149, "step": 23417 }, { "epoch": 1.074663852049011, "grad_norm": 0.5053941607475281, "learning_rate": 7.338007493438135e-06, "loss": 0.3496, "step": 23418 }, { "epoch": 1.0747097425542655, "grad_norm": 0.45824456214904785, "learning_rate": 7.337790759379043e-06, "loss": 0.3635, "step": 23419 }, { "epoch": 1.07475563305952, "grad_norm": 0.48228734731674194, "learning_rate": 7.337574019698304e-06, "loss": 0.3798, "step": 23420 }, { "epoch": 1.0748015235647745, "grad_norm": 0.41317692399024963, "learning_rate": 7.337357274396435e-06, "loss": 0.294, "step": 23421 }, { "epoch": 1.0748474140700288, "grad_norm": 0.43934279680252075, "learning_rate": 7.3371405234739625e-06, "loss": 0.3138, "step": 23422 }, { "epoch": 1.0748933045752833, "grad_norm": 0.47413378953933716, "learning_rate": 7.336923766931401e-06, "loss": 0.3472, "step": 23423 }, { "epoch": 1.0749391950805378, "grad_norm": 0.45065662264823914, "learning_rate": 7.336707004769278e-06, "loss": 0.364, "step": 23424 }, { "epoch": 1.0749850855857923, "grad_norm": 0.4456998109817505, "learning_rate": 7.336490236988111e-06, "loss": 0.3365, "step": 23425 }, { "epoch": 1.0750309760910468, "grad_norm": 0.46001824736595154, "learning_rate": 7.336273463588424e-06, "loss": 0.3756, "step": 23426 }, { "epoch": 1.0750768665963013, "grad_norm": 0.4887087941169739, "learning_rate": 7.336056684570735e-06, "loss": 0.4226, "step": 23427 }, { "epoch": 1.0751227571015556, "grad_norm": 0.44517433643341064, "learning_rate": 7.335839899935566e-06, "loss": 0.3725, "step": 23428 }, { "epoch": 1.07516864760681, "grad_norm": 0.4728129208087921, "learning_rate": 7.335623109683442e-06, "loss": 0.2938, "step": 23429 }, { "epoch": 1.0752145381120646, "grad_norm": 0.49400508403778076, "learning_rate": 7.335406313814879e-06, "loss": 0.3927, "step": 23430 }, { "epoch": 1.075260428617319, "grad_norm": 0.46701493859291077, "learning_rate": 7.335189512330402e-06, "loss": 0.3265, "step": 23431 }, { "epoch": 1.0753063191225736, "grad_norm": 0.4558035433292389, "learning_rate": 7.334972705230532e-06, "loss": 0.3212, "step": 23432 }, { "epoch": 1.075352209627828, "grad_norm": 0.5196183919906616, "learning_rate": 7.334755892515789e-06, "loss": 0.4139, "step": 23433 }, { "epoch": 1.0753981001330826, "grad_norm": 0.5299758315086365, "learning_rate": 7.334539074186694e-06, "loss": 0.4258, "step": 23434 }, { "epoch": 1.0754439906383368, "grad_norm": 0.4955865442752838, "learning_rate": 7.33432225024377e-06, "loss": 0.3623, "step": 23435 }, { "epoch": 1.0754898811435913, "grad_norm": 0.4471342861652374, "learning_rate": 7.334105420687539e-06, "loss": 0.3618, "step": 23436 }, { "epoch": 1.0755357716488458, "grad_norm": 0.4782399833202362, "learning_rate": 7.333888585518519e-06, "loss": 0.3779, "step": 23437 }, { "epoch": 1.0755816621541003, "grad_norm": 0.4569876194000244, "learning_rate": 7.333671744737235e-06, "loss": 0.3417, "step": 23438 }, { "epoch": 1.0756275526593548, "grad_norm": 0.5006552338600159, "learning_rate": 7.333454898344207e-06, "loss": 0.4064, "step": 23439 }, { "epoch": 1.0756734431646093, "grad_norm": 0.45773035287857056, "learning_rate": 7.3332380463399555e-06, "loss": 0.3109, "step": 23440 }, { "epoch": 1.0757193336698636, "grad_norm": 0.535605788230896, "learning_rate": 7.333021188725003e-06, "loss": 0.4409, "step": 23441 }, { "epoch": 1.075765224175118, "grad_norm": 0.4890125095844269, "learning_rate": 7.33280432549987e-06, "loss": 0.3332, "step": 23442 }, { "epoch": 1.0758111146803726, "grad_norm": 0.439662367105484, "learning_rate": 7.332587456665081e-06, "loss": 0.3415, "step": 23443 }, { "epoch": 1.075857005185627, "grad_norm": 0.4420439898967743, "learning_rate": 7.332370582221156e-06, "loss": 0.3129, "step": 23444 }, { "epoch": 1.0759028956908816, "grad_norm": 0.46956875920295715, "learning_rate": 7.332153702168613e-06, "loss": 0.3504, "step": 23445 }, { "epoch": 1.075948786196136, "grad_norm": 0.48991408944129944, "learning_rate": 7.331936816507977e-06, "loss": 0.3784, "step": 23446 }, { "epoch": 1.0759946767013906, "grad_norm": 0.4563630521297455, "learning_rate": 7.331719925239769e-06, "loss": 0.3503, "step": 23447 }, { "epoch": 1.0760405672066449, "grad_norm": 0.4389912486076355, "learning_rate": 7.331503028364512e-06, "loss": 0.267, "step": 23448 }, { "epoch": 1.0760864577118994, "grad_norm": 0.45491331815719604, "learning_rate": 7.331286125882726e-06, "loss": 0.3094, "step": 23449 }, { "epoch": 1.0761323482171539, "grad_norm": 0.4311503469944, "learning_rate": 7.331069217794931e-06, "loss": 0.2999, "step": 23450 }, { "epoch": 1.0761782387224084, "grad_norm": 0.4511682689189911, "learning_rate": 7.330852304101651e-06, "loss": 0.3648, "step": 23451 }, { "epoch": 1.0762241292276629, "grad_norm": 0.48167118430137634, "learning_rate": 7.3306353848034076e-06, "loss": 0.403, "step": 23452 }, { "epoch": 1.0762700197329174, "grad_norm": 0.500676155090332, "learning_rate": 7.33041845990072e-06, "loss": 0.4133, "step": 23453 }, { "epoch": 1.0763159102381716, "grad_norm": 0.4766101539134979, "learning_rate": 7.330201529394113e-06, "loss": 0.4234, "step": 23454 }, { "epoch": 1.0763618007434261, "grad_norm": 0.5137669444084167, "learning_rate": 7.329984593284106e-06, "loss": 0.4474, "step": 23455 }, { "epoch": 1.0764076912486806, "grad_norm": 0.5414932370185852, "learning_rate": 7.329767651571221e-06, "loss": 0.4451, "step": 23456 }, { "epoch": 1.0764535817539351, "grad_norm": 0.4387247562408447, "learning_rate": 7.329550704255981e-06, "loss": 0.3385, "step": 23457 }, { "epoch": 1.0764994722591896, "grad_norm": 0.48682117462158203, "learning_rate": 7.329333751338906e-06, "loss": 0.3513, "step": 23458 }, { "epoch": 1.0765453627644441, "grad_norm": 0.5059272050857544, "learning_rate": 7.329116792820519e-06, "loss": 0.3711, "step": 23459 }, { "epoch": 1.0765912532696984, "grad_norm": 0.514149010181427, "learning_rate": 7.3288998287013425e-06, "loss": 0.424, "step": 23460 }, { "epoch": 1.076637143774953, "grad_norm": 0.5497069954872131, "learning_rate": 7.328682858981895e-06, "loss": 0.4766, "step": 23461 }, { "epoch": 1.0766830342802074, "grad_norm": 0.4689859449863434, "learning_rate": 7.328465883662701e-06, "loss": 0.3535, "step": 23462 }, { "epoch": 1.076728924785462, "grad_norm": 0.46419641375541687, "learning_rate": 7.328248902744281e-06, "loss": 0.3472, "step": 23463 }, { "epoch": 1.0767748152907164, "grad_norm": 0.5596047043800354, "learning_rate": 7.3280319162271574e-06, "loss": 0.4311, "step": 23464 }, { "epoch": 1.076820705795971, "grad_norm": 0.48338043689727783, "learning_rate": 7.327814924111852e-06, "loss": 0.3364, "step": 23465 }, { "epoch": 1.0768665963012252, "grad_norm": 0.4785359501838684, "learning_rate": 7.327597926398886e-06, "loss": 0.3199, "step": 23466 }, { "epoch": 1.0769124868064797, "grad_norm": 0.5092898607254028, "learning_rate": 7.327380923088782e-06, "loss": 0.4056, "step": 23467 }, { "epoch": 1.0769583773117342, "grad_norm": 0.4456356167793274, "learning_rate": 7.3271639141820605e-06, "loss": 0.3173, "step": 23468 }, { "epoch": 1.0770042678169887, "grad_norm": 0.5203625559806824, "learning_rate": 7.326946899679246e-06, "loss": 0.4053, "step": 23469 }, { "epoch": 1.0770501583222432, "grad_norm": 0.41401246190071106, "learning_rate": 7.326729879580856e-06, "loss": 0.3092, "step": 23470 }, { "epoch": 1.0770960488274977, "grad_norm": 0.5333976149559021, "learning_rate": 7.326512853887418e-06, "loss": 0.4498, "step": 23471 }, { "epoch": 1.0771419393327522, "grad_norm": 0.5234068036079407, "learning_rate": 7.326295822599449e-06, "loss": 0.3757, "step": 23472 }, { "epoch": 1.0771878298380064, "grad_norm": 0.4695976972579956, "learning_rate": 7.326078785717473e-06, "loss": 0.3348, "step": 23473 }, { "epoch": 1.077233720343261, "grad_norm": 0.4300691783428192, "learning_rate": 7.325861743242013e-06, "loss": 0.2506, "step": 23474 }, { "epoch": 1.0772796108485154, "grad_norm": 0.4487209916114807, "learning_rate": 7.325644695173587e-06, "loss": 0.3454, "step": 23475 }, { "epoch": 1.07732550135377, "grad_norm": 0.491038978099823, "learning_rate": 7.325427641512721e-06, "loss": 0.4187, "step": 23476 }, { "epoch": 1.0773713918590244, "grad_norm": 0.41606199741363525, "learning_rate": 7.325210582259936e-06, "loss": 0.2914, "step": 23477 }, { "epoch": 1.077417282364279, "grad_norm": 0.4685066044330597, "learning_rate": 7.324993517415753e-06, "loss": 0.32, "step": 23478 }, { "epoch": 1.0774631728695332, "grad_norm": 0.4569603204727173, "learning_rate": 7.324776446980694e-06, "loss": 0.3398, "step": 23479 }, { "epoch": 1.0775090633747877, "grad_norm": 0.4687044024467468, "learning_rate": 7.324559370955282e-06, "loss": 0.3435, "step": 23480 }, { "epoch": 1.0775549538800422, "grad_norm": 0.4422742426395416, "learning_rate": 7.324342289340036e-06, "loss": 0.3446, "step": 23481 }, { "epoch": 1.0776008443852967, "grad_norm": 0.4986693859100342, "learning_rate": 7.324125202135483e-06, "loss": 0.3877, "step": 23482 }, { "epoch": 1.0776467348905512, "grad_norm": 0.4634506404399872, "learning_rate": 7.323908109342143e-06, "loss": 0.352, "step": 23483 }, { "epoch": 1.0776926253958057, "grad_norm": 0.4681623578071594, "learning_rate": 7.3236910109605354e-06, "loss": 0.3214, "step": 23484 }, { "epoch": 1.0777385159010602, "grad_norm": 0.5375463962554932, "learning_rate": 7.323473906991184e-06, "loss": 0.3676, "step": 23485 }, { "epoch": 1.0777844064063145, "grad_norm": 0.4827241599559784, "learning_rate": 7.323256797434612e-06, "loss": 0.3741, "step": 23486 }, { "epoch": 1.077830296911569, "grad_norm": 0.6304171681404114, "learning_rate": 7.323039682291342e-06, "loss": 0.386, "step": 23487 }, { "epoch": 1.0778761874168235, "grad_norm": 0.45733290910720825, "learning_rate": 7.322822561561893e-06, "loss": 0.3448, "step": 23488 }, { "epoch": 1.077922077922078, "grad_norm": 0.4650786221027374, "learning_rate": 7.32260543524679e-06, "loss": 0.3259, "step": 23489 }, { "epoch": 1.0779679684273324, "grad_norm": 0.48441633582115173, "learning_rate": 7.3223883033465525e-06, "loss": 0.3737, "step": 23490 }, { "epoch": 1.078013858932587, "grad_norm": 0.5224981307983398, "learning_rate": 7.3221711658617055e-06, "loss": 0.4214, "step": 23491 }, { "epoch": 1.0780597494378412, "grad_norm": 0.44984105229377747, "learning_rate": 7.3219540227927684e-06, "loss": 0.2963, "step": 23492 }, { "epoch": 1.0781056399430957, "grad_norm": 0.44990360736846924, "learning_rate": 7.321736874140266e-06, "loss": 0.3261, "step": 23493 }, { "epoch": 1.0781515304483502, "grad_norm": 0.5042288899421692, "learning_rate": 7.3215197199047195e-06, "loss": 0.3794, "step": 23494 }, { "epoch": 1.0781974209536047, "grad_norm": 0.47461003065109253, "learning_rate": 7.3213025600866494e-06, "loss": 0.3524, "step": 23495 }, { "epoch": 1.0782433114588592, "grad_norm": 0.4568961560726166, "learning_rate": 7.321085394686579e-06, "loss": 0.3355, "step": 23496 }, { "epoch": 1.0782892019641137, "grad_norm": 0.4486820101737976, "learning_rate": 7.320868223705032e-06, "loss": 0.3247, "step": 23497 }, { "epoch": 1.078335092469368, "grad_norm": 0.5103617906570435, "learning_rate": 7.3206510471425294e-06, "loss": 0.4005, "step": 23498 }, { "epoch": 1.0783809829746225, "grad_norm": 0.44462844729423523, "learning_rate": 7.320433864999592e-06, "loss": 0.3251, "step": 23499 }, { "epoch": 1.078426873479877, "grad_norm": 0.4477587342262268, "learning_rate": 7.320216677276746e-06, "loss": 0.3073, "step": 23500 }, { "epoch": 1.0784727639851315, "grad_norm": 0.5054042935371399, "learning_rate": 7.319999483974509e-06, "loss": 0.4346, "step": 23501 }, { "epoch": 1.078518654490386, "grad_norm": 0.472153902053833, "learning_rate": 7.3197822850934055e-06, "loss": 0.3799, "step": 23502 }, { "epoch": 1.0785645449956405, "grad_norm": 0.4857749342918396, "learning_rate": 7.319565080633959e-06, "loss": 0.4292, "step": 23503 }, { "epoch": 1.0786104355008947, "grad_norm": 0.4642336964607239, "learning_rate": 7.31934787059669e-06, "loss": 0.3438, "step": 23504 }, { "epoch": 1.0786563260061492, "grad_norm": 0.46415457129478455, "learning_rate": 7.319130654982121e-06, "loss": 0.3545, "step": 23505 }, { "epoch": 1.0787022165114037, "grad_norm": 0.49333053827285767, "learning_rate": 7.318913433790775e-06, "loss": 0.4277, "step": 23506 }, { "epoch": 1.0787481070166582, "grad_norm": 0.4162762463092804, "learning_rate": 7.318696207023175e-06, "loss": 0.2788, "step": 23507 }, { "epoch": 1.0787939975219127, "grad_norm": 0.431846559047699, "learning_rate": 7.318478974679841e-06, "loss": 0.287, "step": 23508 }, { "epoch": 1.0788398880271672, "grad_norm": 0.4641336500644684, "learning_rate": 7.318261736761299e-06, "loss": 0.3394, "step": 23509 }, { "epoch": 1.0788857785324217, "grad_norm": 0.46669909358024597, "learning_rate": 7.318044493268066e-06, "loss": 0.3956, "step": 23510 }, { "epoch": 1.078931669037676, "grad_norm": 0.473905473947525, "learning_rate": 7.31782724420067e-06, "loss": 0.3526, "step": 23511 }, { "epoch": 1.0789775595429305, "grad_norm": 0.4364517331123352, "learning_rate": 7.31760998955963e-06, "loss": 0.3281, "step": 23512 }, { "epoch": 1.079023450048185, "grad_norm": 0.4526953101158142, "learning_rate": 7.317392729345468e-06, "loss": 0.3407, "step": 23513 }, { "epoch": 1.0790693405534395, "grad_norm": 0.4453414976596832, "learning_rate": 7.31717546355871e-06, "loss": 0.272, "step": 23514 }, { "epoch": 1.079115231058694, "grad_norm": 0.4721308946609497, "learning_rate": 7.3169581921998764e-06, "loss": 0.3851, "step": 23515 }, { "epoch": 1.0791611215639485, "grad_norm": 0.4728175401687622, "learning_rate": 7.316740915269489e-06, "loss": 0.3426, "step": 23516 }, { "epoch": 1.0792070120692028, "grad_norm": 0.4878484606742859, "learning_rate": 7.316523632768071e-06, "loss": 0.3817, "step": 23517 }, { "epoch": 1.0792529025744573, "grad_norm": 0.45411980152130127, "learning_rate": 7.316306344696145e-06, "loss": 0.3714, "step": 23518 }, { "epoch": 1.0792987930797118, "grad_norm": 0.5303005576133728, "learning_rate": 7.3160890510542325e-06, "loss": 0.408, "step": 23519 }, { "epoch": 1.0793446835849663, "grad_norm": 0.46261075139045715, "learning_rate": 7.315871751842858e-06, "loss": 0.3733, "step": 23520 }, { "epoch": 1.0793905740902208, "grad_norm": 0.5062291026115417, "learning_rate": 7.315654447062541e-06, "loss": 0.3927, "step": 23521 }, { "epoch": 1.0794364645954753, "grad_norm": 0.4767845571041107, "learning_rate": 7.315437136713808e-06, "loss": 0.3823, "step": 23522 }, { "epoch": 1.0794823551007298, "grad_norm": 0.450222373008728, "learning_rate": 7.315219820797177e-06, "loss": 0.3543, "step": 23523 }, { "epoch": 1.079528245605984, "grad_norm": 0.46025407314300537, "learning_rate": 7.315002499313175e-06, "loss": 0.3548, "step": 23524 }, { "epoch": 1.0795741361112385, "grad_norm": 0.448490172624588, "learning_rate": 7.314785172262322e-06, "loss": 0.3702, "step": 23525 }, { "epoch": 1.079620026616493, "grad_norm": 0.48036590218544006, "learning_rate": 7.314567839645141e-06, "loss": 0.4345, "step": 23526 }, { "epoch": 1.0796659171217475, "grad_norm": 0.48849359154701233, "learning_rate": 7.314350501462155e-06, "loss": 0.3666, "step": 23527 }, { "epoch": 1.079711807627002, "grad_norm": 0.45617103576660156, "learning_rate": 7.314133157713888e-06, "loss": 0.3784, "step": 23528 }, { "epoch": 1.0797576981322565, "grad_norm": 0.474863737821579, "learning_rate": 7.3139158084008585e-06, "loss": 0.2838, "step": 23529 }, { "epoch": 1.0798035886375108, "grad_norm": 0.4458044171333313, "learning_rate": 7.3136984535235944e-06, "loss": 0.2934, "step": 23530 }, { "epoch": 1.0798494791427653, "grad_norm": 0.4668932259082794, "learning_rate": 7.313481093082614e-06, "loss": 0.3329, "step": 23531 }, { "epoch": 1.0798953696480198, "grad_norm": 0.450481653213501, "learning_rate": 7.313263727078441e-06, "loss": 0.3177, "step": 23532 }, { "epoch": 1.0799412601532743, "grad_norm": 0.5127546191215515, "learning_rate": 7.3130463555116e-06, "loss": 0.3904, "step": 23533 }, { "epoch": 1.0799871506585288, "grad_norm": 0.5017886757850647, "learning_rate": 7.312828978382614e-06, "loss": 0.4091, "step": 23534 }, { "epoch": 1.0800330411637833, "grad_norm": 0.4631175696849823, "learning_rate": 7.312611595692e-06, "loss": 0.3542, "step": 23535 }, { "epoch": 1.0800789316690378, "grad_norm": 0.517154335975647, "learning_rate": 7.31239420744029e-06, "loss": 0.4125, "step": 23536 }, { "epoch": 1.080124822174292, "grad_norm": 1.5738763809204102, "learning_rate": 7.312176813627999e-06, "loss": 0.2992, "step": 23537 }, { "epoch": 1.0801707126795466, "grad_norm": 0.47340983152389526, "learning_rate": 7.311959414255652e-06, "loss": 0.3911, "step": 23538 }, { "epoch": 1.080216603184801, "grad_norm": 0.503952145576477, "learning_rate": 7.311742009323774e-06, "loss": 0.3455, "step": 23539 }, { "epoch": 1.0802624936900556, "grad_norm": 0.478572815656662, "learning_rate": 7.311524598832886e-06, "loss": 0.4146, "step": 23540 }, { "epoch": 1.08030838419531, "grad_norm": 0.4561272859573364, "learning_rate": 7.31130718278351e-06, "loss": 0.3475, "step": 23541 }, { "epoch": 1.0803542747005646, "grad_norm": 0.5038200616836548, "learning_rate": 7.311089761176171e-06, "loss": 0.3937, "step": 23542 }, { "epoch": 1.0804001652058188, "grad_norm": 0.4809851050376892, "learning_rate": 7.310872334011389e-06, "loss": 0.3744, "step": 23543 }, { "epoch": 1.0804460557110733, "grad_norm": 0.4719792306423187, "learning_rate": 7.310654901289688e-06, "loss": 0.3198, "step": 23544 }, { "epoch": 1.0804919462163278, "grad_norm": 0.4645216464996338, "learning_rate": 7.310437463011593e-06, "loss": 0.3731, "step": 23545 }, { "epoch": 1.0805378367215823, "grad_norm": 0.4579533338546753, "learning_rate": 7.310220019177623e-06, "loss": 0.3148, "step": 23546 }, { "epoch": 1.0805837272268368, "grad_norm": 0.4964037239551544, "learning_rate": 7.310002569788304e-06, "loss": 0.39, "step": 23547 }, { "epoch": 1.0806296177320913, "grad_norm": 0.5044741630554199, "learning_rate": 7.309785114844159e-06, "loss": 0.4585, "step": 23548 }, { "epoch": 1.0806755082373456, "grad_norm": 0.5061672329902649, "learning_rate": 7.309567654345708e-06, "loss": 0.3963, "step": 23549 }, { "epoch": 1.0807213987426, "grad_norm": 0.474551796913147, "learning_rate": 7.309350188293476e-06, "loss": 0.4297, "step": 23550 }, { "epoch": 1.0807672892478546, "grad_norm": 0.47430896759033203, "learning_rate": 7.309132716687987e-06, "loss": 0.3718, "step": 23551 }, { "epoch": 1.080813179753109, "grad_norm": 0.4745233356952667, "learning_rate": 7.308915239529761e-06, "loss": 0.3382, "step": 23552 }, { "epoch": 1.0808590702583636, "grad_norm": 0.47661593556404114, "learning_rate": 7.308697756819323e-06, "loss": 0.4067, "step": 23553 }, { "epoch": 1.080904960763618, "grad_norm": 0.45257216691970825, "learning_rate": 7.3084802685571965e-06, "loss": 0.3406, "step": 23554 }, { "epoch": 1.0809508512688724, "grad_norm": 0.4612959921360016, "learning_rate": 7.308262774743903e-06, "loss": 0.3741, "step": 23555 }, { "epoch": 1.0809967417741269, "grad_norm": 0.4456912875175476, "learning_rate": 7.308045275379966e-06, "loss": 0.3317, "step": 23556 }, { "epoch": 1.0810426322793814, "grad_norm": 0.4832839369773865, "learning_rate": 7.307827770465907e-06, "loss": 0.4407, "step": 23557 }, { "epoch": 1.0810885227846359, "grad_norm": 0.4708153307437897, "learning_rate": 7.307610260002254e-06, "loss": 0.3425, "step": 23558 }, { "epoch": 1.0811344132898904, "grad_norm": 0.4880259931087494, "learning_rate": 7.307392743989524e-06, "loss": 0.4463, "step": 23559 }, { "epoch": 1.0811803037951448, "grad_norm": 0.4488755464553833, "learning_rate": 7.307175222428243e-06, "loss": 0.3593, "step": 23560 }, { "epoch": 1.0812261943003993, "grad_norm": 0.47707399725914, "learning_rate": 7.306957695318933e-06, "loss": 0.3803, "step": 23561 }, { "epoch": 1.0812720848056536, "grad_norm": 0.4680439829826355, "learning_rate": 7.30674016266212e-06, "loss": 0.3612, "step": 23562 }, { "epoch": 1.0813179753109081, "grad_norm": 0.46313175559043884, "learning_rate": 7.306522624458325e-06, "loss": 0.3499, "step": 23563 }, { "epoch": 1.0813638658161626, "grad_norm": 0.49966713786125183, "learning_rate": 7.306305080708069e-06, "loss": 0.3349, "step": 23564 }, { "epoch": 1.0814097563214171, "grad_norm": 0.47009003162384033, "learning_rate": 7.306087531411878e-06, "loss": 0.3159, "step": 23565 }, { "epoch": 1.0814556468266716, "grad_norm": 0.4476785659790039, "learning_rate": 7.305869976570274e-06, "loss": 0.3382, "step": 23566 }, { "epoch": 1.081501537331926, "grad_norm": 0.4336528778076172, "learning_rate": 7.30565241618378e-06, "loss": 0.2819, "step": 23567 }, { "epoch": 1.0815474278371804, "grad_norm": 0.49274057149887085, "learning_rate": 7.305434850252922e-06, "loss": 0.3566, "step": 23568 }, { "epoch": 1.0815933183424349, "grad_norm": 0.45834195613861084, "learning_rate": 7.305217278778219e-06, "loss": 0.3699, "step": 23569 }, { "epoch": 1.0816392088476894, "grad_norm": 0.4983423352241516, "learning_rate": 7.304999701760196e-06, "loss": 0.4491, "step": 23570 }, { "epoch": 1.0816850993529439, "grad_norm": 0.46036314964294434, "learning_rate": 7.304782119199377e-06, "loss": 0.3869, "step": 23571 }, { "epoch": 1.0817309898581984, "grad_norm": 0.4685608744621277, "learning_rate": 7.304564531096283e-06, "loss": 0.4066, "step": 23572 }, { "epoch": 1.0817768803634529, "grad_norm": 0.43725624680519104, "learning_rate": 7.304346937451441e-06, "loss": 0.3169, "step": 23573 }, { "epoch": 1.0818227708687074, "grad_norm": 0.49379169940948486, "learning_rate": 7.30412933826537e-06, "loss": 0.3927, "step": 23574 }, { "epoch": 1.0818686613739616, "grad_norm": 0.44622644782066345, "learning_rate": 7.303911733538596e-06, "loss": 0.3102, "step": 23575 }, { "epoch": 1.0819145518792161, "grad_norm": 0.4554527997970581, "learning_rate": 7.303694123271641e-06, "loss": 0.3332, "step": 23576 }, { "epoch": 1.0819604423844706, "grad_norm": 0.4792640507221222, "learning_rate": 7.303476507465028e-06, "loss": 0.406, "step": 23577 }, { "epoch": 1.0820063328897251, "grad_norm": 0.5107975602149963, "learning_rate": 7.303258886119281e-06, "loss": 0.3778, "step": 23578 }, { "epoch": 1.0820522233949796, "grad_norm": 0.48638445138931274, "learning_rate": 7.303041259234925e-06, "loss": 0.4464, "step": 23579 }, { "epoch": 1.0820981139002341, "grad_norm": 0.48854225873947144, "learning_rate": 7.3028236268124805e-06, "loss": 0.427, "step": 23580 }, { "epoch": 1.0821440044054884, "grad_norm": 0.4725310504436493, "learning_rate": 7.302605988852471e-06, "loss": 0.3896, "step": 23581 }, { "epoch": 1.082189894910743, "grad_norm": 0.44566768407821655, "learning_rate": 7.302388345355424e-06, "loss": 0.3479, "step": 23582 }, { "epoch": 1.0822357854159974, "grad_norm": 0.47477608919143677, "learning_rate": 7.302170696321856e-06, "loss": 0.3574, "step": 23583 }, { "epoch": 1.082281675921252, "grad_norm": 0.4395079016685486, "learning_rate": 7.3019530417522965e-06, "loss": 0.336, "step": 23584 }, { "epoch": 1.0823275664265064, "grad_norm": 0.5206356048583984, "learning_rate": 7.3017353816472655e-06, "loss": 0.4842, "step": 23585 }, { "epoch": 1.082373456931761, "grad_norm": 0.46876588463783264, "learning_rate": 7.301517716007287e-06, "loss": 0.3225, "step": 23586 }, { "epoch": 1.0824193474370152, "grad_norm": 0.48180779814720154, "learning_rate": 7.301300044832886e-06, "loss": 0.3751, "step": 23587 }, { "epoch": 1.0824652379422697, "grad_norm": 0.4468536078929901, "learning_rate": 7.301082368124583e-06, "loss": 0.3188, "step": 23588 }, { "epoch": 1.0825111284475242, "grad_norm": 0.4918350577354431, "learning_rate": 7.3008646858829035e-06, "loss": 0.367, "step": 23589 }, { "epoch": 1.0825570189527787, "grad_norm": 0.44985541701316833, "learning_rate": 7.3006469981083716e-06, "loss": 0.3198, "step": 23590 }, { "epoch": 1.0826029094580332, "grad_norm": 0.41953182220458984, "learning_rate": 7.300429304801511e-06, "loss": 0.2652, "step": 23591 }, { "epoch": 1.0826487999632877, "grad_norm": 0.4844287037849426, "learning_rate": 7.3002116059628415e-06, "loss": 0.3564, "step": 23592 }, { "epoch": 1.082694690468542, "grad_norm": 0.49515190720558167, "learning_rate": 7.29999390159289e-06, "loss": 0.3691, "step": 23593 }, { "epoch": 1.0827405809737964, "grad_norm": 0.44641926884651184, "learning_rate": 7.299776191692178e-06, "loss": 0.3513, "step": 23594 }, { "epoch": 1.082786471479051, "grad_norm": 0.5262213945388794, "learning_rate": 7.299558476261232e-06, "loss": 0.4242, "step": 23595 }, { "epoch": 1.0828323619843054, "grad_norm": 0.4499502182006836, "learning_rate": 7.2993407553005725e-06, "loss": 0.3392, "step": 23596 }, { "epoch": 1.08287825248956, "grad_norm": 0.47274500131607056, "learning_rate": 7.299123028810724e-06, "loss": 0.3556, "step": 23597 }, { "epoch": 1.0829241429948144, "grad_norm": 0.48076656460762024, "learning_rate": 7.29890529679221e-06, "loss": 0.3323, "step": 23598 }, { "epoch": 1.082970033500069, "grad_norm": 0.46518760919570923, "learning_rate": 7.298687559245556e-06, "loss": 0.3637, "step": 23599 }, { "epoch": 1.0830159240053232, "grad_norm": 0.43218323588371277, "learning_rate": 7.298469816171282e-06, "loss": 0.3253, "step": 23600 }, { "epoch": 1.0830618145105777, "grad_norm": 0.4853629469871521, "learning_rate": 7.298252067569915e-06, "loss": 0.3984, "step": 23601 }, { "epoch": 1.0831077050158322, "grad_norm": 0.4271202087402344, "learning_rate": 7.298034313441977e-06, "loss": 0.3062, "step": 23602 }, { "epoch": 1.0831535955210867, "grad_norm": 0.4597027003765106, "learning_rate": 7.2978165537879906e-06, "loss": 0.3511, "step": 23603 }, { "epoch": 1.0831994860263412, "grad_norm": 0.4460557997226715, "learning_rate": 7.297598788608482e-06, "loss": 0.3364, "step": 23604 }, { "epoch": 1.0832453765315957, "grad_norm": 0.6168267726898193, "learning_rate": 7.297381017903972e-06, "loss": 0.3702, "step": 23605 }, { "epoch": 1.08329126703685, "grad_norm": 0.44970476627349854, "learning_rate": 7.297163241674986e-06, "loss": 0.3154, "step": 23606 }, { "epoch": 1.0833371575421045, "grad_norm": 0.4559820294380188, "learning_rate": 7.296945459922049e-06, "loss": 0.3322, "step": 23607 }, { "epoch": 1.083383048047359, "grad_norm": 0.4744173288345337, "learning_rate": 7.29672767264568e-06, "loss": 0.3614, "step": 23608 }, { "epoch": 1.0834289385526135, "grad_norm": 0.4712355136871338, "learning_rate": 7.296509879846408e-06, "loss": 0.3527, "step": 23609 }, { "epoch": 1.083474829057868, "grad_norm": 0.465330570936203, "learning_rate": 7.296292081524755e-06, "loss": 0.3605, "step": 23610 }, { "epoch": 1.0835207195631225, "grad_norm": 0.4512239098548889, "learning_rate": 7.296074277681243e-06, "loss": 0.3715, "step": 23611 }, { "epoch": 1.083566610068377, "grad_norm": 0.4709427058696747, "learning_rate": 7.295856468316398e-06, "loss": 0.3847, "step": 23612 }, { "epoch": 1.0836125005736312, "grad_norm": 0.48046132922172546, "learning_rate": 7.295638653430743e-06, "loss": 0.3941, "step": 23613 }, { "epoch": 1.0836583910788857, "grad_norm": 0.4573792517185211, "learning_rate": 7.2954208330247996e-06, "loss": 0.3504, "step": 23614 }, { "epoch": 1.0837042815841402, "grad_norm": 0.43712276220321655, "learning_rate": 7.295203007099094e-06, "loss": 0.2852, "step": 23615 }, { "epoch": 1.0837501720893947, "grad_norm": 0.4590231478214264, "learning_rate": 7.294985175654151e-06, "loss": 0.3622, "step": 23616 }, { "epoch": 1.0837960625946492, "grad_norm": 0.4467950463294983, "learning_rate": 7.2947673386904935e-06, "loss": 0.3175, "step": 23617 }, { "epoch": 1.0838419530999037, "grad_norm": 0.4304211437702179, "learning_rate": 7.294549496208643e-06, "loss": 0.2923, "step": 23618 }, { "epoch": 1.083887843605158, "grad_norm": 0.5015869736671448, "learning_rate": 7.294331648209127e-06, "loss": 0.4172, "step": 23619 }, { "epoch": 1.0839337341104125, "grad_norm": 0.4822033941745758, "learning_rate": 7.2941137946924656e-06, "loss": 0.3966, "step": 23620 }, { "epoch": 1.083979624615667, "grad_norm": 0.49326276779174805, "learning_rate": 7.2938959356591855e-06, "loss": 0.3851, "step": 23621 }, { "epoch": 1.0840255151209215, "grad_norm": 0.4699822664260864, "learning_rate": 7.293678071109808e-06, "loss": 0.3709, "step": 23622 }, { "epoch": 1.084071405626176, "grad_norm": 0.4431056082248688, "learning_rate": 7.293460201044862e-06, "loss": 0.2958, "step": 23623 }, { "epoch": 1.0841172961314305, "grad_norm": 0.4783771336078644, "learning_rate": 7.293242325464865e-06, "loss": 0.3942, "step": 23624 }, { "epoch": 1.084163186636685, "grad_norm": 0.45762214064598083, "learning_rate": 7.293024444370346e-06, "loss": 0.3509, "step": 23625 }, { "epoch": 1.0842090771419393, "grad_norm": 0.5078889727592468, "learning_rate": 7.292806557761825e-06, "loss": 0.4282, "step": 23626 }, { "epoch": 1.0842549676471938, "grad_norm": 0.45577922463417053, "learning_rate": 7.292588665639829e-06, "loss": 0.3277, "step": 23627 }, { "epoch": 1.0843008581524483, "grad_norm": 0.45995786786079407, "learning_rate": 7.292370768004882e-06, "loss": 0.3244, "step": 23628 }, { "epoch": 1.0843467486577028, "grad_norm": 0.4844330847263336, "learning_rate": 7.292152864857504e-06, "loss": 0.3503, "step": 23629 }, { "epoch": 1.0843926391629573, "grad_norm": 0.47811073064804077, "learning_rate": 7.291934956198224e-06, "loss": 0.3549, "step": 23630 }, { "epoch": 1.0844385296682117, "grad_norm": 0.497405469417572, "learning_rate": 7.291717042027563e-06, "loss": 0.4296, "step": 23631 }, { "epoch": 1.084484420173466, "grad_norm": 0.4589798152446747, "learning_rate": 7.2914991223460455e-06, "loss": 0.3177, "step": 23632 }, { "epoch": 1.0845303106787205, "grad_norm": 0.4962727427482605, "learning_rate": 7.291281197154197e-06, "loss": 0.4368, "step": 23633 }, { "epoch": 1.084576201183975, "grad_norm": 0.46458354592323303, "learning_rate": 7.291063266452539e-06, "loss": 0.3327, "step": 23634 }, { "epoch": 1.0846220916892295, "grad_norm": 0.4918009638786316, "learning_rate": 7.290845330241597e-06, "loss": 0.3906, "step": 23635 }, { "epoch": 1.084667982194484, "grad_norm": 0.4659987986087799, "learning_rate": 7.290627388521896e-06, "loss": 0.3326, "step": 23636 }, { "epoch": 1.0847138726997385, "grad_norm": 0.4501245319843292, "learning_rate": 7.290409441293957e-06, "loss": 0.3463, "step": 23637 }, { "epoch": 1.0847597632049928, "grad_norm": 0.4552854001522064, "learning_rate": 7.290191488558309e-06, "loss": 0.379, "step": 23638 }, { "epoch": 1.0848056537102473, "grad_norm": 0.4765038788318634, "learning_rate": 7.289973530315473e-06, "loss": 0.4026, "step": 23639 }, { "epoch": 1.0848515442155018, "grad_norm": 0.4558756649494171, "learning_rate": 7.289755566565971e-06, "loss": 0.3766, "step": 23640 }, { "epoch": 1.0848974347207563, "grad_norm": 0.45331135392189026, "learning_rate": 7.289537597310332e-06, "loss": 0.3151, "step": 23641 }, { "epoch": 1.0849433252260108, "grad_norm": 0.45359963178634644, "learning_rate": 7.289319622549076e-06, "loss": 0.3141, "step": 23642 }, { "epoch": 1.0849892157312653, "grad_norm": 0.43193748593330383, "learning_rate": 7.289101642282729e-06, "loss": 0.312, "step": 23643 }, { "epoch": 1.0850351062365196, "grad_norm": 0.4798784554004669, "learning_rate": 7.288883656511816e-06, "loss": 0.4035, "step": 23644 }, { "epoch": 1.085080996741774, "grad_norm": 0.4726826846599579, "learning_rate": 7.2886656652368605e-06, "loss": 0.3881, "step": 23645 }, { "epoch": 1.0851268872470285, "grad_norm": 0.456220805644989, "learning_rate": 7.288447668458385e-06, "loss": 0.3845, "step": 23646 }, { "epoch": 1.085172777752283, "grad_norm": 0.4930247366428375, "learning_rate": 7.288229666176916e-06, "loss": 0.3831, "step": 23647 }, { "epoch": 1.0852186682575375, "grad_norm": 0.4811192750930786, "learning_rate": 7.288011658392975e-06, "loss": 0.3751, "step": 23648 }, { "epoch": 1.085264558762792, "grad_norm": 0.4941202998161316, "learning_rate": 7.28779364510709e-06, "loss": 0.4068, "step": 23649 }, { "epoch": 1.0853104492680465, "grad_norm": 0.46955496072769165, "learning_rate": 7.2875756263197835e-06, "loss": 0.3869, "step": 23650 }, { "epoch": 1.0853563397733008, "grad_norm": 0.5762729644775391, "learning_rate": 7.287357602031579e-06, "loss": 0.3655, "step": 23651 }, { "epoch": 1.0854022302785553, "grad_norm": 0.4939965307712555, "learning_rate": 7.287139572243e-06, "loss": 0.412, "step": 23652 }, { "epoch": 1.0854481207838098, "grad_norm": 0.4429567754268646, "learning_rate": 7.286921536954575e-06, "loss": 0.2944, "step": 23653 }, { "epoch": 1.0854940112890643, "grad_norm": 0.4839816689491272, "learning_rate": 7.286703496166822e-06, "loss": 0.429, "step": 23654 }, { "epoch": 1.0855399017943188, "grad_norm": 0.48793327808380127, "learning_rate": 7.286485449880273e-06, "loss": 0.3894, "step": 23655 }, { "epoch": 1.0855857922995733, "grad_norm": 0.46330705285072327, "learning_rate": 7.286267398095446e-06, "loss": 0.3664, "step": 23656 }, { "epoch": 1.0856316828048276, "grad_norm": 0.43843090534210205, "learning_rate": 7.286049340812866e-06, "loss": 0.3111, "step": 23657 }, { "epoch": 1.085677573310082, "grad_norm": 0.5005151033401489, "learning_rate": 7.285831278033062e-06, "loss": 0.429, "step": 23658 }, { "epoch": 1.0857234638153366, "grad_norm": 0.4948265850543976, "learning_rate": 7.285613209756552e-06, "loss": 0.3821, "step": 23659 }, { "epoch": 1.085769354320591, "grad_norm": 0.46440795063972473, "learning_rate": 7.285395135983865e-06, "loss": 0.3672, "step": 23660 }, { "epoch": 1.0858152448258456, "grad_norm": 0.44109541177749634, "learning_rate": 7.285177056715526e-06, "loss": 0.3195, "step": 23661 }, { "epoch": 1.0858611353311, "grad_norm": 0.4612303376197815, "learning_rate": 7.284958971952054e-06, "loss": 0.3183, "step": 23662 }, { "epoch": 1.0859070258363546, "grad_norm": 0.457851380109787, "learning_rate": 7.284740881693978e-06, "loss": 0.3376, "step": 23663 }, { "epoch": 1.0859529163416088, "grad_norm": 0.46595296263694763, "learning_rate": 7.284522785941822e-06, "loss": 0.3179, "step": 23664 }, { "epoch": 1.0859988068468633, "grad_norm": 0.4398355484008789, "learning_rate": 7.284304684696108e-06, "loss": 0.3219, "step": 23665 }, { "epoch": 1.0860446973521178, "grad_norm": 0.47172078490257263, "learning_rate": 7.284086577957364e-06, "loss": 0.3497, "step": 23666 }, { "epoch": 1.0860905878573723, "grad_norm": 0.4895589351654053, "learning_rate": 7.283868465726114e-06, "loss": 0.4177, "step": 23667 }, { "epoch": 1.0861364783626268, "grad_norm": 0.4705275893211365, "learning_rate": 7.2836503480028775e-06, "loss": 0.3734, "step": 23668 }, { "epoch": 1.0861823688678813, "grad_norm": 0.44050320982933044, "learning_rate": 7.283432224788185e-06, "loss": 0.2919, "step": 23669 }, { "epoch": 1.0862282593731356, "grad_norm": 0.475134938955307, "learning_rate": 7.283214096082558e-06, "loss": 0.3604, "step": 23670 }, { "epoch": 1.08627414987839, "grad_norm": 0.467201828956604, "learning_rate": 7.282995961886521e-06, "loss": 0.3648, "step": 23671 }, { "epoch": 1.0863200403836446, "grad_norm": 0.5183555483818054, "learning_rate": 7.282777822200602e-06, "loss": 0.4177, "step": 23672 }, { "epoch": 1.086365930888899, "grad_norm": 0.5371918678283691, "learning_rate": 7.28255967702532e-06, "loss": 0.3827, "step": 23673 }, { "epoch": 1.0864118213941536, "grad_norm": 0.4957188367843628, "learning_rate": 7.282341526361203e-06, "loss": 0.4142, "step": 23674 }, { "epoch": 1.086457711899408, "grad_norm": 0.428519606590271, "learning_rate": 7.282123370208776e-06, "loss": 0.2829, "step": 23675 }, { "epoch": 1.0865036024046624, "grad_norm": 0.498566597700119, "learning_rate": 7.281905208568561e-06, "loss": 0.418, "step": 23676 }, { "epoch": 1.0865494929099169, "grad_norm": 0.46495139598846436, "learning_rate": 7.281687041441085e-06, "loss": 0.345, "step": 23677 }, { "epoch": 1.0865953834151714, "grad_norm": 0.4814172387123108, "learning_rate": 7.281468868826872e-06, "loss": 0.4107, "step": 23678 }, { "epoch": 1.0866412739204259, "grad_norm": 0.443947434425354, "learning_rate": 7.281250690726446e-06, "loss": 0.3053, "step": 23679 }, { "epoch": 1.0866871644256804, "grad_norm": 0.45778733491897583, "learning_rate": 7.281032507140331e-06, "loss": 0.3658, "step": 23680 }, { "epoch": 1.0867330549309349, "grad_norm": 0.42524299025535583, "learning_rate": 7.280814318069054e-06, "loss": 0.2841, "step": 23681 }, { "epoch": 1.0867789454361891, "grad_norm": 0.46094146370887756, "learning_rate": 7.280596123513138e-06, "loss": 0.3757, "step": 23682 }, { "epoch": 1.0868248359414436, "grad_norm": 0.4793056547641754, "learning_rate": 7.280377923473109e-06, "loss": 0.3803, "step": 23683 }, { "epoch": 1.0868707264466981, "grad_norm": 0.45266926288604736, "learning_rate": 7.2801597179494906e-06, "loss": 0.3299, "step": 23684 }, { "epoch": 1.0869166169519526, "grad_norm": 0.44952377676963806, "learning_rate": 7.279941506942807e-06, "loss": 0.3225, "step": 23685 }, { "epoch": 1.0869625074572071, "grad_norm": 0.44711464643478394, "learning_rate": 7.279723290453584e-06, "loss": 0.3025, "step": 23686 }, { "epoch": 1.0870083979624616, "grad_norm": 0.5084816813468933, "learning_rate": 7.279505068482345e-06, "loss": 0.3985, "step": 23687 }, { "epoch": 1.0870542884677161, "grad_norm": 0.48148325085639954, "learning_rate": 7.279286841029618e-06, "loss": 0.367, "step": 23688 }, { "epoch": 1.0871001789729704, "grad_norm": 0.4846136271953583, "learning_rate": 7.279068608095925e-06, "loss": 0.3465, "step": 23689 }, { "epoch": 1.087146069478225, "grad_norm": 0.47388023138046265, "learning_rate": 7.278850369681789e-06, "loss": 0.3586, "step": 23690 }, { "epoch": 1.0871919599834794, "grad_norm": 0.48141899704933167, "learning_rate": 7.278632125787739e-06, "loss": 0.3425, "step": 23691 }, { "epoch": 1.087237850488734, "grad_norm": 0.43066883087158203, "learning_rate": 7.278413876414298e-06, "loss": 0.3051, "step": 23692 }, { "epoch": 1.0872837409939884, "grad_norm": 0.5115679502487183, "learning_rate": 7.278195621561989e-06, "loss": 0.4489, "step": 23693 }, { "epoch": 1.0873296314992429, "grad_norm": 0.48040705919265747, "learning_rate": 7.277977361231341e-06, "loss": 0.418, "step": 23694 }, { "epoch": 1.0873755220044972, "grad_norm": 0.46274641156196594, "learning_rate": 7.277759095422875e-06, "loss": 0.3186, "step": 23695 }, { "epoch": 1.0874214125097517, "grad_norm": 0.4759560227394104, "learning_rate": 7.277540824137117e-06, "loss": 0.4094, "step": 23696 }, { "epoch": 1.0874673030150062, "grad_norm": 0.4669491946697235, "learning_rate": 7.277322547374592e-06, "loss": 0.3088, "step": 23697 }, { "epoch": 1.0875131935202607, "grad_norm": 0.4498862028121948, "learning_rate": 7.277104265135826e-06, "loss": 0.307, "step": 23698 }, { "epoch": 1.0875590840255152, "grad_norm": 0.4459482729434967, "learning_rate": 7.276885977421343e-06, "loss": 0.3271, "step": 23699 }, { "epoch": 1.0876049745307697, "grad_norm": 0.4891681969165802, "learning_rate": 7.276667684231666e-06, "loss": 0.3656, "step": 23700 }, { "epoch": 1.0876508650360242, "grad_norm": 0.487946093082428, "learning_rate": 7.276449385567323e-06, "loss": 0.3566, "step": 23701 }, { "epoch": 1.0876967555412784, "grad_norm": 0.45046499371528625, "learning_rate": 7.276231081428839e-06, "loss": 0.3816, "step": 23702 }, { "epoch": 1.087742646046533, "grad_norm": 0.5621416568756104, "learning_rate": 7.276012771816736e-06, "loss": 0.3173, "step": 23703 }, { "epoch": 1.0877885365517874, "grad_norm": 0.48943835496902466, "learning_rate": 7.27579445673154e-06, "loss": 0.3753, "step": 23704 }, { "epoch": 1.087834427057042, "grad_norm": 0.4863723814487457, "learning_rate": 7.275576136173779e-06, "loss": 0.3636, "step": 23705 }, { "epoch": 1.0878803175622964, "grad_norm": 0.4484808146953583, "learning_rate": 7.2753578101439735e-06, "loss": 0.3311, "step": 23706 }, { "epoch": 1.087926208067551, "grad_norm": 0.4434199929237366, "learning_rate": 7.275139478642651e-06, "loss": 0.3326, "step": 23707 }, { "epoch": 1.0879720985728052, "grad_norm": 0.45532912015914917, "learning_rate": 7.274921141670336e-06, "loss": 0.3286, "step": 23708 }, { "epoch": 1.0880179890780597, "grad_norm": 0.46044889092445374, "learning_rate": 7.274702799227554e-06, "loss": 0.3478, "step": 23709 }, { "epoch": 1.0880638795833142, "grad_norm": 0.4920402467250824, "learning_rate": 7.27448445131483e-06, "loss": 0.3991, "step": 23710 }, { "epoch": 1.0881097700885687, "grad_norm": 0.534299373626709, "learning_rate": 7.2742660979326884e-06, "loss": 0.4813, "step": 23711 }, { "epoch": 1.0881556605938232, "grad_norm": 0.4399348199367523, "learning_rate": 7.2740477390816555e-06, "loss": 0.314, "step": 23712 }, { "epoch": 1.0882015510990777, "grad_norm": 0.4977019727230072, "learning_rate": 7.273829374762253e-06, "loss": 0.4242, "step": 23713 }, { "epoch": 1.0882474416043322, "grad_norm": 0.48229271173477173, "learning_rate": 7.273611004975011e-06, "loss": 0.3987, "step": 23714 }, { "epoch": 1.0882933321095865, "grad_norm": 0.49072936177253723, "learning_rate": 7.2733926297204514e-06, "loss": 0.4131, "step": 23715 }, { "epoch": 1.088339222614841, "grad_norm": 0.4612120985984802, "learning_rate": 7.2731742489991e-06, "loss": 0.3648, "step": 23716 }, { "epoch": 1.0883851131200954, "grad_norm": 0.4244402050971985, "learning_rate": 7.272955862811482e-06, "loss": 0.2625, "step": 23717 }, { "epoch": 1.08843100362535, "grad_norm": 0.49267107248306274, "learning_rate": 7.272737471158122e-06, "loss": 0.3844, "step": 23718 }, { "epoch": 1.0884768941306044, "grad_norm": 0.4852755069732666, "learning_rate": 7.272519074039546e-06, "loss": 0.3436, "step": 23719 }, { "epoch": 1.088522784635859, "grad_norm": 0.5186461806297302, "learning_rate": 7.272300671456279e-06, "loss": 0.4182, "step": 23720 }, { "epoch": 1.0885686751411132, "grad_norm": 0.4445938766002655, "learning_rate": 7.272082263408846e-06, "loss": 0.3078, "step": 23721 }, { "epoch": 1.0886145656463677, "grad_norm": 0.5094393491744995, "learning_rate": 7.271863849897772e-06, "loss": 0.4325, "step": 23722 }, { "epoch": 1.0886604561516222, "grad_norm": 0.47436264157295227, "learning_rate": 7.271645430923584e-06, "loss": 0.3225, "step": 23723 }, { "epoch": 1.0887063466568767, "grad_norm": 0.4687618315219879, "learning_rate": 7.271427006486803e-06, "loss": 0.3578, "step": 23724 }, { "epoch": 1.0887522371621312, "grad_norm": 0.4699172377586365, "learning_rate": 7.271208576587958e-06, "loss": 0.3092, "step": 23725 }, { "epoch": 1.0887981276673857, "grad_norm": 0.4778865873813629, "learning_rate": 7.270990141227574e-06, "loss": 0.3962, "step": 23726 }, { "epoch": 1.08884401817264, "grad_norm": 0.4780392348766327, "learning_rate": 7.270771700406175e-06, "loss": 0.3446, "step": 23727 }, { "epoch": 1.0888899086778945, "grad_norm": 0.4364818334579468, "learning_rate": 7.270553254124286e-06, "loss": 0.3145, "step": 23728 }, { "epoch": 1.088935799183149, "grad_norm": 0.44355857372283936, "learning_rate": 7.270334802382435e-06, "loss": 0.2938, "step": 23729 }, { "epoch": 1.0889816896884035, "grad_norm": 0.4932951033115387, "learning_rate": 7.270116345181143e-06, "loss": 0.4324, "step": 23730 }, { "epoch": 1.089027580193658, "grad_norm": 0.5153430104255676, "learning_rate": 7.26989788252094e-06, "loss": 0.4017, "step": 23731 }, { "epoch": 1.0890734706989125, "grad_norm": 0.5497186183929443, "learning_rate": 7.2696794144023475e-06, "loss": 0.4318, "step": 23732 }, { "epoch": 1.0891193612041667, "grad_norm": 0.4481191337108612, "learning_rate": 7.269460940825892e-06, "loss": 0.3195, "step": 23733 }, { "epoch": 1.0891652517094212, "grad_norm": 0.47311103343963623, "learning_rate": 7.269242461792099e-06, "loss": 0.3995, "step": 23734 }, { "epoch": 1.0892111422146757, "grad_norm": 0.5088847875595093, "learning_rate": 7.269023977301495e-06, "loss": 0.4427, "step": 23735 }, { "epoch": 1.0892570327199302, "grad_norm": 0.4518539011478424, "learning_rate": 7.268805487354604e-06, "loss": 0.3399, "step": 23736 }, { "epoch": 1.0893029232251847, "grad_norm": 0.4472644627094269, "learning_rate": 7.2685869919519525e-06, "loss": 0.3013, "step": 23737 }, { "epoch": 1.0893488137304392, "grad_norm": 0.5100380778312683, "learning_rate": 7.2683684910940654e-06, "loss": 0.3898, "step": 23738 }, { "epoch": 1.0893947042356937, "grad_norm": 0.5007476806640625, "learning_rate": 7.2681499847814675e-06, "loss": 0.3935, "step": 23739 }, { "epoch": 1.089440594740948, "grad_norm": 0.4192773103713989, "learning_rate": 7.267931473014685e-06, "loss": 0.2906, "step": 23740 }, { "epoch": 1.0894864852462025, "grad_norm": 0.48261842131614685, "learning_rate": 7.267712955794242e-06, "loss": 0.3887, "step": 23741 }, { "epoch": 1.089532375751457, "grad_norm": 0.42712298035621643, "learning_rate": 7.2674944331206665e-06, "loss": 0.296, "step": 23742 }, { "epoch": 1.0895782662567115, "grad_norm": 0.4437287151813507, "learning_rate": 7.267275904994482e-06, "loss": 0.3352, "step": 23743 }, { "epoch": 1.089624156761966, "grad_norm": 0.4409063458442688, "learning_rate": 7.267057371416214e-06, "loss": 0.33, "step": 23744 }, { "epoch": 1.0896700472672205, "grad_norm": 0.48689156770706177, "learning_rate": 7.266838832386388e-06, "loss": 0.4358, "step": 23745 }, { "epoch": 1.0897159377724748, "grad_norm": 0.4629826545715332, "learning_rate": 7.266620287905533e-06, "loss": 0.3295, "step": 23746 }, { "epoch": 1.0897618282777293, "grad_norm": 0.5226610898971558, "learning_rate": 7.266401737974168e-06, "loss": 0.4519, "step": 23747 }, { "epoch": 1.0898077187829838, "grad_norm": 0.5054270029067993, "learning_rate": 7.266183182592824e-06, "loss": 0.3746, "step": 23748 }, { "epoch": 1.0898536092882383, "grad_norm": 0.43981775641441345, "learning_rate": 7.265964621762025e-06, "loss": 0.3076, "step": 23749 }, { "epoch": 1.0898994997934928, "grad_norm": 0.4779706597328186, "learning_rate": 7.265746055482294e-06, "loss": 0.3563, "step": 23750 }, { "epoch": 1.0899453902987473, "grad_norm": 0.4893731474876404, "learning_rate": 7.2655274837541626e-06, "loss": 0.3602, "step": 23751 }, { "epoch": 1.0899912808040018, "grad_norm": 0.44082602858543396, "learning_rate": 7.265308906578149e-06, "loss": 0.3323, "step": 23752 }, { "epoch": 1.090037171309256, "grad_norm": 0.47685953974723816, "learning_rate": 7.265090323954785e-06, "loss": 0.3449, "step": 23753 }, { "epoch": 1.0900830618145105, "grad_norm": 0.4497973918914795, "learning_rate": 7.264871735884592e-06, "loss": 0.3354, "step": 23754 }, { "epoch": 1.090128952319765, "grad_norm": 0.45801833271980286, "learning_rate": 7.264653142368097e-06, "loss": 0.3396, "step": 23755 }, { "epoch": 1.0901748428250195, "grad_norm": 0.47226232290267944, "learning_rate": 7.264434543405827e-06, "loss": 0.3227, "step": 23756 }, { "epoch": 1.090220733330274, "grad_norm": 0.48458725214004517, "learning_rate": 7.264215938998307e-06, "loss": 0.3654, "step": 23757 }, { "epoch": 1.0902666238355285, "grad_norm": 0.45109155774116516, "learning_rate": 7.263997329146061e-06, "loss": 0.3319, "step": 23758 }, { "epoch": 1.0903125143407828, "grad_norm": 0.45144978165626526, "learning_rate": 7.263778713849616e-06, "loss": 0.3547, "step": 23759 }, { "epoch": 1.0903584048460373, "grad_norm": 0.4296868145465851, "learning_rate": 7.2635600931094995e-06, "loss": 0.3279, "step": 23760 }, { "epoch": 1.0904042953512918, "grad_norm": 0.423123836517334, "learning_rate": 7.263341466926234e-06, "loss": 0.2995, "step": 23761 }, { "epoch": 1.0904501858565463, "grad_norm": 0.4677504301071167, "learning_rate": 7.263122835300346e-06, "loss": 0.3272, "step": 23762 }, { "epoch": 1.0904960763618008, "grad_norm": 0.4793601930141449, "learning_rate": 7.262904198232364e-06, "loss": 0.3691, "step": 23763 }, { "epoch": 1.0905419668670553, "grad_norm": 0.47027599811553955, "learning_rate": 7.262685555722809e-06, "loss": 0.3846, "step": 23764 }, { "epoch": 1.0905878573723096, "grad_norm": 0.43046247959136963, "learning_rate": 7.26246690777221e-06, "loss": 0.2846, "step": 23765 }, { "epoch": 1.090633747877564, "grad_norm": 0.47335097193717957, "learning_rate": 7.262248254381093e-06, "loss": 0.3671, "step": 23766 }, { "epoch": 1.0906796383828186, "grad_norm": 0.4892774224281311, "learning_rate": 7.262029595549982e-06, "loss": 0.3849, "step": 23767 }, { "epoch": 1.090725528888073, "grad_norm": 0.42006880044937134, "learning_rate": 7.261810931279405e-06, "loss": 0.2898, "step": 23768 }, { "epoch": 1.0907714193933276, "grad_norm": 0.4683209955692291, "learning_rate": 7.261592261569885e-06, "loss": 0.3281, "step": 23769 }, { "epoch": 1.090817309898582, "grad_norm": 0.4926886260509491, "learning_rate": 7.261373586421951e-06, "loss": 0.427, "step": 23770 }, { "epoch": 1.0908632004038363, "grad_norm": 0.5026347041130066, "learning_rate": 7.261154905836128e-06, "loss": 0.449, "step": 23771 }, { "epoch": 1.0909090909090908, "grad_norm": 0.4456912577152252, "learning_rate": 7.260936219812939e-06, "loss": 0.329, "step": 23772 }, { "epoch": 1.0909549814143453, "grad_norm": 0.44129589200019836, "learning_rate": 7.260717528352912e-06, "loss": 0.3557, "step": 23773 }, { "epoch": 1.0910008719195998, "grad_norm": 0.5035292506217957, "learning_rate": 7.260498831456573e-06, "loss": 0.3313, "step": 23774 }, { "epoch": 1.0910467624248543, "grad_norm": 0.4490951597690582, "learning_rate": 7.260280129124449e-06, "loss": 0.349, "step": 23775 }, { "epoch": 1.0910926529301088, "grad_norm": 0.43476301431655884, "learning_rate": 7.260061421357063e-06, "loss": 0.3292, "step": 23776 }, { "epoch": 1.0911385434353633, "grad_norm": 0.4856126308441162, "learning_rate": 7.259842708154944e-06, "loss": 0.4148, "step": 23777 }, { "epoch": 1.0911844339406176, "grad_norm": 0.46921876072883606, "learning_rate": 7.259623989518616e-06, "loss": 0.3376, "step": 23778 }, { "epoch": 1.091230324445872, "grad_norm": 0.4979473948478699, "learning_rate": 7.259405265448605e-06, "loss": 0.4038, "step": 23779 }, { "epoch": 1.0912762149511266, "grad_norm": 0.4819836914539337, "learning_rate": 7.259186535945437e-06, "loss": 0.342, "step": 23780 }, { "epoch": 1.091322105456381, "grad_norm": 0.4810265898704529, "learning_rate": 7.25896780100964e-06, "loss": 0.4267, "step": 23781 }, { "epoch": 1.0913679959616356, "grad_norm": 0.45266208052635193, "learning_rate": 7.258749060641738e-06, "loss": 0.374, "step": 23782 }, { "epoch": 1.09141388646689, "grad_norm": 0.4391326606273651, "learning_rate": 7.258530314842257e-06, "loss": 0.3189, "step": 23783 }, { "epoch": 1.0914597769721444, "grad_norm": 0.5090668201446533, "learning_rate": 7.258311563611723e-06, "loss": 0.3757, "step": 23784 }, { "epoch": 1.0915056674773989, "grad_norm": 0.4570576548576355, "learning_rate": 7.258092806950663e-06, "loss": 0.3208, "step": 23785 }, { "epoch": 1.0915515579826534, "grad_norm": 0.48916321992874146, "learning_rate": 7.257874044859603e-06, "loss": 0.3917, "step": 23786 }, { "epoch": 1.0915974484879079, "grad_norm": 0.45773088932037354, "learning_rate": 7.2576552773390676e-06, "loss": 0.3229, "step": 23787 }, { "epoch": 1.0916433389931623, "grad_norm": 0.4675646424293518, "learning_rate": 7.257436504389583e-06, "loss": 0.36, "step": 23788 }, { "epoch": 1.0916892294984168, "grad_norm": 0.4666242003440857, "learning_rate": 7.257217726011677e-06, "loss": 0.3572, "step": 23789 }, { "epoch": 1.0917351200036713, "grad_norm": 0.4719341993331909, "learning_rate": 7.2569989422058755e-06, "loss": 0.421, "step": 23790 }, { "epoch": 1.0917810105089256, "grad_norm": 0.4655524790287018, "learning_rate": 7.2567801529727035e-06, "loss": 0.3228, "step": 23791 }, { "epoch": 1.0918269010141801, "grad_norm": 0.4659241735935211, "learning_rate": 7.256561358312687e-06, "loss": 0.3502, "step": 23792 }, { "epoch": 1.0918727915194346, "grad_norm": 0.4599721431732178, "learning_rate": 7.2563425582263525e-06, "loss": 0.4093, "step": 23793 }, { "epoch": 1.0919186820246891, "grad_norm": 0.460589200258255, "learning_rate": 7.256123752714227e-06, "loss": 0.3039, "step": 23794 }, { "epoch": 1.0919645725299436, "grad_norm": 0.44122937321662903, "learning_rate": 7.2559049417768345e-06, "loss": 0.3044, "step": 23795 }, { "epoch": 1.092010463035198, "grad_norm": 0.4588642418384552, "learning_rate": 7.255686125414704e-06, "loss": 0.3128, "step": 23796 }, { "epoch": 1.0920563535404524, "grad_norm": 0.49759557843208313, "learning_rate": 7.255467303628361e-06, "loss": 0.4116, "step": 23797 }, { "epoch": 1.0921022440457069, "grad_norm": 0.5044850707054138, "learning_rate": 7.25524847641833e-06, "loss": 0.373, "step": 23798 }, { "epoch": 1.0921481345509614, "grad_norm": 0.4305585026741028, "learning_rate": 7.255029643785136e-06, "loss": 0.3315, "step": 23799 }, { "epoch": 1.0921940250562159, "grad_norm": 0.4639173150062561, "learning_rate": 7.254810805729311e-06, "loss": 0.3628, "step": 23800 }, { "epoch": 1.0922399155614704, "grad_norm": 0.4546792507171631, "learning_rate": 7.254591962251376e-06, "loss": 0.3294, "step": 23801 }, { "epoch": 1.0922858060667249, "grad_norm": 0.47488027811050415, "learning_rate": 7.254373113351859e-06, "loss": 0.368, "step": 23802 }, { "epoch": 1.0923316965719794, "grad_norm": 0.46031689643859863, "learning_rate": 7.254154259031288e-06, "loss": 0.3739, "step": 23803 }, { "epoch": 1.0923775870772336, "grad_norm": 0.5144749879837036, "learning_rate": 7.253935399290184e-06, "loss": 0.388, "step": 23804 }, { "epoch": 1.0924234775824881, "grad_norm": 0.4697451591491699, "learning_rate": 7.253716534129078e-06, "loss": 0.3372, "step": 23805 }, { "epoch": 1.0924693680877426, "grad_norm": 0.4673503637313843, "learning_rate": 7.253497663548495e-06, "loss": 0.3132, "step": 23806 }, { "epoch": 1.0925152585929971, "grad_norm": 0.4911215007305145, "learning_rate": 7.253278787548962e-06, "loss": 0.4051, "step": 23807 }, { "epoch": 1.0925611490982516, "grad_norm": 0.45835134387016296, "learning_rate": 7.253059906131006e-06, "loss": 0.3212, "step": 23808 }, { "epoch": 1.0926070396035061, "grad_norm": 0.4836196005344391, "learning_rate": 7.2528410192951496e-06, "loss": 0.2525, "step": 23809 }, { "epoch": 1.0926529301087604, "grad_norm": 0.452505886554718, "learning_rate": 7.2526221270419215e-06, "loss": 0.2971, "step": 23810 }, { "epoch": 1.092698820614015, "grad_norm": 0.4454563558101654, "learning_rate": 7.252403229371851e-06, "loss": 0.3525, "step": 23811 }, { "epoch": 1.0927447111192694, "grad_norm": 0.4527369439601898, "learning_rate": 7.2521843262854585e-06, "loss": 0.3502, "step": 23812 }, { "epoch": 1.092790601624524, "grad_norm": 0.4554269313812256, "learning_rate": 7.2519654177832755e-06, "loss": 0.3629, "step": 23813 }, { "epoch": 1.0928364921297784, "grad_norm": 0.5047026872634888, "learning_rate": 7.251746503865825e-06, "loss": 0.4132, "step": 23814 }, { "epoch": 1.092882382635033, "grad_norm": 0.4550182521343231, "learning_rate": 7.251527584533635e-06, "loss": 0.3077, "step": 23815 }, { "epoch": 1.0929282731402872, "grad_norm": 0.4915163815021515, "learning_rate": 7.251308659787233e-06, "loss": 0.3832, "step": 23816 }, { "epoch": 1.0929741636455417, "grad_norm": 0.4778660535812378, "learning_rate": 7.2510897296271434e-06, "loss": 0.4038, "step": 23817 }, { "epoch": 1.0930200541507962, "grad_norm": 0.4644222557544708, "learning_rate": 7.2508707940538925e-06, "loss": 0.3737, "step": 23818 }, { "epoch": 1.0930659446560507, "grad_norm": 0.45537397265434265, "learning_rate": 7.25065185306801e-06, "loss": 0.3557, "step": 23819 }, { "epoch": 1.0931118351613052, "grad_norm": 0.4812740683555603, "learning_rate": 7.250432906670019e-06, "loss": 0.4309, "step": 23820 }, { "epoch": 1.0931577256665597, "grad_norm": 0.4736979603767395, "learning_rate": 7.250213954860446e-06, "loss": 0.3299, "step": 23821 }, { "epoch": 1.093203616171814, "grad_norm": 0.47063735127449036, "learning_rate": 7.2499949976398195e-06, "loss": 0.3503, "step": 23822 }, { "epoch": 1.0932495066770684, "grad_norm": 0.459395170211792, "learning_rate": 7.249776035008664e-06, "loss": 0.3651, "step": 23823 }, { "epoch": 1.093295397182323, "grad_norm": 0.4948347508907318, "learning_rate": 7.24955706696751e-06, "loss": 0.4022, "step": 23824 }, { "epoch": 1.0933412876875774, "grad_norm": 0.4732950031757355, "learning_rate": 7.24933809351688e-06, "loss": 0.3573, "step": 23825 }, { "epoch": 1.093387178192832, "grad_norm": 0.4327717423439026, "learning_rate": 7.2491191146573e-06, "loss": 0.2647, "step": 23826 }, { "epoch": 1.0934330686980864, "grad_norm": 0.46840932965278625, "learning_rate": 7.2489001303893e-06, "loss": 0.3658, "step": 23827 }, { "epoch": 1.093478959203341, "grad_norm": 0.4735131859779358, "learning_rate": 7.248681140713405e-06, "loss": 0.3346, "step": 23828 }, { "epoch": 1.0935248497085952, "grad_norm": 0.463206022977829, "learning_rate": 7.24846214563014e-06, "loss": 0.3254, "step": 23829 }, { "epoch": 1.0935707402138497, "grad_norm": 0.520291268825531, "learning_rate": 7.248243145140036e-06, "loss": 0.4152, "step": 23830 }, { "epoch": 1.0936166307191042, "grad_norm": 0.4621307849884033, "learning_rate": 7.2480241392436155e-06, "loss": 0.364, "step": 23831 }, { "epoch": 1.0936625212243587, "grad_norm": 0.4838724434375763, "learning_rate": 7.247805127941405e-06, "loss": 0.388, "step": 23832 }, { "epoch": 1.0937084117296132, "grad_norm": 0.46205249428749084, "learning_rate": 7.247586111233935e-06, "loss": 0.3222, "step": 23833 }, { "epoch": 1.0937543022348677, "grad_norm": 0.5231717824935913, "learning_rate": 7.247367089121729e-06, "loss": 0.4484, "step": 23834 }, { "epoch": 1.093800192740122, "grad_norm": 0.4860301911830902, "learning_rate": 7.247148061605314e-06, "loss": 0.3566, "step": 23835 }, { "epoch": 1.0938460832453765, "grad_norm": 0.4353000521659851, "learning_rate": 7.2469290286852165e-06, "loss": 0.2877, "step": 23836 }, { "epoch": 1.093891973750631, "grad_norm": 0.5021859407424927, "learning_rate": 7.246709990361964e-06, "loss": 0.4575, "step": 23837 }, { "epoch": 1.0939378642558855, "grad_norm": 0.43979597091674805, "learning_rate": 7.246490946636084e-06, "loss": 0.3647, "step": 23838 }, { "epoch": 1.09398375476114, "grad_norm": 0.5027796626091003, "learning_rate": 7.246271897508102e-06, "loss": 0.3874, "step": 23839 }, { "epoch": 1.0940296452663945, "grad_norm": 0.46337106823921204, "learning_rate": 7.246052842978544e-06, "loss": 0.3631, "step": 23840 }, { "epoch": 1.094075535771649, "grad_norm": 0.5020548105239868, "learning_rate": 7.24583378304794e-06, "loss": 0.4274, "step": 23841 }, { "epoch": 1.0941214262769032, "grad_norm": 0.5021292567253113, "learning_rate": 7.245614717716812e-06, "loss": 0.3626, "step": 23842 }, { "epoch": 1.0941673167821577, "grad_norm": 0.47919702529907227, "learning_rate": 7.245395646985691e-06, "loss": 0.387, "step": 23843 }, { "epoch": 1.0942132072874122, "grad_norm": 0.45636335015296936, "learning_rate": 7.245176570855102e-06, "loss": 0.3361, "step": 23844 }, { "epoch": 1.0942590977926667, "grad_norm": 0.4448567032814026, "learning_rate": 7.24495748932557e-06, "loss": 0.3358, "step": 23845 }, { "epoch": 1.0943049882979212, "grad_norm": 0.48710906505584717, "learning_rate": 7.2447384023976265e-06, "loss": 0.4065, "step": 23846 }, { "epoch": 1.0943508788031757, "grad_norm": 0.4803224503993988, "learning_rate": 7.2445193100717935e-06, "loss": 0.3668, "step": 23847 }, { "epoch": 1.09439676930843, "grad_norm": 0.4602110981941223, "learning_rate": 7.244300212348603e-06, "loss": 0.3648, "step": 23848 }, { "epoch": 1.0944426598136845, "grad_norm": 0.4637419283390045, "learning_rate": 7.244081109228576e-06, "loss": 0.3273, "step": 23849 }, { "epoch": 1.094488550318939, "grad_norm": 0.4870504140853882, "learning_rate": 7.2438620007122425e-06, "loss": 0.3853, "step": 23850 }, { "epoch": 1.0945344408241935, "grad_norm": 0.47301194071769714, "learning_rate": 7.24364288680013e-06, "loss": 0.3163, "step": 23851 }, { "epoch": 1.094580331329448, "grad_norm": 0.4585762619972229, "learning_rate": 7.2434237674927634e-06, "loss": 0.34, "step": 23852 }, { "epoch": 1.0946262218347025, "grad_norm": 0.4245307445526123, "learning_rate": 7.243204642790672e-06, "loss": 0.3212, "step": 23853 }, { "epoch": 1.0946721123399568, "grad_norm": 0.45641079545021057, "learning_rate": 7.2429855126943806e-06, "loss": 0.3276, "step": 23854 }, { "epoch": 1.0947180028452113, "grad_norm": 0.47232240438461304, "learning_rate": 7.242766377204415e-06, "loss": 0.3814, "step": 23855 }, { "epoch": 1.0947638933504658, "grad_norm": 0.46342048048973083, "learning_rate": 7.242547236321308e-06, "loss": 0.3675, "step": 23856 }, { "epoch": 1.0948097838557203, "grad_norm": 0.4842508137226105, "learning_rate": 7.242328090045581e-06, "loss": 0.3649, "step": 23857 }, { "epoch": 1.0948556743609748, "grad_norm": 0.4826911687850952, "learning_rate": 7.242108938377762e-06, "loss": 0.4135, "step": 23858 }, { "epoch": 1.0949015648662292, "grad_norm": 0.5023464560508728, "learning_rate": 7.241889781318379e-06, "loss": 0.3668, "step": 23859 }, { "epoch": 1.0949474553714835, "grad_norm": 0.4372011125087738, "learning_rate": 7.2416706188679585e-06, "loss": 0.3262, "step": 23860 }, { "epoch": 1.094993345876738, "grad_norm": 0.47171303629875183, "learning_rate": 7.241451451027027e-06, "loss": 0.3631, "step": 23861 }, { "epoch": 1.0950392363819925, "grad_norm": 0.5472835898399353, "learning_rate": 7.241232277796113e-06, "loss": 0.3197, "step": 23862 }, { "epoch": 1.095085126887247, "grad_norm": 0.4706948399543762, "learning_rate": 7.241013099175744e-06, "loss": 0.3305, "step": 23863 }, { "epoch": 1.0951310173925015, "grad_norm": 0.44000208377838135, "learning_rate": 7.240793915166444e-06, "loss": 0.336, "step": 23864 }, { "epoch": 1.095176907897756, "grad_norm": 0.4843046963214874, "learning_rate": 7.240574725768743e-06, "loss": 0.3277, "step": 23865 }, { "epoch": 1.0952227984030105, "grad_norm": 0.483981728553772, "learning_rate": 7.240355530983165e-06, "loss": 0.388, "step": 23866 }, { "epoch": 1.0952686889082648, "grad_norm": 0.43154647946357727, "learning_rate": 7.240136330810241e-06, "loss": 0.3198, "step": 23867 }, { "epoch": 1.0953145794135193, "grad_norm": 0.4725511372089386, "learning_rate": 7.2399171252504965e-06, "loss": 0.352, "step": 23868 }, { "epoch": 1.0953604699187738, "grad_norm": 0.4869677424430847, "learning_rate": 7.239697914304457e-06, "loss": 0.3394, "step": 23869 }, { "epoch": 1.0954063604240283, "grad_norm": 0.4616750180721283, "learning_rate": 7.2394786979726505e-06, "loss": 0.336, "step": 23870 }, { "epoch": 1.0954522509292828, "grad_norm": 0.45213887095451355, "learning_rate": 7.239259476255604e-06, "loss": 0.3414, "step": 23871 }, { "epoch": 1.0954981414345373, "grad_norm": 0.4849543571472168, "learning_rate": 7.239040249153847e-06, "loss": 0.3476, "step": 23872 }, { "epoch": 1.0955440319397916, "grad_norm": 0.49670228362083435, "learning_rate": 7.238821016667905e-06, "loss": 0.4076, "step": 23873 }, { "epoch": 1.095589922445046, "grad_norm": 0.4579293131828308, "learning_rate": 7.238601778798304e-06, "loss": 0.3249, "step": 23874 }, { "epoch": 1.0956358129503005, "grad_norm": 0.46338701248168945, "learning_rate": 7.238382535545572e-06, "loss": 0.3791, "step": 23875 }, { "epoch": 1.095681703455555, "grad_norm": 0.4487346112728119, "learning_rate": 7.238163286910238e-06, "loss": 0.3363, "step": 23876 }, { "epoch": 1.0957275939608095, "grad_norm": 0.46870821714401245, "learning_rate": 7.2379440328928255e-06, "loss": 0.4302, "step": 23877 }, { "epoch": 1.095773484466064, "grad_norm": 0.4493914246559143, "learning_rate": 7.237724773493866e-06, "loss": 0.341, "step": 23878 }, { "epoch": 1.0958193749713185, "grad_norm": 0.48510655760765076, "learning_rate": 7.237505508713884e-06, "loss": 0.3465, "step": 23879 }, { "epoch": 1.0958652654765728, "grad_norm": 0.4675643742084503, "learning_rate": 7.237286238553407e-06, "loss": 0.3639, "step": 23880 }, { "epoch": 1.0959111559818273, "grad_norm": 0.508488655090332, "learning_rate": 7.2370669630129615e-06, "loss": 0.413, "step": 23881 }, { "epoch": 1.0959570464870818, "grad_norm": 0.47606125473976135, "learning_rate": 7.236847682093078e-06, "loss": 0.3574, "step": 23882 }, { "epoch": 1.0960029369923363, "grad_norm": 0.4884919822216034, "learning_rate": 7.236628395794281e-06, "loss": 0.4183, "step": 23883 }, { "epoch": 1.0960488274975908, "grad_norm": 0.4317469894886017, "learning_rate": 7.236409104117099e-06, "loss": 0.3248, "step": 23884 }, { "epoch": 1.0960947180028453, "grad_norm": 0.460892915725708, "learning_rate": 7.23618980706206e-06, "loss": 0.3244, "step": 23885 }, { "epoch": 1.0961406085080996, "grad_norm": 0.3895472586154938, "learning_rate": 7.235970504629688e-06, "loss": 0.237, "step": 23886 }, { "epoch": 1.096186499013354, "grad_norm": 0.46065688133239746, "learning_rate": 7.235751196820513e-06, "loss": 0.343, "step": 23887 }, { "epoch": 1.0962323895186086, "grad_norm": 0.49066001176834106, "learning_rate": 7.235531883635063e-06, "loss": 0.4162, "step": 23888 }, { "epoch": 1.096278280023863, "grad_norm": 0.4778464436531067, "learning_rate": 7.235312565073864e-06, "loss": 0.3794, "step": 23889 }, { "epoch": 1.0963241705291176, "grad_norm": 0.5221874117851257, "learning_rate": 7.2350932411374446e-06, "loss": 0.434, "step": 23890 }, { "epoch": 1.096370061034372, "grad_norm": 0.4347390830516815, "learning_rate": 7.23487391182633e-06, "loss": 0.3194, "step": 23891 }, { "epoch": 1.0964159515396266, "grad_norm": 0.4671892821788788, "learning_rate": 7.234654577141048e-06, "loss": 0.3189, "step": 23892 }, { "epoch": 1.0964618420448808, "grad_norm": 0.449482262134552, "learning_rate": 7.2344352370821295e-06, "loss": 0.3193, "step": 23893 }, { "epoch": 1.0965077325501353, "grad_norm": 0.4491939842700958, "learning_rate": 7.234215891650098e-06, "loss": 0.3341, "step": 23894 }, { "epoch": 1.0965536230553898, "grad_norm": 0.4929482340812683, "learning_rate": 7.233996540845482e-06, "loss": 0.3637, "step": 23895 }, { "epoch": 1.0965995135606443, "grad_norm": 0.4477548599243164, "learning_rate": 7.2337771846688125e-06, "loss": 0.3241, "step": 23896 }, { "epoch": 1.0966454040658988, "grad_norm": 0.44800859689712524, "learning_rate": 7.23355782312061e-06, "loss": 0.3233, "step": 23897 }, { "epoch": 1.0966912945711533, "grad_norm": 0.4924427568912506, "learning_rate": 7.233338456201409e-06, "loss": 0.3756, "step": 23898 }, { "epoch": 1.0967371850764076, "grad_norm": 0.44594806432724, "learning_rate": 7.233119083911731e-06, "loss": 0.3206, "step": 23899 }, { "epoch": 1.096783075581662, "grad_norm": 0.4848455786705017, "learning_rate": 7.232899706252108e-06, "loss": 0.3748, "step": 23900 }, { "epoch": 1.0968289660869166, "grad_norm": 0.42995554208755493, "learning_rate": 7.232680323223067e-06, "loss": 0.3188, "step": 23901 }, { "epoch": 1.096874856592171, "grad_norm": 0.46562460064888, "learning_rate": 7.232460934825132e-06, "loss": 0.4028, "step": 23902 }, { "epoch": 1.0969207470974256, "grad_norm": 0.5087507367134094, "learning_rate": 7.232241541058834e-06, "loss": 0.4107, "step": 23903 }, { "epoch": 1.09696663760268, "grad_norm": 0.4918316900730133, "learning_rate": 7.232022141924701e-06, "loss": 0.4457, "step": 23904 }, { "epoch": 1.0970125281079344, "grad_norm": 0.5147467851638794, "learning_rate": 7.231802737423257e-06, "loss": 0.2945, "step": 23905 }, { "epoch": 1.0970584186131889, "grad_norm": 0.4690674841403961, "learning_rate": 7.231583327555033e-06, "loss": 0.3866, "step": 23906 }, { "epoch": 1.0971043091184434, "grad_norm": 0.5054628252983093, "learning_rate": 7.231363912320556e-06, "loss": 0.4408, "step": 23907 }, { "epoch": 1.0971501996236979, "grad_norm": 0.473814994096756, "learning_rate": 7.231144491720351e-06, "loss": 0.3847, "step": 23908 }, { "epoch": 1.0971960901289524, "grad_norm": 0.4512443542480469, "learning_rate": 7.230925065754949e-06, "loss": 0.3435, "step": 23909 }, { "epoch": 1.0972419806342069, "grad_norm": 0.5123233795166016, "learning_rate": 7.230705634424876e-06, "loss": 0.418, "step": 23910 }, { "epoch": 1.0972878711394611, "grad_norm": 0.4995267689228058, "learning_rate": 7.230486197730662e-06, "loss": 0.4127, "step": 23911 }, { "epoch": 1.0973337616447156, "grad_norm": 0.5109555721282959, "learning_rate": 7.23026675567283e-06, "loss": 0.4773, "step": 23912 }, { "epoch": 1.0973796521499701, "grad_norm": 0.4698633551597595, "learning_rate": 7.2300473082519105e-06, "loss": 0.3604, "step": 23913 }, { "epoch": 1.0974255426552246, "grad_norm": 0.4330309331417084, "learning_rate": 7.229827855468432e-06, "loss": 0.2692, "step": 23914 }, { "epoch": 1.0974714331604791, "grad_norm": 0.45150139927864075, "learning_rate": 7.229608397322921e-06, "loss": 0.2744, "step": 23915 }, { "epoch": 1.0975173236657336, "grad_norm": 0.507512092590332, "learning_rate": 7.2293889338159055e-06, "loss": 0.4556, "step": 23916 }, { "epoch": 1.0975632141709881, "grad_norm": 0.4656270742416382, "learning_rate": 7.229169464947914e-06, "loss": 0.3476, "step": 23917 }, { "epoch": 1.0976091046762424, "grad_norm": 0.4692942798137665, "learning_rate": 7.228949990719473e-06, "loss": 0.3322, "step": 23918 }, { "epoch": 1.097654995181497, "grad_norm": 0.46237239241600037, "learning_rate": 7.22873051113111e-06, "loss": 0.3607, "step": 23919 }, { "epoch": 1.0977008856867514, "grad_norm": 0.489328533411026, "learning_rate": 7.228511026183353e-06, "loss": 0.3563, "step": 23920 }, { "epoch": 1.097746776192006, "grad_norm": 0.4600256383419037, "learning_rate": 7.228291535876733e-06, "loss": 0.3421, "step": 23921 }, { "epoch": 1.0977926666972604, "grad_norm": 0.45237934589385986, "learning_rate": 7.228072040211773e-06, "loss": 0.2897, "step": 23922 }, { "epoch": 1.0978385572025149, "grad_norm": 0.4462050199508667, "learning_rate": 7.227852539189002e-06, "loss": 0.3305, "step": 23923 }, { "epoch": 1.0978844477077692, "grad_norm": 0.4860582649707794, "learning_rate": 7.227633032808952e-06, "loss": 0.3417, "step": 23924 }, { "epoch": 1.0979303382130237, "grad_norm": 0.44280707836151123, "learning_rate": 7.227413521072144e-06, "loss": 0.3151, "step": 23925 }, { "epoch": 1.0979762287182782, "grad_norm": 0.4673399031162262, "learning_rate": 7.227194003979111e-06, "loss": 0.3369, "step": 23926 }, { "epoch": 1.0980221192235327, "grad_norm": 0.48334190249443054, "learning_rate": 7.22697448153038e-06, "loss": 0.3884, "step": 23927 }, { "epoch": 1.0980680097287872, "grad_norm": 0.4762553572654724, "learning_rate": 7.226754953726478e-06, "loss": 0.3628, "step": 23928 }, { "epoch": 1.0981139002340417, "grad_norm": 0.4945092797279358, "learning_rate": 7.2265354205679325e-06, "loss": 0.3687, "step": 23929 }, { "epoch": 1.0981597907392961, "grad_norm": 0.45898112654685974, "learning_rate": 7.2263158820552735e-06, "loss": 0.3327, "step": 23930 }, { "epoch": 1.0982056812445504, "grad_norm": 0.444606214761734, "learning_rate": 7.226096338189026e-06, "loss": 0.3047, "step": 23931 }, { "epoch": 1.098251571749805, "grad_norm": 0.46046361327171326, "learning_rate": 7.22587678896972e-06, "loss": 0.3556, "step": 23932 }, { "epoch": 1.0982974622550594, "grad_norm": 0.44336238503456116, "learning_rate": 7.2256572343978825e-06, "loss": 0.3142, "step": 23933 }, { "epoch": 1.098343352760314, "grad_norm": 0.45828118920326233, "learning_rate": 7.2254376744740415e-06, "loss": 0.3582, "step": 23934 }, { "epoch": 1.0983892432655684, "grad_norm": 0.530708909034729, "learning_rate": 7.225218109198726e-06, "loss": 0.3805, "step": 23935 }, { "epoch": 1.098435133770823, "grad_norm": 0.436158686876297, "learning_rate": 7.224998538572463e-06, "loss": 0.3044, "step": 23936 }, { "epoch": 1.0984810242760772, "grad_norm": 0.46997302770614624, "learning_rate": 7.22477896259578e-06, "loss": 0.4233, "step": 23937 }, { "epoch": 1.0985269147813317, "grad_norm": 0.44957563281059265, "learning_rate": 7.224559381269208e-06, "loss": 0.3281, "step": 23938 }, { "epoch": 1.0985728052865862, "grad_norm": 0.4778108596801758, "learning_rate": 7.22433979459327e-06, "loss": 0.3716, "step": 23939 }, { "epoch": 1.0986186957918407, "grad_norm": 0.4355666935443878, "learning_rate": 7.224120202568498e-06, "loss": 0.3059, "step": 23940 }, { "epoch": 1.0986645862970952, "grad_norm": 0.46271151304244995, "learning_rate": 7.22390060519542e-06, "loss": 0.3929, "step": 23941 }, { "epoch": 1.0987104768023497, "grad_norm": 0.45417970418930054, "learning_rate": 7.223681002474561e-06, "loss": 0.3389, "step": 23942 }, { "epoch": 1.098756367307604, "grad_norm": 0.4929611384868622, "learning_rate": 7.223461394406452e-06, "loss": 0.4088, "step": 23943 }, { "epoch": 1.0988022578128585, "grad_norm": 0.4923354387283325, "learning_rate": 7.223241780991621e-06, "loss": 0.4291, "step": 23944 }, { "epoch": 1.098848148318113, "grad_norm": 0.47711026668548584, "learning_rate": 7.223022162230593e-06, "loss": 0.4143, "step": 23945 }, { "epoch": 1.0988940388233674, "grad_norm": 0.5049922466278076, "learning_rate": 7.222802538123898e-06, "loss": 0.4545, "step": 23946 }, { "epoch": 1.098939929328622, "grad_norm": 0.5047472715377808, "learning_rate": 7.222582908672067e-06, "loss": 0.4117, "step": 23947 }, { "epoch": 1.0989858198338764, "grad_norm": 0.4697808623313904, "learning_rate": 7.222363273875624e-06, "loss": 0.4552, "step": 23948 }, { "epoch": 1.0990317103391307, "grad_norm": 0.4586635231971741, "learning_rate": 7.222143633735099e-06, "loss": 0.3585, "step": 23949 }, { "epoch": 1.0990776008443852, "grad_norm": 0.412824422121048, "learning_rate": 7.22192398825102e-06, "loss": 0.2696, "step": 23950 }, { "epoch": 1.0991234913496397, "grad_norm": 0.4264359772205353, "learning_rate": 7.221704337423916e-06, "loss": 0.289, "step": 23951 }, { "epoch": 1.0991693818548942, "grad_norm": 0.489128053188324, "learning_rate": 7.221484681254314e-06, "loss": 0.4592, "step": 23952 }, { "epoch": 1.0992152723601487, "grad_norm": 0.46833640336990356, "learning_rate": 7.22126501974274e-06, "loss": 0.3883, "step": 23953 }, { "epoch": 1.0992611628654032, "grad_norm": 0.4626694321632385, "learning_rate": 7.221045352889729e-06, "loss": 0.36, "step": 23954 }, { "epoch": 1.0993070533706577, "grad_norm": 0.47982850670814514, "learning_rate": 7.220825680695802e-06, "loss": 0.3565, "step": 23955 }, { "epoch": 1.099352943875912, "grad_norm": 0.4920386075973511, "learning_rate": 7.22060600316149e-06, "loss": 0.3658, "step": 23956 }, { "epoch": 1.0993988343811665, "grad_norm": 0.47356560826301575, "learning_rate": 7.220386320287321e-06, "loss": 0.3536, "step": 23957 }, { "epoch": 1.099444724886421, "grad_norm": 0.48194918036460876, "learning_rate": 7.220166632073825e-06, "loss": 0.3514, "step": 23958 }, { "epoch": 1.0994906153916755, "grad_norm": 0.48215362429618835, "learning_rate": 7.219946938521528e-06, "loss": 0.3164, "step": 23959 }, { "epoch": 1.09953650589693, "grad_norm": 0.4408113360404968, "learning_rate": 7.219727239630961e-06, "loss": 0.3021, "step": 23960 }, { "epoch": 1.0995823964021845, "grad_norm": 0.464308500289917, "learning_rate": 7.219507535402649e-06, "loss": 0.3921, "step": 23961 }, { "epoch": 1.0996282869074387, "grad_norm": 0.4719148576259613, "learning_rate": 7.2192878258371225e-06, "loss": 0.3345, "step": 23962 }, { "epoch": 1.0996741774126932, "grad_norm": 0.48751190304756165, "learning_rate": 7.219068110934909e-06, "loss": 0.393, "step": 23963 }, { "epoch": 1.0997200679179477, "grad_norm": 0.4642772972583771, "learning_rate": 7.218848390696537e-06, "loss": 0.3316, "step": 23964 }, { "epoch": 1.0997659584232022, "grad_norm": 0.482030987739563, "learning_rate": 7.218628665122534e-06, "loss": 0.3998, "step": 23965 }, { "epoch": 1.0998118489284567, "grad_norm": 0.4981866180896759, "learning_rate": 7.218408934213431e-06, "loss": 0.3633, "step": 23966 }, { "epoch": 1.0998577394337112, "grad_norm": 0.5212001800537109, "learning_rate": 7.218189197969754e-06, "loss": 0.3884, "step": 23967 }, { "epoch": 1.0999036299389657, "grad_norm": 0.4873100817203522, "learning_rate": 7.217969456392031e-06, "loss": 0.3613, "step": 23968 }, { "epoch": 1.09994952044422, "grad_norm": 0.455390989780426, "learning_rate": 7.217749709480792e-06, "loss": 0.2966, "step": 23969 }, { "epoch": 1.0999954109494745, "grad_norm": 0.44289064407348633, "learning_rate": 7.217529957236566e-06, "loss": 0.3024, "step": 23970 }, { "epoch": 1.100041301454729, "grad_norm": 0.49131977558135986, "learning_rate": 7.217310199659879e-06, "loss": 0.3602, "step": 23971 }, { "epoch": 1.1000871919599835, "grad_norm": 0.4802387058734894, "learning_rate": 7.217090436751263e-06, "loss": 0.3948, "step": 23972 }, { "epoch": 1.100133082465238, "grad_norm": 0.47183507680892944, "learning_rate": 7.216870668511242e-06, "loss": 0.3968, "step": 23973 }, { "epoch": 1.1001789729704925, "grad_norm": 0.47388336062431335, "learning_rate": 7.216650894940347e-06, "loss": 0.3255, "step": 23974 }, { "epoch": 1.1002248634757468, "grad_norm": 0.479500412940979, "learning_rate": 7.2164311160391065e-06, "loss": 0.3953, "step": 23975 }, { "epoch": 1.1002707539810013, "grad_norm": 0.4891059696674347, "learning_rate": 7.216211331808049e-06, "loss": 0.44, "step": 23976 }, { "epoch": 1.1003166444862558, "grad_norm": 0.4971572160720825, "learning_rate": 7.215991542247702e-06, "loss": 0.4193, "step": 23977 }, { "epoch": 1.1003625349915103, "grad_norm": 0.45128193497657776, "learning_rate": 7.215771747358595e-06, "loss": 0.3447, "step": 23978 }, { "epoch": 1.1004084254967648, "grad_norm": 0.478458046913147, "learning_rate": 7.2155519471412574e-06, "loss": 0.3461, "step": 23979 }, { "epoch": 1.1004543160020193, "grad_norm": 0.4737812578678131, "learning_rate": 7.215332141596215e-06, "loss": 0.3627, "step": 23980 }, { "epoch": 1.1005002065072738, "grad_norm": 0.4808630049228668, "learning_rate": 7.215112330723999e-06, "loss": 0.4416, "step": 23981 }, { "epoch": 1.100546097012528, "grad_norm": 0.513425350189209, "learning_rate": 7.214892514525136e-06, "loss": 0.3953, "step": 23982 }, { "epoch": 1.1005919875177825, "grad_norm": 0.452442467212677, "learning_rate": 7.214672693000157e-06, "loss": 0.3492, "step": 23983 }, { "epoch": 1.100637878023037, "grad_norm": 0.47945713996887207, "learning_rate": 7.214452866149589e-06, "loss": 0.3568, "step": 23984 }, { "epoch": 1.1006837685282915, "grad_norm": 0.5168142914772034, "learning_rate": 7.2142330339739585e-06, "loss": 0.4286, "step": 23985 }, { "epoch": 1.100729659033546, "grad_norm": 0.45413991808891296, "learning_rate": 7.214013196473798e-06, "loss": 0.3273, "step": 23986 }, { "epoch": 1.1007755495388005, "grad_norm": 0.4300079345703125, "learning_rate": 7.2137933536496355e-06, "loss": 0.3478, "step": 23987 }, { "epoch": 1.1008214400440548, "grad_norm": 0.48418566584587097, "learning_rate": 7.213573505501996e-06, "loss": 0.3968, "step": 23988 }, { "epoch": 1.1008673305493093, "grad_norm": 0.4967102110385895, "learning_rate": 7.213353652031414e-06, "loss": 0.3387, "step": 23989 }, { "epoch": 1.1009132210545638, "grad_norm": 0.4554750621318817, "learning_rate": 7.213133793238412e-06, "loss": 0.3062, "step": 23990 }, { "epoch": 1.1009591115598183, "grad_norm": 0.5580289959907532, "learning_rate": 7.212913929123523e-06, "loss": 0.3075, "step": 23991 }, { "epoch": 1.1010050020650728, "grad_norm": 0.5773463845252991, "learning_rate": 7.212694059687275e-06, "loss": 0.3915, "step": 23992 }, { "epoch": 1.1010508925703273, "grad_norm": 0.46127811074256897, "learning_rate": 7.212474184930196e-06, "loss": 0.3437, "step": 23993 }, { "epoch": 1.1010967830755816, "grad_norm": 0.4302867650985718, "learning_rate": 7.212254304852815e-06, "loss": 0.3106, "step": 23994 }, { "epoch": 1.101142673580836, "grad_norm": 0.5103839635848999, "learning_rate": 7.21203441945566e-06, "loss": 0.4396, "step": 23995 }, { "epoch": 1.1011885640860906, "grad_norm": 0.5239191651344299, "learning_rate": 7.21181452873926e-06, "loss": 0.4235, "step": 23996 }, { "epoch": 1.101234454591345, "grad_norm": 0.4506818652153015, "learning_rate": 7.211594632704145e-06, "loss": 0.3131, "step": 23997 }, { "epoch": 1.1012803450965996, "grad_norm": 0.4750039875507355, "learning_rate": 7.211374731350843e-06, "loss": 0.3837, "step": 23998 }, { "epoch": 1.101326235601854, "grad_norm": 0.48315873742103577, "learning_rate": 7.211154824679882e-06, "loss": 0.3505, "step": 23999 }, { "epoch": 1.1013721261071083, "grad_norm": 0.4598916172981262, "learning_rate": 7.210934912691792e-06, "loss": 0.3284, "step": 24000 }, { "epoch": 1.1014180166123628, "grad_norm": 0.5089893341064453, "learning_rate": 7.2107149953871005e-06, "loss": 0.3594, "step": 24001 }, { "epoch": 1.1014639071176173, "grad_norm": 0.4536747932434082, "learning_rate": 7.210495072766337e-06, "loss": 0.3558, "step": 24002 }, { "epoch": 1.1015097976228718, "grad_norm": 0.4666988253593445, "learning_rate": 7.210275144830032e-06, "loss": 0.3635, "step": 24003 }, { "epoch": 1.1015556881281263, "grad_norm": 0.4292888939380646, "learning_rate": 7.2100552115787125e-06, "loss": 0.2877, "step": 24004 }, { "epoch": 1.1016015786333808, "grad_norm": 0.421105295419693, "learning_rate": 7.2098352730129074e-06, "loss": 0.3073, "step": 24005 }, { "epoch": 1.1016474691386353, "grad_norm": 0.45122382044792175, "learning_rate": 7.209615329133146e-06, "loss": 0.3216, "step": 24006 }, { "epoch": 1.1016933596438896, "grad_norm": 0.5267409086227417, "learning_rate": 7.209395379939956e-06, "loss": 0.4142, "step": 24007 }, { "epoch": 1.101739250149144, "grad_norm": 0.4449407160282135, "learning_rate": 7.2091754254338685e-06, "loss": 0.3203, "step": 24008 }, { "epoch": 1.1017851406543986, "grad_norm": 0.46412962675094604, "learning_rate": 7.208955465615412e-06, "loss": 0.3563, "step": 24009 }, { "epoch": 1.101831031159653, "grad_norm": 0.48806676268577576, "learning_rate": 7.208735500485113e-06, "loss": 0.4209, "step": 24010 }, { "epoch": 1.1018769216649076, "grad_norm": 0.42918795347213745, "learning_rate": 7.2085155300435035e-06, "loss": 0.306, "step": 24011 }, { "epoch": 1.101922812170162, "grad_norm": 0.44106441736221313, "learning_rate": 7.2082955542911115e-06, "loss": 0.3096, "step": 24012 }, { "epoch": 1.1019687026754164, "grad_norm": 0.4940788745880127, "learning_rate": 7.208075573228465e-06, "loss": 0.4145, "step": 24013 }, { "epoch": 1.1020145931806709, "grad_norm": 0.5082111954689026, "learning_rate": 7.207855586856094e-06, "loss": 0.3559, "step": 24014 }, { "epoch": 1.1020604836859254, "grad_norm": 0.4832407236099243, "learning_rate": 7.207635595174527e-06, "loss": 0.372, "step": 24015 }, { "epoch": 1.1021063741911798, "grad_norm": 0.5201005339622498, "learning_rate": 7.207415598184293e-06, "loss": 0.3522, "step": 24016 }, { "epoch": 1.1021522646964343, "grad_norm": 0.47041216492652893, "learning_rate": 7.207195595885921e-06, "loss": 0.3167, "step": 24017 }, { "epoch": 1.1021981552016888, "grad_norm": 0.46871718764305115, "learning_rate": 7.20697558827994e-06, "loss": 0.3449, "step": 24018 }, { "epoch": 1.1022440457069433, "grad_norm": 0.4304443895816803, "learning_rate": 7.206755575366879e-06, "loss": 0.3048, "step": 24019 }, { "epoch": 1.1022899362121976, "grad_norm": 0.5066967606544495, "learning_rate": 7.20653555714727e-06, "loss": 0.2805, "step": 24020 }, { "epoch": 1.1023358267174521, "grad_norm": 0.4770541191101074, "learning_rate": 7.206315533621636e-06, "loss": 0.3632, "step": 24021 }, { "epoch": 1.1023817172227066, "grad_norm": 0.5333412885665894, "learning_rate": 7.20609550479051e-06, "loss": 0.422, "step": 24022 }, { "epoch": 1.1024276077279611, "grad_norm": 0.448955237865448, "learning_rate": 7.205875470654422e-06, "loss": 0.2849, "step": 24023 }, { "epoch": 1.1024734982332156, "grad_norm": 0.507507860660553, "learning_rate": 7.205655431213898e-06, "loss": 0.3605, "step": 24024 }, { "epoch": 1.10251938873847, "grad_norm": 0.4831008315086365, "learning_rate": 7.2054353864694705e-06, "loss": 0.4347, "step": 24025 }, { "epoch": 1.1025652792437244, "grad_norm": 0.43569931387901306, "learning_rate": 7.205215336421667e-06, "loss": 0.3089, "step": 24026 }, { "epoch": 1.1026111697489789, "grad_norm": 0.5779900550842285, "learning_rate": 7.204995281071016e-06, "loss": 0.322, "step": 24027 }, { "epoch": 1.1026570602542334, "grad_norm": 0.473202645778656, "learning_rate": 7.204775220418046e-06, "loss": 0.3754, "step": 24028 }, { "epoch": 1.1027029507594879, "grad_norm": 0.4988468885421753, "learning_rate": 7.204555154463288e-06, "loss": 0.3866, "step": 24029 }, { "epoch": 1.1027488412647424, "grad_norm": 0.4621218144893646, "learning_rate": 7.2043350832072725e-06, "loss": 0.2992, "step": 24030 }, { "epoch": 1.1027947317699969, "grad_norm": 0.4547552466392517, "learning_rate": 7.204115006650526e-06, "loss": 0.3356, "step": 24031 }, { "epoch": 1.1028406222752511, "grad_norm": 0.5353094935417175, "learning_rate": 7.203894924793577e-06, "loss": 0.4615, "step": 24032 }, { "epoch": 1.1028865127805056, "grad_norm": 0.47319296002388, "learning_rate": 7.203674837636958e-06, "loss": 0.3257, "step": 24033 }, { "epoch": 1.1029324032857601, "grad_norm": 0.49430686235427856, "learning_rate": 7.203454745181197e-06, "loss": 0.4166, "step": 24034 }, { "epoch": 1.1029782937910146, "grad_norm": 0.5281789898872375, "learning_rate": 7.20323464742682e-06, "loss": 0.4042, "step": 24035 }, { "epoch": 1.1030241842962691, "grad_norm": 0.4187924563884735, "learning_rate": 7.203014544374361e-06, "loss": 0.2874, "step": 24036 }, { "epoch": 1.1030700748015236, "grad_norm": 0.48098692297935486, "learning_rate": 7.2027944360243476e-06, "loss": 0.3484, "step": 24037 }, { "epoch": 1.103115965306778, "grad_norm": 0.5104495286941528, "learning_rate": 7.202574322377308e-06, "loss": 0.372, "step": 24038 }, { "epoch": 1.1031618558120324, "grad_norm": 0.4671279489994049, "learning_rate": 7.202354203433772e-06, "loss": 0.3136, "step": 24039 }, { "epoch": 1.103207746317287, "grad_norm": 0.48503419756889343, "learning_rate": 7.202134079194269e-06, "loss": 0.4063, "step": 24040 }, { "epoch": 1.1032536368225414, "grad_norm": 0.4414476454257965, "learning_rate": 7.20191394965933e-06, "loss": 0.3167, "step": 24041 }, { "epoch": 1.103299527327796, "grad_norm": 0.49026811122894287, "learning_rate": 7.201693814829482e-06, "loss": 0.3833, "step": 24042 }, { "epoch": 1.1033454178330504, "grad_norm": 0.4644249379634857, "learning_rate": 7.201473674705257e-06, "loss": 0.3232, "step": 24043 }, { "epoch": 1.103391308338305, "grad_norm": 0.46489375829696655, "learning_rate": 7.20125352928718e-06, "loss": 0.3554, "step": 24044 }, { "epoch": 1.1034371988435592, "grad_norm": 0.47339317202568054, "learning_rate": 7.201033378575785e-06, "loss": 0.367, "step": 24045 }, { "epoch": 1.1034830893488137, "grad_norm": 0.43301594257354736, "learning_rate": 7.200813222571598e-06, "loss": 0.3114, "step": 24046 }, { "epoch": 1.1035289798540682, "grad_norm": 0.47108039259910583, "learning_rate": 7.20059306127515e-06, "loss": 0.3621, "step": 24047 }, { "epoch": 1.1035748703593227, "grad_norm": 0.4704408049583435, "learning_rate": 7.200372894686971e-06, "loss": 0.3212, "step": 24048 }, { "epoch": 1.1036207608645772, "grad_norm": 0.46565011143684387, "learning_rate": 7.200152722807589e-06, "loss": 0.3576, "step": 24049 }, { "epoch": 1.1036666513698317, "grad_norm": 0.4595431983470917, "learning_rate": 7.199932545637534e-06, "loss": 0.3365, "step": 24050 }, { "epoch": 1.103712541875086, "grad_norm": 0.45626500248908997, "learning_rate": 7.199712363177336e-06, "loss": 0.3408, "step": 24051 }, { "epoch": 1.1037584323803404, "grad_norm": 0.45966145396232605, "learning_rate": 7.1994921754275225e-06, "loss": 0.3691, "step": 24052 }, { "epoch": 1.103804322885595, "grad_norm": 0.48447123169898987, "learning_rate": 7.199271982388627e-06, "loss": 0.3685, "step": 24053 }, { "epoch": 1.1038502133908494, "grad_norm": 0.5024548768997192, "learning_rate": 7.199051784061175e-06, "loss": 0.4377, "step": 24054 }, { "epoch": 1.103896103896104, "grad_norm": 0.5267302393913269, "learning_rate": 7.198831580445698e-06, "loss": 0.4309, "step": 24055 }, { "epoch": 1.1039419944013584, "grad_norm": 0.42920824885368347, "learning_rate": 7.198611371542723e-06, "loss": 0.2545, "step": 24056 }, { "epoch": 1.103987884906613, "grad_norm": 0.424019992351532, "learning_rate": 7.1983911573527845e-06, "loss": 0.3093, "step": 24057 }, { "epoch": 1.1040337754118672, "grad_norm": 0.5126121044158936, "learning_rate": 7.198170937876409e-06, "loss": 0.4309, "step": 24058 }, { "epoch": 1.1040796659171217, "grad_norm": 0.4792507290840149, "learning_rate": 7.197950713114124e-06, "loss": 0.3912, "step": 24059 }, { "epoch": 1.1041255564223762, "grad_norm": 0.4704703688621521, "learning_rate": 7.197730483066463e-06, "loss": 0.3457, "step": 24060 }, { "epoch": 1.1041714469276307, "grad_norm": 0.4509371221065521, "learning_rate": 7.197510247733952e-06, "loss": 0.3532, "step": 24061 }, { "epoch": 1.1042173374328852, "grad_norm": 0.45793417096138, "learning_rate": 7.197290007117124e-06, "loss": 0.3332, "step": 24062 }, { "epoch": 1.1042632279381397, "grad_norm": 0.465079665184021, "learning_rate": 7.197069761216507e-06, "loss": 0.359, "step": 24063 }, { "epoch": 1.104309118443394, "grad_norm": 0.44251570105552673, "learning_rate": 7.19684951003263e-06, "loss": 0.3008, "step": 24064 }, { "epoch": 1.1043550089486485, "grad_norm": 0.5197700262069702, "learning_rate": 7.196629253566025e-06, "loss": 0.469, "step": 24065 }, { "epoch": 1.104400899453903, "grad_norm": 0.521238386631012, "learning_rate": 7.196408991817216e-06, "loss": 0.3781, "step": 24066 }, { "epoch": 1.1044467899591575, "grad_norm": 0.45557019114494324, "learning_rate": 7.1961887247867394e-06, "loss": 0.3316, "step": 24067 }, { "epoch": 1.104492680464412, "grad_norm": 0.5225608348846436, "learning_rate": 7.195968452475122e-06, "loss": 0.357, "step": 24068 }, { "epoch": 1.1045385709696665, "grad_norm": 0.6290162801742554, "learning_rate": 7.195748174882893e-06, "loss": 0.4414, "step": 24069 }, { "epoch": 1.104584461474921, "grad_norm": 0.44933953881263733, "learning_rate": 7.195527892010583e-06, "loss": 0.3821, "step": 24070 }, { "epoch": 1.1046303519801752, "grad_norm": 0.45774587988853455, "learning_rate": 7.195307603858721e-06, "loss": 0.3474, "step": 24071 }, { "epoch": 1.1046762424854297, "grad_norm": 0.46265432238578796, "learning_rate": 7.195087310427837e-06, "loss": 0.3529, "step": 24072 }, { "epoch": 1.1047221329906842, "grad_norm": 0.485806405544281, "learning_rate": 7.19486701171846e-06, "loss": 0.3329, "step": 24073 }, { "epoch": 1.1047680234959387, "grad_norm": 0.4814291000366211, "learning_rate": 7.194646707731122e-06, "loss": 0.3969, "step": 24074 }, { "epoch": 1.1048139140011932, "grad_norm": 0.42980897426605225, "learning_rate": 7.194426398466352e-06, "loss": 0.3065, "step": 24075 }, { "epoch": 1.1048598045064477, "grad_norm": 0.4809776246547699, "learning_rate": 7.194206083924678e-06, "loss": 0.3386, "step": 24076 }, { "epoch": 1.104905695011702, "grad_norm": 0.46797633171081543, "learning_rate": 7.19398576410663e-06, "loss": 0.358, "step": 24077 }, { "epoch": 1.1049515855169565, "grad_norm": 0.47154927253723145, "learning_rate": 7.1937654390127395e-06, "loss": 0.3865, "step": 24078 }, { "epoch": 1.104997476022211, "grad_norm": 0.4976094961166382, "learning_rate": 7.193545108643536e-06, "loss": 0.3465, "step": 24079 }, { "epoch": 1.1050433665274655, "grad_norm": 0.44048821926116943, "learning_rate": 7.19332477299955e-06, "loss": 0.3046, "step": 24080 }, { "epoch": 1.10508925703272, "grad_norm": 0.5054158568382263, "learning_rate": 7.193104432081307e-06, "loss": 0.4221, "step": 24081 }, { "epoch": 1.1051351475379745, "grad_norm": 0.4484591782093048, "learning_rate": 7.192884085889342e-06, "loss": 0.292, "step": 24082 }, { "epoch": 1.1051810380432288, "grad_norm": 0.4832470715045929, "learning_rate": 7.192663734424181e-06, "loss": 0.4032, "step": 24083 }, { "epoch": 1.1052269285484833, "grad_norm": 0.4845431447029114, "learning_rate": 7.192443377686358e-06, "loss": 0.385, "step": 24084 }, { "epoch": 1.1052728190537378, "grad_norm": 0.4488259255886078, "learning_rate": 7.1922230156763995e-06, "loss": 0.3151, "step": 24085 }, { "epoch": 1.1053187095589923, "grad_norm": 0.45792967081069946, "learning_rate": 7.192002648394838e-06, "loss": 0.367, "step": 24086 }, { "epoch": 1.1053646000642467, "grad_norm": 0.46047285199165344, "learning_rate": 7.1917822758421994e-06, "loss": 0.3316, "step": 24087 }, { "epoch": 1.1054104905695012, "grad_norm": 0.45247751474380493, "learning_rate": 7.191561898019018e-06, "loss": 0.3654, "step": 24088 }, { "epoch": 1.1054563810747555, "grad_norm": 0.47605806589126587, "learning_rate": 7.191341514925821e-06, "loss": 0.3939, "step": 24089 }, { "epoch": 1.10550227158001, "grad_norm": 0.44261467456817627, "learning_rate": 7.191121126563141e-06, "loss": 0.3633, "step": 24090 }, { "epoch": 1.1055481620852645, "grad_norm": 0.45486992597579956, "learning_rate": 7.190900732931505e-06, "loss": 0.3028, "step": 24091 }, { "epoch": 1.105594052590519, "grad_norm": 0.47403666377067566, "learning_rate": 7.190680334031443e-06, "loss": 0.3305, "step": 24092 }, { "epoch": 1.1056399430957735, "grad_norm": 0.4804691970348358, "learning_rate": 7.190459929863487e-06, "loss": 0.387, "step": 24093 }, { "epoch": 1.105685833601028, "grad_norm": 0.49163907766342163, "learning_rate": 7.190239520428167e-06, "loss": 0.4266, "step": 24094 }, { "epoch": 1.1057317241062825, "grad_norm": 0.44634315371513367, "learning_rate": 7.190019105726011e-06, "loss": 0.2993, "step": 24095 }, { "epoch": 1.1057776146115368, "grad_norm": 0.4560367166996002, "learning_rate": 7.189798685757552e-06, "loss": 0.3242, "step": 24096 }, { "epoch": 1.1058235051167913, "grad_norm": 0.4892953932285309, "learning_rate": 7.189578260523317e-06, "loss": 0.3797, "step": 24097 }, { "epoch": 1.1058693956220458, "grad_norm": 0.44532591104507446, "learning_rate": 7.189357830023838e-06, "loss": 0.345, "step": 24098 }, { "epoch": 1.1059152861273003, "grad_norm": 0.44525471329689026, "learning_rate": 7.189137394259645e-06, "loss": 0.3343, "step": 24099 }, { "epoch": 1.1059611766325548, "grad_norm": 0.46108290553092957, "learning_rate": 7.1889169532312644e-06, "loss": 0.3472, "step": 24100 }, { "epoch": 1.1060070671378093, "grad_norm": 0.4928763806819916, "learning_rate": 7.188696506939233e-06, "loss": 0.3852, "step": 24101 }, { "epoch": 1.1060529576430636, "grad_norm": 0.4712491035461426, "learning_rate": 7.188476055384075e-06, "loss": 0.3908, "step": 24102 }, { "epoch": 1.106098848148318, "grad_norm": 0.46864330768585205, "learning_rate": 7.188255598566322e-06, "loss": 0.3329, "step": 24103 }, { "epoch": 1.1061447386535725, "grad_norm": 0.5347345471382141, "learning_rate": 7.188035136486508e-06, "loss": 0.3856, "step": 24104 }, { "epoch": 1.106190629158827, "grad_norm": 0.49270766973495483, "learning_rate": 7.1878146691451575e-06, "loss": 0.4031, "step": 24105 }, { "epoch": 1.1062365196640815, "grad_norm": 0.48200806975364685, "learning_rate": 7.187594196542804e-06, "loss": 0.355, "step": 24106 }, { "epoch": 1.106282410169336, "grad_norm": 0.45261549949645996, "learning_rate": 7.1873737186799775e-06, "loss": 0.3193, "step": 24107 }, { "epoch": 1.1063283006745905, "grad_norm": 0.4746640920639038, "learning_rate": 7.187153235557208e-06, "loss": 0.4225, "step": 24108 }, { "epoch": 1.1063741911798448, "grad_norm": 0.5818424224853516, "learning_rate": 7.186932747175024e-06, "loss": 0.4012, "step": 24109 }, { "epoch": 1.1064200816850993, "grad_norm": 0.5333906412124634, "learning_rate": 7.1867122535339565e-06, "loss": 0.4098, "step": 24110 }, { "epoch": 1.1064659721903538, "grad_norm": 0.4457211196422577, "learning_rate": 7.186491754634537e-06, "loss": 0.2907, "step": 24111 }, { "epoch": 1.1065118626956083, "grad_norm": 0.4414770305156708, "learning_rate": 7.186271250477296e-06, "loss": 0.3271, "step": 24112 }, { "epoch": 1.1065577532008628, "grad_norm": 0.5181454420089722, "learning_rate": 7.186050741062762e-06, "loss": 0.4496, "step": 24113 }, { "epoch": 1.1066036437061173, "grad_norm": 0.5419386625289917, "learning_rate": 7.185830226391465e-06, "loss": 0.4375, "step": 24114 }, { "epoch": 1.1066495342113716, "grad_norm": 0.4356323480606079, "learning_rate": 7.185609706463937e-06, "loss": 0.3061, "step": 24115 }, { "epoch": 1.106695424716626, "grad_norm": 0.4951098561286926, "learning_rate": 7.185389181280706e-06, "loss": 0.3879, "step": 24116 }, { "epoch": 1.1067413152218806, "grad_norm": 0.43494734168052673, "learning_rate": 7.185168650842305e-06, "loss": 0.3128, "step": 24117 }, { "epoch": 1.106787205727135, "grad_norm": 0.44640690088272095, "learning_rate": 7.184948115149264e-06, "loss": 0.2986, "step": 24118 }, { "epoch": 1.1068330962323896, "grad_norm": 0.4308014512062073, "learning_rate": 7.184727574202111e-06, "loss": 0.2916, "step": 24119 }, { "epoch": 1.106878986737644, "grad_norm": 0.5008946657180786, "learning_rate": 7.184507028001377e-06, "loss": 0.4116, "step": 24120 }, { "epoch": 1.1069248772428983, "grad_norm": 0.5626621842384338, "learning_rate": 7.184286476547594e-06, "loss": 0.4505, "step": 24121 }, { "epoch": 1.1069707677481528, "grad_norm": 0.505445659160614, "learning_rate": 7.184065919841292e-06, "loss": 0.4631, "step": 24122 }, { "epoch": 1.1070166582534073, "grad_norm": 0.46523237228393555, "learning_rate": 7.183845357883e-06, "loss": 0.3401, "step": 24123 }, { "epoch": 1.1070625487586618, "grad_norm": 0.46505486965179443, "learning_rate": 7.183624790673249e-06, "loss": 0.3589, "step": 24124 }, { "epoch": 1.1071084392639163, "grad_norm": 0.48532193899154663, "learning_rate": 7.183404218212571e-06, "loss": 0.432, "step": 24125 }, { "epoch": 1.1071543297691708, "grad_norm": 0.4821535050868988, "learning_rate": 7.1831836405014935e-06, "loss": 0.3712, "step": 24126 }, { "epoch": 1.107200220274425, "grad_norm": 0.4672161638736725, "learning_rate": 7.182963057540548e-06, "loss": 0.3475, "step": 24127 }, { "epoch": 1.1072461107796796, "grad_norm": 0.4045127332210541, "learning_rate": 7.182742469330266e-06, "loss": 0.305, "step": 24128 }, { "epoch": 1.107292001284934, "grad_norm": 0.46311289072036743, "learning_rate": 7.182521875871178e-06, "loss": 0.3443, "step": 24129 }, { "epoch": 1.1073378917901886, "grad_norm": 0.44986966252326965, "learning_rate": 7.182301277163814e-06, "loss": 0.3521, "step": 24130 }, { "epoch": 1.107383782295443, "grad_norm": 0.4785291850566864, "learning_rate": 7.182080673208702e-06, "loss": 0.3792, "step": 24131 }, { "epoch": 1.1074296728006976, "grad_norm": 0.43488848209381104, "learning_rate": 7.181860064006376e-06, "loss": 0.3072, "step": 24132 }, { "epoch": 1.107475563305952, "grad_norm": 0.4507552981376648, "learning_rate": 7.181639449557365e-06, "loss": 0.3403, "step": 24133 }, { "epoch": 1.1075214538112064, "grad_norm": 0.46561646461486816, "learning_rate": 7.1814188298622e-06, "loss": 0.3187, "step": 24134 }, { "epoch": 1.1075673443164609, "grad_norm": 0.5131621956825256, "learning_rate": 7.1811982049214116e-06, "loss": 0.4175, "step": 24135 }, { "epoch": 1.1076132348217154, "grad_norm": 0.450484037399292, "learning_rate": 7.180977574735528e-06, "loss": 0.3298, "step": 24136 }, { "epoch": 1.1076591253269699, "grad_norm": 0.4876645803451538, "learning_rate": 7.180756939305083e-06, "loss": 0.3874, "step": 24137 }, { "epoch": 1.1077050158322244, "grad_norm": 0.4379865825176239, "learning_rate": 7.1805362986306045e-06, "loss": 0.3272, "step": 24138 }, { "epoch": 1.1077509063374789, "grad_norm": 0.45753467082977295, "learning_rate": 7.180315652712628e-06, "loss": 0.2913, "step": 24139 }, { "epoch": 1.1077967968427331, "grad_norm": 0.43448999524116516, "learning_rate": 7.180095001551678e-06, "loss": 0.3257, "step": 24140 }, { "epoch": 1.1078426873479876, "grad_norm": 0.43923598527908325, "learning_rate": 7.179874345148287e-06, "loss": 0.3149, "step": 24141 }, { "epoch": 1.1078885778532421, "grad_norm": 0.45992422103881836, "learning_rate": 7.1796536835029875e-06, "loss": 0.2937, "step": 24142 }, { "epoch": 1.1079344683584966, "grad_norm": 0.47843483090400696, "learning_rate": 7.179433016616308e-06, "loss": 0.352, "step": 24143 }, { "epoch": 1.1079803588637511, "grad_norm": 0.4803861677646637, "learning_rate": 7.179212344488779e-06, "loss": 0.3607, "step": 24144 }, { "epoch": 1.1080262493690056, "grad_norm": 0.4707183241844177, "learning_rate": 7.178991667120935e-06, "loss": 0.3673, "step": 24145 }, { "epoch": 1.1080721398742601, "grad_norm": 0.46289199590682983, "learning_rate": 7.1787709845133015e-06, "loss": 0.3816, "step": 24146 }, { "epoch": 1.1081180303795144, "grad_norm": 0.48825475573539734, "learning_rate": 7.178550296666411e-06, "loss": 0.3672, "step": 24147 }, { "epoch": 1.108163920884769, "grad_norm": 0.44593343138694763, "learning_rate": 7.1783296035807955e-06, "loss": 0.3036, "step": 24148 }, { "epoch": 1.1082098113900234, "grad_norm": 0.45376941561698914, "learning_rate": 7.178108905256984e-06, "loss": 0.334, "step": 24149 }, { "epoch": 1.108255701895278, "grad_norm": 0.4502517580986023, "learning_rate": 7.17788820169551e-06, "loss": 0.3352, "step": 24150 }, { "epoch": 1.1083015924005324, "grad_norm": 0.4482055604457855, "learning_rate": 7.177667492896901e-06, "loss": 0.3619, "step": 24151 }, { "epoch": 1.1083474829057869, "grad_norm": 0.5047576427459717, "learning_rate": 7.1774467788616885e-06, "loss": 0.4286, "step": 24152 }, { "epoch": 1.1083933734110412, "grad_norm": 0.45702528953552246, "learning_rate": 7.177226059590405e-06, "loss": 0.3513, "step": 24153 }, { "epoch": 1.1084392639162957, "grad_norm": 0.4475311040878296, "learning_rate": 7.177005335083579e-06, "loss": 0.3348, "step": 24154 }, { "epoch": 1.1084851544215502, "grad_norm": 0.46700724959373474, "learning_rate": 7.176784605341743e-06, "loss": 0.3786, "step": 24155 }, { "epoch": 1.1085310449268047, "grad_norm": 0.530242919921875, "learning_rate": 7.176563870365426e-06, "loss": 0.4173, "step": 24156 }, { "epoch": 1.1085769354320592, "grad_norm": 0.44964393973350525, "learning_rate": 7.17634313015516e-06, "loss": 0.2955, "step": 24157 }, { "epoch": 1.1086228259373136, "grad_norm": 0.5270382761955261, "learning_rate": 7.176122384711476e-06, "loss": 0.4128, "step": 24158 }, { "epoch": 1.1086687164425681, "grad_norm": 0.45300212502479553, "learning_rate": 7.175901634034905e-06, "loss": 0.3187, "step": 24159 }, { "epoch": 1.1087146069478224, "grad_norm": 0.45949307084083557, "learning_rate": 7.175680878125976e-06, "loss": 0.3442, "step": 24160 }, { "epoch": 1.108760497453077, "grad_norm": 0.4816446602344513, "learning_rate": 7.175460116985222e-06, "loss": 0.4225, "step": 24161 }, { "epoch": 1.1088063879583314, "grad_norm": 0.4883665144443512, "learning_rate": 7.175239350613173e-06, "loss": 0.3851, "step": 24162 }, { "epoch": 1.108852278463586, "grad_norm": 0.4621060788631439, "learning_rate": 7.17501857901036e-06, "loss": 0.3851, "step": 24163 }, { "epoch": 1.1088981689688404, "grad_norm": 0.4945509433746338, "learning_rate": 7.174797802177313e-06, "loss": 0.4239, "step": 24164 }, { "epoch": 1.108944059474095, "grad_norm": 0.45855578780174255, "learning_rate": 7.1745770201145646e-06, "loss": 0.3343, "step": 24165 }, { "epoch": 1.1089899499793492, "grad_norm": 0.4669126570224762, "learning_rate": 7.174356232822644e-06, "loss": 0.3528, "step": 24166 }, { "epoch": 1.1090358404846037, "grad_norm": 0.43554508686065674, "learning_rate": 7.1741354403020844e-06, "loss": 0.3546, "step": 24167 }, { "epoch": 1.1090817309898582, "grad_norm": 0.4729241132736206, "learning_rate": 7.1739146425534145e-06, "loss": 0.3945, "step": 24168 }, { "epoch": 1.1091276214951127, "grad_norm": 0.4337855577468872, "learning_rate": 7.173693839577165e-06, "loss": 0.31, "step": 24169 }, { "epoch": 1.1091735120003672, "grad_norm": 0.4468729794025421, "learning_rate": 7.173473031373869e-06, "loss": 0.3173, "step": 24170 }, { "epoch": 1.1092194025056217, "grad_norm": 0.46156302094459534, "learning_rate": 7.173252217944056e-06, "loss": 0.3144, "step": 24171 }, { "epoch": 1.109265293010876, "grad_norm": 0.48598507046699524, "learning_rate": 7.173031399288257e-06, "loss": 0.3981, "step": 24172 }, { "epoch": 1.1093111835161305, "grad_norm": 0.45429477095603943, "learning_rate": 7.172810575407003e-06, "loss": 0.3085, "step": 24173 }, { "epoch": 1.109357074021385, "grad_norm": 0.4479900300502777, "learning_rate": 7.172589746300827e-06, "loss": 0.3182, "step": 24174 }, { "epoch": 1.1094029645266394, "grad_norm": 0.4394559860229492, "learning_rate": 7.172368911970256e-06, "loss": 0.2955, "step": 24175 }, { "epoch": 1.109448855031894, "grad_norm": 0.4659448266029358, "learning_rate": 7.172148072415824e-06, "loss": 0.391, "step": 24176 }, { "epoch": 1.1094947455371484, "grad_norm": 0.42809024453163147, "learning_rate": 7.171927227638063e-06, "loss": 0.3169, "step": 24177 }, { "epoch": 1.1095406360424027, "grad_norm": 0.45912107825279236, "learning_rate": 7.171706377637502e-06, "loss": 0.3837, "step": 24178 }, { "epoch": 1.1095865265476572, "grad_norm": 0.493915319442749, "learning_rate": 7.171485522414671e-06, "loss": 0.3564, "step": 24179 }, { "epoch": 1.1096324170529117, "grad_norm": 0.44918495416641235, "learning_rate": 7.171264661970103e-06, "loss": 0.3425, "step": 24180 }, { "epoch": 1.1096783075581662, "grad_norm": 0.445166677236557, "learning_rate": 7.171043796304331e-06, "loss": 0.3278, "step": 24181 }, { "epoch": 1.1097241980634207, "grad_norm": 0.44932299852371216, "learning_rate": 7.170822925417882e-06, "loss": 0.3697, "step": 24182 }, { "epoch": 1.1097700885686752, "grad_norm": 0.4315730929374695, "learning_rate": 7.170602049311289e-06, "loss": 0.2813, "step": 24183 }, { "epoch": 1.1098159790739297, "grad_norm": 0.4613051116466522, "learning_rate": 7.170381167985084e-06, "loss": 0.361, "step": 24184 }, { "epoch": 1.109861869579184, "grad_norm": 0.49513137340545654, "learning_rate": 7.170160281439796e-06, "loss": 0.3918, "step": 24185 }, { "epoch": 1.1099077600844385, "grad_norm": 0.46481776237487793, "learning_rate": 7.169939389675957e-06, "loss": 0.3536, "step": 24186 }, { "epoch": 1.109953650589693, "grad_norm": 0.4210161864757538, "learning_rate": 7.169718492694101e-06, "loss": 0.272, "step": 24187 }, { "epoch": 1.1099995410949475, "grad_norm": 0.47596031427383423, "learning_rate": 7.169497590494756e-06, "loss": 0.4252, "step": 24188 }, { "epoch": 1.110045431600202, "grad_norm": 0.4196113348007202, "learning_rate": 7.169276683078453e-06, "loss": 0.2669, "step": 24189 }, { "epoch": 1.1100913221054565, "grad_norm": 0.49251946806907654, "learning_rate": 7.169055770445726e-06, "loss": 0.4089, "step": 24190 }, { "epoch": 1.1101372126107107, "grad_norm": 0.4743382930755615, "learning_rate": 7.168834852597102e-06, "loss": 0.389, "step": 24191 }, { "epoch": 1.1101831031159652, "grad_norm": 0.5145764350891113, "learning_rate": 7.168613929533117e-06, "loss": 0.4567, "step": 24192 }, { "epoch": 1.1102289936212197, "grad_norm": 0.4943943917751312, "learning_rate": 7.168393001254298e-06, "loss": 0.396, "step": 24193 }, { "epoch": 1.1102748841264742, "grad_norm": 0.43554216623306274, "learning_rate": 7.16817206776118e-06, "loss": 0.3004, "step": 24194 }, { "epoch": 1.1103207746317287, "grad_norm": 0.48390504717826843, "learning_rate": 7.167951129054291e-06, "loss": 0.3369, "step": 24195 }, { "epoch": 1.1103666651369832, "grad_norm": 0.4849126935005188, "learning_rate": 7.1677301851341655e-06, "loss": 0.3966, "step": 24196 }, { "epoch": 1.1104125556422377, "grad_norm": 0.48217445611953735, "learning_rate": 7.167509236001331e-06, "loss": 0.3979, "step": 24197 }, { "epoch": 1.110458446147492, "grad_norm": 0.4590473175048828, "learning_rate": 7.167288281656322e-06, "loss": 0.344, "step": 24198 }, { "epoch": 1.1105043366527465, "grad_norm": 0.45626410841941833, "learning_rate": 7.167067322099669e-06, "loss": 0.3582, "step": 24199 }, { "epoch": 1.110550227158001, "grad_norm": 0.44711849093437195, "learning_rate": 7.1668463573319015e-06, "loss": 0.3363, "step": 24200 }, { "epoch": 1.1105961176632555, "grad_norm": 0.4840066134929657, "learning_rate": 7.166625387353554e-06, "loss": 0.3515, "step": 24201 }, { "epoch": 1.11064200816851, "grad_norm": 0.4763987064361572, "learning_rate": 7.166404412165155e-06, "loss": 0.3869, "step": 24202 }, { "epoch": 1.1106878986737645, "grad_norm": 0.4718271791934967, "learning_rate": 7.166183431767237e-06, "loss": 0.3605, "step": 24203 }, { "epoch": 1.1107337891790188, "grad_norm": 0.42942309379577637, "learning_rate": 7.165962446160332e-06, "loss": 0.3222, "step": 24204 }, { "epoch": 1.1107796796842733, "grad_norm": 0.4857894480228424, "learning_rate": 7.165741455344974e-06, "loss": 0.3952, "step": 24205 }, { "epoch": 1.1108255701895278, "grad_norm": 0.5267897248268127, "learning_rate": 7.165520459321686e-06, "loss": 0.395, "step": 24206 }, { "epoch": 1.1108714606947823, "grad_norm": 0.4688046872615814, "learning_rate": 7.165299458091009e-06, "loss": 0.3728, "step": 24207 }, { "epoch": 1.1109173512000368, "grad_norm": 0.46491506695747375, "learning_rate": 7.165078451653467e-06, "loss": 0.365, "step": 24208 }, { "epoch": 1.1109632417052913, "grad_norm": 0.4369814693927765, "learning_rate": 7.164857440009597e-06, "loss": 0.3479, "step": 24209 }, { "epoch": 1.1110091322105455, "grad_norm": 0.4969015121459961, "learning_rate": 7.164636423159927e-06, "loss": 0.3753, "step": 24210 }, { "epoch": 1.1110550227158, "grad_norm": 0.4583773910999298, "learning_rate": 7.16441540110499e-06, "loss": 0.3717, "step": 24211 }, { "epoch": 1.1111009132210545, "grad_norm": 0.44166794419288635, "learning_rate": 7.164194373845318e-06, "loss": 0.2825, "step": 24212 }, { "epoch": 1.111146803726309, "grad_norm": 0.5372514724731445, "learning_rate": 7.163973341381439e-06, "loss": 0.4226, "step": 24213 }, { "epoch": 1.1111926942315635, "grad_norm": 0.4493069052696228, "learning_rate": 7.163752303713889e-06, "loss": 0.288, "step": 24214 }, { "epoch": 1.111238584736818, "grad_norm": 0.47101372480392456, "learning_rate": 7.163531260843197e-06, "loss": 0.3759, "step": 24215 }, { "epoch": 1.1112844752420723, "grad_norm": 0.45408353209495544, "learning_rate": 7.163310212769896e-06, "loss": 0.3162, "step": 24216 }, { "epoch": 1.1113303657473268, "grad_norm": 0.46379348635673523, "learning_rate": 7.163089159494515e-06, "loss": 0.3485, "step": 24217 }, { "epoch": 1.1113762562525813, "grad_norm": 0.46426066756248474, "learning_rate": 7.162868101017589e-06, "loss": 0.3356, "step": 24218 }, { "epoch": 1.1114221467578358, "grad_norm": 0.43641340732574463, "learning_rate": 7.162647037339646e-06, "loss": 0.3084, "step": 24219 }, { "epoch": 1.1114680372630903, "grad_norm": 0.4933439791202545, "learning_rate": 7.162425968461221e-06, "loss": 0.3932, "step": 24220 }, { "epoch": 1.1115139277683448, "grad_norm": 0.4686475694179535, "learning_rate": 7.1622048943828445e-06, "loss": 0.341, "step": 24221 }, { "epoch": 1.1115598182735993, "grad_norm": 0.4840691387653351, "learning_rate": 7.161983815105046e-06, "loss": 0.3609, "step": 24222 }, { "epoch": 1.1116057087788536, "grad_norm": 0.47255459427833557, "learning_rate": 7.1617627306283586e-06, "loss": 0.4029, "step": 24223 }, { "epoch": 1.111651599284108, "grad_norm": 0.5708515644073486, "learning_rate": 7.1615416409533155e-06, "loss": 0.4554, "step": 24224 }, { "epoch": 1.1116974897893626, "grad_norm": 0.471022367477417, "learning_rate": 7.161320546080445e-06, "loss": 0.3157, "step": 24225 }, { "epoch": 1.111743380294617, "grad_norm": 0.6310541033744812, "learning_rate": 7.1610994460102845e-06, "loss": 0.4361, "step": 24226 }, { "epoch": 1.1117892707998716, "grad_norm": 0.4477960765361786, "learning_rate": 7.160878340743359e-06, "loss": 0.3076, "step": 24227 }, { "epoch": 1.111835161305126, "grad_norm": 0.5310072898864746, "learning_rate": 7.1606572302802036e-06, "loss": 0.367, "step": 24228 }, { "epoch": 1.1118810518103803, "grad_norm": 0.5096598863601685, "learning_rate": 7.16043611462135e-06, "loss": 0.4059, "step": 24229 }, { "epoch": 1.1119269423156348, "grad_norm": 0.47221890091896057, "learning_rate": 7.160214993767328e-06, "loss": 0.3598, "step": 24230 }, { "epoch": 1.1119728328208893, "grad_norm": 0.5022576451301575, "learning_rate": 7.159993867718671e-06, "loss": 0.4224, "step": 24231 }, { "epoch": 1.1120187233261438, "grad_norm": 0.49132218956947327, "learning_rate": 7.159772736475913e-06, "loss": 0.4014, "step": 24232 }, { "epoch": 1.1120646138313983, "grad_norm": 0.5183481574058533, "learning_rate": 7.159551600039581e-06, "loss": 0.3684, "step": 24233 }, { "epoch": 1.1121105043366528, "grad_norm": 0.47301411628723145, "learning_rate": 7.159330458410207e-06, "loss": 0.3381, "step": 24234 }, { "epoch": 1.1121563948419073, "grad_norm": 0.43489307165145874, "learning_rate": 7.159109311588328e-06, "loss": 0.3124, "step": 24235 }, { "epoch": 1.1122022853471616, "grad_norm": 0.4650524854660034, "learning_rate": 7.158888159574472e-06, "loss": 0.3674, "step": 24236 }, { "epoch": 1.112248175852416, "grad_norm": 0.47108009457588196, "learning_rate": 7.158667002369171e-06, "loss": 0.3966, "step": 24237 }, { "epoch": 1.1122940663576706, "grad_norm": 0.41447097063064575, "learning_rate": 7.158445839972957e-06, "loss": 0.2928, "step": 24238 }, { "epoch": 1.112339956862925, "grad_norm": 0.5000222325325012, "learning_rate": 7.158224672386362e-06, "loss": 0.3827, "step": 24239 }, { "epoch": 1.1123858473681796, "grad_norm": 0.47842738032341003, "learning_rate": 7.158003499609918e-06, "loss": 0.3371, "step": 24240 }, { "epoch": 1.112431737873434, "grad_norm": 0.5362964272499084, "learning_rate": 7.157782321644157e-06, "loss": 0.3184, "step": 24241 }, { "epoch": 1.1124776283786884, "grad_norm": 0.5120749473571777, "learning_rate": 7.157561138489608e-06, "loss": 0.3852, "step": 24242 }, { "epoch": 1.1125235188839429, "grad_norm": 0.4918997883796692, "learning_rate": 7.15733995014681e-06, "loss": 0.4268, "step": 24243 }, { "epoch": 1.1125694093891974, "grad_norm": 0.49919113516807556, "learning_rate": 7.157118756616287e-06, "loss": 0.401, "step": 24244 }, { "epoch": 1.1126152998944518, "grad_norm": 0.48568886518478394, "learning_rate": 7.1568975578985744e-06, "loss": 0.4067, "step": 24245 }, { "epoch": 1.1126611903997063, "grad_norm": 0.4275054633617401, "learning_rate": 7.1566763539942055e-06, "loss": 0.2913, "step": 24246 }, { "epoch": 1.1127070809049608, "grad_norm": 0.49005022644996643, "learning_rate": 7.156455144903709e-06, "loss": 0.3789, "step": 24247 }, { "epoch": 1.1127529714102153, "grad_norm": 0.4599388837814331, "learning_rate": 7.156233930627619e-06, "loss": 0.3154, "step": 24248 }, { "epoch": 1.1127988619154696, "grad_norm": 0.4920576214790344, "learning_rate": 7.156012711166467e-06, "loss": 0.4163, "step": 24249 }, { "epoch": 1.1128447524207241, "grad_norm": 0.5525829792022705, "learning_rate": 7.155791486520785e-06, "loss": 0.3983, "step": 24250 }, { "epoch": 1.1128906429259786, "grad_norm": 0.4622592329978943, "learning_rate": 7.155570256691105e-06, "loss": 0.3388, "step": 24251 }, { "epoch": 1.112936533431233, "grad_norm": 0.47792983055114746, "learning_rate": 7.155349021677957e-06, "loss": 0.3851, "step": 24252 }, { "epoch": 1.1129824239364876, "grad_norm": 0.5089048147201538, "learning_rate": 7.155127781481876e-06, "loss": 0.4108, "step": 24253 }, { "epoch": 1.113028314441742, "grad_norm": 0.4869705140590668, "learning_rate": 7.154906536103395e-06, "loss": 0.4004, "step": 24254 }, { "epoch": 1.1130742049469964, "grad_norm": 0.4836026728153229, "learning_rate": 7.154685285543042e-06, "loss": 0.376, "step": 24255 }, { "epoch": 1.1131200954522509, "grad_norm": 0.5027388334274292, "learning_rate": 7.154464029801351e-06, "loss": 0.384, "step": 24256 }, { "epoch": 1.1131659859575054, "grad_norm": 0.47725412249565125, "learning_rate": 7.1542427688788545e-06, "loss": 0.3186, "step": 24257 }, { "epoch": 1.1132118764627599, "grad_norm": 0.4454929232597351, "learning_rate": 7.154021502776083e-06, "loss": 0.3131, "step": 24258 }, { "epoch": 1.1132577669680144, "grad_norm": 0.47490495443344116, "learning_rate": 7.153800231493571e-06, "loss": 0.3788, "step": 24259 }, { "epoch": 1.1133036574732689, "grad_norm": 0.46975889801979065, "learning_rate": 7.153578955031849e-06, "loss": 0.3333, "step": 24260 }, { "epoch": 1.1133495479785231, "grad_norm": 0.4466855227947235, "learning_rate": 7.153357673391448e-06, "loss": 0.3233, "step": 24261 }, { "epoch": 1.1133954384837776, "grad_norm": 0.46465012431144714, "learning_rate": 7.153136386572901e-06, "loss": 0.3881, "step": 24262 }, { "epoch": 1.1134413289890321, "grad_norm": 0.46011868119239807, "learning_rate": 7.152915094576743e-06, "loss": 0.3487, "step": 24263 }, { "epoch": 1.1134872194942866, "grad_norm": 0.4497684836387634, "learning_rate": 7.1526937974035015e-06, "loss": 0.2926, "step": 24264 }, { "epoch": 1.1135331099995411, "grad_norm": 0.5106773376464844, "learning_rate": 7.1524724950537125e-06, "loss": 0.376, "step": 24265 }, { "epoch": 1.1135790005047956, "grad_norm": 0.4786224961280823, "learning_rate": 7.152251187527906e-06, "loss": 0.3773, "step": 24266 }, { "epoch": 1.11362489101005, "grad_norm": 0.45936697721481323, "learning_rate": 7.1520298748266136e-06, "loss": 0.3348, "step": 24267 }, { "epoch": 1.1136707815153044, "grad_norm": 0.46732693910598755, "learning_rate": 7.151808556950369e-06, "loss": 0.3195, "step": 24268 }, { "epoch": 1.113716672020559, "grad_norm": 0.4996951222419739, "learning_rate": 7.151587233899705e-06, "loss": 0.3799, "step": 24269 }, { "epoch": 1.1137625625258134, "grad_norm": 0.43901556730270386, "learning_rate": 7.1513659056751515e-06, "loss": 0.3169, "step": 24270 }, { "epoch": 1.113808453031068, "grad_norm": 0.4447793960571289, "learning_rate": 7.151144572277242e-06, "loss": 0.3566, "step": 24271 }, { "epoch": 1.1138543435363224, "grad_norm": 0.5076403021812439, "learning_rate": 7.1509232337065105e-06, "loss": 0.396, "step": 24272 }, { "epoch": 1.113900234041577, "grad_norm": 0.49268388748168945, "learning_rate": 7.150701889963485e-06, "loss": 0.372, "step": 24273 }, { "epoch": 1.1139461245468312, "grad_norm": 0.463837593793869, "learning_rate": 7.150480541048702e-06, "loss": 0.3761, "step": 24274 }, { "epoch": 1.1139920150520857, "grad_norm": 0.43185147643089294, "learning_rate": 7.150259186962691e-06, "loss": 0.2797, "step": 24275 }, { "epoch": 1.1140379055573402, "grad_norm": 0.43756139278411865, "learning_rate": 7.150037827705987e-06, "loss": 0.2771, "step": 24276 }, { "epoch": 1.1140837960625947, "grad_norm": 0.4787544906139374, "learning_rate": 7.14981646327912e-06, "loss": 0.3822, "step": 24277 }, { "epoch": 1.1141296865678492, "grad_norm": 0.5242376327514648, "learning_rate": 7.1495950936826205e-06, "loss": 0.4725, "step": 24278 }, { "epoch": 1.1141755770731037, "grad_norm": 0.472313791513443, "learning_rate": 7.149373718917025e-06, "loss": 0.3547, "step": 24279 }, { "epoch": 1.114221467578358, "grad_norm": 0.5025116801261902, "learning_rate": 7.149152338982865e-06, "loss": 0.386, "step": 24280 }, { "epoch": 1.1142673580836124, "grad_norm": 0.46358856558799744, "learning_rate": 7.1489309538806705e-06, "loss": 0.306, "step": 24281 }, { "epoch": 1.114313248588867, "grad_norm": 0.482504665851593, "learning_rate": 7.148709563610976e-06, "loss": 0.3413, "step": 24282 }, { "epoch": 1.1143591390941214, "grad_norm": 0.45809096097946167, "learning_rate": 7.148488168174313e-06, "loss": 0.3472, "step": 24283 }, { "epoch": 1.114405029599376, "grad_norm": 0.5431184768676758, "learning_rate": 7.148266767571214e-06, "loss": 0.3429, "step": 24284 }, { "epoch": 1.1144509201046304, "grad_norm": 0.48125937581062317, "learning_rate": 7.14804536180221e-06, "loss": 0.3708, "step": 24285 }, { "epoch": 1.114496810609885, "grad_norm": 0.49789756536483765, "learning_rate": 7.147823950867837e-06, "loss": 0.4154, "step": 24286 }, { "epoch": 1.1145427011151392, "grad_norm": 0.49578002095222473, "learning_rate": 7.147602534768625e-06, "loss": 0.3758, "step": 24287 }, { "epoch": 1.1145885916203937, "grad_norm": 0.4344961643218994, "learning_rate": 7.147381113505105e-06, "loss": 0.3056, "step": 24288 }, { "epoch": 1.1146344821256482, "grad_norm": 0.5006224513053894, "learning_rate": 7.1471596870778135e-06, "loss": 0.4223, "step": 24289 }, { "epoch": 1.1146803726309027, "grad_norm": 0.4563644826412201, "learning_rate": 7.146938255487279e-06, "loss": 0.3149, "step": 24290 }, { "epoch": 1.1147262631361572, "grad_norm": 0.4680328667163849, "learning_rate": 7.1467168187340366e-06, "loss": 0.3596, "step": 24291 }, { "epoch": 1.1147721536414117, "grad_norm": 0.47042617201805115, "learning_rate": 7.146495376818618e-06, "loss": 0.3773, "step": 24292 }, { "epoch": 1.114818044146666, "grad_norm": 0.5124953389167786, "learning_rate": 7.146273929741554e-06, "loss": 0.4377, "step": 24293 }, { "epoch": 1.1148639346519205, "grad_norm": 0.5205598473548889, "learning_rate": 7.14605247750338e-06, "loss": 0.3475, "step": 24294 }, { "epoch": 1.114909825157175, "grad_norm": 0.443511962890625, "learning_rate": 7.145831020104625e-06, "loss": 0.3064, "step": 24295 }, { "epoch": 1.1149557156624295, "grad_norm": 0.4829126000404358, "learning_rate": 7.145609557545825e-06, "loss": 0.3631, "step": 24296 }, { "epoch": 1.115001606167684, "grad_norm": 0.4706784784793854, "learning_rate": 7.145388089827512e-06, "loss": 0.3377, "step": 24297 }, { "epoch": 1.1150474966729385, "grad_norm": 0.4684758484363556, "learning_rate": 7.145166616950217e-06, "loss": 0.3349, "step": 24298 }, { "epoch": 1.1150933871781927, "grad_norm": 0.44308769702911377, "learning_rate": 7.144945138914473e-06, "loss": 0.331, "step": 24299 }, { "epoch": 1.1151392776834472, "grad_norm": 0.45921099185943604, "learning_rate": 7.1447236557208135e-06, "loss": 0.3639, "step": 24300 }, { "epoch": 1.1151851681887017, "grad_norm": 0.5030645728111267, "learning_rate": 7.14450216736977e-06, "loss": 0.4407, "step": 24301 }, { "epoch": 1.1152310586939562, "grad_norm": 0.46139273047447205, "learning_rate": 7.144280673861876e-06, "loss": 0.3448, "step": 24302 }, { "epoch": 1.1152769491992107, "grad_norm": 0.4381040632724762, "learning_rate": 7.144059175197664e-06, "loss": 0.3139, "step": 24303 }, { "epoch": 1.1153228397044652, "grad_norm": 0.4558829367160797, "learning_rate": 7.143837671377665e-06, "loss": 0.3681, "step": 24304 }, { "epoch": 1.1153687302097195, "grad_norm": 0.4585329592227936, "learning_rate": 7.143616162402414e-06, "loss": 0.3888, "step": 24305 }, { "epoch": 1.115414620714974, "grad_norm": 0.5145308375358582, "learning_rate": 7.143394648272442e-06, "loss": 0.4057, "step": 24306 }, { "epoch": 1.1154605112202285, "grad_norm": 0.4452243149280548, "learning_rate": 7.143173128988283e-06, "loss": 0.313, "step": 24307 }, { "epoch": 1.115506401725483, "grad_norm": 0.5108509063720703, "learning_rate": 7.14295160455047e-06, "loss": 0.4256, "step": 24308 }, { "epoch": 1.1155522922307375, "grad_norm": 0.46300122141838074, "learning_rate": 7.142730074959534e-06, "loss": 0.3608, "step": 24309 }, { "epoch": 1.115598182735992, "grad_norm": 0.503735363483429, "learning_rate": 7.142508540216008e-06, "loss": 0.4172, "step": 24310 }, { "epoch": 1.1156440732412465, "grad_norm": 0.46704715490341187, "learning_rate": 7.142287000320425e-06, "loss": 0.3852, "step": 24311 }, { "epoch": 1.1156899637465008, "grad_norm": 0.452558308839798, "learning_rate": 7.142065455273318e-06, "loss": 0.3751, "step": 24312 }, { "epoch": 1.1157358542517553, "grad_norm": 0.4894804060459137, "learning_rate": 7.141843905075221e-06, "loss": 0.4063, "step": 24313 }, { "epoch": 1.1157817447570098, "grad_norm": 0.4186522662639618, "learning_rate": 7.1416223497266645e-06, "loss": 0.2778, "step": 24314 }, { "epoch": 1.1158276352622643, "grad_norm": 0.482189416885376, "learning_rate": 7.1414007892281815e-06, "loss": 0.342, "step": 24315 }, { "epoch": 1.1158735257675187, "grad_norm": 0.4641772508621216, "learning_rate": 7.141179223580305e-06, "loss": 0.2932, "step": 24316 }, { "epoch": 1.1159194162727732, "grad_norm": 0.5172176361083984, "learning_rate": 7.140957652783569e-06, "loss": 0.4675, "step": 24317 }, { "epoch": 1.1159653067780275, "grad_norm": 0.49548348784446716, "learning_rate": 7.140736076838505e-06, "loss": 0.363, "step": 24318 }, { "epoch": 1.116011197283282, "grad_norm": 0.4969908893108368, "learning_rate": 7.140514495745648e-06, "loss": 0.4397, "step": 24319 }, { "epoch": 1.1160570877885365, "grad_norm": 0.5102058053016663, "learning_rate": 7.140292909505528e-06, "loss": 0.4168, "step": 24320 }, { "epoch": 1.116102978293791, "grad_norm": 0.4525607228279114, "learning_rate": 7.140071318118679e-06, "loss": 0.3186, "step": 24321 }, { "epoch": 1.1161488687990455, "grad_norm": 0.46271592378616333, "learning_rate": 7.1398497215856336e-06, "loss": 0.3682, "step": 24322 }, { "epoch": 1.1161947593043, "grad_norm": 0.45411646366119385, "learning_rate": 7.139628119906926e-06, "loss": 0.3483, "step": 24323 }, { "epoch": 1.1162406498095545, "grad_norm": 0.4574289619922638, "learning_rate": 7.139406513083088e-06, "loss": 0.3757, "step": 24324 }, { "epoch": 1.1162865403148088, "grad_norm": 0.49333423376083374, "learning_rate": 7.139184901114653e-06, "loss": 0.4425, "step": 24325 }, { "epoch": 1.1163324308200633, "grad_norm": 0.4710083305835724, "learning_rate": 7.1389632840021514e-06, "loss": 0.3792, "step": 24326 }, { "epoch": 1.1163783213253178, "grad_norm": 0.48885902762413025, "learning_rate": 7.138741661746119e-06, "loss": 0.3394, "step": 24327 }, { "epoch": 1.1164242118305723, "grad_norm": 0.4619206488132477, "learning_rate": 7.1385200343470895e-06, "loss": 0.3134, "step": 24328 }, { "epoch": 1.1164701023358268, "grad_norm": 0.4482620060443878, "learning_rate": 7.138298401805592e-06, "loss": 0.32, "step": 24329 }, { "epoch": 1.1165159928410813, "grad_norm": 0.45085129141807556, "learning_rate": 7.138076764122164e-06, "loss": 0.327, "step": 24330 }, { "epoch": 1.1165618833463355, "grad_norm": 0.4841673970222473, "learning_rate": 7.137855121297335e-06, "loss": 0.3978, "step": 24331 }, { "epoch": 1.11660777385159, "grad_norm": 0.469740092754364, "learning_rate": 7.137633473331638e-06, "loss": 0.3839, "step": 24332 }, { "epoch": 1.1166536643568445, "grad_norm": 0.4611456096172333, "learning_rate": 7.137411820225608e-06, "loss": 0.359, "step": 24333 }, { "epoch": 1.116699554862099, "grad_norm": 0.48861223459243774, "learning_rate": 7.137190161979778e-06, "loss": 0.4263, "step": 24334 }, { "epoch": 1.1167454453673535, "grad_norm": 0.4440300166606903, "learning_rate": 7.136968498594679e-06, "loss": 0.3547, "step": 24335 }, { "epoch": 1.116791335872608, "grad_norm": 0.4237881600856781, "learning_rate": 7.136746830070846e-06, "loss": 0.2821, "step": 24336 }, { "epoch": 1.1168372263778625, "grad_norm": 0.5539296865463257, "learning_rate": 7.1365251564088115e-06, "loss": 0.5153, "step": 24337 }, { "epoch": 1.1168831168831168, "grad_norm": 0.5078336596488953, "learning_rate": 7.136303477609108e-06, "loss": 0.472, "step": 24338 }, { "epoch": 1.1169290073883713, "grad_norm": 0.4568607211112976, "learning_rate": 7.136081793672267e-06, "loss": 0.3557, "step": 24339 }, { "epoch": 1.1169748978936258, "grad_norm": 0.47729426622390747, "learning_rate": 7.135860104598826e-06, "loss": 0.3574, "step": 24340 }, { "epoch": 1.1170207883988803, "grad_norm": 0.5007487535476685, "learning_rate": 7.135638410389315e-06, "loss": 0.4103, "step": 24341 }, { "epoch": 1.1170666789041348, "grad_norm": 0.47860297560691833, "learning_rate": 7.135416711044267e-06, "loss": 0.4086, "step": 24342 }, { "epoch": 1.1171125694093893, "grad_norm": 0.45741936564445496, "learning_rate": 7.135195006564217e-06, "loss": 0.3155, "step": 24343 }, { "epoch": 1.1171584599146436, "grad_norm": 0.4515902101993561, "learning_rate": 7.134973296949695e-06, "loss": 0.3021, "step": 24344 }, { "epoch": 1.117204350419898, "grad_norm": 0.46438005566596985, "learning_rate": 7.134751582201238e-06, "loss": 0.3303, "step": 24345 }, { "epoch": 1.1172502409251526, "grad_norm": 0.5039751529693604, "learning_rate": 7.134529862319377e-06, "loss": 0.4346, "step": 24346 }, { "epoch": 1.117296131430407, "grad_norm": 0.5088385939598083, "learning_rate": 7.1343081373046445e-06, "loss": 0.3336, "step": 24347 }, { "epoch": 1.1173420219356616, "grad_norm": 0.47086963057518005, "learning_rate": 7.134086407157575e-06, "loss": 0.358, "step": 24348 }, { "epoch": 1.117387912440916, "grad_norm": 0.4846627712249756, "learning_rate": 7.1338646718787e-06, "loss": 0.3544, "step": 24349 }, { "epoch": 1.1174338029461703, "grad_norm": 0.48292118310928345, "learning_rate": 7.133642931468554e-06, "loss": 0.3753, "step": 24350 }, { "epoch": 1.1174796934514248, "grad_norm": 0.45573264360427856, "learning_rate": 7.133421185927672e-06, "loss": 0.3364, "step": 24351 }, { "epoch": 1.1175255839566793, "grad_norm": 0.4344509541988373, "learning_rate": 7.1331994352565836e-06, "loss": 0.3167, "step": 24352 }, { "epoch": 1.1175714744619338, "grad_norm": 0.45220839977264404, "learning_rate": 7.132977679455823e-06, "loss": 0.36, "step": 24353 }, { "epoch": 1.1176173649671883, "grad_norm": 0.46268683671951294, "learning_rate": 7.132755918525928e-06, "loss": 0.3731, "step": 24354 }, { "epoch": 1.1176632554724428, "grad_norm": 0.4575693905353546, "learning_rate": 7.132534152467425e-06, "loss": 0.3202, "step": 24355 }, { "epoch": 1.117709145977697, "grad_norm": 0.49020811915397644, "learning_rate": 7.132312381280852e-06, "loss": 0.3676, "step": 24356 }, { "epoch": 1.1177550364829516, "grad_norm": 0.5937674641609192, "learning_rate": 7.132090604966741e-06, "loss": 0.2857, "step": 24357 }, { "epoch": 1.117800926988206, "grad_norm": 0.44291162490844727, "learning_rate": 7.1318688235256224e-06, "loss": 0.3306, "step": 24358 }, { "epoch": 1.1178468174934606, "grad_norm": 0.4942046105861664, "learning_rate": 7.1316470369580345e-06, "loss": 0.4171, "step": 24359 }, { "epoch": 1.117892707998715, "grad_norm": 0.45690515637397766, "learning_rate": 7.131425245264507e-06, "loss": 0.2946, "step": 24360 }, { "epoch": 1.1179385985039696, "grad_norm": 0.43732553720474243, "learning_rate": 7.131203448445575e-06, "loss": 0.2946, "step": 24361 }, { "epoch": 1.117984489009224, "grad_norm": 0.47183582186698914, "learning_rate": 7.130981646501771e-06, "loss": 0.3474, "step": 24362 }, { "epoch": 1.1180303795144784, "grad_norm": 0.45972776412963867, "learning_rate": 7.130759839433629e-06, "loss": 0.305, "step": 24363 }, { "epoch": 1.1180762700197329, "grad_norm": 0.49848994612693787, "learning_rate": 7.130538027241683e-06, "loss": 0.4, "step": 24364 }, { "epoch": 1.1181221605249874, "grad_norm": 0.4721606969833374, "learning_rate": 7.130316209926464e-06, "loss": 0.4084, "step": 24365 }, { "epoch": 1.1181680510302419, "grad_norm": 0.43907833099365234, "learning_rate": 7.130094387488507e-06, "loss": 0.2847, "step": 24366 }, { "epoch": 1.1182139415354964, "grad_norm": 0.5151420831680298, "learning_rate": 7.129872559928347e-06, "loss": 0.4115, "step": 24367 }, { "epoch": 1.1182598320407509, "grad_norm": 0.5098459720611572, "learning_rate": 7.129650727246514e-06, "loss": 0.4528, "step": 24368 }, { "epoch": 1.1183057225460051, "grad_norm": 0.4762936532497406, "learning_rate": 7.129428889443544e-06, "loss": 0.3523, "step": 24369 }, { "epoch": 1.1183516130512596, "grad_norm": 0.45573607087135315, "learning_rate": 7.129207046519968e-06, "loss": 0.3107, "step": 24370 }, { "epoch": 1.1183975035565141, "grad_norm": 0.46376508474349976, "learning_rate": 7.128985198476323e-06, "loss": 0.3826, "step": 24371 }, { "epoch": 1.1184433940617686, "grad_norm": 0.5174378752708435, "learning_rate": 7.1287633453131385e-06, "loss": 0.4485, "step": 24372 }, { "epoch": 1.1184892845670231, "grad_norm": 0.448304146528244, "learning_rate": 7.128541487030952e-06, "loss": 0.3437, "step": 24373 }, { "epoch": 1.1185351750722776, "grad_norm": 0.43810001015663147, "learning_rate": 7.1283196236302956e-06, "loss": 0.3357, "step": 24374 }, { "epoch": 1.1185810655775321, "grad_norm": 0.48438560962677, "learning_rate": 7.128097755111699e-06, "loss": 0.3615, "step": 24375 }, { "epoch": 1.1186269560827864, "grad_norm": 0.4682760238647461, "learning_rate": 7.127875881475702e-06, "loss": 0.3787, "step": 24376 }, { "epoch": 1.118672846588041, "grad_norm": 0.46575334668159485, "learning_rate": 7.127654002722833e-06, "loss": 0.3825, "step": 24377 }, { "epoch": 1.1187187370932954, "grad_norm": 0.46889790892601013, "learning_rate": 7.127432118853629e-06, "loss": 0.3352, "step": 24378 }, { "epoch": 1.1187646275985499, "grad_norm": 0.4734900891780853, "learning_rate": 7.127210229868622e-06, "loss": 0.3238, "step": 24379 }, { "epoch": 1.1188105181038044, "grad_norm": 0.44081026315689087, "learning_rate": 7.1269883357683455e-06, "loss": 0.3224, "step": 24380 }, { "epoch": 1.1188564086090589, "grad_norm": 0.4423227906227112, "learning_rate": 7.126766436553332e-06, "loss": 0.3409, "step": 24381 }, { "epoch": 1.1189022991143132, "grad_norm": 0.5010138750076294, "learning_rate": 7.126544532224119e-06, "loss": 0.4052, "step": 24382 }, { "epoch": 1.1189481896195677, "grad_norm": 0.4685904085636139, "learning_rate": 7.126322622781235e-06, "loss": 0.3854, "step": 24383 }, { "epoch": 1.1189940801248222, "grad_norm": 0.4451734721660614, "learning_rate": 7.126100708225218e-06, "loss": 0.3215, "step": 24384 }, { "epoch": 1.1190399706300767, "grad_norm": 0.43488505482673645, "learning_rate": 7.1258787885566e-06, "loss": 0.3116, "step": 24385 }, { "epoch": 1.1190858611353312, "grad_norm": 0.4435853660106659, "learning_rate": 7.125656863775914e-06, "loss": 0.3023, "step": 24386 }, { "epoch": 1.1191317516405856, "grad_norm": 0.45220503211021423, "learning_rate": 7.1254349338836925e-06, "loss": 0.3249, "step": 24387 }, { "epoch": 1.11917764214584, "grad_norm": 0.4884524643421173, "learning_rate": 7.125212998880473e-06, "loss": 0.3938, "step": 24388 }, { "epoch": 1.1192235326510944, "grad_norm": 0.5248652696609497, "learning_rate": 7.124991058766786e-06, "loss": 0.4274, "step": 24389 }, { "epoch": 1.119269423156349, "grad_norm": 0.47318708896636963, "learning_rate": 7.1247691135431675e-06, "loss": 0.3922, "step": 24390 }, { "epoch": 1.1193153136616034, "grad_norm": 0.45581868290901184, "learning_rate": 7.124547163210148e-06, "loss": 0.3338, "step": 24391 }, { "epoch": 1.119361204166858, "grad_norm": 0.44147130846977234, "learning_rate": 7.124325207768263e-06, "loss": 0.3129, "step": 24392 }, { "epoch": 1.1194070946721124, "grad_norm": 0.4626491367816925, "learning_rate": 7.124103247218048e-06, "loss": 0.3162, "step": 24393 }, { "epoch": 1.1194529851773667, "grad_norm": 0.5089107155799866, "learning_rate": 7.1238812815600335e-06, "loss": 0.4024, "step": 24394 }, { "epoch": 1.1194988756826212, "grad_norm": 0.47981545329093933, "learning_rate": 7.123659310794756e-06, "loss": 0.4022, "step": 24395 }, { "epoch": 1.1195447661878757, "grad_norm": 0.42519521713256836, "learning_rate": 7.123437334922748e-06, "loss": 0.3044, "step": 24396 }, { "epoch": 1.1195906566931302, "grad_norm": 0.4342162013053894, "learning_rate": 7.1232153539445425e-06, "loss": 0.3335, "step": 24397 }, { "epoch": 1.1196365471983847, "grad_norm": 0.47132354974746704, "learning_rate": 7.122993367860674e-06, "loss": 0.3401, "step": 24398 }, { "epoch": 1.1196824377036392, "grad_norm": 0.4608250558376312, "learning_rate": 7.122771376671678e-06, "loss": 0.34, "step": 24399 }, { "epoch": 1.1197283282088937, "grad_norm": 0.4674767851829529, "learning_rate": 7.122549380378087e-06, "loss": 0.3435, "step": 24400 }, { "epoch": 1.119774218714148, "grad_norm": 0.4207720458507538, "learning_rate": 7.1223273789804326e-06, "loss": 0.271, "step": 24401 }, { "epoch": 1.1198201092194024, "grad_norm": 0.5110670328140259, "learning_rate": 7.1221053724792525e-06, "loss": 0.4602, "step": 24402 }, { "epoch": 1.119865999724657, "grad_norm": 0.48111262917518616, "learning_rate": 7.121883360875077e-06, "loss": 0.3799, "step": 24403 }, { "epoch": 1.1199118902299114, "grad_norm": 0.4518736004829407, "learning_rate": 7.121661344168442e-06, "loss": 0.3064, "step": 24404 }, { "epoch": 1.119957780735166, "grad_norm": 0.47767311334609985, "learning_rate": 7.1214393223598825e-06, "loss": 0.3916, "step": 24405 }, { "epoch": 1.1200036712404204, "grad_norm": 0.4766079783439636, "learning_rate": 7.121217295449931e-06, "loss": 0.3904, "step": 24406 }, { "epoch": 1.1200495617456747, "grad_norm": 0.5239993929862976, "learning_rate": 7.120995263439121e-06, "loss": 0.4783, "step": 24407 }, { "epoch": 1.1200954522509292, "grad_norm": 0.5094127058982849, "learning_rate": 7.120773226327986e-06, "loss": 0.4139, "step": 24408 }, { "epoch": 1.1201413427561837, "grad_norm": 0.4709087014198303, "learning_rate": 7.120551184117061e-06, "loss": 0.4434, "step": 24409 }, { "epoch": 1.1201872332614382, "grad_norm": 0.5592716336250305, "learning_rate": 7.120329136806879e-06, "loss": 0.3763, "step": 24410 }, { "epoch": 1.1202331237666927, "grad_norm": 0.4713756740093231, "learning_rate": 7.1201070843979756e-06, "loss": 0.3531, "step": 24411 }, { "epoch": 1.1202790142719472, "grad_norm": 0.45278388261795044, "learning_rate": 7.119885026890884e-06, "loss": 0.3546, "step": 24412 }, { "epoch": 1.1203249047772017, "grad_norm": 0.46855875849723816, "learning_rate": 7.1196629642861384e-06, "loss": 0.3116, "step": 24413 }, { "epoch": 1.120370795282456, "grad_norm": 0.4318397343158722, "learning_rate": 7.119440896584271e-06, "loss": 0.2751, "step": 24414 }, { "epoch": 1.1204166857877105, "grad_norm": 0.47066086530685425, "learning_rate": 7.119218823785816e-06, "loss": 0.3455, "step": 24415 }, { "epoch": 1.120462576292965, "grad_norm": 0.4986349642276764, "learning_rate": 7.118996745891311e-06, "loss": 0.3862, "step": 24416 }, { "epoch": 1.1205084667982195, "grad_norm": 0.4356958270072937, "learning_rate": 7.118774662901288e-06, "loss": 0.3364, "step": 24417 }, { "epoch": 1.120554357303474, "grad_norm": 0.5212225914001465, "learning_rate": 7.118552574816279e-06, "loss": 0.3989, "step": 24418 }, { "epoch": 1.1206002478087285, "grad_norm": 0.49545150995254517, "learning_rate": 7.1183304816368206e-06, "loss": 0.4239, "step": 24419 }, { "epoch": 1.1206461383139827, "grad_norm": 0.4666737914085388, "learning_rate": 7.118108383363444e-06, "loss": 0.3239, "step": 24420 }, { "epoch": 1.1206920288192372, "grad_norm": 0.46655821800231934, "learning_rate": 7.1178862799966875e-06, "loss": 0.3498, "step": 24421 }, { "epoch": 1.1207379193244917, "grad_norm": 0.443051278591156, "learning_rate": 7.1176641715370835e-06, "loss": 0.3387, "step": 24422 }, { "epoch": 1.1207838098297462, "grad_norm": 0.4540196657180786, "learning_rate": 7.117442057985163e-06, "loss": 0.3317, "step": 24423 }, { "epoch": 1.1208297003350007, "grad_norm": 0.5015744566917419, "learning_rate": 7.117219939341464e-06, "loss": 0.376, "step": 24424 }, { "epoch": 1.1208755908402552, "grad_norm": 0.4612747132778168, "learning_rate": 7.116997815606519e-06, "loss": 0.3307, "step": 24425 }, { "epoch": 1.1209214813455097, "grad_norm": 0.47574570775032043, "learning_rate": 7.116775686780861e-06, "loss": 0.3971, "step": 24426 }, { "epoch": 1.120967371850764, "grad_norm": 0.693388819694519, "learning_rate": 7.116553552865029e-06, "loss": 0.4142, "step": 24427 }, { "epoch": 1.1210132623560185, "grad_norm": 0.4336928129196167, "learning_rate": 7.1163314138595515e-06, "loss": 0.3286, "step": 24428 }, { "epoch": 1.121059152861273, "grad_norm": 0.4630883038043976, "learning_rate": 7.116109269764965e-06, "loss": 0.3681, "step": 24429 }, { "epoch": 1.1211050433665275, "grad_norm": 0.4876170754432678, "learning_rate": 7.115887120581804e-06, "loss": 0.3894, "step": 24430 }, { "epoch": 1.121150933871782, "grad_norm": 0.4595867693424225, "learning_rate": 7.1156649663106016e-06, "loss": 0.3778, "step": 24431 }, { "epoch": 1.1211968243770365, "grad_norm": 0.4024995267391205, "learning_rate": 7.115442806951892e-06, "loss": 0.2678, "step": 24432 }, { "epoch": 1.1212427148822908, "grad_norm": 0.4695373475551605, "learning_rate": 7.115220642506213e-06, "loss": 0.3899, "step": 24433 }, { "epoch": 1.1212886053875453, "grad_norm": 0.4568944275379181, "learning_rate": 7.114998472974093e-06, "loss": 0.3738, "step": 24434 }, { "epoch": 1.1213344958927998, "grad_norm": 0.478090763092041, "learning_rate": 7.11477629835607e-06, "loss": 0.3855, "step": 24435 }, { "epoch": 1.1213803863980543, "grad_norm": 0.4668237864971161, "learning_rate": 7.114554118652679e-06, "loss": 0.3565, "step": 24436 }, { "epoch": 1.1214262769033088, "grad_norm": 0.46233442425727844, "learning_rate": 7.11433193386445e-06, "loss": 0.3366, "step": 24437 }, { "epoch": 1.1214721674085633, "grad_norm": 0.4658839702606201, "learning_rate": 7.114109743991922e-06, "loss": 0.342, "step": 24438 }, { "epoch": 1.1215180579138175, "grad_norm": 0.48771369457244873, "learning_rate": 7.113887549035627e-06, "loss": 0.4359, "step": 24439 }, { "epoch": 1.121563948419072, "grad_norm": 0.4754769206047058, "learning_rate": 7.113665348996099e-06, "loss": 0.3885, "step": 24440 }, { "epoch": 1.1216098389243265, "grad_norm": 0.4918484687805176, "learning_rate": 7.113443143873874e-06, "loss": 0.3671, "step": 24441 }, { "epoch": 1.121655729429581, "grad_norm": 0.45737361907958984, "learning_rate": 7.113220933669485e-06, "loss": 0.3332, "step": 24442 }, { "epoch": 1.1217016199348355, "grad_norm": 0.4461697041988373, "learning_rate": 7.112998718383465e-06, "loss": 0.309, "step": 24443 }, { "epoch": 1.12174751044009, "grad_norm": 0.5030305981636047, "learning_rate": 7.112776498016352e-06, "loss": 0.4396, "step": 24444 }, { "epoch": 1.1217934009453443, "grad_norm": 0.47860968112945557, "learning_rate": 7.112554272568677e-06, "loss": 0.3351, "step": 24445 }, { "epoch": 1.1218392914505988, "grad_norm": 0.44691017270088196, "learning_rate": 7.112332042040976e-06, "loss": 0.315, "step": 24446 }, { "epoch": 1.1218851819558533, "grad_norm": 0.506725549697876, "learning_rate": 7.112109806433784e-06, "loss": 0.3941, "step": 24447 }, { "epoch": 1.1219310724611078, "grad_norm": 0.47101643681526184, "learning_rate": 7.111887565747633e-06, "loss": 0.3797, "step": 24448 }, { "epoch": 1.1219769629663623, "grad_norm": 0.4220737814903259, "learning_rate": 7.11166531998306e-06, "loss": 0.3103, "step": 24449 }, { "epoch": 1.1220228534716168, "grad_norm": 0.4636615216732025, "learning_rate": 7.111443069140598e-06, "loss": 0.3334, "step": 24450 }, { "epoch": 1.1220687439768713, "grad_norm": 0.43551406264305115, "learning_rate": 7.1112208132207815e-06, "loss": 0.2961, "step": 24451 }, { "epoch": 1.1221146344821256, "grad_norm": 0.4257749021053314, "learning_rate": 7.110998552224145e-06, "loss": 0.3068, "step": 24452 }, { "epoch": 1.12216052498738, "grad_norm": 0.4876939356327057, "learning_rate": 7.1107762861512245e-06, "loss": 0.3572, "step": 24453 }, { "epoch": 1.1222064154926346, "grad_norm": 0.5125496983528137, "learning_rate": 7.110554015002552e-06, "loss": 0.4043, "step": 24454 }, { "epoch": 1.122252305997889, "grad_norm": 0.4877931773662567, "learning_rate": 7.110331738778666e-06, "loss": 0.4135, "step": 24455 }, { "epoch": 1.1222981965031436, "grad_norm": 0.46114933490753174, "learning_rate": 7.110109457480095e-06, "loss": 0.3856, "step": 24456 }, { "epoch": 1.122344087008398, "grad_norm": 0.4857105016708374, "learning_rate": 7.109887171107377e-06, "loss": 0.3721, "step": 24457 }, { "epoch": 1.1223899775136523, "grad_norm": 0.485323429107666, "learning_rate": 7.109664879661048e-06, "loss": 0.3704, "step": 24458 }, { "epoch": 1.1224358680189068, "grad_norm": 0.4756920039653778, "learning_rate": 7.109442583141639e-06, "loss": 0.4091, "step": 24459 }, { "epoch": 1.1224817585241613, "grad_norm": 0.6531346440315247, "learning_rate": 7.109220281549687e-06, "loss": 0.4007, "step": 24460 }, { "epoch": 1.1225276490294158, "grad_norm": 0.5017656683921814, "learning_rate": 7.108997974885728e-06, "loss": 0.347, "step": 24461 }, { "epoch": 1.1225735395346703, "grad_norm": 0.45709168910980225, "learning_rate": 7.1087756631502915e-06, "loss": 0.3509, "step": 24462 }, { "epoch": 1.1226194300399248, "grad_norm": 0.4704689383506775, "learning_rate": 7.108553346343915e-06, "loss": 0.4163, "step": 24463 }, { "epoch": 1.1226653205451793, "grad_norm": 0.4220016896724701, "learning_rate": 7.1083310244671345e-06, "loss": 0.2975, "step": 24464 }, { "epoch": 1.1227112110504336, "grad_norm": 0.4469843804836273, "learning_rate": 7.1081086975204825e-06, "loss": 0.3218, "step": 24465 }, { "epoch": 1.122757101555688, "grad_norm": 0.4760843813419342, "learning_rate": 7.107886365504496e-06, "loss": 0.3728, "step": 24466 }, { "epoch": 1.1228029920609426, "grad_norm": 0.454483300447464, "learning_rate": 7.1076640284197075e-06, "loss": 0.3237, "step": 24467 }, { "epoch": 1.122848882566197, "grad_norm": 0.4691222012042999, "learning_rate": 7.107441686266652e-06, "loss": 0.3445, "step": 24468 }, { "epoch": 1.1228947730714516, "grad_norm": 0.47898927330970764, "learning_rate": 7.107219339045863e-06, "loss": 0.3548, "step": 24469 }, { "epoch": 1.122940663576706, "grad_norm": 0.4752456247806549, "learning_rate": 7.106996986757878e-06, "loss": 0.3619, "step": 24470 }, { "epoch": 1.1229865540819604, "grad_norm": 0.4784412980079651, "learning_rate": 7.1067746294032305e-06, "loss": 0.4044, "step": 24471 }, { "epoch": 1.1230324445872149, "grad_norm": 0.48600858449935913, "learning_rate": 7.106552266982455e-06, "loss": 0.3615, "step": 24472 }, { "epoch": 1.1230783350924693, "grad_norm": 0.4752776324748993, "learning_rate": 7.106329899496085e-06, "loss": 0.3459, "step": 24473 }, { "epoch": 1.1231242255977238, "grad_norm": 0.4536878764629364, "learning_rate": 7.106107526944657e-06, "loss": 0.3047, "step": 24474 }, { "epoch": 1.1231701161029783, "grad_norm": 0.4537631869316101, "learning_rate": 7.105885149328704e-06, "loss": 0.3378, "step": 24475 }, { "epoch": 1.1232160066082328, "grad_norm": 0.4891679883003235, "learning_rate": 7.105662766648763e-06, "loss": 0.3959, "step": 24476 }, { "epoch": 1.1232618971134871, "grad_norm": 0.5229505300521851, "learning_rate": 7.1054403789053684e-06, "loss": 0.3802, "step": 24477 }, { "epoch": 1.1233077876187416, "grad_norm": 0.5272250175476074, "learning_rate": 7.105217986099054e-06, "loss": 0.4326, "step": 24478 }, { "epoch": 1.1233536781239961, "grad_norm": 0.44651180505752563, "learning_rate": 7.1049955882303526e-06, "loss": 0.3184, "step": 24479 }, { "epoch": 1.1233995686292506, "grad_norm": 0.5361708402633667, "learning_rate": 7.104773185299803e-06, "loss": 0.4607, "step": 24480 }, { "epoch": 1.123445459134505, "grad_norm": 0.4506639838218689, "learning_rate": 7.104550777307937e-06, "loss": 0.3138, "step": 24481 }, { "epoch": 1.1234913496397596, "grad_norm": 0.47576722502708435, "learning_rate": 7.104328364255292e-06, "loss": 0.4276, "step": 24482 }, { "epoch": 1.1235372401450139, "grad_norm": 0.4305223524570465, "learning_rate": 7.1041059461424e-06, "loss": 0.3027, "step": 24483 }, { "epoch": 1.1235831306502684, "grad_norm": 0.4397425353527069, "learning_rate": 7.1038835229697985e-06, "loss": 0.3345, "step": 24484 }, { "epoch": 1.1236290211555229, "grad_norm": 0.4513666331768036, "learning_rate": 7.103661094738021e-06, "loss": 0.3533, "step": 24485 }, { "epoch": 1.1236749116607774, "grad_norm": 0.4771495759487152, "learning_rate": 7.103438661447601e-06, "loss": 0.3408, "step": 24486 }, { "epoch": 1.1237208021660319, "grad_norm": 0.5015978217124939, "learning_rate": 7.103216223099077e-06, "loss": 0.4135, "step": 24487 }, { "epoch": 1.1237666926712864, "grad_norm": 0.5056594610214233, "learning_rate": 7.1029937796929805e-06, "loss": 0.367, "step": 24488 }, { "epoch": 1.1238125831765409, "grad_norm": 0.44038277864456177, "learning_rate": 7.102771331229849e-06, "loss": 0.3394, "step": 24489 }, { "epoch": 1.1238584736817951, "grad_norm": 0.4544081687927246, "learning_rate": 7.102548877710215e-06, "loss": 0.3394, "step": 24490 }, { "epoch": 1.1239043641870496, "grad_norm": 0.5266464948654175, "learning_rate": 7.102326419134613e-06, "loss": 0.4399, "step": 24491 }, { "epoch": 1.1239502546923041, "grad_norm": 0.49854058027267456, "learning_rate": 7.102103955503582e-06, "loss": 0.4403, "step": 24492 }, { "epoch": 1.1239961451975586, "grad_norm": 0.4582069218158722, "learning_rate": 7.101881486817654e-06, "loss": 0.3308, "step": 24493 }, { "epoch": 1.1240420357028131, "grad_norm": 0.5077621340751648, "learning_rate": 7.101659013077363e-06, "loss": 0.4576, "step": 24494 }, { "epoch": 1.1240879262080676, "grad_norm": 0.47128525376319885, "learning_rate": 7.101436534283248e-06, "loss": 0.3936, "step": 24495 }, { "epoch": 1.124133816713322, "grad_norm": 0.45865651965141296, "learning_rate": 7.101214050435838e-06, "loss": 0.3334, "step": 24496 }, { "epoch": 1.1241797072185764, "grad_norm": 0.4583466649055481, "learning_rate": 7.100991561535673e-06, "loss": 0.3158, "step": 24497 }, { "epoch": 1.124225597723831, "grad_norm": 0.44251590967178345, "learning_rate": 7.100769067583288e-06, "loss": 0.3146, "step": 24498 }, { "epoch": 1.1242714882290854, "grad_norm": 0.4577972888946533, "learning_rate": 7.100546568579214e-06, "loss": 0.3522, "step": 24499 }, { "epoch": 1.12431737873434, "grad_norm": 0.46744784712791443, "learning_rate": 7.100324064523989e-06, "loss": 0.337, "step": 24500 }, { "epoch": 1.1243632692395944, "grad_norm": 0.46433404088020325, "learning_rate": 7.100101555418149e-06, "loss": 0.3773, "step": 24501 }, { "epoch": 1.124409159744849, "grad_norm": 0.4964696168899536, "learning_rate": 7.099879041262225e-06, "loss": 0.4166, "step": 24502 }, { "epoch": 1.1244550502501032, "grad_norm": 0.4846893846988678, "learning_rate": 7.099656522056757e-06, "loss": 0.3867, "step": 24503 }, { "epoch": 1.1245009407553577, "grad_norm": 0.46418121457099915, "learning_rate": 7.099433997802277e-06, "loss": 0.3493, "step": 24504 }, { "epoch": 1.1245468312606122, "grad_norm": 0.4548535943031311, "learning_rate": 7.099211468499321e-06, "loss": 0.3682, "step": 24505 }, { "epoch": 1.1245927217658667, "grad_norm": 0.46428796648979187, "learning_rate": 7.098988934148423e-06, "loss": 0.3751, "step": 24506 }, { "epoch": 1.1246386122711212, "grad_norm": 0.49877917766571045, "learning_rate": 7.098766394750119e-06, "loss": 0.432, "step": 24507 }, { "epoch": 1.1246845027763757, "grad_norm": 0.4951443076133728, "learning_rate": 7.098543850304944e-06, "loss": 0.3796, "step": 24508 }, { "epoch": 1.12473039328163, "grad_norm": 0.42637306451797485, "learning_rate": 7.0983213008134354e-06, "loss": 0.3174, "step": 24509 }, { "epoch": 1.1247762837868844, "grad_norm": 0.42890873551368713, "learning_rate": 7.098098746276125e-06, "loss": 0.3249, "step": 24510 }, { "epoch": 1.124822174292139, "grad_norm": 0.4537610411643982, "learning_rate": 7.09787618669355e-06, "loss": 0.3228, "step": 24511 }, { "epoch": 1.1248680647973934, "grad_norm": 0.46539074182510376, "learning_rate": 7.097653622066244e-06, "loss": 0.3955, "step": 24512 }, { "epoch": 1.124913955302648, "grad_norm": 0.45689210295677185, "learning_rate": 7.097431052394743e-06, "loss": 0.3165, "step": 24513 }, { "epoch": 1.1249598458079024, "grad_norm": 0.46390965580940247, "learning_rate": 7.0972084776795836e-06, "loss": 0.3422, "step": 24514 }, { "epoch": 1.125005736313157, "grad_norm": 0.4720083475112915, "learning_rate": 7.096985897921299e-06, "loss": 0.3683, "step": 24515 }, { "epoch": 1.1250516268184112, "grad_norm": 0.4342659115791321, "learning_rate": 7.096763313120424e-06, "loss": 0.311, "step": 24516 }, { "epoch": 1.1250975173236657, "grad_norm": 0.4513651430606842, "learning_rate": 7.096540723277497e-06, "loss": 0.312, "step": 24517 }, { "epoch": 1.1251434078289202, "grad_norm": 0.4567718505859375, "learning_rate": 7.096318128393049e-06, "loss": 0.3592, "step": 24518 }, { "epoch": 1.1251892983341747, "grad_norm": 0.48970913887023926, "learning_rate": 7.096095528467618e-06, "loss": 0.3802, "step": 24519 }, { "epoch": 1.1252351888394292, "grad_norm": 0.4593092203140259, "learning_rate": 7.095872923501741e-06, "loss": 0.3491, "step": 24520 }, { "epoch": 1.1252810793446835, "grad_norm": 0.481396347284317, "learning_rate": 7.09565031349595e-06, "loss": 0.4155, "step": 24521 }, { "epoch": 1.125326969849938, "grad_norm": 0.4844643175601959, "learning_rate": 7.09542769845078e-06, "loss": 0.3895, "step": 24522 }, { "epoch": 1.1253728603551925, "grad_norm": 0.4977850914001465, "learning_rate": 7.095205078366769e-06, "loss": 0.4252, "step": 24523 }, { "epoch": 1.125418750860447, "grad_norm": 0.4896126687526703, "learning_rate": 7.09498245324445e-06, "loss": 0.4079, "step": 24524 }, { "epoch": 1.1254646413657015, "grad_norm": 0.4772425889968872, "learning_rate": 7.094759823084361e-06, "loss": 0.3756, "step": 24525 }, { "epoch": 1.125510531870956, "grad_norm": 0.4161599278450012, "learning_rate": 7.094537187887035e-06, "loss": 0.2733, "step": 24526 }, { "epoch": 1.1255564223762105, "grad_norm": 0.43875566124916077, "learning_rate": 7.094314547653008e-06, "loss": 0.3373, "step": 24527 }, { "epoch": 1.1256023128814647, "grad_norm": 0.4744507372379303, "learning_rate": 7.094091902382815e-06, "loss": 0.3851, "step": 24528 }, { "epoch": 1.1256482033867192, "grad_norm": 0.47918373346328735, "learning_rate": 7.0938692520769926e-06, "loss": 0.3404, "step": 24529 }, { "epoch": 1.1256940938919737, "grad_norm": 0.4705836772918701, "learning_rate": 7.093646596736074e-06, "loss": 0.3283, "step": 24530 }, { "epoch": 1.1257399843972282, "grad_norm": 0.503592848777771, "learning_rate": 7.093423936360598e-06, "loss": 0.3549, "step": 24531 }, { "epoch": 1.1257858749024827, "grad_norm": 0.4714903235435486, "learning_rate": 7.093201270951099e-06, "loss": 0.3303, "step": 24532 }, { "epoch": 1.1258317654077372, "grad_norm": 0.4885201156139374, "learning_rate": 7.092978600508109e-06, "loss": 0.3575, "step": 24533 }, { "epoch": 1.1258776559129915, "grad_norm": 0.5011963844299316, "learning_rate": 7.092755925032166e-06, "loss": 0.3664, "step": 24534 }, { "epoch": 1.125923546418246, "grad_norm": 0.5127184987068176, "learning_rate": 7.092533244523808e-06, "loss": 0.4267, "step": 24535 }, { "epoch": 1.1259694369235005, "grad_norm": 0.48105233907699585, "learning_rate": 7.092310558983567e-06, "loss": 0.3442, "step": 24536 }, { "epoch": 1.126015327428755, "grad_norm": 0.48005300760269165, "learning_rate": 7.09208786841198e-06, "loss": 0.3795, "step": 24537 }, { "epoch": 1.1260612179340095, "grad_norm": 0.4687623977661133, "learning_rate": 7.09186517280958e-06, "loss": 0.4175, "step": 24538 }, { "epoch": 1.126107108439264, "grad_norm": 0.466586709022522, "learning_rate": 7.091642472176905e-06, "loss": 0.3601, "step": 24539 }, { "epoch": 1.1261529989445185, "grad_norm": 0.45807895064353943, "learning_rate": 7.0914197665144915e-06, "loss": 0.3879, "step": 24540 }, { "epoch": 1.1261988894497728, "grad_norm": 0.46288588643074036, "learning_rate": 7.091197055822871e-06, "loss": 0.3515, "step": 24541 }, { "epoch": 1.1262447799550273, "grad_norm": 0.4602619409561157, "learning_rate": 7.090974340102584e-06, "loss": 0.3451, "step": 24542 }, { "epoch": 1.1262906704602818, "grad_norm": 0.4624866545200348, "learning_rate": 7.090751619354164e-06, "loss": 0.3427, "step": 24543 }, { "epoch": 1.1263365609655362, "grad_norm": 0.42305925488471985, "learning_rate": 7.090528893578144e-06, "loss": 0.2961, "step": 24544 }, { "epoch": 1.1263824514707907, "grad_norm": 0.4329456686973572, "learning_rate": 7.090306162775062e-06, "loss": 0.2941, "step": 24545 }, { "epoch": 1.1264283419760452, "grad_norm": 0.4587375819683075, "learning_rate": 7.090083426945455e-06, "loss": 0.3478, "step": 24546 }, { "epoch": 1.1264742324812995, "grad_norm": 0.42938199639320374, "learning_rate": 7.089860686089855e-06, "loss": 0.2873, "step": 24547 }, { "epoch": 1.126520122986554, "grad_norm": 0.516125500202179, "learning_rate": 7.089637940208801e-06, "loss": 0.4387, "step": 24548 }, { "epoch": 1.1265660134918085, "grad_norm": 0.48361027240753174, "learning_rate": 7.089415189302829e-06, "loss": 0.4261, "step": 24549 }, { "epoch": 1.126611903997063, "grad_norm": 0.546575665473938, "learning_rate": 7.089192433372469e-06, "loss": 0.5349, "step": 24550 }, { "epoch": 1.1266577945023175, "grad_norm": 0.46081236004829407, "learning_rate": 7.088969672418262e-06, "loss": 0.3643, "step": 24551 }, { "epoch": 1.126703685007572, "grad_norm": 0.44108253717422485, "learning_rate": 7.088746906440744e-06, "loss": 0.3072, "step": 24552 }, { "epoch": 1.1267495755128265, "grad_norm": 0.46580585837364197, "learning_rate": 7.088524135440448e-06, "loss": 0.3513, "step": 24553 }, { "epoch": 1.1267954660180808, "grad_norm": 0.4716245234012604, "learning_rate": 7.088301359417911e-06, "loss": 0.3543, "step": 24554 }, { "epoch": 1.1268413565233353, "grad_norm": 0.5105167627334595, "learning_rate": 7.088078578373667e-06, "loss": 0.4201, "step": 24555 }, { "epoch": 1.1268872470285898, "grad_norm": 0.4353860914707184, "learning_rate": 7.087855792308252e-06, "loss": 0.3422, "step": 24556 }, { "epoch": 1.1269331375338443, "grad_norm": 0.44406914710998535, "learning_rate": 7.087633001222206e-06, "loss": 0.3169, "step": 24557 }, { "epoch": 1.1269790280390988, "grad_norm": 0.4783133864402771, "learning_rate": 7.08741020511606e-06, "loss": 0.3842, "step": 24558 }, { "epoch": 1.127024918544353, "grad_norm": 0.4782160222530365, "learning_rate": 7.087187403990351e-06, "loss": 0.3632, "step": 24559 }, { "epoch": 1.1270708090496075, "grad_norm": 0.4648149907588959, "learning_rate": 7.086964597845616e-06, "loss": 0.3127, "step": 24560 }, { "epoch": 1.127116699554862, "grad_norm": 0.5468202829360962, "learning_rate": 7.0867417866823875e-06, "loss": 0.4589, "step": 24561 }, { "epoch": 1.1271625900601165, "grad_norm": 0.46742403507232666, "learning_rate": 7.086518970501205e-06, "loss": 0.3526, "step": 24562 }, { "epoch": 1.127208480565371, "grad_norm": 0.46444547176361084, "learning_rate": 7.086296149302604e-06, "loss": 0.3873, "step": 24563 }, { "epoch": 1.1272543710706255, "grad_norm": 0.4539959132671356, "learning_rate": 7.086073323087118e-06, "loss": 0.3539, "step": 24564 }, { "epoch": 1.12730026157588, "grad_norm": 0.45937666296958923, "learning_rate": 7.085850491855284e-06, "loss": 0.3192, "step": 24565 }, { "epoch": 1.1273461520811345, "grad_norm": 0.46128958463668823, "learning_rate": 7.085627655607639e-06, "loss": 0.3579, "step": 24566 }, { "epoch": 1.1273920425863888, "grad_norm": 0.4815141558647156, "learning_rate": 7.085404814344716e-06, "loss": 0.3983, "step": 24567 }, { "epoch": 1.1274379330916433, "grad_norm": 0.46070626378059387, "learning_rate": 7.085181968067054e-06, "loss": 0.3242, "step": 24568 }, { "epoch": 1.1274838235968978, "grad_norm": 0.4765242040157318, "learning_rate": 7.084959116775187e-06, "loss": 0.3443, "step": 24569 }, { "epoch": 1.1275297141021523, "grad_norm": 0.47750386595726013, "learning_rate": 7.084736260469651e-06, "loss": 0.3533, "step": 24570 }, { "epoch": 1.1275756046074068, "grad_norm": 0.4724601209163666, "learning_rate": 7.084513399150983e-06, "loss": 0.3244, "step": 24571 }, { "epoch": 1.127621495112661, "grad_norm": 0.44304022192955017, "learning_rate": 7.084290532819718e-06, "loss": 0.3051, "step": 24572 }, { "epoch": 1.1276673856179156, "grad_norm": 0.4654863774776459, "learning_rate": 7.08406766147639e-06, "loss": 0.3769, "step": 24573 }, { "epoch": 1.12771327612317, "grad_norm": 0.4668884873390198, "learning_rate": 7.08384478512154e-06, "loss": 0.3638, "step": 24574 }, { "epoch": 1.1277591666284246, "grad_norm": 0.4853515326976776, "learning_rate": 7.0836219037557e-06, "loss": 0.3994, "step": 24575 }, { "epoch": 1.127805057133679, "grad_norm": 0.46285679936408997, "learning_rate": 7.083399017379406e-06, "loss": 0.3254, "step": 24576 }, { "epoch": 1.1278509476389336, "grad_norm": 0.4762583076953888, "learning_rate": 7.083176125993196e-06, "loss": 0.4098, "step": 24577 }, { "epoch": 1.127896838144188, "grad_norm": 0.4493516683578491, "learning_rate": 7.082953229597604e-06, "loss": 0.3506, "step": 24578 }, { "epoch": 1.1279427286494423, "grad_norm": 0.45163342356681824, "learning_rate": 7.082730328193168e-06, "loss": 0.336, "step": 24579 }, { "epoch": 1.1279886191546968, "grad_norm": 0.4574577808380127, "learning_rate": 7.082507421780423e-06, "loss": 0.3451, "step": 24580 }, { "epoch": 1.1280345096599513, "grad_norm": 0.47335854172706604, "learning_rate": 7.082284510359903e-06, "loss": 0.4202, "step": 24581 }, { "epoch": 1.1280804001652058, "grad_norm": 0.5153486728668213, "learning_rate": 7.082061593932148e-06, "loss": 0.3198, "step": 24582 }, { "epoch": 1.1281262906704603, "grad_norm": 0.4294804632663727, "learning_rate": 7.081838672497691e-06, "loss": 0.2976, "step": 24583 }, { "epoch": 1.1281721811757148, "grad_norm": 0.4901289939880371, "learning_rate": 7.081615746057069e-06, "loss": 0.3994, "step": 24584 }, { "epoch": 1.128218071680969, "grad_norm": 0.48187538981437683, "learning_rate": 7.081392814610819e-06, "loss": 0.3546, "step": 24585 }, { "epoch": 1.1282639621862236, "grad_norm": 0.43343669176101685, "learning_rate": 7.081169878159475e-06, "loss": 0.3408, "step": 24586 }, { "epoch": 1.128309852691478, "grad_norm": 0.47424763441085815, "learning_rate": 7.080946936703575e-06, "loss": 0.3516, "step": 24587 }, { "epoch": 1.1283557431967326, "grad_norm": 0.4683694541454315, "learning_rate": 7.080723990243654e-06, "loss": 0.2996, "step": 24588 }, { "epoch": 1.128401633701987, "grad_norm": 0.4467673897743225, "learning_rate": 7.080501038780249e-06, "loss": 0.2991, "step": 24589 }, { "epoch": 1.1284475242072416, "grad_norm": 0.4705336093902588, "learning_rate": 7.080278082313895e-06, "loss": 0.3772, "step": 24590 }, { "epoch": 1.128493414712496, "grad_norm": 0.4445994198322296, "learning_rate": 7.08005512084513e-06, "loss": 0.2934, "step": 24591 }, { "epoch": 1.1285393052177504, "grad_norm": 0.46373361349105835, "learning_rate": 7.079832154374488e-06, "loss": 0.335, "step": 24592 }, { "epoch": 1.1285851957230049, "grad_norm": 0.4306647479534149, "learning_rate": 7.079609182902505e-06, "loss": 0.3341, "step": 24593 }, { "epoch": 1.1286310862282594, "grad_norm": 0.49501678347587585, "learning_rate": 7.079386206429719e-06, "loss": 0.3928, "step": 24594 }, { "epoch": 1.1286769767335139, "grad_norm": 0.4977308213710785, "learning_rate": 7.079163224956666e-06, "loss": 0.4154, "step": 24595 }, { "epoch": 1.1287228672387684, "grad_norm": 0.46642938256263733, "learning_rate": 7.078940238483881e-06, "loss": 0.3263, "step": 24596 }, { "epoch": 1.1287687577440229, "grad_norm": 0.4599094092845917, "learning_rate": 7.078717247011903e-06, "loss": 0.3362, "step": 24597 }, { "epoch": 1.1288146482492771, "grad_norm": 0.4833031892776489, "learning_rate": 7.0784942505412636e-06, "loss": 0.3624, "step": 24598 }, { "epoch": 1.1288605387545316, "grad_norm": 0.5002352595329285, "learning_rate": 7.078271249072502e-06, "loss": 0.3685, "step": 24599 }, { "epoch": 1.1289064292597861, "grad_norm": 0.4528098702430725, "learning_rate": 7.078048242606154e-06, "loss": 0.3352, "step": 24600 }, { "epoch": 1.1289523197650406, "grad_norm": 0.49634674191474915, "learning_rate": 7.077825231142756e-06, "loss": 0.417, "step": 24601 }, { "epoch": 1.1289982102702951, "grad_norm": 0.4813739061355591, "learning_rate": 7.0776022146828445e-06, "loss": 0.3136, "step": 24602 }, { "epoch": 1.1290441007755496, "grad_norm": 0.46177199482917786, "learning_rate": 7.077379193226955e-06, "loss": 0.3851, "step": 24603 }, { "epoch": 1.1290899912808041, "grad_norm": 0.4620695412158966, "learning_rate": 7.077156166775624e-06, "loss": 0.3662, "step": 24604 }, { "epoch": 1.1291358817860584, "grad_norm": 0.46620461344718933, "learning_rate": 7.076933135329388e-06, "loss": 0.3399, "step": 24605 }, { "epoch": 1.129181772291313, "grad_norm": 0.47636425495147705, "learning_rate": 7.0767100988887836e-06, "loss": 0.3644, "step": 24606 }, { "epoch": 1.1292276627965674, "grad_norm": 0.493269145488739, "learning_rate": 7.076487057454347e-06, "loss": 0.4014, "step": 24607 }, { "epoch": 1.1292735533018219, "grad_norm": 0.47368744015693665, "learning_rate": 7.076264011026615e-06, "loss": 0.3567, "step": 24608 }, { "epoch": 1.1293194438070764, "grad_norm": 0.4601665735244751, "learning_rate": 7.07604095960612e-06, "loss": 0.3628, "step": 24609 }, { "epoch": 1.1293653343123307, "grad_norm": 0.4633275866508484, "learning_rate": 7.075817903193405e-06, "loss": 0.3639, "step": 24610 }, { "epoch": 1.1294112248175852, "grad_norm": 0.46169790625572205, "learning_rate": 7.075594841789003e-06, "loss": 0.3656, "step": 24611 }, { "epoch": 1.1294571153228397, "grad_norm": 0.5021532773971558, "learning_rate": 7.07537177539345e-06, "loss": 0.4629, "step": 24612 }, { "epoch": 1.1295030058280942, "grad_norm": 0.42406079173088074, "learning_rate": 7.075148704007282e-06, "loss": 0.2943, "step": 24613 }, { "epoch": 1.1295488963333487, "grad_norm": 0.46010372042655945, "learning_rate": 7.0749256276310376e-06, "loss": 0.3297, "step": 24614 }, { "epoch": 1.1295947868386031, "grad_norm": 0.5121824741363525, "learning_rate": 7.074702546265251e-06, "loss": 0.445, "step": 24615 }, { "epoch": 1.1296406773438576, "grad_norm": 0.49617666006088257, "learning_rate": 7.0744794599104595e-06, "loss": 0.3647, "step": 24616 }, { "epoch": 1.129686567849112, "grad_norm": 0.4901573956012726, "learning_rate": 7.074256368567201e-06, "loss": 0.3855, "step": 24617 }, { "epoch": 1.1297324583543664, "grad_norm": 0.4987618923187256, "learning_rate": 7.07403327223601e-06, "loss": 0.4014, "step": 24618 }, { "epoch": 1.129778348859621, "grad_norm": 0.46302151679992676, "learning_rate": 7.073810170917422e-06, "loss": 0.3687, "step": 24619 }, { "epoch": 1.1298242393648754, "grad_norm": 0.45192191004753113, "learning_rate": 7.073587064611976e-06, "loss": 0.3411, "step": 24620 }, { "epoch": 1.12987012987013, "grad_norm": 0.45732104778289795, "learning_rate": 7.073363953320207e-06, "loss": 0.2982, "step": 24621 }, { "epoch": 1.1299160203753844, "grad_norm": 0.47261545062065125, "learning_rate": 7.073140837042654e-06, "loss": 0.3621, "step": 24622 }, { "epoch": 1.1299619108806387, "grad_norm": 0.42634132504463196, "learning_rate": 7.07291771577985e-06, "loss": 0.3076, "step": 24623 }, { "epoch": 1.1300078013858932, "grad_norm": 0.4517354965209961, "learning_rate": 7.072694589532334e-06, "loss": 0.3536, "step": 24624 }, { "epoch": 1.1300536918911477, "grad_norm": 0.4304409623146057, "learning_rate": 7.072471458300642e-06, "loss": 0.3022, "step": 24625 }, { "epoch": 1.1300995823964022, "grad_norm": 0.43409183621406555, "learning_rate": 7.072248322085308e-06, "loss": 0.3677, "step": 24626 }, { "epoch": 1.1301454729016567, "grad_norm": 0.4629223942756653, "learning_rate": 7.072025180886872e-06, "loss": 0.3385, "step": 24627 }, { "epoch": 1.1301913634069112, "grad_norm": 0.47877368330955505, "learning_rate": 7.07180203470587e-06, "loss": 0.4395, "step": 24628 }, { "epoch": 1.1302372539121657, "grad_norm": 0.4613826274871826, "learning_rate": 7.071578883542838e-06, "loss": 0.3599, "step": 24629 }, { "epoch": 1.13028314441742, "grad_norm": 0.47733768820762634, "learning_rate": 7.071355727398311e-06, "loss": 0.3733, "step": 24630 }, { "epoch": 1.1303290349226744, "grad_norm": 0.48830389976501465, "learning_rate": 7.071132566272829e-06, "loss": 0.4233, "step": 24631 }, { "epoch": 1.130374925427929, "grad_norm": 0.4276810884475708, "learning_rate": 7.070909400166926e-06, "loss": 0.2697, "step": 24632 }, { "epoch": 1.1304208159331834, "grad_norm": 0.49892228841781616, "learning_rate": 7.07068622908114e-06, "loss": 0.4182, "step": 24633 }, { "epoch": 1.130466706438438, "grad_norm": 0.508402407169342, "learning_rate": 7.070463053016008e-06, "loss": 0.3765, "step": 24634 }, { "epoch": 1.1305125969436924, "grad_norm": 0.49524861574172974, "learning_rate": 7.070239871972064e-06, "loss": 0.4445, "step": 24635 }, { "epoch": 1.1305584874489467, "grad_norm": 0.45710158348083496, "learning_rate": 7.070016685949848e-06, "loss": 0.3204, "step": 24636 }, { "epoch": 1.1306043779542012, "grad_norm": 0.5117238163948059, "learning_rate": 7.069793494949894e-06, "loss": 0.3658, "step": 24637 }, { "epoch": 1.1306502684594557, "grad_norm": 0.4672723710536957, "learning_rate": 7.069570298972739e-06, "loss": 0.3298, "step": 24638 }, { "epoch": 1.1306961589647102, "grad_norm": 0.494704008102417, "learning_rate": 7.0693470980189225e-06, "loss": 0.4084, "step": 24639 }, { "epoch": 1.1307420494699647, "grad_norm": 0.464688241481781, "learning_rate": 7.06912389208898e-06, "loss": 0.3543, "step": 24640 }, { "epoch": 1.1307879399752192, "grad_norm": 0.4941105246543884, "learning_rate": 7.068900681183445e-06, "loss": 0.3867, "step": 24641 }, { "epoch": 1.1308338304804737, "grad_norm": 0.4670950472354889, "learning_rate": 7.068677465302858e-06, "loss": 0.3907, "step": 24642 }, { "epoch": 1.130879720985728, "grad_norm": 0.4825858473777771, "learning_rate": 7.068454244447754e-06, "loss": 0.3553, "step": 24643 }, { "epoch": 1.1309256114909825, "grad_norm": 0.44604653120040894, "learning_rate": 7.068231018618671e-06, "loss": 0.3232, "step": 24644 }, { "epoch": 1.130971501996237, "grad_norm": 0.4585740566253662, "learning_rate": 7.068007787816147e-06, "loss": 0.3514, "step": 24645 }, { "epoch": 1.1310173925014915, "grad_norm": 0.4931480586528778, "learning_rate": 7.067784552040715e-06, "loss": 0.3383, "step": 24646 }, { "epoch": 1.131063283006746, "grad_norm": 0.4740438759326935, "learning_rate": 7.067561311292913e-06, "loss": 0.3639, "step": 24647 }, { "epoch": 1.1311091735120002, "grad_norm": 0.44726133346557617, "learning_rate": 7.067338065573279e-06, "loss": 0.2799, "step": 24648 }, { "epoch": 1.1311550640172547, "grad_norm": 0.46154269576072693, "learning_rate": 7.06711481488235e-06, "loss": 0.3221, "step": 24649 }, { "epoch": 1.1312009545225092, "grad_norm": 0.5094354152679443, "learning_rate": 7.0668915592206635e-06, "loss": 0.4388, "step": 24650 }, { "epoch": 1.1312468450277637, "grad_norm": 0.46852555871009827, "learning_rate": 7.0666682985887536e-06, "loss": 0.3584, "step": 24651 }, { "epoch": 1.1312927355330182, "grad_norm": 0.46889567375183105, "learning_rate": 7.066445032987159e-06, "loss": 0.3898, "step": 24652 }, { "epoch": 1.1313386260382727, "grad_norm": 0.522243857383728, "learning_rate": 7.066221762416417e-06, "loss": 0.4256, "step": 24653 }, { "epoch": 1.1313845165435272, "grad_norm": 0.4601024389266968, "learning_rate": 7.0659984868770615e-06, "loss": 0.3743, "step": 24654 }, { "epoch": 1.1314304070487817, "grad_norm": 0.47413766384124756, "learning_rate": 7.065775206369633e-06, "loss": 0.3633, "step": 24655 }, { "epoch": 1.131476297554036, "grad_norm": 0.4264897406101227, "learning_rate": 7.065551920894669e-06, "loss": 0.2879, "step": 24656 }, { "epoch": 1.1315221880592905, "grad_norm": 0.46694231033325195, "learning_rate": 7.065328630452702e-06, "loss": 0.345, "step": 24657 }, { "epoch": 1.131568078564545, "grad_norm": 0.47167566418647766, "learning_rate": 7.065105335044271e-06, "loss": 0.3482, "step": 24658 }, { "epoch": 1.1316139690697995, "grad_norm": 0.4649851620197296, "learning_rate": 7.0648820346699156e-06, "loss": 0.3561, "step": 24659 }, { "epoch": 1.131659859575054, "grad_norm": 0.5117760896682739, "learning_rate": 7.064658729330169e-06, "loss": 0.4228, "step": 24660 }, { "epoch": 1.1317057500803083, "grad_norm": 0.46339282393455505, "learning_rate": 7.064435419025571e-06, "loss": 0.367, "step": 24661 }, { "epoch": 1.1317516405855628, "grad_norm": 0.48824307322502136, "learning_rate": 7.064212103756658e-06, "loss": 0.4471, "step": 24662 }, { "epoch": 1.1317975310908173, "grad_norm": 0.4682967960834503, "learning_rate": 7.063988783523965e-06, "loss": 0.3439, "step": 24663 }, { "epoch": 1.1318434215960718, "grad_norm": 0.45112791657447815, "learning_rate": 7.06376545832803e-06, "loss": 0.3571, "step": 24664 }, { "epoch": 1.1318893121013263, "grad_norm": 0.4737400412559509, "learning_rate": 7.063542128169392e-06, "loss": 0.3604, "step": 24665 }, { "epoch": 1.1319352026065808, "grad_norm": 0.46167582273483276, "learning_rate": 7.063318793048585e-06, "loss": 0.3604, "step": 24666 }, { "epoch": 1.1319810931118353, "grad_norm": 0.47838419675827026, "learning_rate": 7.063095452966149e-06, "loss": 0.3796, "step": 24667 }, { "epoch": 1.1320269836170895, "grad_norm": 0.44395148754119873, "learning_rate": 7.062872107922618e-06, "loss": 0.3372, "step": 24668 }, { "epoch": 1.132072874122344, "grad_norm": 0.4389018714427948, "learning_rate": 7.062648757918531e-06, "loss": 0.3084, "step": 24669 }, { "epoch": 1.1321187646275985, "grad_norm": 0.4572465121746063, "learning_rate": 7.062425402954425e-06, "loss": 0.3364, "step": 24670 }, { "epoch": 1.132164655132853, "grad_norm": 0.48669055104255676, "learning_rate": 7.062202043030837e-06, "loss": 0.3843, "step": 24671 }, { "epoch": 1.1322105456381075, "grad_norm": 0.46957796812057495, "learning_rate": 7.061978678148304e-06, "loss": 0.3954, "step": 24672 }, { "epoch": 1.132256436143362, "grad_norm": 0.47031453251838684, "learning_rate": 7.061755308307363e-06, "loss": 0.3608, "step": 24673 }, { "epoch": 1.1323023266486163, "grad_norm": 0.4731173813343048, "learning_rate": 7.061531933508551e-06, "loss": 0.3793, "step": 24674 }, { "epoch": 1.1323482171538708, "grad_norm": 0.5126436948776245, "learning_rate": 7.061308553752404e-06, "loss": 0.4407, "step": 24675 }, { "epoch": 1.1323941076591253, "grad_norm": 0.5006262063980103, "learning_rate": 7.061085169039463e-06, "loss": 0.4251, "step": 24676 }, { "epoch": 1.1324399981643798, "grad_norm": 0.47339576482772827, "learning_rate": 7.06086177937026e-06, "loss": 0.405, "step": 24677 }, { "epoch": 1.1324858886696343, "grad_norm": 0.46169745922088623, "learning_rate": 7.0606383847453374e-06, "loss": 0.332, "step": 24678 }, { "epoch": 1.1325317791748888, "grad_norm": 0.49350640177726746, "learning_rate": 7.06041498516523e-06, "loss": 0.3898, "step": 24679 }, { "epoch": 1.1325776696801433, "grad_norm": 0.4572957456111908, "learning_rate": 7.060191580630472e-06, "loss": 0.3527, "step": 24680 }, { "epoch": 1.1326235601853976, "grad_norm": 0.5221545100212097, "learning_rate": 7.0599681711416044e-06, "loss": 0.4074, "step": 24681 }, { "epoch": 1.132669450690652, "grad_norm": 0.5155816674232483, "learning_rate": 7.059744756699165e-06, "loss": 0.4505, "step": 24682 }, { "epoch": 1.1327153411959066, "grad_norm": 0.463096022605896, "learning_rate": 7.05952133730369e-06, "loss": 0.36, "step": 24683 }, { "epoch": 1.132761231701161, "grad_norm": 0.4870770573616028, "learning_rate": 7.059297912955715e-06, "loss": 0.3825, "step": 24684 }, { "epoch": 1.1328071222064156, "grad_norm": 0.42900702357292175, "learning_rate": 7.059074483655778e-06, "loss": 0.254, "step": 24685 }, { "epoch": 1.13285301271167, "grad_norm": 0.4253750443458557, "learning_rate": 7.058851049404416e-06, "loss": 0.262, "step": 24686 }, { "epoch": 1.1328989032169243, "grad_norm": 0.43896132707595825, "learning_rate": 7.058627610202169e-06, "loss": 0.3172, "step": 24687 }, { "epoch": 1.1329447937221788, "grad_norm": 0.5336152911186218, "learning_rate": 7.05840416604957e-06, "loss": 0.471, "step": 24688 }, { "epoch": 1.1329906842274333, "grad_norm": 0.479544460773468, "learning_rate": 7.05818071694716e-06, "loss": 0.366, "step": 24689 }, { "epoch": 1.1330365747326878, "grad_norm": 0.4719175398349762, "learning_rate": 7.057957262895476e-06, "loss": 0.3912, "step": 24690 }, { "epoch": 1.1330824652379423, "grad_norm": 0.4595673978328705, "learning_rate": 7.057733803895054e-06, "loss": 0.3402, "step": 24691 }, { "epoch": 1.1331283557431968, "grad_norm": 0.4903862476348877, "learning_rate": 7.05751033994643e-06, "loss": 0.396, "step": 24692 }, { "epoch": 1.1331742462484513, "grad_norm": 0.4468924403190613, "learning_rate": 7.057286871050144e-06, "loss": 0.3191, "step": 24693 }, { "epoch": 1.1332201367537056, "grad_norm": 0.4685289263725281, "learning_rate": 7.0570633972067335e-06, "loss": 0.3379, "step": 24694 }, { "epoch": 1.13326602725896, "grad_norm": 0.5066692233085632, "learning_rate": 7.056839918416733e-06, "loss": 0.4151, "step": 24695 }, { "epoch": 1.1333119177642146, "grad_norm": 0.48665502667427063, "learning_rate": 7.056616434680682e-06, "loss": 0.3849, "step": 24696 }, { "epoch": 1.133357808269469, "grad_norm": 0.4585694968700409, "learning_rate": 7.056392945999118e-06, "loss": 0.3672, "step": 24697 }, { "epoch": 1.1334036987747236, "grad_norm": 0.4581230878829956, "learning_rate": 7.056169452372577e-06, "loss": 0.366, "step": 24698 }, { "epoch": 1.1334495892799779, "grad_norm": 0.4281544089317322, "learning_rate": 7.055945953801599e-06, "loss": 0.3177, "step": 24699 }, { "epoch": 1.1334954797852324, "grad_norm": 0.48600640892982483, "learning_rate": 7.05572245028672e-06, "loss": 0.371, "step": 24700 }, { "epoch": 1.1335413702904868, "grad_norm": 0.4628463685512543, "learning_rate": 7.055498941828476e-06, "loss": 0.3471, "step": 24701 }, { "epoch": 1.1335872607957413, "grad_norm": 0.45399749279022217, "learning_rate": 7.055275428427406e-06, "loss": 0.323, "step": 24702 }, { "epoch": 1.1336331513009958, "grad_norm": 0.48110607266426086, "learning_rate": 7.055051910084047e-06, "loss": 0.4187, "step": 24703 }, { "epoch": 1.1336790418062503, "grad_norm": 0.4542180001735687, "learning_rate": 7.054828386798938e-06, "loss": 0.3465, "step": 24704 }, { "epoch": 1.1337249323115048, "grad_norm": 0.47487661242485046, "learning_rate": 7.0546048585726136e-06, "loss": 0.3824, "step": 24705 }, { "epoch": 1.1337708228167591, "grad_norm": 0.4624318480491638, "learning_rate": 7.054381325405614e-06, "loss": 0.3635, "step": 24706 }, { "epoch": 1.1338167133220136, "grad_norm": 0.44230806827545166, "learning_rate": 7.054157787298475e-06, "loss": 0.3511, "step": 24707 }, { "epoch": 1.1338626038272681, "grad_norm": 0.43595731258392334, "learning_rate": 7.0539342442517344e-06, "loss": 0.3402, "step": 24708 }, { "epoch": 1.1339084943325226, "grad_norm": 0.44333600997924805, "learning_rate": 7.053710696265932e-06, "loss": 0.313, "step": 24709 }, { "epoch": 1.133954384837777, "grad_norm": 0.5078650116920471, "learning_rate": 7.0534871433416016e-06, "loss": 0.3975, "step": 24710 }, { "epoch": 1.1340002753430316, "grad_norm": 0.4935639202594757, "learning_rate": 7.053263585479285e-06, "loss": 0.3834, "step": 24711 }, { "epoch": 1.1340461658482859, "grad_norm": 0.4503193795681, "learning_rate": 7.053040022679516e-06, "loss": 0.3036, "step": 24712 }, { "epoch": 1.1340920563535404, "grad_norm": 0.44749170541763306, "learning_rate": 7.052816454942834e-06, "loss": 0.3425, "step": 24713 }, { "epoch": 1.1341379468587949, "grad_norm": 0.47759994864463806, "learning_rate": 7.052592882269775e-06, "loss": 0.35, "step": 24714 }, { "epoch": 1.1341838373640494, "grad_norm": 0.4530743956565857, "learning_rate": 7.05236930466088e-06, "loss": 0.3216, "step": 24715 }, { "epoch": 1.1342297278693039, "grad_norm": 0.45324352383613586, "learning_rate": 7.052145722116684e-06, "loss": 0.3327, "step": 24716 }, { "epoch": 1.1342756183745584, "grad_norm": 0.4964286983013153, "learning_rate": 7.051922134637725e-06, "loss": 0.404, "step": 24717 }, { "epoch": 1.1343215088798129, "grad_norm": 0.4560413658618927, "learning_rate": 7.0516985422245406e-06, "loss": 0.3425, "step": 24718 }, { "epoch": 1.1343673993850671, "grad_norm": 0.4483110308647156, "learning_rate": 7.05147494487767e-06, "loss": 0.361, "step": 24719 }, { "epoch": 1.1344132898903216, "grad_norm": 0.4693838357925415, "learning_rate": 7.051251342597647e-06, "loss": 0.3028, "step": 24720 }, { "epoch": 1.1344591803955761, "grad_norm": 0.47053030133247375, "learning_rate": 7.051027735385014e-06, "loss": 0.348, "step": 24721 }, { "epoch": 1.1345050709008306, "grad_norm": 0.4874568283557892, "learning_rate": 7.050804123240307e-06, "loss": 0.3846, "step": 24722 }, { "epoch": 1.1345509614060851, "grad_norm": 0.535033106803894, "learning_rate": 7.050580506164062e-06, "loss": 0.4384, "step": 24723 }, { "epoch": 1.1345968519113396, "grad_norm": 0.4514215588569641, "learning_rate": 7.050356884156819e-06, "loss": 0.3333, "step": 24724 }, { "epoch": 1.134642742416594, "grad_norm": 0.4870937764644623, "learning_rate": 7.050133257219114e-06, "loss": 0.3837, "step": 24725 }, { "epoch": 1.1346886329218484, "grad_norm": 0.47671422362327576, "learning_rate": 7.049909625351486e-06, "loss": 0.3535, "step": 24726 }, { "epoch": 1.134734523427103, "grad_norm": 0.4436316192150116, "learning_rate": 7.049685988554473e-06, "loss": 0.3342, "step": 24727 }, { "epoch": 1.1347804139323574, "grad_norm": 0.4829087555408478, "learning_rate": 7.04946234682861e-06, "loss": 0.4447, "step": 24728 }, { "epoch": 1.134826304437612, "grad_norm": 0.43359872698783875, "learning_rate": 7.049238700174439e-06, "loss": 0.3305, "step": 24729 }, { "epoch": 1.1348721949428664, "grad_norm": 0.45679810643196106, "learning_rate": 7.049015048592495e-06, "loss": 0.363, "step": 24730 }, { "epoch": 1.134918085448121, "grad_norm": 0.43866032361984253, "learning_rate": 7.048791392083316e-06, "loss": 0.2955, "step": 24731 }, { "epoch": 1.1349639759533752, "grad_norm": 0.4772982597351074, "learning_rate": 7.048567730647441e-06, "loss": 0.348, "step": 24732 }, { "epoch": 1.1350098664586297, "grad_norm": 0.45870426297187805, "learning_rate": 7.0483440642854074e-06, "loss": 0.3746, "step": 24733 }, { "epoch": 1.1350557569638842, "grad_norm": 0.4722480773925781, "learning_rate": 7.048120392997751e-06, "loss": 0.3447, "step": 24734 }, { "epoch": 1.1351016474691387, "grad_norm": 0.4523755609989166, "learning_rate": 7.047896716785014e-06, "loss": 0.3116, "step": 24735 }, { "epoch": 1.1351475379743932, "grad_norm": 0.46340692043304443, "learning_rate": 7.0476730356477295e-06, "loss": 0.3528, "step": 24736 }, { "epoch": 1.1351934284796474, "grad_norm": 0.4610845446586609, "learning_rate": 7.04744934958644e-06, "loss": 0.3462, "step": 24737 }, { "epoch": 1.135239318984902, "grad_norm": 0.4838887155056, "learning_rate": 7.0472256586016794e-06, "loss": 0.4202, "step": 24738 }, { "epoch": 1.1352852094901564, "grad_norm": 0.4883697032928467, "learning_rate": 7.0470019626939865e-06, "loss": 0.3929, "step": 24739 }, { "epoch": 1.135331099995411, "grad_norm": 0.4319809377193451, "learning_rate": 7.046778261863901e-06, "loss": 0.284, "step": 24740 }, { "epoch": 1.1353769905006654, "grad_norm": 0.4681074321269989, "learning_rate": 7.0465545561119595e-06, "loss": 0.3536, "step": 24741 }, { "epoch": 1.13542288100592, "grad_norm": 0.4900055527687073, "learning_rate": 7.0463308454387e-06, "loss": 0.3611, "step": 24742 }, { "epoch": 1.1354687715111744, "grad_norm": 0.4769522249698639, "learning_rate": 7.046107129844661e-06, "loss": 0.36, "step": 24743 }, { "epoch": 1.135514662016429, "grad_norm": 0.4656541347503662, "learning_rate": 7.045883409330381e-06, "loss": 0.3462, "step": 24744 }, { "epoch": 1.1355605525216832, "grad_norm": 0.45617586374282837, "learning_rate": 7.045659683896395e-06, "loss": 0.3236, "step": 24745 }, { "epoch": 1.1356064430269377, "grad_norm": 0.4944656193256378, "learning_rate": 7.045435953543245e-06, "loss": 0.3934, "step": 24746 }, { "epoch": 1.1356523335321922, "grad_norm": 0.48666101694107056, "learning_rate": 7.045212218271466e-06, "loss": 0.3925, "step": 24747 }, { "epoch": 1.1356982240374467, "grad_norm": 0.47320905327796936, "learning_rate": 7.044988478081598e-06, "loss": 0.3339, "step": 24748 }, { "epoch": 1.1357441145427012, "grad_norm": 0.48678824305534363, "learning_rate": 7.044764732974179e-06, "loss": 0.3719, "step": 24749 }, { "epoch": 1.1357900050479555, "grad_norm": 0.454212486743927, "learning_rate": 7.044540982949743e-06, "loss": 0.3236, "step": 24750 }, { "epoch": 1.13583589555321, "grad_norm": 0.46318498253822327, "learning_rate": 7.044317228008833e-06, "loss": 0.3389, "step": 24751 }, { "epoch": 1.1358817860584645, "grad_norm": 0.4354458749294281, "learning_rate": 7.044093468151985e-06, "loss": 0.2534, "step": 24752 }, { "epoch": 1.135927676563719, "grad_norm": 0.46282947063446045, "learning_rate": 7.043869703379738e-06, "loss": 0.3546, "step": 24753 }, { "epoch": 1.1359735670689735, "grad_norm": 0.48586320877075195, "learning_rate": 7.0436459336926284e-06, "loss": 0.3845, "step": 24754 }, { "epoch": 1.136019457574228, "grad_norm": 0.4965175986289978, "learning_rate": 7.0434221590911964e-06, "loss": 0.4018, "step": 24755 }, { "epoch": 1.1360653480794825, "grad_norm": 0.4964290261268616, "learning_rate": 7.043198379575978e-06, "loss": 0.4209, "step": 24756 }, { "epoch": 1.1361112385847367, "grad_norm": 0.4509190022945404, "learning_rate": 7.042974595147513e-06, "loss": 0.3208, "step": 24757 }, { "epoch": 1.1361571290899912, "grad_norm": 0.4863429367542267, "learning_rate": 7.042750805806339e-06, "loss": 0.3863, "step": 24758 }, { "epoch": 1.1362030195952457, "grad_norm": 0.45994833111763, "learning_rate": 7.0425270115529945e-06, "loss": 0.3342, "step": 24759 }, { "epoch": 1.1362489101005002, "grad_norm": 0.45173588395118713, "learning_rate": 7.042303212388015e-06, "loss": 0.2975, "step": 24760 }, { "epoch": 1.1362948006057547, "grad_norm": 0.48274773359298706, "learning_rate": 7.042079408311943e-06, "loss": 0.3846, "step": 24761 }, { "epoch": 1.1363406911110092, "grad_norm": 0.4808579385280609, "learning_rate": 7.041855599325314e-06, "loss": 0.4071, "step": 24762 }, { "epoch": 1.1363865816162635, "grad_norm": 0.4286321997642517, "learning_rate": 7.0416317854286665e-06, "loss": 0.2862, "step": 24763 }, { "epoch": 1.136432472121518, "grad_norm": 0.5289644598960876, "learning_rate": 7.041407966622539e-06, "loss": 0.4298, "step": 24764 }, { "epoch": 1.1364783626267725, "grad_norm": 0.45408350229263306, "learning_rate": 7.04118414290747e-06, "loss": 0.3187, "step": 24765 }, { "epoch": 1.136524253132027, "grad_norm": 0.5313528180122375, "learning_rate": 7.040960314283998e-06, "loss": 0.4638, "step": 24766 }, { "epoch": 1.1365701436372815, "grad_norm": 0.4300351142883301, "learning_rate": 7.040736480752659e-06, "loss": 0.2891, "step": 24767 }, { "epoch": 1.136616034142536, "grad_norm": 0.4096066653728485, "learning_rate": 7.040512642313992e-06, "loss": 0.264, "step": 24768 }, { "epoch": 1.1366619246477905, "grad_norm": 0.4362323582172394, "learning_rate": 7.040288798968538e-06, "loss": 0.2998, "step": 24769 }, { "epoch": 1.1367078151530448, "grad_norm": 0.4990677833557129, "learning_rate": 7.040064950716834e-06, "loss": 0.3832, "step": 24770 }, { "epoch": 1.1367537056582993, "grad_norm": 0.4813346862792969, "learning_rate": 7.0398410975594155e-06, "loss": 0.3519, "step": 24771 }, { "epoch": 1.1367995961635537, "grad_norm": 0.4750055968761444, "learning_rate": 7.0396172394968245e-06, "loss": 0.354, "step": 24772 }, { "epoch": 1.1368454866688082, "grad_norm": 0.4687548875808716, "learning_rate": 7.039393376529596e-06, "loss": 0.3448, "step": 24773 }, { "epoch": 1.1368913771740627, "grad_norm": 0.470760315656662, "learning_rate": 7.039169508658271e-06, "loss": 0.3718, "step": 24774 }, { "epoch": 1.1369372676793172, "grad_norm": 0.5034664273262024, "learning_rate": 7.038945635883388e-06, "loss": 0.4164, "step": 24775 }, { "epoch": 1.1369831581845715, "grad_norm": 0.469431072473526, "learning_rate": 7.038721758205485e-06, "loss": 0.3913, "step": 24776 }, { "epoch": 1.137029048689826, "grad_norm": 0.4928151071071625, "learning_rate": 7.038497875625096e-06, "loss": 0.4353, "step": 24777 }, { "epoch": 1.1370749391950805, "grad_norm": 0.4459112584590912, "learning_rate": 7.038273988142767e-06, "loss": 0.2846, "step": 24778 }, { "epoch": 1.137120829700335, "grad_norm": 0.4745567739009857, "learning_rate": 7.038050095759029e-06, "loss": 0.3683, "step": 24779 }, { "epoch": 1.1371667202055895, "grad_norm": 0.4736587107181549, "learning_rate": 7.037826198474426e-06, "loss": 0.3141, "step": 24780 }, { "epoch": 1.137212610710844, "grad_norm": 0.5149346590042114, "learning_rate": 7.037602296289493e-06, "loss": 0.498, "step": 24781 }, { "epoch": 1.1372585012160985, "grad_norm": 0.495312362909317, "learning_rate": 7.037378389204769e-06, "loss": 0.3728, "step": 24782 }, { "epoch": 1.1373043917213528, "grad_norm": 0.4811500310897827, "learning_rate": 7.037154477220795e-06, "loss": 0.373, "step": 24783 }, { "epoch": 1.1373502822266073, "grad_norm": 0.4920310378074646, "learning_rate": 7.036930560338105e-06, "loss": 0.4206, "step": 24784 }, { "epoch": 1.1373961727318618, "grad_norm": 0.4230092167854309, "learning_rate": 7.0367066385572414e-06, "loss": 0.3127, "step": 24785 }, { "epoch": 1.1374420632371163, "grad_norm": 0.44914710521698, "learning_rate": 7.036482711878742e-06, "loss": 0.3539, "step": 24786 }, { "epoch": 1.1374879537423708, "grad_norm": 0.468374103307724, "learning_rate": 7.036258780303144e-06, "loss": 0.3465, "step": 24787 }, { "epoch": 1.137533844247625, "grad_norm": 0.5037892460823059, "learning_rate": 7.0360348438309846e-06, "loss": 0.4077, "step": 24788 }, { "epoch": 1.1375797347528795, "grad_norm": 0.49517032504081726, "learning_rate": 7.035810902462805e-06, "loss": 0.4562, "step": 24789 }, { "epoch": 1.137625625258134, "grad_norm": 0.4721580743789673, "learning_rate": 7.035586956199142e-06, "loss": 0.4031, "step": 24790 }, { "epoch": 1.1376715157633885, "grad_norm": 0.4820847511291504, "learning_rate": 7.035363005040536e-06, "loss": 0.3499, "step": 24791 }, { "epoch": 1.137717406268643, "grad_norm": 0.4424426257610321, "learning_rate": 7.035139048987525e-06, "loss": 0.3095, "step": 24792 }, { "epoch": 1.1377632967738975, "grad_norm": 0.47277045249938965, "learning_rate": 7.034915088040645e-06, "loss": 0.3207, "step": 24793 }, { "epoch": 1.137809187279152, "grad_norm": 0.4357203245162964, "learning_rate": 7.034691122200436e-06, "loss": 0.3053, "step": 24794 }, { "epoch": 1.1378550777844063, "grad_norm": 0.47035279870033264, "learning_rate": 7.034467151467439e-06, "loss": 0.3321, "step": 24795 }, { "epoch": 1.1379009682896608, "grad_norm": 0.4522012770175934, "learning_rate": 7.0342431758421896e-06, "loss": 0.3442, "step": 24796 }, { "epoch": 1.1379468587949153, "grad_norm": 0.46848559379577637, "learning_rate": 7.034019195325228e-06, "loss": 0.3866, "step": 24797 }, { "epoch": 1.1379927493001698, "grad_norm": 0.4865160882472992, "learning_rate": 7.033795209917093e-06, "loss": 0.3506, "step": 24798 }, { "epoch": 1.1380386398054243, "grad_norm": 0.5198656916618347, "learning_rate": 7.03357121961832e-06, "loss": 0.3868, "step": 24799 }, { "epoch": 1.1380845303106788, "grad_norm": 0.44505733251571655, "learning_rate": 7.033347224429452e-06, "loss": 0.3138, "step": 24800 }, { "epoch": 1.138130420815933, "grad_norm": 0.5344245433807373, "learning_rate": 7.033123224351024e-06, "loss": 0.4998, "step": 24801 }, { "epoch": 1.1381763113211876, "grad_norm": 0.48603877425193787, "learning_rate": 7.032899219383578e-06, "loss": 0.4286, "step": 24802 }, { "epoch": 1.138222201826442, "grad_norm": 0.4751037061214447, "learning_rate": 7.0326752095276505e-06, "loss": 0.3749, "step": 24803 }, { "epoch": 1.1382680923316966, "grad_norm": 0.46218571066856384, "learning_rate": 7.0324511947837805e-06, "loss": 0.3536, "step": 24804 }, { "epoch": 1.138313982836951, "grad_norm": 0.4770491421222687, "learning_rate": 7.032227175152506e-06, "loss": 0.4235, "step": 24805 }, { "epoch": 1.1383598733422056, "grad_norm": 0.47392261028289795, "learning_rate": 7.032003150634368e-06, "loss": 0.4171, "step": 24806 }, { "epoch": 1.13840576384746, "grad_norm": 0.5051649808883667, "learning_rate": 7.031779121229901e-06, "loss": 0.4046, "step": 24807 }, { "epoch": 1.1384516543527143, "grad_norm": 0.4590856432914734, "learning_rate": 7.03155508693965e-06, "loss": 0.34, "step": 24808 }, { "epoch": 1.1384975448579688, "grad_norm": 0.47395479679107666, "learning_rate": 7.031331047764149e-06, "loss": 0.3798, "step": 24809 }, { "epoch": 1.1385434353632233, "grad_norm": 0.45980486273765564, "learning_rate": 7.0311070037039365e-06, "loss": 0.3073, "step": 24810 }, { "epoch": 1.1385893258684778, "grad_norm": 0.4823946952819824, "learning_rate": 7.030882954759554e-06, "loss": 0.3593, "step": 24811 }, { "epoch": 1.1386352163737323, "grad_norm": 0.47585824131965637, "learning_rate": 7.030658900931539e-06, "loss": 0.3643, "step": 24812 }, { "epoch": 1.1386811068789868, "grad_norm": 0.45333290100097656, "learning_rate": 7.030434842220431e-06, "loss": 0.3292, "step": 24813 }, { "epoch": 1.138726997384241, "grad_norm": 0.4830933213233948, "learning_rate": 7.030210778626767e-06, "loss": 0.3837, "step": 24814 }, { "epoch": 1.1387728878894956, "grad_norm": 0.4814111888408661, "learning_rate": 7.029986710151085e-06, "loss": 0.364, "step": 24815 }, { "epoch": 1.13881877839475, "grad_norm": 0.542103111743927, "learning_rate": 7.029762636793928e-06, "loss": 0.3024, "step": 24816 }, { "epoch": 1.1388646689000046, "grad_norm": 0.4428088665008545, "learning_rate": 7.029538558555832e-06, "loss": 0.3153, "step": 24817 }, { "epoch": 1.138910559405259, "grad_norm": 0.49106183648109436, "learning_rate": 7.029314475437336e-06, "loss": 0.3727, "step": 24818 }, { "epoch": 1.1389564499105136, "grad_norm": 0.48322033882141113, "learning_rate": 7.029090387438979e-06, "loss": 0.409, "step": 24819 }, { "epoch": 1.139002340415768, "grad_norm": 0.5014519691467285, "learning_rate": 7.028866294561302e-06, "loss": 0.3839, "step": 24820 }, { "epoch": 1.1390482309210224, "grad_norm": 0.46592456102371216, "learning_rate": 7.028642196804839e-06, "loss": 0.3538, "step": 24821 }, { "epoch": 1.1390941214262769, "grad_norm": 0.4376320540904999, "learning_rate": 7.028418094170133e-06, "loss": 0.3161, "step": 24822 }, { "epoch": 1.1391400119315314, "grad_norm": 0.505426824092865, "learning_rate": 7.028193986657722e-06, "loss": 0.4151, "step": 24823 }, { "epoch": 1.1391859024367859, "grad_norm": 0.4779411554336548, "learning_rate": 7.0279698742681445e-06, "loss": 0.4402, "step": 24824 }, { "epoch": 1.1392317929420404, "grad_norm": 0.4424952566623688, "learning_rate": 7.027745757001939e-06, "loss": 0.3333, "step": 24825 }, { "epoch": 1.1392776834472946, "grad_norm": 0.4343445301055908, "learning_rate": 7.0275216348596465e-06, "loss": 0.3028, "step": 24826 }, { "epoch": 1.1393235739525491, "grad_norm": 0.45684632658958435, "learning_rate": 7.027297507841802e-06, "loss": 0.3089, "step": 24827 }, { "epoch": 1.1393694644578036, "grad_norm": 0.45610225200653076, "learning_rate": 7.027073375948948e-06, "loss": 0.3414, "step": 24828 }, { "epoch": 1.1394153549630581, "grad_norm": 0.47095775604248047, "learning_rate": 7.026849239181622e-06, "loss": 0.3725, "step": 24829 }, { "epoch": 1.1394612454683126, "grad_norm": 0.4666273891925812, "learning_rate": 7.0266250975403635e-06, "loss": 0.3772, "step": 24830 }, { "epoch": 1.1395071359735671, "grad_norm": 0.4947664141654968, "learning_rate": 7.0264009510257125e-06, "loss": 0.371, "step": 24831 }, { "epoch": 1.1395530264788216, "grad_norm": 0.48034748435020447, "learning_rate": 7.0261767996382046e-06, "loss": 0.3733, "step": 24832 }, { "epoch": 1.1395989169840761, "grad_norm": 0.4821909964084625, "learning_rate": 7.025952643378382e-06, "loss": 0.3318, "step": 24833 }, { "epoch": 1.1396448074893304, "grad_norm": 0.4427422881126404, "learning_rate": 7.025728482246783e-06, "loss": 0.3586, "step": 24834 }, { "epoch": 1.139690697994585, "grad_norm": 0.4751167893409729, "learning_rate": 7.025504316243946e-06, "loss": 0.3847, "step": 24835 }, { "epoch": 1.1397365884998394, "grad_norm": 0.4683513939380646, "learning_rate": 7.02528014537041e-06, "loss": 0.3577, "step": 24836 }, { "epoch": 1.1397824790050939, "grad_norm": 0.45081812143325806, "learning_rate": 7.025055969626715e-06, "loss": 0.3861, "step": 24837 }, { "epoch": 1.1398283695103484, "grad_norm": 0.4853639304637909, "learning_rate": 7.0248317890133984e-06, "loss": 0.4402, "step": 24838 }, { "epoch": 1.1398742600156027, "grad_norm": 0.4979836344718933, "learning_rate": 7.024607603531002e-06, "loss": 0.4082, "step": 24839 }, { "epoch": 1.1399201505208572, "grad_norm": 0.5283799767494202, "learning_rate": 7.024383413180062e-06, "loss": 0.4083, "step": 24840 }, { "epoch": 1.1399660410261117, "grad_norm": 0.4710626006126404, "learning_rate": 7.024159217961121e-06, "loss": 0.4031, "step": 24841 }, { "epoch": 1.1400119315313662, "grad_norm": 0.47772860527038574, "learning_rate": 7.023935017874713e-06, "loss": 0.3334, "step": 24842 }, { "epoch": 1.1400578220366206, "grad_norm": 0.4598221480846405, "learning_rate": 7.023710812921383e-06, "loss": 0.3629, "step": 24843 }, { "epoch": 1.1401037125418751, "grad_norm": 0.45816609263420105, "learning_rate": 7.023486603101664e-06, "loss": 0.3277, "step": 24844 }, { "epoch": 1.1401496030471296, "grad_norm": 0.55083167552948, "learning_rate": 7.023262388416099e-06, "loss": 0.4322, "step": 24845 }, { "epoch": 1.140195493552384, "grad_norm": 0.446175754070282, "learning_rate": 7.02303816886523e-06, "loss": 0.355, "step": 24846 }, { "epoch": 1.1402413840576384, "grad_norm": 0.46936631202697754, "learning_rate": 7.022813944449589e-06, "loss": 0.3307, "step": 24847 }, { "epoch": 1.140287274562893, "grad_norm": 0.47443658113479614, "learning_rate": 7.022589715169721e-06, "loss": 0.2998, "step": 24848 }, { "epoch": 1.1403331650681474, "grad_norm": 0.46371009945869446, "learning_rate": 7.022365481026161e-06, "loss": 0.347, "step": 24849 }, { "epoch": 1.140379055573402, "grad_norm": 0.46668490767478943, "learning_rate": 7.022141242019452e-06, "loss": 0.3118, "step": 24850 }, { "epoch": 1.1404249460786564, "grad_norm": 0.48154309391975403, "learning_rate": 7.02191699815013e-06, "loss": 0.3807, "step": 24851 }, { "epoch": 1.1404708365839107, "grad_norm": 0.489553302526474, "learning_rate": 7.021692749418738e-06, "loss": 0.3605, "step": 24852 }, { "epoch": 1.1405167270891652, "grad_norm": 0.5164687633514404, "learning_rate": 7.021468495825811e-06, "loss": 0.425, "step": 24853 }, { "epoch": 1.1405626175944197, "grad_norm": 0.470962792634964, "learning_rate": 7.021244237371892e-06, "loss": 0.3923, "step": 24854 }, { "epoch": 1.1406085080996742, "grad_norm": 0.4653065800666809, "learning_rate": 7.021019974057516e-06, "loss": 0.3119, "step": 24855 }, { "epoch": 1.1406543986049287, "grad_norm": 0.480281800031662, "learning_rate": 7.0207957058832256e-06, "loss": 0.3661, "step": 24856 }, { "epoch": 1.1407002891101832, "grad_norm": 0.44190484285354614, "learning_rate": 7.020571432849561e-06, "loss": 0.3474, "step": 24857 }, { "epoch": 1.1407461796154377, "grad_norm": 0.48670390248298645, "learning_rate": 7.020347154957059e-06, "loss": 0.3492, "step": 24858 }, { "epoch": 1.140792070120692, "grad_norm": 0.4663967788219452, "learning_rate": 7.020122872206258e-06, "loss": 0.3249, "step": 24859 }, { "epoch": 1.1408379606259464, "grad_norm": 0.4506220817565918, "learning_rate": 7.019898584597702e-06, "loss": 0.3557, "step": 24860 }, { "epoch": 1.140883851131201, "grad_norm": 0.44643524289131165, "learning_rate": 7.0196742921319265e-06, "loss": 0.3222, "step": 24861 }, { "epoch": 1.1409297416364554, "grad_norm": 0.4787890613079071, "learning_rate": 7.01944999480947e-06, "loss": 0.3974, "step": 24862 }, { "epoch": 1.14097563214171, "grad_norm": 0.4575692117214203, "learning_rate": 7.019225692630877e-06, "loss": 0.3303, "step": 24863 }, { "epoch": 1.1410215226469644, "grad_norm": 0.4653702676296234, "learning_rate": 7.01900138559668e-06, "loss": 0.3572, "step": 24864 }, { "epoch": 1.1410674131522187, "grad_norm": 0.47481268644332886, "learning_rate": 7.018777073707423e-06, "loss": 0.3566, "step": 24865 }, { "epoch": 1.1411133036574732, "grad_norm": 0.48292267322540283, "learning_rate": 7.018552756963645e-06, "loss": 0.3628, "step": 24866 }, { "epoch": 1.1411591941627277, "grad_norm": 0.4298136830329895, "learning_rate": 7.018328435365883e-06, "loss": 0.3155, "step": 24867 }, { "epoch": 1.1412050846679822, "grad_norm": 0.4496147930622101, "learning_rate": 7.018104108914682e-06, "loss": 0.3108, "step": 24868 }, { "epoch": 1.1412509751732367, "grad_norm": 0.5021294355392456, "learning_rate": 7.0178797776105735e-06, "loss": 0.3899, "step": 24869 }, { "epoch": 1.1412968656784912, "grad_norm": 0.46272557973861694, "learning_rate": 7.017655441454102e-06, "loss": 0.3379, "step": 24870 }, { "epoch": 1.1413427561837457, "grad_norm": 0.418933242559433, "learning_rate": 7.0174311004458075e-06, "loss": 0.271, "step": 24871 }, { "epoch": 1.141388646689, "grad_norm": 0.49746739864349365, "learning_rate": 7.017206754586226e-06, "loss": 0.3803, "step": 24872 }, { "epoch": 1.1414345371942545, "grad_norm": 0.4602879285812378, "learning_rate": 7.0169824038759e-06, "loss": 0.3506, "step": 24873 }, { "epoch": 1.141480427699509, "grad_norm": 0.449402779340744, "learning_rate": 7.016758048315369e-06, "loss": 0.3208, "step": 24874 }, { "epoch": 1.1415263182047635, "grad_norm": 0.461357444524765, "learning_rate": 7.016533687905169e-06, "loss": 0.3365, "step": 24875 }, { "epoch": 1.141572208710018, "grad_norm": 0.4784964919090271, "learning_rate": 7.016309322645842e-06, "loss": 0.4173, "step": 24876 }, { "epoch": 1.1416180992152722, "grad_norm": 0.5157486200332642, "learning_rate": 7.016084952537928e-06, "loss": 0.4099, "step": 24877 }, { "epoch": 1.1416639897205267, "grad_norm": 0.49279165267944336, "learning_rate": 7.015860577581966e-06, "loss": 0.3913, "step": 24878 }, { "epoch": 1.1417098802257812, "grad_norm": 0.45893847942352295, "learning_rate": 7.015636197778497e-06, "loss": 0.375, "step": 24879 }, { "epoch": 1.1417557707310357, "grad_norm": 0.4883574843406677, "learning_rate": 7.015411813128058e-06, "loss": 0.3809, "step": 24880 }, { "epoch": 1.1418016612362902, "grad_norm": 0.5022452473640442, "learning_rate": 7.015187423631189e-06, "loss": 0.3878, "step": 24881 }, { "epoch": 1.1418475517415447, "grad_norm": 0.4454035758972168, "learning_rate": 7.014963029288432e-06, "loss": 0.3534, "step": 24882 }, { "epoch": 1.1418934422467992, "grad_norm": 0.43624651432037354, "learning_rate": 7.014738630100323e-06, "loss": 0.2991, "step": 24883 }, { "epoch": 1.1419393327520535, "grad_norm": 0.4754253923892975, "learning_rate": 7.014514226067405e-06, "loss": 0.3754, "step": 24884 }, { "epoch": 1.141985223257308, "grad_norm": 0.47495603561401367, "learning_rate": 7.014289817190215e-06, "loss": 0.3664, "step": 24885 }, { "epoch": 1.1420311137625625, "grad_norm": 0.5194360017776489, "learning_rate": 7.014065403469294e-06, "loss": 0.4036, "step": 24886 }, { "epoch": 1.142077004267817, "grad_norm": 0.4147861897945404, "learning_rate": 7.01384098490518e-06, "loss": 0.2863, "step": 24887 }, { "epoch": 1.1421228947730715, "grad_norm": 0.5046252608299255, "learning_rate": 7.0136165614984165e-06, "loss": 0.4223, "step": 24888 }, { "epoch": 1.142168785278326, "grad_norm": 0.5072726607322693, "learning_rate": 7.013392133249539e-06, "loss": 0.3913, "step": 24889 }, { "epoch": 1.1422146757835803, "grad_norm": 0.4464978873729706, "learning_rate": 7.013167700159089e-06, "loss": 0.3661, "step": 24890 }, { "epoch": 1.1422605662888348, "grad_norm": 0.469239205121994, "learning_rate": 7.012943262227607e-06, "loss": 0.2996, "step": 24891 }, { "epoch": 1.1423064567940893, "grad_norm": 0.4954543113708496, "learning_rate": 7.012718819455632e-06, "loss": 0.3338, "step": 24892 }, { "epoch": 1.1423523472993438, "grad_norm": 0.492285817861557, "learning_rate": 7.012494371843701e-06, "loss": 0.373, "step": 24893 }, { "epoch": 1.1423982378045983, "grad_norm": 0.47986897826194763, "learning_rate": 7.01226991939236e-06, "loss": 0.4025, "step": 24894 }, { "epoch": 1.1424441283098528, "grad_norm": 0.47332343459129333, "learning_rate": 7.012045462102143e-06, "loss": 0.3426, "step": 24895 }, { "epoch": 1.1424900188151073, "grad_norm": 0.5081636905670166, "learning_rate": 7.011820999973591e-06, "loss": 0.4973, "step": 24896 }, { "epoch": 1.1425359093203615, "grad_norm": 0.46340858936309814, "learning_rate": 7.0115965330072455e-06, "loss": 0.3381, "step": 24897 }, { "epoch": 1.142581799825616, "grad_norm": 0.4857732951641083, "learning_rate": 7.011372061203644e-06, "loss": 0.401, "step": 24898 }, { "epoch": 1.1426276903308705, "grad_norm": 0.47230997681617737, "learning_rate": 7.011147584563328e-06, "loss": 0.3749, "step": 24899 }, { "epoch": 1.142673580836125, "grad_norm": 0.4550023674964905, "learning_rate": 7.010923103086837e-06, "loss": 0.3487, "step": 24900 }, { "epoch": 1.1427194713413795, "grad_norm": 0.4740363657474518, "learning_rate": 7.0106986167747104e-06, "loss": 0.4004, "step": 24901 }, { "epoch": 1.142765361846634, "grad_norm": 0.4842958450317383, "learning_rate": 7.010474125627489e-06, "loss": 0.3825, "step": 24902 }, { "epoch": 1.1428112523518883, "grad_norm": 0.48954150080680847, "learning_rate": 7.010249629645712e-06, "loss": 0.4297, "step": 24903 }, { "epoch": 1.1428571428571428, "grad_norm": 0.45172399282455444, "learning_rate": 7.010025128829917e-06, "loss": 0.3575, "step": 24904 }, { "epoch": 1.1429030333623973, "grad_norm": 0.4919714629650116, "learning_rate": 7.009800623180647e-06, "loss": 0.3651, "step": 24905 }, { "epoch": 1.1429489238676518, "grad_norm": 0.4649743437767029, "learning_rate": 7.009576112698442e-06, "loss": 0.3968, "step": 24906 }, { "epoch": 1.1429948143729063, "grad_norm": 0.46726515889167786, "learning_rate": 7.00935159738384e-06, "loss": 0.3666, "step": 24907 }, { "epoch": 1.1430407048781608, "grad_norm": 0.48515409231185913, "learning_rate": 7.009127077237381e-06, "loss": 0.3499, "step": 24908 }, { "epoch": 1.1430865953834153, "grad_norm": 0.5135096311569214, "learning_rate": 7.008902552259605e-06, "loss": 0.4144, "step": 24909 }, { "epoch": 1.1431324858886696, "grad_norm": 0.4409202039241791, "learning_rate": 7.008678022451053e-06, "loss": 0.3166, "step": 24910 }, { "epoch": 1.143178376393924, "grad_norm": 0.4671071469783783, "learning_rate": 7.008453487812266e-06, "loss": 0.3499, "step": 24911 }, { "epoch": 1.1432242668991786, "grad_norm": 0.47289708256721497, "learning_rate": 7.008228948343781e-06, "loss": 0.3298, "step": 24912 }, { "epoch": 1.143270157404433, "grad_norm": 0.4455314576625824, "learning_rate": 7.0080044040461395e-06, "loss": 0.3027, "step": 24913 }, { "epoch": 1.1433160479096875, "grad_norm": 0.47602707147598267, "learning_rate": 7.00777985491988e-06, "loss": 0.3941, "step": 24914 }, { "epoch": 1.1433619384149418, "grad_norm": 0.45630142092704773, "learning_rate": 7.0075553009655435e-06, "loss": 0.3272, "step": 24915 }, { "epoch": 1.1434078289201963, "grad_norm": 0.5175597667694092, "learning_rate": 7.007330742183671e-06, "loss": 0.3946, "step": 24916 }, { "epoch": 1.1434537194254508, "grad_norm": 0.4405609965324402, "learning_rate": 7.007106178574803e-06, "loss": 0.3392, "step": 24917 }, { "epoch": 1.1434996099307053, "grad_norm": 0.4584062099456787, "learning_rate": 7.006881610139476e-06, "loss": 0.2909, "step": 24918 }, { "epoch": 1.1435455004359598, "grad_norm": 0.4774465262889862, "learning_rate": 7.006657036878232e-06, "loss": 0.3676, "step": 24919 }, { "epoch": 1.1435913909412143, "grad_norm": 0.4936419129371643, "learning_rate": 7.006432458791612e-06, "loss": 0.3405, "step": 24920 }, { "epoch": 1.1436372814464688, "grad_norm": 0.4631178379058838, "learning_rate": 7.006207875880154e-06, "loss": 0.3645, "step": 24921 }, { "epoch": 1.1436831719517233, "grad_norm": 0.5628679394721985, "learning_rate": 7.005983288144401e-06, "loss": 0.3817, "step": 24922 }, { "epoch": 1.1437290624569776, "grad_norm": 0.49904584884643555, "learning_rate": 7.005758695584891e-06, "loss": 0.4479, "step": 24923 }, { "epoch": 1.143774952962232, "grad_norm": 0.4463474154472351, "learning_rate": 7.005534098202164e-06, "loss": 0.3298, "step": 24924 }, { "epoch": 1.1438208434674866, "grad_norm": 0.44511690735816956, "learning_rate": 7.00530949599676e-06, "loss": 0.3208, "step": 24925 }, { "epoch": 1.143866733972741, "grad_norm": 0.44681280851364136, "learning_rate": 7.00508488896922e-06, "loss": 0.3198, "step": 24926 }, { "epoch": 1.1439126244779956, "grad_norm": 0.47121742367744446, "learning_rate": 7.004860277120085e-06, "loss": 0.357, "step": 24927 }, { "epoch": 1.1439585149832499, "grad_norm": 0.448816180229187, "learning_rate": 7.004635660449892e-06, "loss": 0.2996, "step": 24928 }, { "epoch": 1.1440044054885044, "grad_norm": 0.4731786847114563, "learning_rate": 7.0044110389591825e-06, "loss": 0.2944, "step": 24929 }, { "epoch": 1.1440502959937588, "grad_norm": 0.5082247853279114, "learning_rate": 7.0041864126484995e-06, "loss": 0.4319, "step": 24930 }, { "epoch": 1.1440961864990133, "grad_norm": 0.44497427344322205, "learning_rate": 7.003961781518378e-06, "loss": 0.329, "step": 24931 }, { "epoch": 1.1441420770042678, "grad_norm": 0.4509630501270294, "learning_rate": 7.003737145569361e-06, "loss": 0.3339, "step": 24932 }, { "epoch": 1.1441879675095223, "grad_norm": 0.4785159230232239, "learning_rate": 7.003512504801991e-06, "loss": 0.4123, "step": 24933 }, { "epoch": 1.1442338580147768, "grad_norm": 0.469690203666687, "learning_rate": 7.0032878592168045e-06, "loss": 0.3286, "step": 24934 }, { "epoch": 1.1442797485200311, "grad_norm": 0.4968698024749756, "learning_rate": 7.003063208814343e-06, "loss": 0.3971, "step": 24935 }, { "epoch": 1.1443256390252856, "grad_norm": 0.421076238155365, "learning_rate": 7.002838553595146e-06, "loss": 0.2735, "step": 24936 }, { "epoch": 1.14437152953054, "grad_norm": 0.4518403112888336, "learning_rate": 7.002613893559754e-06, "loss": 0.3418, "step": 24937 }, { "epoch": 1.1444174200357946, "grad_norm": 0.4615568518638611, "learning_rate": 7.0023892287087095e-06, "loss": 0.3314, "step": 24938 }, { "epoch": 1.144463310541049, "grad_norm": 0.47610920667648315, "learning_rate": 7.00216455904255e-06, "loss": 0.3876, "step": 24939 }, { "epoch": 1.1445092010463036, "grad_norm": 0.49797719717025757, "learning_rate": 7.001939884561816e-06, "loss": 0.3535, "step": 24940 }, { "epoch": 1.1445550915515579, "grad_norm": 0.46331605315208435, "learning_rate": 7.001715205267048e-06, "loss": 0.3386, "step": 24941 }, { "epoch": 1.1446009820568124, "grad_norm": 0.444021999835968, "learning_rate": 7.001490521158788e-06, "loss": 0.3526, "step": 24942 }, { "epoch": 1.1446468725620669, "grad_norm": 0.4544728398323059, "learning_rate": 7.001265832237574e-06, "loss": 0.3222, "step": 24943 }, { "epoch": 1.1446927630673214, "grad_norm": 0.45844194293022156, "learning_rate": 7.001041138503948e-06, "loss": 0.3893, "step": 24944 }, { "epoch": 1.1447386535725759, "grad_norm": 0.4786856174468994, "learning_rate": 7.000816439958451e-06, "loss": 0.4069, "step": 24945 }, { "epoch": 1.1447845440778304, "grad_norm": 0.4699844419956207, "learning_rate": 7.00059173660162e-06, "loss": 0.3317, "step": 24946 }, { "epoch": 1.1448304345830849, "grad_norm": 0.47999995946884155, "learning_rate": 7.000367028433998e-06, "loss": 0.3695, "step": 24947 }, { "epoch": 1.1448763250883391, "grad_norm": 0.42146944999694824, "learning_rate": 7.000142315456123e-06, "loss": 0.3195, "step": 24948 }, { "epoch": 1.1449222155935936, "grad_norm": 0.508046567440033, "learning_rate": 6.999917597668538e-06, "loss": 0.3757, "step": 24949 }, { "epoch": 1.1449681060988481, "grad_norm": 0.4362163543701172, "learning_rate": 6.999692875071784e-06, "loss": 0.3348, "step": 24950 }, { "epoch": 1.1450139966041026, "grad_norm": 0.45824134349823, "learning_rate": 6.9994681476663975e-06, "loss": 0.3332, "step": 24951 }, { "epoch": 1.1450598871093571, "grad_norm": 0.42417895793914795, "learning_rate": 6.999243415452922e-06, "loss": 0.324, "step": 24952 }, { "epoch": 1.1451057776146116, "grad_norm": 0.44994741678237915, "learning_rate": 6.999018678431897e-06, "loss": 0.3086, "step": 24953 }, { "epoch": 1.145151668119866, "grad_norm": 0.47160306572914124, "learning_rate": 6.998793936603862e-06, "loss": 0.3572, "step": 24954 }, { "epoch": 1.1451975586251204, "grad_norm": 0.45894333720207214, "learning_rate": 6.998569189969361e-06, "loss": 0.3524, "step": 24955 }, { "epoch": 1.145243449130375, "grad_norm": 0.49134561419487, "learning_rate": 6.9983444385289314e-06, "loss": 0.4148, "step": 24956 }, { "epoch": 1.1452893396356294, "grad_norm": 0.44758114218711853, "learning_rate": 6.998119682283112e-06, "loss": 0.2971, "step": 24957 }, { "epoch": 1.145335230140884, "grad_norm": 0.43807709217071533, "learning_rate": 6.997894921232446e-06, "loss": 0.3042, "step": 24958 }, { "epoch": 1.1453811206461384, "grad_norm": 0.4467284381389618, "learning_rate": 6.997670155377474e-06, "loss": 0.3209, "step": 24959 }, { "epoch": 1.145427011151393, "grad_norm": 0.46406763792037964, "learning_rate": 6.997445384718736e-06, "loss": 0.3707, "step": 24960 }, { "epoch": 1.1454729016566472, "grad_norm": 0.4993608295917511, "learning_rate": 6.997220609256773e-06, "loss": 0.4195, "step": 24961 }, { "epoch": 1.1455187921619017, "grad_norm": 0.46961164474487305, "learning_rate": 6.996995828992123e-06, "loss": 0.3864, "step": 24962 }, { "epoch": 1.1455646826671562, "grad_norm": 0.4230894446372986, "learning_rate": 6.996771043925328e-06, "loss": 0.2755, "step": 24963 }, { "epoch": 1.1456105731724107, "grad_norm": 0.5033126473426819, "learning_rate": 6.996546254056931e-06, "loss": 0.4441, "step": 24964 }, { "epoch": 1.1456564636776652, "grad_norm": 0.4961814284324646, "learning_rate": 6.996321459387469e-06, "loss": 0.3651, "step": 24965 }, { "epoch": 1.1457023541829194, "grad_norm": 0.46157148480415344, "learning_rate": 6.996096659917484e-06, "loss": 0.3268, "step": 24966 }, { "epoch": 1.145748244688174, "grad_norm": 0.5087007880210876, "learning_rate": 6.995871855647517e-06, "loss": 0.4159, "step": 24967 }, { "epoch": 1.1457941351934284, "grad_norm": 0.4776315987110138, "learning_rate": 6.995647046578107e-06, "loss": 0.3599, "step": 24968 }, { "epoch": 1.145840025698683, "grad_norm": 0.4742911458015442, "learning_rate": 6.995422232709795e-06, "loss": 0.3703, "step": 24969 }, { "epoch": 1.1458859162039374, "grad_norm": 0.4834994971752167, "learning_rate": 6.995197414043125e-06, "loss": 0.409, "step": 24970 }, { "epoch": 1.145931806709192, "grad_norm": 0.4417472779750824, "learning_rate": 6.9949725905786334e-06, "loss": 0.3539, "step": 24971 }, { "epoch": 1.1459776972144464, "grad_norm": 0.4900830388069153, "learning_rate": 6.994747762316863e-06, "loss": 0.4409, "step": 24972 }, { "epoch": 1.1460235877197007, "grad_norm": 0.46553850173950195, "learning_rate": 6.994522929258354e-06, "loss": 0.3394, "step": 24973 }, { "epoch": 1.1460694782249552, "grad_norm": 0.48493367433547974, "learning_rate": 6.994298091403645e-06, "loss": 0.4149, "step": 24974 }, { "epoch": 1.1461153687302097, "grad_norm": 0.4602962136268616, "learning_rate": 6.994073248753279e-06, "loss": 0.343, "step": 24975 }, { "epoch": 1.1461612592354642, "grad_norm": 0.4861312210559845, "learning_rate": 6.993848401307798e-06, "loss": 0.3397, "step": 24976 }, { "epoch": 1.1462071497407187, "grad_norm": 0.479353129863739, "learning_rate": 6.99362354906774e-06, "loss": 0.3608, "step": 24977 }, { "epoch": 1.1462530402459732, "grad_norm": 0.47855016589164734, "learning_rate": 6.9933986920336464e-06, "loss": 0.3514, "step": 24978 }, { "epoch": 1.1462989307512275, "grad_norm": 0.418137788772583, "learning_rate": 6.993173830206057e-06, "loss": 0.2798, "step": 24979 }, { "epoch": 1.146344821256482, "grad_norm": 0.47327977418899536, "learning_rate": 6.992948963585515e-06, "loss": 0.3463, "step": 24980 }, { "epoch": 1.1463907117617365, "grad_norm": 0.4292834401130676, "learning_rate": 6.992724092172559e-06, "loss": 0.313, "step": 24981 }, { "epoch": 1.146436602266991, "grad_norm": 0.45037105679512024, "learning_rate": 6.9924992159677315e-06, "loss": 0.334, "step": 24982 }, { "epoch": 1.1464824927722455, "grad_norm": 0.45711550116539, "learning_rate": 6.992274334971572e-06, "loss": 0.3896, "step": 24983 }, { "epoch": 1.1465283832775, "grad_norm": 0.5297359228134155, "learning_rate": 6.992049449184621e-06, "loss": 0.3148, "step": 24984 }, { "epoch": 1.1465742737827544, "grad_norm": 0.4432131350040436, "learning_rate": 6.991824558607418e-06, "loss": 0.2908, "step": 24985 }, { "epoch": 1.1466201642880087, "grad_norm": 0.4860258996486664, "learning_rate": 6.9915996632405085e-06, "loss": 0.3926, "step": 24986 }, { "epoch": 1.1466660547932632, "grad_norm": 0.48344576358795166, "learning_rate": 6.9913747630844286e-06, "loss": 0.4093, "step": 24987 }, { "epoch": 1.1467119452985177, "grad_norm": 0.4971316456794739, "learning_rate": 6.991149858139723e-06, "loss": 0.3855, "step": 24988 }, { "epoch": 1.1467578358037722, "grad_norm": 0.4725916385650635, "learning_rate": 6.990924948406929e-06, "loss": 0.3957, "step": 24989 }, { "epoch": 1.1468037263090267, "grad_norm": 0.48780810832977295, "learning_rate": 6.990700033886588e-06, "loss": 0.3666, "step": 24990 }, { "epoch": 1.1468496168142812, "grad_norm": 0.46235063672065735, "learning_rate": 6.990475114579243e-06, "loss": 0.3314, "step": 24991 }, { "epoch": 1.1468955073195355, "grad_norm": 0.4588218033313751, "learning_rate": 6.990250190485433e-06, "loss": 0.3068, "step": 24992 }, { "epoch": 1.14694139782479, "grad_norm": 0.4766181707382202, "learning_rate": 6.9900252616057e-06, "loss": 0.3881, "step": 24993 }, { "epoch": 1.1469872883300445, "grad_norm": 0.47128942608833313, "learning_rate": 6.989800327940583e-06, "loss": 0.3417, "step": 24994 }, { "epoch": 1.147033178835299, "grad_norm": 0.4698542654514313, "learning_rate": 6.9895753894906256e-06, "loss": 0.3778, "step": 24995 }, { "epoch": 1.1470790693405535, "grad_norm": 0.45415112376213074, "learning_rate": 6.989350446256367e-06, "loss": 0.3225, "step": 24996 }, { "epoch": 1.147124959845808, "grad_norm": 0.4701600968837738, "learning_rate": 6.989125498238347e-06, "loss": 0.358, "step": 24997 }, { "epoch": 1.1471708503510625, "grad_norm": 0.43711742758750916, "learning_rate": 6.9889005454371095e-06, "loss": 0.3277, "step": 24998 }, { "epoch": 1.1472167408563168, "grad_norm": 0.47080016136169434, "learning_rate": 6.988675587853195e-06, "loss": 0.3526, "step": 24999 }, { "epoch": 1.1472626313615713, "grad_norm": 0.47006282210350037, "learning_rate": 6.9884506254871405e-06, "loss": 0.3544, "step": 25000 }, { "epoch": 1.1473085218668257, "grad_norm": 0.4615339934825897, "learning_rate": 6.9882256583394925e-06, "loss": 0.3519, "step": 25001 }, { "epoch": 1.1473544123720802, "grad_norm": 0.4525710344314575, "learning_rate": 6.988000686410787e-06, "loss": 0.3378, "step": 25002 }, { "epoch": 1.1474003028773347, "grad_norm": 0.4831273853778839, "learning_rate": 6.987775709701569e-06, "loss": 0.3861, "step": 25003 }, { "epoch": 1.147446193382589, "grad_norm": 0.47208690643310547, "learning_rate": 6.987550728212378e-06, "loss": 0.3589, "step": 25004 }, { "epoch": 1.1474920838878435, "grad_norm": 0.44566628336906433, "learning_rate": 6.987325741943753e-06, "loss": 0.3318, "step": 25005 }, { "epoch": 1.147537974393098, "grad_norm": 0.47661104798316956, "learning_rate": 6.987100750896237e-06, "loss": 0.3793, "step": 25006 }, { "epoch": 1.1475838648983525, "grad_norm": 0.4605101943016052, "learning_rate": 6.9868757550703726e-06, "loss": 0.3574, "step": 25007 }, { "epoch": 1.147629755403607, "grad_norm": 0.45379412174224854, "learning_rate": 6.986650754466698e-06, "loss": 0.3385, "step": 25008 }, { "epoch": 1.1476756459088615, "grad_norm": 0.45908984541893005, "learning_rate": 6.986425749085756e-06, "loss": 0.3595, "step": 25009 }, { "epoch": 1.147721536414116, "grad_norm": 0.47903504967689514, "learning_rate": 6.9862007389280864e-06, "loss": 0.386, "step": 25010 }, { "epoch": 1.1477674269193705, "grad_norm": 0.45941847562789917, "learning_rate": 6.985975723994229e-06, "loss": 0.3144, "step": 25011 }, { "epoch": 1.1478133174246248, "grad_norm": 0.49382543563842773, "learning_rate": 6.985750704284731e-06, "loss": 0.3635, "step": 25012 }, { "epoch": 1.1478592079298793, "grad_norm": 0.47839128971099854, "learning_rate": 6.985525679800126e-06, "loss": 0.2912, "step": 25013 }, { "epoch": 1.1479050984351338, "grad_norm": 0.4532592296600342, "learning_rate": 6.985300650540958e-06, "loss": 0.3685, "step": 25014 }, { "epoch": 1.1479509889403883, "grad_norm": 0.499133437871933, "learning_rate": 6.985075616507772e-06, "loss": 0.3948, "step": 25015 }, { "epoch": 1.1479968794456428, "grad_norm": 0.4837343096733093, "learning_rate": 6.984850577701103e-06, "loss": 0.3801, "step": 25016 }, { "epoch": 1.148042769950897, "grad_norm": 0.45721644163131714, "learning_rate": 6.984625534121494e-06, "loss": 0.3208, "step": 25017 }, { "epoch": 1.1480886604561515, "grad_norm": 0.4535588026046753, "learning_rate": 6.984400485769489e-06, "loss": 0.3555, "step": 25018 }, { "epoch": 1.148134550961406, "grad_norm": 0.47468113899230957, "learning_rate": 6.984175432645626e-06, "loss": 0.3572, "step": 25019 }, { "epoch": 1.1481804414666605, "grad_norm": 0.45623454451560974, "learning_rate": 6.983950374750448e-06, "loss": 0.317, "step": 25020 }, { "epoch": 1.148226331971915, "grad_norm": 0.4824194014072418, "learning_rate": 6.983725312084496e-06, "loss": 0.4157, "step": 25021 }, { "epoch": 1.1482722224771695, "grad_norm": 0.4603768289089203, "learning_rate": 6.9835002446483095e-06, "loss": 0.3386, "step": 25022 }, { "epoch": 1.148318112982424, "grad_norm": 0.4851486086845398, "learning_rate": 6.9832751724424306e-06, "loss": 0.4264, "step": 25023 }, { "epoch": 1.1483640034876783, "grad_norm": 0.5331316590309143, "learning_rate": 6.983050095467402e-06, "loss": 0.4294, "step": 25024 }, { "epoch": 1.1484098939929328, "grad_norm": 0.48154881596565247, "learning_rate": 6.982825013723763e-06, "loss": 0.3316, "step": 25025 }, { "epoch": 1.1484557844981873, "grad_norm": 0.46427908539772034, "learning_rate": 6.982599927212057e-06, "loss": 0.3834, "step": 25026 }, { "epoch": 1.1485016750034418, "grad_norm": 0.4564216732978821, "learning_rate": 6.982374835932823e-06, "loss": 0.3711, "step": 25027 }, { "epoch": 1.1485475655086963, "grad_norm": 0.47576141357421875, "learning_rate": 6.982149739886603e-06, "loss": 0.3755, "step": 25028 }, { "epoch": 1.1485934560139508, "grad_norm": 0.4606434106826782, "learning_rate": 6.9819246390739385e-06, "loss": 0.3609, "step": 25029 }, { "epoch": 1.148639346519205, "grad_norm": 0.44380488991737366, "learning_rate": 6.98169953349537e-06, "loss": 0.3267, "step": 25030 }, { "epoch": 1.1486852370244596, "grad_norm": 0.47449520230293274, "learning_rate": 6.981474423151441e-06, "loss": 0.362, "step": 25031 }, { "epoch": 1.148731127529714, "grad_norm": 0.4447198808193207, "learning_rate": 6.98124930804269e-06, "loss": 0.3052, "step": 25032 }, { "epoch": 1.1487770180349686, "grad_norm": 0.4412217140197754, "learning_rate": 6.9810241881696605e-06, "loss": 0.3239, "step": 25033 }, { "epoch": 1.148822908540223, "grad_norm": 0.4593147933483124, "learning_rate": 6.980799063532893e-06, "loss": 0.3502, "step": 25034 }, { "epoch": 1.1488687990454776, "grad_norm": 0.49950239062309265, "learning_rate": 6.980573934132929e-06, "loss": 0.4086, "step": 25035 }, { "epoch": 1.148914689550732, "grad_norm": 0.45279550552368164, "learning_rate": 6.980348799970309e-06, "loss": 0.3437, "step": 25036 }, { "epoch": 1.1489605800559863, "grad_norm": 0.49908551573753357, "learning_rate": 6.980123661045577e-06, "loss": 0.4254, "step": 25037 }, { "epoch": 1.1490064705612408, "grad_norm": 0.49343904852867126, "learning_rate": 6.979898517359272e-06, "loss": 0.4195, "step": 25038 }, { "epoch": 1.1490523610664953, "grad_norm": 0.4655086100101471, "learning_rate": 6.9796733689119345e-06, "loss": 0.3628, "step": 25039 }, { "epoch": 1.1490982515717498, "grad_norm": 0.4357227385044098, "learning_rate": 6.979448215704108e-06, "loss": 0.289, "step": 25040 }, { "epoch": 1.1491441420770043, "grad_norm": 0.43845608830451965, "learning_rate": 6.979223057736333e-06, "loss": 0.3178, "step": 25041 }, { "epoch": 1.1491900325822588, "grad_norm": 0.5134596228599548, "learning_rate": 6.978997895009152e-06, "loss": 0.4084, "step": 25042 }, { "epoch": 1.149235923087513, "grad_norm": 0.46119585633277893, "learning_rate": 6.978772727523106e-06, "loss": 0.3248, "step": 25043 }, { "epoch": 1.1492818135927676, "grad_norm": 0.5257668495178223, "learning_rate": 6.978547555278735e-06, "loss": 0.4427, "step": 25044 }, { "epoch": 1.149327704098022, "grad_norm": 0.4643856883049011, "learning_rate": 6.978322378276581e-06, "loss": 0.3563, "step": 25045 }, { "epoch": 1.1493735946032766, "grad_norm": 0.47052228450775146, "learning_rate": 6.978097196517189e-06, "loss": 0.3626, "step": 25046 }, { "epoch": 1.149419485108531, "grad_norm": 0.4688790440559387, "learning_rate": 6.977872010001094e-06, "loss": 0.3564, "step": 25047 }, { "epoch": 1.1494653756137856, "grad_norm": 0.5200122594833374, "learning_rate": 6.977646818728842e-06, "loss": 0.3544, "step": 25048 }, { "epoch": 1.14951126611904, "grad_norm": 0.4934723973274231, "learning_rate": 6.977421622700976e-06, "loss": 0.4402, "step": 25049 }, { "epoch": 1.1495571566242944, "grad_norm": 0.46207094192504883, "learning_rate": 6.9771964219180324e-06, "loss": 0.3619, "step": 25050 }, { "epoch": 1.1496030471295489, "grad_norm": 0.4683164954185486, "learning_rate": 6.976971216380556e-06, "loss": 0.3335, "step": 25051 }, { "epoch": 1.1496489376348034, "grad_norm": 0.45859313011169434, "learning_rate": 6.976746006089088e-06, "loss": 0.3262, "step": 25052 }, { "epoch": 1.1496948281400579, "grad_norm": 0.44494688510894775, "learning_rate": 6.97652079104417e-06, "loss": 0.3541, "step": 25053 }, { "epoch": 1.1497407186453124, "grad_norm": 0.4629078805446625, "learning_rate": 6.976295571246343e-06, "loss": 0.3477, "step": 25054 }, { "epoch": 1.1497866091505666, "grad_norm": 0.48030275106430054, "learning_rate": 6.97607034669615e-06, "loss": 0.3157, "step": 25055 }, { "epoch": 1.1498324996558211, "grad_norm": 0.4573032259941101, "learning_rate": 6.97584511739413e-06, "loss": 0.3494, "step": 25056 }, { "epoch": 1.1498783901610756, "grad_norm": 0.4814562201499939, "learning_rate": 6.9756198833408266e-06, "loss": 0.3862, "step": 25057 }, { "epoch": 1.1499242806663301, "grad_norm": 0.4575633704662323, "learning_rate": 6.975394644536781e-06, "loss": 0.3757, "step": 25058 }, { "epoch": 1.1499701711715846, "grad_norm": 0.5048182010650635, "learning_rate": 6.975169400982536e-06, "loss": 0.4492, "step": 25059 }, { "epoch": 1.1500160616768391, "grad_norm": 0.5243398547172546, "learning_rate": 6.97494415267863e-06, "loss": 0.3533, "step": 25060 }, { "epoch": 1.1500619521820936, "grad_norm": 0.45034703612327576, "learning_rate": 6.974718899625606e-06, "loss": 0.313, "step": 25061 }, { "epoch": 1.150107842687348, "grad_norm": 0.4944525957107544, "learning_rate": 6.974493641824007e-06, "loss": 0.3524, "step": 25062 }, { "epoch": 1.1501537331926024, "grad_norm": 0.4832689166069031, "learning_rate": 6.974268379274376e-06, "loss": 0.3611, "step": 25063 }, { "epoch": 1.1501996236978569, "grad_norm": 0.4869072437286377, "learning_rate": 6.974043111977251e-06, "loss": 0.3756, "step": 25064 }, { "epoch": 1.1502455142031114, "grad_norm": 0.49842196702957153, "learning_rate": 6.9738178399331746e-06, "loss": 0.4054, "step": 25065 }, { "epoch": 1.1502914047083659, "grad_norm": 0.4746134877204895, "learning_rate": 6.97359256314269e-06, "loss": 0.3668, "step": 25066 }, { "epoch": 1.1503372952136204, "grad_norm": 0.4971030056476593, "learning_rate": 6.9733672816063394e-06, "loss": 0.3943, "step": 25067 }, { "epoch": 1.1503831857188747, "grad_norm": 0.4551909267902374, "learning_rate": 6.973141995324661e-06, "loss": 0.3214, "step": 25068 }, { "epoch": 1.1504290762241292, "grad_norm": 0.5224339962005615, "learning_rate": 6.972916704298201e-06, "loss": 0.4281, "step": 25069 }, { "epoch": 1.1504749667293837, "grad_norm": 0.5089649558067322, "learning_rate": 6.972691408527498e-06, "loss": 0.3664, "step": 25070 }, { "epoch": 1.1505208572346382, "grad_norm": 0.4609854221343994, "learning_rate": 6.972466108013094e-06, "loss": 0.3539, "step": 25071 }, { "epoch": 1.1505667477398926, "grad_norm": 0.46647322177886963, "learning_rate": 6.972240802755533e-06, "loss": 0.4073, "step": 25072 }, { "epoch": 1.1506126382451471, "grad_norm": 0.511405885219574, "learning_rate": 6.972015492755354e-06, "loss": 0.3882, "step": 25073 }, { "epoch": 1.1506585287504016, "grad_norm": 0.4898398816585541, "learning_rate": 6.9717901780131015e-06, "loss": 0.4171, "step": 25074 }, { "epoch": 1.150704419255656, "grad_norm": 0.46157148480415344, "learning_rate": 6.9715648585293165e-06, "loss": 0.3196, "step": 25075 }, { "epoch": 1.1507503097609104, "grad_norm": 0.43727874755859375, "learning_rate": 6.971339534304537e-06, "loss": 0.2954, "step": 25076 }, { "epoch": 1.150796200266165, "grad_norm": 0.4599415063858032, "learning_rate": 6.971114205339311e-06, "loss": 0.3868, "step": 25077 }, { "epoch": 1.1508420907714194, "grad_norm": 0.4168073534965515, "learning_rate": 6.970888871634176e-06, "loss": 0.3008, "step": 25078 }, { "epoch": 1.150887981276674, "grad_norm": 0.526592493057251, "learning_rate": 6.970663533189676e-06, "loss": 0.5096, "step": 25079 }, { "epoch": 1.1509338717819284, "grad_norm": 0.43107932806015015, "learning_rate": 6.970438190006352e-06, "loss": 0.3221, "step": 25080 }, { "epoch": 1.1509797622871827, "grad_norm": 0.4706384837627411, "learning_rate": 6.970212842084746e-06, "loss": 0.3845, "step": 25081 }, { "epoch": 1.1510256527924372, "grad_norm": 1.004956841468811, "learning_rate": 6.9699874894253994e-06, "loss": 0.4594, "step": 25082 }, { "epoch": 1.1510715432976917, "grad_norm": 0.4478180408477783, "learning_rate": 6.969762132028856e-06, "loss": 0.3324, "step": 25083 }, { "epoch": 1.1511174338029462, "grad_norm": 0.4622480869293213, "learning_rate": 6.969536769895655e-06, "loss": 0.342, "step": 25084 }, { "epoch": 1.1511633243082007, "grad_norm": 0.4555993676185608, "learning_rate": 6.96931140302634e-06, "loss": 0.3336, "step": 25085 }, { "epoch": 1.1512092148134552, "grad_norm": 0.5121057033538818, "learning_rate": 6.969086031421454e-06, "loss": 0.4062, "step": 25086 }, { "epoch": 1.1512551053187097, "grad_norm": 0.4331211745738983, "learning_rate": 6.968860655081535e-06, "loss": 0.3127, "step": 25087 }, { "epoch": 1.151300995823964, "grad_norm": 0.5448230504989624, "learning_rate": 6.968635274007128e-06, "loss": 0.4088, "step": 25088 }, { "epoch": 1.1513468863292184, "grad_norm": 0.4776642620563507, "learning_rate": 6.968409888198777e-06, "loss": 0.384, "step": 25089 }, { "epoch": 1.151392776834473, "grad_norm": 0.44804856181144714, "learning_rate": 6.968184497657019e-06, "loss": 0.2944, "step": 25090 }, { "epoch": 1.1514386673397274, "grad_norm": 0.5468553900718689, "learning_rate": 6.9679591023824e-06, "loss": 0.46, "step": 25091 }, { "epoch": 1.151484557844982, "grad_norm": 0.4704417884349823, "learning_rate": 6.96773370237546e-06, "loss": 0.3806, "step": 25092 }, { "epoch": 1.1515304483502362, "grad_norm": 0.4762265086174011, "learning_rate": 6.967508297636741e-06, "loss": 0.3407, "step": 25093 }, { "epoch": 1.1515763388554907, "grad_norm": 0.4593409597873688, "learning_rate": 6.967282888166785e-06, "loss": 0.3661, "step": 25094 }, { "epoch": 1.1516222293607452, "grad_norm": 0.44153115153312683, "learning_rate": 6.967057473966136e-06, "loss": 0.3346, "step": 25095 }, { "epoch": 1.1516681198659997, "grad_norm": 0.4575146436691284, "learning_rate": 6.966832055035334e-06, "loss": 0.3252, "step": 25096 }, { "epoch": 1.1517140103712542, "grad_norm": 0.42585891485214233, "learning_rate": 6.966606631374922e-06, "loss": 0.3197, "step": 25097 }, { "epoch": 1.1517599008765087, "grad_norm": 0.4820810556411743, "learning_rate": 6.966381202985441e-06, "loss": 0.3356, "step": 25098 }, { "epoch": 1.1518057913817632, "grad_norm": 0.4558377265930176, "learning_rate": 6.966155769867433e-06, "loss": 0.3596, "step": 25099 }, { "epoch": 1.1518516818870177, "grad_norm": 0.5104401111602783, "learning_rate": 6.965930332021442e-06, "loss": 0.4248, "step": 25100 }, { "epoch": 1.151897572392272, "grad_norm": 0.46312642097473145, "learning_rate": 6.965704889448009e-06, "loss": 0.3531, "step": 25101 }, { "epoch": 1.1519434628975265, "grad_norm": 0.49762675166130066, "learning_rate": 6.965479442147677e-06, "loss": 0.4242, "step": 25102 }, { "epoch": 1.151989353402781, "grad_norm": 0.4090297520160675, "learning_rate": 6.965253990120986e-06, "loss": 0.2829, "step": 25103 }, { "epoch": 1.1520352439080355, "grad_norm": 0.47023677825927734, "learning_rate": 6.96502853336848e-06, "loss": 0.3319, "step": 25104 }, { "epoch": 1.15208113441329, "grad_norm": 0.5168111324310303, "learning_rate": 6.9648030718907e-06, "loss": 0.4479, "step": 25105 }, { "epoch": 1.1521270249185442, "grad_norm": 0.5148356556892395, "learning_rate": 6.964577605688189e-06, "loss": 0.4161, "step": 25106 }, { "epoch": 1.1521729154237987, "grad_norm": 0.4504427909851074, "learning_rate": 6.964352134761489e-06, "loss": 0.3489, "step": 25107 }, { "epoch": 1.1522188059290532, "grad_norm": 0.4421009123325348, "learning_rate": 6.964126659111144e-06, "loss": 0.3144, "step": 25108 }, { "epoch": 1.1522646964343077, "grad_norm": 0.4721224308013916, "learning_rate": 6.9639011787376905e-06, "loss": 0.3455, "step": 25109 }, { "epoch": 1.1523105869395622, "grad_norm": 0.4540870785713196, "learning_rate": 6.963675693641676e-06, "loss": 0.3221, "step": 25110 }, { "epoch": 1.1523564774448167, "grad_norm": 0.4359225630760193, "learning_rate": 6.9634502038236415e-06, "loss": 0.2943, "step": 25111 }, { "epoch": 1.1524023679500712, "grad_norm": 0.4352863132953644, "learning_rate": 6.963224709284129e-06, "loss": 0.2913, "step": 25112 }, { "epoch": 1.1524482584553255, "grad_norm": 0.48331311345100403, "learning_rate": 6.962999210023681e-06, "loss": 0.3788, "step": 25113 }, { "epoch": 1.15249414896058, "grad_norm": 0.4596453607082367, "learning_rate": 6.96277370604284e-06, "loss": 0.3547, "step": 25114 }, { "epoch": 1.1525400394658345, "grad_norm": 0.4663882255554199, "learning_rate": 6.962548197342146e-06, "loss": 0.4118, "step": 25115 }, { "epoch": 1.152585929971089, "grad_norm": 0.4594615399837494, "learning_rate": 6.962322683922143e-06, "loss": 0.3384, "step": 25116 }, { "epoch": 1.1526318204763435, "grad_norm": 0.46238380670547485, "learning_rate": 6.962097165783375e-06, "loss": 0.3311, "step": 25117 }, { "epoch": 1.152677710981598, "grad_norm": 0.44720688462257385, "learning_rate": 6.961871642926382e-06, "loss": 0.3011, "step": 25118 }, { "epoch": 1.1527236014868523, "grad_norm": 0.46242305636405945, "learning_rate": 6.961646115351706e-06, "loss": 0.3516, "step": 25119 }, { "epoch": 1.1527694919921068, "grad_norm": 0.4658792316913605, "learning_rate": 6.961420583059891e-06, "loss": 0.4176, "step": 25120 }, { "epoch": 1.1528153824973613, "grad_norm": 0.4210544228553772, "learning_rate": 6.961195046051478e-06, "loss": 0.293, "step": 25121 }, { "epoch": 1.1528612730026158, "grad_norm": 0.4550395905971527, "learning_rate": 6.96096950432701e-06, "loss": 0.3636, "step": 25122 }, { "epoch": 1.1529071635078703, "grad_norm": 0.4997739791870117, "learning_rate": 6.960743957887029e-06, "loss": 0.4454, "step": 25123 }, { "epoch": 1.1529530540131248, "grad_norm": 0.41521745920181274, "learning_rate": 6.9605184067320785e-06, "loss": 0.2538, "step": 25124 }, { "epoch": 1.1529989445183793, "grad_norm": 0.4375258982181549, "learning_rate": 6.9602928508627e-06, "loss": 0.3575, "step": 25125 }, { "epoch": 1.1530448350236335, "grad_norm": 0.4523445963859558, "learning_rate": 6.9600672902794355e-06, "loss": 0.3506, "step": 25126 }, { "epoch": 1.153090725528888, "grad_norm": 0.4959537386894226, "learning_rate": 6.959841724982827e-06, "loss": 0.3849, "step": 25127 }, { "epoch": 1.1531366160341425, "grad_norm": 0.44030407071113586, "learning_rate": 6.959616154973419e-06, "loss": 0.3315, "step": 25128 }, { "epoch": 1.153182506539397, "grad_norm": 0.4650985300540924, "learning_rate": 6.9593905802517525e-06, "loss": 0.3946, "step": 25129 }, { "epoch": 1.1532283970446515, "grad_norm": 0.5176205039024353, "learning_rate": 6.959165000818369e-06, "loss": 0.3847, "step": 25130 }, { "epoch": 1.153274287549906, "grad_norm": 0.4602415859699249, "learning_rate": 6.958939416673812e-06, "loss": 0.329, "step": 25131 }, { "epoch": 1.1533201780551603, "grad_norm": 0.4734410047531128, "learning_rate": 6.958713827818624e-06, "loss": 0.3744, "step": 25132 }, { "epoch": 1.1533660685604148, "grad_norm": 0.48495838046073914, "learning_rate": 6.958488234253347e-06, "loss": 0.4061, "step": 25133 }, { "epoch": 1.1534119590656693, "grad_norm": 0.4616854190826416, "learning_rate": 6.958262635978526e-06, "loss": 0.348, "step": 25134 }, { "epoch": 1.1534578495709238, "grad_norm": 0.4614546298980713, "learning_rate": 6.958037032994701e-06, "loss": 0.3626, "step": 25135 }, { "epoch": 1.1535037400761783, "grad_norm": 0.46961313486099243, "learning_rate": 6.9578114253024144e-06, "loss": 0.3573, "step": 25136 }, { "epoch": 1.1535496305814328, "grad_norm": 0.5064706206321716, "learning_rate": 6.9575858129022085e-06, "loss": 0.3466, "step": 25137 }, { "epoch": 1.1535955210866873, "grad_norm": 0.490616112947464, "learning_rate": 6.957360195794627e-06, "loss": 0.4145, "step": 25138 }, { "epoch": 1.1536414115919416, "grad_norm": 0.44774553179740906, "learning_rate": 6.957134573980212e-06, "loss": 0.342, "step": 25139 }, { "epoch": 1.153687302097196, "grad_norm": 0.470467209815979, "learning_rate": 6.956908947459507e-06, "loss": 0.3837, "step": 25140 }, { "epoch": 1.1537331926024506, "grad_norm": 0.4633384048938751, "learning_rate": 6.956683316233052e-06, "loss": 0.3311, "step": 25141 }, { "epoch": 1.153779083107705, "grad_norm": 0.47774162888526917, "learning_rate": 6.956457680301391e-06, "loss": 0.3326, "step": 25142 }, { "epoch": 1.1538249736129595, "grad_norm": 0.4819404184818268, "learning_rate": 6.9562320396650685e-06, "loss": 0.3999, "step": 25143 }, { "epoch": 1.1538708641182138, "grad_norm": 0.47395989298820496, "learning_rate": 6.956006394324624e-06, "loss": 0.3531, "step": 25144 }, { "epoch": 1.1539167546234683, "grad_norm": 0.4633278250694275, "learning_rate": 6.9557807442806025e-06, "loss": 0.3429, "step": 25145 }, { "epoch": 1.1539626451287228, "grad_norm": 0.49769124388694763, "learning_rate": 6.955555089533545e-06, "loss": 0.3978, "step": 25146 }, { "epoch": 1.1540085356339773, "grad_norm": 0.5108813643455505, "learning_rate": 6.955329430083995e-06, "loss": 0.4341, "step": 25147 }, { "epoch": 1.1540544261392318, "grad_norm": 0.44003668427467346, "learning_rate": 6.955103765932494e-06, "loss": 0.3034, "step": 25148 }, { "epoch": 1.1541003166444863, "grad_norm": 0.4526883065700531, "learning_rate": 6.954878097079584e-06, "loss": 0.2991, "step": 25149 }, { "epoch": 1.1541462071497408, "grad_norm": 0.46105608344078064, "learning_rate": 6.954652423525813e-06, "loss": 0.3586, "step": 25150 }, { "epoch": 1.154192097654995, "grad_norm": 0.44052544236183167, "learning_rate": 6.954426745271719e-06, "loss": 0.3502, "step": 25151 }, { "epoch": 1.1542379881602496, "grad_norm": 0.46082603931427, "learning_rate": 6.954201062317843e-06, "loss": 0.3472, "step": 25152 }, { "epoch": 1.154283878665504, "grad_norm": 0.4669497013092041, "learning_rate": 6.9539753746647325e-06, "loss": 0.3293, "step": 25153 }, { "epoch": 1.1543297691707586, "grad_norm": 0.431634783744812, "learning_rate": 6.953749682312927e-06, "loss": 0.2974, "step": 25154 }, { "epoch": 1.154375659676013, "grad_norm": 0.5211135745048523, "learning_rate": 6.9535239852629695e-06, "loss": 0.4514, "step": 25155 }, { "epoch": 1.1544215501812676, "grad_norm": 0.4230610728263855, "learning_rate": 6.953298283515404e-06, "loss": 0.2917, "step": 25156 }, { "epoch": 1.1544674406865219, "grad_norm": 0.4881574809551239, "learning_rate": 6.953072577070774e-06, "loss": 0.3732, "step": 25157 }, { "epoch": 1.1545133311917763, "grad_norm": 0.4339147210121155, "learning_rate": 6.9528468659296185e-06, "loss": 0.3174, "step": 25158 }, { "epoch": 1.1545592216970308, "grad_norm": 0.45540401339530945, "learning_rate": 6.952621150092484e-06, "loss": 0.3283, "step": 25159 }, { "epoch": 1.1546051122022853, "grad_norm": 0.48818036913871765, "learning_rate": 6.952395429559912e-06, "loss": 0.3536, "step": 25160 }, { "epoch": 1.1546510027075398, "grad_norm": 0.43416881561279297, "learning_rate": 6.9521697043324455e-06, "loss": 0.3232, "step": 25161 }, { "epoch": 1.1546968932127943, "grad_norm": 0.4796547591686249, "learning_rate": 6.951943974410626e-06, "loss": 0.3891, "step": 25162 }, { "epoch": 1.1547427837180488, "grad_norm": 0.46452245116233826, "learning_rate": 6.951718239794998e-06, "loss": 0.3416, "step": 25163 }, { "epoch": 1.1547886742233031, "grad_norm": 0.46585923433303833, "learning_rate": 6.951492500486101e-06, "loss": 0.3264, "step": 25164 }, { "epoch": 1.1548345647285576, "grad_norm": 0.41096577048301697, "learning_rate": 6.951266756484484e-06, "loss": 0.2852, "step": 25165 }, { "epoch": 1.154880455233812, "grad_norm": 0.47545772790908813, "learning_rate": 6.951041007790684e-06, "loss": 0.3939, "step": 25166 }, { "epoch": 1.1549263457390666, "grad_norm": 0.45712143182754517, "learning_rate": 6.950815254405248e-06, "loss": 0.3219, "step": 25167 }, { "epoch": 1.154972236244321, "grad_norm": 0.4984951317310333, "learning_rate": 6.9505894963287166e-06, "loss": 0.3738, "step": 25168 }, { "epoch": 1.1550181267495756, "grad_norm": 0.4408794045448303, "learning_rate": 6.950363733561631e-06, "loss": 0.3205, "step": 25169 }, { "epoch": 1.1550640172548299, "grad_norm": 0.4682091772556305, "learning_rate": 6.950137966104538e-06, "loss": 0.3211, "step": 25170 }, { "epoch": 1.1551099077600844, "grad_norm": 0.44546735286712646, "learning_rate": 6.94991219395798e-06, "loss": 0.3006, "step": 25171 }, { "epoch": 1.1551557982653389, "grad_norm": 0.5079686045646667, "learning_rate": 6.949686417122496e-06, "loss": 0.3882, "step": 25172 }, { "epoch": 1.1552016887705934, "grad_norm": 0.4540318548679352, "learning_rate": 6.949460635598633e-06, "loss": 0.3156, "step": 25173 }, { "epoch": 1.1552475792758479, "grad_norm": 0.4677923917770386, "learning_rate": 6.949234849386932e-06, "loss": 0.348, "step": 25174 }, { "epoch": 1.1552934697811024, "grad_norm": 0.44999071955680847, "learning_rate": 6.949009058487935e-06, "loss": 0.3397, "step": 25175 }, { "epoch": 1.1553393602863569, "grad_norm": 0.48444098234176636, "learning_rate": 6.948783262902189e-06, "loss": 0.3786, "step": 25176 }, { "epoch": 1.1553852507916111, "grad_norm": 0.4763903319835663, "learning_rate": 6.948557462630232e-06, "loss": 0.3723, "step": 25177 }, { "epoch": 1.1554311412968656, "grad_norm": 0.5264382362365723, "learning_rate": 6.948331657672611e-06, "loss": 0.3931, "step": 25178 }, { "epoch": 1.1554770318021201, "grad_norm": 0.4536794424057007, "learning_rate": 6.948105848029867e-06, "loss": 0.3503, "step": 25179 }, { "epoch": 1.1555229223073746, "grad_norm": 0.4981917142868042, "learning_rate": 6.947880033702542e-06, "loss": 0.415, "step": 25180 }, { "epoch": 1.1555688128126291, "grad_norm": 0.4961695969104767, "learning_rate": 6.947654214691181e-06, "loss": 0.4655, "step": 25181 }, { "epoch": 1.1556147033178834, "grad_norm": 0.49622824788093567, "learning_rate": 6.947428390996327e-06, "loss": 0.3966, "step": 25182 }, { "epoch": 1.155660593823138, "grad_norm": 0.4609910845756531, "learning_rate": 6.947202562618521e-06, "loss": 0.3658, "step": 25183 }, { "epoch": 1.1557064843283924, "grad_norm": 0.45820268988609314, "learning_rate": 6.946976729558308e-06, "loss": 0.3258, "step": 25184 }, { "epoch": 1.155752374833647, "grad_norm": 0.5128822326660156, "learning_rate": 6.946750891816232e-06, "loss": 0.4379, "step": 25185 }, { "epoch": 1.1557982653389014, "grad_norm": 0.46970242261886597, "learning_rate": 6.946525049392832e-06, "loss": 0.3597, "step": 25186 }, { "epoch": 1.155844155844156, "grad_norm": 0.44096696376800537, "learning_rate": 6.946299202288653e-06, "loss": 0.2919, "step": 25187 }, { "epoch": 1.1558900463494104, "grad_norm": 0.44802722334861755, "learning_rate": 6.9460733505042425e-06, "loss": 0.3339, "step": 25188 }, { "epoch": 1.155935936854665, "grad_norm": 0.4844057261943817, "learning_rate": 6.945847494040138e-06, "loss": 0.409, "step": 25189 }, { "epoch": 1.1559818273599192, "grad_norm": 0.4761686325073242, "learning_rate": 6.945621632896884e-06, "loss": 0.3674, "step": 25190 }, { "epoch": 1.1560277178651737, "grad_norm": 0.46879076957702637, "learning_rate": 6.9453957670750234e-06, "loss": 0.3138, "step": 25191 }, { "epoch": 1.1560736083704282, "grad_norm": 0.4658149778842926, "learning_rate": 6.9451698965751e-06, "loss": 0.3429, "step": 25192 }, { "epoch": 1.1561194988756827, "grad_norm": 0.45785707235336304, "learning_rate": 6.944944021397658e-06, "loss": 0.3426, "step": 25193 }, { "epoch": 1.1561653893809372, "grad_norm": 0.5196152925491333, "learning_rate": 6.94471814154324e-06, "loss": 0.4596, "step": 25194 }, { "epoch": 1.1562112798861914, "grad_norm": 0.4676278233528137, "learning_rate": 6.944492257012387e-06, "loss": 0.3429, "step": 25195 }, { "epoch": 1.156257170391446, "grad_norm": 0.47010675072669983, "learning_rate": 6.9442663678056455e-06, "loss": 0.3805, "step": 25196 }, { "epoch": 1.1563030608967004, "grad_norm": 0.4876898527145386, "learning_rate": 6.944040473923554e-06, "loss": 0.3749, "step": 25197 }, { "epoch": 1.156348951401955, "grad_norm": 0.5293440818786621, "learning_rate": 6.943814575366661e-06, "loss": 0.4463, "step": 25198 }, { "epoch": 1.1563948419072094, "grad_norm": 0.4519314467906952, "learning_rate": 6.943588672135509e-06, "loss": 0.3017, "step": 25199 }, { "epoch": 1.156440732412464, "grad_norm": 0.4078337848186493, "learning_rate": 6.943362764230636e-06, "loss": 0.2876, "step": 25200 }, { "epoch": 1.1564866229177184, "grad_norm": 0.5074722766876221, "learning_rate": 6.943136851652591e-06, "loss": 0.3904, "step": 25201 }, { "epoch": 1.1565325134229727, "grad_norm": 0.4733673632144928, "learning_rate": 6.9429109344019156e-06, "loss": 0.3445, "step": 25202 }, { "epoch": 1.1565784039282272, "grad_norm": 0.49511849880218506, "learning_rate": 6.942685012479151e-06, "loss": 0.429, "step": 25203 }, { "epoch": 1.1566242944334817, "grad_norm": 0.46987661719322205, "learning_rate": 6.9424590858848425e-06, "loss": 0.3306, "step": 25204 }, { "epoch": 1.1566701849387362, "grad_norm": 0.44555899500846863, "learning_rate": 6.942233154619535e-06, "loss": 0.3304, "step": 25205 }, { "epoch": 1.1567160754439907, "grad_norm": 0.4216049313545227, "learning_rate": 6.9420072186837685e-06, "loss": 0.3068, "step": 25206 }, { "epoch": 1.1567619659492452, "grad_norm": 0.48061463236808777, "learning_rate": 6.9417812780780865e-06, "loss": 0.371, "step": 25207 }, { "epoch": 1.1568078564544995, "grad_norm": 0.46951696276664734, "learning_rate": 6.941555332803034e-06, "loss": 0.4055, "step": 25208 }, { "epoch": 1.156853746959754, "grad_norm": 0.4714525640010834, "learning_rate": 6.941329382859153e-06, "loss": 0.3785, "step": 25209 }, { "epoch": 1.1568996374650085, "grad_norm": 0.4900856614112854, "learning_rate": 6.941103428246989e-06, "loss": 0.3723, "step": 25210 }, { "epoch": 1.156945527970263, "grad_norm": 0.5066500306129456, "learning_rate": 6.9408774689670845e-06, "loss": 0.4304, "step": 25211 }, { "epoch": 1.1569914184755175, "grad_norm": 0.4639904201030731, "learning_rate": 6.94065150501998e-06, "loss": 0.3456, "step": 25212 }, { "epoch": 1.157037308980772, "grad_norm": 0.46823763847351074, "learning_rate": 6.9404255364062226e-06, "loss": 0.3194, "step": 25213 }, { "epoch": 1.1570831994860264, "grad_norm": 0.47651904821395874, "learning_rate": 6.940199563126353e-06, "loss": 0.384, "step": 25214 }, { "epoch": 1.1571290899912807, "grad_norm": 0.4486108422279358, "learning_rate": 6.939973585180916e-06, "loss": 0.3337, "step": 25215 }, { "epoch": 1.1571749804965352, "grad_norm": 0.45768940448760986, "learning_rate": 6.939747602570457e-06, "loss": 0.3306, "step": 25216 }, { "epoch": 1.1572208710017897, "grad_norm": 0.510412871837616, "learning_rate": 6.939521615295515e-06, "loss": 0.3889, "step": 25217 }, { "epoch": 1.1572667615070442, "grad_norm": 0.4649767577648163, "learning_rate": 6.9392956233566344e-06, "loss": 0.3591, "step": 25218 }, { "epoch": 1.1573126520122987, "grad_norm": 0.4374452829360962, "learning_rate": 6.939069626754362e-06, "loss": 0.3173, "step": 25219 }, { "epoch": 1.1573585425175532, "grad_norm": 0.45500296354293823, "learning_rate": 6.938843625489239e-06, "loss": 0.3459, "step": 25220 }, { "epoch": 1.1574044330228075, "grad_norm": 0.47296443581581116, "learning_rate": 6.9386176195618094e-06, "loss": 0.4088, "step": 25221 }, { "epoch": 1.157450323528062, "grad_norm": 0.441616028547287, "learning_rate": 6.938391608972615e-06, "loss": 0.3063, "step": 25222 }, { "epoch": 1.1574962140333165, "grad_norm": 0.5002726912498474, "learning_rate": 6.938165593722201e-06, "loss": 0.3773, "step": 25223 }, { "epoch": 1.157542104538571, "grad_norm": 0.4404740035533905, "learning_rate": 6.937939573811111e-06, "loss": 0.3323, "step": 25224 }, { "epoch": 1.1575879950438255, "grad_norm": 0.46229061484336853, "learning_rate": 6.937713549239887e-06, "loss": 0.3706, "step": 25225 }, { "epoch": 1.15763388554908, "grad_norm": 0.47997400164604187, "learning_rate": 6.937487520009074e-06, "loss": 0.4016, "step": 25226 }, { "epoch": 1.1576797760543345, "grad_norm": 0.4615573287010193, "learning_rate": 6.937261486119216e-06, "loss": 0.349, "step": 25227 }, { "epoch": 1.1577256665595888, "grad_norm": 0.47035470604896545, "learning_rate": 6.937035447570855e-06, "loss": 0.3743, "step": 25228 }, { "epoch": 1.1577715570648432, "grad_norm": 0.46883466839790344, "learning_rate": 6.9368094043645325e-06, "loss": 0.3554, "step": 25229 }, { "epoch": 1.1578174475700977, "grad_norm": 0.4617813229560852, "learning_rate": 6.936583356500799e-06, "loss": 0.3241, "step": 25230 }, { "epoch": 1.1578633380753522, "grad_norm": 0.4416572153568268, "learning_rate": 6.93635730398019e-06, "loss": 0.3189, "step": 25231 }, { "epoch": 1.1579092285806067, "grad_norm": 0.4927392899990082, "learning_rate": 6.9361312468032545e-06, "loss": 0.4103, "step": 25232 }, { "epoch": 1.157955119085861, "grad_norm": 0.48185041546821594, "learning_rate": 6.935905184970535e-06, "loss": 0.3948, "step": 25233 }, { "epoch": 1.1580010095911155, "grad_norm": 0.505353569984436, "learning_rate": 6.935679118482574e-06, "loss": 0.4383, "step": 25234 }, { "epoch": 1.15804690009637, "grad_norm": 0.4556822180747986, "learning_rate": 6.935453047339915e-06, "loss": 0.3129, "step": 25235 }, { "epoch": 1.1580927906016245, "grad_norm": 0.4684167504310608, "learning_rate": 6.935226971543103e-06, "loss": 0.3553, "step": 25236 }, { "epoch": 1.158138681106879, "grad_norm": 0.48484474420547485, "learning_rate": 6.93500089109268e-06, "loss": 0.4001, "step": 25237 }, { "epoch": 1.1581845716121335, "grad_norm": 0.41930991411209106, "learning_rate": 6.934774805989191e-06, "loss": 0.2786, "step": 25238 }, { "epoch": 1.158230462117388, "grad_norm": 0.43269678950309753, "learning_rate": 6.93454871623318e-06, "loss": 0.3242, "step": 25239 }, { "epoch": 1.1582763526226423, "grad_norm": 0.5172913670539856, "learning_rate": 6.934322621825189e-06, "loss": 0.3978, "step": 25240 }, { "epoch": 1.1583222431278968, "grad_norm": 0.45003658533096313, "learning_rate": 6.934096522765763e-06, "loss": 0.3097, "step": 25241 }, { "epoch": 1.1583681336331513, "grad_norm": 0.4628738760948181, "learning_rate": 6.933870419055445e-06, "loss": 0.3356, "step": 25242 }, { "epoch": 1.1584140241384058, "grad_norm": 0.5006917119026184, "learning_rate": 6.9336443106947805e-06, "loss": 0.3966, "step": 25243 }, { "epoch": 1.1584599146436603, "grad_norm": 0.43102824687957764, "learning_rate": 6.9334181976843105e-06, "loss": 0.2692, "step": 25244 }, { "epoch": 1.1585058051489148, "grad_norm": 0.46626317501068115, "learning_rate": 6.933192080024579e-06, "loss": 0.3498, "step": 25245 }, { "epoch": 1.158551695654169, "grad_norm": 0.48062729835510254, "learning_rate": 6.932965957716131e-06, "loss": 0.4201, "step": 25246 }, { "epoch": 1.1585975861594235, "grad_norm": 0.48249563574790955, "learning_rate": 6.93273983075951e-06, "loss": 0.3944, "step": 25247 }, { "epoch": 1.158643476664678, "grad_norm": 0.49768292903900146, "learning_rate": 6.932513699155261e-06, "loss": 0.4218, "step": 25248 }, { "epoch": 1.1586893671699325, "grad_norm": 0.4939209520816803, "learning_rate": 6.932287562903926e-06, "loss": 0.4072, "step": 25249 }, { "epoch": 1.158735257675187, "grad_norm": 0.48353150486946106, "learning_rate": 6.932061422006049e-06, "loss": 0.3673, "step": 25250 }, { "epoch": 1.1587811481804415, "grad_norm": 0.5781121253967285, "learning_rate": 6.931835276462173e-06, "loss": 0.4524, "step": 25251 }, { "epoch": 1.158827038685696, "grad_norm": 0.4806913733482361, "learning_rate": 6.931609126272844e-06, "loss": 0.4372, "step": 25252 }, { "epoch": 1.1588729291909503, "grad_norm": 0.47583240270614624, "learning_rate": 6.931382971438605e-06, "loss": 0.325, "step": 25253 }, { "epoch": 1.1589188196962048, "grad_norm": 0.45305535197257996, "learning_rate": 6.931156811959999e-06, "loss": 0.3454, "step": 25254 }, { "epoch": 1.1589647102014593, "grad_norm": 0.468100905418396, "learning_rate": 6.930930647837571e-06, "loss": 0.4075, "step": 25255 }, { "epoch": 1.1590106007067138, "grad_norm": 0.4892304241657257, "learning_rate": 6.9307044790718625e-06, "loss": 0.4038, "step": 25256 }, { "epoch": 1.1590564912119683, "grad_norm": 0.4849899709224701, "learning_rate": 6.9304783056634194e-06, "loss": 0.4407, "step": 25257 }, { "epoch": 1.1591023817172228, "grad_norm": 0.46210673451423645, "learning_rate": 6.930252127612787e-06, "loss": 0.3722, "step": 25258 }, { "epoch": 1.159148272222477, "grad_norm": 0.5115764141082764, "learning_rate": 6.930025944920506e-06, "loss": 0.4995, "step": 25259 }, { "epoch": 1.1591941627277316, "grad_norm": 0.44128477573394775, "learning_rate": 6.9297997575871226e-06, "loss": 0.2958, "step": 25260 }, { "epoch": 1.159240053232986, "grad_norm": 0.4857756197452545, "learning_rate": 6.929573565613179e-06, "loss": 0.4311, "step": 25261 }, { "epoch": 1.1592859437382406, "grad_norm": 0.4294176995754242, "learning_rate": 6.929347368999221e-06, "loss": 0.2754, "step": 25262 }, { "epoch": 1.159331834243495, "grad_norm": 0.46627533435821533, "learning_rate": 6.929121167745789e-06, "loss": 0.3151, "step": 25263 }, { "epoch": 1.1593777247487496, "grad_norm": 0.477245956659317, "learning_rate": 6.928894961853433e-06, "loss": 0.3455, "step": 25264 }, { "epoch": 1.159423615254004, "grad_norm": 0.47511130571365356, "learning_rate": 6.928668751322692e-06, "loss": 0.3531, "step": 25265 }, { "epoch": 1.1594695057592583, "grad_norm": 0.4946886897087097, "learning_rate": 6.92844253615411e-06, "loss": 0.4103, "step": 25266 }, { "epoch": 1.1595153962645128, "grad_norm": 0.45668497681617737, "learning_rate": 6.928216316348234e-06, "loss": 0.3086, "step": 25267 }, { "epoch": 1.1595612867697673, "grad_norm": 0.5014060139656067, "learning_rate": 6.9279900919056054e-06, "loss": 0.4122, "step": 25268 }, { "epoch": 1.1596071772750218, "grad_norm": 0.47885796427726746, "learning_rate": 6.927763862826768e-06, "loss": 0.3637, "step": 25269 }, { "epoch": 1.1596530677802763, "grad_norm": 0.4194374084472656, "learning_rate": 6.92753762911227e-06, "loss": 0.2952, "step": 25270 }, { "epoch": 1.1596989582855306, "grad_norm": 0.4008837342262268, "learning_rate": 6.927311390762651e-06, "loss": 0.2415, "step": 25271 }, { "epoch": 1.159744848790785, "grad_norm": 0.5092943906784058, "learning_rate": 6.927085147778456e-06, "loss": 0.3767, "step": 25272 }, { "epoch": 1.1597907392960396, "grad_norm": 0.4472348392009735, "learning_rate": 6.926858900160229e-06, "loss": 0.2944, "step": 25273 }, { "epoch": 1.159836629801294, "grad_norm": 0.5028942227363586, "learning_rate": 6.926632647908516e-06, "loss": 0.4052, "step": 25274 }, { "epoch": 1.1598825203065486, "grad_norm": 0.434877872467041, "learning_rate": 6.926406391023858e-06, "loss": 0.3144, "step": 25275 }, { "epoch": 1.159928410811803, "grad_norm": 0.517604649066925, "learning_rate": 6.926180129506801e-06, "loss": 0.4569, "step": 25276 }, { "epoch": 1.1599743013170576, "grad_norm": 0.5182663202285767, "learning_rate": 6.925953863357888e-06, "loss": 0.4224, "step": 25277 }, { "epoch": 1.160020191822312, "grad_norm": 0.46685880422592163, "learning_rate": 6.925727592577666e-06, "loss": 0.347, "step": 25278 }, { "epoch": 1.1600660823275664, "grad_norm": 0.5070213675498962, "learning_rate": 6.925501317166675e-06, "loss": 0.4703, "step": 25279 }, { "epoch": 1.1601119728328209, "grad_norm": 0.45030733942985535, "learning_rate": 6.92527503712546e-06, "loss": 0.3131, "step": 25280 }, { "epoch": 1.1601578633380754, "grad_norm": 0.46435922384262085, "learning_rate": 6.925048752454569e-06, "loss": 0.3737, "step": 25281 }, { "epoch": 1.1602037538433299, "grad_norm": 0.4952225387096405, "learning_rate": 6.924822463154544e-06, "loss": 0.4117, "step": 25282 }, { "epoch": 1.1602496443485844, "grad_norm": 0.4554086923599243, "learning_rate": 6.924596169225925e-06, "loss": 0.3335, "step": 25283 }, { "epoch": 1.1602955348538386, "grad_norm": 0.514150083065033, "learning_rate": 6.924369870669262e-06, "loss": 0.3974, "step": 25284 }, { "epoch": 1.1603414253590931, "grad_norm": 0.4414784610271454, "learning_rate": 6.9241435674850954e-06, "loss": 0.339, "step": 25285 }, { "epoch": 1.1603873158643476, "grad_norm": 0.458553671836853, "learning_rate": 6.923917259673971e-06, "loss": 0.3528, "step": 25286 }, { "epoch": 1.1604332063696021, "grad_norm": 0.5030497908592224, "learning_rate": 6.923690947236435e-06, "loss": 0.3953, "step": 25287 }, { "epoch": 1.1604790968748566, "grad_norm": 0.46241697669029236, "learning_rate": 6.9234646301730265e-06, "loss": 0.3075, "step": 25288 }, { "epoch": 1.1605249873801111, "grad_norm": 0.47066304087638855, "learning_rate": 6.923238308484295e-06, "loss": 0.3541, "step": 25289 }, { "epoch": 1.1605708778853656, "grad_norm": 0.5196420550346375, "learning_rate": 6.92301198217078e-06, "loss": 0.444, "step": 25290 }, { "epoch": 1.16061676839062, "grad_norm": 0.46147480607032776, "learning_rate": 6.922785651233029e-06, "loss": 0.3269, "step": 25291 }, { "epoch": 1.1606626588958744, "grad_norm": 0.4913525879383087, "learning_rate": 6.922559315671586e-06, "loss": 0.3687, "step": 25292 }, { "epoch": 1.1607085494011289, "grad_norm": 0.497078001499176, "learning_rate": 6.922332975486994e-06, "loss": 0.3894, "step": 25293 }, { "epoch": 1.1607544399063834, "grad_norm": 0.5111665725708008, "learning_rate": 6.922106630679798e-06, "loss": 0.4513, "step": 25294 }, { "epoch": 1.1608003304116379, "grad_norm": 0.44512027502059937, "learning_rate": 6.921880281250543e-06, "loss": 0.3359, "step": 25295 }, { "epoch": 1.1608462209168924, "grad_norm": 0.46477943658828735, "learning_rate": 6.921653927199771e-06, "loss": 0.3589, "step": 25296 }, { "epoch": 1.1608921114221467, "grad_norm": 0.4413382411003113, "learning_rate": 6.921427568528029e-06, "loss": 0.3229, "step": 25297 }, { "epoch": 1.1609380019274012, "grad_norm": 0.432475209236145, "learning_rate": 6.9212012052358605e-06, "loss": 0.3076, "step": 25298 }, { "epoch": 1.1609838924326557, "grad_norm": 0.4439021050930023, "learning_rate": 6.9209748373238076e-06, "loss": 0.2889, "step": 25299 }, { "epoch": 1.1610297829379101, "grad_norm": 0.5060281753540039, "learning_rate": 6.920748464792417e-06, "loss": 0.4224, "step": 25300 }, { "epoch": 1.1610756734431646, "grad_norm": 0.46297165751457214, "learning_rate": 6.920522087642234e-06, "loss": 0.3559, "step": 25301 }, { "epoch": 1.1611215639484191, "grad_norm": 0.45897209644317627, "learning_rate": 6.9202957058738e-06, "loss": 0.2871, "step": 25302 }, { "epoch": 1.1611674544536736, "grad_norm": 0.44980618357658386, "learning_rate": 6.920069319487661e-06, "loss": 0.3623, "step": 25303 }, { "epoch": 1.161213344958928, "grad_norm": 0.46277329325675964, "learning_rate": 6.919842928484362e-06, "loss": 0.3662, "step": 25304 }, { "epoch": 1.1612592354641824, "grad_norm": 0.4254900813102722, "learning_rate": 6.919616532864446e-06, "loss": 0.2728, "step": 25305 }, { "epoch": 1.161305125969437, "grad_norm": 0.49500778317451477, "learning_rate": 6.91939013262846e-06, "loss": 0.4009, "step": 25306 }, { "epoch": 1.1613510164746914, "grad_norm": 0.48209160566329956, "learning_rate": 6.919163727776944e-06, "loss": 0.3866, "step": 25307 }, { "epoch": 1.161396906979946, "grad_norm": 0.44400402903556824, "learning_rate": 6.918937318310447e-06, "loss": 0.3085, "step": 25308 }, { "epoch": 1.1614427974852004, "grad_norm": 0.4763474762439728, "learning_rate": 6.9187109042295104e-06, "loss": 0.3417, "step": 25309 }, { "epoch": 1.1614886879904547, "grad_norm": 0.4443283677101135, "learning_rate": 6.918484485534679e-06, "loss": 0.298, "step": 25310 }, { "epoch": 1.1615345784957092, "grad_norm": 0.45248904824256897, "learning_rate": 6.918258062226498e-06, "loss": 0.3769, "step": 25311 }, { "epoch": 1.1615804690009637, "grad_norm": 0.4916403293609619, "learning_rate": 6.918031634305513e-06, "loss": 0.3548, "step": 25312 }, { "epoch": 1.1616263595062182, "grad_norm": 0.4852418005466461, "learning_rate": 6.917805201772265e-06, "loss": 0.3904, "step": 25313 }, { "epoch": 1.1616722500114727, "grad_norm": 0.4769500195980072, "learning_rate": 6.917578764627304e-06, "loss": 0.3941, "step": 25314 }, { "epoch": 1.1617181405167272, "grad_norm": 0.48406538367271423, "learning_rate": 6.917352322871169e-06, "loss": 0.3713, "step": 25315 }, { "epoch": 1.1617640310219817, "grad_norm": 0.44756704568862915, "learning_rate": 6.917125876504407e-06, "loss": 0.3288, "step": 25316 }, { "epoch": 1.161809921527236, "grad_norm": 0.5144450664520264, "learning_rate": 6.916899425527562e-06, "loss": 0.4499, "step": 25317 }, { "epoch": 1.1618558120324904, "grad_norm": 0.4784705340862274, "learning_rate": 6.9166729699411805e-06, "loss": 0.3915, "step": 25318 }, { "epoch": 1.161901702537745, "grad_norm": 0.45593640208244324, "learning_rate": 6.9164465097458045e-06, "loss": 0.3221, "step": 25319 }, { "epoch": 1.1619475930429994, "grad_norm": 0.4609876275062561, "learning_rate": 6.916220044941979e-06, "loss": 0.3682, "step": 25320 }, { "epoch": 1.161993483548254, "grad_norm": 0.47072482109069824, "learning_rate": 6.915993575530251e-06, "loss": 0.382, "step": 25321 }, { "epoch": 1.1620393740535082, "grad_norm": 0.4588839113712311, "learning_rate": 6.9157671015111606e-06, "loss": 0.3661, "step": 25322 }, { "epoch": 1.1620852645587627, "grad_norm": 0.4348493814468384, "learning_rate": 6.915540622885257e-06, "loss": 0.2828, "step": 25323 }, { "epoch": 1.1621311550640172, "grad_norm": 0.43835312128067017, "learning_rate": 6.915314139653081e-06, "loss": 0.3583, "step": 25324 }, { "epoch": 1.1621770455692717, "grad_norm": 0.4591067135334015, "learning_rate": 6.91508765181518e-06, "loss": 0.363, "step": 25325 }, { "epoch": 1.1622229360745262, "grad_norm": 0.4887416660785675, "learning_rate": 6.914861159372098e-06, "loss": 0.4255, "step": 25326 }, { "epoch": 1.1622688265797807, "grad_norm": 0.4880152940750122, "learning_rate": 6.9146346623243785e-06, "loss": 0.41, "step": 25327 }, { "epoch": 1.1623147170850352, "grad_norm": 0.4598153531551361, "learning_rate": 6.914408160672566e-06, "loss": 0.3955, "step": 25328 }, { "epoch": 1.1623606075902895, "grad_norm": 0.4725790321826935, "learning_rate": 6.914181654417208e-06, "loss": 0.3399, "step": 25329 }, { "epoch": 1.162406498095544, "grad_norm": 0.4594027101993561, "learning_rate": 6.913955143558847e-06, "loss": 0.388, "step": 25330 }, { "epoch": 1.1624523886007985, "grad_norm": 0.4589981734752655, "learning_rate": 6.913728628098027e-06, "loss": 0.3236, "step": 25331 }, { "epoch": 1.162498279106053, "grad_norm": 0.45130136609077454, "learning_rate": 6.913502108035295e-06, "loss": 0.3166, "step": 25332 }, { "epoch": 1.1625441696113075, "grad_norm": 0.5136481523513794, "learning_rate": 6.913275583371193e-06, "loss": 0.4793, "step": 25333 }, { "epoch": 1.162590060116562, "grad_norm": 0.4754766821861267, "learning_rate": 6.913049054106267e-06, "loss": 0.3851, "step": 25334 }, { "epoch": 1.1626359506218162, "grad_norm": 0.47055861353874207, "learning_rate": 6.912822520241063e-06, "loss": 0.338, "step": 25335 }, { "epoch": 1.1626818411270707, "grad_norm": 0.48005107045173645, "learning_rate": 6.912595981776124e-06, "loss": 0.3637, "step": 25336 }, { "epoch": 1.1627277316323252, "grad_norm": 0.4906352162361145, "learning_rate": 6.912369438711994e-06, "loss": 0.3415, "step": 25337 }, { "epoch": 1.1627736221375797, "grad_norm": 0.45833200216293335, "learning_rate": 6.91214289104922e-06, "loss": 0.3221, "step": 25338 }, { "epoch": 1.1628195126428342, "grad_norm": 0.4579266607761383, "learning_rate": 6.911916338788346e-06, "loss": 0.3306, "step": 25339 }, { "epoch": 1.1628654031480887, "grad_norm": 0.4561145305633545, "learning_rate": 6.911689781929916e-06, "loss": 0.3723, "step": 25340 }, { "epoch": 1.1629112936533432, "grad_norm": 0.4649745225906372, "learning_rate": 6.911463220474477e-06, "loss": 0.3454, "step": 25341 }, { "epoch": 1.1629571841585975, "grad_norm": 0.5232505798339844, "learning_rate": 6.9112366544225705e-06, "loss": 0.4065, "step": 25342 }, { "epoch": 1.163003074663852, "grad_norm": 0.4681083858013153, "learning_rate": 6.911010083774745e-06, "loss": 0.3952, "step": 25343 }, { "epoch": 1.1630489651691065, "grad_norm": 0.5087646245956421, "learning_rate": 6.910783508531542e-06, "loss": 0.4567, "step": 25344 }, { "epoch": 1.163094855674361, "grad_norm": 0.46805962920188904, "learning_rate": 6.910556928693507e-06, "loss": 0.3326, "step": 25345 }, { "epoch": 1.1631407461796155, "grad_norm": 0.4606439471244812, "learning_rate": 6.9103303442611865e-06, "loss": 0.3613, "step": 25346 }, { "epoch": 1.16318663668487, "grad_norm": 0.42853936553001404, "learning_rate": 6.910103755235125e-06, "loss": 0.2664, "step": 25347 }, { "epoch": 1.1632325271901243, "grad_norm": 0.4997710883617401, "learning_rate": 6.909877161615865e-06, "loss": 0.3501, "step": 25348 }, { "epoch": 1.1632784176953788, "grad_norm": 0.5081593990325928, "learning_rate": 6.9096505634039555e-06, "loss": 0.3867, "step": 25349 }, { "epoch": 1.1633243082006333, "grad_norm": 0.48145991563796997, "learning_rate": 6.909423960599938e-06, "loss": 0.4209, "step": 25350 }, { "epoch": 1.1633701987058878, "grad_norm": 0.5045231580734253, "learning_rate": 6.9091973532043584e-06, "loss": 0.4229, "step": 25351 }, { "epoch": 1.1634160892111423, "grad_norm": 0.48374122381210327, "learning_rate": 6.908970741217763e-06, "loss": 0.3364, "step": 25352 }, { "epoch": 1.1634619797163968, "grad_norm": 0.46208885312080383, "learning_rate": 6.908744124640694e-06, "loss": 0.3114, "step": 25353 }, { "epoch": 1.1635078702216513, "grad_norm": 0.49200767278671265, "learning_rate": 6.908517503473699e-06, "loss": 0.3584, "step": 25354 }, { "epoch": 1.1635537607269055, "grad_norm": 0.5330519080162048, "learning_rate": 6.908290877717319e-06, "loss": 0.4082, "step": 25355 }, { "epoch": 1.16359965123216, "grad_norm": 0.4337038993835449, "learning_rate": 6.908064247372104e-06, "loss": 0.3094, "step": 25356 }, { "epoch": 1.1636455417374145, "grad_norm": 0.4693990647792816, "learning_rate": 6.907837612438597e-06, "loss": 0.4038, "step": 25357 }, { "epoch": 1.163691432242669, "grad_norm": 0.4843118190765381, "learning_rate": 6.907610972917343e-06, "loss": 0.4056, "step": 25358 }, { "epoch": 1.1637373227479235, "grad_norm": 0.47962963581085205, "learning_rate": 6.9073843288088835e-06, "loss": 0.3479, "step": 25359 }, { "epoch": 1.1637832132531778, "grad_norm": 0.49168434739112854, "learning_rate": 6.9071576801137696e-06, "loss": 0.3839, "step": 25360 }, { "epoch": 1.1638291037584323, "grad_norm": 0.4488421678543091, "learning_rate": 6.9069310268325415e-06, "loss": 0.341, "step": 25361 }, { "epoch": 1.1638749942636868, "grad_norm": 0.5133140087127686, "learning_rate": 6.906704368965749e-06, "loss": 0.4209, "step": 25362 }, { "epoch": 1.1639208847689413, "grad_norm": 0.44302132725715637, "learning_rate": 6.906477706513932e-06, "loss": 0.3569, "step": 25363 }, { "epoch": 1.1639667752741958, "grad_norm": 0.46309328079223633, "learning_rate": 6.906251039477639e-06, "loss": 0.3753, "step": 25364 }, { "epoch": 1.1640126657794503, "grad_norm": 0.5443177223205566, "learning_rate": 6.906024367857411e-06, "loss": 0.4908, "step": 25365 }, { "epoch": 1.1640585562847048, "grad_norm": 0.5212720036506653, "learning_rate": 6.9057976916537995e-06, "loss": 0.4408, "step": 25366 }, { "epoch": 1.1641044467899593, "grad_norm": 0.4701431095600128, "learning_rate": 6.905571010867345e-06, "loss": 0.3286, "step": 25367 }, { "epoch": 1.1641503372952136, "grad_norm": 0.508514940738678, "learning_rate": 6.905344325498593e-06, "loss": 0.4153, "step": 25368 }, { "epoch": 1.164196227800468, "grad_norm": 0.46630316972732544, "learning_rate": 6.9051176355480895e-06, "loss": 0.3447, "step": 25369 }, { "epoch": 1.1642421183057226, "grad_norm": 0.4890194535255432, "learning_rate": 6.904890941016379e-06, "loss": 0.3874, "step": 25370 }, { "epoch": 1.164288008810977, "grad_norm": 0.4448188543319702, "learning_rate": 6.904664241904008e-06, "loss": 0.3095, "step": 25371 }, { "epoch": 1.1643338993162315, "grad_norm": 0.45271459221839905, "learning_rate": 6.904437538211519e-06, "loss": 0.3151, "step": 25372 }, { "epoch": 1.1643797898214858, "grad_norm": 0.48518118262290955, "learning_rate": 6.9042108299394604e-06, "loss": 0.3309, "step": 25373 }, { "epoch": 1.1644256803267403, "grad_norm": 0.47770512104034424, "learning_rate": 6.903984117088376e-06, "loss": 0.3442, "step": 25374 }, { "epoch": 1.1644715708319948, "grad_norm": 0.5800608992576599, "learning_rate": 6.9037573996588095e-06, "loss": 0.3897, "step": 25375 }, { "epoch": 1.1645174613372493, "grad_norm": 0.4707186222076416, "learning_rate": 6.903530677651307e-06, "loss": 0.342, "step": 25376 }, { "epoch": 1.1645633518425038, "grad_norm": 0.42263123393058777, "learning_rate": 6.903303951066416e-06, "loss": 0.3032, "step": 25377 }, { "epoch": 1.1646092423477583, "grad_norm": 0.4945104718208313, "learning_rate": 6.903077219904677e-06, "loss": 0.3525, "step": 25378 }, { "epoch": 1.1646551328530128, "grad_norm": 0.4968191087245941, "learning_rate": 6.90285048416664e-06, "loss": 0.3627, "step": 25379 }, { "epoch": 1.164701023358267, "grad_norm": 0.47359541058540344, "learning_rate": 6.902623743852848e-06, "loss": 0.3624, "step": 25380 }, { "epoch": 1.1647469138635216, "grad_norm": 0.4627218246459961, "learning_rate": 6.902396998963845e-06, "loss": 0.3408, "step": 25381 }, { "epoch": 1.164792804368776, "grad_norm": 0.46943625807762146, "learning_rate": 6.902170249500178e-06, "loss": 0.3878, "step": 25382 }, { "epoch": 1.1648386948740306, "grad_norm": 0.45839858055114746, "learning_rate": 6.901943495462392e-06, "loss": 0.3514, "step": 25383 }, { "epoch": 1.164884585379285, "grad_norm": 0.46193328499794006, "learning_rate": 6.901716736851033e-06, "loss": 0.3098, "step": 25384 }, { "epoch": 1.1649304758845396, "grad_norm": 0.4588625133037567, "learning_rate": 6.9014899736666444e-06, "loss": 0.3299, "step": 25385 }, { "epoch": 1.1649763663897938, "grad_norm": 0.45867034792900085, "learning_rate": 6.901263205909774e-06, "loss": 0.3076, "step": 25386 }, { "epoch": 1.1650222568950483, "grad_norm": 0.4793674051761627, "learning_rate": 6.901036433580965e-06, "loss": 0.3713, "step": 25387 }, { "epoch": 1.1650681474003028, "grad_norm": 0.4481409192085266, "learning_rate": 6.9008096566807624e-06, "loss": 0.3374, "step": 25388 }, { "epoch": 1.1651140379055573, "grad_norm": 0.506051242351532, "learning_rate": 6.900582875209713e-06, "loss": 0.4426, "step": 25389 }, { "epoch": 1.1651599284108118, "grad_norm": 0.43087759613990784, "learning_rate": 6.900356089168362e-06, "loss": 0.2899, "step": 25390 }, { "epoch": 1.1652058189160663, "grad_norm": 0.46279680728912354, "learning_rate": 6.900129298557254e-06, "loss": 0.3211, "step": 25391 }, { "epoch": 1.1652517094213208, "grad_norm": 0.4506016671657562, "learning_rate": 6.899902503376935e-06, "loss": 0.2686, "step": 25392 }, { "epoch": 1.1652975999265751, "grad_norm": 0.48673027753829956, "learning_rate": 6.899675703627949e-06, "loss": 0.439, "step": 25393 }, { "epoch": 1.1653434904318296, "grad_norm": 0.4714841842651367, "learning_rate": 6.899448899310844e-06, "loss": 0.3418, "step": 25394 }, { "epoch": 1.165389380937084, "grad_norm": 0.5234691500663757, "learning_rate": 6.8992220904261645e-06, "loss": 0.4416, "step": 25395 }, { "epoch": 1.1654352714423386, "grad_norm": 0.4563394784927368, "learning_rate": 6.8989952769744535e-06, "loss": 0.2863, "step": 25396 }, { "epoch": 1.165481161947593, "grad_norm": 0.453146368265152, "learning_rate": 6.898768458956258e-06, "loss": 0.283, "step": 25397 }, { "epoch": 1.1655270524528476, "grad_norm": 0.4867042303085327, "learning_rate": 6.898541636372124e-06, "loss": 0.3822, "step": 25398 }, { "epoch": 1.1655729429581019, "grad_norm": 0.49862805008888245, "learning_rate": 6.898314809222597e-06, "loss": 0.4114, "step": 25399 }, { "epoch": 1.1656188334633564, "grad_norm": 0.42709067463874817, "learning_rate": 6.898087977508223e-06, "loss": 0.3014, "step": 25400 }, { "epoch": 1.1656647239686109, "grad_norm": 0.4268510341644287, "learning_rate": 6.897861141229546e-06, "loss": 0.2818, "step": 25401 }, { "epoch": 1.1657106144738654, "grad_norm": 0.4229263365268707, "learning_rate": 6.897634300387111e-06, "loss": 0.2771, "step": 25402 }, { "epoch": 1.1657565049791199, "grad_norm": 0.4434099793434143, "learning_rate": 6.897407454981465e-06, "loss": 0.2923, "step": 25403 }, { "epoch": 1.1658023954843744, "grad_norm": 0.5200785398483276, "learning_rate": 6.8971806050131515e-06, "loss": 0.3991, "step": 25404 }, { "epoch": 1.1658482859896289, "grad_norm": 0.4354478418827057, "learning_rate": 6.89695375048272e-06, "loss": 0.2845, "step": 25405 }, { "epoch": 1.1658941764948831, "grad_norm": 0.4833483397960663, "learning_rate": 6.896726891390712e-06, "loss": 0.3548, "step": 25406 }, { "epoch": 1.1659400670001376, "grad_norm": 0.4884061813354492, "learning_rate": 6.896500027737674e-06, "loss": 0.3761, "step": 25407 }, { "epoch": 1.1659859575053921, "grad_norm": 0.5036185383796692, "learning_rate": 6.896273159524154e-06, "loss": 0.4164, "step": 25408 }, { "epoch": 1.1660318480106466, "grad_norm": 0.4726350009441376, "learning_rate": 6.896046286750692e-06, "loss": 0.3286, "step": 25409 }, { "epoch": 1.1660777385159011, "grad_norm": 0.4565579295158386, "learning_rate": 6.89581940941784e-06, "loss": 0.3271, "step": 25410 }, { "epoch": 1.1661236290211554, "grad_norm": 0.4399621784687042, "learning_rate": 6.895592527526141e-06, "loss": 0.3112, "step": 25411 }, { "epoch": 1.16616951952641, "grad_norm": 0.5009722709655762, "learning_rate": 6.895365641076139e-06, "loss": 0.3912, "step": 25412 }, { "epoch": 1.1662154100316644, "grad_norm": 0.46689409017562866, "learning_rate": 6.89513875006838e-06, "loss": 0.3777, "step": 25413 }, { "epoch": 1.166261300536919, "grad_norm": 0.4010829031467438, "learning_rate": 6.894911854503412e-06, "loss": 0.2553, "step": 25414 }, { "epoch": 1.1663071910421734, "grad_norm": 0.523944616317749, "learning_rate": 6.894684954381779e-06, "loss": 0.473, "step": 25415 }, { "epoch": 1.166353081547428, "grad_norm": 0.46361881494522095, "learning_rate": 6.8944580497040246e-06, "loss": 0.3912, "step": 25416 }, { "epoch": 1.1663989720526824, "grad_norm": 0.4904778003692627, "learning_rate": 6.8942311404706995e-06, "loss": 0.3859, "step": 25417 }, { "epoch": 1.1664448625579367, "grad_norm": 0.46722307801246643, "learning_rate": 6.894004226682344e-06, "loss": 0.3558, "step": 25418 }, { "epoch": 1.1664907530631912, "grad_norm": 0.4635440707206726, "learning_rate": 6.893777308339506e-06, "loss": 0.3694, "step": 25419 }, { "epoch": 1.1665366435684457, "grad_norm": 0.45665106177330017, "learning_rate": 6.893550385442733e-06, "loss": 0.3747, "step": 25420 }, { "epoch": 1.1665825340737002, "grad_norm": 0.4173048138618469, "learning_rate": 6.893323457992566e-06, "loss": 0.273, "step": 25421 }, { "epoch": 1.1666284245789547, "grad_norm": 0.45921358466148376, "learning_rate": 6.893096525989557e-06, "loss": 0.3133, "step": 25422 }, { "epoch": 1.1666743150842092, "grad_norm": 0.4952046573162079, "learning_rate": 6.892869589434248e-06, "loss": 0.4131, "step": 25423 }, { "epoch": 1.1667202055894634, "grad_norm": 0.4500468969345093, "learning_rate": 6.892642648327184e-06, "loss": 0.3685, "step": 25424 }, { "epoch": 1.166766096094718, "grad_norm": 0.4682065546512604, "learning_rate": 6.892415702668912e-06, "loss": 0.3819, "step": 25425 }, { "epoch": 1.1668119865999724, "grad_norm": 0.47604915499687195, "learning_rate": 6.892188752459976e-06, "loss": 0.3593, "step": 25426 }, { "epoch": 1.166857877105227, "grad_norm": 0.45105987787246704, "learning_rate": 6.8919617977009245e-06, "loss": 0.3524, "step": 25427 }, { "epoch": 1.1669037676104814, "grad_norm": 0.43191421031951904, "learning_rate": 6.891734838392303e-06, "loss": 0.2924, "step": 25428 }, { "epoch": 1.166949658115736, "grad_norm": 0.5031294226646423, "learning_rate": 6.891507874534655e-06, "loss": 0.3758, "step": 25429 }, { "epoch": 1.1669955486209904, "grad_norm": 0.5148729681968689, "learning_rate": 6.891280906128528e-06, "loss": 0.4353, "step": 25430 }, { "epoch": 1.1670414391262447, "grad_norm": 0.45822030305862427, "learning_rate": 6.891053933174467e-06, "loss": 0.3123, "step": 25431 }, { "epoch": 1.1670873296314992, "grad_norm": 0.48069366812705994, "learning_rate": 6.890826955673019e-06, "loss": 0.423, "step": 25432 }, { "epoch": 1.1671332201367537, "grad_norm": 0.43992629647254944, "learning_rate": 6.890599973624729e-06, "loss": 0.3065, "step": 25433 }, { "epoch": 1.1671791106420082, "grad_norm": 0.47621071338653564, "learning_rate": 6.890372987030141e-06, "loss": 0.3418, "step": 25434 }, { "epoch": 1.1672250011472627, "grad_norm": 0.4598264694213867, "learning_rate": 6.890145995889804e-06, "loss": 0.3401, "step": 25435 }, { "epoch": 1.1672708916525172, "grad_norm": 0.4815293550491333, "learning_rate": 6.889919000204263e-06, "loss": 0.3944, "step": 25436 }, { "epoch": 1.1673167821577715, "grad_norm": 0.4852719008922577, "learning_rate": 6.889691999974062e-06, "loss": 0.377, "step": 25437 }, { "epoch": 1.167362672663026, "grad_norm": 0.44850438833236694, "learning_rate": 6.889464995199747e-06, "loss": 0.3293, "step": 25438 }, { "epoch": 1.1674085631682805, "grad_norm": 0.490010529756546, "learning_rate": 6.889237985881868e-06, "loss": 0.3993, "step": 25439 }, { "epoch": 1.167454453673535, "grad_norm": 0.4726997911930084, "learning_rate": 6.889010972020967e-06, "loss": 0.3986, "step": 25440 }, { "epoch": 1.1675003441787895, "grad_norm": 0.532572865486145, "learning_rate": 6.888783953617588e-06, "loss": 0.359, "step": 25441 }, { "epoch": 1.167546234684044, "grad_norm": 0.4604973793029785, "learning_rate": 6.888556930672283e-06, "loss": 0.3485, "step": 25442 }, { "epoch": 1.1675921251892984, "grad_norm": 0.4495506286621094, "learning_rate": 6.888329903185593e-06, "loss": 0.2871, "step": 25443 }, { "epoch": 1.1676380156945527, "grad_norm": 0.45247775316238403, "learning_rate": 6.888102871158067e-06, "loss": 0.3337, "step": 25444 }, { "epoch": 1.1676839061998072, "grad_norm": 0.4888607859611511, "learning_rate": 6.887875834590249e-06, "loss": 0.3697, "step": 25445 }, { "epoch": 1.1677297967050617, "grad_norm": 0.46442824602127075, "learning_rate": 6.8876487934826845e-06, "loss": 0.3537, "step": 25446 }, { "epoch": 1.1677756872103162, "grad_norm": 0.44359052181243896, "learning_rate": 6.88742174783592e-06, "loss": 0.2998, "step": 25447 }, { "epoch": 1.1678215777155707, "grad_norm": 0.47937047481536865, "learning_rate": 6.887194697650503e-06, "loss": 0.3772, "step": 25448 }, { "epoch": 1.167867468220825, "grad_norm": 0.4643213450908661, "learning_rate": 6.886967642926978e-06, "loss": 0.347, "step": 25449 }, { "epoch": 1.1679133587260795, "grad_norm": 0.48892465233802795, "learning_rate": 6.8867405836658916e-06, "loss": 0.3745, "step": 25450 }, { "epoch": 1.167959249231334, "grad_norm": 0.4457699954509735, "learning_rate": 6.88651351986779e-06, "loss": 0.3598, "step": 25451 }, { "epoch": 1.1680051397365885, "grad_norm": 0.44912588596343994, "learning_rate": 6.886286451533219e-06, "loss": 0.3116, "step": 25452 }, { "epoch": 1.168051030241843, "grad_norm": 0.4273374676704407, "learning_rate": 6.8860593786627235e-06, "loss": 0.3425, "step": 25453 }, { "epoch": 1.1680969207470975, "grad_norm": 0.49525442719459534, "learning_rate": 6.8858323012568496e-06, "loss": 0.4119, "step": 25454 }, { "epoch": 1.168142811252352, "grad_norm": 0.463507741689682, "learning_rate": 6.885605219316146e-06, "loss": 0.4111, "step": 25455 }, { "epoch": 1.1681887017576065, "grad_norm": 0.5294719934463501, "learning_rate": 6.885378132841157e-06, "loss": 0.4825, "step": 25456 }, { "epoch": 1.1682345922628607, "grad_norm": 0.45793911814689636, "learning_rate": 6.885151041832427e-06, "loss": 0.3503, "step": 25457 }, { "epoch": 1.1682804827681152, "grad_norm": 0.4352853000164032, "learning_rate": 6.8849239462905045e-06, "loss": 0.3153, "step": 25458 }, { "epoch": 1.1683263732733697, "grad_norm": 0.47256094217300415, "learning_rate": 6.884696846215935e-06, "loss": 0.4032, "step": 25459 }, { "epoch": 1.1683722637786242, "grad_norm": 0.4533510208129883, "learning_rate": 6.884469741609264e-06, "loss": 0.3164, "step": 25460 }, { "epoch": 1.1684181542838787, "grad_norm": 0.45984402298927307, "learning_rate": 6.884242632471039e-06, "loss": 0.3677, "step": 25461 }, { "epoch": 1.168464044789133, "grad_norm": 0.5017122030258179, "learning_rate": 6.884015518801805e-06, "loss": 0.387, "step": 25462 }, { "epoch": 1.1685099352943875, "grad_norm": 0.42840832471847534, "learning_rate": 6.883788400602107e-06, "loss": 0.2877, "step": 25463 }, { "epoch": 1.168555825799642, "grad_norm": 0.4503520131111145, "learning_rate": 6.883561277872493e-06, "loss": 0.3155, "step": 25464 }, { "epoch": 1.1686017163048965, "grad_norm": 0.4706966280937195, "learning_rate": 6.883334150613509e-06, "loss": 0.3575, "step": 25465 }, { "epoch": 1.168647606810151, "grad_norm": 0.44303762912750244, "learning_rate": 6.883107018825701e-06, "loss": 0.3313, "step": 25466 }, { "epoch": 1.1686934973154055, "grad_norm": 0.4703931510448456, "learning_rate": 6.882879882509614e-06, "loss": 0.3647, "step": 25467 }, { "epoch": 1.16873938782066, "grad_norm": 0.4338523745536804, "learning_rate": 6.8826527416657964e-06, "loss": 0.3085, "step": 25468 }, { "epoch": 1.1687852783259143, "grad_norm": 0.46583548188209534, "learning_rate": 6.882425596294792e-06, "loss": 0.3921, "step": 25469 }, { "epoch": 1.1688311688311688, "grad_norm": 0.4471648931503296, "learning_rate": 6.882198446397148e-06, "loss": 0.3341, "step": 25470 }, { "epoch": 1.1688770593364233, "grad_norm": 0.4528176784515381, "learning_rate": 6.881971291973412e-06, "loss": 0.3289, "step": 25471 }, { "epoch": 1.1689229498416778, "grad_norm": 0.46092653274536133, "learning_rate": 6.881744133024128e-06, "loss": 0.3467, "step": 25472 }, { "epoch": 1.1689688403469323, "grad_norm": 0.48597103357315063, "learning_rate": 6.881516969549844e-06, "loss": 0.4126, "step": 25473 }, { "epoch": 1.1690147308521868, "grad_norm": 0.47000548243522644, "learning_rate": 6.881289801551105e-06, "loss": 0.3514, "step": 25474 }, { "epoch": 1.169060621357441, "grad_norm": 0.4788336157798767, "learning_rate": 6.881062629028458e-06, "loss": 0.3649, "step": 25475 }, { "epoch": 1.1691065118626955, "grad_norm": 0.4651496410369873, "learning_rate": 6.880835451982448e-06, "loss": 0.3849, "step": 25476 }, { "epoch": 1.16915240236795, "grad_norm": 0.4732339084148407, "learning_rate": 6.880608270413624e-06, "loss": 0.3713, "step": 25477 }, { "epoch": 1.1691982928732045, "grad_norm": 0.4777868390083313, "learning_rate": 6.88038108432253e-06, "loss": 0.3983, "step": 25478 }, { "epoch": 1.169244183378459, "grad_norm": 0.4273369014263153, "learning_rate": 6.880153893709714e-06, "loss": 0.2879, "step": 25479 }, { "epoch": 1.1692900738837135, "grad_norm": 0.4664499759674072, "learning_rate": 6.879926698575718e-06, "loss": 0.3304, "step": 25480 }, { "epoch": 1.169335964388968, "grad_norm": 0.47480228543281555, "learning_rate": 6.879699498921094e-06, "loss": 0.3704, "step": 25481 }, { "epoch": 1.1693818548942223, "grad_norm": 0.4617302715778351, "learning_rate": 6.879472294746386e-06, "loss": 0.3677, "step": 25482 }, { "epoch": 1.1694277453994768, "grad_norm": 0.4861379563808441, "learning_rate": 6.879245086052141e-06, "loss": 0.3729, "step": 25483 }, { "epoch": 1.1694736359047313, "grad_norm": 0.4335375726222992, "learning_rate": 6.879017872838904e-06, "loss": 0.3147, "step": 25484 }, { "epoch": 1.1695195264099858, "grad_norm": 0.4581329822540283, "learning_rate": 6.8787906551072214e-06, "loss": 0.3758, "step": 25485 }, { "epoch": 1.1695654169152403, "grad_norm": 0.4525098204612732, "learning_rate": 6.87856343285764e-06, "loss": 0.3256, "step": 25486 }, { "epoch": 1.1696113074204948, "grad_norm": 0.4844927191734314, "learning_rate": 6.878336206090708e-06, "loss": 0.3723, "step": 25487 }, { "epoch": 1.169657197925749, "grad_norm": 0.48377251625061035, "learning_rate": 6.87810897480697e-06, "loss": 0.3942, "step": 25488 }, { "epoch": 1.1697030884310036, "grad_norm": 0.496649831533432, "learning_rate": 6.877881739006971e-06, "loss": 0.437, "step": 25489 }, { "epoch": 1.169748978936258, "grad_norm": 0.4764748513698578, "learning_rate": 6.8776544986912595e-06, "loss": 0.4128, "step": 25490 }, { "epoch": 1.1697948694415126, "grad_norm": 0.45460137724876404, "learning_rate": 6.877427253860382e-06, "loss": 0.3368, "step": 25491 }, { "epoch": 1.169840759946767, "grad_norm": 0.49834832549095154, "learning_rate": 6.877200004514885e-06, "loss": 0.3702, "step": 25492 }, { "epoch": 1.1698866504520216, "grad_norm": 0.49267178773880005, "learning_rate": 6.876972750655314e-06, "loss": 0.4545, "step": 25493 }, { "epoch": 1.169932540957276, "grad_norm": 0.467572420835495, "learning_rate": 6.876745492282216e-06, "loss": 0.3415, "step": 25494 }, { "epoch": 1.1699784314625303, "grad_norm": 0.40683358907699585, "learning_rate": 6.876518229396137e-06, "loss": 0.2571, "step": 25495 }, { "epoch": 1.1700243219677848, "grad_norm": 0.5418713092803955, "learning_rate": 6.876290961997625e-06, "loss": 0.5379, "step": 25496 }, { "epoch": 1.1700702124730393, "grad_norm": 0.4679352343082428, "learning_rate": 6.876063690087224e-06, "loss": 0.3703, "step": 25497 }, { "epoch": 1.1701161029782938, "grad_norm": 0.49951857328414917, "learning_rate": 6.8758364136654834e-06, "loss": 0.3856, "step": 25498 }, { "epoch": 1.1701619934835483, "grad_norm": 0.46701744198799133, "learning_rate": 6.875609132732947e-06, "loss": 0.3543, "step": 25499 }, { "epoch": 1.1702078839888026, "grad_norm": 0.4533466398715973, "learning_rate": 6.875381847290163e-06, "loss": 0.3401, "step": 25500 }, { "epoch": 1.170253774494057, "grad_norm": 0.47816556692123413, "learning_rate": 6.875154557337677e-06, "loss": 0.3493, "step": 25501 }, { "epoch": 1.1702996649993116, "grad_norm": 0.4744378328323364, "learning_rate": 6.874927262876038e-06, "loss": 0.3789, "step": 25502 }, { "epoch": 1.170345555504566, "grad_norm": 0.4572046995162964, "learning_rate": 6.874699963905789e-06, "loss": 0.3496, "step": 25503 }, { "epoch": 1.1703914460098206, "grad_norm": 0.5027403235435486, "learning_rate": 6.874472660427478e-06, "loss": 0.3166, "step": 25504 }, { "epoch": 1.170437336515075, "grad_norm": 0.47575706243515015, "learning_rate": 6.874245352441653e-06, "loss": 0.3914, "step": 25505 }, { "epoch": 1.1704832270203296, "grad_norm": 0.4718310832977295, "learning_rate": 6.874018039948859e-06, "loss": 0.3845, "step": 25506 }, { "epoch": 1.1705291175255839, "grad_norm": 0.4763953983783722, "learning_rate": 6.873790722949643e-06, "loss": 0.3839, "step": 25507 }, { "epoch": 1.1705750080308384, "grad_norm": 0.4770369529724121, "learning_rate": 6.873563401444552e-06, "loss": 0.3491, "step": 25508 }, { "epoch": 1.1706208985360929, "grad_norm": 0.47129103541374207, "learning_rate": 6.873336075434132e-06, "loss": 0.3365, "step": 25509 }, { "epoch": 1.1706667890413474, "grad_norm": 0.44112104177474976, "learning_rate": 6.873108744918931e-06, "loss": 0.2996, "step": 25510 }, { "epoch": 1.1707126795466019, "grad_norm": 0.45545729994773865, "learning_rate": 6.872881409899492e-06, "loss": 0.2885, "step": 25511 }, { "epoch": 1.1707585700518564, "grad_norm": 0.48032909631729126, "learning_rate": 6.8726540703763654e-06, "loss": 0.3838, "step": 25512 }, { "epoch": 1.1708044605571106, "grad_norm": 0.4595769941806793, "learning_rate": 6.872426726350098e-06, "loss": 0.3441, "step": 25513 }, { "epoch": 1.1708503510623651, "grad_norm": 0.5449520349502563, "learning_rate": 6.872199377821235e-06, "loss": 0.3887, "step": 25514 }, { "epoch": 1.1708962415676196, "grad_norm": 0.4461211562156677, "learning_rate": 6.871972024790323e-06, "loss": 0.2934, "step": 25515 }, { "epoch": 1.1709421320728741, "grad_norm": 0.46130767464637756, "learning_rate": 6.871744667257909e-06, "loss": 0.3278, "step": 25516 }, { "epoch": 1.1709880225781286, "grad_norm": 0.4761263132095337, "learning_rate": 6.87151730522454e-06, "loss": 0.3662, "step": 25517 }, { "epoch": 1.1710339130833831, "grad_norm": 0.42998868227005005, "learning_rate": 6.8712899386907625e-06, "loss": 0.2858, "step": 25518 }, { "epoch": 1.1710798035886376, "grad_norm": 0.4436224102973938, "learning_rate": 6.871062567657123e-06, "loss": 0.2914, "step": 25519 }, { "epoch": 1.171125694093892, "grad_norm": 0.5102339386940002, "learning_rate": 6.87083519212417e-06, "loss": 0.4416, "step": 25520 }, { "epoch": 1.1711715845991464, "grad_norm": 0.5048193335533142, "learning_rate": 6.870607812092448e-06, "loss": 0.385, "step": 25521 }, { "epoch": 1.1712174751044009, "grad_norm": 0.45603710412979126, "learning_rate": 6.870380427562503e-06, "loss": 0.3494, "step": 25522 }, { "epoch": 1.1712633656096554, "grad_norm": 0.46542298793792725, "learning_rate": 6.870153038534884e-06, "loss": 0.3968, "step": 25523 }, { "epoch": 1.1713092561149099, "grad_norm": 0.4728873670101166, "learning_rate": 6.869925645010138e-06, "loss": 0.3807, "step": 25524 }, { "epoch": 1.1713551466201644, "grad_norm": 0.4072057604789734, "learning_rate": 6.8696982469888104e-06, "loss": 0.2677, "step": 25525 }, { "epoch": 1.1714010371254187, "grad_norm": 0.479343056678772, "learning_rate": 6.86947084447145e-06, "loss": 0.3752, "step": 25526 }, { "epoch": 1.1714469276306732, "grad_norm": 0.4866633713245392, "learning_rate": 6.8692434374586016e-06, "loss": 0.4162, "step": 25527 }, { "epoch": 1.1714928181359276, "grad_norm": 0.4854663014411926, "learning_rate": 6.869016025950812e-06, "loss": 0.3819, "step": 25528 }, { "epoch": 1.1715387086411821, "grad_norm": 0.44497814774513245, "learning_rate": 6.868788609948629e-06, "loss": 0.3335, "step": 25529 }, { "epoch": 1.1715845991464366, "grad_norm": 0.4877415597438812, "learning_rate": 6.8685611894526e-06, "loss": 0.3773, "step": 25530 }, { "epoch": 1.1716304896516911, "grad_norm": 0.4903859794139862, "learning_rate": 6.8683337644632695e-06, "loss": 0.3153, "step": 25531 }, { "epoch": 1.1716763801569456, "grad_norm": 0.4834446609020233, "learning_rate": 6.868106334981186e-06, "loss": 0.3352, "step": 25532 }, { "epoch": 1.1717222706622, "grad_norm": 0.4454856812953949, "learning_rate": 6.867878901006898e-06, "loss": 0.3213, "step": 25533 }, { "epoch": 1.1717681611674544, "grad_norm": 0.4969598948955536, "learning_rate": 6.86765146254095e-06, "loss": 0.3812, "step": 25534 }, { "epoch": 1.171814051672709, "grad_norm": 0.4507230222225189, "learning_rate": 6.867424019583889e-06, "loss": 0.3424, "step": 25535 }, { "epoch": 1.1718599421779634, "grad_norm": 0.4771530330181122, "learning_rate": 6.867196572136263e-06, "loss": 0.3642, "step": 25536 }, { "epoch": 1.171905832683218, "grad_norm": 0.46808308362960815, "learning_rate": 6.8669691201986185e-06, "loss": 0.3594, "step": 25537 }, { "epoch": 1.1719517231884722, "grad_norm": 0.4721062183380127, "learning_rate": 6.866741663771505e-06, "loss": 0.3545, "step": 25538 }, { "epoch": 1.1719976136937267, "grad_norm": 0.4789985120296478, "learning_rate": 6.866514202855463e-06, "loss": 0.3668, "step": 25539 }, { "epoch": 1.1720435041989812, "grad_norm": 0.457113116979599, "learning_rate": 6.8662867374510445e-06, "loss": 0.3594, "step": 25540 }, { "epoch": 1.1720893947042357, "grad_norm": 0.4772973954677582, "learning_rate": 6.866059267558797e-06, "loss": 0.3169, "step": 25541 }, { "epoch": 1.1721352852094902, "grad_norm": 0.46762266755104065, "learning_rate": 6.865831793179265e-06, "loss": 0.3948, "step": 25542 }, { "epoch": 1.1721811757147447, "grad_norm": 0.4718179404735565, "learning_rate": 6.865604314312996e-06, "loss": 0.3292, "step": 25543 }, { "epoch": 1.1722270662199992, "grad_norm": 0.47368523478507996, "learning_rate": 6.865376830960538e-06, "loss": 0.3759, "step": 25544 }, { "epoch": 1.1722729567252537, "grad_norm": 0.4707144498825073, "learning_rate": 6.865149343122437e-06, "loss": 0.3933, "step": 25545 }, { "epoch": 1.172318847230508, "grad_norm": 0.47251012921333313, "learning_rate": 6.8649218507992395e-06, "loss": 0.3768, "step": 25546 }, { "epoch": 1.1723647377357624, "grad_norm": 0.44325485825538635, "learning_rate": 6.864694353991496e-06, "loss": 0.3352, "step": 25547 }, { "epoch": 1.172410628241017, "grad_norm": 0.4542827904224396, "learning_rate": 6.8644668526997506e-06, "loss": 0.341, "step": 25548 }, { "epoch": 1.1724565187462714, "grad_norm": 0.4626162350177765, "learning_rate": 6.86423934692455e-06, "loss": 0.3746, "step": 25549 }, { "epoch": 1.172502409251526, "grad_norm": 0.47882887721061707, "learning_rate": 6.864011836666442e-06, "loss": 0.3779, "step": 25550 }, { "epoch": 1.1725482997567802, "grad_norm": 0.47287145256996155, "learning_rate": 6.8637843219259735e-06, "loss": 0.3969, "step": 25551 }, { "epoch": 1.1725941902620347, "grad_norm": 0.4821719825267792, "learning_rate": 6.8635568027036926e-06, "loss": 0.3075, "step": 25552 }, { "epoch": 1.1726400807672892, "grad_norm": 0.42865365743637085, "learning_rate": 6.863329279000146e-06, "loss": 0.2649, "step": 25553 }, { "epoch": 1.1726859712725437, "grad_norm": 0.4683023989200592, "learning_rate": 6.86310175081588e-06, "loss": 0.3813, "step": 25554 }, { "epoch": 1.1727318617777982, "grad_norm": 0.5178685188293457, "learning_rate": 6.862874218151443e-06, "loss": 0.4604, "step": 25555 }, { "epoch": 1.1727777522830527, "grad_norm": 0.5211215615272522, "learning_rate": 6.8626466810073815e-06, "loss": 0.4368, "step": 25556 }, { "epoch": 1.1728236427883072, "grad_norm": 0.4917925000190735, "learning_rate": 6.862419139384241e-06, "loss": 0.3835, "step": 25557 }, { "epoch": 1.1728695332935615, "grad_norm": 0.456134557723999, "learning_rate": 6.862191593282572e-06, "loss": 0.324, "step": 25558 }, { "epoch": 1.172915423798816, "grad_norm": 0.4897567927837372, "learning_rate": 6.86196404270292e-06, "loss": 0.3592, "step": 25559 }, { "epoch": 1.1729613143040705, "grad_norm": 0.4345453977584839, "learning_rate": 6.86173648764583e-06, "loss": 0.2746, "step": 25560 }, { "epoch": 1.173007204809325, "grad_norm": 0.48432499170303345, "learning_rate": 6.861508928111853e-06, "loss": 0.3551, "step": 25561 }, { "epoch": 1.1730530953145795, "grad_norm": 0.44443199038505554, "learning_rate": 6.861281364101534e-06, "loss": 0.3203, "step": 25562 }, { "epoch": 1.173098985819834, "grad_norm": 0.4816974699497223, "learning_rate": 6.861053795615422e-06, "loss": 0.3677, "step": 25563 }, { "epoch": 1.1731448763250882, "grad_norm": 0.4888845384120941, "learning_rate": 6.860826222654062e-06, "loss": 0.383, "step": 25564 }, { "epoch": 1.1731907668303427, "grad_norm": 0.4559001922607422, "learning_rate": 6.860598645218002e-06, "loss": 0.3036, "step": 25565 }, { "epoch": 1.1732366573355972, "grad_norm": 0.5008277297019958, "learning_rate": 6.86037106330779e-06, "loss": 0.4219, "step": 25566 }, { "epoch": 1.1732825478408517, "grad_norm": 0.4746609926223755, "learning_rate": 6.860143476923973e-06, "loss": 0.3543, "step": 25567 }, { "epoch": 1.1733284383461062, "grad_norm": 0.46074244379997253, "learning_rate": 6.8599158860670964e-06, "loss": 0.3508, "step": 25568 }, { "epoch": 1.1733743288513607, "grad_norm": 0.4928177297115326, "learning_rate": 6.859688290737711e-06, "loss": 0.4059, "step": 25569 }, { "epoch": 1.1734202193566152, "grad_norm": 0.5102314949035645, "learning_rate": 6.8594606909363615e-06, "loss": 0.408, "step": 25570 }, { "epoch": 1.1734661098618695, "grad_norm": 0.46034544706344604, "learning_rate": 6.859233086663595e-06, "loss": 0.3238, "step": 25571 }, { "epoch": 1.173512000367124, "grad_norm": 0.4639015197753906, "learning_rate": 6.859005477919961e-06, "loss": 0.354, "step": 25572 }, { "epoch": 1.1735578908723785, "grad_norm": 0.48340776562690735, "learning_rate": 6.858777864706004e-06, "loss": 0.331, "step": 25573 }, { "epoch": 1.173603781377633, "grad_norm": 0.5085130333900452, "learning_rate": 6.858550247022276e-06, "loss": 0.3755, "step": 25574 }, { "epoch": 1.1736496718828875, "grad_norm": 0.489210307598114, "learning_rate": 6.858322624869318e-06, "loss": 0.345, "step": 25575 }, { "epoch": 1.173695562388142, "grad_norm": 0.4802702069282532, "learning_rate": 6.858094998247682e-06, "loss": 0.383, "step": 25576 }, { "epoch": 1.1737414528933963, "grad_norm": 0.4607854187488556, "learning_rate": 6.857867367157913e-06, "loss": 0.3596, "step": 25577 }, { "epoch": 1.1737873433986508, "grad_norm": 0.48864156007766724, "learning_rate": 6.85763973160056e-06, "loss": 0.3358, "step": 25578 }, { "epoch": 1.1738332339039053, "grad_norm": 0.47249406576156616, "learning_rate": 6.85741209157617e-06, "loss": 0.328, "step": 25579 }, { "epoch": 1.1738791244091598, "grad_norm": 0.46810007095336914, "learning_rate": 6.85718444708529e-06, "loss": 0.3267, "step": 25580 }, { "epoch": 1.1739250149144143, "grad_norm": 0.4852651059627533, "learning_rate": 6.856956798128468e-06, "loss": 0.3885, "step": 25581 }, { "epoch": 1.1739709054196688, "grad_norm": 0.49484983086586, "learning_rate": 6.85672914470625e-06, "loss": 0.321, "step": 25582 }, { "epoch": 1.1740167959249233, "grad_norm": 0.492034912109375, "learning_rate": 6.856501486819185e-06, "loss": 0.3744, "step": 25583 }, { "epoch": 1.1740626864301775, "grad_norm": 0.4525633156299591, "learning_rate": 6.8562738244678195e-06, "loss": 0.3681, "step": 25584 }, { "epoch": 1.174108576935432, "grad_norm": 0.5134267807006836, "learning_rate": 6.856046157652703e-06, "loss": 0.422, "step": 25585 }, { "epoch": 1.1741544674406865, "grad_norm": 0.4716026782989502, "learning_rate": 6.855818486374379e-06, "loss": 0.3903, "step": 25586 }, { "epoch": 1.174200357945941, "grad_norm": 0.46050697565078735, "learning_rate": 6.855590810633398e-06, "loss": 0.342, "step": 25587 }, { "epoch": 1.1742462484511955, "grad_norm": 0.42134371399879456, "learning_rate": 6.855363130430306e-06, "loss": 0.2799, "step": 25588 }, { "epoch": 1.1742921389564498, "grad_norm": 0.5046181082725525, "learning_rate": 6.855135445765653e-06, "loss": 0.449, "step": 25589 }, { "epoch": 1.1743380294617043, "grad_norm": 0.5020381808280945, "learning_rate": 6.8549077566399825e-06, "loss": 0.4496, "step": 25590 }, { "epoch": 1.1743839199669588, "grad_norm": 0.42247965931892395, "learning_rate": 6.854680063053847e-06, "loss": 0.2492, "step": 25591 }, { "epoch": 1.1744298104722133, "grad_norm": 0.47944438457489014, "learning_rate": 6.85445236500779e-06, "loss": 0.4218, "step": 25592 }, { "epoch": 1.1744757009774678, "grad_norm": 0.435881108045578, "learning_rate": 6.8542246625023586e-06, "loss": 0.3162, "step": 25593 }, { "epoch": 1.1745215914827223, "grad_norm": 0.4483477473258972, "learning_rate": 6.853996955538104e-06, "loss": 0.3175, "step": 25594 }, { "epoch": 1.1745674819879768, "grad_norm": 0.4456421732902527, "learning_rate": 6.853769244115571e-06, "loss": 0.3153, "step": 25595 }, { "epoch": 1.174613372493231, "grad_norm": 0.44892561435699463, "learning_rate": 6.85354152823531e-06, "loss": 0.3115, "step": 25596 }, { "epoch": 1.1746592629984856, "grad_norm": 0.4607338607311249, "learning_rate": 6.853313807897864e-06, "loss": 0.3251, "step": 25597 }, { "epoch": 1.17470515350374, "grad_norm": 0.46220046281814575, "learning_rate": 6.853086083103786e-06, "loss": 0.3717, "step": 25598 }, { "epoch": 1.1747510440089945, "grad_norm": 0.4513967037200928, "learning_rate": 6.852858353853619e-06, "loss": 0.3286, "step": 25599 }, { "epoch": 1.174796934514249, "grad_norm": 0.4569966197013855, "learning_rate": 6.852630620147912e-06, "loss": 0.3589, "step": 25600 }, { "epoch": 1.1748428250195035, "grad_norm": 0.45557907223701477, "learning_rate": 6.852402881987213e-06, "loss": 0.3084, "step": 25601 }, { "epoch": 1.1748887155247578, "grad_norm": 0.48335859179496765, "learning_rate": 6.852175139372072e-06, "loss": 0.3435, "step": 25602 }, { "epoch": 1.1749346060300123, "grad_norm": 0.45509496331214905, "learning_rate": 6.851947392303033e-06, "loss": 0.3279, "step": 25603 }, { "epoch": 1.1749804965352668, "grad_norm": 0.5008500218391418, "learning_rate": 6.851719640780644e-06, "loss": 0.428, "step": 25604 }, { "epoch": 1.1750263870405213, "grad_norm": 0.48473989963531494, "learning_rate": 6.851491884805455e-06, "loss": 0.3611, "step": 25605 }, { "epoch": 1.1750722775457758, "grad_norm": 0.4900658428668976, "learning_rate": 6.851264124378012e-06, "loss": 0.3597, "step": 25606 }, { "epoch": 1.1751181680510303, "grad_norm": 0.42883649468421936, "learning_rate": 6.851036359498863e-06, "loss": 0.2933, "step": 25607 }, { "epoch": 1.1751640585562848, "grad_norm": 0.4408652186393738, "learning_rate": 6.850808590168555e-06, "loss": 0.3056, "step": 25608 }, { "epoch": 1.175209949061539, "grad_norm": 0.45024874806404114, "learning_rate": 6.850580816387638e-06, "loss": 0.3378, "step": 25609 }, { "epoch": 1.1752558395667936, "grad_norm": 0.47296851873397827, "learning_rate": 6.850353038156656e-06, "loss": 0.3958, "step": 25610 }, { "epoch": 1.175301730072048, "grad_norm": 0.46378618478775024, "learning_rate": 6.85012525547616e-06, "loss": 0.3969, "step": 25611 }, { "epoch": 1.1753476205773026, "grad_norm": 0.48663464188575745, "learning_rate": 6.849897468346698e-06, "loss": 0.3766, "step": 25612 }, { "epoch": 1.175393511082557, "grad_norm": 0.4591962397098541, "learning_rate": 6.849669676768816e-06, "loss": 0.3668, "step": 25613 }, { "epoch": 1.1754394015878116, "grad_norm": 0.4636283814907074, "learning_rate": 6.849441880743061e-06, "loss": 0.3545, "step": 25614 }, { "epoch": 1.1754852920930658, "grad_norm": 0.4776391088962555, "learning_rate": 6.849214080269982e-06, "loss": 0.3636, "step": 25615 }, { "epoch": 1.1755311825983203, "grad_norm": 0.6632169485092163, "learning_rate": 6.8489862753501264e-06, "loss": 0.4116, "step": 25616 }, { "epoch": 1.1755770731035748, "grad_norm": 0.4847368597984314, "learning_rate": 6.848758465984045e-06, "loss": 0.3627, "step": 25617 }, { "epoch": 1.1756229636088293, "grad_norm": 0.5001359581947327, "learning_rate": 6.848530652172282e-06, "loss": 0.389, "step": 25618 }, { "epoch": 1.1756688541140838, "grad_norm": 0.545688807964325, "learning_rate": 6.848302833915385e-06, "loss": 0.4821, "step": 25619 }, { "epoch": 1.1757147446193383, "grad_norm": 0.4666125774383545, "learning_rate": 6.8480750112139036e-06, "loss": 0.3225, "step": 25620 }, { "epoch": 1.1757606351245928, "grad_norm": 0.4411241114139557, "learning_rate": 6.8478471840683835e-06, "loss": 0.3399, "step": 25621 }, { "epoch": 1.175806525629847, "grad_norm": 0.5585257411003113, "learning_rate": 6.847619352479375e-06, "loss": 0.4158, "step": 25622 }, { "epoch": 1.1758524161351016, "grad_norm": 0.48042866587638855, "learning_rate": 6.8473915164474265e-06, "loss": 0.3272, "step": 25623 }, { "epoch": 1.175898306640356, "grad_norm": 0.45462745428085327, "learning_rate": 6.847163675973084e-06, "loss": 0.3389, "step": 25624 }, { "epoch": 1.1759441971456106, "grad_norm": 0.46928679943084717, "learning_rate": 6.846935831056896e-06, "loss": 0.3435, "step": 25625 }, { "epoch": 1.175990087650865, "grad_norm": 0.4700981080532074, "learning_rate": 6.8467079816994096e-06, "loss": 0.3285, "step": 25626 }, { "epoch": 1.1760359781561194, "grad_norm": 0.5453307032585144, "learning_rate": 6.846480127901173e-06, "loss": 0.4557, "step": 25627 }, { "epoch": 1.1760818686613739, "grad_norm": 0.48562899231910706, "learning_rate": 6.846252269662734e-06, "loss": 0.3247, "step": 25628 }, { "epoch": 1.1761277591666284, "grad_norm": 0.6901135444641113, "learning_rate": 6.846024406984644e-06, "loss": 0.4127, "step": 25629 }, { "epoch": 1.1761736496718829, "grad_norm": 0.47133857011795044, "learning_rate": 6.845796539867446e-06, "loss": 0.3205, "step": 25630 }, { "epoch": 1.1762195401771374, "grad_norm": 0.5129086375236511, "learning_rate": 6.845568668311688e-06, "loss": 0.417, "step": 25631 }, { "epoch": 1.1762654306823919, "grad_norm": 0.46819639205932617, "learning_rate": 6.845340792317923e-06, "loss": 0.3525, "step": 25632 }, { "epoch": 1.1763113211876464, "grad_norm": 0.48720142245292664, "learning_rate": 6.845112911886693e-06, "loss": 0.3302, "step": 25633 }, { "epoch": 1.1763572116929009, "grad_norm": 0.45668333768844604, "learning_rate": 6.84488502701855e-06, "loss": 0.324, "step": 25634 }, { "epoch": 1.1764031021981551, "grad_norm": 0.4703284502029419, "learning_rate": 6.844657137714042e-06, "loss": 0.3449, "step": 25635 }, { "epoch": 1.1764489927034096, "grad_norm": 0.4563477039337158, "learning_rate": 6.844429243973714e-06, "loss": 0.3197, "step": 25636 }, { "epoch": 1.1764948832086641, "grad_norm": 0.48183709383010864, "learning_rate": 6.844201345798117e-06, "loss": 0.3322, "step": 25637 }, { "epoch": 1.1765407737139186, "grad_norm": 0.44904422760009766, "learning_rate": 6.843973443187797e-06, "loss": 0.3087, "step": 25638 }, { "epoch": 1.1765866642191731, "grad_norm": 0.4431930482387543, "learning_rate": 6.843745536143302e-06, "loss": 0.2934, "step": 25639 }, { "epoch": 1.1766325547244274, "grad_norm": 0.4855479598045349, "learning_rate": 6.843517624665184e-06, "loss": 0.3854, "step": 25640 }, { "epoch": 1.176678445229682, "grad_norm": 0.48847299814224243, "learning_rate": 6.843289708753985e-06, "loss": 0.367, "step": 25641 }, { "epoch": 1.1767243357349364, "grad_norm": 0.4562525451183319, "learning_rate": 6.843061788410257e-06, "loss": 0.3174, "step": 25642 }, { "epoch": 1.176770226240191, "grad_norm": 0.4768557846546173, "learning_rate": 6.842833863634547e-06, "loss": 0.3949, "step": 25643 }, { "epoch": 1.1768161167454454, "grad_norm": 0.5186331272125244, "learning_rate": 6.842605934427402e-06, "loss": 0.4444, "step": 25644 }, { "epoch": 1.1768620072507, "grad_norm": 0.4745580554008484, "learning_rate": 6.8423780007893734e-06, "loss": 0.4202, "step": 25645 }, { "epoch": 1.1769078977559544, "grad_norm": 0.48131880164146423, "learning_rate": 6.842150062721007e-06, "loss": 0.3853, "step": 25646 }, { "epoch": 1.1769537882612087, "grad_norm": 0.457661509513855, "learning_rate": 6.8419221202228496e-06, "loss": 0.3322, "step": 25647 }, { "epoch": 1.1769996787664632, "grad_norm": 0.4485005736351013, "learning_rate": 6.841694173295452e-06, "loss": 0.2969, "step": 25648 }, { "epoch": 1.1770455692717177, "grad_norm": 0.4739619195461273, "learning_rate": 6.84146622193936e-06, "loss": 0.3797, "step": 25649 }, { "epoch": 1.1770914597769722, "grad_norm": 0.4598637819290161, "learning_rate": 6.8412382661551234e-06, "loss": 0.3463, "step": 25650 }, { "epoch": 1.1771373502822267, "grad_norm": 0.4538874924182892, "learning_rate": 6.841010305943292e-06, "loss": 0.3219, "step": 25651 }, { "epoch": 1.1771832407874812, "grad_norm": 0.4760102331638336, "learning_rate": 6.840782341304408e-06, "loss": 0.3449, "step": 25652 }, { "epoch": 1.1772291312927354, "grad_norm": 0.4779253900051117, "learning_rate": 6.840554372239025e-06, "loss": 0.3958, "step": 25653 }, { "epoch": 1.17727502179799, "grad_norm": 0.49629145860671997, "learning_rate": 6.840326398747691e-06, "loss": 0.384, "step": 25654 }, { "epoch": 1.1773209123032444, "grad_norm": 0.4855031669139862, "learning_rate": 6.840098420830952e-06, "loss": 0.3518, "step": 25655 }, { "epoch": 1.177366802808499, "grad_norm": 0.4801255464553833, "learning_rate": 6.839870438489356e-06, "loss": 0.3739, "step": 25656 }, { "epoch": 1.1774126933137534, "grad_norm": 0.45477619767189026, "learning_rate": 6.839642451723455e-06, "loss": 0.33, "step": 25657 }, { "epoch": 1.177458583819008, "grad_norm": 0.4763350784778595, "learning_rate": 6.839414460533792e-06, "loss": 0.3619, "step": 25658 }, { "epoch": 1.1775044743242624, "grad_norm": 0.46363896131515503, "learning_rate": 6.839186464920917e-06, "loss": 0.3178, "step": 25659 }, { "epoch": 1.1775503648295167, "grad_norm": 0.4309453070163727, "learning_rate": 6.838958464885382e-06, "loss": 0.2715, "step": 25660 }, { "epoch": 1.1775962553347712, "grad_norm": 0.45009729266166687, "learning_rate": 6.838730460427729e-06, "loss": 0.2688, "step": 25661 }, { "epoch": 1.1776421458400257, "grad_norm": 0.48578375577926636, "learning_rate": 6.838502451548512e-06, "loss": 0.3934, "step": 25662 }, { "epoch": 1.1776880363452802, "grad_norm": 0.4656767249107361, "learning_rate": 6.838274438248278e-06, "loss": 0.3317, "step": 25663 }, { "epoch": 1.1777339268505347, "grad_norm": 0.5049875974655151, "learning_rate": 6.838046420527571e-06, "loss": 0.3576, "step": 25664 }, { "epoch": 1.1777798173557892, "grad_norm": 0.45613908767700195, "learning_rate": 6.837818398386944e-06, "loss": 0.3219, "step": 25665 }, { "epoch": 1.1778257078610435, "grad_norm": 0.4677617847919464, "learning_rate": 6.837590371826943e-06, "loss": 0.3392, "step": 25666 }, { "epoch": 1.177871598366298, "grad_norm": 0.4620170593261719, "learning_rate": 6.837362340848119e-06, "loss": 0.323, "step": 25667 }, { "epoch": 1.1779174888715525, "grad_norm": 0.45046743750572205, "learning_rate": 6.8371343054510175e-06, "loss": 0.3301, "step": 25668 }, { "epoch": 1.177963379376807, "grad_norm": 0.5230370163917542, "learning_rate": 6.836906265636188e-06, "loss": 0.4577, "step": 25669 }, { "epoch": 1.1780092698820614, "grad_norm": 0.4484158754348755, "learning_rate": 6.836678221404176e-06, "loss": 0.2887, "step": 25670 }, { "epoch": 1.178055160387316, "grad_norm": 0.43846216797828674, "learning_rate": 6.836450172755535e-06, "loss": 0.3055, "step": 25671 }, { "epoch": 1.1781010508925704, "grad_norm": 0.47358906269073486, "learning_rate": 6.836222119690811e-06, "loss": 0.3138, "step": 25672 }, { "epoch": 1.1781469413978247, "grad_norm": 0.48473405838012695, "learning_rate": 6.835994062210552e-06, "loss": 0.3767, "step": 25673 }, { "epoch": 1.1781928319030792, "grad_norm": 0.47872573137283325, "learning_rate": 6.835766000315307e-06, "loss": 0.3694, "step": 25674 }, { "epoch": 1.1782387224083337, "grad_norm": 0.5203568339347839, "learning_rate": 6.835537934005624e-06, "loss": 0.4418, "step": 25675 }, { "epoch": 1.1782846129135882, "grad_norm": 0.6885707378387451, "learning_rate": 6.8353098632820504e-06, "loss": 0.3993, "step": 25676 }, { "epoch": 1.1783305034188427, "grad_norm": 0.4671296775341034, "learning_rate": 6.835081788145137e-06, "loss": 0.362, "step": 25677 }, { "epoch": 1.178376393924097, "grad_norm": 0.47926944494247437, "learning_rate": 6.834853708595433e-06, "loss": 0.4084, "step": 25678 }, { "epoch": 1.1784222844293515, "grad_norm": 0.47985947132110596, "learning_rate": 6.834625624633481e-06, "loss": 0.3555, "step": 25679 }, { "epoch": 1.178468174934606, "grad_norm": 0.4723998010158539, "learning_rate": 6.8343975362598355e-06, "loss": 0.3382, "step": 25680 }, { "epoch": 1.1785140654398605, "grad_norm": 0.482205331325531, "learning_rate": 6.834169443475043e-06, "loss": 0.2954, "step": 25681 }, { "epoch": 1.178559955945115, "grad_norm": 0.4411805272102356, "learning_rate": 6.833941346279651e-06, "loss": 0.2729, "step": 25682 }, { "epoch": 1.1786058464503695, "grad_norm": 0.44354721903800964, "learning_rate": 6.833713244674209e-06, "loss": 0.3232, "step": 25683 }, { "epoch": 1.178651736955624, "grad_norm": 0.4456596076488495, "learning_rate": 6.833485138659266e-06, "loss": 0.3318, "step": 25684 }, { "epoch": 1.1786976274608783, "grad_norm": 0.49992361664772034, "learning_rate": 6.833257028235371e-06, "loss": 0.3896, "step": 25685 }, { "epoch": 1.1787435179661327, "grad_norm": 0.42232975363731384, "learning_rate": 6.833028913403069e-06, "loss": 0.2892, "step": 25686 }, { "epoch": 1.1787894084713872, "grad_norm": 0.5516404509544373, "learning_rate": 6.832800794162912e-06, "loss": 0.4031, "step": 25687 }, { "epoch": 1.1788352989766417, "grad_norm": 0.4509449899196625, "learning_rate": 6.832572670515448e-06, "loss": 0.2954, "step": 25688 }, { "epoch": 1.1788811894818962, "grad_norm": 0.4559166431427002, "learning_rate": 6.8323445424612245e-06, "loss": 0.3141, "step": 25689 }, { "epoch": 1.1789270799871507, "grad_norm": 0.4875146448612213, "learning_rate": 6.8321164100007905e-06, "loss": 0.3361, "step": 25690 }, { "epoch": 1.178972970492405, "grad_norm": 0.46398815512657166, "learning_rate": 6.831888273134696e-06, "loss": 0.3456, "step": 25691 }, { "epoch": 1.1790188609976595, "grad_norm": 0.4595908224582672, "learning_rate": 6.831660131863487e-06, "loss": 0.3805, "step": 25692 }, { "epoch": 1.179064751502914, "grad_norm": 0.4863419532775879, "learning_rate": 6.831431986187713e-06, "loss": 0.3577, "step": 25693 }, { "epoch": 1.1791106420081685, "grad_norm": 0.4636259078979492, "learning_rate": 6.8312038361079246e-06, "loss": 0.3366, "step": 25694 }, { "epoch": 1.179156532513423, "grad_norm": 0.5204973816871643, "learning_rate": 6.8309756816246695e-06, "loss": 0.3578, "step": 25695 }, { "epoch": 1.1792024230186775, "grad_norm": 0.46602097153663635, "learning_rate": 6.830747522738495e-06, "loss": 0.3456, "step": 25696 }, { "epoch": 1.179248313523932, "grad_norm": 0.4656119644641876, "learning_rate": 6.8305193594499495e-06, "loss": 0.3534, "step": 25697 }, { "epoch": 1.1792942040291863, "grad_norm": 0.47305208444595337, "learning_rate": 6.830291191759582e-06, "loss": 0.3873, "step": 25698 }, { "epoch": 1.1793400945344408, "grad_norm": 0.5081974864006042, "learning_rate": 6.8300630196679455e-06, "loss": 0.3972, "step": 25699 }, { "epoch": 1.1793859850396953, "grad_norm": 0.45130231976509094, "learning_rate": 6.829834843175582e-06, "loss": 0.3215, "step": 25700 }, { "epoch": 1.1794318755449498, "grad_norm": 0.4616207480430603, "learning_rate": 6.829606662283043e-06, "loss": 0.3293, "step": 25701 }, { "epoch": 1.1794777660502043, "grad_norm": 0.4813922941684723, "learning_rate": 6.82937847699088e-06, "loss": 0.3318, "step": 25702 }, { "epoch": 1.1795236565554588, "grad_norm": 0.5179945230484009, "learning_rate": 6.829150287299637e-06, "loss": 0.4318, "step": 25703 }, { "epoch": 1.179569547060713, "grad_norm": 0.49133649468421936, "learning_rate": 6.828922093209865e-06, "loss": 0.3757, "step": 25704 }, { "epoch": 1.1796154375659675, "grad_norm": 0.473413348197937, "learning_rate": 6.828693894722113e-06, "loss": 0.3488, "step": 25705 }, { "epoch": 1.179661328071222, "grad_norm": 0.41615843772888184, "learning_rate": 6.82846569183693e-06, "loss": 0.2711, "step": 25706 }, { "epoch": 1.1797072185764765, "grad_norm": 0.46621787548065186, "learning_rate": 6.828237484554863e-06, "loss": 0.3223, "step": 25707 }, { "epoch": 1.179753109081731, "grad_norm": 0.44677236676216125, "learning_rate": 6.828009272876462e-06, "loss": 0.2948, "step": 25708 }, { "epoch": 1.1797989995869855, "grad_norm": 0.4850485920906067, "learning_rate": 6.827781056802275e-06, "loss": 0.3629, "step": 25709 }, { "epoch": 1.17984489009224, "grad_norm": 0.47587043046951294, "learning_rate": 6.8275528363328535e-06, "loss": 0.3564, "step": 25710 }, { "epoch": 1.1798907805974943, "grad_norm": 0.4721478819847107, "learning_rate": 6.827324611468743e-06, "loss": 0.3629, "step": 25711 }, { "epoch": 1.1799366711027488, "grad_norm": 0.485524982213974, "learning_rate": 6.827096382210493e-06, "loss": 0.3975, "step": 25712 }, { "epoch": 1.1799825616080033, "grad_norm": 0.4567892253398895, "learning_rate": 6.826868148558654e-06, "loss": 0.3295, "step": 25713 }, { "epoch": 1.1800284521132578, "grad_norm": 0.4610452651977539, "learning_rate": 6.826639910513772e-06, "loss": 0.3221, "step": 25714 }, { "epoch": 1.1800743426185123, "grad_norm": 0.45461663603782654, "learning_rate": 6.826411668076398e-06, "loss": 0.294, "step": 25715 }, { "epoch": 1.1801202331237666, "grad_norm": 0.43115073442459106, "learning_rate": 6.826183421247081e-06, "loss": 0.3046, "step": 25716 }, { "epoch": 1.180166123629021, "grad_norm": 0.44472235441207886, "learning_rate": 6.82595517002637e-06, "loss": 0.3308, "step": 25717 }, { "epoch": 1.1802120141342756, "grad_norm": 0.5108226537704468, "learning_rate": 6.825726914414812e-06, "loss": 0.4526, "step": 25718 }, { "epoch": 1.18025790463953, "grad_norm": 0.44894856214523315, "learning_rate": 6.825498654412958e-06, "loss": 0.3203, "step": 25719 }, { "epoch": 1.1803037951447846, "grad_norm": 0.4757935702800751, "learning_rate": 6.825270390021355e-06, "loss": 0.3813, "step": 25720 }, { "epoch": 1.180349685650039, "grad_norm": 0.4827066659927368, "learning_rate": 6.825042121240553e-06, "loss": 0.3366, "step": 25721 }, { "epoch": 1.1803955761552936, "grad_norm": 0.4591805338859558, "learning_rate": 6.824813848071101e-06, "loss": 0.344, "step": 25722 }, { "epoch": 1.180441466660548, "grad_norm": 0.49678781628608704, "learning_rate": 6.8245855705135466e-06, "loss": 0.4316, "step": 25723 }, { "epoch": 1.1804873571658023, "grad_norm": 0.4512389898300171, "learning_rate": 6.82435728856844e-06, "loss": 0.297, "step": 25724 }, { "epoch": 1.1805332476710568, "grad_norm": 0.4565200209617615, "learning_rate": 6.82412900223633e-06, "loss": 0.3211, "step": 25725 }, { "epoch": 1.1805791381763113, "grad_norm": 0.4762606620788574, "learning_rate": 6.8239007115177656e-06, "loss": 0.3737, "step": 25726 }, { "epoch": 1.1806250286815658, "grad_norm": 0.45365360379219055, "learning_rate": 6.8236724164132965e-06, "loss": 0.3543, "step": 25727 }, { "epoch": 1.1806709191868203, "grad_norm": 0.489239901304245, "learning_rate": 6.82344411692347e-06, "loss": 0.3743, "step": 25728 }, { "epoch": 1.1807168096920746, "grad_norm": 0.4461171627044678, "learning_rate": 6.8232158130488355e-06, "loss": 0.3059, "step": 25729 }, { "epoch": 1.180762700197329, "grad_norm": 0.47868844866752625, "learning_rate": 6.822987504789944e-06, "loss": 0.3467, "step": 25730 }, { "epoch": 1.1808085907025836, "grad_norm": 0.4418332874774933, "learning_rate": 6.822759192147341e-06, "loss": 0.3282, "step": 25731 }, { "epoch": 1.180854481207838, "grad_norm": 0.42974305152893066, "learning_rate": 6.8225308751215795e-06, "loss": 0.3079, "step": 25732 }, { "epoch": 1.1809003717130926, "grad_norm": 0.43337303400039673, "learning_rate": 6.8223025537132044e-06, "loss": 0.3025, "step": 25733 }, { "epoch": 1.180946262218347, "grad_norm": 0.4633924961090088, "learning_rate": 6.822074227922768e-06, "loss": 0.3528, "step": 25734 }, { "epoch": 1.1809921527236016, "grad_norm": 0.521598219871521, "learning_rate": 6.821845897750816e-06, "loss": 0.4649, "step": 25735 }, { "epoch": 1.1810380432288559, "grad_norm": 0.47039860486984253, "learning_rate": 6.821617563197902e-06, "loss": 0.3609, "step": 25736 }, { "epoch": 1.1810839337341104, "grad_norm": 0.48617884516716003, "learning_rate": 6.821389224264572e-06, "loss": 0.3588, "step": 25737 }, { "epoch": 1.1811298242393649, "grad_norm": 0.4291553497314453, "learning_rate": 6.821160880951376e-06, "loss": 0.2924, "step": 25738 }, { "epoch": 1.1811757147446194, "grad_norm": 0.4874695837497711, "learning_rate": 6.820932533258864e-06, "loss": 0.364, "step": 25739 }, { "epoch": 1.1812216052498739, "grad_norm": 0.4542138874530792, "learning_rate": 6.820704181187582e-06, "loss": 0.3632, "step": 25740 }, { "epoch": 1.1812674957551283, "grad_norm": 0.4938436448574066, "learning_rate": 6.8204758247380805e-06, "loss": 0.388, "step": 25741 }, { "epoch": 1.1813133862603826, "grad_norm": 0.432159960269928, "learning_rate": 6.8202474639109105e-06, "loss": 0.3214, "step": 25742 }, { "epoch": 1.1813592767656371, "grad_norm": 0.456684410572052, "learning_rate": 6.820019098706621e-06, "loss": 0.303, "step": 25743 }, { "epoch": 1.1814051672708916, "grad_norm": 0.45351463556289673, "learning_rate": 6.819790729125757e-06, "loss": 0.3644, "step": 25744 }, { "epoch": 1.1814510577761461, "grad_norm": 0.487506240606308, "learning_rate": 6.819562355168874e-06, "loss": 0.3601, "step": 25745 }, { "epoch": 1.1814969482814006, "grad_norm": 0.43712329864501953, "learning_rate": 6.819333976836515e-06, "loss": 0.3209, "step": 25746 }, { "epoch": 1.1815428387866551, "grad_norm": 0.5129201412200928, "learning_rate": 6.819105594129234e-06, "loss": 0.4145, "step": 25747 }, { "epoch": 1.1815887292919096, "grad_norm": 0.454344779253006, "learning_rate": 6.818877207047576e-06, "loss": 0.3187, "step": 25748 }, { "epoch": 1.1816346197971639, "grad_norm": 0.48502662777900696, "learning_rate": 6.818648815592095e-06, "loss": 0.3607, "step": 25749 }, { "epoch": 1.1816805103024184, "grad_norm": 0.4417001008987427, "learning_rate": 6.818420419763336e-06, "loss": 0.2897, "step": 25750 }, { "epoch": 1.1817264008076729, "grad_norm": 0.4868485927581787, "learning_rate": 6.818192019561849e-06, "loss": 0.3796, "step": 25751 }, { "epoch": 1.1817722913129274, "grad_norm": 0.4825170934200287, "learning_rate": 6.817963614988185e-06, "loss": 0.3756, "step": 25752 }, { "epoch": 1.1818181818181819, "grad_norm": 0.5274156332015991, "learning_rate": 6.817735206042893e-06, "loss": 0.4017, "step": 25753 }, { "epoch": 1.1818640723234364, "grad_norm": 0.5339662432670593, "learning_rate": 6.817506792726522e-06, "loss": 0.4089, "step": 25754 }, { "epoch": 1.1819099628286907, "grad_norm": 0.46307602524757385, "learning_rate": 6.817278375039617e-06, "loss": 0.3374, "step": 25755 }, { "epoch": 1.1819558533339452, "grad_norm": 0.48695841431617737, "learning_rate": 6.817049952982735e-06, "loss": 0.3594, "step": 25756 }, { "epoch": 1.1820017438391996, "grad_norm": 0.43565067648887634, "learning_rate": 6.816821526556419e-06, "loss": 0.2882, "step": 25757 }, { "epoch": 1.1820476343444541, "grad_norm": 0.41802576184272766, "learning_rate": 6.816593095761221e-06, "loss": 0.2853, "step": 25758 }, { "epoch": 1.1820935248497086, "grad_norm": 0.47147998213768005, "learning_rate": 6.816364660597692e-06, "loss": 0.356, "step": 25759 }, { "epoch": 1.1821394153549631, "grad_norm": 0.46147987246513367, "learning_rate": 6.816136221066378e-06, "loss": 0.3492, "step": 25760 }, { "epoch": 1.1821853058602176, "grad_norm": 0.4783500134944916, "learning_rate": 6.815907777167829e-06, "loss": 0.3575, "step": 25761 }, { "epoch": 1.182231196365472, "grad_norm": 0.44525212049484253, "learning_rate": 6.815679328902596e-06, "loss": 0.3525, "step": 25762 }, { "epoch": 1.1822770868707264, "grad_norm": 0.45950812101364136, "learning_rate": 6.815450876271225e-06, "loss": 0.3464, "step": 25763 }, { "epoch": 1.182322977375981, "grad_norm": 0.4603084623813629, "learning_rate": 6.8152224192742715e-06, "loss": 0.3368, "step": 25764 }, { "epoch": 1.1823688678812354, "grad_norm": 0.47860515117645264, "learning_rate": 6.814993957912279e-06, "loss": 0.3523, "step": 25765 }, { "epoch": 1.18241475838649, "grad_norm": 0.506956160068512, "learning_rate": 6.814765492185798e-06, "loss": 0.3872, "step": 25766 }, { "epoch": 1.1824606488917442, "grad_norm": 0.4673523008823395, "learning_rate": 6.81453702209538e-06, "loss": 0.36, "step": 25767 }, { "epoch": 1.1825065393969987, "grad_norm": 0.4840168356895447, "learning_rate": 6.8143085476415715e-06, "loss": 0.3727, "step": 25768 }, { "epoch": 1.1825524299022532, "grad_norm": 0.45054978132247925, "learning_rate": 6.814080068824924e-06, "loss": 0.283, "step": 25769 }, { "epoch": 1.1825983204075077, "grad_norm": 0.47714748978614807, "learning_rate": 6.813851585645987e-06, "loss": 0.4091, "step": 25770 }, { "epoch": 1.1826442109127622, "grad_norm": 0.4814198315143585, "learning_rate": 6.81362309810531e-06, "loss": 0.3678, "step": 25771 }, { "epoch": 1.1826901014180167, "grad_norm": 0.42910128831863403, "learning_rate": 6.81339460620344e-06, "loss": 0.3321, "step": 25772 }, { "epoch": 1.1827359919232712, "grad_norm": 0.4507873058319092, "learning_rate": 6.813166109940931e-06, "loss": 0.3279, "step": 25773 }, { "epoch": 1.1827818824285254, "grad_norm": 0.46349766850471497, "learning_rate": 6.812937609318328e-06, "loss": 0.3388, "step": 25774 }, { "epoch": 1.18282777293378, "grad_norm": 0.4771181046962738, "learning_rate": 6.812709104336182e-06, "loss": 0.4019, "step": 25775 }, { "epoch": 1.1828736634390344, "grad_norm": 0.45871517062187195, "learning_rate": 6.8124805949950444e-06, "loss": 0.3452, "step": 25776 }, { "epoch": 1.182919553944289, "grad_norm": 0.3916628062725067, "learning_rate": 6.8122520812954605e-06, "loss": 0.252, "step": 25777 }, { "epoch": 1.1829654444495434, "grad_norm": 0.48789238929748535, "learning_rate": 6.812023563237985e-06, "loss": 0.3824, "step": 25778 }, { "epoch": 1.183011334954798, "grad_norm": 0.47765928506851196, "learning_rate": 6.811795040823162e-06, "loss": 0.3755, "step": 25779 }, { "epoch": 1.1830572254600522, "grad_norm": 0.4620749056339264, "learning_rate": 6.811566514051545e-06, "loss": 0.3238, "step": 25780 }, { "epoch": 1.1831031159653067, "grad_norm": 0.432862251996994, "learning_rate": 6.811337982923683e-06, "loss": 0.29, "step": 25781 }, { "epoch": 1.1831490064705612, "grad_norm": 0.4892668128013611, "learning_rate": 6.811109447440125e-06, "loss": 0.3921, "step": 25782 }, { "epoch": 1.1831948969758157, "grad_norm": 0.448934406042099, "learning_rate": 6.8108809076014186e-06, "loss": 0.3325, "step": 25783 }, { "epoch": 1.1832407874810702, "grad_norm": 0.4568212926387787, "learning_rate": 6.8106523634081165e-06, "loss": 0.3572, "step": 25784 }, { "epoch": 1.1832866779863247, "grad_norm": 0.475515216588974, "learning_rate": 6.810423814860766e-06, "loss": 0.3484, "step": 25785 }, { "epoch": 1.1833325684915792, "grad_norm": 0.46559572219848633, "learning_rate": 6.810195261959919e-06, "loss": 0.4005, "step": 25786 }, { "epoch": 1.1833784589968335, "grad_norm": 0.4708154499530792, "learning_rate": 6.8099667047061235e-06, "loss": 0.3635, "step": 25787 }, { "epoch": 1.183424349502088, "grad_norm": 0.45441746711730957, "learning_rate": 6.809738143099928e-06, "loss": 0.337, "step": 25788 }, { "epoch": 1.1834702400073425, "grad_norm": 0.44106560945510864, "learning_rate": 6.809509577141884e-06, "loss": 0.3033, "step": 25789 }, { "epoch": 1.183516130512597, "grad_norm": 0.4885255694389343, "learning_rate": 6.809281006832541e-06, "loss": 0.3744, "step": 25790 }, { "epoch": 1.1835620210178515, "grad_norm": 0.5032009482383728, "learning_rate": 6.809052432172447e-06, "loss": 0.3978, "step": 25791 }, { "epoch": 1.183607911523106, "grad_norm": 0.4950259327888489, "learning_rate": 6.808823853162154e-06, "loss": 0.4124, "step": 25792 }, { "epoch": 1.1836538020283602, "grad_norm": 0.440635085105896, "learning_rate": 6.808595269802211e-06, "loss": 0.2972, "step": 25793 }, { "epoch": 1.1836996925336147, "grad_norm": 0.48225557804107666, "learning_rate": 6.808366682093166e-06, "loss": 0.3979, "step": 25794 }, { "epoch": 1.1837455830388692, "grad_norm": 0.45347875356674194, "learning_rate": 6.808138090035571e-06, "loss": 0.3325, "step": 25795 }, { "epoch": 1.1837914735441237, "grad_norm": 0.46068161725997925, "learning_rate": 6.807909493629972e-06, "loss": 0.3854, "step": 25796 }, { "epoch": 1.1838373640493782, "grad_norm": 0.47206705808639526, "learning_rate": 6.8076808928769245e-06, "loss": 0.3776, "step": 25797 }, { "epoch": 1.1838832545546327, "grad_norm": 0.4943772554397583, "learning_rate": 6.807452287776975e-06, "loss": 0.4392, "step": 25798 }, { "epoch": 1.1839291450598872, "grad_norm": 0.45922866463661194, "learning_rate": 6.80722367833067e-06, "loss": 0.3847, "step": 25799 }, { "epoch": 1.1839750355651415, "grad_norm": 0.46701470017433167, "learning_rate": 6.806995064538564e-06, "loss": 0.3494, "step": 25800 }, { "epoch": 1.184020926070396, "grad_norm": 0.6164257526397705, "learning_rate": 6.806766446401206e-06, "loss": 0.3562, "step": 25801 }, { "epoch": 1.1840668165756505, "grad_norm": 0.46305176615715027, "learning_rate": 6.806537823919144e-06, "loss": 0.3665, "step": 25802 }, { "epoch": 1.184112707080905, "grad_norm": 0.4965791404247284, "learning_rate": 6.8063091970929294e-06, "loss": 0.3783, "step": 25803 }, { "epoch": 1.1841585975861595, "grad_norm": 0.495858371257782, "learning_rate": 6.806080565923112e-06, "loss": 0.4197, "step": 25804 }, { "epoch": 1.1842044880914138, "grad_norm": 0.5072774887084961, "learning_rate": 6.805851930410239e-06, "loss": 0.3992, "step": 25805 }, { "epoch": 1.1842503785966683, "grad_norm": 0.4725784957408905, "learning_rate": 6.805623290554862e-06, "loss": 0.3286, "step": 25806 }, { "epoch": 1.1842962691019228, "grad_norm": 0.5396092534065247, "learning_rate": 6.805394646357533e-06, "loss": 0.4218, "step": 25807 }, { "epoch": 1.1843421596071773, "grad_norm": 0.44107815623283386, "learning_rate": 6.805165997818798e-06, "loss": 0.3224, "step": 25808 }, { "epoch": 1.1843880501124318, "grad_norm": 0.452438086271286, "learning_rate": 6.8049373449392086e-06, "loss": 0.3299, "step": 25809 }, { "epoch": 1.1844339406176863, "grad_norm": 0.4493481516838074, "learning_rate": 6.804708687719315e-06, "loss": 0.351, "step": 25810 }, { "epoch": 1.1844798311229408, "grad_norm": 0.4752193093299866, "learning_rate": 6.804480026159666e-06, "loss": 0.3159, "step": 25811 }, { "epoch": 1.1845257216281952, "grad_norm": 0.48184528946876526, "learning_rate": 6.804251360260813e-06, "loss": 0.3466, "step": 25812 }, { "epoch": 1.1845716121334495, "grad_norm": 0.4440290331840515, "learning_rate": 6.804022690023304e-06, "loss": 0.302, "step": 25813 }, { "epoch": 1.184617502638704, "grad_norm": 0.5067896246910095, "learning_rate": 6.80379401544769e-06, "loss": 0.4836, "step": 25814 }, { "epoch": 1.1846633931439585, "grad_norm": 0.45428988337516785, "learning_rate": 6.803565336534521e-06, "loss": 0.3232, "step": 25815 }, { "epoch": 1.184709283649213, "grad_norm": 0.47001200914382935, "learning_rate": 6.803336653284347e-06, "loss": 0.3567, "step": 25816 }, { "epoch": 1.1847551741544675, "grad_norm": 0.4891206622123718, "learning_rate": 6.803107965697715e-06, "loss": 0.3957, "step": 25817 }, { "epoch": 1.1848010646597218, "grad_norm": 0.47962456941604614, "learning_rate": 6.80287927377518e-06, "loss": 0.4058, "step": 25818 }, { "epoch": 1.1848469551649763, "grad_norm": 0.45609140396118164, "learning_rate": 6.802650577517289e-06, "loss": 0.3462, "step": 25819 }, { "epoch": 1.1848928456702308, "grad_norm": 0.44039976596832275, "learning_rate": 6.802421876924592e-06, "loss": 0.3102, "step": 25820 }, { "epoch": 1.1849387361754853, "grad_norm": 0.4810160994529724, "learning_rate": 6.802193171997639e-06, "loss": 0.3894, "step": 25821 }, { "epoch": 1.1849846266807398, "grad_norm": 0.42905473709106445, "learning_rate": 6.801964462736981e-06, "loss": 0.303, "step": 25822 }, { "epoch": 1.1850305171859943, "grad_norm": 0.5115122199058533, "learning_rate": 6.801735749143166e-06, "loss": 0.3651, "step": 25823 }, { "epoch": 1.1850764076912488, "grad_norm": 0.4448230564594269, "learning_rate": 6.801507031216746e-06, "loss": 0.3349, "step": 25824 }, { "epoch": 1.185122298196503, "grad_norm": 0.46673721075057983, "learning_rate": 6.8012783089582705e-06, "loss": 0.3442, "step": 25825 }, { "epoch": 1.1851681887017576, "grad_norm": 0.5651987195014954, "learning_rate": 6.801049582368288e-06, "loss": 0.4485, "step": 25826 }, { "epoch": 1.185214079207012, "grad_norm": 0.44050121307373047, "learning_rate": 6.800820851447351e-06, "loss": 0.3132, "step": 25827 }, { "epoch": 1.1852599697122665, "grad_norm": 0.432381272315979, "learning_rate": 6.800592116196006e-06, "loss": 0.3016, "step": 25828 }, { "epoch": 1.185305860217521, "grad_norm": 0.4215467870235443, "learning_rate": 6.800363376614808e-06, "loss": 0.2961, "step": 25829 }, { "epoch": 1.1853517507227755, "grad_norm": 0.5149332880973816, "learning_rate": 6.800134632704304e-06, "loss": 0.4266, "step": 25830 }, { "epoch": 1.1853976412280298, "grad_norm": 0.4385400712490082, "learning_rate": 6.799905884465042e-06, "loss": 0.3129, "step": 25831 }, { "epoch": 1.1854435317332843, "grad_norm": 0.4783274829387665, "learning_rate": 6.799677131897576e-06, "loss": 0.3715, "step": 25832 }, { "epoch": 1.1854894222385388, "grad_norm": 0.48721322417259216, "learning_rate": 6.799448375002454e-06, "loss": 0.3374, "step": 25833 }, { "epoch": 1.1855353127437933, "grad_norm": 0.44686073064804077, "learning_rate": 6.7992196137802266e-06, "loss": 0.2944, "step": 25834 }, { "epoch": 1.1855812032490478, "grad_norm": 0.45809438824653625, "learning_rate": 6.7989908482314444e-06, "loss": 0.3261, "step": 25835 }, { "epoch": 1.1856270937543023, "grad_norm": 0.41723141074180603, "learning_rate": 6.7987620783566575e-06, "loss": 0.2819, "step": 25836 }, { "epoch": 1.1856729842595568, "grad_norm": 0.43353667855262756, "learning_rate": 6.798533304156414e-06, "loss": 0.3106, "step": 25837 }, { "epoch": 1.185718874764811, "grad_norm": 0.45132872462272644, "learning_rate": 6.798304525631267e-06, "loss": 0.308, "step": 25838 }, { "epoch": 1.1857647652700656, "grad_norm": 0.45703521370887756, "learning_rate": 6.7980757427817645e-06, "loss": 0.3345, "step": 25839 }, { "epoch": 1.18581065577532, "grad_norm": 0.4652842581272125, "learning_rate": 6.797846955608456e-06, "loss": 0.3342, "step": 25840 }, { "epoch": 1.1858565462805746, "grad_norm": 0.4615703225135803, "learning_rate": 6.797618164111897e-06, "loss": 0.3714, "step": 25841 }, { "epoch": 1.185902436785829, "grad_norm": 0.43365955352783203, "learning_rate": 6.7973893682926294e-06, "loss": 0.3054, "step": 25842 }, { "epoch": 1.1859483272910836, "grad_norm": 0.4654073119163513, "learning_rate": 6.797160568151209e-06, "loss": 0.3332, "step": 25843 }, { "epoch": 1.1859942177963378, "grad_norm": 0.49784529209136963, "learning_rate": 6.796931763688185e-06, "loss": 0.3803, "step": 25844 }, { "epoch": 1.1860401083015923, "grad_norm": 0.42667368054389954, "learning_rate": 6.796702954904106e-06, "loss": 0.2666, "step": 25845 }, { "epoch": 1.1860859988068468, "grad_norm": 0.4812326729297638, "learning_rate": 6.796474141799525e-06, "loss": 0.405, "step": 25846 }, { "epoch": 1.1861318893121013, "grad_norm": 0.4847441613674164, "learning_rate": 6.796245324374991e-06, "loss": 0.3538, "step": 25847 }, { "epoch": 1.1861777798173558, "grad_norm": 0.46031486988067627, "learning_rate": 6.796016502631052e-06, "loss": 0.3846, "step": 25848 }, { "epoch": 1.1862236703226103, "grad_norm": 0.515594482421875, "learning_rate": 6.795787676568262e-06, "loss": 0.4836, "step": 25849 }, { "epoch": 1.1862695608278648, "grad_norm": 0.46542873978614807, "learning_rate": 6.795558846187168e-06, "loss": 0.3779, "step": 25850 }, { "epoch": 1.186315451333119, "grad_norm": 0.4470653235912323, "learning_rate": 6.795330011488322e-06, "loss": 0.344, "step": 25851 }, { "epoch": 1.1863613418383736, "grad_norm": 0.4862048923969269, "learning_rate": 6.795101172472276e-06, "loss": 0.4211, "step": 25852 }, { "epoch": 1.186407232343628, "grad_norm": 0.4666740894317627, "learning_rate": 6.794872329139576e-06, "loss": 0.3307, "step": 25853 }, { "epoch": 1.1864531228488826, "grad_norm": 0.46470269560813904, "learning_rate": 6.794643481490775e-06, "loss": 0.3489, "step": 25854 }, { "epoch": 1.186499013354137, "grad_norm": 0.472949355840683, "learning_rate": 6.794414629526423e-06, "loss": 0.3504, "step": 25855 }, { "epoch": 1.1865449038593914, "grad_norm": 0.43333902955055237, "learning_rate": 6.7941857732470686e-06, "loss": 0.315, "step": 25856 }, { "epoch": 1.1865907943646459, "grad_norm": 0.47295641899108887, "learning_rate": 6.793956912653266e-06, "loss": 0.3656, "step": 25857 }, { "epoch": 1.1866366848699004, "grad_norm": 0.47792354226112366, "learning_rate": 6.793728047745562e-06, "loss": 0.3869, "step": 25858 }, { "epoch": 1.1866825753751549, "grad_norm": 0.4636421203613281, "learning_rate": 6.793499178524509e-06, "loss": 0.3698, "step": 25859 }, { "epoch": 1.1867284658804094, "grad_norm": 0.4990634322166443, "learning_rate": 6.7932703049906555e-06, "loss": 0.3863, "step": 25860 }, { "epoch": 1.1867743563856639, "grad_norm": 0.5338382124900818, "learning_rate": 6.793041427144553e-06, "loss": 0.4114, "step": 25861 }, { "epoch": 1.1868202468909184, "grad_norm": 0.4765791893005371, "learning_rate": 6.792812544986752e-06, "loss": 0.3713, "step": 25862 }, { "epoch": 1.1868661373961726, "grad_norm": 0.48965150117874146, "learning_rate": 6.792583658517804e-06, "loss": 0.3929, "step": 25863 }, { "epoch": 1.1869120279014271, "grad_norm": 0.4491797685623169, "learning_rate": 6.7923547677382564e-06, "loss": 0.3446, "step": 25864 }, { "epoch": 1.1869579184066816, "grad_norm": 0.4675693213939667, "learning_rate": 6.792125872648661e-06, "loss": 0.3815, "step": 25865 }, { "epoch": 1.1870038089119361, "grad_norm": 0.4605647623538971, "learning_rate": 6.79189697324957e-06, "loss": 0.3513, "step": 25866 }, { "epoch": 1.1870496994171906, "grad_norm": 0.46836647391319275, "learning_rate": 6.791668069541531e-06, "loss": 0.3782, "step": 25867 }, { "epoch": 1.1870955899224451, "grad_norm": 0.47099512815475464, "learning_rate": 6.791439161525096e-06, "loss": 0.3867, "step": 25868 }, { "epoch": 1.1871414804276994, "grad_norm": 0.4759407639503479, "learning_rate": 6.791210249200817e-06, "loss": 0.412, "step": 25869 }, { "epoch": 1.187187370932954, "grad_norm": 0.4703865945339203, "learning_rate": 6.7909813325692395e-06, "loss": 0.347, "step": 25870 }, { "epoch": 1.1872332614382084, "grad_norm": 0.48088058829307556, "learning_rate": 6.790752411630919e-06, "loss": 0.4032, "step": 25871 }, { "epoch": 1.187279151943463, "grad_norm": 0.42564520239830017, "learning_rate": 6.790523486386404e-06, "loss": 0.264, "step": 25872 }, { "epoch": 1.1873250424487174, "grad_norm": 0.46695011854171753, "learning_rate": 6.7902945568362445e-06, "loss": 0.3181, "step": 25873 }, { "epoch": 1.187370932953972, "grad_norm": 0.48359858989715576, "learning_rate": 6.7900656229809925e-06, "loss": 0.3698, "step": 25874 }, { "epoch": 1.1874168234592264, "grad_norm": 0.514275074005127, "learning_rate": 6.789836684821198e-06, "loss": 0.4334, "step": 25875 }, { "epoch": 1.1874627139644807, "grad_norm": 0.5356282591819763, "learning_rate": 6.78960774235741e-06, "loss": 0.5047, "step": 25876 }, { "epoch": 1.1875086044697352, "grad_norm": 0.45261743664741516, "learning_rate": 6.789378795590181e-06, "loss": 0.3068, "step": 25877 }, { "epoch": 1.1875544949749897, "grad_norm": 0.48713764548301697, "learning_rate": 6.7891498445200595e-06, "loss": 0.3605, "step": 25878 }, { "epoch": 1.1876003854802442, "grad_norm": 0.5010494589805603, "learning_rate": 6.788920889147599e-06, "loss": 0.3639, "step": 25879 }, { "epoch": 1.1876462759854987, "grad_norm": 0.4508076012134552, "learning_rate": 6.788691929473348e-06, "loss": 0.3434, "step": 25880 }, { "epoch": 1.1876921664907532, "grad_norm": 0.4413585662841797, "learning_rate": 6.788462965497856e-06, "loss": 0.2992, "step": 25881 }, { "epoch": 1.1877380569960074, "grad_norm": 0.47807082533836365, "learning_rate": 6.788233997221676e-06, "loss": 0.372, "step": 25882 }, { "epoch": 1.187783947501262, "grad_norm": 0.47678834199905396, "learning_rate": 6.788005024645359e-06, "loss": 0.3926, "step": 25883 }, { "epoch": 1.1878298380065164, "grad_norm": 0.4768449366092682, "learning_rate": 6.787776047769452e-06, "loss": 0.3801, "step": 25884 }, { "epoch": 1.187875728511771, "grad_norm": 0.42654556035995483, "learning_rate": 6.787547066594509e-06, "loss": 0.2997, "step": 25885 }, { "epoch": 1.1879216190170254, "grad_norm": 0.48210984468460083, "learning_rate": 6.787318081121079e-06, "loss": 0.3324, "step": 25886 }, { "epoch": 1.18796750952228, "grad_norm": 0.43425706028938293, "learning_rate": 6.787089091349714e-06, "loss": 0.3248, "step": 25887 }, { "epoch": 1.1880134000275344, "grad_norm": 0.46285316348075867, "learning_rate": 6.7868600972809614e-06, "loss": 0.3718, "step": 25888 }, { "epoch": 1.1880592905327887, "grad_norm": 0.4728703796863556, "learning_rate": 6.786631098915376e-06, "loss": 0.3602, "step": 25889 }, { "epoch": 1.1881051810380432, "grad_norm": 0.46048831939697266, "learning_rate": 6.786402096253507e-06, "loss": 0.335, "step": 25890 }, { "epoch": 1.1881510715432977, "grad_norm": 0.4514266550540924, "learning_rate": 6.786173089295905e-06, "loss": 0.3114, "step": 25891 }, { "epoch": 1.1881969620485522, "grad_norm": 0.4883325695991516, "learning_rate": 6.78594407804312e-06, "loss": 0.3729, "step": 25892 }, { "epoch": 1.1882428525538067, "grad_norm": 0.4710811376571655, "learning_rate": 6.785715062495701e-06, "loss": 0.3855, "step": 25893 }, { "epoch": 1.188288743059061, "grad_norm": 0.4730255603790283, "learning_rate": 6.785486042654204e-06, "loss": 0.407, "step": 25894 }, { "epoch": 1.1883346335643155, "grad_norm": 0.4469076097011566, "learning_rate": 6.785257018519174e-06, "loss": 0.3189, "step": 25895 }, { "epoch": 1.18838052406957, "grad_norm": 0.4753378927707672, "learning_rate": 6.7850279900911665e-06, "loss": 0.4024, "step": 25896 }, { "epoch": 1.1884264145748245, "grad_norm": 0.4426412880420685, "learning_rate": 6.78479895737073e-06, "loss": 0.3401, "step": 25897 }, { "epoch": 1.188472305080079, "grad_norm": 0.46274593472480774, "learning_rate": 6.784569920358415e-06, "loss": 0.3505, "step": 25898 }, { "epoch": 1.1885181955853334, "grad_norm": 0.45692750811576843, "learning_rate": 6.78434087905477e-06, "loss": 0.339, "step": 25899 }, { "epoch": 1.188564086090588, "grad_norm": 0.4778342843055725, "learning_rate": 6.784111833460351e-06, "loss": 0.3796, "step": 25900 }, { "epoch": 1.1886099765958424, "grad_norm": 0.45717284083366394, "learning_rate": 6.783882783575707e-06, "loss": 0.303, "step": 25901 }, { "epoch": 1.1886558671010967, "grad_norm": 0.4876936078071594, "learning_rate": 6.783653729401386e-06, "loss": 0.3831, "step": 25902 }, { "epoch": 1.1887017576063512, "grad_norm": 0.4490300714969635, "learning_rate": 6.783424670937942e-06, "loss": 0.3035, "step": 25903 }, { "epoch": 1.1887476481116057, "grad_norm": 0.4157750904560089, "learning_rate": 6.783195608185923e-06, "loss": 0.2729, "step": 25904 }, { "epoch": 1.1887935386168602, "grad_norm": 0.5280031561851501, "learning_rate": 6.782966541145881e-06, "loss": 0.4031, "step": 25905 }, { "epoch": 1.1888394291221147, "grad_norm": 0.4866751730442047, "learning_rate": 6.782737469818369e-06, "loss": 0.388, "step": 25906 }, { "epoch": 1.188885319627369, "grad_norm": 0.4589093029499054, "learning_rate": 6.782508394203935e-06, "loss": 0.3296, "step": 25907 }, { "epoch": 1.1889312101326235, "grad_norm": 0.5291438102722168, "learning_rate": 6.782279314303131e-06, "loss": 0.4462, "step": 25908 }, { "epoch": 1.188977100637878, "grad_norm": 0.4655928611755371, "learning_rate": 6.7820502301165085e-06, "loss": 0.3521, "step": 25909 }, { "epoch": 1.1890229911431325, "grad_norm": 0.5223348736763, "learning_rate": 6.781821141644617e-06, "loss": 0.4315, "step": 25910 }, { "epoch": 1.189068881648387, "grad_norm": 0.45392537117004395, "learning_rate": 6.7815920488880085e-06, "loss": 0.3262, "step": 25911 }, { "epoch": 1.1891147721536415, "grad_norm": 0.6376566886901855, "learning_rate": 6.781362951847234e-06, "loss": 0.3814, "step": 25912 }, { "epoch": 1.189160662658896, "grad_norm": 0.4888342320919037, "learning_rate": 6.781133850522841e-06, "loss": 0.3679, "step": 25913 }, { "epoch": 1.1892065531641502, "grad_norm": 0.4981851577758789, "learning_rate": 6.780904744915387e-06, "loss": 0.4325, "step": 25914 }, { "epoch": 1.1892524436694047, "grad_norm": 0.46175143122673035, "learning_rate": 6.780675635025415e-06, "loss": 0.3626, "step": 25915 }, { "epoch": 1.1892983341746592, "grad_norm": 0.4537806808948517, "learning_rate": 6.780446520853482e-06, "loss": 0.2844, "step": 25916 }, { "epoch": 1.1893442246799137, "grad_norm": 0.46106258034706116, "learning_rate": 6.780217402400138e-06, "loss": 0.3602, "step": 25917 }, { "epoch": 1.1893901151851682, "grad_norm": 0.4781891703605652, "learning_rate": 6.779988279665933e-06, "loss": 0.3986, "step": 25918 }, { "epoch": 1.1894360056904227, "grad_norm": 0.5925687551498413, "learning_rate": 6.779759152651417e-06, "loss": 0.3336, "step": 25919 }, { "epoch": 1.189481896195677, "grad_norm": 0.45777544379234314, "learning_rate": 6.7795300213571426e-06, "loss": 0.3273, "step": 25920 }, { "epoch": 1.1895277867009315, "grad_norm": 0.4867023527622223, "learning_rate": 6.77930088578366e-06, "loss": 0.3918, "step": 25921 }, { "epoch": 1.189573677206186, "grad_norm": 0.5215076208114624, "learning_rate": 6.779071745931521e-06, "loss": 0.4414, "step": 25922 }, { "epoch": 1.1896195677114405, "grad_norm": 0.4816834330558777, "learning_rate": 6.778842601801275e-06, "loss": 0.3425, "step": 25923 }, { "epoch": 1.189665458216695, "grad_norm": 0.4663441777229309, "learning_rate": 6.778613453393472e-06, "loss": 0.3286, "step": 25924 }, { "epoch": 1.1897113487219495, "grad_norm": 0.4867030084133148, "learning_rate": 6.778384300708667e-06, "loss": 0.3435, "step": 25925 }, { "epoch": 1.189757239227204, "grad_norm": 0.4495197534561157, "learning_rate": 6.778155143747409e-06, "loss": 0.3137, "step": 25926 }, { "epoch": 1.1898031297324583, "grad_norm": 0.5067280530929565, "learning_rate": 6.77792598251025e-06, "loss": 0.3661, "step": 25927 }, { "epoch": 1.1898490202377128, "grad_norm": 0.44413623213768005, "learning_rate": 6.7776968169977386e-06, "loss": 0.3446, "step": 25928 }, { "epoch": 1.1898949107429673, "grad_norm": 0.46694329380989075, "learning_rate": 6.777467647210429e-06, "loss": 0.3936, "step": 25929 }, { "epoch": 1.1899408012482218, "grad_norm": 0.4870131313800812, "learning_rate": 6.77723847314887e-06, "loss": 0.3717, "step": 25930 }, { "epoch": 1.1899866917534763, "grad_norm": 0.4979531764984131, "learning_rate": 6.777009294813612e-06, "loss": 0.3763, "step": 25931 }, { "epoch": 1.1900325822587308, "grad_norm": 0.4934622347354889, "learning_rate": 6.776780112205209e-06, "loss": 0.4006, "step": 25932 }, { "epoch": 1.190078472763985, "grad_norm": 0.4944249093532562, "learning_rate": 6.77655092532421e-06, "loss": 0.3762, "step": 25933 }, { "epoch": 1.1901243632692395, "grad_norm": 0.49912315607070923, "learning_rate": 6.7763217341711685e-06, "loss": 0.3655, "step": 25934 }, { "epoch": 1.190170253774494, "grad_norm": 0.4628129005432129, "learning_rate": 6.776092538746632e-06, "loss": 0.4001, "step": 25935 }, { "epoch": 1.1902161442797485, "grad_norm": 0.4517259895801544, "learning_rate": 6.775863339051153e-06, "loss": 0.3198, "step": 25936 }, { "epoch": 1.190262034785003, "grad_norm": 0.5350093841552734, "learning_rate": 6.775634135085284e-06, "loss": 0.4095, "step": 25937 }, { "epoch": 1.1903079252902575, "grad_norm": 0.4601711928844452, "learning_rate": 6.775404926849575e-06, "loss": 0.3165, "step": 25938 }, { "epoch": 1.190353815795512, "grad_norm": 0.48809200525283813, "learning_rate": 6.775175714344579e-06, "loss": 0.3554, "step": 25939 }, { "epoch": 1.1903997063007663, "grad_norm": 0.47363296151161194, "learning_rate": 6.774946497570845e-06, "loss": 0.3667, "step": 25940 }, { "epoch": 1.1904455968060208, "grad_norm": 0.41922226548194885, "learning_rate": 6.774717276528923e-06, "loss": 0.2897, "step": 25941 }, { "epoch": 1.1904914873112753, "grad_norm": 0.47930824756622314, "learning_rate": 6.774488051219368e-06, "loss": 0.3724, "step": 25942 }, { "epoch": 1.1905373778165298, "grad_norm": 0.4527236819267273, "learning_rate": 6.774258821642727e-06, "loss": 0.3541, "step": 25943 }, { "epoch": 1.1905832683217843, "grad_norm": 0.47368934750556946, "learning_rate": 6.7740295877995565e-06, "loss": 0.3549, "step": 25944 }, { "epoch": 1.1906291588270386, "grad_norm": 0.4141766130924225, "learning_rate": 6.773800349690404e-06, "loss": 0.2789, "step": 25945 }, { "epoch": 1.190675049332293, "grad_norm": 0.48650825023651123, "learning_rate": 6.77357110731582e-06, "loss": 0.3666, "step": 25946 }, { "epoch": 1.1907209398375476, "grad_norm": 0.4485582113265991, "learning_rate": 6.7733418606763565e-06, "loss": 0.3516, "step": 25947 }, { "epoch": 1.190766830342802, "grad_norm": 0.5679374933242798, "learning_rate": 6.7731126097725685e-06, "loss": 0.3928, "step": 25948 }, { "epoch": 1.1908127208480566, "grad_norm": 0.4647097885608673, "learning_rate": 6.772883354605002e-06, "loss": 0.2742, "step": 25949 }, { "epoch": 1.190858611353311, "grad_norm": 0.49706825613975525, "learning_rate": 6.772654095174212e-06, "loss": 0.3798, "step": 25950 }, { "epoch": 1.1909045018585656, "grad_norm": 0.46696269512176514, "learning_rate": 6.7724248314807485e-06, "loss": 0.3478, "step": 25951 }, { "epoch": 1.1909503923638198, "grad_norm": 0.4528662860393524, "learning_rate": 6.7721955635251615e-06, "loss": 0.3582, "step": 25952 }, { "epoch": 1.1909962828690743, "grad_norm": 0.44142401218414307, "learning_rate": 6.771966291308003e-06, "loss": 0.3061, "step": 25953 }, { "epoch": 1.1910421733743288, "grad_norm": 0.5052987337112427, "learning_rate": 6.771737014829827e-06, "loss": 0.4165, "step": 25954 }, { "epoch": 1.1910880638795833, "grad_norm": 0.5155503153800964, "learning_rate": 6.771507734091182e-06, "loss": 0.3781, "step": 25955 }, { "epoch": 1.1911339543848378, "grad_norm": 0.47700414061546326, "learning_rate": 6.771278449092619e-06, "loss": 0.3653, "step": 25956 }, { "epoch": 1.1911798448900923, "grad_norm": 0.4596863389015198, "learning_rate": 6.77104915983469e-06, "loss": 0.3451, "step": 25957 }, { "epoch": 1.1912257353953466, "grad_norm": 0.49229907989501953, "learning_rate": 6.770819866317948e-06, "loss": 0.3955, "step": 25958 }, { "epoch": 1.191271625900601, "grad_norm": 0.48000815510749817, "learning_rate": 6.770590568542943e-06, "loss": 0.4009, "step": 25959 }, { "epoch": 1.1913175164058556, "grad_norm": 0.4557708501815796, "learning_rate": 6.770361266510226e-06, "loss": 0.3479, "step": 25960 }, { "epoch": 1.19136340691111, "grad_norm": 0.4382645785808563, "learning_rate": 6.770131960220349e-06, "loss": 0.3123, "step": 25961 }, { "epoch": 1.1914092974163646, "grad_norm": 0.4661819040775299, "learning_rate": 6.769902649673864e-06, "loss": 0.3936, "step": 25962 }, { "epoch": 1.191455187921619, "grad_norm": 0.4923458397388458, "learning_rate": 6.7696733348713206e-06, "loss": 0.4192, "step": 25963 }, { "epoch": 1.1915010784268736, "grad_norm": 0.4868899881839752, "learning_rate": 6.769444015813271e-06, "loss": 0.3863, "step": 25964 }, { "epoch": 1.1915469689321279, "grad_norm": 0.4725935161113739, "learning_rate": 6.7692146925002685e-06, "loss": 0.414, "step": 25965 }, { "epoch": 1.1915928594373824, "grad_norm": 0.48157671093940735, "learning_rate": 6.7689853649328625e-06, "loss": 0.4284, "step": 25966 }, { "epoch": 1.1916387499426369, "grad_norm": 0.441914439201355, "learning_rate": 6.7687560331116054e-06, "loss": 0.3121, "step": 25967 }, { "epoch": 1.1916846404478914, "grad_norm": 0.4885229170322418, "learning_rate": 6.768526697037048e-06, "loss": 0.3997, "step": 25968 }, { "epoch": 1.1917305309531459, "grad_norm": 0.44718679785728455, "learning_rate": 6.768297356709741e-06, "loss": 0.285, "step": 25969 }, { "epoch": 1.1917764214584003, "grad_norm": 0.48640960454940796, "learning_rate": 6.768068012130238e-06, "loss": 0.3566, "step": 25970 }, { "epoch": 1.1918223119636546, "grad_norm": 0.49467092752456665, "learning_rate": 6.767838663299089e-06, "loss": 0.4079, "step": 25971 }, { "epoch": 1.1918682024689091, "grad_norm": 0.49577146768569946, "learning_rate": 6.7676093102168475e-06, "loss": 0.4203, "step": 25972 }, { "epoch": 1.1919140929741636, "grad_norm": 0.4572511911392212, "learning_rate": 6.767379952884061e-06, "loss": 0.3519, "step": 25973 }, { "epoch": 1.1919599834794181, "grad_norm": 0.49097150564193726, "learning_rate": 6.767150591301286e-06, "loss": 0.3689, "step": 25974 }, { "epoch": 1.1920058739846726, "grad_norm": 0.44907674193382263, "learning_rate": 6.766921225469069e-06, "loss": 0.3174, "step": 25975 }, { "epoch": 1.1920517644899271, "grad_norm": 0.4941725432872772, "learning_rate": 6.766691855387965e-06, "loss": 0.4432, "step": 25976 }, { "epoch": 1.1920976549951816, "grad_norm": 0.46839800477027893, "learning_rate": 6.766462481058527e-06, "loss": 0.3289, "step": 25977 }, { "epoch": 1.1921435455004359, "grad_norm": 0.44031113386154175, "learning_rate": 6.766233102481301e-06, "loss": 0.3108, "step": 25978 }, { "epoch": 1.1921894360056904, "grad_norm": 0.47919028997421265, "learning_rate": 6.766003719656844e-06, "loss": 0.3907, "step": 25979 }, { "epoch": 1.1922353265109449, "grad_norm": 0.4434223771095276, "learning_rate": 6.765774332585704e-06, "loss": 0.3292, "step": 25980 }, { "epoch": 1.1922812170161994, "grad_norm": 0.457846462726593, "learning_rate": 6.765544941268433e-06, "loss": 0.3299, "step": 25981 }, { "epoch": 1.1923271075214539, "grad_norm": 0.42508599162101746, "learning_rate": 6.765315545705586e-06, "loss": 0.2625, "step": 25982 }, { "epoch": 1.1923729980267082, "grad_norm": 0.5122770667076111, "learning_rate": 6.7650861458977125e-06, "loss": 0.3031, "step": 25983 }, { "epoch": 1.1924188885319627, "grad_norm": 0.4588368535041809, "learning_rate": 6.764856741845362e-06, "loss": 0.3276, "step": 25984 }, { "epoch": 1.1924647790372171, "grad_norm": 0.4710131585597992, "learning_rate": 6.7646273335490895e-06, "loss": 0.3565, "step": 25985 }, { "epoch": 1.1925106695424716, "grad_norm": 0.4992418587207794, "learning_rate": 6.764397921009443e-06, "loss": 0.3728, "step": 25986 }, { "epoch": 1.1925565600477261, "grad_norm": 0.5704121589660645, "learning_rate": 6.764168504226979e-06, "loss": 0.3592, "step": 25987 }, { "epoch": 1.1926024505529806, "grad_norm": 0.5057510733604431, "learning_rate": 6.763939083202247e-06, "loss": 0.4246, "step": 25988 }, { "epoch": 1.1926483410582351, "grad_norm": 0.5328572392463684, "learning_rate": 6.763709657935795e-06, "loss": 0.381, "step": 25989 }, { "epoch": 1.1926942315634896, "grad_norm": 0.4945255517959595, "learning_rate": 6.763480228428179e-06, "loss": 0.4128, "step": 25990 }, { "epoch": 1.192740122068744, "grad_norm": 0.5088258385658264, "learning_rate": 6.763250794679951e-06, "loss": 0.408, "step": 25991 }, { "epoch": 1.1927860125739984, "grad_norm": 0.45027729868888855, "learning_rate": 6.76302135669166e-06, "loss": 0.3003, "step": 25992 }, { "epoch": 1.192831903079253, "grad_norm": 0.5133593678474426, "learning_rate": 6.76279191446386e-06, "loss": 0.4048, "step": 25993 }, { "epoch": 1.1928777935845074, "grad_norm": 0.42958834767341614, "learning_rate": 6.762562467997102e-06, "loss": 0.3216, "step": 25994 }, { "epoch": 1.192923684089762, "grad_norm": 0.48135435581207275, "learning_rate": 6.762333017291937e-06, "loss": 0.4292, "step": 25995 }, { "epoch": 1.1929695745950162, "grad_norm": 0.4664795994758606, "learning_rate": 6.7621035623489175e-06, "loss": 0.3474, "step": 25996 }, { "epoch": 1.1930154651002707, "grad_norm": 0.5002181529998779, "learning_rate": 6.7618741031685945e-06, "loss": 0.3412, "step": 25997 }, { "epoch": 1.1930613556055252, "grad_norm": 0.4589065909385681, "learning_rate": 6.761644639751521e-06, "loss": 0.3443, "step": 25998 }, { "epoch": 1.1931072461107797, "grad_norm": 0.5084084272384644, "learning_rate": 6.761415172098248e-06, "loss": 0.391, "step": 25999 }, { "epoch": 1.1931531366160342, "grad_norm": 0.462502658367157, "learning_rate": 6.761185700209327e-06, "loss": 0.3925, "step": 26000 }, { "epoch": 1.1931990271212887, "grad_norm": 0.504973292350769, "learning_rate": 6.7609562240853095e-06, "loss": 0.415, "step": 26001 }, { "epoch": 1.1932449176265432, "grad_norm": 0.4426838159561157, "learning_rate": 6.76072674372675e-06, "loss": 0.3282, "step": 26002 }, { "epoch": 1.1932908081317974, "grad_norm": 0.45195814967155457, "learning_rate": 6.7604972591341965e-06, "loss": 0.3168, "step": 26003 }, { "epoch": 1.193336698637052, "grad_norm": 0.5188601016998291, "learning_rate": 6.760267770308204e-06, "loss": 0.3947, "step": 26004 }, { "epoch": 1.1933825891423064, "grad_norm": 0.4773806631565094, "learning_rate": 6.760038277249324e-06, "loss": 0.4064, "step": 26005 }, { "epoch": 1.193428479647561, "grad_norm": 0.4679964780807495, "learning_rate": 6.759808779958105e-06, "loss": 0.3622, "step": 26006 }, { "epoch": 1.1934743701528154, "grad_norm": 0.4745357632637024, "learning_rate": 6.759579278435102e-06, "loss": 0.3311, "step": 26007 }, { "epoch": 1.19352026065807, "grad_norm": 0.45525866746902466, "learning_rate": 6.759349772680867e-06, "loss": 0.3717, "step": 26008 }, { "epoch": 1.1935661511633242, "grad_norm": 0.44895413517951965, "learning_rate": 6.7591202626959505e-06, "loss": 0.2865, "step": 26009 }, { "epoch": 1.1936120416685787, "grad_norm": 0.46721509099006653, "learning_rate": 6.758890748480906e-06, "loss": 0.3572, "step": 26010 }, { "epoch": 1.1936579321738332, "grad_norm": 0.49448081851005554, "learning_rate": 6.758661230036283e-06, "loss": 0.4002, "step": 26011 }, { "epoch": 1.1937038226790877, "grad_norm": 0.4414003789424896, "learning_rate": 6.758431707362635e-06, "loss": 0.3598, "step": 26012 }, { "epoch": 1.1937497131843422, "grad_norm": 0.46360763907432556, "learning_rate": 6.758202180460515e-06, "loss": 0.3176, "step": 26013 }, { "epoch": 1.1937956036895967, "grad_norm": 0.5055378079414368, "learning_rate": 6.757972649330471e-06, "loss": 0.3908, "step": 26014 }, { "epoch": 1.1938414941948512, "grad_norm": 0.4641452729701996, "learning_rate": 6.757743113973061e-06, "loss": 0.3369, "step": 26015 }, { "epoch": 1.1938873847001055, "grad_norm": 0.5657639503479004, "learning_rate": 6.757513574388832e-06, "loss": 0.3803, "step": 26016 }, { "epoch": 1.19393327520536, "grad_norm": 0.4505504071712494, "learning_rate": 6.7572840305783375e-06, "loss": 0.2996, "step": 26017 }, { "epoch": 1.1939791657106145, "grad_norm": 0.4757719337940216, "learning_rate": 6.757054482542128e-06, "loss": 0.3505, "step": 26018 }, { "epoch": 1.194025056215869, "grad_norm": 0.5091188549995422, "learning_rate": 6.756824930280759e-06, "loss": 0.4429, "step": 26019 }, { "epoch": 1.1940709467211235, "grad_norm": 0.44546669721603394, "learning_rate": 6.756595373794781e-06, "loss": 0.3109, "step": 26020 }, { "epoch": 1.194116837226378, "grad_norm": 0.43127334117889404, "learning_rate": 6.756365813084743e-06, "loss": 0.3169, "step": 26021 }, { "epoch": 1.1941627277316322, "grad_norm": 0.4896860718727112, "learning_rate": 6.756136248151202e-06, "loss": 0.3917, "step": 26022 }, { "epoch": 1.1942086182368867, "grad_norm": 0.49661147594451904, "learning_rate": 6.755906678994706e-06, "loss": 0.4013, "step": 26023 }, { "epoch": 1.1942545087421412, "grad_norm": 0.511570155620575, "learning_rate": 6.75567710561581e-06, "loss": 0.38, "step": 26024 }, { "epoch": 1.1943003992473957, "grad_norm": 0.43668752908706665, "learning_rate": 6.755447528015062e-06, "loss": 0.2987, "step": 26025 }, { "epoch": 1.1943462897526502, "grad_norm": 0.4372887909412384, "learning_rate": 6.75521794619302e-06, "loss": 0.3072, "step": 26026 }, { "epoch": 1.1943921802579047, "grad_norm": 0.4848904013633728, "learning_rate": 6.754988360150232e-06, "loss": 0.3867, "step": 26027 }, { "epoch": 1.1944380707631592, "grad_norm": 0.4502513110637665, "learning_rate": 6.75475876988725e-06, "loss": 0.3262, "step": 26028 }, { "epoch": 1.1944839612684135, "grad_norm": 0.45882129669189453, "learning_rate": 6.754529175404626e-06, "loss": 0.3628, "step": 26029 }, { "epoch": 1.194529851773668, "grad_norm": 0.47526127099990845, "learning_rate": 6.754299576702915e-06, "loss": 0.38, "step": 26030 }, { "epoch": 1.1945757422789225, "grad_norm": 0.4699185788631439, "learning_rate": 6.7540699737826666e-06, "loss": 0.3601, "step": 26031 }, { "epoch": 1.194621632784177, "grad_norm": 0.4853813946247101, "learning_rate": 6.753840366644433e-06, "loss": 0.3536, "step": 26032 }, { "epoch": 1.1946675232894315, "grad_norm": 0.536471962928772, "learning_rate": 6.753610755288769e-06, "loss": 0.4271, "step": 26033 }, { "epoch": 1.1947134137946858, "grad_norm": 0.4430462718009949, "learning_rate": 6.753381139716222e-06, "loss": 0.338, "step": 26034 }, { "epoch": 1.1947593042999403, "grad_norm": 0.4737832546234131, "learning_rate": 6.753151519927347e-06, "loss": 0.3228, "step": 26035 }, { "epoch": 1.1948051948051948, "grad_norm": 0.49070560932159424, "learning_rate": 6.752921895922697e-06, "loss": 0.4437, "step": 26036 }, { "epoch": 1.1948510853104493, "grad_norm": 0.529140055179596, "learning_rate": 6.752692267702823e-06, "loss": 0.468, "step": 26037 }, { "epoch": 1.1948969758157038, "grad_norm": 0.4984104633331299, "learning_rate": 6.752462635268275e-06, "loss": 0.3946, "step": 26038 }, { "epoch": 1.1949428663209583, "grad_norm": 0.4992067813873291, "learning_rate": 6.752232998619611e-06, "loss": 0.4241, "step": 26039 }, { "epoch": 1.1949887568262128, "grad_norm": 0.4722709059715271, "learning_rate": 6.752003357757377e-06, "loss": 0.3157, "step": 26040 }, { "epoch": 1.195034647331467, "grad_norm": 0.45488083362579346, "learning_rate": 6.751773712682129e-06, "loss": 0.3291, "step": 26041 }, { "epoch": 1.1950805378367215, "grad_norm": 0.44097647070884705, "learning_rate": 6.75154406339442e-06, "loss": 0.3208, "step": 26042 }, { "epoch": 1.195126428341976, "grad_norm": 0.4716513156890869, "learning_rate": 6.751314409894797e-06, "loss": 0.352, "step": 26043 }, { "epoch": 1.1951723188472305, "grad_norm": 0.5054554343223572, "learning_rate": 6.751084752183817e-06, "loss": 0.4051, "step": 26044 }, { "epoch": 1.195218209352485, "grad_norm": 0.5112261772155762, "learning_rate": 6.750855090262031e-06, "loss": 0.4042, "step": 26045 }, { "epoch": 1.1952640998577395, "grad_norm": 0.44552770256996155, "learning_rate": 6.750625424129989e-06, "loss": 0.324, "step": 26046 }, { "epoch": 1.1953099903629938, "grad_norm": 0.4912937581539154, "learning_rate": 6.750395753788248e-06, "loss": 0.4464, "step": 26047 }, { "epoch": 1.1953558808682483, "grad_norm": 0.4276401698589325, "learning_rate": 6.750166079237357e-06, "loss": 0.2715, "step": 26048 }, { "epoch": 1.1954017713735028, "grad_norm": 0.4514125883579254, "learning_rate": 6.749936400477869e-06, "loss": 0.3211, "step": 26049 }, { "epoch": 1.1954476618787573, "grad_norm": 0.48245614767074585, "learning_rate": 6.749706717510336e-06, "loss": 0.3617, "step": 26050 }, { "epoch": 1.1954935523840118, "grad_norm": 0.44183382391929626, "learning_rate": 6.749477030335309e-06, "loss": 0.3156, "step": 26051 }, { "epoch": 1.1955394428892663, "grad_norm": 0.4564642012119293, "learning_rate": 6.749247338953344e-06, "loss": 0.3091, "step": 26052 }, { "epoch": 1.1955853333945208, "grad_norm": 0.453042209148407, "learning_rate": 6.749017643364992e-06, "loss": 0.348, "step": 26053 }, { "epoch": 1.195631223899775, "grad_norm": 0.4675394296646118, "learning_rate": 6.7487879435708024e-06, "loss": 0.3484, "step": 26054 }, { "epoch": 1.1956771144050296, "grad_norm": 0.4503262937068939, "learning_rate": 6.74855823957133e-06, "loss": 0.3312, "step": 26055 }, { "epoch": 1.195723004910284, "grad_norm": 0.4758007824420929, "learning_rate": 6.748328531367128e-06, "loss": 0.3971, "step": 26056 }, { "epoch": 1.1957688954155385, "grad_norm": 0.4598787724971771, "learning_rate": 6.748098818958747e-06, "loss": 0.3515, "step": 26057 }, { "epoch": 1.195814785920793, "grad_norm": 0.424710750579834, "learning_rate": 6.7478691023467416e-06, "loss": 0.2848, "step": 26058 }, { "epoch": 1.1958606764260475, "grad_norm": 0.5040894746780396, "learning_rate": 6.7476393815316606e-06, "loss": 0.3673, "step": 26059 }, { "epoch": 1.1959065669313018, "grad_norm": 0.48820754885673523, "learning_rate": 6.747409656514059e-06, "loss": 0.3732, "step": 26060 }, { "epoch": 1.1959524574365563, "grad_norm": 0.4702054560184479, "learning_rate": 6.74717992729449e-06, "loss": 0.3873, "step": 26061 }, { "epoch": 1.1959983479418108, "grad_norm": 0.4773827791213989, "learning_rate": 6.746950193873503e-06, "loss": 0.3799, "step": 26062 }, { "epoch": 1.1960442384470653, "grad_norm": 0.5489702820777893, "learning_rate": 6.746720456251652e-06, "loss": 0.4463, "step": 26063 }, { "epoch": 1.1960901289523198, "grad_norm": 0.45339131355285645, "learning_rate": 6.746490714429491e-06, "loss": 0.3342, "step": 26064 }, { "epoch": 1.1961360194575743, "grad_norm": 0.47626203298568726, "learning_rate": 6.746260968407569e-06, "loss": 0.3506, "step": 26065 }, { "epoch": 1.1961819099628288, "grad_norm": 0.47827503085136414, "learning_rate": 6.746031218186442e-06, "loss": 0.3896, "step": 26066 }, { "epoch": 1.196227800468083, "grad_norm": 0.4923810362815857, "learning_rate": 6.745801463766661e-06, "loss": 0.3848, "step": 26067 }, { "epoch": 1.1962736909733376, "grad_norm": 0.5078055262565613, "learning_rate": 6.7455717051487775e-06, "loss": 0.4216, "step": 26068 }, { "epoch": 1.196319581478592, "grad_norm": 0.45491594076156616, "learning_rate": 6.745341942333347e-06, "loss": 0.3477, "step": 26069 }, { "epoch": 1.1963654719838466, "grad_norm": 0.4678437113761902, "learning_rate": 6.7451121753209194e-06, "loss": 0.3441, "step": 26070 }, { "epoch": 1.196411362489101, "grad_norm": 0.4935905635356903, "learning_rate": 6.744882404112046e-06, "loss": 0.3586, "step": 26071 }, { "epoch": 1.1964572529943553, "grad_norm": 0.42168205976486206, "learning_rate": 6.744652628707282e-06, "loss": 0.2881, "step": 26072 }, { "epoch": 1.1965031434996098, "grad_norm": 0.4519284665584564, "learning_rate": 6.74442284910718e-06, "loss": 0.3729, "step": 26073 }, { "epoch": 1.1965490340048643, "grad_norm": 0.5226909518241882, "learning_rate": 6.74419306531229e-06, "loss": 0.3775, "step": 26074 }, { "epoch": 1.1965949245101188, "grad_norm": 0.4678846299648285, "learning_rate": 6.743963277323169e-06, "loss": 0.3218, "step": 26075 }, { "epoch": 1.1966408150153733, "grad_norm": 0.4406469762325287, "learning_rate": 6.743733485140363e-06, "loss": 0.2907, "step": 26076 }, { "epoch": 1.1966867055206278, "grad_norm": 0.5737265348434448, "learning_rate": 6.74350368876443e-06, "loss": 0.416, "step": 26077 }, { "epoch": 1.1967325960258823, "grad_norm": 0.4920317232608795, "learning_rate": 6.743273888195922e-06, "loss": 0.367, "step": 26078 }, { "epoch": 1.1967784865311368, "grad_norm": 0.4715070128440857, "learning_rate": 6.743044083435388e-06, "loss": 0.4005, "step": 26079 }, { "epoch": 1.196824377036391, "grad_norm": 0.432295024394989, "learning_rate": 6.742814274483385e-06, "loss": 0.3024, "step": 26080 }, { "epoch": 1.1968702675416456, "grad_norm": 0.49165648221969604, "learning_rate": 6.742584461340465e-06, "loss": 0.4204, "step": 26081 }, { "epoch": 1.1969161580469, "grad_norm": 0.450988233089447, "learning_rate": 6.742354644007176e-06, "loss": 0.3148, "step": 26082 }, { "epoch": 1.1969620485521546, "grad_norm": 0.4914509952068329, "learning_rate": 6.742124822484075e-06, "loss": 0.3889, "step": 26083 }, { "epoch": 1.197007939057409, "grad_norm": 0.46313297748565674, "learning_rate": 6.741894996771714e-06, "loss": 0.3699, "step": 26084 }, { "epoch": 1.1970538295626634, "grad_norm": 0.4792465567588806, "learning_rate": 6.741665166870646e-06, "loss": 0.3049, "step": 26085 }, { "epoch": 1.1970997200679179, "grad_norm": 0.4526558518409729, "learning_rate": 6.741435332781423e-06, "loss": 0.3691, "step": 26086 }, { "epoch": 1.1971456105731724, "grad_norm": 0.4668537676334381, "learning_rate": 6.741205494504597e-06, "loss": 0.3487, "step": 26087 }, { "epoch": 1.1971915010784269, "grad_norm": 0.4602303206920624, "learning_rate": 6.740975652040721e-06, "loss": 0.3887, "step": 26088 }, { "epoch": 1.1972373915836814, "grad_norm": 0.4525696933269501, "learning_rate": 6.740745805390349e-06, "loss": 0.3406, "step": 26089 }, { "epoch": 1.1972832820889359, "grad_norm": 0.4667637050151825, "learning_rate": 6.7405159545540325e-06, "loss": 0.3631, "step": 26090 }, { "epoch": 1.1973291725941904, "grad_norm": 0.4322887659072876, "learning_rate": 6.740286099532324e-06, "loss": 0.302, "step": 26091 }, { "epoch": 1.1973750630994446, "grad_norm": 0.427450031042099, "learning_rate": 6.740056240325776e-06, "loss": 0.306, "step": 26092 }, { "epoch": 1.1974209536046991, "grad_norm": 0.48566752672195435, "learning_rate": 6.739826376934943e-06, "loss": 0.4197, "step": 26093 }, { "epoch": 1.1974668441099536, "grad_norm": 0.46061134338378906, "learning_rate": 6.739596509360376e-06, "loss": 0.3351, "step": 26094 }, { "epoch": 1.1975127346152081, "grad_norm": 0.4719395935535431, "learning_rate": 6.739366637602629e-06, "loss": 0.3298, "step": 26095 }, { "epoch": 1.1975586251204626, "grad_norm": 0.46186017990112305, "learning_rate": 6.739136761662254e-06, "loss": 0.3901, "step": 26096 }, { "epoch": 1.1976045156257171, "grad_norm": 0.46859508752822876, "learning_rate": 6.738906881539804e-06, "loss": 0.3761, "step": 26097 }, { "epoch": 1.1976504061309714, "grad_norm": 0.46848782896995544, "learning_rate": 6.7386769972358324e-06, "loss": 0.3675, "step": 26098 }, { "epoch": 1.197696296636226, "grad_norm": 0.47379419207572937, "learning_rate": 6.7384471087508896e-06, "loss": 0.4077, "step": 26099 }, { "epoch": 1.1977421871414804, "grad_norm": 0.4640737473964691, "learning_rate": 6.738217216085532e-06, "loss": 0.3888, "step": 26100 }, { "epoch": 1.197788077646735, "grad_norm": 0.46600186824798584, "learning_rate": 6.73798731924031e-06, "loss": 0.3275, "step": 26101 }, { "epoch": 1.1978339681519894, "grad_norm": 0.453753799200058, "learning_rate": 6.737757418215777e-06, "loss": 0.3375, "step": 26102 }, { "epoch": 1.197879858657244, "grad_norm": 0.481778085231781, "learning_rate": 6.737527513012485e-06, "loss": 0.3627, "step": 26103 }, { "epoch": 1.1979257491624984, "grad_norm": 0.4675348997116089, "learning_rate": 6.737297603630989e-06, "loss": 0.3111, "step": 26104 }, { "epoch": 1.1979716396677527, "grad_norm": 0.45217376947402954, "learning_rate": 6.73706769007184e-06, "loss": 0.3208, "step": 26105 }, { "epoch": 1.1980175301730072, "grad_norm": 0.4904526472091675, "learning_rate": 6.736837772335592e-06, "loss": 0.3744, "step": 26106 }, { "epoch": 1.1980634206782617, "grad_norm": 0.47067174315452576, "learning_rate": 6.736607850422796e-06, "loss": 0.3475, "step": 26107 }, { "epoch": 1.1981093111835162, "grad_norm": 0.4618355631828308, "learning_rate": 6.7363779243340075e-06, "loss": 0.369, "step": 26108 }, { "epoch": 1.1981552016887707, "grad_norm": 0.47348490357398987, "learning_rate": 6.736147994069779e-06, "loss": 0.4052, "step": 26109 }, { "epoch": 1.1982010921940252, "grad_norm": 0.4952775239944458, "learning_rate": 6.73591805963066e-06, "loss": 0.3741, "step": 26110 }, { "epoch": 1.1982469826992794, "grad_norm": 0.43276041746139526, "learning_rate": 6.735688121017207e-06, "loss": 0.3345, "step": 26111 }, { "epoch": 1.198292873204534, "grad_norm": 0.47764596343040466, "learning_rate": 6.735458178229972e-06, "loss": 0.3609, "step": 26112 }, { "epoch": 1.1983387637097884, "grad_norm": 0.49317577481269836, "learning_rate": 6.735228231269508e-06, "loss": 0.4288, "step": 26113 }, { "epoch": 1.198384654215043, "grad_norm": 0.48359501361846924, "learning_rate": 6.734998280136367e-06, "loss": 0.3724, "step": 26114 }, { "epoch": 1.1984305447202974, "grad_norm": 0.48468223214149475, "learning_rate": 6.734768324831104e-06, "loss": 0.3476, "step": 26115 }, { "epoch": 1.198476435225552, "grad_norm": 0.5027400851249695, "learning_rate": 6.734538365354269e-06, "loss": 0.4159, "step": 26116 }, { "epoch": 1.1985223257308064, "grad_norm": 0.45357784628868103, "learning_rate": 6.734308401706417e-06, "loss": 0.3586, "step": 26117 }, { "epoch": 1.1985682162360607, "grad_norm": 0.47452646493911743, "learning_rate": 6.734078433888102e-06, "loss": 0.3431, "step": 26118 }, { "epoch": 1.1986141067413152, "grad_norm": 0.4654628336429596, "learning_rate": 6.733848461899875e-06, "loss": 0.3542, "step": 26119 }, { "epoch": 1.1986599972465697, "grad_norm": 0.5092141032218933, "learning_rate": 6.733618485742289e-06, "loss": 0.4245, "step": 26120 }, { "epoch": 1.1987058877518242, "grad_norm": 0.5760002732276917, "learning_rate": 6.733388505415899e-06, "loss": 0.447, "step": 26121 }, { "epoch": 1.1987517782570787, "grad_norm": 0.4981926679611206, "learning_rate": 6.7331585209212555e-06, "loss": 0.4144, "step": 26122 }, { "epoch": 1.198797668762333, "grad_norm": 0.4596744775772095, "learning_rate": 6.7329285322589135e-06, "loss": 0.2877, "step": 26123 }, { "epoch": 1.1988435592675875, "grad_norm": 0.4607294201850891, "learning_rate": 6.732698539429425e-06, "loss": 0.3885, "step": 26124 }, { "epoch": 1.198889449772842, "grad_norm": 0.4720578193664551, "learning_rate": 6.732468542433344e-06, "loss": 0.3805, "step": 26125 }, { "epoch": 1.1989353402780965, "grad_norm": 0.4957895576953888, "learning_rate": 6.732238541271222e-06, "loss": 0.3739, "step": 26126 }, { "epoch": 1.198981230783351, "grad_norm": 0.48680728673934937, "learning_rate": 6.732008535943613e-06, "loss": 0.409, "step": 26127 }, { "epoch": 1.1990271212886054, "grad_norm": 0.4790697395801544, "learning_rate": 6.73177852645107e-06, "loss": 0.3344, "step": 26128 }, { "epoch": 1.19907301179386, "grad_norm": 0.4455365538597107, "learning_rate": 6.731548512794147e-06, "loss": 0.3461, "step": 26129 }, { "epoch": 1.1991189022991142, "grad_norm": 0.4932865500450134, "learning_rate": 6.7313184949733965e-06, "loss": 0.3814, "step": 26130 }, { "epoch": 1.1991647928043687, "grad_norm": 0.47092384099960327, "learning_rate": 6.731088472989371e-06, "loss": 0.344, "step": 26131 }, { "epoch": 1.1992106833096232, "grad_norm": 0.4598398506641388, "learning_rate": 6.730858446842624e-06, "loss": 0.3704, "step": 26132 }, { "epoch": 1.1992565738148777, "grad_norm": 0.5634466409683228, "learning_rate": 6.730628416533709e-06, "loss": 0.5104, "step": 26133 }, { "epoch": 1.1993024643201322, "grad_norm": 0.48962754011154175, "learning_rate": 6.730398382063179e-06, "loss": 0.3753, "step": 26134 }, { "epoch": 1.1993483548253867, "grad_norm": 0.45252367854118347, "learning_rate": 6.730168343431587e-06, "loss": 0.3523, "step": 26135 }, { "epoch": 1.199394245330641, "grad_norm": 0.5442420244216919, "learning_rate": 6.729938300639485e-06, "loss": 0.3365, "step": 26136 }, { "epoch": 1.1994401358358955, "grad_norm": 0.46540549397468567, "learning_rate": 6.729708253687428e-06, "loss": 0.3252, "step": 26137 }, { "epoch": 1.19948602634115, "grad_norm": 0.46021541953086853, "learning_rate": 6.729478202575969e-06, "loss": 0.3588, "step": 26138 }, { "epoch": 1.1995319168464045, "grad_norm": 0.5093053579330444, "learning_rate": 6.72924814730566e-06, "loss": 0.3894, "step": 26139 }, { "epoch": 1.199577807351659, "grad_norm": 0.5090047121047974, "learning_rate": 6.729018087877055e-06, "loss": 0.3832, "step": 26140 }, { "epoch": 1.1996236978569135, "grad_norm": 0.47606897354125977, "learning_rate": 6.728788024290709e-06, "loss": 0.3783, "step": 26141 }, { "epoch": 1.199669588362168, "grad_norm": 0.5121620893478394, "learning_rate": 6.728557956547171e-06, "loss": 0.3331, "step": 26142 }, { "epoch": 1.1997154788674222, "grad_norm": 0.4909416735172272, "learning_rate": 6.728327884646999e-06, "loss": 0.3806, "step": 26143 }, { "epoch": 1.1997613693726767, "grad_norm": 0.5029606223106384, "learning_rate": 6.728097808590741e-06, "loss": 0.4086, "step": 26144 }, { "epoch": 1.1998072598779312, "grad_norm": 0.5282790660858154, "learning_rate": 6.727867728378956e-06, "loss": 0.3713, "step": 26145 }, { "epoch": 1.1998531503831857, "grad_norm": 0.4807416796684265, "learning_rate": 6.727637644012193e-06, "loss": 0.3749, "step": 26146 }, { "epoch": 1.1998990408884402, "grad_norm": 0.4605295658111572, "learning_rate": 6.727407555491007e-06, "loss": 0.3555, "step": 26147 }, { "epoch": 1.1999449313936947, "grad_norm": 0.5081165432929993, "learning_rate": 6.7271774628159506e-06, "loss": 0.325, "step": 26148 }, { "epoch": 1.199990821898949, "grad_norm": 0.4737563729286194, "learning_rate": 6.726947365987578e-06, "loss": 0.3631, "step": 26149 }, { "epoch": 1.2000367124042035, "grad_norm": 0.5027435421943665, "learning_rate": 6.7267172650064414e-06, "loss": 0.4103, "step": 26150 }, { "epoch": 1.200082602909458, "grad_norm": 0.4983493983745575, "learning_rate": 6.7264871598730966e-06, "loss": 0.4112, "step": 26151 }, { "epoch": 1.2001284934147125, "grad_norm": 0.4430161416530609, "learning_rate": 6.726257050588094e-06, "loss": 0.3158, "step": 26152 }, { "epoch": 1.200174383919967, "grad_norm": 0.48199135065078735, "learning_rate": 6.726026937151987e-06, "loss": 0.3883, "step": 26153 }, { "epoch": 1.2002202744252215, "grad_norm": 0.46625375747680664, "learning_rate": 6.72579681956533e-06, "loss": 0.3563, "step": 26154 }, { "epoch": 1.200266164930476, "grad_norm": 0.49773257970809937, "learning_rate": 6.725566697828678e-06, "loss": 0.398, "step": 26155 }, { "epoch": 1.2003120554357303, "grad_norm": 0.5204520225524902, "learning_rate": 6.725336571942582e-06, "loss": 0.3651, "step": 26156 }, { "epoch": 1.2003579459409848, "grad_norm": 0.46030673384666443, "learning_rate": 6.725106441907596e-06, "loss": 0.3302, "step": 26157 }, { "epoch": 1.2004038364462393, "grad_norm": 0.4558969736099243, "learning_rate": 6.724876307724273e-06, "loss": 0.3558, "step": 26158 }, { "epoch": 1.2004497269514938, "grad_norm": 0.49725109338760376, "learning_rate": 6.7246461693931665e-06, "loss": 0.4036, "step": 26159 }, { "epoch": 1.2004956174567483, "grad_norm": 0.4795331358909607, "learning_rate": 6.724416026914832e-06, "loss": 0.4136, "step": 26160 }, { "epoch": 1.2005415079620025, "grad_norm": 0.4314914047718048, "learning_rate": 6.7241858802898195e-06, "loss": 0.323, "step": 26161 }, { "epoch": 1.200587398467257, "grad_norm": 0.47409364581108093, "learning_rate": 6.723955729518685e-06, "loss": 0.4046, "step": 26162 }, { "epoch": 1.2006332889725115, "grad_norm": 0.45685744285583496, "learning_rate": 6.7237255746019816e-06, "loss": 0.3049, "step": 26163 }, { "epoch": 1.200679179477766, "grad_norm": 0.42924290895462036, "learning_rate": 6.7234954155402606e-06, "loss": 0.2898, "step": 26164 }, { "epoch": 1.2007250699830205, "grad_norm": 0.44786280393600464, "learning_rate": 6.723265252334077e-06, "loss": 0.3286, "step": 26165 }, { "epoch": 1.200770960488275, "grad_norm": 0.4342515170574188, "learning_rate": 6.723035084983987e-06, "loss": 0.3151, "step": 26166 }, { "epoch": 1.2008168509935295, "grad_norm": 0.4713689088821411, "learning_rate": 6.722804913490539e-06, "loss": 0.4153, "step": 26167 }, { "epoch": 1.200862741498784, "grad_norm": 0.4732412099838257, "learning_rate": 6.7225747378542894e-06, "loss": 0.3737, "step": 26168 }, { "epoch": 1.2009086320040383, "grad_norm": 0.4564620554447174, "learning_rate": 6.722344558075792e-06, "loss": 0.3229, "step": 26169 }, { "epoch": 1.2009545225092928, "grad_norm": 0.5115981101989746, "learning_rate": 6.7221143741555985e-06, "loss": 0.4327, "step": 26170 }, { "epoch": 1.2010004130145473, "grad_norm": 0.4855562746524811, "learning_rate": 6.7218841860942654e-06, "loss": 0.4292, "step": 26171 }, { "epoch": 1.2010463035198018, "grad_norm": 0.4979616403579712, "learning_rate": 6.7216539938923406e-06, "loss": 0.3647, "step": 26172 }, { "epoch": 1.2010921940250563, "grad_norm": 0.4586527943611145, "learning_rate": 6.721423797550385e-06, "loss": 0.3281, "step": 26173 }, { "epoch": 1.2011380845303106, "grad_norm": 0.40455126762390137, "learning_rate": 6.7211935970689464e-06, "loss": 0.255, "step": 26174 }, { "epoch": 1.201183975035565, "grad_norm": 0.4757690727710724, "learning_rate": 6.720963392448581e-06, "loss": 0.356, "step": 26175 }, { "epoch": 1.2012298655408196, "grad_norm": 0.45172712206840515, "learning_rate": 6.720733183689842e-06, "loss": 0.3068, "step": 26176 }, { "epoch": 1.201275756046074, "grad_norm": 0.48597273230552673, "learning_rate": 6.720502970793283e-06, "loss": 0.3982, "step": 26177 }, { "epoch": 1.2013216465513286, "grad_norm": 0.4706517457962036, "learning_rate": 6.720272753759457e-06, "loss": 0.356, "step": 26178 }, { "epoch": 1.201367537056583, "grad_norm": 0.5427975654602051, "learning_rate": 6.720042532588917e-06, "loss": 0.43, "step": 26179 }, { "epoch": 1.2014134275618376, "grad_norm": 0.4759492874145508, "learning_rate": 6.719812307282219e-06, "loss": 0.3326, "step": 26180 }, { "epoch": 1.2014593180670918, "grad_norm": 0.462455689907074, "learning_rate": 6.719582077839915e-06, "loss": 0.3886, "step": 26181 }, { "epoch": 1.2015052085723463, "grad_norm": 0.4499509632587433, "learning_rate": 6.719351844262557e-06, "loss": 0.3279, "step": 26182 }, { "epoch": 1.2015510990776008, "grad_norm": 0.47346004843711853, "learning_rate": 6.719121606550702e-06, "loss": 0.3869, "step": 26183 }, { "epoch": 1.2015969895828553, "grad_norm": 0.45562854409217834, "learning_rate": 6.718891364704902e-06, "loss": 0.3497, "step": 26184 }, { "epoch": 1.2016428800881098, "grad_norm": 0.4594947397708893, "learning_rate": 6.718661118725711e-06, "loss": 0.3634, "step": 26185 }, { "epoch": 1.2016887705933643, "grad_norm": 0.4732590615749359, "learning_rate": 6.7184308686136835e-06, "loss": 0.357, "step": 26186 }, { "epoch": 1.2017346610986186, "grad_norm": 0.43773582577705383, "learning_rate": 6.71820061436937e-06, "loss": 0.2915, "step": 26187 }, { "epoch": 1.201780551603873, "grad_norm": 0.47648435831069946, "learning_rate": 6.717970355993327e-06, "loss": 0.4162, "step": 26188 }, { "epoch": 1.2018264421091276, "grad_norm": 0.49706488847732544, "learning_rate": 6.717740093486109e-06, "loss": 0.4175, "step": 26189 }, { "epoch": 1.201872332614382, "grad_norm": 0.4869595766067505, "learning_rate": 6.7175098268482666e-06, "loss": 0.4158, "step": 26190 }, { "epoch": 1.2019182231196366, "grad_norm": 0.42260971665382385, "learning_rate": 6.717279556080355e-06, "loss": 0.2884, "step": 26191 }, { "epoch": 1.201964113624891, "grad_norm": 0.49824056029319763, "learning_rate": 6.717049281182928e-06, "loss": 0.3658, "step": 26192 }, { "epoch": 1.2020100041301456, "grad_norm": 0.5208796262741089, "learning_rate": 6.716819002156539e-06, "loss": 0.4803, "step": 26193 }, { "epoch": 1.2020558946353999, "grad_norm": 0.44157856702804565, "learning_rate": 6.7165887190017445e-06, "loss": 0.2997, "step": 26194 }, { "epoch": 1.2021017851406544, "grad_norm": 0.46510204672813416, "learning_rate": 6.716358431719095e-06, "loss": 0.4025, "step": 26195 }, { "epoch": 1.2021476756459089, "grad_norm": 0.5099049210548401, "learning_rate": 6.716128140309144e-06, "loss": 0.3442, "step": 26196 }, { "epoch": 1.2021935661511634, "grad_norm": 0.4632403552532196, "learning_rate": 6.715897844772446e-06, "loss": 0.3525, "step": 26197 }, { "epoch": 1.2022394566564178, "grad_norm": 0.4940589666366577, "learning_rate": 6.715667545109556e-06, "loss": 0.4055, "step": 26198 }, { "epoch": 1.2022853471616723, "grad_norm": 0.4864792227745056, "learning_rate": 6.715437241321026e-06, "loss": 0.3509, "step": 26199 }, { "epoch": 1.2023312376669266, "grad_norm": 0.4595732092857361, "learning_rate": 6.7152069334074145e-06, "loss": 0.3577, "step": 26200 }, { "epoch": 1.2023771281721811, "grad_norm": 0.43776094913482666, "learning_rate": 6.714976621369269e-06, "loss": 0.3118, "step": 26201 }, { "epoch": 1.2024230186774356, "grad_norm": 0.45918309688568115, "learning_rate": 6.714746305207146e-06, "loss": 0.3461, "step": 26202 }, { "epoch": 1.2024689091826901, "grad_norm": 0.5382105112075806, "learning_rate": 6.714515984921599e-06, "loss": 0.4213, "step": 26203 }, { "epoch": 1.2025147996879446, "grad_norm": 0.44428038597106934, "learning_rate": 6.714285660513182e-06, "loss": 0.3214, "step": 26204 }, { "epoch": 1.2025606901931991, "grad_norm": 0.49366116523742676, "learning_rate": 6.714055331982451e-06, "loss": 0.3924, "step": 26205 }, { "epoch": 1.2026065806984536, "grad_norm": 0.4439978301525116, "learning_rate": 6.713824999329957e-06, "loss": 0.3111, "step": 26206 }, { "epoch": 1.2026524712037079, "grad_norm": 0.4891974627971649, "learning_rate": 6.713594662556254e-06, "loss": 0.3966, "step": 26207 }, { "epoch": 1.2026983617089624, "grad_norm": 0.42595887184143066, "learning_rate": 6.713364321661898e-06, "loss": 0.3179, "step": 26208 }, { "epoch": 1.2027442522142169, "grad_norm": 0.4877955913543701, "learning_rate": 6.71313397664744e-06, "loss": 0.3652, "step": 26209 }, { "epoch": 1.2027901427194714, "grad_norm": 0.4850307106971741, "learning_rate": 6.712903627513436e-06, "loss": 0.4385, "step": 26210 }, { "epoch": 1.2028360332247259, "grad_norm": 0.46195217967033386, "learning_rate": 6.712673274260441e-06, "loss": 0.3334, "step": 26211 }, { "epoch": 1.2028819237299802, "grad_norm": 0.46414753794670105, "learning_rate": 6.712442916889005e-06, "loss": 0.3513, "step": 26212 }, { "epoch": 1.2029278142352346, "grad_norm": 0.5122057795524597, "learning_rate": 6.712212555399684e-06, "loss": 0.4358, "step": 26213 }, { "epoch": 1.2029737047404891, "grad_norm": 0.4761491119861603, "learning_rate": 6.711982189793034e-06, "loss": 0.3571, "step": 26214 }, { "epoch": 1.2030195952457436, "grad_norm": 0.5160741209983826, "learning_rate": 6.711751820069607e-06, "loss": 0.4231, "step": 26215 }, { "epoch": 1.2030654857509981, "grad_norm": 0.43938082456588745, "learning_rate": 6.711521446229956e-06, "loss": 0.3292, "step": 26216 }, { "epoch": 1.2031113762562526, "grad_norm": 0.5030285716056824, "learning_rate": 6.711291068274637e-06, "loss": 0.4002, "step": 26217 }, { "epoch": 1.2031572667615071, "grad_norm": 0.49793127179145813, "learning_rate": 6.711060686204203e-06, "loss": 0.4086, "step": 26218 }, { "epoch": 1.2032031572667614, "grad_norm": 0.4553736746311188, "learning_rate": 6.710830300019207e-06, "loss": 0.3506, "step": 26219 }, { "epoch": 1.203249047772016, "grad_norm": 0.4768955707550049, "learning_rate": 6.710599909720206e-06, "loss": 0.3294, "step": 26220 }, { "epoch": 1.2032949382772704, "grad_norm": 0.47765055298805237, "learning_rate": 6.71036951530775e-06, "loss": 0.328, "step": 26221 }, { "epoch": 1.203340828782525, "grad_norm": 0.49707701802253723, "learning_rate": 6.710139116782397e-06, "loss": 0.4255, "step": 26222 }, { "epoch": 1.2033867192877794, "grad_norm": 0.5067639946937561, "learning_rate": 6.709908714144697e-06, "loss": 0.3856, "step": 26223 }, { "epoch": 1.203432609793034, "grad_norm": 0.5027065277099609, "learning_rate": 6.7096783073952074e-06, "loss": 0.4194, "step": 26224 }, { "epoch": 1.2034785002982882, "grad_norm": 0.4665835499763489, "learning_rate": 6.709447896534483e-06, "loss": 0.3319, "step": 26225 }, { "epoch": 1.2035243908035427, "grad_norm": 0.4455587565898895, "learning_rate": 6.709217481563073e-06, "loss": 0.3343, "step": 26226 }, { "epoch": 1.2035702813087972, "grad_norm": 0.5324321985244751, "learning_rate": 6.708987062481536e-06, "loss": 0.4107, "step": 26227 }, { "epoch": 1.2036161718140517, "grad_norm": 0.4528292417526245, "learning_rate": 6.708756639290424e-06, "loss": 0.3384, "step": 26228 }, { "epoch": 1.2036620623193062, "grad_norm": 0.5095716714859009, "learning_rate": 6.7085262119902905e-06, "loss": 0.4019, "step": 26229 }, { "epoch": 1.2037079528245607, "grad_norm": 0.4291735887527466, "learning_rate": 6.7082957805816916e-06, "loss": 0.2982, "step": 26230 }, { "epoch": 1.2037538433298152, "grad_norm": 0.46168407797813416, "learning_rate": 6.708065345065181e-06, "loss": 0.3472, "step": 26231 }, { "epoch": 1.2037997338350694, "grad_norm": 0.5247793793678284, "learning_rate": 6.707834905441312e-06, "loss": 0.43, "step": 26232 }, { "epoch": 1.203845624340324, "grad_norm": 0.4251473844051361, "learning_rate": 6.707604461710639e-06, "loss": 0.3184, "step": 26233 }, { "epoch": 1.2038915148455784, "grad_norm": 0.4705866277217865, "learning_rate": 6.707374013873716e-06, "loss": 0.3601, "step": 26234 }, { "epoch": 1.203937405350833, "grad_norm": 0.46238499879837036, "learning_rate": 6.707143561931097e-06, "loss": 0.3592, "step": 26235 }, { "epoch": 1.2039832958560874, "grad_norm": 0.4436958134174347, "learning_rate": 6.706913105883338e-06, "loss": 0.2951, "step": 26236 }, { "epoch": 1.204029186361342, "grad_norm": 0.5135268568992615, "learning_rate": 6.70668264573099e-06, "loss": 0.3812, "step": 26237 }, { "epoch": 1.2040750768665962, "grad_norm": 0.40967366099357605, "learning_rate": 6.7064521814746105e-06, "loss": 0.2781, "step": 26238 }, { "epoch": 1.2041209673718507, "grad_norm": 0.4583433270454407, "learning_rate": 6.706221713114752e-06, "loss": 0.3389, "step": 26239 }, { "epoch": 1.2041668578771052, "grad_norm": 0.4914122521877289, "learning_rate": 6.7059912406519674e-06, "loss": 0.3995, "step": 26240 }, { "epoch": 1.2042127483823597, "grad_norm": 0.4742193818092346, "learning_rate": 6.705760764086813e-06, "loss": 0.3831, "step": 26241 }, { "epoch": 1.2042586388876142, "grad_norm": 0.47057563066482544, "learning_rate": 6.705530283419842e-06, "loss": 0.3893, "step": 26242 }, { "epoch": 1.2043045293928687, "grad_norm": 0.45723989605903625, "learning_rate": 6.705299798651609e-06, "loss": 0.3587, "step": 26243 }, { "epoch": 1.2043504198981232, "grad_norm": 0.4973413944244385, "learning_rate": 6.705069309782668e-06, "loss": 0.3728, "step": 26244 }, { "epoch": 1.2043963104033775, "grad_norm": 0.45604830980300903, "learning_rate": 6.704838816813575e-06, "loss": 0.3525, "step": 26245 }, { "epoch": 1.204442200908632, "grad_norm": 0.4676319360733032, "learning_rate": 6.704608319744881e-06, "loss": 0.3323, "step": 26246 }, { "epoch": 1.2044880914138865, "grad_norm": 0.46418237686157227, "learning_rate": 6.7043778185771415e-06, "loss": 0.3319, "step": 26247 }, { "epoch": 1.204533981919141, "grad_norm": 0.4311172664165497, "learning_rate": 6.7041473133109135e-06, "loss": 0.3214, "step": 26248 }, { "epoch": 1.2045798724243955, "grad_norm": 0.45412370562553406, "learning_rate": 6.703916803946749e-06, "loss": 0.3558, "step": 26249 }, { "epoch": 1.2046257629296497, "grad_norm": 0.4197354316711426, "learning_rate": 6.7036862904852e-06, "loss": 0.2994, "step": 26250 }, { "epoch": 1.2046716534349042, "grad_norm": 0.45043888688087463, "learning_rate": 6.703455772926825e-06, "loss": 0.2961, "step": 26251 }, { "epoch": 1.2047175439401587, "grad_norm": 0.4745086431503296, "learning_rate": 6.703225251272174e-06, "loss": 0.324, "step": 26252 }, { "epoch": 1.2047634344454132, "grad_norm": 0.44831332564353943, "learning_rate": 6.7029947255218064e-06, "loss": 0.3126, "step": 26253 }, { "epoch": 1.2048093249506677, "grad_norm": 0.4630235731601715, "learning_rate": 6.702764195676272e-06, "loss": 0.3543, "step": 26254 }, { "epoch": 1.2048552154559222, "grad_norm": 0.4879850149154663, "learning_rate": 6.70253366173613e-06, "loss": 0.3889, "step": 26255 }, { "epoch": 1.2049011059611767, "grad_norm": 0.423740416765213, "learning_rate": 6.702303123701931e-06, "loss": 0.29, "step": 26256 }, { "epoch": 1.2049469964664312, "grad_norm": 0.43236997723579407, "learning_rate": 6.702072581574228e-06, "loss": 0.3418, "step": 26257 }, { "epoch": 1.2049928869716855, "grad_norm": 0.4861438572406769, "learning_rate": 6.701842035353579e-06, "loss": 0.3615, "step": 26258 }, { "epoch": 1.20503877747694, "grad_norm": 0.44887417554855347, "learning_rate": 6.701611485040538e-06, "loss": 0.3003, "step": 26259 }, { "epoch": 1.2050846679821945, "grad_norm": 0.48098137974739075, "learning_rate": 6.701380930635658e-06, "loss": 0.3792, "step": 26260 }, { "epoch": 1.205130558487449, "grad_norm": 0.45623332262039185, "learning_rate": 6.701150372139493e-06, "loss": 0.3218, "step": 26261 }, { "epoch": 1.2051764489927035, "grad_norm": 0.4856277406215668, "learning_rate": 6.700919809552598e-06, "loss": 0.3228, "step": 26262 }, { "epoch": 1.2052223394979578, "grad_norm": 0.46467652916908264, "learning_rate": 6.700689242875529e-06, "loss": 0.3404, "step": 26263 }, { "epoch": 1.2052682300032123, "grad_norm": 0.43331435322761536, "learning_rate": 6.7004586721088374e-06, "loss": 0.2844, "step": 26264 }, { "epoch": 1.2053141205084668, "grad_norm": 0.46413710713386536, "learning_rate": 6.700228097253081e-06, "loss": 0.3791, "step": 26265 }, { "epoch": 1.2053600110137213, "grad_norm": 0.48609262704849243, "learning_rate": 6.699997518308813e-06, "loss": 0.4106, "step": 26266 }, { "epoch": 1.2054059015189758, "grad_norm": 0.4860956370830536, "learning_rate": 6.699766935276586e-06, "loss": 0.337, "step": 26267 }, { "epoch": 1.2054517920242303, "grad_norm": 0.4540322422981262, "learning_rate": 6.699536348156956e-06, "loss": 0.3252, "step": 26268 }, { "epoch": 1.2054976825294847, "grad_norm": 0.43985068798065186, "learning_rate": 6.699305756950478e-06, "loss": 0.3024, "step": 26269 }, { "epoch": 1.205543573034739, "grad_norm": 0.48652422428131104, "learning_rate": 6.699075161657707e-06, "loss": 0.3808, "step": 26270 }, { "epoch": 1.2055894635399935, "grad_norm": 0.4419287145137787, "learning_rate": 6.698844562279195e-06, "loss": 0.3619, "step": 26271 }, { "epoch": 1.205635354045248, "grad_norm": 0.5074459910392761, "learning_rate": 6.6986139588154985e-06, "loss": 0.3877, "step": 26272 }, { "epoch": 1.2056812445505025, "grad_norm": 0.5021203756332397, "learning_rate": 6.6983833512671715e-06, "loss": 0.3737, "step": 26273 }, { "epoch": 1.205727135055757, "grad_norm": 0.465021550655365, "learning_rate": 6.698152739634769e-06, "loss": 0.3671, "step": 26274 }, { "epoch": 1.2057730255610115, "grad_norm": 0.44124123454093933, "learning_rate": 6.697922123918843e-06, "loss": 0.3016, "step": 26275 }, { "epoch": 1.2058189160662658, "grad_norm": 0.4807187020778656, "learning_rate": 6.697691504119953e-06, "loss": 0.3704, "step": 26276 }, { "epoch": 1.2058648065715203, "grad_norm": 0.4972396790981293, "learning_rate": 6.69746088023865e-06, "loss": 0.4012, "step": 26277 }, { "epoch": 1.2059106970767748, "grad_norm": 0.4556688070297241, "learning_rate": 6.697230252275488e-06, "loss": 0.3282, "step": 26278 }, { "epoch": 1.2059565875820293, "grad_norm": 0.5092169046401978, "learning_rate": 6.696999620231024e-06, "loss": 0.3938, "step": 26279 }, { "epoch": 1.2060024780872838, "grad_norm": 0.5076898336410522, "learning_rate": 6.6967689841058115e-06, "loss": 0.4275, "step": 26280 }, { "epoch": 1.2060483685925383, "grad_norm": 0.43630626797676086, "learning_rate": 6.696538343900405e-06, "loss": 0.2756, "step": 26281 }, { "epoch": 1.2060942590977928, "grad_norm": 0.5017236471176147, "learning_rate": 6.69630769961536e-06, "loss": 0.3919, "step": 26282 }, { "epoch": 1.206140149603047, "grad_norm": 0.46831414103507996, "learning_rate": 6.696077051251228e-06, "loss": 0.4012, "step": 26283 }, { "epoch": 1.2061860401083015, "grad_norm": 0.48872166872024536, "learning_rate": 6.695846398808568e-06, "loss": 0.3529, "step": 26284 }, { "epoch": 1.206231930613556, "grad_norm": 0.44164133071899414, "learning_rate": 6.695615742287932e-06, "loss": 0.3428, "step": 26285 }, { "epoch": 1.2062778211188105, "grad_norm": 0.4228169918060303, "learning_rate": 6.695385081689874e-06, "loss": 0.2839, "step": 26286 }, { "epoch": 1.206323711624065, "grad_norm": 0.5638141632080078, "learning_rate": 6.695154417014953e-06, "loss": 0.4096, "step": 26287 }, { "epoch": 1.2063696021293195, "grad_norm": 0.4505161643028259, "learning_rate": 6.69492374826372e-06, "loss": 0.3216, "step": 26288 }, { "epoch": 1.2064154926345738, "grad_norm": 0.49491262435913086, "learning_rate": 6.694693075436728e-06, "loss": 0.3865, "step": 26289 }, { "epoch": 1.2064613831398283, "grad_norm": 0.4188593327999115, "learning_rate": 6.694462398534536e-06, "loss": 0.2587, "step": 26290 }, { "epoch": 1.2065072736450828, "grad_norm": 0.4225176274776459, "learning_rate": 6.694231717557696e-06, "loss": 0.3194, "step": 26291 }, { "epoch": 1.2065531641503373, "grad_norm": 0.475223571062088, "learning_rate": 6.694001032506763e-06, "loss": 0.3147, "step": 26292 }, { "epoch": 1.2065990546555918, "grad_norm": 0.4677279591560364, "learning_rate": 6.693770343382293e-06, "loss": 0.4191, "step": 26293 }, { "epoch": 1.2066449451608463, "grad_norm": 0.4817289113998413, "learning_rate": 6.6935396501848395e-06, "loss": 0.404, "step": 26294 }, { "epoch": 1.2066908356661008, "grad_norm": 0.44928985834121704, "learning_rate": 6.693308952914958e-06, "loss": 0.3348, "step": 26295 }, { "epoch": 1.206736726171355, "grad_norm": 0.47700172662734985, "learning_rate": 6.693078251573202e-06, "loss": 0.3802, "step": 26296 }, { "epoch": 1.2067826166766096, "grad_norm": 0.4978356659412384, "learning_rate": 6.692847546160127e-06, "loss": 0.39, "step": 26297 }, { "epoch": 1.206828507181864, "grad_norm": 0.4507928788661957, "learning_rate": 6.692616836676289e-06, "loss": 0.3445, "step": 26298 }, { "epoch": 1.2068743976871186, "grad_norm": 0.45175814628601074, "learning_rate": 6.692386123122243e-06, "loss": 0.3249, "step": 26299 }, { "epoch": 1.206920288192373, "grad_norm": 0.45762115716934204, "learning_rate": 6.692155405498539e-06, "loss": 0.3458, "step": 26300 }, { "epoch": 1.2069661786976273, "grad_norm": 0.4887836277484894, "learning_rate": 6.691924683805737e-06, "loss": 0.3541, "step": 26301 }, { "epoch": 1.2070120692028818, "grad_norm": 0.5671867728233337, "learning_rate": 6.6916939580443914e-06, "loss": 0.4129, "step": 26302 }, { "epoch": 1.2070579597081363, "grad_norm": 0.44942793250083923, "learning_rate": 6.691463228215055e-06, "loss": 0.3406, "step": 26303 }, { "epoch": 1.2071038502133908, "grad_norm": 0.46860775351524353, "learning_rate": 6.6912324943182836e-06, "loss": 0.3897, "step": 26304 }, { "epoch": 1.2071497407186453, "grad_norm": 0.4592318534851074, "learning_rate": 6.691001756354631e-06, "loss": 0.3756, "step": 26305 }, { "epoch": 1.2071956312238998, "grad_norm": 0.41784992814064026, "learning_rate": 6.690771014324652e-06, "loss": 0.2671, "step": 26306 }, { "epoch": 1.2072415217291543, "grad_norm": 0.49648523330688477, "learning_rate": 6.690540268228904e-06, "loss": 0.3559, "step": 26307 }, { "epoch": 1.2072874122344086, "grad_norm": 0.4858267605304718, "learning_rate": 6.690309518067939e-06, "loss": 0.3568, "step": 26308 }, { "epoch": 1.207333302739663, "grad_norm": 0.4511782228946686, "learning_rate": 6.690078763842315e-06, "loss": 0.321, "step": 26309 }, { "epoch": 1.2073791932449176, "grad_norm": 0.4388591945171356, "learning_rate": 6.689848005552584e-06, "loss": 0.3106, "step": 26310 }, { "epoch": 1.207425083750172, "grad_norm": 0.5721585750579834, "learning_rate": 6.6896172431993e-06, "loss": 0.3226, "step": 26311 }, { "epoch": 1.2074709742554266, "grad_norm": 0.4585697054862976, "learning_rate": 6.689386476783022e-06, "loss": 0.3681, "step": 26312 }, { "epoch": 1.207516864760681, "grad_norm": 0.5368145108222961, "learning_rate": 6.689155706304302e-06, "loss": 0.4756, "step": 26313 }, { "epoch": 1.2075627552659354, "grad_norm": 0.42462533712387085, "learning_rate": 6.688924931763696e-06, "loss": 0.2955, "step": 26314 }, { "epoch": 1.2076086457711899, "grad_norm": 0.44715964794158936, "learning_rate": 6.688694153161757e-06, "loss": 0.3368, "step": 26315 }, { "epoch": 1.2076545362764444, "grad_norm": 0.45989862084388733, "learning_rate": 6.688463370499044e-06, "loss": 0.354, "step": 26316 }, { "epoch": 1.2077004267816989, "grad_norm": 0.45057427883148193, "learning_rate": 6.688232583776108e-06, "loss": 0.3022, "step": 26317 }, { "epoch": 1.2077463172869534, "grad_norm": 0.4942217469215393, "learning_rate": 6.688001792993506e-06, "loss": 0.3757, "step": 26318 }, { "epoch": 1.2077922077922079, "grad_norm": 0.46702468395233154, "learning_rate": 6.687770998151792e-06, "loss": 0.3216, "step": 26319 }, { "epoch": 1.2078380982974624, "grad_norm": 0.5165749788284302, "learning_rate": 6.687540199251522e-06, "loss": 0.4367, "step": 26320 }, { "epoch": 1.2078839888027166, "grad_norm": 0.5066128969192505, "learning_rate": 6.687309396293251e-06, "loss": 0.3999, "step": 26321 }, { "epoch": 1.2079298793079711, "grad_norm": 0.5232498645782471, "learning_rate": 6.687078589277531e-06, "loss": 0.4476, "step": 26322 }, { "epoch": 1.2079757698132256, "grad_norm": 0.47634729743003845, "learning_rate": 6.686847778204921e-06, "loss": 0.3649, "step": 26323 }, { "epoch": 1.2080216603184801, "grad_norm": 0.4598873555660248, "learning_rate": 6.686616963075975e-06, "loss": 0.3733, "step": 26324 }, { "epoch": 1.2080675508237346, "grad_norm": 0.4656367003917694, "learning_rate": 6.686386143891248e-06, "loss": 0.3467, "step": 26325 }, { "epoch": 1.2081134413289891, "grad_norm": 0.45353811979293823, "learning_rate": 6.686155320651293e-06, "loss": 0.3194, "step": 26326 }, { "epoch": 1.2081593318342434, "grad_norm": 0.7006787657737732, "learning_rate": 6.6859244933566665e-06, "loss": 0.325, "step": 26327 }, { "epoch": 1.208205222339498, "grad_norm": 0.4548848271369934, "learning_rate": 6.6856936620079234e-06, "loss": 0.3721, "step": 26328 }, { "epoch": 1.2082511128447524, "grad_norm": 0.4923316538333893, "learning_rate": 6.6854628266056195e-06, "loss": 0.4009, "step": 26329 }, { "epoch": 1.208297003350007, "grad_norm": 0.4743734300136566, "learning_rate": 6.685231987150311e-06, "loss": 0.3556, "step": 26330 }, { "epoch": 1.2083428938552614, "grad_norm": 0.48585203289985657, "learning_rate": 6.685001143642551e-06, "loss": 0.3862, "step": 26331 }, { "epoch": 1.2083887843605159, "grad_norm": 0.4953347146511078, "learning_rate": 6.684770296082894e-06, "loss": 0.4365, "step": 26332 }, { "epoch": 1.2084346748657704, "grad_norm": 0.45943862199783325, "learning_rate": 6.684539444471897e-06, "loss": 0.3838, "step": 26333 }, { "epoch": 1.2084805653710247, "grad_norm": 0.4836080074310303, "learning_rate": 6.684308588810113e-06, "loss": 0.3811, "step": 26334 }, { "epoch": 1.2085264558762792, "grad_norm": 0.4816010296344757, "learning_rate": 6.684077729098099e-06, "loss": 0.3048, "step": 26335 }, { "epoch": 1.2085723463815337, "grad_norm": 0.47682085633277893, "learning_rate": 6.683846865336412e-06, "loss": 0.348, "step": 26336 }, { "epoch": 1.2086182368867882, "grad_norm": 0.445208340883255, "learning_rate": 6.683615997525602e-06, "loss": 0.2998, "step": 26337 }, { "epoch": 1.2086641273920427, "grad_norm": 0.4780639111995697, "learning_rate": 6.683385125666227e-06, "loss": 0.3817, "step": 26338 }, { "epoch": 1.208710017897297, "grad_norm": 0.45561856031417847, "learning_rate": 6.6831542497588435e-06, "loss": 0.3545, "step": 26339 }, { "epoch": 1.2087559084025514, "grad_norm": 0.5210508704185486, "learning_rate": 6.682923369804003e-06, "loss": 0.4559, "step": 26340 }, { "epoch": 1.208801798907806, "grad_norm": 0.48558399081230164, "learning_rate": 6.682692485802265e-06, "loss": 0.4038, "step": 26341 }, { "epoch": 1.2088476894130604, "grad_norm": 0.45005717873573303, "learning_rate": 6.6824615977541815e-06, "loss": 0.3425, "step": 26342 }, { "epoch": 1.208893579918315, "grad_norm": 0.4151028096675873, "learning_rate": 6.68223070566031e-06, "loss": 0.2643, "step": 26343 }, { "epoch": 1.2089394704235694, "grad_norm": 0.45483630895614624, "learning_rate": 6.681999809521203e-06, "loss": 0.3502, "step": 26344 }, { "epoch": 1.208985360928824, "grad_norm": 0.4719693958759308, "learning_rate": 6.681768909337418e-06, "loss": 0.3489, "step": 26345 }, { "epoch": 1.2090312514340784, "grad_norm": 0.5175364017486572, "learning_rate": 6.68153800510951e-06, "loss": 0.4042, "step": 26346 }, { "epoch": 1.2090771419393327, "grad_norm": 0.408557653427124, "learning_rate": 6.681307096838034e-06, "loss": 0.2672, "step": 26347 }, { "epoch": 1.2091230324445872, "grad_norm": 0.46874916553497314, "learning_rate": 6.681076184523545e-06, "loss": 0.383, "step": 26348 }, { "epoch": 1.2091689229498417, "grad_norm": 0.43633827567100525, "learning_rate": 6.680845268166597e-06, "loss": 0.3444, "step": 26349 }, { "epoch": 1.2092148134550962, "grad_norm": 0.5227893590927124, "learning_rate": 6.680614347767748e-06, "loss": 0.4102, "step": 26350 }, { "epoch": 1.2092607039603507, "grad_norm": 0.4707088768482208, "learning_rate": 6.680383423327551e-06, "loss": 0.3518, "step": 26351 }, { "epoch": 1.209306594465605, "grad_norm": 0.46726468205451965, "learning_rate": 6.6801524948465635e-06, "loss": 0.3454, "step": 26352 }, { "epoch": 1.2093524849708595, "grad_norm": 0.48585352301597595, "learning_rate": 6.679921562325339e-06, "loss": 0.3918, "step": 26353 }, { "epoch": 1.209398375476114, "grad_norm": 0.5078784227371216, "learning_rate": 6.679690625764432e-06, "loss": 0.438, "step": 26354 }, { "epoch": 1.2094442659813684, "grad_norm": 0.46684470772743225, "learning_rate": 6.679459685164401e-06, "loss": 0.3509, "step": 26355 }, { "epoch": 1.209490156486623, "grad_norm": 0.4665757417678833, "learning_rate": 6.679228740525798e-06, "loss": 0.4004, "step": 26356 }, { "epoch": 1.2095360469918774, "grad_norm": 0.463304340839386, "learning_rate": 6.678997791849181e-06, "loss": 0.3455, "step": 26357 }, { "epoch": 1.209581937497132, "grad_norm": 0.4518081247806549, "learning_rate": 6.678766839135104e-06, "loss": 0.3337, "step": 26358 }, { "epoch": 1.2096278280023862, "grad_norm": 0.4891918897628784, "learning_rate": 6.678535882384122e-06, "loss": 0.3666, "step": 26359 }, { "epoch": 1.2096737185076407, "grad_norm": 0.4654916822910309, "learning_rate": 6.678304921596791e-06, "loss": 0.3643, "step": 26360 }, { "epoch": 1.2097196090128952, "grad_norm": 0.47407469153404236, "learning_rate": 6.678073956773668e-06, "loss": 0.3233, "step": 26361 }, { "epoch": 1.2097654995181497, "grad_norm": 0.49482840299606323, "learning_rate": 6.677842987915304e-06, "loss": 0.3442, "step": 26362 }, { "epoch": 1.2098113900234042, "grad_norm": 0.4636666476726532, "learning_rate": 6.677612015022259e-06, "loss": 0.3388, "step": 26363 }, { "epoch": 1.2098572805286587, "grad_norm": 0.45837557315826416, "learning_rate": 6.677381038095087e-06, "loss": 0.3071, "step": 26364 }, { "epoch": 1.209903171033913, "grad_norm": 0.4642752408981323, "learning_rate": 6.677150057134341e-06, "loss": 0.3054, "step": 26365 }, { "epoch": 1.2099490615391675, "grad_norm": 0.4554455876350403, "learning_rate": 6.67691907214058e-06, "loss": 0.319, "step": 26366 }, { "epoch": 1.209994952044422, "grad_norm": 0.4928278625011444, "learning_rate": 6.676688083114358e-06, "loss": 0.4069, "step": 26367 }, { "epoch": 1.2100408425496765, "grad_norm": 0.5136226415634155, "learning_rate": 6.67645709005623e-06, "loss": 0.3769, "step": 26368 }, { "epoch": 1.210086733054931, "grad_norm": 0.48957252502441406, "learning_rate": 6.676226092966751e-06, "loss": 0.4138, "step": 26369 }, { "epoch": 1.2101326235601855, "grad_norm": 0.4844408333301544, "learning_rate": 6.6759950918464775e-06, "loss": 0.3755, "step": 26370 }, { "epoch": 1.21017851406544, "grad_norm": 0.45277372002601624, "learning_rate": 6.675764086695965e-06, "loss": 0.3772, "step": 26371 }, { "epoch": 1.2102244045706942, "grad_norm": 0.4847615361213684, "learning_rate": 6.67553307751577e-06, "loss": 0.3897, "step": 26372 }, { "epoch": 1.2102702950759487, "grad_norm": 0.442344069480896, "learning_rate": 6.675302064306445e-06, "loss": 0.3397, "step": 26373 }, { "epoch": 1.2103161855812032, "grad_norm": 0.549284040927887, "learning_rate": 6.675071047068549e-06, "loss": 0.4431, "step": 26374 }, { "epoch": 1.2103620760864577, "grad_norm": 0.4563557803630829, "learning_rate": 6.674840025802635e-06, "loss": 0.322, "step": 26375 }, { "epoch": 1.2104079665917122, "grad_norm": 0.456889808177948, "learning_rate": 6.674609000509258e-06, "loss": 0.3434, "step": 26376 }, { "epoch": 1.2104538570969667, "grad_norm": 0.49003541469573975, "learning_rate": 6.674377971188975e-06, "loss": 0.3741, "step": 26377 }, { "epoch": 1.210499747602221, "grad_norm": 0.4436534345149994, "learning_rate": 6.674146937842343e-06, "loss": 0.3431, "step": 26378 }, { "epoch": 1.2105456381074755, "grad_norm": 0.4744948446750641, "learning_rate": 6.673915900469916e-06, "loss": 0.3959, "step": 26379 }, { "epoch": 1.21059152861273, "grad_norm": 0.4869026243686676, "learning_rate": 6.673684859072248e-06, "loss": 0.4364, "step": 26380 }, { "epoch": 1.2106374191179845, "grad_norm": 0.4801062047481537, "learning_rate": 6.6734538136498985e-06, "loss": 0.3892, "step": 26381 }, { "epoch": 1.210683309623239, "grad_norm": 0.4716678261756897, "learning_rate": 6.673222764203418e-06, "loss": 0.3588, "step": 26382 }, { "epoch": 1.2107292001284935, "grad_norm": 0.4320415258407593, "learning_rate": 6.672991710733367e-06, "loss": 0.2798, "step": 26383 }, { "epoch": 1.210775090633748, "grad_norm": 0.45236414670944214, "learning_rate": 6.6727606532402975e-06, "loss": 0.3026, "step": 26384 }, { "epoch": 1.2108209811390023, "grad_norm": 0.48730623722076416, "learning_rate": 6.6725295917247675e-06, "loss": 0.3428, "step": 26385 }, { "epoch": 1.2108668716442568, "grad_norm": 0.4927803874015808, "learning_rate": 6.672298526187332e-06, "loss": 0.3779, "step": 26386 }, { "epoch": 1.2109127621495113, "grad_norm": 0.4604911804199219, "learning_rate": 6.672067456628545e-06, "loss": 0.3406, "step": 26387 }, { "epoch": 1.2109586526547658, "grad_norm": 0.47935548424720764, "learning_rate": 6.671836383048964e-06, "loss": 0.3807, "step": 26388 }, { "epoch": 1.2110045431600203, "grad_norm": 0.5358847379684448, "learning_rate": 6.671605305449145e-06, "loss": 0.4129, "step": 26389 }, { "epoch": 1.2110504336652745, "grad_norm": 0.42999929189682007, "learning_rate": 6.671374223829642e-06, "loss": 0.2877, "step": 26390 }, { "epoch": 1.211096324170529, "grad_norm": 0.4419626295566559, "learning_rate": 6.671143138191012e-06, "loss": 0.3006, "step": 26391 }, { "epoch": 1.2111422146757835, "grad_norm": 0.4754124581813812, "learning_rate": 6.67091204853381e-06, "loss": 0.3742, "step": 26392 }, { "epoch": 1.211188105181038, "grad_norm": 0.5523090362548828, "learning_rate": 6.67068095485859e-06, "loss": 0.5309, "step": 26393 }, { "epoch": 1.2112339956862925, "grad_norm": 0.5166401863098145, "learning_rate": 6.670449857165911e-06, "loss": 0.3643, "step": 26394 }, { "epoch": 1.211279886191547, "grad_norm": 0.4745306074619293, "learning_rate": 6.670218755456329e-06, "loss": 0.3361, "step": 26395 }, { "epoch": 1.2113257766968015, "grad_norm": 0.5197217464447021, "learning_rate": 6.6699876497303974e-06, "loss": 0.429, "step": 26396 }, { "epoch": 1.2113716672020558, "grad_norm": 0.4642806947231293, "learning_rate": 6.669756539988671e-06, "loss": 0.3348, "step": 26397 }, { "epoch": 1.2114175577073103, "grad_norm": 0.5018004775047302, "learning_rate": 6.6695254262317075e-06, "loss": 0.4091, "step": 26398 }, { "epoch": 1.2114634482125648, "grad_norm": 0.4532126188278198, "learning_rate": 6.6692943084600625e-06, "loss": 0.3072, "step": 26399 }, { "epoch": 1.2115093387178193, "grad_norm": 0.4658781886100769, "learning_rate": 6.669063186674293e-06, "loss": 0.3126, "step": 26400 }, { "epoch": 1.2115552292230738, "grad_norm": 0.43039998412132263, "learning_rate": 6.668832060874952e-06, "loss": 0.3163, "step": 26401 }, { "epoch": 1.2116011197283283, "grad_norm": 0.5052329897880554, "learning_rate": 6.668600931062597e-06, "loss": 0.4467, "step": 26402 }, { "epoch": 1.2116470102335826, "grad_norm": 0.44631054997444153, "learning_rate": 6.6683697972377825e-06, "loss": 0.3454, "step": 26403 }, { "epoch": 1.211692900738837, "grad_norm": 0.4222322702407837, "learning_rate": 6.6681386594010645e-06, "loss": 0.2758, "step": 26404 }, { "epoch": 1.2117387912440916, "grad_norm": 0.46659547090530396, "learning_rate": 6.667907517553001e-06, "loss": 0.3681, "step": 26405 }, { "epoch": 1.211784681749346, "grad_norm": 0.44925379753112793, "learning_rate": 6.6676763716941465e-06, "loss": 0.3212, "step": 26406 }, { "epoch": 1.2118305722546006, "grad_norm": 0.5020961761474609, "learning_rate": 6.667445221825056e-06, "loss": 0.3701, "step": 26407 }, { "epoch": 1.211876462759855, "grad_norm": 0.4451366662979126, "learning_rate": 6.667214067946286e-06, "loss": 0.3001, "step": 26408 }, { "epoch": 1.2119223532651096, "grad_norm": 0.46006685495376587, "learning_rate": 6.666982910058393e-06, "loss": 0.3403, "step": 26409 }, { "epoch": 1.2119682437703638, "grad_norm": 0.45795106887817383, "learning_rate": 6.666751748161932e-06, "loss": 0.3149, "step": 26410 }, { "epoch": 1.2120141342756183, "grad_norm": 0.464626282453537, "learning_rate": 6.666520582257457e-06, "loss": 0.3928, "step": 26411 }, { "epoch": 1.2120600247808728, "grad_norm": 0.4776332378387451, "learning_rate": 6.66628941234553e-06, "loss": 0.3455, "step": 26412 }, { "epoch": 1.2121059152861273, "grad_norm": 0.40015533566474915, "learning_rate": 6.6660582384267e-06, "loss": 0.2512, "step": 26413 }, { "epoch": 1.2121518057913818, "grad_norm": 0.4789672791957855, "learning_rate": 6.665827060501527e-06, "loss": 0.3265, "step": 26414 }, { "epoch": 1.2121976962966363, "grad_norm": 0.4617554843425751, "learning_rate": 6.665595878570565e-06, "loss": 0.3525, "step": 26415 }, { "epoch": 1.2122435868018906, "grad_norm": 0.4716511070728302, "learning_rate": 6.665364692634369e-06, "loss": 0.3519, "step": 26416 }, { "epoch": 1.212289477307145, "grad_norm": 0.44423145055770874, "learning_rate": 6.6651335026934995e-06, "loss": 0.3406, "step": 26417 }, { "epoch": 1.2123353678123996, "grad_norm": 0.47908568382263184, "learning_rate": 6.6649023087485084e-06, "loss": 0.4083, "step": 26418 }, { "epoch": 1.212381258317654, "grad_norm": 0.5334530472755432, "learning_rate": 6.664671110799952e-06, "loss": 0.4623, "step": 26419 }, { "epoch": 1.2124271488229086, "grad_norm": 0.4641796350479126, "learning_rate": 6.664439908848388e-06, "loss": 0.3281, "step": 26420 }, { "epoch": 1.212473039328163, "grad_norm": 0.4517514109611511, "learning_rate": 6.66420870289437e-06, "loss": 0.3612, "step": 26421 }, { "epoch": 1.2125189298334176, "grad_norm": 0.46616753935813904, "learning_rate": 6.663977492938456e-06, "loss": 0.2958, "step": 26422 }, { "epoch": 1.2125648203386719, "grad_norm": 0.45648351311683655, "learning_rate": 6.663746278981203e-06, "loss": 0.3633, "step": 26423 }, { "epoch": 1.2126107108439264, "grad_norm": 0.48374369740486145, "learning_rate": 6.663515061023162e-06, "loss": 0.3769, "step": 26424 }, { "epoch": 1.2126566013491809, "grad_norm": 0.5126987099647522, "learning_rate": 6.6632838390648935e-06, "loss": 0.3928, "step": 26425 }, { "epoch": 1.2127024918544353, "grad_norm": 0.4751146733760834, "learning_rate": 6.663052613106954e-06, "loss": 0.4092, "step": 26426 }, { "epoch": 1.2127483823596898, "grad_norm": 0.45421040058135986, "learning_rate": 6.662821383149895e-06, "loss": 0.3283, "step": 26427 }, { "epoch": 1.2127942728649441, "grad_norm": 0.4889911711215973, "learning_rate": 6.662590149194278e-06, "loss": 0.3985, "step": 26428 }, { "epoch": 1.2128401633701986, "grad_norm": 0.5051060318946838, "learning_rate": 6.662358911240656e-06, "loss": 0.447, "step": 26429 }, { "epoch": 1.2128860538754531, "grad_norm": 0.4766351580619812, "learning_rate": 6.662127669289584e-06, "loss": 0.3786, "step": 26430 }, { "epoch": 1.2129319443807076, "grad_norm": 0.452815443277359, "learning_rate": 6.66189642334162e-06, "loss": 0.3179, "step": 26431 }, { "epoch": 1.2129778348859621, "grad_norm": 0.4612380862236023, "learning_rate": 6.66166517339732e-06, "loss": 0.3611, "step": 26432 }, { "epoch": 1.2130237253912166, "grad_norm": 0.451398104429245, "learning_rate": 6.66143391945724e-06, "loss": 0.3378, "step": 26433 }, { "epoch": 1.213069615896471, "grad_norm": 0.4431014358997345, "learning_rate": 6.6612026615219375e-06, "loss": 0.3556, "step": 26434 }, { "epoch": 1.2131155064017256, "grad_norm": 0.4857947528362274, "learning_rate": 6.660971399591965e-06, "loss": 0.3406, "step": 26435 }, { "epoch": 1.2131613969069799, "grad_norm": 0.45997658371925354, "learning_rate": 6.660740133667879e-06, "loss": 0.3117, "step": 26436 }, { "epoch": 1.2132072874122344, "grad_norm": 0.48422735929489136, "learning_rate": 6.660508863750239e-06, "loss": 0.3749, "step": 26437 }, { "epoch": 1.2132531779174889, "grad_norm": 0.6273134350776672, "learning_rate": 6.660277589839598e-06, "loss": 0.3967, "step": 26438 }, { "epoch": 1.2132990684227434, "grad_norm": 0.4786001741886139, "learning_rate": 6.660046311936514e-06, "loss": 0.3251, "step": 26439 }, { "epoch": 1.2133449589279979, "grad_norm": 0.5093516111373901, "learning_rate": 6.6598150300415434e-06, "loss": 0.3985, "step": 26440 }, { "epoch": 1.2133908494332521, "grad_norm": 0.44140133261680603, "learning_rate": 6.65958374415524e-06, "loss": 0.2994, "step": 26441 }, { "epoch": 1.2134367399385066, "grad_norm": 0.4560340642929077, "learning_rate": 6.659352454278163e-06, "loss": 0.3328, "step": 26442 }, { "epoch": 1.2134826304437611, "grad_norm": 0.4609656035900116, "learning_rate": 6.659121160410866e-06, "loss": 0.3109, "step": 26443 }, { "epoch": 1.2135285209490156, "grad_norm": 0.4403704106807709, "learning_rate": 6.658889862553907e-06, "loss": 0.2964, "step": 26444 }, { "epoch": 1.2135744114542701, "grad_norm": 0.4830181896686554, "learning_rate": 6.65865856070784e-06, "loss": 0.3805, "step": 26445 }, { "epoch": 1.2136203019595246, "grad_norm": 0.4448684751987457, "learning_rate": 6.658427254873225e-06, "loss": 0.3411, "step": 26446 }, { "epoch": 1.2136661924647791, "grad_norm": 0.5083604454994202, "learning_rate": 6.658195945050614e-06, "loss": 0.3782, "step": 26447 }, { "epoch": 1.2137120829700334, "grad_norm": 0.4937281608581543, "learning_rate": 6.6579646312405656e-06, "loss": 0.422, "step": 26448 }, { "epoch": 1.213757973475288, "grad_norm": 0.4181835353374481, "learning_rate": 6.657733313443635e-06, "loss": 0.2713, "step": 26449 }, { "epoch": 1.2138038639805424, "grad_norm": 0.47744104266166687, "learning_rate": 6.65750199166038e-06, "loss": 0.3566, "step": 26450 }, { "epoch": 1.213849754485797, "grad_norm": 0.49983495473861694, "learning_rate": 6.657270665891357e-06, "loss": 0.3754, "step": 26451 }, { "epoch": 1.2138956449910514, "grad_norm": 0.4469482898712158, "learning_rate": 6.657039336137119e-06, "loss": 0.3372, "step": 26452 }, { "epoch": 1.213941535496306, "grad_norm": 0.44555771350860596, "learning_rate": 6.656808002398224e-06, "loss": 0.339, "step": 26453 }, { "epoch": 1.2139874260015602, "grad_norm": 0.42917266488075256, "learning_rate": 6.656576664675229e-06, "loss": 0.3027, "step": 26454 }, { "epoch": 1.2140333165068147, "grad_norm": 0.47455111145973206, "learning_rate": 6.656345322968691e-06, "loss": 0.353, "step": 26455 }, { "epoch": 1.2140792070120692, "grad_norm": 0.4238443970680237, "learning_rate": 6.656113977279165e-06, "loss": 0.2895, "step": 26456 }, { "epoch": 1.2141250975173237, "grad_norm": 0.4935763478279114, "learning_rate": 6.655882627607207e-06, "loss": 0.4059, "step": 26457 }, { "epoch": 1.2141709880225782, "grad_norm": 0.43882501125335693, "learning_rate": 6.655651273953374e-06, "loss": 0.3077, "step": 26458 }, { "epoch": 1.2142168785278327, "grad_norm": 0.5318296551704407, "learning_rate": 6.655419916318222e-06, "loss": 0.4453, "step": 26459 }, { "epoch": 1.2142627690330872, "grad_norm": 0.4326549470424652, "learning_rate": 6.655188554702309e-06, "loss": 0.3145, "step": 26460 }, { "epoch": 1.2143086595383414, "grad_norm": 0.47563329339027405, "learning_rate": 6.654957189106189e-06, "loss": 0.3672, "step": 26461 }, { "epoch": 1.214354550043596, "grad_norm": 0.48194050788879395, "learning_rate": 6.6547258195304186e-06, "loss": 0.3967, "step": 26462 }, { "epoch": 1.2144004405488504, "grad_norm": 0.5419139266014099, "learning_rate": 6.6544944459755554e-06, "loss": 0.496, "step": 26463 }, { "epoch": 1.214446331054105, "grad_norm": 0.48219239711761475, "learning_rate": 6.654263068442155e-06, "loss": 0.363, "step": 26464 }, { "epoch": 1.2144922215593594, "grad_norm": 0.5022880434989929, "learning_rate": 6.654031686930775e-06, "loss": 0.3624, "step": 26465 }, { "epoch": 1.214538112064614, "grad_norm": 0.4882560670375824, "learning_rate": 6.65380030144197e-06, "loss": 0.3481, "step": 26466 }, { "epoch": 1.2145840025698682, "grad_norm": 0.5415197014808655, "learning_rate": 6.653568911976297e-06, "loss": 0.428, "step": 26467 }, { "epoch": 1.2146298930751227, "grad_norm": 0.478347510099411, "learning_rate": 6.653337518534314e-06, "loss": 0.3416, "step": 26468 }, { "epoch": 1.2146757835803772, "grad_norm": 0.49029624462127686, "learning_rate": 6.653106121116575e-06, "loss": 0.3324, "step": 26469 }, { "epoch": 1.2147216740856317, "grad_norm": 0.4428369998931885, "learning_rate": 6.652874719723637e-06, "loss": 0.2993, "step": 26470 }, { "epoch": 1.2147675645908862, "grad_norm": 0.4888448417186737, "learning_rate": 6.652643314356058e-06, "loss": 0.3229, "step": 26471 }, { "epoch": 1.2148134550961407, "grad_norm": 0.5043996572494507, "learning_rate": 6.652411905014393e-06, "loss": 0.4181, "step": 26472 }, { "epoch": 1.2148593456013952, "grad_norm": 0.4533562958240509, "learning_rate": 6.652180491699199e-06, "loss": 0.3288, "step": 26473 }, { "epoch": 1.2149052361066495, "grad_norm": 0.44803428649902344, "learning_rate": 6.651949074411032e-06, "loss": 0.3367, "step": 26474 }, { "epoch": 1.214951126611904, "grad_norm": 0.4290565252304077, "learning_rate": 6.651717653150448e-06, "loss": 0.272, "step": 26475 }, { "epoch": 1.2149970171171585, "grad_norm": 0.40704265236854553, "learning_rate": 6.651486227918005e-06, "loss": 0.2749, "step": 26476 }, { "epoch": 1.215042907622413, "grad_norm": 0.49190792441368103, "learning_rate": 6.6512547987142596e-06, "loss": 0.384, "step": 26477 }, { "epoch": 1.2150887981276675, "grad_norm": 0.7012590765953064, "learning_rate": 6.651023365539769e-06, "loss": 0.4232, "step": 26478 }, { "epoch": 1.2151346886329217, "grad_norm": 0.4812316596508026, "learning_rate": 6.650791928395085e-06, "loss": 0.4585, "step": 26479 }, { "epoch": 1.2151805791381762, "grad_norm": 0.43747609853744507, "learning_rate": 6.650560487280769e-06, "loss": 0.3326, "step": 26480 }, { "epoch": 1.2152264696434307, "grad_norm": 0.47373586893081665, "learning_rate": 6.650329042197375e-06, "loss": 0.3477, "step": 26481 }, { "epoch": 1.2152723601486852, "grad_norm": 0.4390137195587158, "learning_rate": 6.650097593145463e-06, "loss": 0.2851, "step": 26482 }, { "epoch": 1.2153182506539397, "grad_norm": 0.4655608534812927, "learning_rate": 6.6498661401255845e-06, "loss": 0.3319, "step": 26483 }, { "epoch": 1.2153641411591942, "grad_norm": 0.4914844334125519, "learning_rate": 6.6496346831383e-06, "loss": 0.3812, "step": 26484 }, { "epoch": 1.2154100316644487, "grad_norm": 0.5391208529472351, "learning_rate": 6.649403222184164e-06, "loss": 0.4293, "step": 26485 }, { "epoch": 1.215455922169703, "grad_norm": 0.4957500398159027, "learning_rate": 6.649171757263734e-06, "loss": 0.4034, "step": 26486 }, { "epoch": 1.2155018126749575, "grad_norm": 0.5317193865776062, "learning_rate": 6.648940288377565e-06, "loss": 0.3771, "step": 26487 }, { "epoch": 1.215547703180212, "grad_norm": 0.5028627514839172, "learning_rate": 6.648708815526218e-06, "loss": 0.4635, "step": 26488 }, { "epoch": 1.2155935936854665, "grad_norm": 0.44702839851379395, "learning_rate": 6.648477338710246e-06, "loss": 0.3613, "step": 26489 }, { "epoch": 1.215639484190721, "grad_norm": 0.47877851128578186, "learning_rate": 6.648245857930205e-06, "loss": 0.3658, "step": 26490 }, { "epoch": 1.2156853746959755, "grad_norm": 0.48605743050575256, "learning_rate": 6.648014373186653e-06, "loss": 0.3923, "step": 26491 }, { "epoch": 1.2157312652012298, "grad_norm": 0.4536574184894562, "learning_rate": 6.647782884480148e-06, "loss": 0.3288, "step": 26492 }, { "epoch": 1.2157771557064843, "grad_norm": 0.4504633843898773, "learning_rate": 6.647551391811243e-06, "loss": 0.2962, "step": 26493 }, { "epoch": 1.2158230462117388, "grad_norm": 0.5358149409294128, "learning_rate": 6.6473198951805e-06, "loss": 0.4343, "step": 26494 }, { "epoch": 1.2158689367169933, "grad_norm": 0.4692898094654083, "learning_rate": 6.647088394588469e-06, "loss": 0.3549, "step": 26495 }, { "epoch": 1.2159148272222478, "grad_norm": 0.4898407757282257, "learning_rate": 6.646856890035711e-06, "loss": 0.359, "step": 26496 }, { "epoch": 1.2159607177275022, "grad_norm": 0.484628289937973, "learning_rate": 6.646625381522784e-06, "loss": 0.3601, "step": 26497 }, { "epoch": 1.2160066082327567, "grad_norm": 0.4548763632774353, "learning_rate": 6.64639386905024e-06, "loss": 0.3018, "step": 26498 }, { "epoch": 1.216052498738011, "grad_norm": 0.4199632704257965, "learning_rate": 6.646162352618641e-06, "loss": 0.3319, "step": 26499 }, { "epoch": 1.2160983892432655, "grad_norm": 0.43360844254493713, "learning_rate": 6.645930832228541e-06, "loss": 0.3152, "step": 26500 }, { "epoch": 1.21614427974852, "grad_norm": 0.5200902223587036, "learning_rate": 6.645699307880496e-06, "loss": 0.4344, "step": 26501 }, { "epoch": 1.2161901702537745, "grad_norm": 0.46995633840560913, "learning_rate": 6.645467779575062e-06, "loss": 0.359, "step": 26502 }, { "epoch": 1.216236060759029, "grad_norm": 0.42737650871276855, "learning_rate": 6.6452362473127985e-06, "loss": 0.3075, "step": 26503 }, { "epoch": 1.2162819512642835, "grad_norm": 0.4868408143520355, "learning_rate": 6.645004711094262e-06, "loss": 0.3962, "step": 26504 }, { "epoch": 1.2163278417695378, "grad_norm": 0.48662471771240234, "learning_rate": 6.644773170920008e-06, "loss": 0.3435, "step": 26505 }, { "epoch": 1.2163737322747923, "grad_norm": 0.4803045988082886, "learning_rate": 6.644541626790592e-06, "loss": 0.4428, "step": 26506 }, { "epoch": 1.2164196227800468, "grad_norm": 0.4640991687774658, "learning_rate": 6.644310078706573e-06, "loss": 0.3423, "step": 26507 }, { "epoch": 1.2164655132853013, "grad_norm": 0.48399603366851807, "learning_rate": 6.64407852666851e-06, "loss": 0.4255, "step": 26508 }, { "epoch": 1.2165114037905558, "grad_norm": 0.4583907723426819, "learning_rate": 6.643846970676953e-06, "loss": 0.3853, "step": 26509 }, { "epoch": 1.2165572942958103, "grad_norm": 0.48331916332244873, "learning_rate": 6.643615410732465e-06, "loss": 0.4226, "step": 26510 }, { "epoch": 1.2166031848010648, "grad_norm": 0.5015382766723633, "learning_rate": 6.6433838468356e-06, "loss": 0.4226, "step": 26511 }, { "epoch": 1.216649075306319, "grad_norm": 0.4789227843284607, "learning_rate": 6.643152278986916e-06, "loss": 0.3551, "step": 26512 }, { "epoch": 1.2166949658115735, "grad_norm": 0.44056299328804016, "learning_rate": 6.642920707186968e-06, "loss": 0.3149, "step": 26513 }, { "epoch": 1.216740856316828, "grad_norm": 0.4695565104484558, "learning_rate": 6.642689131436317e-06, "loss": 0.4132, "step": 26514 }, { "epoch": 1.2167867468220825, "grad_norm": 0.4440353214740753, "learning_rate": 6.642457551735516e-06, "loss": 0.3034, "step": 26515 }, { "epoch": 1.216832637327337, "grad_norm": 0.46822142601013184, "learning_rate": 6.642225968085123e-06, "loss": 0.3553, "step": 26516 }, { "epoch": 1.2168785278325913, "grad_norm": 0.4997451901435852, "learning_rate": 6.641994380485693e-06, "loss": 0.441, "step": 26517 }, { "epoch": 1.2169244183378458, "grad_norm": 0.4446333944797516, "learning_rate": 6.641762788937786e-06, "loss": 0.3269, "step": 26518 }, { "epoch": 1.2169703088431003, "grad_norm": 0.45543307065963745, "learning_rate": 6.641531193441959e-06, "loss": 0.4177, "step": 26519 }, { "epoch": 1.2170161993483548, "grad_norm": 0.4097939431667328, "learning_rate": 6.641299593998765e-06, "loss": 0.2536, "step": 26520 }, { "epoch": 1.2170620898536093, "grad_norm": 0.5092282891273499, "learning_rate": 6.641067990608765e-06, "loss": 0.4529, "step": 26521 }, { "epoch": 1.2171079803588638, "grad_norm": 0.47216612100601196, "learning_rate": 6.6408363832725154e-06, "loss": 0.4249, "step": 26522 }, { "epoch": 1.2171538708641183, "grad_norm": 0.44188278913497925, "learning_rate": 6.640604771990571e-06, "loss": 0.3356, "step": 26523 }, { "epoch": 1.2171997613693728, "grad_norm": 0.43510234355926514, "learning_rate": 6.640373156763489e-06, "loss": 0.3104, "step": 26524 }, { "epoch": 1.217245651874627, "grad_norm": 0.4596518278121948, "learning_rate": 6.640141537591829e-06, "loss": 0.283, "step": 26525 }, { "epoch": 1.2172915423798816, "grad_norm": 0.4891955256462097, "learning_rate": 6.639909914476145e-06, "loss": 0.4092, "step": 26526 }, { "epoch": 1.217337432885136, "grad_norm": 0.5185647010803223, "learning_rate": 6.639678287416995e-06, "loss": 0.4304, "step": 26527 }, { "epoch": 1.2173833233903906, "grad_norm": 0.4624726474285126, "learning_rate": 6.639446656414937e-06, "loss": 0.3279, "step": 26528 }, { "epoch": 1.217429213895645, "grad_norm": 0.4289744794368744, "learning_rate": 6.639215021470526e-06, "loss": 0.2879, "step": 26529 }, { "epoch": 1.2174751044008993, "grad_norm": 0.4355056881904602, "learning_rate": 6.638983382584322e-06, "loss": 0.2924, "step": 26530 }, { "epoch": 1.2175209949061538, "grad_norm": 0.4906013011932373, "learning_rate": 6.63875173975688e-06, "loss": 0.4147, "step": 26531 }, { "epoch": 1.2175668854114083, "grad_norm": 0.4621824324131012, "learning_rate": 6.638520092988757e-06, "loss": 0.369, "step": 26532 }, { "epoch": 1.2176127759166628, "grad_norm": 0.4605434536933899, "learning_rate": 6.638288442280511e-06, "loss": 0.367, "step": 26533 }, { "epoch": 1.2176586664219173, "grad_norm": 0.4299205541610718, "learning_rate": 6.638056787632696e-06, "loss": 0.2991, "step": 26534 }, { "epoch": 1.2177045569271718, "grad_norm": 0.44541507959365845, "learning_rate": 6.637825129045873e-06, "loss": 0.3184, "step": 26535 }, { "epoch": 1.2177504474324263, "grad_norm": 0.45722100138664246, "learning_rate": 6.637593466520598e-06, "loss": 0.3374, "step": 26536 }, { "epoch": 1.2177963379376806, "grad_norm": 0.45860129594802856, "learning_rate": 6.637361800057427e-06, "loss": 0.3596, "step": 26537 }, { "epoch": 1.217842228442935, "grad_norm": 0.47997209429740906, "learning_rate": 6.637130129656917e-06, "loss": 0.3826, "step": 26538 }, { "epoch": 1.2178881189481896, "grad_norm": 0.48910272121429443, "learning_rate": 6.636898455319627e-06, "loss": 0.3881, "step": 26539 }, { "epoch": 1.217934009453444, "grad_norm": 0.49934619665145874, "learning_rate": 6.636666777046111e-06, "loss": 0.4209, "step": 26540 }, { "epoch": 1.2179798999586986, "grad_norm": 0.501197874546051, "learning_rate": 6.636435094836928e-06, "loss": 0.4366, "step": 26541 }, { "epoch": 1.218025790463953, "grad_norm": 0.4443577826023102, "learning_rate": 6.636203408692637e-06, "loss": 0.3429, "step": 26542 }, { "epoch": 1.2180716809692074, "grad_norm": 0.4614202678203583, "learning_rate": 6.635971718613794e-06, "loss": 0.323, "step": 26543 }, { "epoch": 1.2181175714744619, "grad_norm": 0.5034525394439697, "learning_rate": 6.6357400246009514e-06, "loss": 0.4231, "step": 26544 }, { "epoch": 1.2181634619797164, "grad_norm": 0.47992390394210815, "learning_rate": 6.635508326654674e-06, "loss": 0.3489, "step": 26545 }, { "epoch": 1.2182093524849709, "grad_norm": 0.4550305902957916, "learning_rate": 6.635276624775513e-06, "loss": 0.3266, "step": 26546 }, { "epoch": 1.2182552429902254, "grad_norm": 0.47528770565986633, "learning_rate": 6.63504491896403e-06, "loss": 0.3431, "step": 26547 }, { "epoch": 1.2183011334954799, "grad_norm": 0.4603108763694763, "learning_rate": 6.634813209220779e-06, "loss": 0.3525, "step": 26548 }, { "epoch": 1.2183470240007344, "grad_norm": 0.5018152594566345, "learning_rate": 6.634581495546317e-06, "loss": 0.4565, "step": 26549 }, { "epoch": 1.2183929145059886, "grad_norm": 0.4865402281284332, "learning_rate": 6.634349777941203e-06, "loss": 0.3871, "step": 26550 }, { "epoch": 1.2184388050112431, "grad_norm": 0.4930585026741028, "learning_rate": 6.634118056405994e-06, "loss": 0.4138, "step": 26551 }, { "epoch": 1.2184846955164976, "grad_norm": 0.5254998803138733, "learning_rate": 6.633886330941246e-06, "loss": 0.5326, "step": 26552 }, { "epoch": 1.2185305860217521, "grad_norm": 0.4398391842842102, "learning_rate": 6.633654601547519e-06, "loss": 0.3597, "step": 26553 }, { "epoch": 1.2185764765270066, "grad_norm": 0.4414946138858795, "learning_rate": 6.633422868225367e-06, "loss": 0.3108, "step": 26554 }, { "epoch": 1.2186223670322611, "grad_norm": 0.5112548470497131, "learning_rate": 6.6331911309753475e-06, "loss": 0.4108, "step": 26555 }, { "epoch": 1.2186682575375154, "grad_norm": 0.46837103366851807, "learning_rate": 6.63295938979802e-06, "loss": 0.357, "step": 26556 }, { "epoch": 1.21871414804277, "grad_norm": 0.44416484236717224, "learning_rate": 6.63272764469394e-06, "loss": 0.3515, "step": 26557 }, { "epoch": 1.2187600385480244, "grad_norm": 0.41245585680007935, "learning_rate": 6.632495895663667e-06, "loss": 0.2781, "step": 26558 }, { "epoch": 1.218805929053279, "grad_norm": 0.5223581790924072, "learning_rate": 6.632264142707755e-06, "loss": 0.4338, "step": 26559 }, { "epoch": 1.2188518195585334, "grad_norm": 0.5180321335792542, "learning_rate": 6.632032385826763e-06, "loss": 0.4397, "step": 26560 }, { "epoch": 1.2188977100637879, "grad_norm": 0.5114618539810181, "learning_rate": 6.631800625021248e-06, "loss": 0.4103, "step": 26561 }, { "epoch": 1.2189436005690424, "grad_norm": 0.48380526900291443, "learning_rate": 6.631568860291768e-06, "loss": 0.3766, "step": 26562 }, { "epoch": 1.2189894910742967, "grad_norm": 0.5175414681434631, "learning_rate": 6.631337091638878e-06, "loss": 0.3806, "step": 26563 }, { "epoch": 1.2190353815795512, "grad_norm": 0.4947982728481293, "learning_rate": 6.631105319063141e-06, "loss": 0.4123, "step": 26564 }, { "epoch": 1.2190812720848057, "grad_norm": 0.45331743359565735, "learning_rate": 6.6308735425651085e-06, "loss": 0.3472, "step": 26565 }, { "epoch": 1.2191271625900602, "grad_norm": 0.45677581429481506, "learning_rate": 6.630641762145339e-06, "loss": 0.3695, "step": 26566 }, { "epoch": 1.2191730530953147, "grad_norm": 0.4524634778499603, "learning_rate": 6.630409977804392e-06, "loss": 0.3537, "step": 26567 }, { "epoch": 1.219218943600569, "grad_norm": 0.47633957862854004, "learning_rate": 6.630178189542822e-06, "loss": 0.3775, "step": 26568 }, { "epoch": 1.2192648341058234, "grad_norm": 0.48561084270477295, "learning_rate": 6.629946397361189e-06, "loss": 0.4026, "step": 26569 }, { "epoch": 1.219310724611078, "grad_norm": 0.5073794722557068, "learning_rate": 6.62971460126005e-06, "loss": 0.4541, "step": 26570 }, { "epoch": 1.2193566151163324, "grad_norm": 0.49884694814682007, "learning_rate": 6.629482801239961e-06, "loss": 0.4371, "step": 26571 }, { "epoch": 1.219402505621587, "grad_norm": 0.4563567340373993, "learning_rate": 6.6292509973014796e-06, "loss": 0.3667, "step": 26572 }, { "epoch": 1.2194483961268414, "grad_norm": 0.4753012955188751, "learning_rate": 6.629019189445165e-06, "loss": 0.4142, "step": 26573 }, { "epoch": 1.219494286632096, "grad_norm": 0.4564734399318695, "learning_rate": 6.628787377671572e-06, "loss": 0.3011, "step": 26574 }, { "epoch": 1.2195401771373502, "grad_norm": 0.5007832646369934, "learning_rate": 6.62855556198126e-06, "loss": 0.4545, "step": 26575 }, { "epoch": 1.2195860676426047, "grad_norm": 0.4220879077911377, "learning_rate": 6.628323742374787e-06, "loss": 0.2826, "step": 26576 }, { "epoch": 1.2196319581478592, "grad_norm": 0.47722509503364563, "learning_rate": 6.628091918852707e-06, "loss": 0.3948, "step": 26577 }, { "epoch": 1.2196778486531137, "grad_norm": 0.46549999713897705, "learning_rate": 6.627860091415581e-06, "loss": 0.3289, "step": 26578 }, { "epoch": 1.2197237391583682, "grad_norm": 0.4445994198322296, "learning_rate": 6.627628260063965e-06, "loss": 0.3065, "step": 26579 }, { "epoch": 1.2197696296636227, "grad_norm": 0.47771313786506653, "learning_rate": 6.627396424798418e-06, "loss": 0.3604, "step": 26580 }, { "epoch": 1.219815520168877, "grad_norm": 0.4614464342594147, "learning_rate": 6.627164585619496e-06, "loss": 0.3547, "step": 26581 }, { "epoch": 1.2198614106741315, "grad_norm": 0.49790000915527344, "learning_rate": 6.626932742527755e-06, "loss": 0.4394, "step": 26582 }, { "epoch": 1.219907301179386, "grad_norm": 0.5032796859741211, "learning_rate": 6.626700895523754e-06, "loss": 0.4007, "step": 26583 }, { "epoch": 1.2199531916846404, "grad_norm": 0.48040270805358887, "learning_rate": 6.626469044608053e-06, "loss": 0.3841, "step": 26584 }, { "epoch": 1.219999082189895, "grad_norm": 0.45900288224220276, "learning_rate": 6.626237189781206e-06, "loss": 0.3341, "step": 26585 }, { "epoch": 1.2200449726951494, "grad_norm": 0.43939143419265747, "learning_rate": 6.626005331043772e-06, "loss": 0.3188, "step": 26586 }, { "epoch": 1.220090863200404, "grad_norm": 0.4664939343929291, "learning_rate": 6.625773468396309e-06, "loss": 0.334, "step": 26587 }, { "epoch": 1.2201367537056582, "grad_norm": 0.5194387435913086, "learning_rate": 6.625541601839374e-06, "loss": 0.4502, "step": 26588 }, { "epoch": 1.2201826442109127, "grad_norm": 0.5061895847320557, "learning_rate": 6.625309731373523e-06, "loss": 0.401, "step": 26589 }, { "epoch": 1.2202285347161672, "grad_norm": 0.4548039138317108, "learning_rate": 6.6250778569993155e-06, "loss": 0.2948, "step": 26590 }, { "epoch": 1.2202744252214217, "grad_norm": 0.46734148263931274, "learning_rate": 6.624845978717309e-06, "loss": 0.3494, "step": 26591 }, { "epoch": 1.2203203157266762, "grad_norm": 0.47018906474113464, "learning_rate": 6.624614096528061e-06, "loss": 0.3263, "step": 26592 }, { "epoch": 1.2203662062319307, "grad_norm": 0.49716323614120483, "learning_rate": 6.624382210432128e-06, "loss": 0.3946, "step": 26593 }, { "epoch": 1.220412096737185, "grad_norm": 0.48397278785705566, "learning_rate": 6.624150320430069e-06, "loss": 0.3766, "step": 26594 }, { "epoch": 1.2204579872424395, "grad_norm": 0.47527289390563965, "learning_rate": 6.623918426522441e-06, "loss": 0.3758, "step": 26595 }, { "epoch": 1.220503877747694, "grad_norm": 0.477151095867157, "learning_rate": 6.623686528709802e-06, "loss": 0.4356, "step": 26596 }, { "epoch": 1.2205497682529485, "grad_norm": 0.5443152785301208, "learning_rate": 6.623454626992709e-06, "loss": 0.3504, "step": 26597 }, { "epoch": 1.220595658758203, "grad_norm": 0.47887110710144043, "learning_rate": 6.623222721371721e-06, "loss": 0.4004, "step": 26598 }, { "epoch": 1.2206415492634575, "grad_norm": 0.46839427947998047, "learning_rate": 6.622990811847394e-06, "loss": 0.331, "step": 26599 }, { "epoch": 1.220687439768712, "grad_norm": 0.4455777406692505, "learning_rate": 6.622758898420285e-06, "loss": 0.3123, "step": 26600 }, { "epoch": 1.2207333302739662, "grad_norm": 0.4671574831008911, "learning_rate": 6.622526981090955e-06, "loss": 0.3371, "step": 26601 }, { "epoch": 1.2207792207792207, "grad_norm": 0.49463027715682983, "learning_rate": 6.622295059859959e-06, "loss": 0.4429, "step": 26602 }, { "epoch": 1.2208251112844752, "grad_norm": 0.44564488530158997, "learning_rate": 6.622063134727855e-06, "loss": 0.3624, "step": 26603 }, { "epoch": 1.2208710017897297, "grad_norm": 0.47399866580963135, "learning_rate": 6.621831205695202e-06, "loss": 0.3729, "step": 26604 }, { "epoch": 1.2209168922949842, "grad_norm": 0.47438278794288635, "learning_rate": 6.621599272762555e-06, "loss": 0.3911, "step": 26605 }, { "epoch": 1.2209627828002385, "grad_norm": 0.480461061000824, "learning_rate": 6.621367335930475e-06, "loss": 0.3966, "step": 26606 }, { "epoch": 1.221008673305493, "grad_norm": 0.48052120208740234, "learning_rate": 6.621135395199518e-06, "loss": 0.3406, "step": 26607 }, { "epoch": 1.2210545638107475, "grad_norm": 0.5039651393890381, "learning_rate": 6.620903450570242e-06, "loss": 0.4136, "step": 26608 }, { "epoch": 1.221100454316002, "grad_norm": 0.47502970695495605, "learning_rate": 6.620671502043205e-06, "loss": 0.4025, "step": 26609 }, { "epoch": 1.2211463448212565, "grad_norm": 0.4570407271385193, "learning_rate": 6.620439549618965e-06, "loss": 0.3472, "step": 26610 }, { "epoch": 1.221192235326511, "grad_norm": 0.4864865839481354, "learning_rate": 6.620207593298078e-06, "loss": 0.363, "step": 26611 }, { "epoch": 1.2212381258317655, "grad_norm": 0.44479483366012573, "learning_rate": 6.6199756330811036e-06, "loss": 0.3208, "step": 26612 }, { "epoch": 1.22128401633702, "grad_norm": 0.46428120136260986, "learning_rate": 6.619743668968599e-06, "loss": 0.3307, "step": 26613 }, { "epoch": 1.2213299068422743, "grad_norm": 0.42367973923683167, "learning_rate": 6.619511700961122e-06, "loss": 0.2835, "step": 26614 }, { "epoch": 1.2213757973475288, "grad_norm": 0.4806128144264221, "learning_rate": 6.619279729059231e-06, "loss": 0.3788, "step": 26615 }, { "epoch": 1.2214216878527833, "grad_norm": 0.44946739077568054, "learning_rate": 6.6190477532634835e-06, "loss": 0.3261, "step": 26616 }, { "epoch": 1.2214675783580378, "grad_norm": 0.5040525197982788, "learning_rate": 6.618815773574436e-06, "loss": 0.3887, "step": 26617 }, { "epoch": 1.2215134688632923, "grad_norm": 0.5048179030418396, "learning_rate": 6.618583789992649e-06, "loss": 0.3933, "step": 26618 }, { "epoch": 1.2215593593685465, "grad_norm": 0.5670266151428223, "learning_rate": 6.6183518025186775e-06, "loss": 0.4758, "step": 26619 }, { "epoch": 1.221605249873801, "grad_norm": 0.4258519411087036, "learning_rate": 6.618119811153081e-06, "loss": 0.3006, "step": 26620 }, { "epoch": 1.2216511403790555, "grad_norm": 0.4902563989162445, "learning_rate": 6.617887815896418e-06, "loss": 0.3195, "step": 26621 }, { "epoch": 1.22169703088431, "grad_norm": 0.46177878975868225, "learning_rate": 6.617655816749245e-06, "loss": 0.3574, "step": 26622 }, { "epoch": 1.2217429213895645, "grad_norm": 0.46349120140075684, "learning_rate": 6.617423813712119e-06, "loss": 0.3945, "step": 26623 }, { "epoch": 1.221788811894819, "grad_norm": 0.43407168984413147, "learning_rate": 6.617191806785601e-06, "loss": 0.2791, "step": 26624 }, { "epoch": 1.2218347024000735, "grad_norm": 0.45522263646125793, "learning_rate": 6.616959795970246e-06, "loss": 0.3159, "step": 26625 }, { "epoch": 1.2218805929053278, "grad_norm": 0.44813239574432373, "learning_rate": 6.616727781266612e-06, "loss": 0.3023, "step": 26626 }, { "epoch": 1.2219264834105823, "grad_norm": 0.48098501563072205, "learning_rate": 6.61649576267526e-06, "loss": 0.3839, "step": 26627 }, { "epoch": 1.2219723739158368, "grad_norm": 0.4383300542831421, "learning_rate": 6.616263740196744e-06, "loss": 0.3167, "step": 26628 }, { "epoch": 1.2220182644210913, "grad_norm": 0.4839453399181366, "learning_rate": 6.616031713831625e-06, "loss": 0.4099, "step": 26629 }, { "epoch": 1.2220641549263458, "grad_norm": 0.44403553009033203, "learning_rate": 6.6157996835804616e-06, "loss": 0.2893, "step": 26630 }, { "epoch": 1.2221100454316003, "grad_norm": 0.46382936835289, "learning_rate": 6.615567649443807e-06, "loss": 0.3223, "step": 26631 }, { "epoch": 1.2221559359368546, "grad_norm": 0.514137864112854, "learning_rate": 6.615335611422224e-06, "loss": 0.4226, "step": 26632 }, { "epoch": 1.222201826442109, "grad_norm": 0.4380362629890442, "learning_rate": 6.615103569516267e-06, "loss": 0.3039, "step": 26633 }, { "epoch": 1.2222477169473636, "grad_norm": 0.47946828603744507, "learning_rate": 6.614871523726496e-06, "loss": 0.3945, "step": 26634 }, { "epoch": 1.222293607452618, "grad_norm": 0.4449380040168762, "learning_rate": 6.614639474053471e-06, "loss": 0.3262, "step": 26635 }, { "epoch": 1.2223394979578726, "grad_norm": 0.45133545994758606, "learning_rate": 6.614407420497744e-06, "loss": 0.3285, "step": 26636 }, { "epoch": 1.222385388463127, "grad_norm": 0.4084988832473755, "learning_rate": 6.614175363059878e-06, "loss": 0.2693, "step": 26637 }, { "epoch": 1.2224312789683816, "grad_norm": 0.4470478892326355, "learning_rate": 6.613943301740431e-06, "loss": 0.3217, "step": 26638 }, { "epoch": 1.2224771694736358, "grad_norm": 0.44927704334259033, "learning_rate": 6.613711236539958e-06, "loss": 0.3392, "step": 26639 }, { "epoch": 1.2225230599788903, "grad_norm": 0.41971129179000854, "learning_rate": 6.6134791674590205e-06, "loss": 0.2846, "step": 26640 }, { "epoch": 1.2225689504841448, "grad_norm": 0.46104666590690613, "learning_rate": 6.613247094498174e-06, "loss": 0.3361, "step": 26641 }, { "epoch": 1.2226148409893993, "grad_norm": 0.5026975274085999, "learning_rate": 6.613015017657977e-06, "loss": 0.4027, "step": 26642 }, { "epoch": 1.2226607314946538, "grad_norm": 0.457438200712204, "learning_rate": 6.612782936938987e-06, "loss": 0.3322, "step": 26643 }, { "epoch": 1.222706621999908, "grad_norm": 0.5010858774185181, "learning_rate": 6.612550852341766e-06, "loss": 0.4125, "step": 26644 }, { "epoch": 1.2227525125051626, "grad_norm": 0.5267516374588013, "learning_rate": 6.612318763866866e-06, "loss": 0.4613, "step": 26645 }, { "epoch": 1.222798403010417, "grad_norm": 0.4591442346572876, "learning_rate": 6.612086671514851e-06, "loss": 0.3525, "step": 26646 }, { "epoch": 1.2228442935156716, "grad_norm": 0.4552706778049469, "learning_rate": 6.611854575286275e-06, "loss": 0.3012, "step": 26647 }, { "epoch": 1.222890184020926, "grad_norm": 0.5300567746162415, "learning_rate": 6.611622475181697e-06, "loss": 0.4432, "step": 26648 }, { "epoch": 1.2229360745261806, "grad_norm": 0.48310747742652893, "learning_rate": 6.611390371201677e-06, "loss": 0.377, "step": 26649 }, { "epoch": 1.222981965031435, "grad_norm": 0.4536762237548828, "learning_rate": 6.61115826334677e-06, "loss": 0.3634, "step": 26650 }, { "epoch": 1.2230278555366896, "grad_norm": 0.5072546005249023, "learning_rate": 6.610926151617537e-06, "loss": 0.4282, "step": 26651 }, { "epoch": 1.2230737460419439, "grad_norm": 0.46690991520881653, "learning_rate": 6.610694036014535e-06, "loss": 0.3988, "step": 26652 }, { "epoch": 1.2231196365471984, "grad_norm": 0.4949071705341339, "learning_rate": 6.6104619165383225e-06, "loss": 0.3899, "step": 26653 }, { "epoch": 1.2231655270524529, "grad_norm": 0.47922465205192566, "learning_rate": 6.610229793189456e-06, "loss": 0.3448, "step": 26654 }, { "epoch": 1.2232114175577073, "grad_norm": 0.4879293441772461, "learning_rate": 6.609997665968496e-06, "loss": 0.3738, "step": 26655 }, { "epoch": 1.2232573080629618, "grad_norm": 0.481689453125, "learning_rate": 6.609765534875999e-06, "loss": 0.4249, "step": 26656 }, { "epoch": 1.2233031985682161, "grad_norm": 0.4812041223049164, "learning_rate": 6.609533399912524e-06, "loss": 0.3779, "step": 26657 }, { "epoch": 1.2233490890734706, "grad_norm": 0.5270069241523743, "learning_rate": 6.6093012610786315e-06, "loss": 0.3324, "step": 26658 }, { "epoch": 1.2233949795787251, "grad_norm": 0.48506709933280945, "learning_rate": 6.609069118374875e-06, "loss": 0.3507, "step": 26659 }, { "epoch": 1.2234408700839796, "grad_norm": 0.47983241081237793, "learning_rate": 6.6088369718018155e-06, "loss": 0.3793, "step": 26660 }, { "epoch": 1.2234867605892341, "grad_norm": 0.47340551018714905, "learning_rate": 6.60860482136001e-06, "loss": 0.3265, "step": 26661 }, { "epoch": 1.2235326510944886, "grad_norm": 0.4626866579055786, "learning_rate": 6.60837266705002e-06, "loss": 0.3706, "step": 26662 }, { "epoch": 1.223578541599743, "grad_norm": 0.5242679715156555, "learning_rate": 6.6081405088724e-06, "loss": 0.3932, "step": 26663 }, { "epoch": 1.2236244321049974, "grad_norm": 0.500994086265564, "learning_rate": 6.607908346827709e-06, "loss": 0.3713, "step": 26664 }, { "epoch": 1.2236703226102519, "grad_norm": 0.44292595982551575, "learning_rate": 6.607676180916507e-06, "loss": 0.3095, "step": 26665 }, { "epoch": 1.2237162131155064, "grad_norm": 0.43451201915740967, "learning_rate": 6.60744401113935e-06, "loss": 0.3078, "step": 26666 }, { "epoch": 1.2237621036207609, "grad_norm": 0.49092504382133484, "learning_rate": 6.607211837496799e-06, "loss": 0.4224, "step": 26667 }, { "epoch": 1.2238079941260154, "grad_norm": 0.4264480173587799, "learning_rate": 6.6069796599894095e-06, "loss": 0.2876, "step": 26668 }, { "epoch": 1.2238538846312699, "grad_norm": 0.44917911291122437, "learning_rate": 6.606747478617742e-06, "loss": 0.3676, "step": 26669 }, { "epoch": 1.2238997751365241, "grad_norm": 0.4808778762817383, "learning_rate": 6.606515293382353e-06, "loss": 0.4156, "step": 26670 }, { "epoch": 1.2239456656417786, "grad_norm": 0.48629358410835266, "learning_rate": 6.606283104283801e-06, "loss": 0.3727, "step": 26671 }, { "epoch": 1.2239915561470331, "grad_norm": 0.47963541746139526, "learning_rate": 6.606050911322648e-06, "loss": 0.4065, "step": 26672 }, { "epoch": 1.2240374466522876, "grad_norm": 0.45682913064956665, "learning_rate": 6.605818714499448e-06, "loss": 0.3318, "step": 26673 }, { "epoch": 1.2240833371575421, "grad_norm": 0.47557318210601807, "learning_rate": 6.605586513814759e-06, "loss": 0.3235, "step": 26674 }, { "epoch": 1.2241292276627966, "grad_norm": 0.4608170986175537, "learning_rate": 6.605354309269144e-06, "loss": 0.3425, "step": 26675 }, { "epoch": 1.2241751181680511, "grad_norm": 0.5554015040397644, "learning_rate": 6.605122100863156e-06, "loss": 0.4969, "step": 26676 }, { "epoch": 1.2242210086733054, "grad_norm": 0.4447786211967468, "learning_rate": 6.604889888597357e-06, "loss": 0.326, "step": 26677 }, { "epoch": 1.22426689917856, "grad_norm": 0.5328484773635864, "learning_rate": 6.604657672472306e-06, "loss": 0.3906, "step": 26678 }, { "epoch": 1.2243127896838144, "grad_norm": 0.47341039776802063, "learning_rate": 6.604425452488558e-06, "loss": 0.3613, "step": 26679 }, { "epoch": 1.224358680189069, "grad_norm": 0.42413538694381714, "learning_rate": 6.604193228646674e-06, "loss": 0.2839, "step": 26680 }, { "epoch": 1.2244045706943234, "grad_norm": 0.4877067804336548, "learning_rate": 6.60396100094721e-06, "loss": 0.3349, "step": 26681 }, { "epoch": 1.224450461199578, "grad_norm": 0.45451751351356506, "learning_rate": 6.6037287693907275e-06, "loss": 0.3211, "step": 26682 }, { "epoch": 1.2244963517048322, "grad_norm": 0.4359849989414215, "learning_rate": 6.603496533977782e-06, "loss": 0.3061, "step": 26683 }, { "epoch": 1.2245422422100867, "grad_norm": 0.46876490116119385, "learning_rate": 6.6032642947089355e-06, "loss": 0.3669, "step": 26684 }, { "epoch": 1.2245881327153412, "grad_norm": 0.49478214979171753, "learning_rate": 6.603032051584744e-06, "loss": 0.4302, "step": 26685 }, { "epoch": 1.2246340232205957, "grad_norm": 0.4658558666706085, "learning_rate": 6.602799804605765e-06, "loss": 0.3306, "step": 26686 }, { "epoch": 1.2246799137258502, "grad_norm": 0.4269648492336273, "learning_rate": 6.602567553772559e-06, "loss": 0.3003, "step": 26687 }, { "epoch": 1.2247258042311047, "grad_norm": 0.45072057843208313, "learning_rate": 6.602335299085683e-06, "loss": 0.3522, "step": 26688 }, { "epoch": 1.2247716947363592, "grad_norm": 0.49658888578414917, "learning_rate": 6.6021030405456975e-06, "loss": 0.4318, "step": 26689 }, { "epoch": 1.2248175852416134, "grad_norm": 0.46530285477638245, "learning_rate": 6.60187077815316e-06, "loss": 0.3191, "step": 26690 }, { "epoch": 1.224863475746868, "grad_norm": 0.4730072319507599, "learning_rate": 6.6016385119086276e-06, "loss": 0.3655, "step": 26691 }, { "epoch": 1.2249093662521224, "grad_norm": 0.5185195803642273, "learning_rate": 6.601406241812661e-06, "loss": 0.398, "step": 26692 }, { "epoch": 1.224955256757377, "grad_norm": 0.49943414330482483, "learning_rate": 6.601173967865817e-06, "loss": 0.4051, "step": 26693 }, { "epoch": 1.2250011472626314, "grad_norm": 0.5599116683006287, "learning_rate": 6.600941690068656e-06, "loss": 0.3842, "step": 26694 }, { "epoch": 1.2250470377678857, "grad_norm": 0.4581001102924347, "learning_rate": 6.600709408421735e-06, "loss": 0.3438, "step": 26695 }, { "epoch": 1.2250929282731402, "grad_norm": 0.48061642050743103, "learning_rate": 6.600477122925613e-06, "loss": 0.3622, "step": 26696 }, { "epoch": 1.2251388187783947, "grad_norm": 0.4949869215488434, "learning_rate": 6.600244833580849e-06, "loss": 0.3871, "step": 26697 }, { "epoch": 1.2251847092836492, "grad_norm": 0.45928436517715454, "learning_rate": 6.600012540388001e-06, "loss": 0.3336, "step": 26698 }, { "epoch": 1.2252305997889037, "grad_norm": 0.48367682099342346, "learning_rate": 6.599780243347627e-06, "loss": 0.4157, "step": 26699 }, { "epoch": 1.2252764902941582, "grad_norm": 0.45616573095321655, "learning_rate": 6.599547942460287e-06, "loss": 0.3361, "step": 26700 }, { "epoch": 1.2253223807994127, "grad_norm": 0.4276859760284424, "learning_rate": 6.59931563772654e-06, "loss": 0.2667, "step": 26701 }, { "epoch": 1.2253682713046672, "grad_norm": 0.4788587689399719, "learning_rate": 6.5990833291469425e-06, "loss": 0.3476, "step": 26702 }, { "epoch": 1.2254141618099215, "grad_norm": 0.5263826847076416, "learning_rate": 6.598851016722054e-06, "loss": 0.3683, "step": 26703 }, { "epoch": 1.225460052315176, "grad_norm": 0.4685773253440857, "learning_rate": 6.598618700452435e-06, "loss": 0.3313, "step": 26704 }, { "epoch": 1.2255059428204305, "grad_norm": 0.45673033595085144, "learning_rate": 6.598386380338642e-06, "loss": 0.3552, "step": 26705 }, { "epoch": 1.225551833325685, "grad_norm": 0.44203996658325195, "learning_rate": 6.598154056381234e-06, "loss": 0.3027, "step": 26706 }, { "epoch": 1.2255977238309395, "grad_norm": 0.47695299983024597, "learning_rate": 6.59792172858077e-06, "loss": 0.3732, "step": 26707 }, { "epoch": 1.2256436143361937, "grad_norm": 0.46547001600265503, "learning_rate": 6.5976893969378065e-06, "loss": 0.3783, "step": 26708 }, { "epoch": 1.2256895048414482, "grad_norm": 0.47934696078300476, "learning_rate": 6.597457061452907e-06, "loss": 0.3194, "step": 26709 }, { "epoch": 1.2257353953467027, "grad_norm": 0.46296289563179016, "learning_rate": 6.597224722126626e-06, "loss": 0.3844, "step": 26710 }, { "epoch": 1.2257812858519572, "grad_norm": 0.44655686616897583, "learning_rate": 6.596992378959525e-06, "loss": 0.352, "step": 26711 }, { "epoch": 1.2258271763572117, "grad_norm": 0.4813513457775116, "learning_rate": 6.59676003195216e-06, "loss": 0.3595, "step": 26712 }, { "epoch": 1.2258730668624662, "grad_norm": 0.4577640891075134, "learning_rate": 6.596527681105092e-06, "loss": 0.3397, "step": 26713 }, { "epoch": 1.2259189573677207, "grad_norm": 0.4611766040325165, "learning_rate": 6.596295326418878e-06, "loss": 0.3521, "step": 26714 }, { "epoch": 1.225964847872975, "grad_norm": 0.4652535915374756, "learning_rate": 6.596062967894077e-06, "loss": 0.3547, "step": 26715 }, { "epoch": 1.2260107383782295, "grad_norm": 0.49030205607414246, "learning_rate": 6.59583060553125e-06, "loss": 0.418, "step": 26716 }, { "epoch": 1.226056628883484, "grad_norm": 0.5197466611862183, "learning_rate": 6.595598239330954e-06, "loss": 0.42, "step": 26717 }, { "epoch": 1.2261025193887385, "grad_norm": 0.48115867376327515, "learning_rate": 6.595365869293746e-06, "loss": 0.4039, "step": 26718 }, { "epoch": 1.226148409893993, "grad_norm": 0.4573347568511963, "learning_rate": 6.5951334954201875e-06, "loss": 0.3679, "step": 26719 }, { "epoch": 1.2261943003992475, "grad_norm": 0.49239447712898254, "learning_rate": 6.594901117710837e-06, "loss": 0.4232, "step": 26720 }, { "epoch": 1.2262401909045018, "grad_norm": 0.4402032196521759, "learning_rate": 6.594668736166252e-06, "loss": 0.2833, "step": 26721 }, { "epoch": 1.2262860814097563, "grad_norm": 0.4782503545284271, "learning_rate": 6.5944363507869926e-06, "loss": 0.3787, "step": 26722 }, { "epoch": 1.2263319719150108, "grad_norm": 0.47473153471946716, "learning_rate": 6.594203961573617e-06, "loss": 0.3923, "step": 26723 }, { "epoch": 1.2263778624202653, "grad_norm": 0.47355198860168457, "learning_rate": 6.593971568526683e-06, "loss": 0.3625, "step": 26724 }, { "epoch": 1.2264237529255198, "grad_norm": 0.44638296961784363, "learning_rate": 6.59373917164675e-06, "loss": 0.3068, "step": 26725 }, { "epoch": 1.2264696434307742, "grad_norm": 0.47800934314727783, "learning_rate": 6.593506770934379e-06, "loss": 0.3767, "step": 26726 }, { "epoch": 1.2265155339360287, "grad_norm": 0.4054155945777893, "learning_rate": 6.593274366390128e-06, "loss": 0.285, "step": 26727 }, { "epoch": 1.226561424441283, "grad_norm": 0.4662257134914398, "learning_rate": 6.593041958014553e-06, "loss": 0.3606, "step": 26728 }, { "epoch": 1.2266073149465375, "grad_norm": 0.5130449533462524, "learning_rate": 6.592809545808215e-06, "loss": 0.4042, "step": 26729 }, { "epoch": 1.226653205451792, "grad_norm": 0.4662942588329315, "learning_rate": 6.592577129771672e-06, "loss": 0.3165, "step": 26730 }, { "epoch": 1.2266990959570465, "grad_norm": 0.480329692363739, "learning_rate": 6.592344709905486e-06, "loss": 0.4103, "step": 26731 }, { "epoch": 1.226744986462301, "grad_norm": 0.45333853363990784, "learning_rate": 6.592112286210211e-06, "loss": 0.3518, "step": 26732 }, { "epoch": 1.2267908769675553, "grad_norm": 0.44146671891212463, "learning_rate": 6.591879858686411e-06, "loss": 0.3206, "step": 26733 }, { "epoch": 1.2268367674728098, "grad_norm": 0.4588165879249573, "learning_rate": 6.591647427334641e-06, "loss": 0.2941, "step": 26734 }, { "epoch": 1.2268826579780643, "grad_norm": 0.5139807462692261, "learning_rate": 6.5914149921554605e-06, "loss": 0.4425, "step": 26735 }, { "epoch": 1.2269285484833188, "grad_norm": 0.4632120728492737, "learning_rate": 6.59118255314943e-06, "loss": 0.374, "step": 26736 }, { "epoch": 1.2269744389885733, "grad_norm": 0.4953192174434662, "learning_rate": 6.590950110317108e-06, "loss": 0.3077, "step": 26737 }, { "epoch": 1.2270203294938278, "grad_norm": 0.4845247268676758, "learning_rate": 6.590717663659054e-06, "loss": 0.3685, "step": 26738 }, { "epoch": 1.2270662199990823, "grad_norm": 0.4843023121356964, "learning_rate": 6.590485213175823e-06, "loss": 0.3594, "step": 26739 }, { "epoch": 1.2271121105043368, "grad_norm": 0.48512545228004456, "learning_rate": 6.59025275886798e-06, "loss": 0.3811, "step": 26740 }, { "epoch": 1.227158001009591, "grad_norm": 0.49913138151168823, "learning_rate": 6.59002030073608e-06, "loss": 0.3861, "step": 26741 }, { "epoch": 1.2272038915148455, "grad_norm": 0.4929863214492798, "learning_rate": 6.589787838780682e-06, "loss": 0.4037, "step": 26742 }, { "epoch": 1.2272497820201, "grad_norm": 0.49018481373786926, "learning_rate": 6.589555373002347e-06, "loss": 0.4216, "step": 26743 }, { "epoch": 1.2272956725253545, "grad_norm": 0.45413222908973694, "learning_rate": 6.589322903401634e-06, "loss": 0.3564, "step": 26744 }, { "epoch": 1.227341563030609, "grad_norm": 0.48070651292800903, "learning_rate": 6.589090429979101e-06, "loss": 0.3251, "step": 26745 }, { "epoch": 1.2273874535358633, "grad_norm": 0.47393009066581726, "learning_rate": 6.588857952735305e-06, "loss": 0.3235, "step": 26746 }, { "epoch": 1.2274333440411178, "grad_norm": 0.48133018612861633, "learning_rate": 6.5886254716708066e-06, "loss": 0.3205, "step": 26747 }, { "epoch": 1.2274792345463723, "grad_norm": 0.44927507638931274, "learning_rate": 6.588392986786167e-06, "loss": 0.3194, "step": 26748 }, { "epoch": 1.2275251250516268, "grad_norm": 0.5140783190727234, "learning_rate": 6.588160498081945e-06, "loss": 0.3846, "step": 26749 }, { "epoch": 1.2275710155568813, "grad_norm": 0.4788554906845093, "learning_rate": 6.587928005558696e-06, "loss": 0.386, "step": 26750 }, { "epoch": 1.2276169060621358, "grad_norm": 0.47465676069259644, "learning_rate": 6.587695509216983e-06, "loss": 0.3351, "step": 26751 }, { "epoch": 1.2276627965673903, "grad_norm": 0.4933360815048218, "learning_rate": 6.587463009057361e-06, "loss": 0.3599, "step": 26752 }, { "epoch": 1.2277086870726446, "grad_norm": 0.43314623832702637, "learning_rate": 6.587230505080392e-06, "loss": 0.3161, "step": 26753 }, { "epoch": 1.227754577577899, "grad_norm": 0.48525261878967285, "learning_rate": 6.586997997286636e-06, "loss": 0.3938, "step": 26754 }, { "epoch": 1.2278004680831536, "grad_norm": 0.4501439929008484, "learning_rate": 6.586765485676651e-06, "loss": 0.3563, "step": 26755 }, { "epoch": 1.227846358588408, "grad_norm": 0.47431784868240356, "learning_rate": 6.586532970250994e-06, "loss": 0.327, "step": 26756 }, { "epoch": 1.2278922490936626, "grad_norm": 0.5014031529426575, "learning_rate": 6.586300451010227e-06, "loss": 0.3329, "step": 26757 }, { "epoch": 1.227938139598917, "grad_norm": 0.4664098918437958, "learning_rate": 6.586067927954907e-06, "loss": 0.3508, "step": 26758 }, { "epoch": 1.2279840301041713, "grad_norm": 0.5229552388191223, "learning_rate": 6.585835401085597e-06, "loss": 0.432, "step": 26759 }, { "epoch": 1.2280299206094258, "grad_norm": 0.46785643696784973, "learning_rate": 6.585602870402851e-06, "loss": 0.3618, "step": 26760 }, { "epoch": 1.2280758111146803, "grad_norm": 0.4325353801250458, "learning_rate": 6.585370335907231e-06, "loss": 0.2929, "step": 26761 }, { "epoch": 1.2281217016199348, "grad_norm": 0.5042179226875305, "learning_rate": 6.585137797599296e-06, "loss": 0.4254, "step": 26762 }, { "epoch": 1.2281675921251893, "grad_norm": 0.5022868514060974, "learning_rate": 6.584905255479603e-06, "loss": 0.4651, "step": 26763 }, { "epoch": 1.2282134826304438, "grad_norm": 0.4896521270275116, "learning_rate": 6.584672709548714e-06, "loss": 0.3747, "step": 26764 }, { "epoch": 1.2282593731356983, "grad_norm": 0.4874773621559143, "learning_rate": 6.584440159807188e-06, "loss": 0.4197, "step": 26765 }, { "epoch": 1.2283052636409526, "grad_norm": 0.4975726008415222, "learning_rate": 6.584207606255585e-06, "loss": 0.4746, "step": 26766 }, { "epoch": 1.228351154146207, "grad_norm": 0.46541446447372437, "learning_rate": 6.58397504889446e-06, "loss": 0.3522, "step": 26767 }, { "epoch": 1.2283970446514616, "grad_norm": 0.42959362268447876, "learning_rate": 6.583742487724377e-06, "loss": 0.3029, "step": 26768 }, { "epoch": 1.228442935156716, "grad_norm": 0.49454110860824585, "learning_rate": 6.583509922745892e-06, "loss": 0.3915, "step": 26769 }, { "epoch": 1.2284888256619706, "grad_norm": 0.462556928396225, "learning_rate": 6.583277353959565e-06, "loss": 0.4012, "step": 26770 }, { "epoch": 1.228534716167225, "grad_norm": 0.4470585584640503, "learning_rate": 6.583044781365957e-06, "loss": 0.3354, "step": 26771 }, { "epoch": 1.2285806066724794, "grad_norm": 0.4601151943206787, "learning_rate": 6.582812204965625e-06, "loss": 0.349, "step": 26772 }, { "epoch": 1.2286264971777339, "grad_norm": 0.508791983127594, "learning_rate": 6.582579624759129e-06, "loss": 0.3977, "step": 26773 }, { "epoch": 1.2286723876829884, "grad_norm": 0.47290509939193726, "learning_rate": 6.582347040747029e-06, "loss": 0.3758, "step": 26774 }, { "epoch": 1.2287182781882429, "grad_norm": 0.43615254759788513, "learning_rate": 6.582114452929883e-06, "loss": 0.3073, "step": 26775 }, { "epoch": 1.2287641686934974, "grad_norm": 0.500091016292572, "learning_rate": 6.581881861308253e-06, "loss": 0.367, "step": 26776 }, { "epoch": 1.2288100591987519, "grad_norm": 0.4326653778553009, "learning_rate": 6.581649265882695e-06, "loss": 0.3306, "step": 26777 }, { "epoch": 1.2288559497040064, "grad_norm": 0.4728100895881653, "learning_rate": 6.581416666653768e-06, "loss": 0.3392, "step": 26778 }, { "epoch": 1.2289018402092606, "grad_norm": 0.49063777923583984, "learning_rate": 6.581184063622036e-06, "loss": 0.3878, "step": 26779 }, { "epoch": 1.2289477307145151, "grad_norm": 0.5307078957557678, "learning_rate": 6.580951456788054e-06, "loss": 0.4702, "step": 26780 }, { "epoch": 1.2289936212197696, "grad_norm": 0.46190381050109863, "learning_rate": 6.580718846152383e-06, "loss": 0.313, "step": 26781 }, { "epoch": 1.2290395117250241, "grad_norm": 0.4525344967842102, "learning_rate": 6.580486231715582e-06, "loss": 0.3299, "step": 26782 }, { "epoch": 1.2290854022302786, "grad_norm": 0.4335699677467346, "learning_rate": 6.58025361347821e-06, "loss": 0.3196, "step": 26783 }, { "epoch": 1.229131292735533, "grad_norm": 0.5410346984863281, "learning_rate": 6.5800209914408266e-06, "loss": 0.4164, "step": 26784 }, { "epoch": 1.2291771832407874, "grad_norm": 0.47890084981918335, "learning_rate": 6.579788365603992e-06, "loss": 0.3683, "step": 26785 }, { "epoch": 1.229223073746042, "grad_norm": 0.4500812590122223, "learning_rate": 6.579555735968265e-06, "loss": 0.3211, "step": 26786 }, { "epoch": 1.2292689642512964, "grad_norm": 0.49350765347480774, "learning_rate": 6.579323102534205e-06, "loss": 0.405, "step": 26787 }, { "epoch": 1.229314854756551, "grad_norm": 0.503693699836731, "learning_rate": 6.579090465302371e-06, "loss": 0.4402, "step": 26788 }, { "epoch": 1.2293607452618054, "grad_norm": 0.4721743166446686, "learning_rate": 6.578857824273323e-06, "loss": 0.3828, "step": 26789 }, { "epoch": 1.2294066357670599, "grad_norm": 0.483810693025589, "learning_rate": 6.57862517944762e-06, "loss": 0.3364, "step": 26790 }, { "epoch": 1.2294525262723142, "grad_norm": 0.4622254967689514, "learning_rate": 6.578392530825823e-06, "loss": 0.35, "step": 26791 }, { "epoch": 1.2294984167775687, "grad_norm": 0.4170423448085785, "learning_rate": 6.578159878408489e-06, "loss": 0.2992, "step": 26792 }, { "epoch": 1.2295443072828232, "grad_norm": 0.49348220229148865, "learning_rate": 6.577927222196179e-06, "loss": 0.3829, "step": 26793 }, { "epoch": 1.2295901977880777, "grad_norm": 0.49790406227111816, "learning_rate": 6.577694562189452e-06, "loss": 0.3119, "step": 26794 }, { "epoch": 1.2296360882933322, "grad_norm": 0.4708254933357239, "learning_rate": 6.577461898388866e-06, "loss": 0.3073, "step": 26795 }, { "epoch": 1.2296819787985867, "grad_norm": 0.4812576472759247, "learning_rate": 6.5772292307949835e-06, "loss": 0.3589, "step": 26796 }, { "epoch": 1.229727869303841, "grad_norm": 0.4936792850494385, "learning_rate": 6.576996559408362e-06, "loss": 0.4122, "step": 26797 }, { "epoch": 1.2297737598090954, "grad_norm": 0.4703616797924042, "learning_rate": 6.576763884229562e-06, "loss": 0.3229, "step": 26798 }, { "epoch": 1.22981965031435, "grad_norm": 0.4486231207847595, "learning_rate": 6.576531205259142e-06, "loss": 0.3465, "step": 26799 }, { "epoch": 1.2298655408196044, "grad_norm": 0.49977901577949524, "learning_rate": 6.576298522497663e-06, "loss": 0.3883, "step": 26800 }, { "epoch": 1.229911431324859, "grad_norm": 0.4543660879135132, "learning_rate": 6.576065835945682e-06, "loss": 0.2982, "step": 26801 }, { "epoch": 1.2299573218301134, "grad_norm": 0.45072004199028015, "learning_rate": 6.575833145603761e-06, "loss": 0.3036, "step": 26802 }, { "epoch": 1.230003212335368, "grad_norm": 0.45425841212272644, "learning_rate": 6.575600451472459e-06, "loss": 0.3604, "step": 26803 }, { "epoch": 1.2300491028406222, "grad_norm": 0.4760380685329437, "learning_rate": 6.575367753552334e-06, "loss": 0.3756, "step": 26804 }, { "epoch": 1.2300949933458767, "grad_norm": 0.507774829864502, "learning_rate": 6.575135051843948e-06, "loss": 0.4101, "step": 26805 }, { "epoch": 1.2301408838511312, "grad_norm": 0.4474605917930603, "learning_rate": 6.574902346347858e-06, "loss": 0.3411, "step": 26806 }, { "epoch": 1.2301867743563857, "grad_norm": 0.4946867525577545, "learning_rate": 6.574669637064626e-06, "loss": 0.398, "step": 26807 }, { "epoch": 1.2302326648616402, "grad_norm": 0.4577609896659851, "learning_rate": 6.574436923994809e-06, "loss": 0.3508, "step": 26808 }, { "epoch": 1.2302785553668947, "grad_norm": 0.4675033390522003, "learning_rate": 6.57420420713897e-06, "loss": 0.3497, "step": 26809 }, { "epoch": 1.230324445872149, "grad_norm": 0.44263747334480286, "learning_rate": 6.573971486497667e-06, "loss": 0.2828, "step": 26810 }, { "epoch": 1.2303703363774035, "grad_norm": 0.4902964234352112, "learning_rate": 6.5737387620714574e-06, "loss": 0.3919, "step": 26811 }, { "epoch": 1.230416226882658, "grad_norm": 0.4896829128265381, "learning_rate": 6.573506033860904e-06, "loss": 0.4181, "step": 26812 }, { "epoch": 1.2304621173879124, "grad_norm": 0.46180468797683716, "learning_rate": 6.573273301866565e-06, "loss": 0.3498, "step": 26813 }, { "epoch": 1.230508007893167, "grad_norm": 0.4772176146507263, "learning_rate": 6.573040566089001e-06, "loss": 0.3521, "step": 26814 }, { "epoch": 1.2305538983984214, "grad_norm": 0.47646379470825195, "learning_rate": 6.572807826528769e-06, "loss": 0.3462, "step": 26815 }, { "epoch": 1.230599788903676, "grad_norm": 0.4549470543861389, "learning_rate": 6.572575083186432e-06, "loss": 0.3323, "step": 26816 }, { "epoch": 1.2306456794089302, "grad_norm": 0.5174025297164917, "learning_rate": 6.5723423360625475e-06, "loss": 0.4986, "step": 26817 }, { "epoch": 1.2306915699141847, "grad_norm": 0.4619024395942688, "learning_rate": 6.572109585157676e-06, "loss": 0.3496, "step": 26818 }, { "epoch": 1.2307374604194392, "grad_norm": 0.4593014419078827, "learning_rate": 6.5718768304723764e-06, "loss": 0.3146, "step": 26819 }, { "epoch": 1.2307833509246937, "grad_norm": 0.4824551045894623, "learning_rate": 6.571644072007211e-06, "loss": 0.3638, "step": 26820 }, { "epoch": 1.2308292414299482, "grad_norm": 0.469211608171463, "learning_rate": 6.571411309762736e-06, "loss": 0.3814, "step": 26821 }, { "epoch": 1.2308751319352025, "grad_norm": 0.47740665078163147, "learning_rate": 6.5711785437395145e-06, "loss": 0.3401, "step": 26822 }, { "epoch": 1.230921022440457, "grad_norm": 0.476235955953598, "learning_rate": 6.570945773938103e-06, "loss": 0.3494, "step": 26823 }, { "epoch": 1.2309669129457115, "grad_norm": 0.4675827622413635, "learning_rate": 6.5707130003590625e-06, "loss": 0.3653, "step": 26824 }, { "epoch": 1.231012803450966, "grad_norm": 0.45169585943222046, "learning_rate": 6.570480223002955e-06, "loss": 0.3657, "step": 26825 }, { "epoch": 1.2310586939562205, "grad_norm": 0.48313525319099426, "learning_rate": 6.570247441870336e-06, "loss": 0.4119, "step": 26826 }, { "epoch": 1.231104584461475, "grad_norm": 0.45709624886512756, "learning_rate": 6.57001465696177e-06, "loss": 0.3206, "step": 26827 }, { "epoch": 1.2311504749667295, "grad_norm": 0.45487353205680847, "learning_rate": 6.569781868277812e-06, "loss": 0.301, "step": 26828 }, { "epoch": 1.231196365471984, "grad_norm": 0.508780837059021, "learning_rate": 6.569549075819024e-06, "loss": 0.3975, "step": 26829 }, { "epoch": 1.2312422559772382, "grad_norm": 0.4841972589492798, "learning_rate": 6.569316279585967e-06, "loss": 0.3536, "step": 26830 }, { "epoch": 1.2312881464824927, "grad_norm": 0.47733545303344727, "learning_rate": 6.569083479579199e-06, "loss": 0.4017, "step": 26831 }, { "epoch": 1.2313340369877472, "grad_norm": 0.4763106405735016, "learning_rate": 6.568850675799281e-06, "loss": 0.374, "step": 26832 }, { "epoch": 1.2313799274930017, "grad_norm": 0.4054230749607086, "learning_rate": 6.568617868246771e-06, "loss": 0.2554, "step": 26833 }, { "epoch": 1.2314258179982562, "grad_norm": 0.48868876695632935, "learning_rate": 6.5683850569222305e-06, "loss": 0.3971, "step": 26834 }, { "epoch": 1.2314717085035105, "grad_norm": 0.4994022250175476, "learning_rate": 6.5681522418262204e-06, "loss": 0.4521, "step": 26835 }, { "epoch": 1.231517599008765, "grad_norm": 0.4670258164405823, "learning_rate": 6.567919422959299e-06, "loss": 0.3863, "step": 26836 }, { "epoch": 1.2315634895140195, "grad_norm": 0.4919496178627014, "learning_rate": 6.5676866003220245e-06, "loss": 0.4444, "step": 26837 }, { "epoch": 1.231609380019274, "grad_norm": 0.40840062499046326, "learning_rate": 6.567453773914959e-06, "loss": 0.2712, "step": 26838 }, { "epoch": 1.2316552705245285, "grad_norm": 0.5102701783180237, "learning_rate": 6.5672209437386625e-06, "loss": 0.4313, "step": 26839 }, { "epoch": 1.231701161029783, "grad_norm": 0.4413122534751892, "learning_rate": 6.566988109793693e-06, "loss": 0.3124, "step": 26840 }, { "epoch": 1.2317470515350375, "grad_norm": 0.43845438957214355, "learning_rate": 6.566755272080614e-06, "loss": 0.3198, "step": 26841 }, { "epoch": 1.2317929420402918, "grad_norm": 0.4456599950790405, "learning_rate": 6.566522430599982e-06, "loss": 0.2847, "step": 26842 }, { "epoch": 1.2318388325455463, "grad_norm": 0.7040122747421265, "learning_rate": 6.566289585352356e-06, "loss": 0.3266, "step": 26843 }, { "epoch": 1.2318847230508008, "grad_norm": 0.48630186915397644, "learning_rate": 6.5660567363383e-06, "loss": 0.3942, "step": 26844 }, { "epoch": 1.2319306135560553, "grad_norm": 0.5093220472335815, "learning_rate": 6.565823883558371e-06, "loss": 0.3951, "step": 26845 }, { "epoch": 1.2319765040613098, "grad_norm": 0.47909197211265564, "learning_rate": 6.56559102701313e-06, "loss": 0.2966, "step": 26846 }, { "epoch": 1.2320223945665643, "grad_norm": 0.5358510613441467, "learning_rate": 6.565358166703137e-06, "loss": 0.3791, "step": 26847 }, { "epoch": 1.2320682850718185, "grad_norm": 0.5173203945159912, "learning_rate": 6.565125302628952e-06, "loss": 0.3836, "step": 26848 }, { "epoch": 1.232114175577073, "grad_norm": 0.4838600754737854, "learning_rate": 6.564892434791134e-06, "loss": 0.3832, "step": 26849 }, { "epoch": 1.2321600660823275, "grad_norm": 0.45927414298057556, "learning_rate": 6.5646595631902436e-06, "loss": 0.2631, "step": 26850 }, { "epoch": 1.232205956587582, "grad_norm": 0.4793802797794342, "learning_rate": 6.564426687826841e-06, "loss": 0.3356, "step": 26851 }, { "epoch": 1.2322518470928365, "grad_norm": 0.4469888210296631, "learning_rate": 6.564193808701486e-06, "loss": 0.3038, "step": 26852 }, { "epoch": 1.232297737598091, "grad_norm": 0.4813072085380554, "learning_rate": 6.563960925814739e-06, "loss": 0.3151, "step": 26853 }, { "epoch": 1.2323436281033455, "grad_norm": 0.5374873280525208, "learning_rate": 6.563728039167158e-06, "loss": 0.4666, "step": 26854 }, { "epoch": 1.2323895186085998, "grad_norm": 0.4790475368499756, "learning_rate": 6.563495148759305e-06, "loss": 0.3576, "step": 26855 }, { "epoch": 1.2324354091138543, "grad_norm": 0.5061254501342773, "learning_rate": 6.56326225459174e-06, "loss": 0.3917, "step": 26856 }, { "epoch": 1.2324812996191088, "grad_norm": 0.47590959072113037, "learning_rate": 6.563029356665022e-06, "loss": 0.3691, "step": 26857 }, { "epoch": 1.2325271901243633, "grad_norm": 0.4405446946620941, "learning_rate": 6.562796454979714e-06, "loss": 0.3329, "step": 26858 }, { "epoch": 1.2325730806296178, "grad_norm": 0.4576973617076874, "learning_rate": 6.562563549536371e-06, "loss": 0.3423, "step": 26859 }, { "epoch": 1.2326189711348723, "grad_norm": 0.4621069133281708, "learning_rate": 6.562330640335557e-06, "loss": 0.3791, "step": 26860 }, { "epoch": 1.2326648616401266, "grad_norm": 0.42332491278648376, "learning_rate": 6.562097727377832e-06, "loss": 0.2838, "step": 26861 }, { "epoch": 1.232710752145381, "grad_norm": 0.4770330488681793, "learning_rate": 6.561864810663753e-06, "loss": 0.3204, "step": 26862 }, { "epoch": 1.2327566426506356, "grad_norm": 0.4412253201007843, "learning_rate": 6.5616318901938836e-06, "loss": 0.309, "step": 26863 }, { "epoch": 1.23280253315589, "grad_norm": 0.484821081161499, "learning_rate": 6.561398965968781e-06, "loss": 0.4124, "step": 26864 }, { "epoch": 1.2328484236611446, "grad_norm": 0.46991825103759766, "learning_rate": 6.561166037989009e-06, "loss": 0.3227, "step": 26865 }, { "epoch": 1.232894314166399, "grad_norm": 0.46430015563964844, "learning_rate": 6.560933106255122e-06, "loss": 0.3774, "step": 26866 }, { "epoch": 1.2329402046716536, "grad_norm": 0.46950358152389526, "learning_rate": 6.560700170767686e-06, "loss": 0.3203, "step": 26867 }, { "epoch": 1.2329860951769078, "grad_norm": 0.4655531942844391, "learning_rate": 6.560467231527258e-06, "loss": 0.3326, "step": 26868 }, { "epoch": 1.2330319856821623, "grad_norm": 0.5244982242584229, "learning_rate": 6.560234288534398e-06, "loss": 0.4344, "step": 26869 }, { "epoch": 1.2330778761874168, "grad_norm": 0.4458554685115814, "learning_rate": 6.560001341789669e-06, "loss": 0.3256, "step": 26870 }, { "epoch": 1.2331237666926713, "grad_norm": 0.45476922392845154, "learning_rate": 6.559768391293627e-06, "loss": 0.3349, "step": 26871 }, { "epoch": 1.2331696571979258, "grad_norm": 0.481824666261673, "learning_rate": 6.559535437046836e-06, "loss": 0.3337, "step": 26872 }, { "epoch": 1.23321554770318, "grad_norm": 0.45806920528411865, "learning_rate": 6.559302479049852e-06, "loss": 0.3283, "step": 26873 }, { "epoch": 1.2332614382084346, "grad_norm": 0.454267293214798, "learning_rate": 6.55906951730324e-06, "loss": 0.3487, "step": 26874 }, { "epoch": 1.233307328713689, "grad_norm": 0.45353686809539795, "learning_rate": 6.5588365518075594e-06, "loss": 0.3321, "step": 26875 }, { "epoch": 1.2333532192189436, "grad_norm": 0.4311416745185852, "learning_rate": 6.558603582563366e-06, "loss": 0.2918, "step": 26876 }, { "epoch": 1.233399109724198, "grad_norm": 0.48102760314941406, "learning_rate": 6.558370609571223e-06, "loss": 0.409, "step": 26877 }, { "epoch": 1.2334450002294526, "grad_norm": 0.45825618505477905, "learning_rate": 6.5581376328316916e-06, "loss": 0.3382, "step": 26878 }, { "epoch": 1.233490890734707, "grad_norm": 0.47809654474258423, "learning_rate": 6.557904652345329e-06, "loss": 0.3492, "step": 26879 }, { "epoch": 1.2335367812399614, "grad_norm": 0.48459911346435547, "learning_rate": 6.5576716681126995e-06, "loss": 0.3929, "step": 26880 }, { "epoch": 1.2335826717452159, "grad_norm": 0.4668811857700348, "learning_rate": 6.557438680134361e-06, "loss": 0.3612, "step": 26881 }, { "epoch": 1.2336285622504704, "grad_norm": 0.4894791543483734, "learning_rate": 6.557205688410873e-06, "loss": 0.3296, "step": 26882 }, { "epoch": 1.2336744527557248, "grad_norm": 0.45420366525650024, "learning_rate": 6.5569726929427965e-06, "loss": 0.3452, "step": 26883 }, { "epoch": 1.2337203432609793, "grad_norm": 0.4797327518463135, "learning_rate": 6.556739693730694e-06, "loss": 0.3514, "step": 26884 }, { "epoch": 1.2337662337662338, "grad_norm": 0.4625242352485657, "learning_rate": 6.556506690775124e-06, "loss": 0.3346, "step": 26885 }, { "epoch": 1.2338121242714881, "grad_norm": 0.49694740772247314, "learning_rate": 6.556273684076644e-06, "loss": 0.4041, "step": 26886 }, { "epoch": 1.2338580147767426, "grad_norm": 0.44436293840408325, "learning_rate": 6.556040673635819e-06, "loss": 0.2896, "step": 26887 }, { "epoch": 1.2339039052819971, "grad_norm": 0.4799072742462158, "learning_rate": 6.555807659453206e-06, "loss": 0.4117, "step": 26888 }, { "epoch": 1.2339497957872516, "grad_norm": 0.48397886753082275, "learning_rate": 6.555574641529368e-06, "loss": 0.3754, "step": 26889 }, { "epoch": 1.233995686292506, "grad_norm": 0.4468926191329956, "learning_rate": 6.555341619864863e-06, "loss": 0.3261, "step": 26890 }, { "epoch": 1.2340415767977606, "grad_norm": 0.4418638050556183, "learning_rate": 6.555108594460253e-06, "loss": 0.3159, "step": 26891 }, { "epoch": 1.234087467303015, "grad_norm": 0.47592806816101074, "learning_rate": 6.554875565316098e-06, "loss": 0.3464, "step": 26892 }, { "epoch": 1.2341333578082694, "grad_norm": 0.490142285823822, "learning_rate": 6.554642532432956e-06, "loss": 0.329, "step": 26893 }, { "epoch": 1.2341792483135239, "grad_norm": 0.4986588954925537, "learning_rate": 6.55440949581139e-06, "loss": 0.3877, "step": 26894 }, { "epoch": 1.2342251388187784, "grad_norm": 0.4800160527229309, "learning_rate": 6.554176455451961e-06, "loss": 0.3859, "step": 26895 }, { "epoch": 1.2342710293240329, "grad_norm": 0.4542986750602722, "learning_rate": 6.553943411355228e-06, "loss": 0.3504, "step": 26896 }, { "epoch": 1.2343169198292874, "grad_norm": 0.48610973358154297, "learning_rate": 6.55371036352175e-06, "loss": 0.3409, "step": 26897 }, { "epoch": 1.2343628103345419, "grad_norm": 0.44587013125419617, "learning_rate": 6.553477311952091e-06, "loss": 0.321, "step": 26898 }, { "epoch": 1.2344087008397961, "grad_norm": 0.4322940409183502, "learning_rate": 6.553244256646808e-06, "loss": 0.282, "step": 26899 }, { "epoch": 1.2344545913450506, "grad_norm": 0.5377055406570435, "learning_rate": 6.5530111976064625e-06, "loss": 0.4434, "step": 26900 }, { "epoch": 1.2345004818503051, "grad_norm": 0.46853041648864746, "learning_rate": 6.552778134831615e-06, "loss": 0.3261, "step": 26901 }, { "epoch": 1.2345463723555596, "grad_norm": 0.4879055619239807, "learning_rate": 6.552545068322829e-06, "loss": 0.3213, "step": 26902 }, { "epoch": 1.2345922628608141, "grad_norm": 0.45755067467689514, "learning_rate": 6.552311998080658e-06, "loss": 0.3143, "step": 26903 }, { "epoch": 1.2346381533660686, "grad_norm": 0.4872325360774994, "learning_rate": 6.55207892410567e-06, "loss": 0.4006, "step": 26904 }, { "epoch": 1.2346840438713231, "grad_norm": 0.47123241424560547, "learning_rate": 6.55184584639842e-06, "loss": 0.403, "step": 26905 }, { "epoch": 1.2347299343765774, "grad_norm": 0.4868556261062622, "learning_rate": 6.551612764959472e-06, "loss": 0.3382, "step": 26906 }, { "epoch": 1.234775824881832, "grad_norm": 0.4847387671470642, "learning_rate": 6.551379679789385e-06, "loss": 0.3544, "step": 26907 }, { "epoch": 1.2348217153870864, "grad_norm": 0.46393391489982605, "learning_rate": 6.551146590888718e-06, "loss": 0.3015, "step": 26908 }, { "epoch": 1.234867605892341, "grad_norm": 0.47640296816825867, "learning_rate": 6.550913498258033e-06, "loss": 0.2767, "step": 26909 }, { "epoch": 1.2349134963975954, "grad_norm": 0.4468924403190613, "learning_rate": 6.550680401897891e-06, "loss": 0.3234, "step": 26910 }, { "epoch": 1.2349593869028497, "grad_norm": 0.4720715880393982, "learning_rate": 6.5504473018088525e-06, "loss": 0.3577, "step": 26911 }, { "epoch": 1.2350052774081042, "grad_norm": 0.4553471505641937, "learning_rate": 6.550214197991477e-06, "loss": 0.3775, "step": 26912 }, { "epoch": 1.2350511679133587, "grad_norm": 0.4348546862602234, "learning_rate": 6.549981090446327e-06, "loss": 0.2837, "step": 26913 }, { "epoch": 1.2350970584186132, "grad_norm": 0.49493667483329773, "learning_rate": 6.549747979173959e-06, "loss": 0.3908, "step": 26914 }, { "epoch": 1.2351429489238677, "grad_norm": 0.4428330659866333, "learning_rate": 6.54951486417494e-06, "loss": 0.2846, "step": 26915 }, { "epoch": 1.2351888394291222, "grad_norm": 0.46119460463523865, "learning_rate": 6.549281745449822e-06, "loss": 0.2908, "step": 26916 }, { "epoch": 1.2352347299343767, "grad_norm": 0.4523824453353882, "learning_rate": 6.549048622999174e-06, "loss": 0.3318, "step": 26917 }, { "epoch": 1.2352806204396312, "grad_norm": 0.5092880725860596, "learning_rate": 6.5488154968235525e-06, "loss": 0.5022, "step": 26918 }, { "epoch": 1.2353265109448854, "grad_norm": 0.5115890502929688, "learning_rate": 6.5485823669235175e-06, "loss": 0.3908, "step": 26919 }, { "epoch": 1.23537240145014, "grad_norm": 0.4918225407600403, "learning_rate": 6.548349233299631e-06, "loss": 0.3776, "step": 26920 }, { "epoch": 1.2354182919553944, "grad_norm": 0.49637696146965027, "learning_rate": 6.5481160959524535e-06, "loss": 0.4294, "step": 26921 }, { "epoch": 1.235464182460649, "grad_norm": 0.4784561097621918, "learning_rate": 6.547882954882545e-06, "loss": 0.3609, "step": 26922 }, { "epoch": 1.2355100729659034, "grad_norm": 0.4702552855014801, "learning_rate": 6.547649810090467e-06, "loss": 0.3588, "step": 26923 }, { "epoch": 1.2355559634711577, "grad_norm": 0.44575244188308716, "learning_rate": 6.54741666157678e-06, "loss": 0.3504, "step": 26924 }, { "epoch": 1.2356018539764122, "grad_norm": 0.44645342230796814, "learning_rate": 6.547183509342043e-06, "loss": 0.3583, "step": 26925 }, { "epoch": 1.2356477444816667, "grad_norm": 0.49024203419685364, "learning_rate": 6.54695035338682e-06, "loss": 0.3396, "step": 26926 }, { "epoch": 1.2356936349869212, "grad_norm": 0.46850958466529846, "learning_rate": 6.546717193711667e-06, "loss": 0.4147, "step": 26927 }, { "epoch": 1.2357395254921757, "grad_norm": 0.5124899744987488, "learning_rate": 6.546484030317149e-06, "loss": 0.3334, "step": 26928 }, { "epoch": 1.2357854159974302, "grad_norm": 0.4713386297225952, "learning_rate": 6.546250863203825e-06, "loss": 0.4174, "step": 26929 }, { "epoch": 1.2358313065026847, "grad_norm": 0.44826072454452515, "learning_rate": 6.546017692372255e-06, "loss": 0.3338, "step": 26930 }, { "epoch": 1.235877197007939, "grad_norm": 0.43639466166496277, "learning_rate": 6.545784517823001e-06, "loss": 0.2978, "step": 26931 }, { "epoch": 1.2359230875131935, "grad_norm": 0.46823742985725403, "learning_rate": 6.545551339556623e-06, "loss": 0.3326, "step": 26932 }, { "epoch": 1.235968978018448, "grad_norm": 0.4965291917324066, "learning_rate": 6.545318157573681e-06, "loss": 0.3488, "step": 26933 }, { "epoch": 1.2360148685237025, "grad_norm": 0.487789124250412, "learning_rate": 6.545084971874738e-06, "loss": 0.4168, "step": 26934 }, { "epoch": 1.236060759028957, "grad_norm": 0.4685787260532379, "learning_rate": 6.544851782460353e-06, "loss": 0.3856, "step": 26935 }, { "epoch": 1.2361066495342115, "grad_norm": 0.4797455668449402, "learning_rate": 6.544618589331086e-06, "loss": 0.3865, "step": 26936 }, { "epoch": 1.2361525400394657, "grad_norm": 0.5031578540802002, "learning_rate": 6.544385392487499e-06, "loss": 0.392, "step": 26937 }, { "epoch": 1.2361984305447202, "grad_norm": 0.5607506036758423, "learning_rate": 6.544152191930153e-06, "loss": 0.4603, "step": 26938 }, { "epoch": 1.2362443210499747, "grad_norm": 0.4962524175643921, "learning_rate": 6.543918987659609e-06, "loss": 0.3546, "step": 26939 }, { "epoch": 1.2362902115552292, "grad_norm": 0.4590578079223633, "learning_rate": 6.543685779676426e-06, "loss": 0.3234, "step": 26940 }, { "epoch": 1.2363361020604837, "grad_norm": 0.4443036615848541, "learning_rate": 6.543452567981165e-06, "loss": 0.2957, "step": 26941 }, { "epoch": 1.2363819925657382, "grad_norm": 0.44369983673095703, "learning_rate": 6.543219352574389e-06, "loss": 0.3133, "step": 26942 }, { "epoch": 1.2364278830709927, "grad_norm": 0.48077884316444397, "learning_rate": 6.542986133456658e-06, "loss": 0.3768, "step": 26943 }, { "epoch": 1.236473773576247, "grad_norm": 0.4984472095966339, "learning_rate": 6.542752910628531e-06, "loss": 0.442, "step": 26944 }, { "epoch": 1.2365196640815015, "grad_norm": 0.47822272777557373, "learning_rate": 6.542519684090571e-06, "loss": 0.3725, "step": 26945 }, { "epoch": 1.236565554586756, "grad_norm": 0.47450053691864014, "learning_rate": 6.54228645384334e-06, "loss": 0.3869, "step": 26946 }, { "epoch": 1.2366114450920105, "grad_norm": 0.4498036801815033, "learning_rate": 6.5420532198873935e-06, "loss": 0.2921, "step": 26947 }, { "epoch": 1.236657335597265, "grad_norm": 0.5055592060089111, "learning_rate": 6.541819982223296e-06, "loss": 0.4103, "step": 26948 }, { "epoch": 1.2367032261025195, "grad_norm": 0.47839269042015076, "learning_rate": 6.541586740851609e-06, "loss": 0.3183, "step": 26949 }, { "epoch": 1.2367491166077738, "grad_norm": 0.4832196533679962, "learning_rate": 6.541353495772893e-06, "loss": 0.3915, "step": 26950 }, { "epoch": 1.2367950071130283, "grad_norm": 0.4669204652309418, "learning_rate": 6.541120246987708e-06, "loss": 0.3561, "step": 26951 }, { "epoch": 1.2368408976182828, "grad_norm": 0.49969688057899475, "learning_rate": 6.540886994496616e-06, "loss": 0.4243, "step": 26952 }, { "epoch": 1.2368867881235373, "grad_norm": 0.47471991181373596, "learning_rate": 6.5406537383001745e-06, "loss": 0.3639, "step": 26953 }, { "epoch": 1.2369326786287917, "grad_norm": 0.47805774211883545, "learning_rate": 6.5404204783989475e-06, "loss": 0.3665, "step": 26954 }, { "epoch": 1.2369785691340462, "grad_norm": 0.46443846821784973, "learning_rate": 6.540187214793497e-06, "loss": 0.3429, "step": 26955 }, { "epoch": 1.2370244596393007, "grad_norm": 0.48014798760414124, "learning_rate": 6.539953947484383e-06, "loss": 0.3773, "step": 26956 }, { "epoch": 1.237070350144555, "grad_norm": 0.500694990158081, "learning_rate": 6.539720676472165e-06, "loss": 0.4159, "step": 26957 }, { "epoch": 1.2371162406498095, "grad_norm": 0.4682350158691406, "learning_rate": 6.539487401757404e-06, "loss": 0.3489, "step": 26958 }, { "epoch": 1.237162131155064, "grad_norm": 0.4632737636566162, "learning_rate": 6.539254123340661e-06, "loss": 0.3415, "step": 26959 }, { "epoch": 1.2372080216603185, "grad_norm": 0.6620158553123474, "learning_rate": 6.539020841222499e-06, "loss": 0.3913, "step": 26960 }, { "epoch": 1.237253912165573, "grad_norm": 0.5854431390762329, "learning_rate": 6.538787555403477e-06, "loss": 0.3687, "step": 26961 }, { "epoch": 1.2372998026708273, "grad_norm": 1.7830449342727661, "learning_rate": 6.5385542658841575e-06, "loss": 0.3288, "step": 26962 }, { "epoch": 1.2373456931760818, "grad_norm": 0.4354385435581207, "learning_rate": 6.5383209726651e-06, "loss": 0.2974, "step": 26963 }, { "epoch": 1.2373915836813363, "grad_norm": 0.45981118083000183, "learning_rate": 6.538087675746865e-06, "loss": 0.3721, "step": 26964 }, { "epoch": 1.2374374741865908, "grad_norm": 0.4643864631652832, "learning_rate": 6.537854375130015e-06, "loss": 0.3472, "step": 26965 }, { "epoch": 1.2374833646918453, "grad_norm": 0.5077660083770752, "learning_rate": 6.537621070815112e-06, "loss": 0.3968, "step": 26966 }, { "epoch": 1.2375292551970998, "grad_norm": 0.4875354766845703, "learning_rate": 6.537387762802715e-06, "loss": 0.3605, "step": 26967 }, { "epoch": 1.2375751457023543, "grad_norm": 0.4593157172203064, "learning_rate": 6.537154451093385e-06, "loss": 0.3803, "step": 26968 }, { "epoch": 1.2376210362076085, "grad_norm": 0.5361263155937195, "learning_rate": 6.536921135687684e-06, "loss": 0.448, "step": 26969 }, { "epoch": 1.237666926712863, "grad_norm": 0.43269380927085876, "learning_rate": 6.5366878165861725e-06, "loss": 0.2993, "step": 26970 }, { "epoch": 1.2377128172181175, "grad_norm": 0.4363768994808197, "learning_rate": 6.536454493789413e-06, "loss": 0.3202, "step": 26971 }, { "epoch": 1.237758707723372, "grad_norm": 0.5158581733703613, "learning_rate": 6.536221167297965e-06, "loss": 0.4569, "step": 26972 }, { "epoch": 1.2378045982286265, "grad_norm": 0.4847043752670288, "learning_rate": 6.535987837112389e-06, "loss": 0.4479, "step": 26973 }, { "epoch": 1.237850488733881, "grad_norm": 0.4664457142353058, "learning_rate": 6.535754503233247e-06, "loss": 0.4061, "step": 26974 }, { "epoch": 1.2378963792391353, "grad_norm": 0.41358092427253723, "learning_rate": 6.535521165661101e-06, "loss": 0.301, "step": 26975 }, { "epoch": 1.2379422697443898, "grad_norm": 0.4339200556278229, "learning_rate": 6.535287824396509e-06, "loss": 0.3009, "step": 26976 }, { "epoch": 1.2379881602496443, "grad_norm": 0.5721866488456726, "learning_rate": 6.535054479440037e-06, "loss": 0.3713, "step": 26977 }, { "epoch": 1.2380340507548988, "grad_norm": 0.4904795289039612, "learning_rate": 6.5348211307922425e-06, "loss": 0.3775, "step": 26978 }, { "epoch": 1.2380799412601533, "grad_norm": 0.45070287585258484, "learning_rate": 6.534587778453687e-06, "loss": 0.341, "step": 26979 }, { "epoch": 1.2381258317654078, "grad_norm": 0.4584466516971588, "learning_rate": 6.534354422424933e-06, "loss": 0.3777, "step": 26980 }, { "epoch": 1.2381717222706623, "grad_norm": 0.47643256187438965, "learning_rate": 6.534121062706541e-06, "loss": 0.3443, "step": 26981 }, { "epoch": 1.2382176127759166, "grad_norm": 0.49365726113319397, "learning_rate": 6.533887699299072e-06, "loss": 0.3948, "step": 26982 }, { "epoch": 1.238263503281171, "grad_norm": 0.42879343032836914, "learning_rate": 6.533654332203088e-06, "loss": 0.2832, "step": 26983 }, { "epoch": 1.2383093937864256, "grad_norm": 0.44917890429496765, "learning_rate": 6.533420961419147e-06, "loss": 0.3251, "step": 26984 }, { "epoch": 1.23835528429168, "grad_norm": 0.49695175886154175, "learning_rate": 6.533187586947814e-06, "loss": 0.4568, "step": 26985 }, { "epoch": 1.2384011747969346, "grad_norm": 0.4405040144920349, "learning_rate": 6.532954208789648e-06, "loss": 0.3266, "step": 26986 }, { "epoch": 1.238447065302189, "grad_norm": 0.5252926349639893, "learning_rate": 6.532720826945212e-06, "loss": 0.47, "step": 26987 }, { "epoch": 1.2384929558074433, "grad_norm": 0.4611525535583496, "learning_rate": 6.532487441415065e-06, "loss": 0.3311, "step": 26988 }, { "epoch": 1.2385388463126978, "grad_norm": 0.47140541672706604, "learning_rate": 6.532254052199771e-06, "loss": 0.374, "step": 26989 }, { "epoch": 1.2385847368179523, "grad_norm": 0.45425155758857727, "learning_rate": 6.532020659299888e-06, "loss": 0.2876, "step": 26990 }, { "epoch": 1.2386306273232068, "grad_norm": 0.5095045566558838, "learning_rate": 6.53178726271598e-06, "loss": 0.4331, "step": 26991 }, { "epoch": 1.2386765178284613, "grad_norm": 0.46734675765037537, "learning_rate": 6.531553862448606e-06, "loss": 0.3577, "step": 26992 }, { "epoch": 1.2387224083337158, "grad_norm": 0.46144795417785645, "learning_rate": 6.531320458498329e-06, "loss": 0.345, "step": 26993 }, { "epoch": 1.2387682988389703, "grad_norm": 0.4448409974575043, "learning_rate": 6.5310870508657095e-06, "loss": 0.3145, "step": 26994 }, { "epoch": 1.2388141893442246, "grad_norm": 0.46644696593284607, "learning_rate": 6.530853639551309e-06, "loss": 0.3414, "step": 26995 }, { "epoch": 1.238860079849479, "grad_norm": 0.4957960844039917, "learning_rate": 6.530620224555689e-06, "loss": 0.382, "step": 26996 }, { "epoch": 1.2389059703547336, "grad_norm": 0.4218040108680725, "learning_rate": 6.53038680587941e-06, "loss": 0.3138, "step": 26997 }, { "epoch": 1.238951860859988, "grad_norm": 0.4730508327484131, "learning_rate": 6.530153383523032e-06, "loss": 0.3536, "step": 26998 }, { "epoch": 1.2389977513652426, "grad_norm": 0.49037477374076843, "learning_rate": 6.52991995748712e-06, "loss": 0.3787, "step": 26999 }, { "epoch": 1.2390436418704969, "grad_norm": 0.470256507396698, "learning_rate": 6.529686527772234e-06, "loss": 0.3712, "step": 27000 }, { "epoch": 1.2390895323757514, "grad_norm": 0.46006613969802856, "learning_rate": 6.529453094378934e-06, "loss": 0.3473, "step": 27001 }, { "epoch": 1.2391354228810059, "grad_norm": 0.46078142523765564, "learning_rate": 6.52921965730778e-06, "loss": 0.3201, "step": 27002 }, { "epoch": 1.2391813133862604, "grad_norm": 0.45331457257270813, "learning_rate": 6.528986216559339e-06, "loss": 0.3173, "step": 27003 }, { "epoch": 1.2392272038915149, "grad_norm": 0.5347026586532593, "learning_rate": 6.528752772134166e-06, "loss": 0.3577, "step": 27004 }, { "epoch": 1.2392730943967694, "grad_norm": 0.4585721492767334, "learning_rate": 6.528519324032827e-06, "loss": 0.3283, "step": 27005 }, { "epoch": 1.2393189849020239, "grad_norm": 0.4250403046607971, "learning_rate": 6.5282858722558796e-06, "loss": 0.2673, "step": 27006 }, { "epoch": 1.2393648754072784, "grad_norm": 0.5245912671089172, "learning_rate": 6.528052416803887e-06, "loss": 0.4066, "step": 27007 }, { "epoch": 1.2394107659125326, "grad_norm": 0.4414653778076172, "learning_rate": 6.527818957677412e-06, "loss": 0.3352, "step": 27008 }, { "epoch": 1.2394566564177871, "grad_norm": 0.5111692547798157, "learning_rate": 6.527585494877014e-06, "loss": 0.418, "step": 27009 }, { "epoch": 1.2395025469230416, "grad_norm": 0.47435471415519714, "learning_rate": 6.527352028403255e-06, "loss": 0.3396, "step": 27010 }, { "epoch": 1.2395484374282961, "grad_norm": 0.47256824374198914, "learning_rate": 6.527118558256696e-06, "loss": 0.3652, "step": 27011 }, { "epoch": 1.2395943279335506, "grad_norm": 0.5033325552940369, "learning_rate": 6.5268850844379e-06, "loss": 0.367, "step": 27012 }, { "epoch": 1.239640218438805, "grad_norm": 0.45205235481262207, "learning_rate": 6.526651606947425e-06, "loss": 0.364, "step": 27013 }, { "epoch": 1.2396861089440594, "grad_norm": 0.47104787826538086, "learning_rate": 6.526418125785837e-06, "loss": 0.3452, "step": 27014 }, { "epoch": 1.239731999449314, "grad_norm": 0.5172480344772339, "learning_rate": 6.526184640953694e-06, "loss": 0.3746, "step": 27015 }, { "epoch": 1.2397778899545684, "grad_norm": 0.4598272144794464, "learning_rate": 6.52595115245156e-06, "loss": 0.3468, "step": 27016 }, { "epoch": 1.2398237804598229, "grad_norm": 0.48532307147979736, "learning_rate": 6.525717660279993e-06, "loss": 0.4508, "step": 27017 }, { "epoch": 1.2398696709650774, "grad_norm": 0.5078039765357971, "learning_rate": 6.525484164439559e-06, "loss": 0.4335, "step": 27018 }, { "epoch": 1.2399155614703319, "grad_norm": 0.48845285177230835, "learning_rate": 6.525250664930814e-06, "loss": 0.3794, "step": 27019 }, { "epoch": 1.2399614519755862, "grad_norm": 0.47792676091194153, "learning_rate": 6.5250171617543244e-06, "loss": 0.3839, "step": 27020 }, { "epoch": 1.2400073424808407, "grad_norm": 0.4934951364994049, "learning_rate": 6.52478365491065e-06, "loss": 0.4532, "step": 27021 }, { "epoch": 1.2400532329860952, "grad_norm": 0.45053303241729736, "learning_rate": 6.524550144400352e-06, "loss": 0.3412, "step": 27022 }, { "epoch": 1.2400991234913497, "grad_norm": 0.495858371257782, "learning_rate": 6.524316630223993e-06, "loss": 0.4128, "step": 27023 }, { "epoch": 1.2401450139966042, "grad_norm": 0.4718649387359619, "learning_rate": 6.524083112382131e-06, "loss": 0.3944, "step": 27024 }, { "epoch": 1.2401909045018586, "grad_norm": 0.4177536368370056, "learning_rate": 6.523849590875332e-06, "loss": 0.2724, "step": 27025 }, { "epoch": 1.240236795007113, "grad_norm": 0.48354819416999817, "learning_rate": 6.523616065704156e-06, "loss": 0.3641, "step": 27026 }, { "epoch": 1.2402826855123674, "grad_norm": 0.4603208005428314, "learning_rate": 6.523382536869163e-06, "loss": 0.3502, "step": 27027 }, { "epoch": 1.240328576017622, "grad_norm": 0.47348088026046753, "learning_rate": 6.523149004370916e-06, "loss": 0.3881, "step": 27028 }, { "epoch": 1.2403744665228764, "grad_norm": 0.4721221923828125, "learning_rate": 6.522915468209976e-06, "loss": 0.3658, "step": 27029 }, { "epoch": 1.240420357028131, "grad_norm": 0.49434778094291687, "learning_rate": 6.522681928386905e-06, "loss": 0.3965, "step": 27030 }, { "epoch": 1.2404662475333854, "grad_norm": 0.4491208791732788, "learning_rate": 6.522448384902266e-06, "loss": 0.32, "step": 27031 }, { "epoch": 1.24051213803864, "grad_norm": 1.0284695625305176, "learning_rate": 6.5222148377566185e-06, "loss": 0.3332, "step": 27032 }, { "epoch": 1.2405580285438942, "grad_norm": 0.4934678077697754, "learning_rate": 6.521981286950524e-06, "loss": 0.3832, "step": 27033 }, { "epoch": 1.2406039190491487, "grad_norm": 0.4557664096355438, "learning_rate": 6.521747732484546e-06, "loss": 0.2853, "step": 27034 }, { "epoch": 1.2406498095544032, "grad_norm": 0.4769366383552551, "learning_rate": 6.521514174359244e-06, "loss": 0.3585, "step": 27035 }, { "epoch": 1.2406957000596577, "grad_norm": 0.5254905819892883, "learning_rate": 6.52128061257518e-06, "loss": 0.329, "step": 27036 }, { "epoch": 1.2407415905649122, "grad_norm": 0.4639548659324646, "learning_rate": 6.521047047132919e-06, "loss": 0.3544, "step": 27037 }, { "epoch": 1.2407874810701667, "grad_norm": 0.452947735786438, "learning_rate": 6.520813478033017e-06, "loss": 0.36, "step": 27038 }, { "epoch": 1.240833371575421, "grad_norm": 0.45113468170166016, "learning_rate": 6.520579905276041e-06, "loss": 0.34, "step": 27039 }, { "epoch": 1.2408792620806754, "grad_norm": 0.48438915610313416, "learning_rate": 6.520346328862548e-06, "loss": 0.3656, "step": 27040 }, { "epoch": 1.24092515258593, "grad_norm": 0.4963419437408447, "learning_rate": 6.520112748793101e-06, "loss": 0.3681, "step": 27041 }, { "epoch": 1.2409710430911844, "grad_norm": 0.49887871742248535, "learning_rate": 6.5198791650682655e-06, "loss": 0.3859, "step": 27042 }, { "epoch": 1.241016933596439, "grad_norm": 0.4632721543312073, "learning_rate": 6.5196455776886e-06, "loss": 0.3682, "step": 27043 }, { "epoch": 1.2410628241016934, "grad_norm": 0.4863838851451874, "learning_rate": 6.519411986654666e-06, "loss": 0.3986, "step": 27044 }, { "epoch": 1.241108714606948, "grad_norm": 0.48101967573165894, "learning_rate": 6.519178391967025e-06, "loss": 0.3614, "step": 27045 }, { "epoch": 1.2411546051122022, "grad_norm": 0.4778022766113281, "learning_rate": 6.51894479362624e-06, "loss": 0.3631, "step": 27046 }, { "epoch": 1.2412004956174567, "grad_norm": 0.476870596408844, "learning_rate": 6.518711191632871e-06, "loss": 0.3834, "step": 27047 }, { "epoch": 1.2412463861227112, "grad_norm": 0.4745154082775116, "learning_rate": 6.5184775859874835e-06, "loss": 0.2806, "step": 27048 }, { "epoch": 1.2412922766279657, "grad_norm": 0.5122151970863342, "learning_rate": 6.518243976690635e-06, "loss": 0.4812, "step": 27049 }, { "epoch": 1.2413381671332202, "grad_norm": 0.48808810114860535, "learning_rate": 6.518010363742888e-06, "loss": 0.3531, "step": 27050 }, { "epoch": 1.2413840576384745, "grad_norm": 0.4663884937763214, "learning_rate": 6.517776747144806e-06, "loss": 0.4014, "step": 27051 }, { "epoch": 1.241429948143729, "grad_norm": 0.453797847032547, "learning_rate": 6.51754312689695e-06, "loss": 0.3648, "step": 27052 }, { "epoch": 1.2414758386489835, "grad_norm": 0.44562968611717224, "learning_rate": 6.517309502999882e-06, "loss": 0.3443, "step": 27053 }, { "epoch": 1.241521729154238, "grad_norm": 0.4483977258205414, "learning_rate": 6.517075875454164e-06, "loss": 0.339, "step": 27054 }, { "epoch": 1.2415676196594925, "grad_norm": 0.5322286486625671, "learning_rate": 6.516842244260356e-06, "loss": 0.4193, "step": 27055 }, { "epoch": 1.241613510164747, "grad_norm": 0.4517311155796051, "learning_rate": 6.516608609419023e-06, "loss": 0.3207, "step": 27056 }, { "epoch": 1.2416594006700015, "grad_norm": 0.4524323642253876, "learning_rate": 6.516374970930724e-06, "loss": 0.3182, "step": 27057 }, { "epoch": 1.2417052911752557, "grad_norm": 0.44993674755096436, "learning_rate": 6.51614132879602e-06, "loss": 0.3157, "step": 27058 }, { "epoch": 1.2417511816805102, "grad_norm": 0.45945224165916443, "learning_rate": 6.515907683015478e-06, "loss": 0.3139, "step": 27059 }, { "epoch": 1.2417970721857647, "grad_norm": 0.49321249127388, "learning_rate": 6.515674033589654e-06, "loss": 0.3891, "step": 27060 }, { "epoch": 1.2418429626910192, "grad_norm": 0.5147197842597961, "learning_rate": 6.515440380519112e-06, "loss": 0.3847, "step": 27061 }, { "epoch": 1.2418888531962737, "grad_norm": 0.6308175921440125, "learning_rate": 6.515206723804415e-06, "loss": 0.2835, "step": 27062 }, { "epoch": 1.2419347437015282, "grad_norm": 0.5028957724571228, "learning_rate": 6.514973063446124e-06, "loss": 0.4323, "step": 27063 }, { "epoch": 1.2419806342067825, "grad_norm": 0.46063849329948425, "learning_rate": 6.5147393994448015e-06, "loss": 0.3438, "step": 27064 }, { "epoch": 1.242026524712037, "grad_norm": 0.4767475724220276, "learning_rate": 6.514505731801008e-06, "loss": 0.3757, "step": 27065 }, { "epoch": 1.2420724152172915, "grad_norm": 0.5024803280830383, "learning_rate": 6.514272060515306e-06, "loss": 0.4082, "step": 27066 }, { "epoch": 1.242118305722546, "grad_norm": 0.4939013421535492, "learning_rate": 6.514038385588258e-06, "loss": 0.4305, "step": 27067 }, { "epoch": 1.2421641962278005, "grad_norm": 0.48958125710487366, "learning_rate": 6.513804707020427e-06, "loss": 0.4225, "step": 27068 }, { "epoch": 1.242210086733055, "grad_norm": 0.4751177728176117, "learning_rate": 6.51357102481237e-06, "loss": 0.3562, "step": 27069 }, { "epoch": 1.2422559772383095, "grad_norm": 0.4508625268936157, "learning_rate": 6.513337338964657e-06, "loss": 0.279, "step": 27070 }, { "epoch": 1.2423018677435638, "grad_norm": 0.4461248517036438, "learning_rate": 6.513103649477843e-06, "loss": 0.3398, "step": 27071 }, { "epoch": 1.2423477582488183, "grad_norm": 0.5069220662117004, "learning_rate": 6.512869956352491e-06, "loss": 0.3689, "step": 27072 }, { "epoch": 1.2423936487540728, "grad_norm": 0.4731628894805908, "learning_rate": 6.512636259589166e-06, "loss": 0.427, "step": 27073 }, { "epoch": 1.2424395392593273, "grad_norm": 0.45678165555000305, "learning_rate": 6.512402559188427e-06, "loss": 0.3305, "step": 27074 }, { "epoch": 1.2424854297645818, "grad_norm": 0.4793204665184021, "learning_rate": 6.512168855150837e-06, "loss": 0.3611, "step": 27075 }, { "epoch": 1.2425313202698363, "grad_norm": 0.45435789227485657, "learning_rate": 6.511935147476961e-06, "loss": 0.3789, "step": 27076 }, { "epoch": 1.2425772107750905, "grad_norm": 0.48214176297187805, "learning_rate": 6.511701436167355e-06, "loss": 0.4038, "step": 27077 }, { "epoch": 1.242623101280345, "grad_norm": 0.454705148935318, "learning_rate": 6.511467721222584e-06, "loss": 0.3433, "step": 27078 }, { "epoch": 1.2426689917855995, "grad_norm": 0.45880579948425293, "learning_rate": 6.511234002643212e-06, "loss": 0.3627, "step": 27079 }, { "epoch": 1.242714882290854, "grad_norm": 0.4805181324481964, "learning_rate": 6.511000280429797e-06, "loss": 0.374, "step": 27080 }, { "epoch": 1.2427607727961085, "grad_norm": 0.5005503296852112, "learning_rate": 6.510766554582907e-06, "loss": 0.395, "step": 27081 }, { "epoch": 1.242806663301363, "grad_norm": 0.45770904421806335, "learning_rate": 6.510532825103099e-06, "loss": 0.3561, "step": 27082 }, { "epoch": 1.2428525538066175, "grad_norm": 0.4728145897388458, "learning_rate": 6.510299091990934e-06, "loss": 0.3565, "step": 27083 }, { "epoch": 1.2428984443118718, "grad_norm": 0.5317184329032898, "learning_rate": 6.510065355246977e-06, "loss": 0.4554, "step": 27084 }, { "epoch": 1.2429443348171263, "grad_norm": 0.47025206685066223, "learning_rate": 6.509831614871791e-06, "loss": 0.3775, "step": 27085 }, { "epoch": 1.2429902253223808, "grad_norm": 0.4879731237888336, "learning_rate": 6.509597870865936e-06, "loss": 0.4014, "step": 27086 }, { "epoch": 1.2430361158276353, "grad_norm": 0.4621317982673645, "learning_rate": 6.509364123229974e-06, "loss": 0.3109, "step": 27087 }, { "epoch": 1.2430820063328898, "grad_norm": 0.43527066707611084, "learning_rate": 6.5091303719644675e-06, "loss": 0.3271, "step": 27088 }, { "epoch": 1.243127896838144, "grad_norm": 0.49288082122802734, "learning_rate": 6.508896617069979e-06, "loss": 0.3941, "step": 27089 }, { "epoch": 1.2431737873433986, "grad_norm": 0.4846830368041992, "learning_rate": 6.50866285854707e-06, "loss": 0.3568, "step": 27090 }, { "epoch": 1.243219677848653, "grad_norm": 0.46757686138153076, "learning_rate": 6.508429096396303e-06, "loss": 0.3623, "step": 27091 }, { "epoch": 1.2432655683539076, "grad_norm": 0.49050650000572205, "learning_rate": 6.508195330618241e-06, "loss": 0.3479, "step": 27092 }, { "epoch": 1.243311458859162, "grad_norm": 0.5163153409957886, "learning_rate": 6.507961561213445e-06, "loss": 0.3729, "step": 27093 }, { "epoch": 1.2433573493644166, "grad_norm": 0.44548898935317993, "learning_rate": 6.507727788182476e-06, "loss": 0.3323, "step": 27094 }, { "epoch": 1.243403239869671, "grad_norm": 0.4674201011657715, "learning_rate": 6.5074940115258985e-06, "loss": 0.3293, "step": 27095 }, { "epoch": 1.2434491303749255, "grad_norm": 0.4328274130821228, "learning_rate": 6.507260231244275e-06, "loss": 0.2988, "step": 27096 }, { "epoch": 1.2434950208801798, "grad_norm": 0.4327007234096527, "learning_rate": 6.507026447338165e-06, "loss": 0.3085, "step": 27097 }, { "epoch": 1.2435409113854343, "grad_norm": 0.504535436630249, "learning_rate": 6.506792659808131e-06, "loss": 0.4225, "step": 27098 }, { "epoch": 1.2435868018906888, "grad_norm": 0.4561856985092163, "learning_rate": 6.506558868654738e-06, "loss": 0.3527, "step": 27099 }, { "epoch": 1.2436326923959433, "grad_norm": 0.4526936113834381, "learning_rate": 6.506325073878546e-06, "loss": 0.3482, "step": 27100 }, { "epoch": 1.2436785829011978, "grad_norm": 0.4887792468070984, "learning_rate": 6.506091275480115e-06, "loss": 0.3903, "step": 27101 }, { "epoch": 1.243724473406452, "grad_norm": 0.5030981302261353, "learning_rate": 6.5058574734600135e-06, "loss": 0.4261, "step": 27102 }, { "epoch": 1.2437703639117066, "grad_norm": 0.45109376311302185, "learning_rate": 6.505623667818799e-06, "loss": 0.362, "step": 27103 }, { "epoch": 1.243816254416961, "grad_norm": 0.4737619161605835, "learning_rate": 6.505389858557035e-06, "loss": 0.3107, "step": 27104 }, { "epoch": 1.2438621449222156, "grad_norm": 0.47073888778686523, "learning_rate": 6.505156045675282e-06, "loss": 0.3485, "step": 27105 }, { "epoch": 1.24390803542747, "grad_norm": 0.44162726402282715, "learning_rate": 6.5049222291741045e-06, "loss": 0.3237, "step": 27106 }, { "epoch": 1.2439539259327246, "grad_norm": 0.4589567482471466, "learning_rate": 6.504688409054065e-06, "loss": 0.3339, "step": 27107 }, { "epoch": 1.243999816437979, "grad_norm": 0.5723713636398315, "learning_rate": 6.504454585315725e-06, "loss": 0.3789, "step": 27108 }, { "epoch": 1.2440457069432334, "grad_norm": 0.48852717876434326, "learning_rate": 6.504220757959645e-06, "loss": 0.3488, "step": 27109 }, { "epoch": 1.2440915974484879, "grad_norm": 0.44218942523002625, "learning_rate": 6.5039869269863895e-06, "loss": 0.3132, "step": 27110 }, { "epoch": 1.2441374879537423, "grad_norm": 0.44037508964538574, "learning_rate": 6.503753092396519e-06, "loss": 0.2886, "step": 27111 }, { "epoch": 1.2441833784589968, "grad_norm": 0.5884010195732117, "learning_rate": 6.503519254190598e-06, "loss": 0.3991, "step": 27112 }, { "epoch": 1.2442292689642513, "grad_norm": 0.4810219407081604, "learning_rate": 6.503285412369187e-06, "loss": 0.3945, "step": 27113 }, { "epoch": 1.2442751594695058, "grad_norm": 0.45130655169487, "learning_rate": 6.503051566932851e-06, "loss": 0.3166, "step": 27114 }, { "epoch": 1.2443210499747601, "grad_norm": 0.5156421065330505, "learning_rate": 6.502817717882148e-06, "loss": 0.4503, "step": 27115 }, { "epoch": 1.2443669404800146, "grad_norm": 0.48779308795928955, "learning_rate": 6.502583865217644e-06, "loss": 0.415, "step": 27116 }, { "epoch": 1.2444128309852691, "grad_norm": 0.47189587354660034, "learning_rate": 6.5023500089399e-06, "loss": 0.3494, "step": 27117 }, { "epoch": 1.2444587214905236, "grad_norm": 0.5093399882316589, "learning_rate": 6.502116149049478e-06, "loss": 0.319, "step": 27118 }, { "epoch": 1.244504611995778, "grad_norm": 0.47336456179618835, "learning_rate": 6.501882285546941e-06, "loss": 0.3365, "step": 27119 }, { "epoch": 1.2445505025010326, "grad_norm": 0.4626924395561218, "learning_rate": 6.501648418432851e-06, "loss": 0.3972, "step": 27120 }, { "epoch": 1.244596393006287, "grad_norm": 0.5014014840126038, "learning_rate": 6.50141454770777e-06, "loss": 0.3588, "step": 27121 }, { "epoch": 1.2446422835115414, "grad_norm": 0.4969446361064911, "learning_rate": 6.501180673372262e-06, "loss": 0.3625, "step": 27122 }, { "epoch": 1.2446881740167959, "grad_norm": 0.4638162851333618, "learning_rate": 6.5009467954268855e-06, "loss": 0.3486, "step": 27123 }, { "epoch": 1.2447340645220504, "grad_norm": 0.5057275295257568, "learning_rate": 6.500712913872209e-06, "loss": 0.4504, "step": 27124 }, { "epoch": 1.2447799550273049, "grad_norm": 0.4455331861972809, "learning_rate": 6.500479028708791e-06, "loss": 0.3306, "step": 27125 }, { "epoch": 1.2448258455325594, "grad_norm": 0.4613574147224426, "learning_rate": 6.500245139937193e-06, "loss": 0.3458, "step": 27126 }, { "epoch": 1.2448717360378139, "grad_norm": 0.5263022780418396, "learning_rate": 6.50001124755798e-06, "loss": 0.409, "step": 27127 }, { "epoch": 1.2449176265430681, "grad_norm": 0.4352143108844757, "learning_rate": 6.499777351571713e-06, "loss": 0.3336, "step": 27128 }, { "epoch": 1.2449635170483226, "grad_norm": 0.476104736328125, "learning_rate": 6.499543451978956e-06, "loss": 0.357, "step": 27129 }, { "epoch": 1.2450094075535771, "grad_norm": 0.44013720750808716, "learning_rate": 6.499309548780269e-06, "loss": 0.312, "step": 27130 }, { "epoch": 1.2450552980588316, "grad_norm": 0.492827832698822, "learning_rate": 6.499075641976216e-06, "loss": 0.375, "step": 27131 }, { "epoch": 1.2451011885640861, "grad_norm": 0.4549662172794342, "learning_rate": 6.498841731567358e-06, "loss": 0.3835, "step": 27132 }, { "epoch": 1.2451470790693406, "grad_norm": 0.45174142718315125, "learning_rate": 6.49860781755426e-06, "loss": 0.3238, "step": 27133 }, { "epoch": 1.2451929695745951, "grad_norm": 0.46549710631370544, "learning_rate": 6.498373899937482e-06, "loss": 0.3863, "step": 27134 }, { "epoch": 1.2452388600798494, "grad_norm": 0.4798084795475006, "learning_rate": 6.498139978717591e-06, "loss": 0.3684, "step": 27135 }, { "epoch": 1.245284750585104, "grad_norm": 0.43202319741249084, "learning_rate": 6.4979060538951435e-06, "loss": 0.3268, "step": 27136 }, { "epoch": 1.2453306410903584, "grad_norm": 0.48386749625205994, "learning_rate": 6.497672125470704e-06, "loss": 0.3453, "step": 27137 }, { "epoch": 1.245376531595613, "grad_norm": 0.4435933232307434, "learning_rate": 6.497438193444837e-06, "loss": 0.2943, "step": 27138 }, { "epoch": 1.2454224221008674, "grad_norm": 0.4361483156681061, "learning_rate": 6.497204257818104e-06, "loss": 0.2846, "step": 27139 }, { "epoch": 1.2454683126061217, "grad_norm": 0.4878710210323334, "learning_rate": 6.496970318591067e-06, "loss": 0.3316, "step": 27140 }, { "epoch": 1.2455142031113762, "grad_norm": 0.45536360144615173, "learning_rate": 6.496736375764288e-06, "loss": 0.3738, "step": 27141 }, { "epoch": 1.2455600936166307, "grad_norm": 0.41184940934181213, "learning_rate": 6.496502429338332e-06, "loss": 0.2516, "step": 27142 }, { "epoch": 1.2456059841218852, "grad_norm": 0.48244231939315796, "learning_rate": 6.496268479313758e-06, "loss": 0.3632, "step": 27143 }, { "epoch": 1.2456518746271397, "grad_norm": 0.5049140453338623, "learning_rate": 6.496034525691131e-06, "loss": 0.4627, "step": 27144 }, { "epoch": 1.2456977651323942, "grad_norm": 0.5351001024246216, "learning_rate": 6.495800568471013e-06, "loss": 0.4518, "step": 27145 }, { "epoch": 1.2457436556376487, "grad_norm": 0.4875046908855438, "learning_rate": 6.495566607653969e-06, "loss": 0.3724, "step": 27146 }, { "epoch": 1.245789546142903, "grad_norm": 0.473929762840271, "learning_rate": 6.495332643240558e-06, "loss": 0.3876, "step": 27147 }, { "epoch": 1.2458354366481574, "grad_norm": 0.46286073327064514, "learning_rate": 6.495098675231343e-06, "loss": 0.3544, "step": 27148 }, { "epoch": 1.245881327153412, "grad_norm": 0.4387279152870178, "learning_rate": 6.494864703626887e-06, "loss": 0.2931, "step": 27149 }, { "epoch": 1.2459272176586664, "grad_norm": 0.4251987636089325, "learning_rate": 6.494630728427755e-06, "loss": 0.2825, "step": 27150 }, { "epoch": 1.245973108163921, "grad_norm": 0.48286065459251404, "learning_rate": 6.494396749634508e-06, "loss": 0.4215, "step": 27151 }, { "epoch": 1.2460189986691754, "grad_norm": 0.4984102249145508, "learning_rate": 6.494162767247708e-06, "loss": 0.4167, "step": 27152 }, { "epoch": 1.2460648891744297, "grad_norm": 0.4980386197566986, "learning_rate": 6.493928781267917e-06, "loss": 0.4143, "step": 27153 }, { "epoch": 1.2461107796796842, "grad_norm": 0.46676304936408997, "learning_rate": 6.493694791695699e-06, "loss": 0.3292, "step": 27154 }, { "epoch": 1.2461566701849387, "grad_norm": 0.45811837911605835, "learning_rate": 6.493460798531617e-06, "loss": 0.3526, "step": 27155 }, { "epoch": 1.2462025606901932, "grad_norm": 0.46026745438575745, "learning_rate": 6.493226801776234e-06, "loss": 0.3132, "step": 27156 }, { "epoch": 1.2462484511954477, "grad_norm": 0.4545271694660187, "learning_rate": 6.4929928014301114e-06, "loss": 0.327, "step": 27157 }, { "epoch": 1.2462943417007022, "grad_norm": 0.4925821125507355, "learning_rate": 6.492758797493812e-06, "loss": 0.401, "step": 27158 }, { "epoch": 1.2463402322059567, "grad_norm": 0.48520565032958984, "learning_rate": 6.492524789967898e-06, "loss": 0.406, "step": 27159 }, { "epoch": 1.246386122711211, "grad_norm": 0.45077237486839294, "learning_rate": 6.492290778852933e-06, "loss": 0.3173, "step": 27160 }, { "epoch": 1.2464320132164655, "grad_norm": 0.43259233236312866, "learning_rate": 6.492056764149481e-06, "loss": 0.2771, "step": 27161 }, { "epoch": 1.24647790372172, "grad_norm": 0.4657677412033081, "learning_rate": 6.491822745858103e-06, "loss": 0.3328, "step": 27162 }, { "epoch": 1.2465237942269745, "grad_norm": 0.45552247762680054, "learning_rate": 6.49158872397936e-06, "loss": 0.3116, "step": 27163 }, { "epoch": 1.246569684732229, "grad_norm": 0.49901121854782104, "learning_rate": 6.491354698513821e-06, "loss": 0.4331, "step": 27164 }, { "epoch": 1.2466155752374835, "grad_norm": 0.45761552453041077, "learning_rate": 6.491120669462041e-06, "loss": 0.3783, "step": 27165 }, { "epoch": 1.2466614657427377, "grad_norm": 0.5073399543762207, "learning_rate": 6.490886636824587e-06, "loss": 0.3487, "step": 27166 }, { "epoch": 1.2467073562479922, "grad_norm": 0.45310312509536743, "learning_rate": 6.490652600602023e-06, "loss": 0.3355, "step": 27167 }, { "epoch": 1.2467532467532467, "grad_norm": 0.4901655316352844, "learning_rate": 6.490418560794909e-06, "loss": 0.4591, "step": 27168 }, { "epoch": 1.2467991372585012, "grad_norm": 0.44069012999534607, "learning_rate": 6.490184517403809e-06, "loss": 0.3134, "step": 27169 }, { "epoch": 1.2468450277637557, "grad_norm": 0.44979336857795715, "learning_rate": 6.489950470429285e-06, "loss": 0.2566, "step": 27170 }, { "epoch": 1.2468909182690102, "grad_norm": 0.47115257382392883, "learning_rate": 6.4897164198718996e-06, "loss": 0.3662, "step": 27171 }, { "epoch": 1.2469368087742647, "grad_norm": 0.46368563175201416, "learning_rate": 6.489482365732218e-06, "loss": 0.3669, "step": 27172 }, { "epoch": 1.246982699279519, "grad_norm": 0.49674177169799805, "learning_rate": 6.4892483080108005e-06, "loss": 0.381, "step": 27173 }, { "epoch": 1.2470285897847735, "grad_norm": 0.46072840690612793, "learning_rate": 6.4890142467082104e-06, "loss": 0.383, "step": 27174 }, { "epoch": 1.247074480290028, "grad_norm": 0.47060084342956543, "learning_rate": 6.488780181825013e-06, "loss": 0.3264, "step": 27175 }, { "epoch": 1.2471203707952825, "grad_norm": 0.4531290531158447, "learning_rate": 6.488546113361767e-06, "loss": 0.3049, "step": 27176 }, { "epoch": 1.247166261300537, "grad_norm": 0.6784154176712036, "learning_rate": 6.488312041319037e-06, "loss": 0.3631, "step": 27177 }, { "epoch": 1.2472121518057913, "grad_norm": 0.43427348136901855, "learning_rate": 6.488077965697388e-06, "loss": 0.3193, "step": 27178 }, { "epoch": 1.2472580423110458, "grad_norm": 0.47857213020324707, "learning_rate": 6.487843886497381e-06, "loss": 0.3771, "step": 27179 }, { "epoch": 1.2473039328163003, "grad_norm": 0.4701474606990814, "learning_rate": 6.487609803719579e-06, "loss": 0.315, "step": 27180 }, { "epoch": 1.2473498233215548, "grad_norm": 0.47626417875289917, "learning_rate": 6.487375717364545e-06, "loss": 0.347, "step": 27181 }, { "epoch": 1.2473957138268092, "grad_norm": 0.519077479839325, "learning_rate": 6.48714162743284e-06, "loss": 0.4706, "step": 27182 }, { "epoch": 1.2474416043320637, "grad_norm": 0.5012746453285217, "learning_rate": 6.486907533925031e-06, "loss": 0.4132, "step": 27183 }, { "epoch": 1.2474874948373182, "grad_norm": 0.541907012462616, "learning_rate": 6.486673436841677e-06, "loss": 0.4564, "step": 27184 }, { "epoch": 1.2475333853425727, "grad_norm": 0.5083046555519104, "learning_rate": 6.4864393361833435e-06, "loss": 0.4422, "step": 27185 }, { "epoch": 1.247579275847827, "grad_norm": 0.4666883051395416, "learning_rate": 6.486205231950593e-06, "loss": 0.3647, "step": 27186 }, { "epoch": 1.2476251663530815, "grad_norm": 0.45006126165390015, "learning_rate": 6.485971124143987e-06, "loss": 0.3361, "step": 27187 }, { "epoch": 1.247671056858336, "grad_norm": 0.4726943075656891, "learning_rate": 6.4857370127640885e-06, "loss": 0.3291, "step": 27188 }, { "epoch": 1.2477169473635905, "grad_norm": 0.4755069613456726, "learning_rate": 6.485502897811464e-06, "loss": 0.3634, "step": 27189 }, { "epoch": 1.247762837868845, "grad_norm": 0.4654363691806793, "learning_rate": 6.485268779286673e-06, "loss": 0.4089, "step": 27190 }, { "epoch": 1.2478087283740993, "grad_norm": 0.47353288531303406, "learning_rate": 6.485034657190278e-06, "loss": 0.3729, "step": 27191 }, { "epoch": 1.2478546188793538, "grad_norm": 0.43786486983299255, "learning_rate": 6.484800531522844e-06, "loss": 0.2831, "step": 27192 }, { "epoch": 1.2479005093846083, "grad_norm": 0.49401047825813293, "learning_rate": 6.484566402284934e-06, "loss": 0.4025, "step": 27193 }, { "epoch": 1.2479463998898628, "grad_norm": 0.4683270752429962, "learning_rate": 6.484332269477111e-06, "loss": 0.3675, "step": 27194 }, { "epoch": 1.2479922903951173, "grad_norm": 0.43790438771247864, "learning_rate": 6.4840981330999365e-06, "loss": 0.3211, "step": 27195 }, { "epoch": 1.2480381809003718, "grad_norm": 0.4515499472618103, "learning_rate": 6.483863993153973e-06, "loss": 0.3473, "step": 27196 }, { "epoch": 1.2480840714056263, "grad_norm": 0.49788567423820496, "learning_rate": 6.483629849639785e-06, "loss": 0.4362, "step": 27197 }, { "epoch": 1.2481299619108805, "grad_norm": 0.4535635709762573, "learning_rate": 6.4833957025579376e-06, "loss": 0.3675, "step": 27198 }, { "epoch": 1.248175852416135, "grad_norm": 0.4510275423526764, "learning_rate": 6.48316155190899e-06, "loss": 0.3352, "step": 27199 }, { "epoch": 1.2482217429213895, "grad_norm": 0.43900376558303833, "learning_rate": 6.482927397693508e-06, "loss": 0.3238, "step": 27200 }, { "epoch": 1.248267633426644, "grad_norm": 0.5351582169532776, "learning_rate": 6.482693239912055e-06, "loss": 0.4256, "step": 27201 }, { "epoch": 1.2483135239318985, "grad_norm": 0.4647214710712433, "learning_rate": 6.482459078565189e-06, "loss": 0.3481, "step": 27202 }, { "epoch": 1.248359414437153, "grad_norm": 0.48952463269233704, "learning_rate": 6.48222491365348e-06, "loss": 0.4242, "step": 27203 }, { "epoch": 1.2484053049424073, "grad_norm": 0.44437965750694275, "learning_rate": 6.481990745177485e-06, "loss": 0.3148, "step": 27204 }, { "epoch": 1.2484511954476618, "grad_norm": 0.4677712917327881, "learning_rate": 6.481756573137773e-06, "loss": 0.3766, "step": 27205 }, { "epoch": 1.2484970859529163, "grad_norm": 0.4753119945526123, "learning_rate": 6.4815223975349025e-06, "loss": 0.3706, "step": 27206 }, { "epoch": 1.2485429764581708, "grad_norm": 0.4815599024295807, "learning_rate": 6.481288218369438e-06, "loss": 0.367, "step": 27207 }, { "epoch": 1.2485888669634253, "grad_norm": 0.5009384751319885, "learning_rate": 6.4810540356419424e-06, "loss": 0.3893, "step": 27208 }, { "epoch": 1.2486347574686798, "grad_norm": 0.4978771209716797, "learning_rate": 6.480819849352982e-06, "loss": 0.392, "step": 27209 }, { "epoch": 1.2486806479739343, "grad_norm": 0.4856208562850952, "learning_rate": 6.480585659503114e-06, "loss": 0.3929, "step": 27210 }, { "epoch": 1.2487265384791886, "grad_norm": 0.4629942774772644, "learning_rate": 6.4803514660929065e-06, "loss": 0.3576, "step": 27211 }, { "epoch": 1.248772428984443, "grad_norm": 0.48653069138526917, "learning_rate": 6.4801172691229204e-06, "loss": 0.3947, "step": 27212 }, { "epoch": 1.2488183194896976, "grad_norm": 0.43936172127723694, "learning_rate": 6.47988306859372e-06, "loss": 0.3342, "step": 27213 }, { "epoch": 1.248864209994952, "grad_norm": 0.49076971411705017, "learning_rate": 6.479648864505867e-06, "loss": 0.3826, "step": 27214 }, { "epoch": 1.2489101005002066, "grad_norm": 0.48805180191993713, "learning_rate": 6.479414656859926e-06, "loss": 0.3719, "step": 27215 }, { "epoch": 1.248955991005461, "grad_norm": 0.4522589445114136, "learning_rate": 6.47918044565646e-06, "loss": 0.3174, "step": 27216 }, { "epoch": 1.2490018815107153, "grad_norm": 0.4690479636192322, "learning_rate": 6.478946230896033e-06, "loss": 0.3743, "step": 27217 }, { "epoch": 1.2490477720159698, "grad_norm": 0.4970743954181671, "learning_rate": 6.478712012579205e-06, "loss": 0.378, "step": 27218 }, { "epoch": 1.2490936625212243, "grad_norm": 0.49913227558135986, "learning_rate": 6.478477790706541e-06, "loss": 0.4181, "step": 27219 }, { "epoch": 1.2491395530264788, "grad_norm": 0.4689364731311798, "learning_rate": 6.478243565278606e-06, "loss": 0.3248, "step": 27220 }, { "epoch": 1.2491854435317333, "grad_norm": 0.46845316886901855, "learning_rate": 6.4780093362959606e-06, "loss": 0.3372, "step": 27221 }, { "epoch": 1.2492313340369878, "grad_norm": 0.4573359489440918, "learning_rate": 6.477775103759169e-06, "loss": 0.3412, "step": 27222 }, { "epoch": 1.2492772245422423, "grad_norm": 0.4407694935798645, "learning_rate": 6.477540867668796e-06, "loss": 0.2921, "step": 27223 }, { "epoch": 1.2493231150474966, "grad_norm": 0.4281848669052124, "learning_rate": 6.477306628025404e-06, "loss": 0.2777, "step": 27224 }, { "epoch": 1.249369005552751, "grad_norm": 0.44407904148101807, "learning_rate": 6.4770723848295536e-06, "loss": 0.3662, "step": 27225 }, { "epoch": 1.2494148960580056, "grad_norm": 0.4712226688861847, "learning_rate": 6.476838138081811e-06, "loss": 0.4326, "step": 27226 }, { "epoch": 1.24946078656326, "grad_norm": 0.4651430547237396, "learning_rate": 6.476603887782741e-06, "loss": 0.3885, "step": 27227 }, { "epoch": 1.2495066770685146, "grad_norm": 0.42073217034339905, "learning_rate": 6.4763696339329016e-06, "loss": 0.3075, "step": 27228 }, { "epoch": 1.2495525675737689, "grad_norm": 0.4523237347602844, "learning_rate": 6.476135376532862e-06, "loss": 0.3176, "step": 27229 }, { "epoch": 1.2495984580790234, "grad_norm": 0.48597121238708496, "learning_rate": 6.4759011155831805e-06, "loss": 0.3587, "step": 27230 }, { "epoch": 1.2496443485842779, "grad_norm": 0.46277087926864624, "learning_rate": 6.475666851084422e-06, "loss": 0.3903, "step": 27231 }, { "epoch": 1.2496902390895324, "grad_norm": 0.4517253041267395, "learning_rate": 6.475432583037153e-06, "loss": 0.341, "step": 27232 }, { "epoch": 1.2497361295947869, "grad_norm": 0.5187800526618958, "learning_rate": 6.475198311441933e-06, "loss": 0.4078, "step": 27233 }, { "epoch": 1.2497820201000414, "grad_norm": 0.5033402442932129, "learning_rate": 6.474964036299328e-06, "loss": 0.4273, "step": 27234 }, { "epoch": 1.2498279106052959, "grad_norm": 0.45251283049583435, "learning_rate": 6.474729757609898e-06, "loss": 0.3037, "step": 27235 }, { "epoch": 1.2498738011105501, "grad_norm": 0.486616313457489, "learning_rate": 6.474495475374208e-06, "loss": 0.366, "step": 27236 }, { "epoch": 1.2499196916158046, "grad_norm": 0.49515849351882935, "learning_rate": 6.4742611895928245e-06, "loss": 0.4268, "step": 27237 }, { "epoch": 1.2499655821210591, "grad_norm": 0.4791102409362793, "learning_rate": 6.474026900266307e-06, "loss": 0.3547, "step": 27238 }, { "epoch": 1.2500114726263136, "grad_norm": 0.4404602646827698, "learning_rate": 6.4737926073952185e-06, "loss": 0.3096, "step": 27239 }, { "epoch": 1.2500573631315681, "grad_norm": 0.4520892798900604, "learning_rate": 6.473558310980126e-06, "loss": 0.3117, "step": 27240 }, { "epoch": 1.2501032536368226, "grad_norm": 0.5221748352050781, "learning_rate": 6.473324011021589e-06, "loss": 0.4096, "step": 27241 }, { "epoch": 1.250149144142077, "grad_norm": 0.4728541672229767, "learning_rate": 6.473089707520173e-06, "loss": 0.3596, "step": 27242 }, { "epoch": 1.2501950346473314, "grad_norm": 0.4331679344177246, "learning_rate": 6.472855400476442e-06, "loss": 0.2978, "step": 27243 }, { "epoch": 1.250240925152586, "grad_norm": 0.4619464576244354, "learning_rate": 6.4726210898909595e-06, "loss": 0.37, "step": 27244 }, { "epoch": 1.2502868156578404, "grad_norm": 0.47641655802726746, "learning_rate": 6.4723867757642855e-06, "loss": 0.3627, "step": 27245 }, { "epoch": 1.2503327061630949, "grad_norm": 0.49338141083717346, "learning_rate": 6.472152458096988e-06, "loss": 0.3702, "step": 27246 }, { "epoch": 1.2503785966683494, "grad_norm": 0.5107830166816711, "learning_rate": 6.471918136889628e-06, "loss": 0.3915, "step": 27247 }, { "epoch": 1.2504244871736039, "grad_norm": 0.5124556422233582, "learning_rate": 6.471683812142769e-06, "loss": 0.3591, "step": 27248 }, { "epoch": 1.2504703776788584, "grad_norm": 0.4866150915622711, "learning_rate": 6.471449483856977e-06, "loss": 0.3901, "step": 27249 }, { "epoch": 1.2505162681841127, "grad_norm": 0.4900915324687958, "learning_rate": 6.4712151520328106e-06, "loss": 0.3943, "step": 27250 }, { "epoch": 1.2505621586893672, "grad_norm": 0.48075392842292786, "learning_rate": 6.470980816670838e-06, "loss": 0.4632, "step": 27251 }, { "epoch": 1.2506080491946217, "grad_norm": 0.4823262393474579, "learning_rate": 6.470746477771619e-06, "loss": 0.3604, "step": 27252 }, { "epoch": 1.2506539396998761, "grad_norm": 0.4462185800075531, "learning_rate": 6.470512135335718e-06, "loss": 0.2931, "step": 27253 }, { "epoch": 1.2506998302051304, "grad_norm": 0.47803962230682373, "learning_rate": 6.470277789363703e-06, "loss": 0.3741, "step": 27254 }, { "epoch": 1.250745720710385, "grad_norm": 0.4516344666481018, "learning_rate": 6.470043439856132e-06, "loss": 0.3447, "step": 27255 }, { "epoch": 1.2507916112156394, "grad_norm": 0.4276409447193146, "learning_rate": 6.46980908681357e-06, "loss": 0.3131, "step": 27256 }, { "epoch": 1.250837501720894, "grad_norm": 0.4838365912437439, "learning_rate": 6.469574730236582e-06, "loss": 0.3954, "step": 27257 }, { "epoch": 1.2508833922261484, "grad_norm": 0.5313409566879272, "learning_rate": 6.46934037012573e-06, "loss": 0.4375, "step": 27258 }, { "epoch": 1.250929282731403, "grad_norm": 0.4585217535495758, "learning_rate": 6.4691060064815785e-06, "loss": 0.3827, "step": 27259 }, { "epoch": 1.2509751732366574, "grad_norm": 0.5102069973945618, "learning_rate": 6.4688716393046915e-06, "loss": 0.3667, "step": 27260 }, { "epoch": 1.251021063741912, "grad_norm": 0.4488201439380646, "learning_rate": 6.46863726859563e-06, "loss": 0.317, "step": 27261 }, { "epoch": 1.2510669542471662, "grad_norm": 0.5075010657310486, "learning_rate": 6.46840289435496e-06, "loss": 0.4359, "step": 27262 }, { "epoch": 1.2511128447524207, "grad_norm": 0.5182188749313354, "learning_rate": 6.468168516583246e-06, "loss": 0.4055, "step": 27263 }, { "epoch": 1.2511587352576752, "grad_norm": 0.4916212558746338, "learning_rate": 6.4679341352810474e-06, "loss": 0.4283, "step": 27264 }, { "epoch": 1.2512046257629297, "grad_norm": 0.5063515305519104, "learning_rate": 6.467699750448933e-06, "loss": 0.41, "step": 27265 }, { "epoch": 1.2512505162681842, "grad_norm": 0.4606711268424988, "learning_rate": 6.467465362087463e-06, "loss": 0.3375, "step": 27266 }, { "epoch": 1.2512964067734385, "grad_norm": 0.470992773771286, "learning_rate": 6.467230970197201e-06, "loss": 0.3258, "step": 27267 }, { "epoch": 1.251342297278693, "grad_norm": 0.44808486104011536, "learning_rate": 6.4669965747787135e-06, "loss": 0.3455, "step": 27268 }, { "epoch": 1.2513881877839474, "grad_norm": 0.47690799832344055, "learning_rate": 6.466762175832561e-06, "loss": 0.3703, "step": 27269 }, { "epoch": 1.251434078289202, "grad_norm": 0.4546023905277252, "learning_rate": 6.466527773359307e-06, "loss": 0.3986, "step": 27270 }, { "epoch": 1.2514799687944564, "grad_norm": 0.45347532629966736, "learning_rate": 6.466293367359519e-06, "loss": 0.3243, "step": 27271 }, { "epoch": 1.251525859299711, "grad_norm": 0.4647709131240845, "learning_rate": 6.466058957833757e-06, "loss": 0.374, "step": 27272 }, { "epoch": 1.2515717498049654, "grad_norm": 0.4561839699745178, "learning_rate": 6.4658245447825865e-06, "loss": 0.3298, "step": 27273 }, { "epoch": 1.25161764031022, "grad_norm": 0.5078872442245483, "learning_rate": 6.46559012820657e-06, "loss": 0.3837, "step": 27274 }, { "epoch": 1.2516635308154742, "grad_norm": 0.4874109625816345, "learning_rate": 6.465355708106272e-06, "loss": 0.3703, "step": 27275 }, { "epoch": 1.2517094213207287, "grad_norm": 0.46631520986557007, "learning_rate": 6.465121284482256e-06, "loss": 0.3529, "step": 27276 }, { "epoch": 1.2517553118259832, "grad_norm": 0.47768092155456543, "learning_rate": 6.464886857335087e-06, "loss": 0.3945, "step": 27277 }, { "epoch": 1.2518012023312377, "grad_norm": 0.49916690587997437, "learning_rate": 6.464652426665325e-06, "loss": 0.3714, "step": 27278 }, { "epoch": 1.2518470928364922, "grad_norm": 0.4545755684375763, "learning_rate": 6.4644179924735375e-06, "loss": 0.3271, "step": 27279 }, { "epoch": 1.2518929833417465, "grad_norm": 0.45780059695243835, "learning_rate": 6.464183554760287e-06, "loss": 0.3794, "step": 27280 }, { "epoch": 1.251938873847001, "grad_norm": 0.4386332929134369, "learning_rate": 6.463949113526137e-06, "loss": 0.3137, "step": 27281 }, { "epoch": 1.2519847643522555, "grad_norm": 0.49703049659729004, "learning_rate": 6.463714668771652e-06, "loss": 0.4014, "step": 27282 }, { "epoch": 1.25203065485751, "grad_norm": 0.4717913269996643, "learning_rate": 6.463480220497395e-06, "loss": 0.4001, "step": 27283 }, { "epoch": 1.2520765453627645, "grad_norm": 0.5007030367851257, "learning_rate": 6.4632457687039295e-06, "loss": 0.4419, "step": 27284 }, { "epoch": 1.252122435868019, "grad_norm": 0.4780285656452179, "learning_rate": 6.46301131339182e-06, "loss": 0.3943, "step": 27285 }, { "epoch": 1.2521683263732735, "grad_norm": 0.44789066910743713, "learning_rate": 6.46277685456163e-06, "loss": 0.3161, "step": 27286 }, { "epoch": 1.252214216878528, "grad_norm": 0.49652865529060364, "learning_rate": 6.462542392213923e-06, "loss": 0.4167, "step": 27287 }, { "epoch": 1.2522601073837822, "grad_norm": 0.42882874608039856, "learning_rate": 6.462307926349264e-06, "loss": 0.2817, "step": 27288 }, { "epoch": 1.2523059978890367, "grad_norm": 0.4951099157333374, "learning_rate": 6.462073456968214e-06, "loss": 0.3456, "step": 27289 }, { "epoch": 1.2523518883942912, "grad_norm": 0.4809695780277252, "learning_rate": 6.46183898407134e-06, "loss": 0.3839, "step": 27290 }, { "epoch": 1.2523977788995457, "grad_norm": 0.42414382100105286, "learning_rate": 6.461604507659205e-06, "loss": 0.2902, "step": 27291 }, { "epoch": 1.2524436694048, "grad_norm": 0.4763728976249695, "learning_rate": 6.461370027732372e-06, "loss": 0.3859, "step": 27292 }, { "epoch": 1.2524895599100545, "grad_norm": 0.5009507536888123, "learning_rate": 6.461135544291406e-06, "loss": 0.4565, "step": 27293 }, { "epoch": 1.252535450415309, "grad_norm": 0.5027029514312744, "learning_rate": 6.46090105733687e-06, "loss": 0.4065, "step": 27294 }, { "epoch": 1.2525813409205635, "grad_norm": 0.48650291562080383, "learning_rate": 6.460666566869327e-06, "loss": 0.4255, "step": 27295 }, { "epoch": 1.252627231425818, "grad_norm": 0.5015364289283752, "learning_rate": 6.4604320728893426e-06, "loss": 0.4235, "step": 27296 }, { "epoch": 1.2526731219310725, "grad_norm": 0.49396607279777527, "learning_rate": 6.46019757539748e-06, "loss": 0.3616, "step": 27297 }, { "epoch": 1.252719012436327, "grad_norm": 0.5200433731079102, "learning_rate": 6.459963074394303e-06, "loss": 0.3297, "step": 27298 }, { "epoch": 1.2527649029415815, "grad_norm": 0.4375634491443634, "learning_rate": 6.4597285698803755e-06, "loss": 0.3509, "step": 27299 }, { "epoch": 1.2528107934468358, "grad_norm": 0.47292789816856384, "learning_rate": 6.4594940618562615e-06, "loss": 0.3988, "step": 27300 }, { "epoch": 1.2528566839520903, "grad_norm": 0.4142512381076813, "learning_rate": 6.459259550322524e-06, "loss": 0.2741, "step": 27301 }, { "epoch": 1.2529025744573448, "grad_norm": 0.40675899386405945, "learning_rate": 6.45902503527973e-06, "loss": 0.2475, "step": 27302 }, { "epoch": 1.2529484649625993, "grad_norm": 0.5075702667236328, "learning_rate": 6.45879051672844e-06, "loss": 0.4184, "step": 27303 }, { "epoch": 1.2529943554678538, "grad_norm": 0.48809295892715454, "learning_rate": 6.458555994669219e-06, "loss": 0.3865, "step": 27304 }, { "epoch": 1.253040245973108, "grad_norm": 0.4407748281955719, "learning_rate": 6.458321469102632e-06, "loss": 0.3288, "step": 27305 }, { "epoch": 1.2530861364783625, "grad_norm": 0.46890732645988464, "learning_rate": 6.458086940029241e-06, "loss": 0.3237, "step": 27306 }, { "epoch": 1.253132026983617, "grad_norm": 0.48231950402259827, "learning_rate": 6.457852407449612e-06, "loss": 0.3485, "step": 27307 }, { "epoch": 1.2531779174888715, "grad_norm": 0.5226314663887024, "learning_rate": 6.457617871364308e-06, "loss": 0.3753, "step": 27308 }, { "epoch": 1.253223807994126, "grad_norm": 0.46313074231147766, "learning_rate": 6.457383331773893e-06, "loss": 0.3608, "step": 27309 }, { "epoch": 1.2532696984993805, "grad_norm": 0.4911595284938812, "learning_rate": 6.457148788678931e-06, "loss": 0.3746, "step": 27310 }, { "epoch": 1.253315589004635, "grad_norm": 0.45556026697158813, "learning_rate": 6.456914242079986e-06, "loss": 0.3074, "step": 27311 }, { "epoch": 1.2533614795098895, "grad_norm": 0.5266536474227905, "learning_rate": 6.4566796919776205e-06, "loss": 0.4278, "step": 27312 }, { "epoch": 1.2534073700151438, "grad_norm": 0.46113714575767517, "learning_rate": 6.456445138372401e-06, "loss": 0.3475, "step": 27313 }, { "epoch": 1.2534532605203983, "grad_norm": 0.4841267764568329, "learning_rate": 6.456210581264892e-06, "loss": 0.3841, "step": 27314 }, { "epoch": 1.2534991510256528, "grad_norm": 0.4584927558898926, "learning_rate": 6.455976020655656e-06, "loss": 0.327, "step": 27315 }, { "epoch": 1.2535450415309073, "grad_norm": 0.4986528158187866, "learning_rate": 6.455741456545257e-06, "loss": 0.4362, "step": 27316 }, { "epoch": 1.2535909320361618, "grad_norm": 0.4732993245124817, "learning_rate": 6.4555068889342586e-06, "loss": 0.3799, "step": 27317 }, { "epoch": 1.253636822541416, "grad_norm": 0.4676668345928192, "learning_rate": 6.455272317823225e-06, "loss": 0.3543, "step": 27318 }, { "epoch": 1.2536827130466706, "grad_norm": 0.46188101172447205, "learning_rate": 6.455037743212722e-06, "loss": 0.351, "step": 27319 }, { "epoch": 1.253728603551925, "grad_norm": 0.4650830030441284, "learning_rate": 6.454803165103312e-06, "loss": 0.3983, "step": 27320 }, { "epoch": 1.2537744940571796, "grad_norm": 0.4522091746330261, "learning_rate": 6.454568583495559e-06, "loss": 0.3641, "step": 27321 }, { "epoch": 1.253820384562434, "grad_norm": 0.4822550117969513, "learning_rate": 6.454333998390029e-06, "loss": 0.3818, "step": 27322 }, { "epoch": 1.2538662750676886, "grad_norm": 0.4960421621799469, "learning_rate": 6.454099409787284e-06, "loss": 0.3993, "step": 27323 }, { "epoch": 1.253912165572943, "grad_norm": 0.5017940998077393, "learning_rate": 6.453864817687887e-06, "loss": 0.461, "step": 27324 }, { "epoch": 1.2539580560781975, "grad_norm": 0.4477796256542206, "learning_rate": 6.453630222092406e-06, "loss": 0.3523, "step": 27325 }, { "epoch": 1.2540039465834518, "grad_norm": 0.4454113245010376, "learning_rate": 6.453395623001404e-06, "loss": 0.3267, "step": 27326 }, { "epoch": 1.2540498370887063, "grad_norm": 0.46828290820121765, "learning_rate": 6.453161020415442e-06, "loss": 0.4084, "step": 27327 }, { "epoch": 1.2540957275939608, "grad_norm": 0.46313586831092834, "learning_rate": 6.452926414335088e-06, "loss": 0.3653, "step": 27328 }, { "epoch": 1.2541416180992153, "grad_norm": 0.4159192740917206, "learning_rate": 6.452691804760902e-06, "loss": 0.2897, "step": 27329 }, { "epoch": 1.2541875086044698, "grad_norm": 0.44550374150276184, "learning_rate": 6.452457191693454e-06, "loss": 0.3429, "step": 27330 }, { "epoch": 1.254233399109724, "grad_norm": 0.43923720717430115, "learning_rate": 6.452222575133304e-06, "loss": 0.3058, "step": 27331 }, { "epoch": 1.2542792896149786, "grad_norm": 0.4404500424861908, "learning_rate": 6.451987955081016e-06, "loss": 0.3122, "step": 27332 }, { "epoch": 1.254325180120233, "grad_norm": 0.4310649037361145, "learning_rate": 6.451753331537156e-06, "loss": 0.3359, "step": 27333 }, { "epoch": 1.2543710706254876, "grad_norm": 0.416260689496994, "learning_rate": 6.451518704502286e-06, "loss": 0.2686, "step": 27334 }, { "epoch": 1.254416961130742, "grad_norm": 0.43878620862960815, "learning_rate": 6.4512840739769726e-06, "loss": 0.372, "step": 27335 }, { "epoch": 1.2544628516359966, "grad_norm": 0.4648166000843048, "learning_rate": 6.45104943996178e-06, "loss": 0.3414, "step": 27336 }, { "epoch": 1.254508742141251, "grad_norm": 0.4448631703853607, "learning_rate": 6.45081480245727e-06, "loss": 0.2514, "step": 27337 }, { "epoch": 1.2545546326465056, "grad_norm": 0.49841955304145813, "learning_rate": 6.450580161464008e-06, "loss": 0.385, "step": 27338 }, { "epoch": 1.2546005231517598, "grad_norm": 0.49078845977783203, "learning_rate": 6.450345516982561e-06, "loss": 0.4091, "step": 27339 }, { "epoch": 1.2546464136570143, "grad_norm": 0.46568289399147034, "learning_rate": 6.450110869013488e-06, "loss": 0.3583, "step": 27340 }, { "epoch": 1.2546923041622688, "grad_norm": 0.4634729325771332, "learning_rate": 6.449876217557358e-06, "loss": 0.3439, "step": 27341 }, { "epoch": 1.2547381946675233, "grad_norm": 0.5169396996498108, "learning_rate": 6.449641562614733e-06, "loss": 0.4693, "step": 27342 }, { "epoch": 1.2547840851727776, "grad_norm": 0.4925481677055359, "learning_rate": 6.449406904186177e-06, "loss": 0.3334, "step": 27343 }, { "epoch": 1.2548299756780321, "grad_norm": 0.4659465253353119, "learning_rate": 6.449172242272255e-06, "loss": 0.3493, "step": 27344 }, { "epoch": 1.2548758661832866, "grad_norm": 0.4478093087673187, "learning_rate": 6.448937576873531e-06, "loss": 0.3264, "step": 27345 }, { "epoch": 1.2549217566885411, "grad_norm": 0.4676917493343353, "learning_rate": 6.448702907990568e-06, "loss": 0.3635, "step": 27346 }, { "epoch": 1.2549676471937956, "grad_norm": 0.4481979012489319, "learning_rate": 6.448468235623935e-06, "loss": 0.3355, "step": 27347 }, { "epoch": 1.25501353769905, "grad_norm": 0.450545072555542, "learning_rate": 6.4482335597741905e-06, "loss": 0.3451, "step": 27348 }, { "epoch": 1.2550594282043046, "grad_norm": 0.45113304257392883, "learning_rate": 6.447998880441902e-06, "loss": 0.2902, "step": 27349 }, { "epoch": 1.255105318709559, "grad_norm": 0.4956468939781189, "learning_rate": 6.447764197627634e-06, "loss": 0.406, "step": 27350 }, { "epoch": 1.2551512092148134, "grad_norm": 0.47298917174339294, "learning_rate": 6.447529511331949e-06, "loss": 0.3796, "step": 27351 }, { "epoch": 1.2551970997200679, "grad_norm": 0.4574783444404602, "learning_rate": 6.447294821555413e-06, "loss": 0.3448, "step": 27352 }, { "epoch": 1.2552429902253224, "grad_norm": 0.4723958671092987, "learning_rate": 6.44706012829859e-06, "loss": 0.3888, "step": 27353 }, { "epoch": 1.2552888807305769, "grad_norm": 0.4733197093009949, "learning_rate": 6.446825431562043e-06, "loss": 0.3426, "step": 27354 }, { "epoch": 1.2553347712358314, "grad_norm": 0.4987959563732147, "learning_rate": 6.446590731346337e-06, "loss": 0.4193, "step": 27355 }, { "epoch": 1.2553806617410856, "grad_norm": 0.46568313241004944, "learning_rate": 6.446356027652039e-06, "loss": 0.3772, "step": 27356 }, { "epoch": 1.2554265522463401, "grad_norm": 0.44380950927734375, "learning_rate": 6.446121320479709e-06, "loss": 0.3134, "step": 27357 }, { "epoch": 1.2554724427515946, "grad_norm": 0.47735467553138733, "learning_rate": 6.445886609829916e-06, "loss": 0.4035, "step": 27358 }, { "epoch": 1.2555183332568491, "grad_norm": 0.44859904050827026, "learning_rate": 6.445651895703221e-06, "loss": 0.3243, "step": 27359 }, { "epoch": 1.2555642237621036, "grad_norm": 0.4424631893634796, "learning_rate": 6.445417178100188e-06, "loss": 0.3525, "step": 27360 }, { "epoch": 1.2556101142673581, "grad_norm": 0.48520800471305847, "learning_rate": 6.445182457021384e-06, "loss": 0.3882, "step": 27361 }, { "epoch": 1.2556560047726126, "grad_norm": 0.519506573677063, "learning_rate": 6.444947732467372e-06, "loss": 0.4342, "step": 27362 }, { "epoch": 1.2557018952778671, "grad_norm": 0.4862143099308014, "learning_rate": 6.444713004438719e-06, "loss": 0.3842, "step": 27363 }, { "epoch": 1.2557477857831214, "grad_norm": 0.47181349992752075, "learning_rate": 6.444478272935985e-06, "loss": 0.3706, "step": 27364 }, { "epoch": 1.255793676288376, "grad_norm": 0.46996310353279114, "learning_rate": 6.4442435379597365e-06, "loss": 0.3711, "step": 27365 }, { "epoch": 1.2558395667936304, "grad_norm": 0.46993768215179443, "learning_rate": 6.444008799510538e-06, "loss": 0.3296, "step": 27366 }, { "epoch": 1.255885457298885, "grad_norm": 0.4352472424507141, "learning_rate": 6.443774057588956e-06, "loss": 0.3062, "step": 27367 }, { "epoch": 1.2559313478041394, "grad_norm": 0.47730687260627747, "learning_rate": 6.44353931219555e-06, "loss": 0.3869, "step": 27368 }, { "epoch": 1.2559772383093937, "grad_norm": 0.46295374631881714, "learning_rate": 6.443304563330891e-06, "loss": 0.3439, "step": 27369 }, { "epoch": 1.2560231288146482, "grad_norm": 0.45740842819213867, "learning_rate": 6.44306981099554e-06, "loss": 0.3291, "step": 27370 }, { "epoch": 1.2560690193199027, "grad_norm": 0.4948623776435852, "learning_rate": 6.4428350551900574e-06, "loss": 0.3984, "step": 27371 }, { "epoch": 1.2561149098251572, "grad_norm": 0.45749011635780334, "learning_rate": 6.442600295915015e-06, "loss": 0.3415, "step": 27372 }, { "epoch": 1.2561608003304117, "grad_norm": 0.47931888699531555, "learning_rate": 6.442365533170973e-06, "loss": 0.3926, "step": 27373 }, { "epoch": 1.2562066908356662, "grad_norm": 0.48011156916618347, "learning_rate": 6.442130766958499e-06, "loss": 0.3607, "step": 27374 }, { "epoch": 1.2562525813409207, "grad_norm": 0.4658646285533905, "learning_rate": 6.441895997278154e-06, "loss": 0.3889, "step": 27375 }, { "epoch": 1.2562984718461752, "grad_norm": 0.4985107481479645, "learning_rate": 6.4416612241305045e-06, "loss": 0.4059, "step": 27376 }, { "epoch": 1.2563443623514294, "grad_norm": 0.479005366563797, "learning_rate": 6.441426447516114e-06, "loss": 0.3684, "step": 27377 }, { "epoch": 1.256390252856684, "grad_norm": 0.49608471989631653, "learning_rate": 6.4411916674355475e-06, "loss": 0.3488, "step": 27378 }, { "epoch": 1.2564361433619384, "grad_norm": 0.4559023678302765, "learning_rate": 6.440956883889372e-06, "loss": 0.3502, "step": 27379 }, { "epoch": 1.256482033867193, "grad_norm": 0.4452434480190277, "learning_rate": 6.440722096878149e-06, "loss": 0.299, "step": 27380 }, { "epoch": 1.2565279243724472, "grad_norm": 0.5096319317817688, "learning_rate": 6.440487306402446e-06, "loss": 0.3857, "step": 27381 }, { "epoch": 1.2565738148777017, "grad_norm": 0.48911380767822266, "learning_rate": 6.440252512462823e-06, "loss": 0.3912, "step": 27382 }, { "epoch": 1.2566197053829562, "grad_norm": 0.48096635937690735, "learning_rate": 6.4400177150598464e-06, "loss": 0.3833, "step": 27383 }, { "epoch": 1.2566655958882107, "grad_norm": 0.4651685357093811, "learning_rate": 6.4397829141940834e-06, "loss": 0.3839, "step": 27384 }, { "epoch": 1.2567114863934652, "grad_norm": 0.45169171690940857, "learning_rate": 6.439548109866097e-06, "loss": 0.3504, "step": 27385 }, { "epoch": 1.2567573768987197, "grad_norm": 0.5323764085769653, "learning_rate": 6.439313302076451e-06, "loss": 0.4764, "step": 27386 }, { "epoch": 1.2568032674039742, "grad_norm": 0.4629278779029846, "learning_rate": 6.439078490825712e-06, "loss": 0.3094, "step": 27387 }, { "epoch": 1.2568491579092287, "grad_norm": 0.5124077796936035, "learning_rate": 6.438843676114442e-06, "loss": 0.3843, "step": 27388 }, { "epoch": 1.256895048414483, "grad_norm": 0.44896194338798523, "learning_rate": 6.4386088579432075e-06, "loss": 0.3178, "step": 27389 }, { "epoch": 1.2569409389197375, "grad_norm": 0.4462103545665741, "learning_rate": 6.438374036312573e-06, "loss": 0.3131, "step": 27390 }, { "epoch": 1.256986829424992, "grad_norm": 0.5211013555526733, "learning_rate": 6.438139211223104e-06, "loss": 0.4195, "step": 27391 }, { "epoch": 1.2570327199302465, "grad_norm": 0.47750577330589294, "learning_rate": 6.437904382675362e-06, "loss": 0.3717, "step": 27392 }, { "epoch": 1.257078610435501, "grad_norm": 0.46769195795059204, "learning_rate": 6.437669550669915e-06, "loss": 0.3464, "step": 27393 }, { "epoch": 1.2571245009407552, "grad_norm": 0.4533455967903137, "learning_rate": 6.437434715207326e-06, "loss": 0.3603, "step": 27394 }, { "epoch": 1.2571703914460097, "grad_norm": 0.44893261790275574, "learning_rate": 6.43719987628816e-06, "loss": 0.3029, "step": 27395 }, { "epoch": 1.2572162819512642, "grad_norm": 0.48795512318611145, "learning_rate": 6.436965033912983e-06, "loss": 0.4208, "step": 27396 }, { "epoch": 1.2572621724565187, "grad_norm": 0.4330460727214813, "learning_rate": 6.436730188082357e-06, "loss": 0.3209, "step": 27397 }, { "epoch": 1.2573080629617732, "grad_norm": 0.4401741027832031, "learning_rate": 6.436495338796851e-06, "loss": 0.3352, "step": 27398 }, { "epoch": 1.2573539534670277, "grad_norm": 0.48083239793777466, "learning_rate": 6.4362604860570245e-06, "loss": 0.4377, "step": 27399 }, { "epoch": 1.2573998439722822, "grad_norm": 0.43463778495788574, "learning_rate": 6.436025629863445e-06, "loss": 0.3104, "step": 27400 }, { "epoch": 1.2574457344775367, "grad_norm": 0.4763415455818176, "learning_rate": 6.4357907702166774e-06, "loss": 0.3303, "step": 27401 }, { "epoch": 1.257491624982791, "grad_norm": 0.456005722284317, "learning_rate": 6.435555907117288e-06, "loss": 0.3485, "step": 27402 }, { "epoch": 1.2575375154880455, "grad_norm": 0.44495245814323425, "learning_rate": 6.435321040565838e-06, "loss": 0.3221, "step": 27403 }, { "epoch": 1.2575834059933, "grad_norm": 0.4608716666698456, "learning_rate": 6.435086170562893e-06, "loss": 0.3572, "step": 27404 }, { "epoch": 1.2576292964985545, "grad_norm": 0.4946003258228302, "learning_rate": 6.43485129710902e-06, "loss": 0.4525, "step": 27405 }, { "epoch": 1.257675187003809, "grad_norm": 0.4673561751842499, "learning_rate": 6.434616420204781e-06, "loss": 0.3377, "step": 27406 }, { "epoch": 1.2577210775090633, "grad_norm": 0.4342801570892334, "learning_rate": 6.4343815398507445e-06, "loss": 0.2992, "step": 27407 }, { "epoch": 1.2577669680143178, "grad_norm": 0.43486708402633667, "learning_rate": 6.4341466560474734e-06, "loss": 0.31, "step": 27408 }, { "epoch": 1.2578128585195723, "grad_norm": 0.4756821095943451, "learning_rate": 6.433911768795529e-06, "loss": 0.3776, "step": 27409 }, { "epoch": 1.2578587490248267, "grad_norm": 0.4886906147003174, "learning_rate": 6.433676878095483e-06, "loss": 0.3615, "step": 27410 }, { "epoch": 1.2579046395300812, "grad_norm": 0.4969492554664612, "learning_rate": 6.433441983947894e-06, "loss": 0.4425, "step": 27411 }, { "epoch": 1.2579505300353357, "grad_norm": 0.4656849503517151, "learning_rate": 6.433207086353331e-06, "loss": 0.3977, "step": 27412 }, { "epoch": 1.2579964205405902, "grad_norm": 0.4875065088272095, "learning_rate": 6.432972185312356e-06, "loss": 0.3805, "step": 27413 }, { "epoch": 1.2580423110458447, "grad_norm": 0.4738095998764038, "learning_rate": 6.432737280825536e-06, "loss": 0.3411, "step": 27414 }, { "epoch": 1.258088201551099, "grad_norm": 0.4672408103942871, "learning_rate": 6.432502372893435e-06, "loss": 0.3446, "step": 27415 }, { "epoch": 1.2581340920563535, "grad_norm": 0.5167033672332764, "learning_rate": 6.432267461516617e-06, "loss": 0.4079, "step": 27416 }, { "epoch": 1.258179982561608, "grad_norm": 0.49711379408836365, "learning_rate": 6.4320325466956475e-06, "loss": 0.3765, "step": 27417 }, { "epoch": 1.2582258730668625, "grad_norm": 0.4630419611930847, "learning_rate": 6.431797628431094e-06, "loss": 0.3144, "step": 27418 }, { "epoch": 1.258271763572117, "grad_norm": 0.5263121128082275, "learning_rate": 6.431562706723517e-06, "loss": 0.4252, "step": 27419 }, { "epoch": 1.2583176540773713, "grad_norm": 0.4480116367340088, "learning_rate": 6.431327781573484e-06, "loss": 0.3088, "step": 27420 }, { "epoch": 1.2583635445826258, "grad_norm": 0.4972078800201416, "learning_rate": 6.4310928529815605e-06, "loss": 0.3808, "step": 27421 }, { "epoch": 1.2584094350878803, "grad_norm": 0.5099966526031494, "learning_rate": 6.430857920948309e-06, "loss": 0.4092, "step": 27422 }, { "epoch": 1.2584553255931348, "grad_norm": 0.4731709659099579, "learning_rate": 6.4306229854742975e-06, "loss": 0.3377, "step": 27423 }, { "epoch": 1.2585012160983893, "grad_norm": 0.441536009311676, "learning_rate": 6.430388046560087e-06, "loss": 0.3295, "step": 27424 }, { "epoch": 1.2585471066036438, "grad_norm": 0.5139393210411072, "learning_rate": 6.430153104206246e-06, "loss": 0.3394, "step": 27425 }, { "epoch": 1.2585929971088983, "grad_norm": 0.4942586421966553, "learning_rate": 6.429918158413338e-06, "loss": 0.4194, "step": 27426 }, { "epoch": 1.2586388876141528, "grad_norm": 0.5035301446914673, "learning_rate": 6.429683209181928e-06, "loss": 0.3469, "step": 27427 }, { "epoch": 1.258684778119407, "grad_norm": 0.4992572069168091, "learning_rate": 6.429448256512581e-06, "loss": 0.3907, "step": 27428 }, { "epoch": 1.2587306686246615, "grad_norm": 0.45103514194488525, "learning_rate": 6.429213300405862e-06, "loss": 0.2883, "step": 27429 }, { "epoch": 1.258776559129916, "grad_norm": 0.5293766856193542, "learning_rate": 6.4289783408623375e-06, "loss": 0.3804, "step": 27430 }, { "epoch": 1.2588224496351705, "grad_norm": 0.4605061411857605, "learning_rate": 6.4287433778825694e-06, "loss": 0.3605, "step": 27431 }, { "epoch": 1.2588683401404248, "grad_norm": 0.527533769607544, "learning_rate": 6.428508411467125e-06, "loss": 0.4505, "step": 27432 }, { "epoch": 1.2589142306456793, "grad_norm": 0.45651015639305115, "learning_rate": 6.4282734416165685e-06, "loss": 0.3497, "step": 27433 }, { "epoch": 1.2589601211509338, "grad_norm": 0.468691885471344, "learning_rate": 6.4280384683314655e-06, "loss": 0.3984, "step": 27434 }, { "epoch": 1.2590060116561883, "grad_norm": 0.4796467125415802, "learning_rate": 6.427803491612381e-06, "loss": 0.4059, "step": 27435 }, { "epoch": 1.2590519021614428, "grad_norm": 0.48359155654907227, "learning_rate": 6.427568511459879e-06, "loss": 0.345, "step": 27436 }, { "epoch": 1.2590977926666973, "grad_norm": 0.5045320987701416, "learning_rate": 6.427333527874524e-06, "loss": 0.3704, "step": 27437 }, { "epoch": 1.2591436831719518, "grad_norm": 0.4661056101322174, "learning_rate": 6.427098540856884e-06, "loss": 0.3578, "step": 27438 }, { "epoch": 1.2591895736772063, "grad_norm": 0.4306385815143585, "learning_rate": 6.426863550407522e-06, "loss": 0.3231, "step": 27439 }, { "epoch": 1.2592354641824606, "grad_norm": 0.46582722663879395, "learning_rate": 6.426628556527005e-06, "loss": 0.3659, "step": 27440 }, { "epoch": 1.259281354687715, "grad_norm": 0.47344449162483215, "learning_rate": 6.426393559215895e-06, "loss": 0.3624, "step": 27441 }, { "epoch": 1.2593272451929696, "grad_norm": 0.4470025300979614, "learning_rate": 6.426158558474759e-06, "loss": 0.3232, "step": 27442 }, { "epoch": 1.259373135698224, "grad_norm": 0.4395503103733063, "learning_rate": 6.425923554304161e-06, "loss": 0.2822, "step": 27443 }, { "epoch": 1.2594190262034786, "grad_norm": 0.4978513717651367, "learning_rate": 6.425688546704668e-06, "loss": 0.4115, "step": 27444 }, { "epoch": 1.2594649167087328, "grad_norm": 0.47735491394996643, "learning_rate": 6.425453535676844e-06, "loss": 0.3859, "step": 27445 }, { "epoch": 1.2595108072139873, "grad_norm": 0.404693603515625, "learning_rate": 6.425218521221255e-06, "loss": 0.245, "step": 27446 }, { "epoch": 1.2595566977192418, "grad_norm": 0.45366817712783813, "learning_rate": 6.424983503338464e-06, "loss": 0.3298, "step": 27447 }, { "epoch": 1.2596025882244963, "grad_norm": 0.49920982122421265, "learning_rate": 6.424748482029036e-06, "loss": 0.4011, "step": 27448 }, { "epoch": 1.2596484787297508, "grad_norm": 0.4661264419555664, "learning_rate": 6.424513457293539e-06, "loss": 0.3911, "step": 27449 }, { "epoch": 1.2596943692350053, "grad_norm": 0.48897841572761536, "learning_rate": 6.424278429132537e-06, "loss": 0.4129, "step": 27450 }, { "epoch": 1.2597402597402598, "grad_norm": 0.46444171667099, "learning_rate": 6.424043397546595e-06, "loss": 0.3218, "step": 27451 }, { "epoch": 1.2597861502455143, "grad_norm": 0.5604879260063171, "learning_rate": 6.423808362536278e-06, "loss": 0.4183, "step": 27452 }, { "epoch": 1.2598320407507686, "grad_norm": 0.46222352981567383, "learning_rate": 6.4235733241021505e-06, "loss": 0.3919, "step": 27453 }, { "epoch": 1.259877931256023, "grad_norm": 0.4743099808692932, "learning_rate": 6.423338282244778e-06, "loss": 0.3528, "step": 27454 }, { "epoch": 1.2599238217612776, "grad_norm": 0.5122509002685547, "learning_rate": 6.423103236964727e-06, "loss": 0.396, "step": 27455 }, { "epoch": 1.259969712266532, "grad_norm": 0.4523339569568634, "learning_rate": 6.4228681882625634e-06, "loss": 0.3475, "step": 27456 }, { "epoch": 1.2600156027717866, "grad_norm": 0.4637714922428131, "learning_rate": 6.422633136138848e-06, "loss": 0.3559, "step": 27457 }, { "epoch": 1.2600614932770409, "grad_norm": 0.4727868139743805, "learning_rate": 6.422398080594151e-06, "loss": 0.3832, "step": 27458 }, { "epoch": 1.2601073837822954, "grad_norm": 0.47968143224716187, "learning_rate": 6.422163021629035e-06, "loss": 0.3862, "step": 27459 }, { "epoch": 1.2601532742875499, "grad_norm": 0.49586865305900574, "learning_rate": 6.4219279592440655e-06, "loss": 0.4087, "step": 27460 }, { "epoch": 1.2601991647928044, "grad_norm": 0.43487364053726196, "learning_rate": 6.4216928934398085e-06, "loss": 0.2956, "step": 27461 }, { "epoch": 1.2602450552980589, "grad_norm": 0.4320218861103058, "learning_rate": 6.421457824216828e-06, "loss": 0.3053, "step": 27462 }, { "epoch": 1.2602909458033134, "grad_norm": 0.44533586502075195, "learning_rate": 6.421222751575691e-06, "loss": 0.3244, "step": 27463 }, { "epoch": 1.2603368363085679, "grad_norm": 0.45658960938453674, "learning_rate": 6.420987675516961e-06, "loss": 0.3459, "step": 27464 }, { "epoch": 1.2603827268138224, "grad_norm": 0.4522017240524292, "learning_rate": 6.420752596041204e-06, "loss": 0.3183, "step": 27465 }, { "epoch": 1.2604286173190766, "grad_norm": 0.54719078540802, "learning_rate": 6.420517513148986e-06, "loss": 0.3792, "step": 27466 }, { "epoch": 1.2604745078243311, "grad_norm": 0.4125523865222931, "learning_rate": 6.420282426840872e-06, "loss": 0.2948, "step": 27467 }, { "epoch": 1.2605203983295856, "grad_norm": 0.46355879306793213, "learning_rate": 6.420047337117426e-06, "loss": 0.3354, "step": 27468 }, { "epoch": 1.2605662888348401, "grad_norm": 0.4678686261177063, "learning_rate": 6.419812243979215e-06, "loss": 0.3833, "step": 27469 }, { "epoch": 1.2606121793400944, "grad_norm": 0.5016003251075745, "learning_rate": 6.419577147426804e-06, "loss": 0.439, "step": 27470 }, { "epoch": 1.260658069845349, "grad_norm": 0.4770630896091461, "learning_rate": 6.419342047460757e-06, "loss": 0.3956, "step": 27471 }, { "epoch": 1.2607039603506034, "grad_norm": 0.42309603095054626, "learning_rate": 6.419106944081641e-06, "loss": 0.2696, "step": 27472 }, { "epoch": 1.260749850855858, "grad_norm": 0.4692918062210083, "learning_rate": 6.418871837290021e-06, "loss": 0.3546, "step": 27473 }, { "epoch": 1.2607957413611124, "grad_norm": 0.45896798372268677, "learning_rate": 6.4186367270864615e-06, "loss": 0.3419, "step": 27474 }, { "epoch": 1.2608416318663669, "grad_norm": 0.45902541279792786, "learning_rate": 6.418401613471529e-06, "loss": 0.3337, "step": 27475 }, { "epoch": 1.2608875223716214, "grad_norm": 0.4546019732952118, "learning_rate": 6.418166496445788e-06, "loss": 0.3661, "step": 27476 }, { "epoch": 1.2609334128768759, "grad_norm": 0.467183917760849, "learning_rate": 6.417931376009804e-06, "loss": 0.3613, "step": 27477 }, { "epoch": 1.2609793033821302, "grad_norm": 0.46778103709220886, "learning_rate": 6.417696252164143e-06, "loss": 0.3508, "step": 27478 }, { "epoch": 1.2610251938873847, "grad_norm": 0.4331210255622864, "learning_rate": 6.417461124909369e-06, "loss": 0.3027, "step": 27479 }, { "epoch": 1.2610710843926392, "grad_norm": 0.43157076835632324, "learning_rate": 6.417225994246049e-06, "loss": 0.2937, "step": 27480 }, { "epoch": 1.2611169748978937, "grad_norm": 0.5246948003768921, "learning_rate": 6.416990860174748e-06, "loss": 0.4274, "step": 27481 }, { "epoch": 1.2611628654031481, "grad_norm": 0.4471305310726166, "learning_rate": 6.416755722696028e-06, "loss": 0.3353, "step": 27482 }, { "epoch": 1.2612087559084024, "grad_norm": 0.4778052270412445, "learning_rate": 6.416520581810463e-06, "loss": 0.3411, "step": 27483 }, { "epoch": 1.261254646413657, "grad_norm": 0.4761069118976593, "learning_rate": 6.41628543751861e-06, "loss": 0.3694, "step": 27484 }, { "epoch": 1.2613005369189114, "grad_norm": 0.4455127418041229, "learning_rate": 6.416050289821037e-06, "loss": 0.3049, "step": 27485 }, { "epoch": 1.261346427424166, "grad_norm": 0.44119274616241455, "learning_rate": 6.415815138718312e-06, "loss": 0.2802, "step": 27486 }, { "epoch": 1.2613923179294204, "grad_norm": 0.4375648498535156, "learning_rate": 6.415579984210997e-06, "loss": 0.3163, "step": 27487 }, { "epoch": 1.261438208434675, "grad_norm": 0.4907718598842621, "learning_rate": 6.41534482629966e-06, "loss": 0.423, "step": 27488 }, { "epoch": 1.2614840989399294, "grad_norm": 0.4556412994861603, "learning_rate": 6.415109664984866e-06, "loss": 0.3281, "step": 27489 }, { "epoch": 1.261529989445184, "grad_norm": 0.45051687955856323, "learning_rate": 6.4148745002671775e-06, "loss": 0.3687, "step": 27490 }, { "epoch": 1.2615758799504382, "grad_norm": 0.4505678117275238, "learning_rate": 6.414639332147162e-06, "loss": 0.3196, "step": 27491 }, { "epoch": 1.2616217704556927, "grad_norm": 0.45384714007377625, "learning_rate": 6.414404160625388e-06, "loss": 0.3312, "step": 27492 }, { "epoch": 1.2616676609609472, "grad_norm": 0.44400930404663086, "learning_rate": 6.414168985702416e-06, "loss": 0.3492, "step": 27493 }, { "epoch": 1.2617135514662017, "grad_norm": 0.4834831953048706, "learning_rate": 6.413933807378816e-06, "loss": 0.3598, "step": 27494 }, { "epoch": 1.2617594419714562, "grad_norm": 0.45370545983314514, "learning_rate": 6.413698625655152e-06, "loss": 0.3321, "step": 27495 }, { "epoch": 1.2618053324767105, "grad_norm": 0.47711634635925293, "learning_rate": 6.413463440531986e-06, "loss": 0.3715, "step": 27496 }, { "epoch": 1.261851222981965, "grad_norm": 0.45871588587760925, "learning_rate": 6.4132282520098885e-06, "loss": 0.3501, "step": 27497 }, { "epoch": 1.2618971134872194, "grad_norm": 0.46482929587364197, "learning_rate": 6.412993060089423e-06, "loss": 0.3847, "step": 27498 }, { "epoch": 1.261943003992474, "grad_norm": 0.46674227714538574, "learning_rate": 6.412757864771155e-06, "loss": 0.3972, "step": 27499 }, { "epoch": 1.2619888944977284, "grad_norm": 0.45041075348854065, "learning_rate": 6.412522666055651e-06, "loss": 0.3488, "step": 27500 }, { "epoch": 1.262034785002983, "grad_norm": 0.45454224944114685, "learning_rate": 6.412287463943474e-06, "loss": 0.3364, "step": 27501 }, { "epoch": 1.2620806755082374, "grad_norm": 0.4382898807525635, "learning_rate": 6.4120522584351905e-06, "loss": 0.3233, "step": 27502 }, { "epoch": 1.262126566013492, "grad_norm": 0.49850529432296753, "learning_rate": 6.4118170495313705e-06, "loss": 0.3666, "step": 27503 }, { "epoch": 1.2621724565187462, "grad_norm": 0.4846073389053345, "learning_rate": 6.411581837232573e-06, "loss": 0.3796, "step": 27504 }, { "epoch": 1.2622183470240007, "grad_norm": 0.49917495250701904, "learning_rate": 6.41134662153937e-06, "loss": 0.415, "step": 27505 }, { "epoch": 1.2622642375292552, "grad_norm": 0.4647367596626282, "learning_rate": 6.4111114024523215e-06, "loss": 0.385, "step": 27506 }, { "epoch": 1.2623101280345097, "grad_norm": 0.4864891469478607, "learning_rate": 6.410876179971994e-06, "loss": 0.3972, "step": 27507 }, { "epoch": 1.2623560185397642, "grad_norm": 0.45369046926498413, "learning_rate": 6.4106409540989565e-06, "loss": 0.3603, "step": 27508 }, { "epoch": 1.2624019090450185, "grad_norm": 0.4304215908050537, "learning_rate": 6.410405724833772e-06, "loss": 0.3234, "step": 27509 }, { "epoch": 1.262447799550273, "grad_norm": 0.4529515504837036, "learning_rate": 6.410170492177009e-06, "loss": 0.3892, "step": 27510 }, { "epoch": 1.2624936900555275, "grad_norm": 0.4793500304222107, "learning_rate": 6.409935256129229e-06, "loss": 0.3516, "step": 27511 }, { "epoch": 1.262539580560782, "grad_norm": 0.45193198323249817, "learning_rate": 6.409700016691001e-06, "loss": 0.3521, "step": 27512 }, { "epoch": 1.2625854710660365, "grad_norm": 0.45834222435951233, "learning_rate": 6.409464773862887e-06, "loss": 0.3346, "step": 27513 }, { "epoch": 1.262631361571291, "grad_norm": 0.5123468041419983, "learning_rate": 6.409229527645457e-06, "loss": 0.424, "step": 27514 }, { "epoch": 1.2626772520765455, "grad_norm": 0.4853701889514923, "learning_rate": 6.408994278039273e-06, "loss": 0.3899, "step": 27515 }, { "epoch": 1.2627231425818, "grad_norm": 0.48521438241004944, "learning_rate": 6.408759025044905e-06, "loss": 0.3705, "step": 27516 }, { "epoch": 1.2627690330870542, "grad_norm": 0.44487500190734863, "learning_rate": 6.4085237686629155e-06, "loss": 0.3103, "step": 27517 }, { "epoch": 1.2628149235923087, "grad_norm": 0.5089526772499084, "learning_rate": 6.408288508893869e-06, "loss": 0.3754, "step": 27518 }, { "epoch": 1.2628608140975632, "grad_norm": 0.48424121737480164, "learning_rate": 6.408053245738335e-06, "loss": 0.3972, "step": 27519 }, { "epoch": 1.2629067046028177, "grad_norm": 0.5489816665649414, "learning_rate": 6.407817979196877e-06, "loss": 0.3357, "step": 27520 }, { "epoch": 1.262952595108072, "grad_norm": 0.4366982579231262, "learning_rate": 6.407582709270061e-06, "loss": 0.32, "step": 27521 }, { "epoch": 1.2629984856133265, "grad_norm": 0.45980849862098694, "learning_rate": 6.407347435958452e-06, "loss": 0.3309, "step": 27522 }, { "epoch": 1.263044376118581, "grad_norm": 0.4334490895271301, "learning_rate": 6.407112159262618e-06, "loss": 0.292, "step": 27523 }, { "epoch": 1.2630902666238355, "grad_norm": 0.4605666697025299, "learning_rate": 6.406876879183122e-06, "loss": 0.351, "step": 27524 }, { "epoch": 1.26313615712909, "grad_norm": 0.4492349624633789, "learning_rate": 6.406641595720531e-06, "loss": 0.3104, "step": 27525 }, { "epoch": 1.2631820476343445, "grad_norm": 0.4943992793560028, "learning_rate": 6.4064063088754126e-06, "loss": 0.3845, "step": 27526 }, { "epoch": 1.263227938139599, "grad_norm": 0.4596658945083618, "learning_rate": 6.4061710186483295e-06, "loss": 0.3239, "step": 27527 }, { "epoch": 1.2632738286448535, "grad_norm": 0.4748929738998413, "learning_rate": 6.40593572503985e-06, "loss": 0.3647, "step": 27528 }, { "epoch": 1.2633197191501078, "grad_norm": 0.4443962275981903, "learning_rate": 6.405700428050537e-06, "loss": 0.3043, "step": 27529 }, { "epoch": 1.2633656096553623, "grad_norm": 0.46790122985839844, "learning_rate": 6.405465127680959e-06, "loss": 0.3737, "step": 27530 }, { "epoch": 1.2634115001606168, "grad_norm": 0.4724853038787842, "learning_rate": 6.4052298239316824e-06, "loss": 0.4104, "step": 27531 }, { "epoch": 1.2634573906658713, "grad_norm": 0.4438176155090332, "learning_rate": 6.40499451680327e-06, "loss": 0.3637, "step": 27532 }, { "epoch": 1.2635032811711258, "grad_norm": 0.4821753203868866, "learning_rate": 6.404759206296289e-06, "loss": 0.3557, "step": 27533 }, { "epoch": 1.26354917167638, "grad_norm": 0.4461735785007477, "learning_rate": 6.4045238924113074e-06, "loss": 0.3095, "step": 27534 }, { "epoch": 1.2635950621816345, "grad_norm": 0.4672586917877197, "learning_rate": 6.404288575148887e-06, "loss": 0.3472, "step": 27535 }, { "epoch": 1.263640952686889, "grad_norm": 0.44491419196128845, "learning_rate": 6.404053254509596e-06, "loss": 0.3221, "step": 27536 }, { "epoch": 1.2636868431921435, "grad_norm": 0.4458046853542328, "learning_rate": 6.403817930494001e-06, "loss": 0.2938, "step": 27537 }, { "epoch": 1.263732733697398, "grad_norm": 0.4586712121963501, "learning_rate": 6.403582603102667e-06, "loss": 0.3624, "step": 27538 }, { "epoch": 1.2637786242026525, "grad_norm": 0.4363486170768738, "learning_rate": 6.403347272336159e-06, "loss": 0.3172, "step": 27539 }, { "epoch": 1.263824514707907, "grad_norm": 0.4918195903301239, "learning_rate": 6.403111938195044e-06, "loss": 0.391, "step": 27540 }, { "epoch": 1.2638704052131615, "grad_norm": 0.43595102429389954, "learning_rate": 6.402876600679888e-06, "loss": 0.3302, "step": 27541 }, { "epoch": 1.2639162957184158, "grad_norm": 0.545981228351593, "learning_rate": 6.402641259791256e-06, "loss": 0.4357, "step": 27542 }, { "epoch": 1.2639621862236703, "grad_norm": 0.4933203458786011, "learning_rate": 6.402405915529716e-06, "loss": 0.4018, "step": 27543 }, { "epoch": 1.2640080767289248, "grad_norm": 0.4720374047756195, "learning_rate": 6.4021705678958305e-06, "loss": 0.3914, "step": 27544 }, { "epoch": 1.2640539672341793, "grad_norm": 0.5160587430000305, "learning_rate": 6.401935216890168e-06, "loss": 0.4113, "step": 27545 }, { "epoch": 1.2640998577394338, "grad_norm": 0.49615901708602905, "learning_rate": 6.401699862513293e-06, "loss": 0.4063, "step": 27546 }, { "epoch": 1.264145748244688, "grad_norm": 0.46220389008522034, "learning_rate": 6.401464504765772e-06, "loss": 0.3632, "step": 27547 }, { "epoch": 1.2641916387499426, "grad_norm": 0.45945844054222107, "learning_rate": 6.4012291436481735e-06, "loss": 0.3449, "step": 27548 }, { "epoch": 1.264237529255197, "grad_norm": 0.4806166887283325, "learning_rate": 6.40099377916106e-06, "loss": 0.4271, "step": 27549 }, { "epoch": 1.2642834197604516, "grad_norm": 0.4782956540584564, "learning_rate": 6.400758411304997e-06, "loss": 0.3545, "step": 27550 }, { "epoch": 1.264329310265706, "grad_norm": 0.45929577946662903, "learning_rate": 6.400523040080554e-06, "loss": 0.3405, "step": 27551 }, { "epoch": 1.2643752007709606, "grad_norm": 0.5132370591163635, "learning_rate": 6.400287665488294e-06, "loss": 0.367, "step": 27552 }, { "epoch": 1.264421091276215, "grad_norm": 0.44626620411872864, "learning_rate": 6.400052287528785e-06, "loss": 0.2948, "step": 27553 }, { "epoch": 1.2644669817814695, "grad_norm": 0.4999266266822815, "learning_rate": 6.399816906202593e-06, "loss": 0.4058, "step": 27554 }, { "epoch": 1.2645128722867238, "grad_norm": 0.4727477729320526, "learning_rate": 6.3995815215102805e-06, "loss": 0.3277, "step": 27555 }, { "epoch": 1.2645587627919783, "grad_norm": 0.5031111240386963, "learning_rate": 6.3993461334524175e-06, "loss": 0.3776, "step": 27556 }, { "epoch": 1.2646046532972328, "grad_norm": 0.41710031032562256, "learning_rate": 6.3991107420295696e-06, "loss": 0.2401, "step": 27557 }, { "epoch": 1.2646505438024873, "grad_norm": 0.4364396035671234, "learning_rate": 6.3988753472423e-06, "loss": 0.3025, "step": 27558 }, { "epoch": 1.2646964343077416, "grad_norm": 0.4538706839084625, "learning_rate": 6.398639949091179e-06, "loss": 0.3334, "step": 27559 }, { "epoch": 1.264742324812996, "grad_norm": 0.47165536880493164, "learning_rate": 6.3984045475767685e-06, "loss": 0.3203, "step": 27560 }, { "epoch": 1.2647882153182506, "grad_norm": 0.4871077537536621, "learning_rate": 6.3981691426996374e-06, "loss": 0.2973, "step": 27561 }, { "epoch": 1.264834105823505, "grad_norm": 0.4624306559562683, "learning_rate": 6.397933734460351e-06, "loss": 0.3361, "step": 27562 }, { "epoch": 1.2648799963287596, "grad_norm": 0.47159844636917114, "learning_rate": 6.3976983228594735e-06, "loss": 0.3408, "step": 27563 }, { "epoch": 1.264925886834014, "grad_norm": 0.5319923162460327, "learning_rate": 6.397462907897575e-06, "loss": 0.4742, "step": 27564 }, { "epoch": 1.2649717773392686, "grad_norm": 0.48090559244155884, "learning_rate": 6.397227489575219e-06, "loss": 0.3832, "step": 27565 }, { "epoch": 1.265017667844523, "grad_norm": 0.5140282511711121, "learning_rate": 6.39699206789297e-06, "loss": 0.4137, "step": 27566 }, { "epoch": 1.2650635583497774, "grad_norm": 0.4645368754863739, "learning_rate": 6.396756642851396e-06, "loss": 0.3617, "step": 27567 }, { "epoch": 1.2651094488550318, "grad_norm": 0.4952695369720459, "learning_rate": 6.396521214451064e-06, "loss": 0.3922, "step": 27568 }, { "epoch": 1.2651553393602863, "grad_norm": 0.4704573154449463, "learning_rate": 6.39628578269254e-06, "loss": 0.3553, "step": 27569 }, { "epoch": 1.2652012298655408, "grad_norm": 0.42104077339172363, "learning_rate": 6.396050347576388e-06, "loss": 0.267, "step": 27570 }, { "epoch": 1.2652471203707953, "grad_norm": 0.4680958688259125, "learning_rate": 6.3958149091031775e-06, "loss": 0.3795, "step": 27571 }, { "epoch": 1.2652930108760496, "grad_norm": 0.47072237730026245, "learning_rate": 6.395579467273469e-06, "loss": 0.3296, "step": 27572 }, { "epoch": 1.2653389013813041, "grad_norm": 0.4713965654373169, "learning_rate": 6.395344022087835e-06, "loss": 0.3496, "step": 27573 }, { "epoch": 1.2653847918865586, "grad_norm": 0.45085129141807556, "learning_rate": 6.39510857354684e-06, "loss": 0.3299, "step": 27574 }, { "epoch": 1.265430682391813, "grad_norm": 0.5161347985267639, "learning_rate": 6.394873121651047e-06, "loss": 0.3893, "step": 27575 }, { "epoch": 1.2654765728970676, "grad_norm": 0.41738277673721313, "learning_rate": 6.394637666401025e-06, "loss": 0.2939, "step": 27576 }, { "epoch": 1.265522463402322, "grad_norm": 0.44335660338401794, "learning_rate": 6.394402207797341e-06, "loss": 0.2906, "step": 27577 }, { "epoch": 1.2655683539075766, "grad_norm": 0.5002272725105286, "learning_rate": 6.394166745840558e-06, "loss": 0.3658, "step": 27578 }, { "epoch": 1.265614244412831, "grad_norm": 0.47426778078079224, "learning_rate": 6.3939312805312445e-06, "loss": 0.3464, "step": 27579 }, { "epoch": 1.2656601349180854, "grad_norm": 0.47478267550468445, "learning_rate": 6.393695811869965e-06, "loss": 0.3656, "step": 27580 }, { "epoch": 1.2657060254233399, "grad_norm": 0.5770941376686096, "learning_rate": 6.393460339857289e-06, "loss": 0.5207, "step": 27581 }, { "epoch": 1.2657519159285944, "grad_norm": 0.4493562877178192, "learning_rate": 6.393224864493781e-06, "loss": 0.3333, "step": 27582 }, { "epoch": 1.2657978064338489, "grad_norm": 0.43955740332603455, "learning_rate": 6.392989385780005e-06, "loss": 0.3025, "step": 27583 }, { "epoch": 1.2658436969391034, "grad_norm": 0.4719174802303314, "learning_rate": 6.392753903716529e-06, "loss": 0.3893, "step": 27584 }, { "epoch": 1.2658895874443576, "grad_norm": 0.4536304771900177, "learning_rate": 6.392518418303921e-06, "loss": 0.3378, "step": 27585 }, { "epoch": 1.2659354779496121, "grad_norm": 0.46752119064331055, "learning_rate": 6.392282929542746e-06, "loss": 0.3754, "step": 27586 }, { "epoch": 1.2659813684548666, "grad_norm": 0.45089420676231384, "learning_rate": 6.392047437433568e-06, "loss": 0.2966, "step": 27587 }, { "epoch": 1.2660272589601211, "grad_norm": 0.48386380076408386, "learning_rate": 6.391811941976957e-06, "loss": 0.3946, "step": 27588 }, { "epoch": 1.2660731494653756, "grad_norm": 0.4909362494945526, "learning_rate": 6.391576443173476e-06, "loss": 0.3895, "step": 27589 }, { "epoch": 1.2661190399706301, "grad_norm": 0.45384734869003296, "learning_rate": 6.3913409410236935e-06, "loss": 0.3374, "step": 27590 }, { "epoch": 1.2661649304758846, "grad_norm": 0.4328356683254242, "learning_rate": 6.391105435528175e-06, "loss": 0.3194, "step": 27591 }, { "epoch": 1.2662108209811391, "grad_norm": 0.5189058780670166, "learning_rate": 6.390869926687489e-06, "loss": 0.4336, "step": 27592 }, { "epoch": 1.2662567114863934, "grad_norm": 0.4861741364002228, "learning_rate": 6.390634414502198e-06, "loss": 0.4137, "step": 27593 }, { "epoch": 1.266302601991648, "grad_norm": 0.4722899794578552, "learning_rate": 6.39039889897287e-06, "loss": 0.3337, "step": 27594 }, { "epoch": 1.2663484924969024, "grad_norm": 0.424702525138855, "learning_rate": 6.390163380100071e-06, "loss": 0.2705, "step": 27595 }, { "epoch": 1.266394383002157, "grad_norm": 0.4307391345500946, "learning_rate": 6.3899278578843695e-06, "loss": 0.293, "step": 27596 }, { "epoch": 1.2664402735074114, "grad_norm": 0.45151200890541077, "learning_rate": 6.389692332326329e-06, "loss": 0.3171, "step": 27597 }, { "epoch": 1.2664861640126657, "grad_norm": 0.504805326461792, "learning_rate": 6.389456803426517e-06, "loss": 0.4074, "step": 27598 }, { "epoch": 1.2665320545179202, "grad_norm": 0.5217872262001038, "learning_rate": 6.3892212711855006e-06, "loss": 0.4443, "step": 27599 }, { "epoch": 1.2665779450231747, "grad_norm": 0.42626968026161194, "learning_rate": 6.3889857356038445e-06, "loss": 0.3009, "step": 27600 }, { "epoch": 1.2666238355284292, "grad_norm": 0.47009164094924927, "learning_rate": 6.388750196682116e-06, "loss": 0.3473, "step": 27601 }, { "epoch": 1.2666697260336837, "grad_norm": 0.46404513716697693, "learning_rate": 6.388514654420883e-06, "loss": 0.3647, "step": 27602 }, { "epoch": 1.2667156165389382, "grad_norm": 0.4919244050979614, "learning_rate": 6.38827910882071e-06, "loss": 0.3633, "step": 27603 }, { "epoch": 1.2667615070441927, "grad_norm": 0.474376916885376, "learning_rate": 6.3880435598821634e-06, "loss": 0.3567, "step": 27604 }, { "epoch": 1.2668073975494472, "grad_norm": 0.488635778427124, "learning_rate": 6.38780800760581e-06, "loss": 0.4396, "step": 27605 }, { "epoch": 1.2668532880547014, "grad_norm": 0.5359803438186646, "learning_rate": 6.387572451992217e-06, "loss": 0.4788, "step": 27606 }, { "epoch": 1.266899178559956, "grad_norm": 0.5098769068717957, "learning_rate": 6.387336893041949e-06, "loss": 0.4241, "step": 27607 }, { "epoch": 1.2669450690652104, "grad_norm": 0.45125171542167664, "learning_rate": 6.387101330755576e-06, "loss": 0.3201, "step": 27608 }, { "epoch": 1.266990959570465, "grad_norm": 0.5050342679023743, "learning_rate": 6.386865765133659e-06, "loss": 0.3959, "step": 27609 }, { "epoch": 1.2670368500757192, "grad_norm": 0.47305357456207275, "learning_rate": 6.3866301961767705e-06, "loss": 0.4083, "step": 27610 }, { "epoch": 1.2670827405809737, "grad_norm": 0.438044011592865, "learning_rate": 6.386394623885472e-06, "loss": 0.3202, "step": 27611 }, { "epoch": 1.2671286310862282, "grad_norm": 0.46742984652519226, "learning_rate": 6.386159048260332e-06, "loss": 0.3689, "step": 27612 }, { "epoch": 1.2671745215914827, "grad_norm": 0.4527471363544464, "learning_rate": 6.385923469301918e-06, "loss": 0.3171, "step": 27613 }, { "epoch": 1.2672204120967372, "grad_norm": 0.4945203959941864, "learning_rate": 6.385687887010797e-06, "loss": 0.3693, "step": 27614 }, { "epoch": 1.2672663026019917, "grad_norm": 0.5503880381584167, "learning_rate": 6.38545230138753e-06, "loss": 0.396, "step": 27615 }, { "epoch": 1.2673121931072462, "grad_norm": 0.46545058488845825, "learning_rate": 6.385216712432691e-06, "loss": 0.3269, "step": 27616 }, { "epoch": 1.2673580836125007, "grad_norm": 0.4329378008842468, "learning_rate": 6.384981120146841e-06, "loss": 0.2852, "step": 27617 }, { "epoch": 1.267403974117755, "grad_norm": 0.4398607909679413, "learning_rate": 6.384745524530547e-06, "loss": 0.3364, "step": 27618 }, { "epoch": 1.2674498646230095, "grad_norm": 0.4571370780467987, "learning_rate": 6.3845099255843815e-06, "loss": 0.3314, "step": 27619 }, { "epoch": 1.267495755128264, "grad_norm": 0.4782978594303131, "learning_rate": 6.384274323308903e-06, "loss": 0.3351, "step": 27620 }, { "epoch": 1.2675416456335185, "grad_norm": 0.435498982667923, "learning_rate": 6.384038717704682e-06, "loss": 0.3132, "step": 27621 }, { "epoch": 1.267587536138773, "grad_norm": 0.460575133562088, "learning_rate": 6.383803108772286e-06, "loss": 0.3921, "step": 27622 }, { "epoch": 1.2676334266440272, "grad_norm": 0.47944214940071106, "learning_rate": 6.38356749651228e-06, "loss": 0.3586, "step": 27623 }, { "epoch": 1.2676793171492817, "grad_norm": 0.45531216263771057, "learning_rate": 6.383331880925231e-06, "loss": 0.3837, "step": 27624 }, { "epoch": 1.2677252076545362, "grad_norm": 0.4527263939380646, "learning_rate": 6.383096262011705e-06, "loss": 0.3522, "step": 27625 }, { "epoch": 1.2677710981597907, "grad_norm": 0.5104132890701294, "learning_rate": 6.382860639772268e-06, "loss": 0.3813, "step": 27626 }, { "epoch": 1.2678169886650452, "grad_norm": 0.46782177686691284, "learning_rate": 6.38262501420749e-06, "loss": 0.3899, "step": 27627 }, { "epoch": 1.2678628791702997, "grad_norm": 0.42873355746269226, "learning_rate": 6.382389385317933e-06, "loss": 0.2763, "step": 27628 }, { "epoch": 1.2679087696755542, "grad_norm": 0.45529672503471375, "learning_rate": 6.3821537531041655e-06, "loss": 0.3372, "step": 27629 }, { "epoch": 1.2679546601808087, "grad_norm": 0.528867781162262, "learning_rate": 6.381918117566758e-06, "loss": 0.4643, "step": 27630 }, { "epoch": 1.268000550686063, "grad_norm": 0.46760040521621704, "learning_rate": 6.38168247870627e-06, "loss": 0.3534, "step": 27631 }, { "epoch": 1.2680464411913175, "grad_norm": 0.4564807116985321, "learning_rate": 6.381446836523273e-06, "loss": 0.3144, "step": 27632 }, { "epoch": 1.268092331696572, "grad_norm": 0.529423177242279, "learning_rate": 6.381211191018333e-06, "loss": 0.4665, "step": 27633 }, { "epoch": 1.2681382222018265, "grad_norm": 0.49141743779182434, "learning_rate": 6.380975542192014e-06, "loss": 0.3926, "step": 27634 }, { "epoch": 1.268184112707081, "grad_norm": 0.46112850308418274, "learning_rate": 6.380739890044887e-06, "loss": 0.3651, "step": 27635 }, { "epoch": 1.2682300032123353, "grad_norm": 0.49295279383659363, "learning_rate": 6.380504234577515e-06, "loss": 0.4101, "step": 27636 }, { "epoch": 1.2682758937175898, "grad_norm": 0.45224055647850037, "learning_rate": 6.380268575790466e-06, "loss": 0.318, "step": 27637 }, { "epoch": 1.2683217842228443, "grad_norm": 0.44189614057540894, "learning_rate": 6.380032913684306e-06, "loss": 0.3637, "step": 27638 }, { "epoch": 1.2683676747280987, "grad_norm": 0.46932274103164673, "learning_rate": 6.379797248259603e-06, "loss": 0.3646, "step": 27639 }, { "epoch": 1.2684135652333532, "grad_norm": 0.44524338841438293, "learning_rate": 6.379561579516924e-06, "loss": 0.2983, "step": 27640 }, { "epoch": 1.2684594557386077, "grad_norm": 0.4686581492424011, "learning_rate": 6.379325907456835e-06, "loss": 0.3442, "step": 27641 }, { "epoch": 1.2685053462438622, "grad_norm": 0.4844357669353485, "learning_rate": 6.379090232079902e-06, "loss": 0.3847, "step": 27642 }, { "epoch": 1.2685512367491167, "grad_norm": 0.4317905306816101, "learning_rate": 6.378854553386691e-06, "loss": 0.2788, "step": 27643 }, { "epoch": 1.268597127254371, "grad_norm": 0.464546799659729, "learning_rate": 6.378618871377772e-06, "loss": 0.3323, "step": 27644 }, { "epoch": 1.2686430177596255, "grad_norm": 0.4505314826965332, "learning_rate": 6.378383186053708e-06, "loss": 0.3371, "step": 27645 }, { "epoch": 1.26868890826488, "grad_norm": 0.45072025060653687, "learning_rate": 6.378147497415068e-06, "loss": 0.3383, "step": 27646 }, { "epoch": 1.2687347987701345, "grad_norm": 0.458962619304657, "learning_rate": 6.37791180546242e-06, "loss": 0.3508, "step": 27647 }, { "epoch": 1.2687806892753888, "grad_norm": 0.4548460841178894, "learning_rate": 6.377676110196327e-06, "loss": 0.3543, "step": 27648 }, { "epoch": 1.2688265797806433, "grad_norm": 0.45942509174346924, "learning_rate": 6.377440411617357e-06, "loss": 0.3352, "step": 27649 }, { "epoch": 1.2688724702858978, "grad_norm": 0.5117129683494568, "learning_rate": 6.377204709726079e-06, "loss": 0.4137, "step": 27650 }, { "epoch": 1.2689183607911523, "grad_norm": 0.46068593859672546, "learning_rate": 6.376969004523058e-06, "loss": 0.3288, "step": 27651 }, { "epoch": 1.2689642512964068, "grad_norm": 0.487785667181015, "learning_rate": 6.376733296008862e-06, "loss": 0.3864, "step": 27652 }, { "epoch": 1.2690101418016613, "grad_norm": 0.4759564697742462, "learning_rate": 6.376497584184056e-06, "loss": 0.3675, "step": 27653 }, { "epoch": 1.2690560323069158, "grad_norm": 0.47933080792427063, "learning_rate": 6.3762618690492074e-06, "loss": 0.4023, "step": 27654 }, { "epoch": 1.2691019228121703, "grad_norm": 0.4949531853199005, "learning_rate": 6.376026150604884e-06, "loss": 0.4638, "step": 27655 }, { "epoch": 1.2691478133174245, "grad_norm": 0.49106329679489136, "learning_rate": 6.375790428851651e-06, "loss": 0.3574, "step": 27656 }, { "epoch": 1.269193703822679, "grad_norm": 0.49616047739982605, "learning_rate": 6.375554703790078e-06, "loss": 0.3837, "step": 27657 }, { "epoch": 1.2692395943279335, "grad_norm": 0.44554466009140015, "learning_rate": 6.375318975420728e-06, "loss": 0.3303, "step": 27658 }, { "epoch": 1.269285484833188, "grad_norm": 0.4941883683204651, "learning_rate": 6.375083243744172e-06, "loss": 0.3913, "step": 27659 }, { "epoch": 1.2693313753384425, "grad_norm": 0.45048707723617554, "learning_rate": 6.374847508760973e-06, "loss": 0.3259, "step": 27660 }, { "epoch": 1.2693772658436968, "grad_norm": 0.49106863141059875, "learning_rate": 6.374611770471702e-06, "loss": 0.3863, "step": 27661 }, { "epoch": 1.2694231563489513, "grad_norm": 0.4536474347114563, "learning_rate": 6.374376028876921e-06, "loss": 0.3374, "step": 27662 }, { "epoch": 1.2694690468542058, "grad_norm": 0.4945515990257263, "learning_rate": 6.3741402839772e-06, "loss": 0.3618, "step": 27663 }, { "epoch": 1.2695149373594603, "grad_norm": 0.47918277978897095, "learning_rate": 6.3739045357731066e-06, "loss": 0.3556, "step": 27664 }, { "epoch": 1.2695608278647148, "grad_norm": 0.4663005769252777, "learning_rate": 6.373668784265206e-06, "loss": 0.3711, "step": 27665 }, { "epoch": 1.2696067183699693, "grad_norm": 0.5003321170806885, "learning_rate": 6.373433029454064e-06, "loss": 0.3801, "step": 27666 }, { "epoch": 1.2696526088752238, "grad_norm": 0.482926607131958, "learning_rate": 6.37319727134025e-06, "loss": 0.4158, "step": 27667 }, { "epoch": 1.2696984993804783, "grad_norm": 0.5018066763877869, "learning_rate": 6.372961509924331e-06, "loss": 0.4695, "step": 27668 }, { "epoch": 1.2697443898857326, "grad_norm": 0.43777206540107727, "learning_rate": 6.372725745206871e-06, "loss": 0.2976, "step": 27669 }, { "epoch": 1.269790280390987, "grad_norm": 0.42043137550354004, "learning_rate": 6.372489977188441e-06, "loss": 0.2756, "step": 27670 }, { "epoch": 1.2698361708962416, "grad_norm": 0.45214179158210754, "learning_rate": 6.372254205869604e-06, "loss": 0.3646, "step": 27671 }, { "epoch": 1.269882061401496, "grad_norm": 0.6788287162780762, "learning_rate": 6.372018431250928e-06, "loss": 0.3146, "step": 27672 }, { "epoch": 1.2699279519067506, "grad_norm": 0.4498259723186493, "learning_rate": 6.371782653332983e-06, "loss": 0.3244, "step": 27673 }, { "epoch": 1.2699738424120048, "grad_norm": 0.5554794669151306, "learning_rate": 6.371546872116332e-06, "loss": 0.3083, "step": 27674 }, { "epoch": 1.2700197329172593, "grad_norm": 0.5111182928085327, "learning_rate": 6.371311087601546e-06, "loss": 0.4078, "step": 27675 }, { "epoch": 1.2700656234225138, "grad_norm": 0.4748038947582245, "learning_rate": 6.371075299789187e-06, "loss": 0.3292, "step": 27676 }, { "epoch": 1.2701115139277683, "grad_norm": 0.44243666529655457, "learning_rate": 6.370839508679825e-06, "loss": 0.2967, "step": 27677 }, { "epoch": 1.2701574044330228, "grad_norm": 0.4877657890319824, "learning_rate": 6.370603714274027e-06, "loss": 0.3723, "step": 27678 }, { "epoch": 1.2702032949382773, "grad_norm": 0.5037795305252075, "learning_rate": 6.370367916572361e-06, "loss": 0.3559, "step": 27679 }, { "epoch": 1.2702491854435318, "grad_norm": 0.4582667648792267, "learning_rate": 6.37013211557539e-06, "loss": 0.3292, "step": 27680 }, { "epoch": 1.2702950759487863, "grad_norm": 0.4378514289855957, "learning_rate": 6.369896311283686e-06, "loss": 0.2793, "step": 27681 }, { "epoch": 1.2703409664540406, "grad_norm": 0.45867490768432617, "learning_rate": 6.369660503697812e-06, "loss": 0.317, "step": 27682 }, { "epoch": 1.270386856959295, "grad_norm": 0.5073931217193604, "learning_rate": 6.369424692818338e-06, "loss": 0.4601, "step": 27683 }, { "epoch": 1.2704327474645496, "grad_norm": 0.4697217345237732, "learning_rate": 6.36918887864583e-06, "loss": 0.3317, "step": 27684 }, { "epoch": 1.270478637969804, "grad_norm": 0.4507666528224945, "learning_rate": 6.3689530611808545e-06, "loss": 0.3363, "step": 27685 }, { "epoch": 1.2705245284750586, "grad_norm": 0.5221207141876221, "learning_rate": 6.3687172404239785e-06, "loss": 0.443, "step": 27686 }, { "epoch": 1.2705704189803129, "grad_norm": 0.5138861536979675, "learning_rate": 6.3684814163757705e-06, "loss": 0.4166, "step": 27687 }, { "epoch": 1.2706163094855674, "grad_norm": 0.4725380837917328, "learning_rate": 6.368245589036795e-06, "loss": 0.3798, "step": 27688 }, { "epoch": 1.2706621999908219, "grad_norm": 0.4867097735404968, "learning_rate": 6.368009758407622e-06, "loss": 0.3819, "step": 27689 }, { "epoch": 1.2707080904960764, "grad_norm": 0.4538317322731018, "learning_rate": 6.367773924488817e-06, "loss": 0.348, "step": 27690 }, { "epoch": 1.2707539810013309, "grad_norm": 0.48461124300956726, "learning_rate": 6.367538087280947e-06, "loss": 0.396, "step": 27691 }, { "epoch": 1.2707998715065854, "grad_norm": 0.4916684925556183, "learning_rate": 6.36730224678458e-06, "loss": 0.3963, "step": 27692 }, { "epoch": 1.2708457620118399, "grad_norm": 0.4852026402950287, "learning_rate": 6.367066403000282e-06, "loss": 0.3604, "step": 27693 }, { "epoch": 1.2708916525170944, "grad_norm": 0.4776019752025604, "learning_rate": 6.366830555928621e-06, "loss": 0.3816, "step": 27694 }, { "epoch": 1.2709375430223486, "grad_norm": 0.4557274281978607, "learning_rate": 6.366594705570164e-06, "loss": 0.3461, "step": 27695 }, { "epoch": 1.2709834335276031, "grad_norm": 0.5726621150970459, "learning_rate": 6.366358851925478e-06, "loss": 0.3206, "step": 27696 }, { "epoch": 1.2710293240328576, "grad_norm": 0.4164447486400604, "learning_rate": 6.366122994995131e-06, "loss": 0.2849, "step": 27697 }, { "epoch": 1.2710752145381121, "grad_norm": 0.4856354296207428, "learning_rate": 6.365887134779689e-06, "loss": 0.3535, "step": 27698 }, { "epoch": 1.2711211050433664, "grad_norm": 0.4723111093044281, "learning_rate": 6.365651271279718e-06, "loss": 0.3109, "step": 27699 }, { "epoch": 1.271166995548621, "grad_norm": 0.468791663646698, "learning_rate": 6.365415404495788e-06, "loss": 0.3558, "step": 27700 }, { "epoch": 1.2712128860538754, "grad_norm": 0.501873791217804, "learning_rate": 6.365179534428466e-06, "loss": 0.4088, "step": 27701 }, { "epoch": 1.2712587765591299, "grad_norm": 0.45098021626472473, "learning_rate": 6.364943661078316e-06, "loss": 0.3059, "step": 27702 }, { "epoch": 1.2713046670643844, "grad_norm": 0.4567374289035797, "learning_rate": 6.364707784445908e-06, "loss": 0.2963, "step": 27703 }, { "epoch": 1.2713505575696389, "grad_norm": 0.46257248520851135, "learning_rate": 6.364471904531809e-06, "loss": 0.374, "step": 27704 }, { "epoch": 1.2713964480748934, "grad_norm": 0.4433690011501312, "learning_rate": 6.364236021336585e-06, "loss": 0.3029, "step": 27705 }, { "epoch": 1.2714423385801479, "grad_norm": 0.4488614499568939, "learning_rate": 6.364000134860803e-06, "loss": 0.2932, "step": 27706 }, { "epoch": 1.2714882290854022, "grad_norm": 0.4548358619213104, "learning_rate": 6.363764245105034e-06, "loss": 0.321, "step": 27707 }, { "epoch": 1.2715341195906567, "grad_norm": 0.46632638573646545, "learning_rate": 6.363528352069841e-06, "loss": 0.3196, "step": 27708 }, { "epoch": 1.2715800100959112, "grad_norm": 0.47722724080085754, "learning_rate": 6.363292455755793e-06, "loss": 0.3911, "step": 27709 }, { "epoch": 1.2716259006011656, "grad_norm": 0.4412669241428375, "learning_rate": 6.363056556163456e-06, "loss": 0.3152, "step": 27710 }, { "epoch": 1.2716717911064201, "grad_norm": 0.5177837014198303, "learning_rate": 6.362820653293399e-06, "loss": 0.4264, "step": 27711 }, { "epoch": 1.2717176816116744, "grad_norm": 0.46601539850234985, "learning_rate": 6.362584747146187e-06, "loss": 0.3251, "step": 27712 }, { "epoch": 1.271763572116929, "grad_norm": 0.4942941963672638, "learning_rate": 6.3623488377223904e-06, "loss": 0.403, "step": 27713 }, { "epoch": 1.2718094626221834, "grad_norm": 0.4795243442058563, "learning_rate": 6.362112925022573e-06, "loss": 0.3908, "step": 27714 }, { "epoch": 1.271855353127438, "grad_norm": 0.45312339067459106, "learning_rate": 6.361877009047306e-06, "loss": 0.3743, "step": 27715 }, { "epoch": 1.2719012436326924, "grad_norm": 0.5065512657165527, "learning_rate": 6.361641089797153e-06, "loss": 0.3953, "step": 27716 }, { "epoch": 1.271947134137947, "grad_norm": 0.4847389757633209, "learning_rate": 6.361405167272685e-06, "loss": 0.3872, "step": 27717 }, { "epoch": 1.2719930246432014, "grad_norm": 0.4697864055633545, "learning_rate": 6.361169241474465e-06, "loss": 0.3662, "step": 27718 }, { "epoch": 1.272038915148456, "grad_norm": 0.48790040612220764, "learning_rate": 6.360933312403063e-06, "loss": 0.3693, "step": 27719 }, { "epoch": 1.2720848056537102, "grad_norm": 0.459744393825531, "learning_rate": 6.360697380059047e-06, "loss": 0.3517, "step": 27720 }, { "epoch": 1.2721306961589647, "grad_norm": 0.4446001648902893, "learning_rate": 6.360461444442983e-06, "loss": 0.3219, "step": 27721 }, { "epoch": 1.2721765866642192, "grad_norm": 0.44017964601516724, "learning_rate": 6.360225505555439e-06, "loss": 0.3367, "step": 27722 }, { "epoch": 1.2722224771694737, "grad_norm": 0.5778737664222717, "learning_rate": 6.35998956339698e-06, "loss": 0.428, "step": 27723 }, { "epoch": 1.2722683676747282, "grad_norm": 0.4325180947780609, "learning_rate": 6.359753617968176e-06, "loss": 0.2938, "step": 27724 }, { "epoch": 1.2723142581799824, "grad_norm": 0.48611775040626526, "learning_rate": 6.359517669269595e-06, "loss": 0.405, "step": 27725 }, { "epoch": 1.272360148685237, "grad_norm": 0.497148722410202, "learning_rate": 6.359281717301802e-06, "loss": 0.3991, "step": 27726 }, { "epoch": 1.2724060391904914, "grad_norm": 0.4622134268283844, "learning_rate": 6.359045762065366e-06, "loss": 0.3067, "step": 27727 }, { "epoch": 1.272451929695746, "grad_norm": 0.4684372544288635, "learning_rate": 6.358809803560854e-06, "loss": 0.3542, "step": 27728 }, { "epoch": 1.2724978202010004, "grad_norm": 0.45311111211776733, "learning_rate": 6.358573841788833e-06, "loss": 0.3228, "step": 27729 }, { "epoch": 1.272543710706255, "grad_norm": 0.43367260694503784, "learning_rate": 6.358337876749869e-06, "loss": 0.3027, "step": 27730 }, { "epoch": 1.2725896012115094, "grad_norm": 0.491629034280777, "learning_rate": 6.358101908444533e-06, "loss": 0.4069, "step": 27731 }, { "epoch": 1.272635491716764, "grad_norm": 0.4487422704696655, "learning_rate": 6.357865936873391e-06, "loss": 0.3202, "step": 27732 }, { "epoch": 1.2726813822220182, "grad_norm": 0.44172459840774536, "learning_rate": 6.357629962037009e-06, "loss": 0.3243, "step": 27733 }, { "epoch": 1.2727272727272727, "grad_norm": 0.49263107776641846, "learning_rate": 6.3573939839359555e-06, "loss": 0.4038, "step": 27734 }, { "epoch": 1.2727731632325272, "grad_norm": 0.5221304297447205, "learning_rate": 6.357158002570798e-06, "loss": 0.4229, "step": 27735 }, { "epoch": 1.2728190537377817, "grad_norm": 0.46377697587013245, "learning_rate": 6.3569220179421046e-06, "loss": 0.3518, "step": 27736 }, { "epoch": 1.272864944243036, "grad_norm": 0.4802818298339844, "learning_rate": 6.35668603005044e-06, "loss": 0.4259, "step": 27737 }, { "epoch": 1.2729108347482905, "grad_norm": 0.45367375016212463, "learning_rate": 6.3564500388963754e-06, "loss": 0.3282, "step": 27738 }, { "epoch": 1.272956725253545, "grad_norm": 0.5213709473609924, "learning_rate": 6.356214044480477e-06, "loss": 0.3703, "step": 27739 }, { "epoch": 1.2730026157587995, "grad_norm": 0.4445798695087433, "learning_rate": 6.355978046803309e-06, "loss": 0.3346, "step": 27740 }, { "epoch": 1.273048506264054, "grad_norm": 0.4821888506412506, "learning_rate": 6.355742045865445e-06, "loss": 0.4088, "step": 27741 }, { "epoch": 1.2730943967693085, "grad_norm": 0.4570859372615814, "learning_rate": 6.355506041667447e-06, "loss": 0.3456, "step": 27742 }, { "epoch": 1.273140287274563, "grad_norm": 0.46337783336639404, "learning_rate": 6.355270034209886e-06, "loss": 0.3239, "step": 27743 }, { "epoch": 1.2731861777798175, "grad_norm": 0.47391360998153687, "learning_rate": 6.355034023493329e-06, "loss": 0.3342, "step": 27744 }, { "epoch": 1.2732320682850717, "grad_norm": 0.49589842557907104, "learning_rate": 6.354798009518341e-06, "loss": 0.3654, "step": 27745 }, { "epoch": 1.2732779587903262, "grad_norm": 0.5324549078941345, "learning_rate": 6.354561992285493e-06, "loss": 0.462, "step": 27746 }, { "epoch": 1.2733238492955807, "grad_norm": 0.448321670293808, "learning_rate": 6.35432597179535e-06, "loss": 0.3379, "step": 27747 }, { "epoch": 1.2733697398008352, "grad_norm": 0.45970746874809265, "learning_rate": 6.354089948048479e-06, "loss": 0.3228, "step": 27748 }, { "epoch": 1.2734156303060897, "grad_norm": 0.4916815757751465, "learning_rate": 6.353853921045452e-06, "loss": 0.4162, "step": 27749 }, { "epoch": 1.273461520811344, "grad_norm": 0.46182844042778015, "learning_rate": 6.3536178907868325e-06, "loss": 0.3783, "step": 27750 }, { "epoch": 1.2735074113165985, "grad_norm": 0.48527708649635315, "learning_rate": 6.353381857273189e-06, "loss": 0.4172, "step": 27751 }, { "epoch": 1.273553301821853, "grad_norm": 0.4575256109237671, "learning_rate": 6.353145820505089e-06, "loss": 0.3263, "step": 27752 }, { "epoch": 1.2735991923271075, "grad_norm": 0.4684695899486542, "learning_rate": 6.352909780483101e-06, "loss": 0.3489, "step": 27753 }, { "epoch": 1.273645082832362, "grad_norm": 0.4623975157737732, "learning_rate": 6.352673737207792e-06, "loss": 0.3885, "step": 27754 }, { "epoch": 1.2736909733376165, "grad_norm": 0.44689875841140747, "learning_rate": 6.352437690679729e-06, "loss": 0.3394, "step": 27755 }, { "epoch": 1.273736863842871, "grad_norm": 0.45736265182495117, "learning_rate": 6.35220164089948e-06, "loss": 0.3562, "step": 27756 }, { "epoch": 1.2737827543481255, "grad_norm": 0.4494754672050476, "learning_rate": 6.3519655878676134e-06, "loss": 0.3323, "step": 27757 }, { "epoch": 1.2738286448533798, "grad_norm": 0.45911118388175964, "learning_rate": 6.351729531584696e-06, "loss": 0.3249, "step": 27758 }, { "epoch": 1.2738745353586343, "grad_norm": 0.46890345215797424, "learning_rate": 6.351493472051296e-06, "loss": 0.3387, "step": 27759 }, { "epoch": 1.2739204258638888, "grad_norm": 0.4978278577327728, "learning_rate": 6.3512574092679805e-06, "loss": 0.3754, "step": 27760 }, { "epoch": 1.2739663163691433, "grad_norm": 0.4443426728248596, "learning_rate": 6.3510213432353175e-06, "loss": 0.3067, "step": 27761 }, { "epoch": 1.2740122068743978, "grad_norm": 0.4407588541507721, "learning_rate": 6.350785273953875e-06, "loss": 0.3352, "step": 27762 }, { "epoch": 1.274058097379652, "grad_norm": 0.4598753750324249, "learning_rate": 6.350549201424218e-06, "loss": 0.324, "step": 27763 }, { "epoch": 1.2741039878849065, "grad_norm": 0.45335572957992554, "learning_rate": 6.350313125646919e-06, "loss": 0.3249, "step": 27764 }, { "epoch": 1.274149878390161, "grad_norm": 0.5272057056427002, "learning_rate": 6.3500770466225425e-06, "loss": 0.4214, "step": 27765 }, { "epoch": 1.2741957688954155, "grad_norm": 0.49180492758750916, "learning_rate": 6.3498409643516564e-06, "loss": 0.3947, "step": 27766 }, { "epoch": 1.27424165940067, "grad_norm": 0.4625456631183624, "learning_rate": 6.349604878834828e-06, "loss": 0.3705, "step": 27767 }, { "epoch": 1.2742875499059245, "grad_norm": 0.5673028230667114, "learning_rate": 6.3493687900726265e-06, "loss": 0.4642, "step": 27768 }, { "epoch": 1.274333440411179, "grad_norm": 0.4585072994232178, "learning_rate": 6.34913269806562e-06, "loss": 0.344, "step": 27769 }, { "epoch": 1.2743793309164335, "grad_norm": 0.5007902383804321, "learning_rate": 6.3488966028143735e-06, "loss": 0.3926, "step": 27770 }, { "epoch": 1.2744252214216878, "grad_norm": 0.4929814338684082, "learning_rate": 6.348660504319457e-06, "loss": 0.4062, "step": 27771 }, { "epoch": 1.2744711119269423, "grad_norm": 0.44114091992378235, "learning_rate": 6.348424402581439e-06, "loss": 0.2567, "step": 27772 }, { "epoch": 1.2745170024321968, "grad_norm": 0.49472641944885254, "learning_rate": 6.348188297600885e-06, "loss": 0.4029, "step": 27773 }, { "epoch": 1.2745628929374513, "grad_norm": 0.4959041476249695, "learning_rate": 6.347952189378364e-06, "loss": 0.3903, "step": 27774 }, { "epoch": 1.2746087834427058, "grad_norm": 0.4321146309375763, "learning_rate": 6.347716077914441e-06, "loss": 0.3046, "step": 27775 }, { "epoch": 1.27465467394796, "grad_norm": 0.5117290616035461, "learning_rate": 6.347479963209688e-06, "loss": 0.4144, "step": 27776 }, { "epoch": 1.2747005644532146, "grad_norm": 0.47882482409477234, "learning_rate": 6.347243845264672e-06, "loss": 0.3656, "step": 27777 }, { "epoch": 1.274746454958469, "grad_norm": 0.51138836145401, "learning_rate": 6.347007724079959e-06, "loss": 0.3872, "step": 27778 }, { "epoch": 1.2747923454637236, "grad_norm": 0.48766204714775085, "learning_rate": 6.346771599656117e-06, "loss": 0.3752, "step": 27779 }, { "epoch": 1.274838235968978, "grad_norm": 0.48240339756011963, "learning_rate": 6.346535471993714e-06, "loss": 0.4053, "step": 27780 }, { "epoch": 1.2748841264742325, "grad_norm": 0.4583371579647064, "learning_rate": 6.346299341093318e-06, "loss": 0.3163, "step": 27781 }, { "epoch": 1.274930016979487, "grad_norm": 0.44486430287361145, "learning_rate": 6.346063206955499e-06, "loss": 0.3162, "step": 27782 }, { "epoch": 1.2749759074847415, "grad_norm": 0.4537929892539978, "learning_rate": 6.345827069580821e-06, "loss": 0.3073, "step": 27783 }, { "epoch": 1.2750217979899958, "grad_norm": 0.5201525688171387, "learning_rate": 6.3455909289698536e-06, "loss": 0.4283, "step": 27784 }, { "epoch": 1.2750676884952503, "grad_norm": 0.4402295649051666, "learning_rate": 6.345354785123165e-06, "loss": 0.3323, "step": 27785 }, { "epoch": 1.2751135790005048, "grad_norm": 0.47450944781303406, "learning_rate": 6.345118638041323e-06, "loss": 0.3929, "step": 27786 }, { "epoch": 1.2751594695057593, "grad_norm": 0.45171263813972473, "learning_rate": 6.344882487724897e-06, "loss": 0.3066, "step": 27787 }, { "epoch": 1.2752053600110136, "grad_norm": 0.41587045788764954, "learning_rate": 6.344646334174451e-06, "loss": 0.2839, "step": 27788 }, { "epoch": 1.275251250516268, "grad_norm": 0.48728400468826294, "learning_rate": 6.344410177390556e-06, "loss": 0.4047, "step": 27789 }, { "epoch": 1.2752971410215226, "grad_norm": 0.5393722057342529, "learning_rate": 6.344174017373776e-06, "loss": 0.4912, "step": 27790 }, { "epoch": 1.275343031526777, "grad_norm": 0.4480230212211609, "learning_rate": 6.343937854124686e-06, "loss": 0.3259, "step": 27791 }, { "epoch": 1.2753889220320316, "grad_norm": 0.4706697165966034, "learning_rate": 6.3437016876438465e-06, "loss": 0.3792, "step": 27792 }, { "epoch": 1.275434812537286, "grad_norm": 0.4190223515033722, "learning_rate": 6.343465517931829e-06, "loss": 0.3036, "step": 27793 }, { "epoch": 1.2754807030425406, "grad_norm": 0.45606130361557007, "learning_rate": 6.343229344989203e-06, "loss": 0.3293, "step": 27794 }, { "epoch": 1.275526593547795, "grad_norm": 0.47468486428260803, "learning_rate": 6.342993168816531e-06, "loss": 0.34, "step": 27795 }, { "epoch": 1.2755724840530493, "grad_norm": 0.46183130145072937, "learning_rate": 6.342756989414386e-06, "loss": 0.3538, "step": 27796 }, { "epoch": 1.2756183745583038, "grad_norm": 0.48262646794319153, "learning_rate": 6.3425208067833345e-06, "loss": 0.3488, "step": 27797 }, { "epoch": 1.2756642650635583, "grad_norm": 0.48504653573036194, "learning_rate": 6.342284620923945e-06, "loss": 0.4129, "step": 27798 }, { "epoch": 1.2757101555688128, "grad_norm": 0.41733768582344055, "learning_rate": 6.342048431836783e-06, "loss": 0.2754, "step": 27799 }, { "epoch": 1.2757560460740673, "grad_norm": 0.4881349503993988, "learning_rate": 6.3418122395224195e-06, "loss": 0.3698, "step": 27800 }, { "epoch": 1.2758019365793216, "grad_norm": 0.4576893448829651, "learning_rate": 6.341576043981419e-06, "loss": 0.3205, "step": 27801 }, { "epoch": 1.2758478270845761, "grad_norm": 0.45796912908554077, "learning_rate": 6.341339845214353e-06, "loss": 0.3611, "step": 27802 }, { "epoch": 1.2758937175898306, "grad_norm": 0.4663476049900055, "learning_rate": 6.341103643221788e-06, "loss": 0.3323, "step": 27803 }, { "epoch": 1.275939608095085, "grad_norm": 0.500990092754364, "learning_rate": 6.340867438004292e-06, "loss": 0.4587, "step": 27804 }, { "epoch": 1.2759854986003396, "grad_norm": 0.4402237832546234, "learning_rate": 6.340631229562433e-06, "loss": 0.3257, "step": 27805 }, { "epoch": 1.276031389105594, "grad_norm": 0.4308221638202667, "learning_rate": 6.340395017896779e-06, "loss": 0.2634, "step": 27806 }, { "epoch": 1.2760772796108486, "grad_norm": 0.46913942694664, "learning_rate": 6.3401588030078965e-06, "loss": 0.3688, "step": 27807 }, { "epoch": 1.276123170116103, "grad_norm": 0.46698325872421265, "learning_rate": 6.339922584896357e-06, "loss": 0.3226, "step": 27808 }, { "epoch": 1.2761690606213574, "grad_norm": 0.4292261600494385, "learning_rate": 6.3396863635627274e-06, "loss": 0.2728, "step": 27809 }, { "epoch": 1.2762149511266119, "grad_norm": 0.471407413482666, "learning_rate": 6.339450139007572e-06, "loss": 0.3625, "step": 27810 }, { "epoch": 1.2762608416318664, "grad_norm": 0.4777388274669647, "learning_rate": 6.339213911231463e-06, "loss": 0.3614, "step": 27811 }, { "epoch": 1.2763067321371209, "grad_norm": 0.4551113247871399, "learning_rate": 6.338977680234968e-06, "loss": 0.3365, "step": 27812 }, { "epoch": 1.2763526226423754, "grad_norm": 0.4405660629272461, "learning_rate": 6.338741446018652e-06, "loss": 0.2739, "step": 27813 }, { "epoch": 1.2763985131476296, "grad_norm": 0.5292825698852539, "learning_rate": 6.338505208583088e-06, "loss": 0.3245, "step": 27814 }, { "epoch": 1.2764444036528841, "grad_norm": 0.4380699098110199, "learning_rate": 6.338268967928841e-06, "loss": 0.3076, "step": 27815 }, { "epoch": 1.2764902941581386, "grad_norm": 0.4487307071685791, "learning_rate": 6.338032724056479e-06, "loss": 0.2934, "step": 27816 }, { "epoch": 1.2765361846633931, "grad_norm": 0.44275882840156555, "learning_rate": 6.337796476966571e-06, "loss": 0.3099, "step": 27817 }, { "epoch": 1.2765820751686476, "grad_norm": 0.48330292105674744, "learning_rate": 6.337560226659683e-06, "loss": 0.3393, "step": 27818 }, { "epoch": 1.2766279656739021, "grad_norm": 0.46951282024383545, "learning_rate": 6.337323973136386e-06, "loss": 0.3486, "step": 27819 }, { "epoch": 1.2766738561791566, "grad_norm": 0.47778233885765076, "learning_rate": 6.337087716397248e-06, "loss": 0.3767, "step": 27820 }, { "epoch": 1.2767197466844111, "grad_norm": 0.4995887279510498, "learning_rate": 6.336851456442834e-06, "loss": 0.3803, "step": 27821 }, { "epoch": 1.2767656371896654, "grad_norm": 0.4737626612186432, "learning_rate": 6.336615193273716e-06, "loss": 0.3694, "step": 27822 }, { "epoch": 1.27681152769492, "grad_norm": 0.4725928008556366, "learning_rate": 6.336378926890459e-06, "loss": 0.3573, "step": 27823 }, { "epoch": 1.2768574182001744, "grad_norm": 0.41937023401260376, "learning_rate": 6.336142657293633e-06, "loss": 0.2877, "step": 27824 }, { "epoch": 1.276903308705429, "grad_norm": 0.44668638706207275, "learning_rate": 6.335906384483805e-06, "loss": 0.3053, "step": 27825 }, { "epoch": 1.2769491992106832, "grad_norm": 0.4732566475868225, "learning_rate": 6.3356701084615454e-06, "loss": 0.3582, "step": 27826 }, { "epoch": 1.2769950897159377, "grad_norm": 0.45154693722724915, "learning_rate": 6.335433829227418e-06, "loss": 0.3078, "step": 27827 }, { "epoch": 1.2770409802211922, "grad_norm": 0.4531998932361603, "learning_rate": 6.335197546781997e-06, "loss": 0.3445, "step": 27828 }, { "epoch": 1.2770868707264467, "grad_norm": 0.459494948387146, "learning_rate": 6.334961261125845e-06, "loss": 0.3286, "step": 27829 }, { "epoch": 1.2771327612317012, "grad_norm": 0.5111603736877441, "learning_rate": 6.334724972259533e-06, "loss": 0.4654, "step": 27830 }, { "epoch": 1.2771786517369557, "grad_norm": 0.4479008913040161, "learning_rate": 6.3344886801836305e-06, "loss": 0.315, "step": 27831 }, { "epoch": 1.2772245422422102, "grad_norm": 0.4457554519176483, "learning_rate": 6.334252384898702e-06, "loss": 0.3266, "step": 27832 }, { "epoch": 1.2772704327474647, "grad_norm": 0.45665106177330017, "learning_rate": 6.334016086405317e-06, "loss": 0.3638, "step": 27833 }, { "epoch": 1.277316323252719, "grad_norm": 0.4667263925075531, "learning_rate": 6.333779784704045e-06, "loss": 0.378, "step": 27834 }, { "epoch": 1.2773622137579734, "grad_norm": 0.455411821603775, "learning_rate": 6.333543479795453e-06, "loss": 0.3221, "step": 27835 }, { "epoch": 1.277408104263228, "grad_norm": 0.5142090320587158, "learning_rate": 6.333307171680112e-06, "loss": 0.3701, "step": 27836 }, { "epoch": 1.2774539947684824, "grad_norm": 0.44929060339927673, "learning_rate": 6.3330708603585865e-06, "loss": 0.3913, "step": 27837 }, { "epoch": 1.277499885273737, "grad_norm": 0.48981133103370667, "learning_rate": 6.332834545831446e-06, "loss": 0.3828, "step": 27838 }, { "epoch": 1.2775457757789912, "grad_norm": 0.4668658375740051, "learning_rate": 6.3325982280992605e-06, "loss": 0.3805, "step": 27839 }, { "epoch": 1.2775916662842457, "grad_norm": 0.5057257413864136, "learning_rate": 6.3323619071625945e-06, "loss": 0.4525, "step": 27840 }, { "epoch": 1.2776375567895002, "grad_norm": 0.44307488203048706, "learning_rate": 6.3321255830220195e-06, "loss": 0.2996, "step": 27841 }, { "epoch": 1.2776834472947547, "grad_norm": 0.49730661511421204, "learning_rate": 6.331889255678105e-06, "loss": 0.3831, "step": 27842 }, { "epoch": 1.2777293378000092, "grad_norm": 0.4450420141220093, "learning_rate": 6.3316529251314156e-06, "loss": 0.3415, "step": 27843 }, { "epoch": 1.2777752283052637, "grad_norm": 0.48045793175697327, "learning_rate": 6.33141659138252e-06, "loss": 0.359, "step": 27844 }, { "epoch": 1.2778211188105182, "grad_norm": 0.4715976417064667, "learning_rate": 6.33118025443199e-06, "loss": 0.3892, "step": 27845 }, { "epoch": 1.2778670093157727, "grad_norm": 0.5009045600891113, "learning_rate": 6.33094391428039e-06, "loss": 0.3689, "step": 27846 }, { "epoch": 1.277912899821027, "grad_norm": 0.4447994828224182, "learning_rate": 6.330707570928291e-06, "loss": 0.3284, "step": 27847 }, { "epoch": 1.2779587903262815, "grad_norm": 0.45731762051582336, "learning_rate": 6.33047122437626e-06, "loss": 0.3385, "step": 27848 }, { "epoch": 1.278004680831536, "grad_norm": 0.456718772649765, "learning_rate": 6.330234874624864e-06, "loss": 0.3533, "step": 27849 }, { "epoch": 1.2780505713367905, "grad_norm": 0.4643952548503876, "learning_rate": 6.329998521674674e-06, "loss": 0.3378, "step": 27850 }, { "epoch": 1.278096461842045, "grad_norm": 0.4562808871269226, "learning_rate": 6.329762165526259e-06, "loss": 0.3475, "step": 27851 }, { "epoch": 1.2781423523472992, "grad_norm": 0.42403310537338257, "learning_rate": 6.329525806180183e-06, "loss": 0.27, "step": 27852 }, { "epoch": 1.2781882428525537, "grad_norm": 0.497005432844162, "learning_rate": 6.329289443637019e-06, "loss": 0.3936, "step": 27853 }, { "epoch": 1.2782341333578082, "grad_norm": 0.4689309895038605, "learning_rate": 6.329053077897333e-06, "loss": 0.3557, "step": 27854 }, { "epoch": 1.2782800238630627, "grad_norm": 0.4359360635280609, "learning_rate": 6.328816708961694e-06, "loss": 0.3159, "step": 27855 }, { "epoch": 1.2783259143683172, "grad_norm": 0.4672968089580536, "learning_rate": 6.32858033683067e-06, "loss": 0.3799, "step": 27856 }, { "epoch": 1.2783718048735717, "grad_norm": 0.4896391034126282, "learning_rate": 6.328343961504828e-06, "loss": 0.3618, "step": 27857 }, { "epoch": 1.2784176953788262, "grad_norm": 0.5053120851516724, "learning_rate": 6.3281075829847395e-06, "loss": 0.4248, "step": 27858 }, { "epoch": 1.2784635858840807, "grad_norm": 0.4995259642601013, "learning_rate": 6.327871201270972e-06, "loss": 0.4058, "step": 27859 }, { "epoch": 1.278509476389335, "grad_norm": 0.4661610424518585, "learning_rate": 6.327634816364092e-06, "loss": 0.3305, "step": 27860 }, { "epoch": 1.2785553668945895, "grad_norm": 0.48423781991004944, "learning_rate": 6.327398428264669e-06, "loss": 0.3611, "step": 27861 }, { "epoch": 1.278601257399844, "grad_norm": 0.4712240695953369, "learning_rate": 6.327162036973273e-06, "loss": 0.3256, "step": 27862 }, { "epoch": 1.2786471479050985, "grad_norm": 0.4493459165096283, "learning_rate": 6.326925642490471e-06, "loss": 0.3117, "step": 27863 }, { "epoch": 1.278693038410353, "grad_norm": 0.5155889987945557, "learning_rate": 6.326689244816832e-06, "loss": 0.424, "step": 27864 }, { "epoch": 1.2787389289156073, "grad_norm": 0.45732903480529785, "learning_rate": 6.3264528439529236e-06, "loss": 0.323, "step": 27865 }, { "epoch": 1.2787848194208618, "grad_norm": 0.4880560338497162, "learning_rate": 6.326216439899314e-06, "loss": 0.3859, "step": 27866 }, { "epoch": 1.2788307099261162, "grad_norm": 0.45793065428733826, "learning_rate": 6.325980032656574e-06, "loss": 0.3431, "step": 27867 }, { "epoch": 1.2788766004313707, "grad_norm": 0.46137070655822754, "learning_rate": 6.325743622225269e-06, "loss": 0.3249, "step": 27868 }, { "epoch": 1.2789224909366252, "grad_norm": 0.4644947350025177, "learning_rate": 6.32550720860597e-06, "loss": 0.3684, "step": 27869 }, { "epoch": 1.2789683814418797, "grad_norm": 0.44817861914634705, "learning_rate": 6.325270791799244e-06, "loss": 0.3479, "step": 27870 }, { "epoch": 1.2790142719471342, "grad_norm": 0.485521137714386, "learning_rate": 6.32503437180566e-06, "loss": 0.3698, "step": 27871 }, { "epoch": 1.2790601624523887, "grad_norm": 0.6072576642036438, "learning_rate": 6.324797948625787e-06, "loss": 0.4526, "step": 27872 }, { "epoch": 1.279106052957643, "grad_norm": 0.48102983832359314, "learning_rate": 6.324561522260193e-06, "loss": 0.3412, "step": 27873 }, { "epoch": 1.2791519434628975, "grad_norm": 0.48291513323783875, "learning_rate": 6.324325092709446e-06, "loss": 0.3393, "step": 27874 }, { "epoch": 1.279197833968152, "grad_norm": 0.4775524437427521, "learning_rate": 6.324088659974116e-06, "loss": 0.3672, "step": 27875 }, { "epoch": 1.2792437244734065, "grad_norm": 0.5014622807502747, "learning_rate": 6.323852224054769e-06, "loss": 0.4466, "step": 27876 }, { "epoch": 1.2792896149786608, "grad_norm": 0.43454504013061523, "learning_rate": 6.3236157849519765e-06, "loss": 0.3206, "step": 27877 }, { "epoch": 1.2793355054839153, "grad_norm": 0.48976725339889526, "learning_rate": 6.323379342666305e-06, "loss": 0.3665, "step": 27878 }, { "epoch": 1.2793813959891698, "grad_norm": 0.4953702688217163, "learning_rate": 6.323142897198326e-06, "loss": 0.3923, "step": 27879 }, { "epoch": 1.2794272864944243, "grad_norm": 0.4747747480869293, "learning_rate": 6.322906448548604e-06, "loss": 0.3555, "step": 27880 }, { "epoch": 1.2794731769996788, "grad_norm": 0.4344126284122467, "learning_rate": 6.322669996717709e-06, "loss": 0.2973, "step": 27881 }, { "epoch": 1.2795190675049333, "grad_norm": 0.4542183578014374, "learning_rate": 6.322433541706212e-06, "loss": 0.3521, "step": 27882 }, { "epoch": 1.2795649580101878, "grad_norm": 0.4738132655620575, "learning_rate": 6.322197083514678e-06, "loss": 0.322, "step": 27883 }, { "epoch": 1.2796108485154423, "grad_norm": 0.45783182978630066, "learning_rate": 6.3219606221436766e-06, "loss": 0.3551, "step": 27884 }, { "epoch": 1.2796567390206965, "grad_norm": 0.4686388373374939, "learning_rate": 6.3217241575937805e-06, "loss": 0.3814, "step": 27885 }, { "epoch": 1.279702629525951, "grad_norm": 0.5053128004074097, "learning_rate": 6.321487689865553e-06, "loss": 0.4596, "step": 27886 }, { "epoch": 1.2797485200312055, "grad_norm": 0.4901399612426758, "learning_rate": 6.3212512189595645e-06, "loss": 0.4471, "step": 27887 }, { "epoch": 1.27979441053646, "grad_norm": 0.4393260180950165, "learning_rate": 6.321014744876384e-06, "loss": 0.3062, "step": 27888 }, { "epoch": 1.2798403010417145, "grad_norm": 0.49528950452804565, "learning_rate": 6.32077826761658e-06, "loss": 0.4384, "step": 27889 }, { "epoch": 1.2798861915469688, "grad_norm": 0.4651753604412079, "learning_rate": 6.32054178718072e-06, "loss": 0.374, "step": 27890 }, { "epoch": 1.2799320820522233, "grad_norm": 0.46469226479530334, "learning_rate": 6.320305303569376e-06, "loss": 0.3684, "step": 27891 }, { "epoch": 1.2799779725574778, "grad_norm": 0.4812019169330597, "learning_rate": 6.320068816783113e-06, "loss": 0.3874, "step": 27892 }, { "epoch": 1.2800238630627323, "grad_norm": 0.4360223412513733, "learning_rate": 6.319832326822502e-06, "loss": 0.3161, "step": 27893 }, { "epoch": 1.2800697535679868, "grad_norm": 0.523171603679657, "learning_rate": 6.319595833688109e-06, "loss": 0.4543, "step": 27894 }, { "epoch": 1.2801156440732413, "grad_norm": 0.5291123390197754, "learning_rate": 6.319359337380505e-06, "loss": 0.4026, "step": 27895 }, { "epoch": 1.2801615345784958, "grad_norm": 0.43482133746147156, "learning_rate": 6.31912283790026e-06, "loss": 0.2857, "step": 27896 }, { "epoch": 1.2802074250837503, "grad_norm": 0.46525150537490845, "learning_rate": 6.31888633524794e-06, "loss": 0.3572, "step": 27897 }, { "epoch": 1.2802533155890046, "grad_norm": 0.45712149143218994, "learning_rate": 6.318649829424114e-06, "loss": 0.3175, "step": 27898 }, { "epoch": 1.280299206094259, "grad_norm": 0.4696604609489441, "learning_rate": 6.318413320429352e-06, "loss": 0.3428, "step": 27899 }, { "epoch": 1.2803450965995136, "grad_norm": 0.4610048234462738, "learning_rate": 6.318176808264221e-06, "loss": 0.382, "step": 27900 }, { "epoch": 1.280390987104768, "grad_norm": 0.4528370797634125, "learning_rate": 6.317940292929292e-06, "loss": 0.3834, "step": 27901 }, { "epoch": 1.2804368776100226, "grad_norm": 0.461145281791687, "learning_rate": 6.3177037744251335e-06, "loss": 0.3133, "step": 27902 }, { "epoch": 1.2804827681152768, "grad_norm": 0.4779565632343292, "learning_rate": 6.317467252752312e-06, "loss": 0.3341, "step": 27903 }, { "epoch": 1.2805286586205313, "grad_norm": 0.5112033486366272, "learning_rate": 6.317230727911398e-06, "loss": 0.393, "step": 27904 }, { "epoch": 1.2805745491257858, "grad_norm": 0.48595502972602844, "learning_rate": 6.316994199902958e-06, "loss": 0.4026, "step": 27905 }, { "epoch": 1.2806204396310403, "grad_norm": 0.4899119734764099, "learning_rate": 6.316757668727564e-06, "loss": 0.4013, "step": 27906 }, { "epoch": 1.2806663301362948, "grad_norm": 0.48145240545272827, "learning_rate": 6.316521134385784e-06, "loss": 0.3462, "step": 27907 }, { "epoch": 1.2807122206415493, "grad_norm": 0.47885310649871826, "learning_rate": 6.316284596878185e-06, "loss": 0.4113, "step": 27908 }, { "epoch": 1.2807581111468038, "grad_norm": 0.4890669882297516, "learning_rate": 6.316048056205337e-06, "loss": 0.3958, "step": 27909 }, { "epoch": 1.2808040016520583, "grad_norm": 0.4389622211456299, "learning_rate": 6.31581151236781e-06, "loss": 0.2998, "step": 27910 }, { "epoch": 1.2808498921573126, "grad_norm": 0.4524305760860443, "learning_rate": 6.3155749653661705e-06, "loss": 0.3378, "step": 27911 }, { "epoch": 1.280895782662567, "grad_norm": 0.5244722962379456, "learning_rate": 6.315338415200989e-06, "loss": 0.3106, "step": 27912 }, { "epoch": 1.2809416731678216, "grad_norm": 0.46941402554512024, "learning_rate": 6.315101861872834e-06, "loss": 0.3822, "step": 27913 }, { "epoch": 1.280987563673076, "grad_norm": 0.4887068569660187, "learning_rate": 6.314865305382274e-06, "loss": 0.3536, "step": 27914 }, { "epoch": 1.2810334541783304, "grad_norm": 0.5552395582199097, "learning_rate": 6.314628745729876e-06, "loss": 0.4114, "step": 27915 }, { "epoch": 1.2810793446835849, "grad_norm": 0.481122225522995, "learning_rate": 6.314392182916213e-06, "loss": 0.4299, "step": 27916 }, { "epoch": 1.2811252351888394, "grad_norm": 0.42810603976249695, "learning_rate": 6.31415561694185e-06, "loss": 0.2749, "step": 27917 }, { "epoch": 1.2811711256940939, "grad_norm": 0.4875357449054718, "learning_rate": 6.31391904780736e-06, "loss": 0.3853, "step": 27918 }, { "epoch": 1.2812170161993484, "grad_norm": 0.4573619067668915, "learning_rate": 6.313682475513307e-06, "loss": 0.3495, "step": 27919 }, { "epoch": 1.2812629067046029, "grad_norm": 0.4879096448421478, "learning_rate": 6.3134459000602635e-06, "loss": 0.3623, "step": 27920 }, { "epoch": 1.2813087972098574, "grad_norm": 0.4448162317276001, "learning_rate": 6.313209321448796e-06, "loss": 0.3152, "step": 27921 }, { "epoch": 1.2813546877151119, "grad_norm": 0.4345643222332001, "learning_rate": 6.312972739679476e-06, "loss": 0.3223, "step": 27922 }, { "epoch": 1.2814005782203661, "grad_norm": 0.4322528541088104, "learning_rate": 6.31273615475287e-06, "loss": 0.2858, "step": 27923 }, { "epoch": 1.2814464687256206, "grad_norm": 0.5135278105735779, "learning_rate": 6.312499566669549e-06, "loss": 0.3911, "step": 27924 }, { "epoch": 1.2814923592308751, "grad_norm": 0.46670961380004883, "learning_rate": 6.312262975430079e-06, "loss": 0.3437, "step": 27925 }, { "epoch": 1.2815382497361296, "grad_norm": 0.4446643888950348, "learning_rate": 6.312026381035031e-06, "loss": 0.3172, "step": 27926 }, { "epoch": 1.2815841402413841, "grad_norm": 0.4797717034816742, "learning_rate": 6.311789783484975e-06, "loss": 0.3902, "step": 27927 }, { "epoch": 1.2816300307466384, "grad_norm": 0.49759751558303833, "learning_rate": 6.311553182780478e-06, "loss": 0.409, "step": 27928 }, { "epoch": 1.281675921251893, "grad_norm": 0.48105165362358093, "learning_rate": 6.311316578922109e-06, "loss": 0.4148, "step": 27929 }, { "epoch": 1.2817218117571474, "grad_norm": 0.450704962015152, "learning_rate": 6.311079971910438e-06, "loss": 0.3413, "step": 27930 }, { "epoch": 1.2817677022624019, "grad_norm": 0.44499722123146057, "learning_rate": 6.310843361746033e-06, "loss": 0.3054, "step": 27931 }, { "epoch": 1.2818135927676564, "grad_norm": 0.46481916308403015, "learning_rate": 6.310606748429463e-06, "loss": 0.4073, "step": 27932 }, { "epoch": 1.2818594832729109, "grad_norm": 0.46222159266471863, "learning_rate": 6.3103701319612985e-06, "loss": 0.3373, "step": 27933 }, { "epoch": 1.2819053737781654, "grad_norm": 0.4440707862377167, "learning_rate": 6.3101335123421085e-06, "loss": 0.3275, "step": 27934 }, { "epoch": 1.2819512642834199, "grad_norm": 0.45265746116638184, "learning_rate": 6.3098968895724574e-06, "loss": 0.3214, "step": 27935 }, { "epoch": 1.2819971547886742, "grad_norm": 0.4968338906764984, "learning_rate": 6.309660263652921e-06, "loss": 0.3438, "step": 27936 }, { "epoch": 1.2820430452939287, "grad_norm": 0.47855669260025024, "learning_rate": 6.309423634584063e-06, "loss": 0.3631, "step": 27937 }, { "epoch": 1.2820889357991831, "grad_norm": 0.47526058554649353, "learning_rate": 6.309187002366456e-06, "loss": 0.3582, "step": 27938 }, { "epoch": 1.2821348263044376, "grad_norm": 0.4880504310131073, "learning_rate": 6.308950367000666e-06, "loss": 0.4343, "step": 27939 }, { "epoch": 1.2821807168096921, "grad_norm": 0.4558505415916443, "learning_rate": 6.308713728487264e-06, "loss": 0.3352, "step": 27940 }, { "epoch": 1.2822266073149464, "grad_norm": 0.4458456337451935, "learning_rate": 6.308477086826819e-06, "loss": 0.3777, "step": 27941 }, { "epoch": 1.282272497820201, "grad_norm": 0.5169133543968201, "learning_rate": 6.308240442019899e-06, "loss": 0.3909, "step": 27942 }, { "epoch": 1.2823183883254554, "grad_norm": 0.5154356360435486, "learning_rate": 6.3080037940670736e-06, "loss": 0.4837, "step": 27943 }, { "epoch": 1.28236427883071, "grad_norm": 0.5061883330345154, "learning_rate": 6.307767142968912e-06, "loss": 0.4431, "step": 27944 }, { "epoch": 1.2824101693359644, "grad_norm": 0.49453747272491455, "learning_rate": 6.307530488725983e-06, "loss": 0.3729, "step": 27945 }, { "epoch": 1.282456059841219, "grad_norm": 0.45170867443084717, "learning_rate": 6.307293831338856e-06, "loss": 0.3173, "step": 27946 }, { "epoch": 1.2825019503464734, "grad_norm": 0.43428173661231995, "learning_rate": 6.3070571708081e-06, "loss": 0.3111, "step": 27947 }, { "epoch": 1.282547840851728, "grad_norm": 0.4775456190109253, "learning_rate": 6.306820507134285e-06, "loss": 0.3464, "step": 27948 }, { "epoch": 1.2825937313569822, "grad_norm": 0.45871850848197937, "learning_rate": 6.306583840317977e-06, "loss": 0.3635, "step": 27949 }, { "epoch": 1.2826396218622367, "grad_norm": 0.4917430281639099, "learning_rate": 6.306347170359749e-06, "loss": 0.3302, "step": 27950 }, { "epoch": 1.2826855123674912, "grad_norm": 0.44096633791923523, "learning_rate": 6.306110497260169e-06, "loss": 0.291, "step": 27951 }, { "epoch": 1.2827314028727457, "grad_norm": 0.4854171872138977, "learning_rate": 6.3058738210198035e-06, "loss": 0.3807, "step": 27952 }, { "epoch": 1.2827772933780002, "grad_norm": 0.4535027742385864, "learning_rate": 6.305637141639225e-06, "loss": 0.3188, "step": 27953 }, { "epoch": 1.2828231838832544, "grad_norm": 0.5031104683876038, "learning_rate": 6.3054004591189995e-06, "loss": 0.3868, "step": 27954 }, { "epoch": 1.282869074388509, "grad_norm": 0.511268675327301, "learning_rate": 6.305163773459699e-06, "loss": 0.4064, "step": 27955 }, { "epoch": 1.2829149648937634, "grad_norm": 0.49902039766311646, "learning_rate": 6.304927084661893e-06, "loss": 0.4054, "step": 27956 }, { "epoch": 1.282960855399018, "grad_norm": 0.4851906895637512, "learning_rate": 6.304690392726147e-06, "loss": 0.3505, "step": 27957 }, { "epoch": 1.2830067459042724, "grad_norm": 0.44817718863487244, "learning_rate": 6.304453697653033e-06, "loss": 0.3679, "step": 27958 }, { "epoch": 1.283052636409527, "grad_norm": 0.45906683802604675, "learning_rate": 6.3042169994431194e-06, "loss": 0.3813, "step": 27959 }, { "epoch": 1.2830985269147814, "grad_norm": 0.4838658273220062, "learning_rate": 6.303980298096975e-06, "loss": 0.3445, "step": 27960 }, { "epoch": 1.283144417420036, "grad_norm": 0.4532667398452759, "learning_rate": 6.30374359361517e-06, "loss": 0.3289, "step": 27961 }, { "epoch": 1.2831903079252902, "grad_norm": 0.5090999603271484, "learning_rate": 6.303506885998275e-06, "loss": 0.4119, "step": 27962 }, { "epoch": 1.2832361984305447, "grad_norm": 0.44353389739990234, "learning_rate": 6.303270175246855e-06, "loss": 0.2971, "step": 27963 }, { "epoch": 1.2832820889357992, "grad_norm": 0.45460933446884155, "learning_rate": 6.303033461361483e-06, "loss": 0.3096, "step": 27964 }, { "epoch": 1.2833279794410537, "grad_norm": 0.5018517374992371, "learning_rate": 6.302796744342725e-06, "loss": 0.4098, "step": 27965 }, { "epoch": 1.283373869946308, "grad_norm": 0.47725749015808105, "learning_rate": 6.3025600241911544e-06, "loss": 0.309, "step": 27966 }, { "epoch": 1.2834197604515625, "grad_norm": 0.46066346764564514, "learning_rate": 6.302323300907337e-06, "loss": 0.3804, "step": 27967 }, { "epoch": 1.283465650956817, "grad_norm": 0.4454992711544037, "learning_rate": 6.302086574491842e-06, "loss": 0.3211, "step": 27968 }, { "epoch": 1.2835115414620715, "grad_norm": 0.4430641233921051, "learning_rate": 6.30184984494524e-06, "loss": 0.3084, "step": 27969 }, { "epoch": 1.283557431967326, "grad_norm": 0.4556950330734253, "learning_rate": 6.301613112268101e-06, "loss": 0.3343, "step": 27970 }, { "epoch": 1.2836033224725805, "grad_norm": 0.4609871208667755, "learning_rate": 6.301376376460993e-06, "loss": 0.3627, "step": 27971 }, { "epoch": 1.283649212977835, "grad_norm": 0.46355926990509033, "learning_rate": 6.301139637524486e-06, "loss": 0.3697, "step": 27972 }, { "epoch": 1.2836951034830895, "grad_norm": 0.4374755918979645, "learning_rate": 6.300902895459149e-06, "loss": 0.3374, "step": 27973 }, { "epoch": 1.2837409939883437, "grad_norm": 0.5259857177734375, "learning_rate": 6.300666150265549e-06, "loss": 0.3814, "step": 27974 }, { "epoch": 1.2837868844935982, "grad_norm": 0.46450692415237427, "learning_rate": 6.300429401944259e-06, "loss": 0.3353, "step": 27975 }, { "epoch": 1.2838327749988527, "grad_norm": 0.45779046416282654, "learning_rate": 6.300192650495847e-06, "loss": 0.3714, "step": 27976 }, { "epoch": 1.2838786655041072, "grad_norm": 0.45795565843582153, "learning_rate": 6.299955895920882e-06, "loss": 0.3299, "step": 27977 }, { "epoch": 1.2839245560093617, "grad_norm": 0.495059609413147, "learning_rate": 6.299719138219933e-06, "loss": 0.3935, "step": 27978 }, { "epoch": 1.283970446514616, "grad_norm": 0.47626081109046936, "learning_rate": 6.299482377393569e-06, "loss": 0.3286, "step": 27979 }, { "epoch": 1.2840163370198705, "grad_norm": 0.4811919033527374, "learning_rate": 6.2992456134423595e-06, "loss": 0.3813, "step": 27980 }, { "epoch": 1.284062227525125, "grad_norm": 0.4993738830089569, "learning_rate": 6.2990088463668765e-06, "loss": 0.4374, "step": 27981 }, { "epoch": 1.2841081180303795, "grad_norm": 0.49372154474258423, "learning_rate": 6.298772076167685e-06, "loss": 0.396, "step": 27982 }, { "epoch": 1.284154008535634, "grad_norm": 0.4576811194419861, "learning_rate": 6.298535302845358e-06, "loss": 0.3555, "step": 27983 }, { "epoch": 1.2841998990408885, "grad_norm": 0.5154021382331848, "learning_rate": 6.298298526400464e-06, "loss": 0.4067, "step": 27984 }, { "epoch": 1.284245789546143, "grad_norm": 0.5037956237792969, "learning_rate": 6.29806174683357e-06, "loss": 0.342, "step": 27985 }, { "epoch": 1.2842916800513975, "grad_norm": 0.47566887736320496, "learning_rate": 6.297824964145248e-06, "loss": 0.3654, "step": 27986 }, { "epoch": 1.2843375705566518, "grad_norm": 0.5180149674415588, "learning_rate": 6.297588178336067e-06, "loss": 0.401, "step": 27987 }, { "epoch": 1.2843834610619063, "grad_norm": 0.4895417094230652, "learning_rate": 6.297351389406596e-06, "loss": 0.3987, "step": 27988 }, { "epoch": 1.2844293515671608, "grad_norm": 0.4662227928638458, "learning_rate": 6.2971145973574044e-06, "loss": 0.3356, "step": 27989 }, { "epoch": 1.2844752420724153, "grad_norm": 0.5006363391876221, "learning_rate": 6.296877802189061e-06, "loss": 0.4051, "step": 27990 }, { "epoch": 1.2845211325776698, "grad_norm": 0.4571870267391205, "learning_rate": 6.296641003902134e-06, "loss": 0.3287, "step": 27991 }, { "epoch": 1.284567023082924, "grad_norm": 0.5016179084777832, "learning_rate": 6.2964042024971975e-06, "loss": 0.3375, "step": 27992 }, { "epoch": 1.2846129135881785, "grad_norm": 0.49141407012939453, "learning_rate": 6.296167397974817e-06, "loss": 0.3696, "step": 27993 }, { "epoch": 1.284658804093433, "grad_norm": 0.5017837285995483, "learning_rate": 6.2959305903355635e-06, "loss": 0.4086, "step": 27994 }, { "epoch": 1.2847046945986875, "grad_norm": 0.4881719946861267, "learning_rate": 6.295693779580005e-06, "loss": 0.4277, "step": 27995 }, { "epoch": 1.284750585103942, "grad_norm": 0.4799562990665436, "learning_rate": 6.295456965708713e-06, "loss": 0.3243, "step": 27996 }, { "epoch": 1.2847964756091965, "grad_norm": 0.455783873796463, "learning_rate": 6.295220148722254e-06, "loss": 0.3909, "step": 27997 }, { "epoch": 1.284842366114451, "grad_norm": 0.4835767447948456, "learning_rate": 6.294983328621202e-06, "loss": 0.3856, "step": 27998 }, { "epoch": 1.2848882566197055, "grad_norm": 0.4736703932285309, "learning_rate": 6.2947465054061215e-06, "loss": 0.4078, "step": 27999 }, { "epoch": 1.2849341471249598, "grad_norm": 0.5019463896751404, "learning_rate": 6.294509679077585e-06, "loss": 0.4465, "step": 28000 }, { "epoch": 1.2849800376302143, "grad_norm": 0.4571838080883026, "learning_rate": 6.2942728496361625e-06, "loss": 0.3292, "step": 28001 }, { "epoch": 1.2850259281354688, "grad_norm": 0.4683063328266144, "learning_rate": 6.294036017082421e-06, "loss": 0.3556, "step": 28002 }, { "epoch": 1.2850718186407233, "grad_norm": 0.4869784414768219, "learning_rate": 6.293799181416931e-06, "loss": 0.4098, "step": 28003 }, { "epoch": 1.2851177091459776, "grad_norm": 0.46651285886764526, "learning_rate": 6.293562342640262e-06, "loss": 0.3221, "step": 28004 }, { "epoch": 1.285163599651232, "grad_norm": 0.5062577128410339, "learning_rate": 6.293325500752986e-06, "loss": 0.3812, "step": 28005 }, { "epoch": 1.2852094901564866, "grad_norm": 0.44168779253959656, "learning_rate": 6.293088655755669e-06, "loss": 0.3291, "step": 28006 }, { "epoch": 1.285255380661741, "grad_norm": 0.518917441368103, "learning_rate": 6.292851807648882e-06, "loss": 0.3835, "step": 28007 }, { "epoch": 1.2853012711669956, "grad_norm": 0.4333477020263672, "learning_rate": 6.292614956433194e-06, "loss": 0.3032, "step": 28008 }, { "epoch": 1.28534716167225, "grad_norm": 0.43126508593559265, "learning_rate": 6.292378102109175e-06, "loss": 0.2957, "step": 28009 }, { "epoch": 1.2853930521775045, "grad_norm": 0.4491947591304779, "learning_rate": 6.2921412446773955e-06, "loss": 0.3322, "step": 28010 }, { "epoch": 1.285438942682759, "grad_norm": 0.4510684311389923, "learning_rate": 6.2919043841384235e-06, "loss": 0.3245, "step": 28011 }, { "epoch": 1.2854848331880133, "grad_norm": 0.4779126048088074, "learning_rate": 6.291667520492829e-06, "loss": 0.393, "step": 28012 }, { "epoch": 1.2855307236932678, "grad_norm": 0.47748321294784546, "learning_rate": 6.2914306537411815e-06, "loss": 0.3868, "step": 28013 }, { "epoch": 1.2855766141985223, "grad_norm": 0.46777623891830444, "learning_rate": 6.291193783884051e-06, "loss": 0.3615, "step": 28014 }, { "epoch": 1.2856225047037768, "grad_norm": 0.46542301774024963, "learning_rate": 6.290956910922008e-06, "loss": 0.3441, "step": 28015 }, { "epoch": 1.2856683952090313, "grad_norm": 0.47854384779930115, "learning_rate": 6.29072003485562e-06, "loss": 0.4114, "step": 28016 }, { "epoch": 1.2857142857142856, "grad_norm": 0.5072178244590759, "learning_rate": 6.290483155685458e-06, "loss": 0.4176, "step": 28017 }, { "epoch": 1.28576017621954, "grad_norm": 0.5169182419776917, "learning_rate": 6.290246273412092e-06, "loss": 0.4133, "step": 28018 }, { "epoch": 1.2858060667247946, "grad_norm": 0.45771628618240356, "learning_rate": 6.290009388036089e-06, "loss": 0.3192, "step": 28019 }, { "epoch": 1.285851957230049, "grad_norm": 0.47772571444511414, "learning_rate": 6.2897724995580225e-06, "loss": 0.3721, "step": 28020 }, { "epoch": 1.2858978477353036, "grad_norm": 0.47401484847068787, "learning_rate": 6.289535607978461e-06, "loss": 0.3787, "step": 28021 }, { "epoch": 1.285943738240558, "grad_norm": 0.46425482630729675, "learning_rate": 6.289298713297971e-06, "loss": 0.3446, "step": 28022 }, { "epoch": 1.2859896287458126, "grad_norm": 0.4534512162208557, "learning_rate": 6.289061815517126e-06, "loss": 0.2999, "step": 28023 }, { "epoch": 1.286035519251067, "grad_norm": 0.4260218143463135, "learning_rate": 6.288824914636493e-06, "loss": 0.297, "step": 28024 }, { "epoch": 1.2860814097563213, "grad_norm": 0.4626574218273163, "learning_rate": 6.2885880106566425e-06, "loss": 0.3411, "step": 28025 }, { "epoch": 1.2861273002615758, "grad_norm": 0.4694831967353821, "learning_rate": 6.288351103578145e-06, "loss": 0.361, "step": 28026 }, { "epoch": 1.2861731907668303, "grad_norm": 0.47322583198547363, "learning_rate": 6.288114193401571e-06, "loss": 0.3463, "step": 28027 }, { "epoch": 1.2862190812720848, "grad_norm": 0.42018210887908936, "learning_rate": 6.287877280127488e-06, "loss": 0.2408, "step": 28028 }, { "epoch": 1.2862649717773393, "grad_norm": 0.4446282684803009, "learning_rate": 6.287640363756468e-06, "loss": 0.3537, "step": 28029 }, { "epoch": 1.2863108622825936, "grad_norm": 0.44221577048301697, "learning_rate": 6.287403444289077e-06, "loss": 0.3018, "step": 28030 }, { "epoch": 1.2863567527878481, "grad_norm": 0.4677393436431885, "learning_rate": 6.2871665217258875e-06, "loss": 0.3272, "step": 28031 }, { "epoch": 1.2864026432931026, "grad_norm": 0.514629065990448, "learning_rate": 6.2869295960674705e-06, "loss": 0.3446, "step": 28032 }, { "epoch": 1.286448533798357, "grad_norm": 0.43056970834732056, "learning_rate": 6.2866926673143924e-06, "loss": 0.263, "step": 28033 }, { "epoch": 1.2864944243036116, "grad_norm": 0.5042280554771423, "learning_rate": 6.286455735467224e-06, "loss": 0.4868, "step": 28034 }, { "epoch": 1.286540314808866, "grad_norm": 0.4717193841934204, "learning_rate": 6.2862188005265365e-06, "loss": 0.402, "step": 28035 }, { "epoch": 1.2865862053141206, "grad_norm": 0.48021817207336426, "learning_rate": 6.285981862492898e-06, "loss": 0.3892, "step": 28036 }, { "epoch": 1.286632095819375, "grad_norm": 0.49710315465927124, "learning_rate": 6.285744921366881e-06, "loss": 0.394, "step": 28037 }, { "epoch": 1.2866779863246294, "grad_norm": 0.43318331241607666, "learning_rate": 6.285507977149052e-06, "loss": 0.2841, "step": 28038 }, { "epoch": 1.2867238768298839, "grad_norm": 0.4696180820465088, "learning_rate": 6.285271029839981e-06, "loss": 0.3057, "step": 28039 }, { "epoch": 1.2867697673351384, "grad_norm": 0.46836262941360474, "learning_rate": 6.285034079440241e-06, "loss": 0.3766, "step": 28040 }, { "epoch": 1.2868156578403929, "grad_norm": 0.43587324023246765, "learning_rate": 6.284797125950397e-06, "loss": 0.2917, "step": 28041 }, { "epoch": 1.2868615483456474, "grad_norm": 0.4919810891151428, "learning_rate": 6.284560169371023e-06, "loss": 0.3502, "step": 28042 }, { "epoch": 1.2869074388509016, "grad_norm": 0.5080631971359253, "learning_rate": 6.284323209702688e-06, "loss": 0.4267, "step": 28043 }, { "epoch": 1.2869533293561561, "grad_norm": 0.45041918754577637, "learning_rate": 6.28408624694596e-06, "loss": 0.3629, "step": 28044 }, { "epoch": 1.2869992198614106, "grad_norm": 0.5092109441757202, "learning_rate": 6.283849281101409e-06, "loss": 0.395, "step": 28045 }, { "epoch": 1.2870451103666651, "grad_norm": 0.45699307322502136, "learning_rate": 6.2836123121696065e-06, "loss": 0.3538, "step": 28046 }, { "epoch": 1.2870910008719196, "grad_norm": 0.44629091024398804, "learning_rate": 6.2833753401511214e-06, "loss": 0.3173, "step": 28047 }, { "epoch": 1.2871368913771741, "grad_norm": 0.4377323389053345, "learning_rate": 6.2831383650465235e-06, "loss": 0.3246, "step": 28048 }, { "epoch": 1.2871827818824286, "grad_norm": 0.48890864849090576, "learning_rate": 6.282901386856385e-06, "loss": 0.4058, "step": 28049 }, { "epoch": 1.2872286723876831, "grad_norm": 0.4579782485961914, "learning_rate": 6.282664405581272e-06, "loss": 0.3218, "step": 28050 }, { "epoch": 1.2872745628929374, "grad_norm": 0.43510380387306213, "learning_rate": 6.282427421221755e-06, "loss": 0.2847, "step": 28051 }, { "epoch": 1.287320453398192, "grad_norm": 0.4762396812438965, "learning_rate": 6.282190433778405e-06, "loss": 0.3605, "step": 28052 }, { "epoch": 1.2873663439034464, "grad_norm": 0.45483747124671936, "learning_rate": 6.2819534432517915e-06, "loss": 0.3466, "step": 28053 }, { "epoch": 1.287412234408701, "grad_norm": 0.48180606961250305, "learning_rate": 6.2817164496424865e-06, "loss": 0.4147, "step": 28054 }, { "epoch": 1.2874581249139552, "grad_norm": 0.4472403824329376, "learning_rate": 6.281479452951056e-06, "loss": 0.2997, "step": 28055 }, { "epoch": 1.2875040154192097, "grad_norm": 0.4397311508655548, "learning_rate": 6.2812424531780716e-06, "loss": 0.3331, "step": 28056 }, { "epoch": 1.2875499059244642, "grad_norm": 0.49483487010002136, "learning_rate": 6.281005450324105e-06, "loss": 0.3491, "step": 28057 }, { "epoch": 1.2875957964297187, "grad_norm": 0.4228760600090027, "learning_rate": 6.280768444389724e-06, "loss": 0.2597, "step": 28058 }, { "epoch": 1.2876416869349732, "grad_norm": 0.5105610489845276, "learning_rate": 6.280531435375499e-06, "loss": 0.4204, "step": 28059 }, { "epoch": 1.2876875774402277, "grad_norm": 0.4278881251811981, "learning_rate": 6.280294423282001e-06, "loss": 0.2995, "step": 28060 }, { "epoch": 1.2877334679454822, "grad_norm": 0.4799186885356903, "learning_rate": 6.2800574081097965e-06, "loss": 0.4124, "step": 28061 }, { "epoch": 1.2877793584507367, "grad_norm": 0.5145034193992615, "learning_rate": 6.279820389859459e-06, "loss": 0.4406, "step": 28062 }, { "epoch": 1.287825248955991, "grad_norm": 0.4484533965587616, "learning_rate": 6.279583368531559e-06, "loss": 0.3257, "step": 28063 }, { "epoch": 1.2878711394612454, "grad_norm": 0.48334038257598877, "learning_rate": 6.2793463441266625e-06, "loss": 0.3922, "step": 28064 }, { "epoch": 1.2879170299665, "grad_norm": 0.49381059408187866, "learning_rate": 6.279109316645344e-06, "loss": 0.3571, "step": 28065 }, { "epoch": 1.2879629204717544, "grad_norm": 0.4796421527862549, "learning_rate": 6.278872286088171e-06, "loss": 0.3597, "step": 28066 }, { "epoch": 1.288008810977009, "grad_norm": 0.43482673168182373, "learning_rate": 6.278635252455711e-06, "loss": 0.3238, "step": 28067 }, { "epoch": 1.2880547014822632, "grad_norm": 0.47177642583847046, "learning_rate": 6.27839821574854e-06, "loss": 0.3606, "step": 28068 }, { "epoch": 1.2881005919875177, "grad_norm": 0.4608008563518524, "learning_rate": 6.278161175967223e-06, "loss": 0.3545, "step": 28069 }, { "epoch": 1.2881464824927722, "grad_norm": 0.4514561891555786, "learning_rate": 6.277924133112333e-06, "loss": 0.3217, "step": 28070 }, { "epoch": 1.2881923729980267, "grad_norm": 0.5066956281661987, "learning_rate": 6.277687087184438e-06, "loss": 0.2917, "step": 28071 }, { "epoch": 1.2882382635032812, "grad_norm": 0.4906873106956482, "learning_rate": 6.27745003818411e-06, "loss": 0.3635, "step": 28072 }, { "epoch": 1.2882841540085357, "grad_norm": 0.4679623544216156, "learning_rate": 6.277212986111916e-06, "loss": 0.3692, "step": 28073 }, { "epoch": 1.2883300445137902, "grad_norm": 0.46598020195961, "learning_rate": 6.276975930968429e-06, "loss": 0.3122, "step": 28074 }, { "epoch": 1.2883759350190447, "grad_norm": 0.4327479898929596, "learning_rate": 6.276738872754217e-06, "loss": 0.3091, "step": 28075 }, { "epoch": 1.288421825524299, "grad_norm": 0.5081113576889038, "learning_rate": 6.276501811469852e-06, "loss": 0.4075, "step": 28076 }, { "epoch": 1.2884677160295535, "grad_norm": 0.46634232997894287, "learning_rate": 6.276264747115903e-06, "loss": 0.3285, "step": 28077 }, { "epoch": 1.288513606534808, "grad_norm": 0.43505290150642395, "learning_rate": 6.27602767969294e-06, "loss": 0.3273, "step": 28078 }, { "epoch": 1.2885594970400625, "grad_norm": 0.46722015738487244, "learning_rate": 6.275790609201532e-06, "loss": 0.3797, "step": 28079 }, { "epoch": 1.288605387545317, "grad_norm": 0.4527287483215332, "learning_rate": 6.275553535642251e-06, "loss": 0.2893, "step": 28080 }, { "epoch": 1.2886512780505712, "grad_norm": 0.43942123651504517, "learning_rate": 6.275316459015667e-06, "loss": 0.3013, "step": 28081 }, { "epoch": 1.2886971685558257, "grad_norm": 0.4568745195865631, "learning_rate": 6.275079379322349e-06, "loss": 0.3836, "step": 28082 }, { "epoch": 1.2887430590610802, "grad_norm": 0.452274352312088, "learning_rate": 6.2748422965628675e-06, "loss": 0.3487, "step": 28083 }, { "epoch": 1.2887889495663347, "grad_norm": 0.4649752676486969, "learning_rate": 6.274605210737792e-06, "loss": 0.3436, "step": 28084 }, { "epoch": 1.2888348400715892, "grad_norm": 0.4587714374065399, "learning_rate": 6.274368121847694e-06, "loss": 0.3281, "step": 28085 }, { "epoch": 1.2888807305768437, "grad_norm": 0.4453941285610199, "learning_rate": 6.274131029893142e-06, "loss": 0.3082, "step": 28086 }, { "epoch": 1.2889266210820982, "grad_norm": 0.4791351854801178, "learning_rate": 6.273893934874708e-06, "loss": 0.3766, "step": 28087 }, { "epoch": 1.2889725115873527, "grad_norm": 0.47649839520454407, "learning_rate": 6.27365683679296e-06, "loss": 0.3301, "step": 28088 }, { "epoch": 1.289018402092607, "grad_norm": 0.5120162963867188, "learning_rate": 6.273419735648471e-06, "loss": 0.3997, "step": 28089 }, { "epoch": 1.2890642925978615, "grad_norm": 0.4805338978767395, "learning_rate": 6.273182631441808e-06, "loss": 0.4045, "step": 28090 }, { "epoch": 1.289110183103116, "grad_norm": 0.5130795836448669, "learning_rate": 6.272945524173543e-06, "loss": 0.4084, "step": 28091 }, { "epoch": 1.2891560736083705, "grad_norm": 0.49978095293045044, "learning_rate": 6.272708413844246e-06, "loss": 0.4578, "step": 28092 }, { "epoch": 1.2892019641136248, "grad_norm": 0.5015215873718262, "learning_rate": 6.272471300454486e-06, "loss": 0.4575, "step": 28093 }, { "epoch": 1.2892478546188793, "grad_norm": 0.4563833773136139, "learning_rate": 6.272234184004835e-06, "loss": 0.3083, "step": 28094 }, { "epoch": 1.2892937451241337, "grad_norm": 0.4357316493988037, "learning_rate": 6.271997064495863e-06, "loss": 0.3064, "step": 28095 }, { "epoch": 1.2893396356293882, "grad_norm": 0.47616976499557495, "learning_rate": 6.271759941928137e-06, "loss": 0.4032, "step": 28096 }, { "epoch": 1.2893855261346427, "grad_norm": 0.46328607201576233, "learning_rate": 6.271522816302232e-06, "loss": 0.3417, "step": 28097 }, { "epoch": 1.2894314166398972, "grad_norm": 0.47418463230133057, "learning_rate": 6.271285687618716e-06, "loss": 0.39, "step": 28098 }, { "epoch": 1.2894773071451517, "grad_norm": 0.4340604841709137, "learning_rate": 6.271048555878159e-06, "loss": 0.3276, "step": 28099 }, { "epoch": 1.2895231976504062, "grad_norm": 0.4773349165916443, "learning_rate": 6.27081142108113e-06, "loss": 0.4498, "step": 28100 }, { "epoch": 1.2895690881556605, "grad_norm": 0.4368971288204193, "learning_rate": 6.270574283228201e-06, "loss": 0.2995, "step": 28101 }, { "epoch": 1.289614978660915, "grad_norm": 0.43238645792007446, "learning_rate": 6.270337142319943e-06, "loss": 0.3204, "step": 28102 }, { "epoch": 1.2896608691661695, "grad_norm": 0.4700338542461395, "learning_rate": 6.270099998356924e-06, "loss": 0.3588, "step": 28103 }, { "epoch": 1.289706759671424, "grad_norm": 0.5410152673721313, "learning_rate": 6.269862851339716e-06, "loss": 0.2912, "step": 28104 }, { "epoch": 1.2897526501766785, "grad_norm": 0.48020198941230774, "learning_rate": 6.269625701268889e-06, "loss": 0.3536, "step": 28105 }, { "epoch": 1.2897985406819328, "grad_norm": 0.6920510530471802, "learning_rate": 6.269388548145012e-06, "loss": 0.3465, "step": 28106 }, { "epoch": 1.2898444311871873, "grad_norm": 0.5102250576019287, "learning_rate": 6.2691513919686555e-06, "loss": 0.4095, "step": 28107 }, { "epoch": 1.2898903216924418, "grad_norm": 0.4701817035675049, "learning_rate": 6.2689142327403925e-06, "loss": 0.3207, "step": 28108 }, { "epoch": 1.2899362121976963, "grad_norm": 0.435823917388916, "learning_rate": 6.26867707046079e-06, "loss": 0.3429, "step": 28109 }, { "epoch": 1.2899821027029508, "grad_norm": 0.5534860491752625, "learning_rate": 6.268439905130419e-06, "loss": 0.3097, "step": 28110 }, { "epoch": 1.2900279932082053, "grad_norm": 0.5199395418167114, "learning_rate": 6.268202736749852e-06, "loss": 0.3592, "step": 28111 }, { "epoch": 1.2900738837134598, "grad_norm": 0.4815235733985901, "learning_rate": 6.267965565319656e-06, "loss": 0.3306, "step": 28112 }, { "epoch": 1.2901197742187143, "grad_norm": 0.446381151676178, "learning_rate": 6.267728390840405e-06, "loss": 0.2692, "step": 28113 }, { "epoch": 1.2901656647239685, "grad_norm": 0.5055413246154785, "learning_rate": 6.267491213312666e-06, "loss": 0.419, "step": 28114 }, { "epoch": 1.290211555229223, "grad_norm": 0.5072341561317444, "learning_rate": 6.267254032737011e-06, "loss": 0.3564, "step": 28115 }, { "epoch": 1.2902574457344775, "grad_norm": 0.4785483181476593, "learning_rate": 6.26701684911401e-06, "loss": 0.3798, "step": 28116 }, { "epoch": 1.290303336239732, "grad_norm": 0.504549503326416, "learning_rate": 6.266779662444232e-06, "loss": 0.3771, "step": 28117 }, { "epoch": 1.2903492267449865, "grad_norm": 0.4654318690299988, "learning_rate": 6.26654247272825e-06, "loss": 0.3245, "step": 28118 }, { "epoch": 1.2903951172502408, "grad_norm": 0.46297597885131836, "learning_rate": 6.266305279966634e-06, "loss": 0.3523, "step": 28119 }, { "epoch": 1.2904410077554953, "grad_norm": 0.45811977982521057, "learning_rate": 6.266068084159952e-06, "loss": 0.3425, "step": 28120 }, { "epoch": 1.2904868982607498, "grad_norm": 0.48623883724212646, "learning_rate": 6.265830885308776e-06, "loss": 0.3405, "step": 28121 }, { "epoch": 1.2905327887660043, "grad_norm": 0.4823985695838928, "learning_rate": 6.265593683413676e-06, "loss": 0.3867, "step": 28122 }, { "epoch": 1.2905786792712588, "grad_norm": 0.4745047092437744, "learning_rate": 6.265356478475223e-06, "loss": 0.4005, "step": 28123 }, { "epoch": 1.2906245697765133, "grad_norm": 0.49596139788627625, "learning_rate": 6.265119270493988e-06, "loss": 0.3793, "step": 28124 }, { "epoch": 1.2906704602817678, "grad_norm": 0.4973370134830475, "learning_rate": 6.26488205947054e-06, "loss": 0.4494, "step": 28125 }, { "epoch": 1.2907163507870223, "grad_norm": 0.4480249285697937, "learning_rate": 6.2646448454054485e-06, "loss": 0.3388, "step": 28126 }, { "epoch": 1.2907622412922766, "grad_norm": 0.5113403797149658, "learning_rate": 6.264407628299286e-06, "loss": 0.4539, "step": 28127 }, { "epoch": 1.290808131797531, "grad_norm": 0.4826340973377228, "learning_rate": 6.264170408152623e-06, "loss": 0.3484, "step": 28128 }, { "epoch": 1.2908540223027856, "grad_norm": 0.4176933467388153, "learning_rate": 6.263933184966028e-06, "loss": 0.2705, "step": 28129 }, { "epoch": 1.29089991280804, "grad_norm": 0.4535112679004669, "learning_rate": 6.263695958740074e-06, "loss": 0.3543, "step": 28130 }, { "epoch": 1.2909458033132946, "grad_norm": 0.5444042086601257, "learning_rate": 6.263458729475329e-06, "loss": 0.4367, "step": 28131 }, { "epoch": 1.2909916938185488, "grad_norm": 0.4246787428855896, "learning_rate": 6.263221497172365e-06, "loss": 0.2671, "step": 28132 }, { "epoch": 1.2910375843238033, "grad_norm": 0.4835907518863678, "learning_rate": 6.262984261831751e-06, "loss": 0.3489, "step": 28133 }, { "epoch": 1.2910834748290578, "grad_norm": 0.47306519746780396, "learning_rate": 6.262747023454059e-06, "loss": 0.33, "step": 28134 }, { "epoch": 1.2911293653343123, "grad_norm": 0.4270380437374115, "learning_rate": 6.2625097820398596e-06, "loss": 0.2835, "step": 28135 }, { "epoch": 1.2911752558395668, "grad_norm": 0.46325618028640747, "learning_rate": 6.262272537589723e-06, "loss": 0.3682, "step": 28136 }, { "epoch": 1.2912211463448213, "grad_norm": 0.4826434254646301, "learning_rate": 6.262035290104218e-06, "loss": 0.4039, "step": 28137 }, { "epoch": 1.2912670368500758, "grad_norm": 0.45712363719940186, "learning_rate": 6.261798039583916e-06, "loss": 0.3793, "step": 28138 }, { "epoch": 1.2913129273553303, "grad_norm": 0.4697333872318268, "learning_rate": 6.261560786029389e-06, "loss": 0.3943, "step": 28139 }, { "epoch": 1.2913588178605846, "grad_norm": 0.5037807822227478, "learning_rate": 6.261323529441206e-06, "loss": 0.4085, "step": 28140 }, { "epoch": 1.291404708365839, "grad_norm": 0.4362018406391144, "learning_rate": 6.261086269819939e-06, "loss": 0.3536, "step": 28141 }, { "epoch": 1.2914505988710936, "grad_norm": 0.46879464387893677, "learning_rate": 6.260849007166157e-06, "loss": 0.3829, "step": 28142 }, { "epoch": 1.291496489376348, "grad_norm": 0.4510067105293274, "learning_rate": 6.260611741480431e-06, "loss": 0.3328, "step": 28143 }, { "epoch": 1.2915423798816024, "grad_norm": 0.4698733687400818, "learning_rate": 6.260374472763331e-06, "loss": 0.3529, "step": 28144 }, { "epoch": 1.2915882703868569, "grad_norm": 0.4291249215602875, "learning_rate": 6.26013720101543e-06, "loss": 0.3013, "step": 28145 }, { "epoch": 1.2916341608921114, "grad_norm": 0.46072617173194885, "learning_rate": 6.259899926237295e-06, "loss": 0.3379, "step": 28146 }, { "epoch": 1.2916800513973659, "grad_norm": 0.485603004693985, "learning_rate": 6.259662648429499e-06, "loss": 0.3651, "step": 28147 }, { "epoch": 1.2917259419026204, "grad_norm": 0.4442216157913208, "learning_rate": 6.2594253675926105e-06, "loss": 0.2849, "step": 28148 }, { "epoch": 1.2917718324078749, "grad_norm": 0.4689701795578003, "learning_rate": 6.259188083727203e-06, "loss": 0.3912, "step": 28149 }, { "epoch": 1.2918177229131294, "grad_norm": 0.4457116425037384, "learning_rate": 6.258950796833846e-06, "loss": 0.3408, "step": 28150 }, { "epoch": 1.2918636134183838, "grad_norm": 0.43954628705978394, "learning_rate": 6.258713506913108e-06, "loss": 0.3076, "step": 28151 }, { "epoch": 1.2919095039236381, "grad_norm": 0.4560212790966034, "learning_rate": 6.258476213965563e-06, "loss": 0.3593, "step": 28152 }, { "epoch": 1.2919553944288926, "grad_norm": 0.44616764783859253, "learning_rate": 6.258238917991779e-06, "loss": 0.3094, "step": 28153 }, { "epoch": 1.2920012849341471, "grad_norm": 0.46181008219718933, "learning_rate": 6.258001618992328e-06, "loss": 0.3503, "step": 28154 }, { "epoch": 1.2920471754394016, "grad_norm": 0.4951649606227875, "learning_rate": 6.2577643169677785e-06, "loss": 0.4082, "step": 28155 }, { "epoch": 1.2920930659446561, "grad_norm": 0.4375470280647278, "learning_rate": 6.257527011918705e-06, "loss": 0.3185, "step": 28156 }, { "epoch": 1.2921389564499104, "grad_norm": 0.4763641655445099, "learning_rate": 6.257289703845675e-06, "loss": 0.3792, "step": 28157 }, { "epoch": 1.292184846955165, "grad_norm": 0.4842449724674225, "learning_rate": 6.257052392749259e-06, "loss": 0.3769, "step": 28158 }, { "epoch": 1.2922307374604194, "grad_norm": 0.5028340220451355, "learning_rate": 6.2568150786300305e-06, "loss": 0.4051, "step": 28159 }, { "epoch": 1.2922766279656739, "grad_norm": 0.499295711517334, "learning_rate": 6.256577761488557e-06, "loss": 0.3924, "step": 28160 }, { "epoch": 1.2923225184709284, "grad_norm": 0.4599973261356354, "learning_rate": 6.25634044132541e-06, "loss": 0.3195, "step": 28161 }, { "epoch": 1.2923684089761829, "grad_norm": 0.49181047081947327, "learning_rate": 6.256103118141164e-06, "loss": 0.3546, "step": 28162 }, { "epoch": 1.2924142994814374, "grad_norm": 0.4558887183666229, "learning_rate": 6.255865791936384e-06, "loss": 0.3421, "step": 28163 }, { "epoch": 1.2924601899866919, "grad_norm": 0.49994340538978577, "learning_rate": 6.255628462711642e-06, "loss": 0.3873, "step": 28164 }, { "epoch": 1.2925060804919462, "grad_norm": 0.47716203331947327, "learning_rate": 6.255391130467513e-06, "loss": 0.3733, "step": 28165 }, { "epoch": 1.2925519709972006, "grad_norm": 0.4969201683998108, "learning_rate": 6.255153795204563e-06, "loss": 0.4054, "step": 28166 }, { "epoch": 1.2925978615024551, "grad_norm": 0.4709016680717468, "learning_rate": 6.254916456923364e-06, "loss": 0.3499, "step": 28167 }, { "epoch": 1.2926437520077096, "grad_norm": 0.44989922642707825, "learning_rate": 6.254679115624488e-06, "loss": 0.3179, "step": 28168 }, { "epoch": 1.2926896425129641, "grad_norm": 0.49080345034599304, "learning_rate": 6.254441771308503e-06, "loss": 0.3675, "step": 28169 }, { "epoch": 1.2927355330182184, "grad_norm": 0.444807231426239, "learning_rate": 6.2542044239759825e-06, "loss": 0.3354, "step": 28170 }, { "epoch": 1.292781423523473, "grad_norm": 0.44433027505874634, "learning_rate": 6.253967073627496e-06, "loss": 0.3119, "step": 28171 }, { "epoch": 1.2928273140287274, "grad_norm": 0.5566893815994263, "learning_rate": 6.253729720263614e-06, "loss": 0.4518, "step": 28172 }, { "epoch": 1.292873204533982, "grad_norm": 0.48656103014945984, "learning_rate": 6.253492363884908e-06, "loss": 0.3435, "step": 28173 }, { "epoch": 1.2929190950392364, "grad_norm": 0.4757208526134491, "learning_rate": 6.25325500449195e-06, "loss": 0.3762, "step": 28174 }, { "epoch": 1.292964985544491, "grad_norm": 0.4750703275203705, "learning_rate": 6.253017642085306e-06, "loss": 0.3832, "step": 28175 }, { "epoch": 1.2930108760497454, "grad_norm": 0.507757842540741, "learning_rate": 6.252780276665553e-06, "loss": 0.3553, "step": 28176 }, { "epoch": 1.293056766555, "grad_norm": 0.4610876739025116, "learning_rate": 6.252542908233257e-06, "loss": 0.3333, "step": 28177 }, { "epoch": 1.2931026570602542, "grad_norm": 0.448053240776062, "learning_rate": 6.252305536788992e-06, "loss": 0.3208, "step": 28178 }, { "epoch": 1.2931485475655087, "grad_norm": 0.47671446204185486, "learning_rate": 6.252068162333328e-06, "loss": 0.3969, "step": 28179 }, { "epoch": 1.2931944380707632, "grad_norm": 0.484769731760025, "learning_rate": 6.251830784866832e-06, "loss": 0.4313, "step": 28180 }, { "epoch": 1.2932403285760177, "grad_norm": 0.4288400709629059, "learning_rate": 6.2515934043900805e-06, "loss": 0.3216, "step": 28181 }, { "epoch": 1.293286219081272, "grad_norm": 0.48768994212150574, "learning_rate": 6.251356020903642e-06, "loss": 0.393, "step": 28182 }, { "epoch": 1.2933321095865264, "grad_norm": 0.4802165627479553, "learning_rate": 6.251118634408087e-06, "loss": 0.3804, "step": 28183 }, { "epoch": 1.293378000091781, "grad_norm": 0.4553995132446289, "learning_rate": 6.250881244903986e-06, "loss": 0.3245, "step": 28184 }, { "epoch": 1.2934238905970354, "grad_norm": 0.5096217393875122, "learning_rate": 6.250643852391911e-06, "loss": 0.4705, "step": 28185 }, { "epoch": 1.29346978110229, "grad_norm": 0.4675668179988861, "learning_rate": 6.250406456872431e-06, "loss": 0.3645, "step": 28186 }, { "epoch": 1.2935156716075444, "grad_norm": 0.44361206889152527, "learning_rate": 6.25016905834612e-06, "loss": 0.3019, "step": 28187 }, { "epoch": 1.293561562112799, "grad_norm": 0.4849269688129425, "learning_rate": 6.249931656813545e-06, "loss": 0.338, "step": 28188 }, { "epoch": 1.2936074526180534, "grad_norm": 0.4396539330482483, "learning_rate": 6.24969425227528e-06, "loss": 0.3303, "step": 28189 }, { "epoch": 1.2936533431233077, "grad_norm": 0.4313536584377289, "learning_rate": 6.2494568447318955e-06, "loss": 0.3233, "step": 28190 }, { "epoch": 1.2936992336285622, "grad_norm": 0.46855443716049194, "learning_rate": 6.24921943418396e-06, "loss": 0.3187, "step": 28191 }, { "epoch": 1.2937451241338167, "grad_norm": 0.4327333867549896, "learning_rate": 6.248982020632046e-06, "loss": 0.299, "step": 28192 }, { "epoch": 1.2937910146390712, "grad_norm": 0.4876275956630707, "learning_rate": 6.248744604076726e-06, "loss": 0.4235, "step": 28193 }, { "epoch": 1.2938369051443257, "grad_norm": 0.5048226118087769, "learning_rate": 6.248507184518567e-06, "loss": 0.3805, "step": 28194 }, { "epoch": 1.29388279564958, "grad_norm": 0.4671584665775299, "learning_rate": 6.248269761958146e-06, "loss": 0.3041, "step": 28195 }, { "epoch": 1.2939286861548345, "grad_norm": 0.4680176377296448, "learning_rate": 6.2480323363960274e-06, "loss": 0.3695, "step": 28196 }, { "epoch": 1.293974576660089, "grad_norm": 0.4423791468143463, "learning_rate": 6.247794907832785e-06, "loss": 0.3188, "step": 28197 }, { "epoch": 1.2940204671653435, "grad_norm": 0.46270719170570374, "learning_rate": 6.24755747626899e-06, "loss": 0.3352, "step": 28198 }, { "epoch": 1.294066357670598, "grad_norm": 0.4656553864479065, "learning_rate": 6.247320041705214e-06, "loss": 0.3331, "step": 28199 }, { "epoch": 1.2941122481758525, "grad_norm": 0.46538588404655457, "learning_rate": 6.247082604142026e-06, "loss": 0.3587, "step": 28200 }, { "epoch": 1.294158138681107, "grad_norm": 0.5083321332931519, "learning_rate": 6.246845163579999e-06, "loss": 0.4305, "step": 28201 }, { "epoch": 1.2942040291863615, "grad_norm": 0.5457224249839783, "learning_rate": 6.246607720019701e-06, "loss": 0.5146, "step": 28202 }, { "epoch": 1.2942499196916157, "grad_norm": 0.5039021372795105, "learning_rate": 6.246370273461705e-06, "loss": 0.4287, "step": 28203 }, { "epoch": 1.2942958101968702, "grad_norm": 0.47966036200523376, "learning_rate": 6.246132823906583e-06, "loss": 0.3393, "step": 28204 }, { "epoch": 1.2943417007021247, "grad_norm": 0.4261705279350281, "learning_rate": 6.245895371354904e-06, "loss": 0.2795, "step": 28205 }, { "epoch": 1.2943875912073792, "grad_norm": 0.47341057658195496, "learning_rate": 6.24565791580724e-06, "loss": 0.3718, "step": 28206 }, { "epoch": 1.2944334817126337, "grad_norm": 0.47819915413856506, "learning_rate": 6.2454204572641645e-06, "loss": 0.3834, "step": 28207 }, { "epoch": 1.294479372217888, "grad_norm": 0.47994464635849, "learning_rate": 6.245182995726242e-06, "loss": 0.4399, "step": 28208 }, { "epoch": 1.2945252627231425, "grad_norm": 0.4513194262981415, "learning_rate": 6.244945531194049e-06, "loss": 0.3292, "step": 28209 }, { "epoch": 1.294571153228397, "grad_norm": 0.47888657450675964, "learning_rate": 6.244708063668155e-06, "loss": 0.4301, "step": 28210 }, { "epoch": 1.2946170437336515, "grad_norm": 0.4408484101295471, "learning_rate": 6.244470593149131e-06, "loss": 0.3045, "step": 28211 }, { "epoch": 1.294662934238906, "grad_norm": 0.49501144886016846, "learning_rate": 6.244233119637547e-06, "loss": 0.4154, "step": 28212 }, { "epoch": 1.2947088247441605, "grad_norm": 0.4493750035762787, "learning_rate": 6.243995643133976e-06, "loss": 0.3593, "step": 28213 }, { "epoch": 1.294754715249415, "grad_norm": 0.5314096212387085, "learning_rate": 6.2437581636389875e-06, "loss": 0.5131, "step": 28214 }, { "epoch": 1.2948006057546695, "grad_norm": 0.4515240490436554, "learning_rate": 6.243520681153155e-06, "loss": 0.295, "step": 28215 }, { "epoch": 1.2948464962599238, "grad_norm": 0.47085344791412354, "learning_rate": 6.2432831956770454e-06, "loss": 0.3808, "step": 28216 }, { "epoch": 1.2948923867651783, "grad_norm": 0.4392392039299011, "learning_rate": 6.243045707211234e-06, "loss": 0.3087, "step": 28217 }, { "epoch": 1.2949382772704328, "grad_norm": 0.5048328042030334, "learning_rate": 6.242808215756288e-06, "loss": 0.4668, "step": 28218 }, { "epoch": 1.2949841677756873, "grad_norm": 0.4718846082687378, "learning_rate": 6.2425707213127815e-06, "loss": 0.3438, "step": 28219 }, { "epoch": 1.2950300582809418, "grad_norm": 0.47589579224586487, "learning_rate": 6.242333223881285e-06, "loss": 0.3538, "step": 28220 }, { "epoch": 1.295075948786196, "grad_norm": 0.4690905809402466, "learning_rate": 6.242095723462369e-06, "loss": 0.3895, "step": 28221 }, { "epoch": 1.2951218392914505, "grad_norm": 0.4229196608066559, "learning_rate": 6.241858220056605e-06, "loss": 0.2873, "step": 28222 }, { "epoch": 1.295167729796705, "grad_norm": 0.48218896985054016, "learning_rate": 6.241620713664562e-06, "loss": 0.3911, "step": 28223 }, { "epoch": 1.2952136203019595, "grad_norm": 0.48509618639945984, "learning_rate": 6.241383204286816e-06, "loss": 0.3555, "step": 28224 }, { "epoch": 1.295259510807214, "grad_norm": 0.4459286630153656, "learning_rate": 6.2411456919239335e-06, "loss": 0.2965, "step": 28225 }, { "epoch": 1.2953054013124685, "grad_norm": 0.46515509486198425, "learning_rate": 6.240908176576487e-06, "loss": 0.355, "step": 28226 }, { "epoch": 1.295351291817723, "grad_norm": 0.5120500326156616, "learning_rate": 6.240670658245049e-06, "loss": 0.4591, "step": 28227 }, { "epoch": 1.2953971823229775, "grad_norm": 0.5213598608970642, "learning_rate": 6.24043313693019e-06, "loss": 0.4478, "step": 28228 }, { "epoch": 1.2954430728282318, "grad_norm": 0.5212617516517639, "learning_rate": 6.240195612632478e-06, "loss": 0.4635, "step": 28229 }, { "epoch": 1.2954889633334863, "grad_norm": 0.47674497961997986, "learning_rate": 6.23995808535249e-06, "loss": 0.3728, "step": 28230 }, { "epoch": 1.2955348538387408, "grad_norm": 0.47173964977264404, "learning_rate": 6.2397205550907925e-06, "loss": 0.381, "step": 28231 }, { "epoch": 1.2955807443439953, "grad_norm": 0.47051575779914856, "learning_rate": 6.23948302184796e-06, "loss": 0.3345, "step": 28232 }, { "epoch": 1.2956266348492496, "grad_norm": 0.42756760120391846, "learning_rate": 6.239245485624561e-06, "loss": 0.2984, "step": 28233 }, { "epoch": 1.295672525354504, "grad_norm": 0.49231743812561035, "learning_rate": 6.239007946421167e-06, "loss": 0.389, "step": 28234 }, { "epoch": 1.2957184158597586, "grad_norm": 0.45892927050590515, "learning_rate": 6.238770404238351e-06, "loss": 0.344, "step": 28235 }, { "epoch": 1.295764306365013, "grad_norm": 0.46634969115257263, "learning_rate": 6.238532859076682e-06, "loss": 0.2992, "step": 28236 }, { "epoch": 1.2958101968702675, "grad_norm": 0.45209044218063354, "learning_rate": 6.238295310936733e-06, "loss": 0.3137, "step": 28237 }, { "epoch": 1.295856087375522, "grad_norm": 0.48368358612060547, "learning_rate": 6.238057759819075e-06, "loss": 0.4326, "step": 28238 }, { "epoch": 1.2959019778807765, "grad_norm": 0.48004165291786194, "learning_rate": 6.2378202057242785e-06, "loss": 0.3677, "step": 28239 }, { "epoch": 1.295947868386031, "grad_norm": 0.4553161561489105, "learning_rate": 6.237582648652916e-06, "loss": 0.3316, "step": 28240 }, { "epoch": 1.2959937588912853, "grad_norm": 0.4572404623031616, "learning_rate": 6.237345088605556e-06, "loss": 0.2856, "step": 28241 }, { "epoch": 1.2960396493965398, "grad_norm": 0.48816996812820435, "learning_rate": 6.237107525582773e-06, "loss": 0.385, "step": 28242 }, { "epoch": 1.2960855399017943, "grad_norm": 0.48697546124458313, "learning_rate": 6.236869959585136e-06, "loss": 0.3783, "step": 28243 }, { "epoch": 1.2961314304070488, "grad_norm": 0.4897775948047638, "learning_rate": 6.2366323906132185e-06, "loss": 0.413, "step": 28244 }, { "epoch": 1.2961773209123033, "grad_norm": 0.468680739402771, "learning_rate": 6.236394818667588e-06, "loss": 0.3582, "step": 28245 }, { "epoch": 1.2962232114175576, "grad_norm": 0.473310261964798, "learning_rate": 6.236157243748819e-06, "loss": 0.3965, "step": 28246 }, { "epoch": 1.296269101922812, "grad_norm": 0.44930437207221985, "learning_rate": 6.235919665857484e-06, "loss": 0.3288, "step": 28247 }, { "epoch": 1.2963149924280666, "grad_norm": 0.4284410774707794, "learning_rate": 6.23568208499415e-06, "loss": 0.3321, "step": 28248 }, { "epoch": 1.296360882933321, "grad_norm": 0.498472660779953, "learning_rate": 6.235444501159391e-06, "loss": 0.3642, "step": 28249 }, { "epoch": 1.2964067734385756, "grad_norm": 0.4493360221385956, "learning_rate": 6.23520691435378e-06, "loss": 0.3786, "step": 28250 }, { "epoch": 1.29645266394383, "grad_norm": 0.4264218509197235, "learning_rate": 6.234969324577883e-06, "loss": 0.2964, "step": 28251 }, { "epoch": 1.2964985544490846, "grad_norm": 0.4268330931663513, "learning_rate": 6.2347317318322774e-06, "loss": 0.3447, "step": 28252 }, { "epoch": 1.296544444954339, "grad_norm": 0.44102004170417786, "learning_rate": 6.234494136117529e-06, "loss": 0.3017, "step": 28253 }, { "epoch": 1.2965903354595933, "grad_norm": 0.4497438669204712, "learning_rate": 6.234256537434212e-06, "loss": 0.3793, "step": 28254 }, { "epoch": 1.2966362259648478, "grad_norm": 0.44043946266174316, "learning_rate": 6.2340189357829e-06, "loss": 0.3284, "step": 28255 }, { "epoch": 1.2966821164701023, "grad_norm": 0.4701074957847595, "learning_rate": 6.233781331164161e-06, "loss": 0.3397, "step": 28256 }, { "epoch": 1.2967280069753568, "grad_norm": 0.46878552436828613, "learning_rate": 6.233543723578567e-06, "loss": 0.3204, "step": 28257 }, { "epoch": 1.2967738974806113, "grad_norm": 0.516370415687561, "learning_rate": 6.233306113026689e-06, "loss": 0.4495, "step": 28258 }, { "epoch": 1.2968197879858656, "grad_norm": 0.5256620645523071, "learning_rate": 6.2330684995091e-06, "loss": 0.4481, "step": 28259 }, { "epoch": 1.29686567849112, "grad_norm": 0.45697155594825745, "learning_rate": 6.232830883026369e-06, "loss": 0.318, "step": 28260 }, { "epoch": 1.2969115689963746, "grad_norm": 0.4993169605731964, "learning_rate": 6.232593263579071e-06, "loss": 0.4147, "step": 28261 }, { "epoch": 1.296957459501629, "grad_norm": 0.4609990119934082, "learning_rate": 6.232355641167773e-06, "loss": 0.3249, "step": 28262 }, { "epoch": 1.2970033500068836, "grad_norm": 0.479338675737381, "learning_rate": 6.23211801579305e-06, "loss": 0.3672, "step": 28263 }, { "epoch": 1.297049240512138, "grad_norm": 0.47226232290267944, "learning_rate": 6.231880387455472e-06, "loss": 0.3919, "step": 28264 }, { "epoch": 1.2970951310173926, "grad_norm": 0.4605559706687927, "learning_rate": 6.2316427561556095e-06, "loss": 0.3614, "step": 28265 }, { "epoch": 1.297141021522647, "grad_norm": 0.4790906310081482, "learning_rate": 6.231405121894038e-06, "loss": 0.4085, "step": 28266 }, { "epoch": 1.2971869120279014, "grad_norm": 0.6127444505691528, "learning_rate": 6.231167484671322e-06, "loss": 0.4463, "step": 28267 }, { "epoch": 1.2972328025331559, "grad_norm": 0.4843725562095642, "learning_rate": 6.2309298444880385e-06, "loss": 0.3586, "step": 28268 }, { "epoch": 1.2972786930384104, "grad_norm": 0.4457184374332428, "learning_rate": 6.230692201344757e-06, "loss": 0.324, "step": 28269 }, { "epoch": 1.2973245835436649, "grad_norm": 0.4672364890575409, "learning_rate": 6.2304545552420485e-06, "loss": 0.3241, "step": 28270 }, { "epoch": 1.2973704740489191, "grad_norm": 0.502672016620636, "learning_rate": 6.230216906180486e-06, "loss": 0.3669, "step": 28271 }, { "epoch": 1.2974163645541736, "grad_norm": 0.46793845295906067, "learning_rate": 6.229979254160641e-06, "loss": 0.3768, "step": 28272 }, { "epoch": 1.2974622550594281, "grad_norm": 0.44317319989204407, "learning_rate": 6.229741599183082e-06, "loss": 0.2986, "step": 28273 }, { "epoch": 1.2975081455646826, "grad_norm": 0.4593021869659424, "learning_rate": 6.229503941248382e-06, "loss": 0.3457, "step": 28274 }, { "epoch": 1.2975540360699371, "grad_norm": 0.45081472396850586, "learning_rate": 6.229266280357116e-06, "loss": 0.3153, "step": 28275 }, { "epoch": 1.2975999265751916, "grad_norm": 0.4700773358345032, "learning_rate": 6.22902861650985e-06, "loss": 0.357, "step": 28276 }, { "epoch": 1.2976458170804461, "grad_norm": 0.48508551716804504, "learning_rate": 6.2287909497071595e-06, "loss": 0.3732, "step": 28277 }, { "epoch": 1.2976917075857006, "grad_norm": 0.434691846370697, "learning_rate": 6.228553279949615e-06, "loss": 0.3188, "step": 28278 }, { "epoch": 1.297737598090955, "grad_norm": 0.4558145999908447, "learning_rate": 6.2283156072377856e-06, "loss": 0.3531, "step": 28279 }, { "epoch": 1.2977834885962094, "grad_norm": 0.4659963846206665, "learning_rate": 6.228077931572247e-06, "loss": 0.3821, "step": 28280 }, { "epoch": 1.297829379101464, "grad_norm": 0.42511171102523804, "learning_rate": 6.2278402529535675e-06, "loss": 0.2894, "step": 28281 }, { "epoch": 1.2978752696067184, "grad_norm": 0.4993799030780792, "learning_rate": 6.22760257138232e-06, "loss": 0.4487, "step": 28282 }, { "epoch": 1.297921160111973, "grad_norm": 0.45986226201057434, "learning_rate": 6.227364886859077e-06, "loss": 0.3467, "step": 28283 }, { "epoch": 1.2979670506172272, "grad_norm": 0.4547714591026306, "learning_rate": 6.2271271993844066e-06, "loss": 0.3359, "step": 28284 }, { "epoch": 1.2980129411224817, "grad_norm": 0.474901407957077, "learning_rate": 6.226889508958882e-06, "loss": 0.3685, "step": 28285 }, { "epoch": 1.2980588316277362, "grad_norm": 0.41690266132354736, "learning_rate": 6.226651815583078e-06, "loss": 0.271, "step": 28286 }, { "epoch": 1.2981047221329907, "grad_norm": 0.4565311074256897, "learning_rate": 6.226414119257561e-06, "loss": 0.339, "step": 28287 }, { "epoch": 1.2981506126382452, "grad_norm": 0.4872124493122101, "learning_rate": 6.226176419982907e-06, "loss": 0.4063, "step": 28288 }, { "epoch": 1.2981965031434997, "grad_norm": 0.4359271228313446, "learning_rate": 6.225938717759685e-06, "loss": 0.3107, "step": 28289 }, { "epoch": 1.2982423936487542, "grad_norm": 0.47329047322273254, "learning_rate": 6.225701012588468e-06, "loss": 0.341, "step": 28290 }, { "epoch": 1.2982882841540087, "grad_norm": 0.4435488283634186, "learning_rate": 6.225463304469825e-06, "loss": 0.2882, "step": 28291 }, { "epoch": 1.298334174659263, "grad_norm": 0.47545769810676575, "learning_rate": 6.225225593404331e-06, "loss": 0.3836, "step": 28292 }, { "epoch": 1.2983800651645174, "grad_norm": 0.4938840866088867, "learning_rate": 6.224987879392557e-06, "loss": 0.4022, "step": 28293 }, { "epoch": 1.298425955669772, "grad_norm": 0.4281056821346283, "learning_rate": 6.224750162435073e-06, "loss": 0.2745, "step": 28294 }, { "epoch": 1.2984718461750264, "grad_norm": 0.44834575057029724, "learning_rate": 6.224512442532451e-06, "loss": 0.359, "step": 28295 }, { "epoch": 1.298517736680281, "grad_norm": 0.4557170867919922, "learning_rate": 6.2242747196852625e-06, "loss": 0.3059, "step": 28296 }, { "epoch": 1.2985636271855352, "grad_norm": 0.5180224180221558, "learning_rate": 6.22403699389408e-06, "loss": 0.3492, "step": 28297 }, { "epoch": 1.2986095176907897, "grad_norm": 0.4579494595527649, "learning_rate": 6.223799265159476e-06, "loss": 0.3378, "step": 28298 }, { "epoch": 1.2986554081960442, "grad_norm": 0.4819929301738739, "learning_rate": 6.2235615334820195e-06, "loss": 0.3668, "step": 28299 }, { "epoch": 1.2987012987012987, "grad_norm": 0.46763843297958374, "learning_rate": 6.223323798862285e-06, "loss": 0.3574, "step": 28300 }, { "epoch": 1.2987471892065532, "grad_norm": 0.4747743308544159, "learning_rate": 6.223086061300842e-06, "loss": 0.403, "step": 28301 }, { "epoch": 1.2987930797118077, "grad_norm": 0.44085994362831116, "learning_rate": 6.222848320798262e-06, "loss": 0.3486, "step": 28302 }, { "epoch": 1.2988389702170622, "grad_norm": 0.5846200585365295, "learning_rate": 6.22261057735512e-06, "loss": 0.408, "step": 28303 }, { "epoch": 1.2988848607223167, "grad_norm": 0.48225364089012146, "learning_rate": 6.222372830971985e-06, "loss": 0.3845, "step": 28304 }, { "epoch": 1.298930751227571, "grad_norm": 0.5132493376731873, "learning_rate": 6.222135081649428e-06, "loss": 0.3747, "step": 28305 }, { "epoch": 1.2989766417328255, "grad_norm": 0.4257825016975403, "learning_rate": 6.221897329388024e-06, "loss": 0.2875, "step": 28306 }, { "epoch": 1.29902253223808, "grad_norm": 0.4708353579044342, "learning_rate": 6.2216595741883405e-06, "loss": 0.374, "step": 28307 }, { "epoch": 1.2990684227433344, "grad_norm": 0.49343109130859375, "learning_rate": 6.221421816050951e-06, "loss": 0.371, "step": 28308 }, { "epoch": 1.299114313248589, "grad_norm": 0.4671335518360138, "learning_rate": 6.221184054976428e-06, "loss": 0.35, "step": 28309 }, { "epoch": 1.2991602037538432, "grad_norm": 0.4626711308956146, "learning_rate": 6.220946290965344e-06, "loss": 0.3825, "step": 28310 }, { "epoch": 1.2992060942590977, "grad_norm": 0.45057567954063416, "learning_rate": 6.220708524018268e-06, "loss": 0.3183, "step": 28311 }, { "epoch": 1.2992519847643522, "grad_norm": 0.47545817494392395, "learning_rate": 6.220470754135775e-06, "loss": 0.3945, "step": 28312 }, { "epoch": 1.2992978752696067, "grad_norm": 0.4843747913837433, "learning_rate": 6.220232981318432e-06, "loss": 0.3951, "step": 28313 }, { "epoch": 1.2993437657748612, "grad_norm": 0.4609571695327759, "learning_rate": 6.219995205566817e-06, "loss": 0.3216, "step": 28314 }, { "epoch": 1.2993896562801157, "grad_norm": 0.49624764919281006, "learning_rate": 6.2197574268814975e-06, "loss": 0.443, "step": 28315 }, { "epoch": 1.2994355467853702, "grad_norm": 0.46185392141342163, "learning_rate": 6.219519645263046e-06, "loss": 0.3311, "step": 28316 }, { "epoch": 1.2994814372906247, "grad_norm": 0.44932329654693604, "learning_rate": 6.219281860712034e-06, "loss": 0.2944, "step": 28317 }, { "epoch": 1.299527327795879, "grad_norm": 0.45799845457077026, "learning_rate": 6.2190440732290345e-06, "loss": 0.3128, "step": 28318 }, { "epoch": 1.2995732183011335, "grad_norm": 0.5325301289558411, "learning_rate": 6.218806282814619e-06, "loss": 0.4391, "step": 28319 }, { "epoch": 1.299619108806388, "grad_norm": 0.5052163600921631, "learning_rate": 6.218568489469359e-06, "loss": 0.416, "step": 28320 }, { "epoch": 1.2996649993116425, "grad_norm": 0.47152796387672424, "learning_rate": 6.218330693193827e-06, "loss": 0.3621, "step": 28321 }, { "epoch": 1.2997108898168968, "grad_norm": 0.5058289766311646, "learning_rate": 6.2180928939885935e-06, "loss": 0.4142, "step": 28322 }, { "epoch": 1.2997567803221513, "grad_norm": 0.4970706105232239, "learning_rate": 6.217855091854231e-06, "loss": 0.3947, "step": 28323 }, { "epoch": 1.2998026708274057, "grad_norm": 0.49988478422164917, "learning_rate": 6.217617286791311e-06, "loss": 0.3538, "step": 28324 }, { "epoch": 1.2998485613326602, "grad_norm": 0.47883984446525574, "learning_rate": 6.2173794788004074e-06, "loss": 0.4016, "step": 28325 }, { "epoch": 1.2998944518379147, "grad_norm": 0.48086363077163696, "learning_rate": 6.21714166788209e-06, "loss": 0.3781, "step": 28326 }, { "epoch": 1.2999403423431692, "grad_norm": 0.475839227437973, "learning_rate": 6.2169038540369285e-06, "loss": 0.3768, "step": 28327 }, { "epoch": 1.2999862328484237, "grad_norm": 0.43456992506980896, "learning_rate": 6.216666037265499e-06, "loss": 0.3142, "step": 28328 }, { "epoch": 1.3000321233536782, "grad_norm": 0.5148123502731323, "learning_rate": 6.216428217568372e-06, "loss": 0.3877, "step": 28329 }, { "epoch": 1.3000780138589325, "grad_norm": 0.47856128215789795, "learning_rate": 6.2161903949461176e-06, "loss": 0.3666, "step": 28330 }, { "epoch": 1.300123904364187, "grad_norm": 0.45078423619270325, "learning_rate": 6.2159525693993105e-06, "loss": 0.3584, "step": 28331 }, { "epoch": 1.3001697948694415, "grad_norm": 0.4434889853000641, "learning_rate": 6.215714740928523e-06, "loss": 0.324, "step": 28332 }, { "epoch": 1.300215685374696, "grad_norm": 0.4791339933872223, "learning_rate": 6.215476909534321e-06, "loss": 0.3505, "step": 28333 }, { "epoch": 1.3002615758799505, "grad_norm": 0.4591011106967926, "learning_rate": 6.215239075217284e-06, "loss": 0.3607, "step": 28334 }, { "epoch": 1.3003074663852048, "grad_norm": 0.4973049461841583, "learning_rate": 6.215001237977979e-06, "loss": 0.3913, "step": 28335 }, { "epoch": 1.3003533568904593, "grad_norm": 0.45025011897087097, "learning_rate": 6.2147633978169805e-06, "loss": 0.3052, "step": 28336 }, { "epoch": 1.3003992473957138, "grad_norm": 0.4808560609817505, "learning_rate": 6.214525554734859e-06, "loss": 0.3789, "step": 28337 }, { "epoch": 1.3004451379009683, "grad_norm": 0.45503878593444824, "learning_rate": 6.214287708732186e-06, "loss": 0.3557, "step": 28338 }, { "epoch": 1.3004910284062228, "grad_norm": 0.4446350932121277, "learning_rate": 6.214049859809535e-06, "loss": 0.3532, "step": 28339 }, { "epoch": 1.3005369189114773, "grad_norm": 0.47719836235046387, "learning_rate": 6.213812007967478e-06, "loss": 0.3446, "step": 28340 }, { "epoch": 1.3005828094167318, "grad_norm": 0.4537985026836395, "learning_rate": 6.213574153206584e-06, "loss": 0.409, "step": 28341 }, { "epoch": 1.3006286999219863, "grad_norm": 0.4644417464733124, "learning_rate": 6.21333629552743e-06, "loss": 0.3155, "step": 28342 }, { "epoch": 1.3006745904272405, "grad_norm": 0.4750857949256897, "learning_rate": 6.213098434930583e-06, "loss": 0.3529, "step": 28343 }, { "epoch": 1.300720480932495, "grad_norm": 0.4374186098575592, "learning_rate": 6.212860571416618e-06, "loss": 0.3264, "step": 28344 }, { "epoch": 1.3007663714377495, "grad_norm": 0.4876135587692261, "learning_rate": 6.212622704986105e-06, "loss": 0.3447, "step": 28345 }, { "epoch": 1.300812261943004, "grad_norm": 0.46964141726493835, "learning_rate": 6.21238483563962e-06, "loss": 0.3728, "step": 28346 }, { "epoch": 1.3008581524482585, "grad_norm": 0.5390739440917969, "learning_rate": 6.21214696337773e-06, "loss": 0.3837, "step": 28347 }, { "epoch": 1.3009040429535128, "grad_norm": 0.5065861940383911, "learning_rate": 6.21190908820101e-06, "loss": 0.3438, "step": 28348 }, { "epoch": 1.3009499334587673, "grad_norm": 0.48037227988243103, "learning_rate": 6.211671210110029e-06, "loss": 0.3316, "step": 28349 }, { "epoch": 1.3009958239640218, "grad_norm": 0.54294753074646, "learning_rate": 6.211433329105363e-06, "loss": 0.4446, "step": 28350 }, { "epoch": 1.3010417144692763, "grad_norm": 0.4697289764881134, "learning_rate": 6.2111954451875825e-06, "loss": 0.3726, "step": 28351 }, { "epoch": 1.3010876049745308, "grad_norm": 0.4775595963001251, "learning_rate": 6.210957558357258e-06, "loss": 0.3637, "step": 28352 }, { "epoch": 1.3011334954797853, "grad_norm": 0.6707074046134949, "learning_rate": 6.210719668614964e-06, "loss": 0.382, "step": 28353 }, { "epoch": 1.3011793859850398, "grad_norm": 0.43275824189186096, "learning_rate": 6.21048177596127e-06, "loss": 0.2764, "step": 28354 }, { "epoch": 1.3012252764902943, "grad_norm": 0.46235793828964233, "learning_rate": 6.2102438803967504e-06, "loss": 0.3744, "step": 28355 }, { "epoch": 1.3012711669955486, "grad_norm": 0.42501163482666016, "learning_rate": 6.210005981921975e-06, "loss": 0.2634, "step": 28356 }, { "epoch": 1.301317057500803, "grad_norm": 0.4884771704673767, "learning_rate": 6.209768080537518e-06, "loss": 0.3574, "step": 28357 }, { "epoch": 1.3013629480060576, "grad_norm": 0.5541201829910278, "learning_rate": 6.209530176243952e-06, "loss": 0.46, "step": 28358 }, { "epoch": 1.301408838511312, "grad_norm": 0.4862549304962158, "learning_rate": 6.209292269041844e-06, "loss": 0.3509, "step": 28359 }, { "epoch": 1.3014547290165663, "grad_norm": 0.5106180906295776, "learning_rate": 6.209054358931773e-06, "loss": 0.425, "step": 28360 }, { "epoch": 1.3015006195218208, "grad_norm": 0.43822890520095825, "learning_rate": 6.208816445914306e-06, "loss": 0.3214, "step": 28361 }, { "epoch": 1.3015465100270753, "grad_norm": 0.49049073457717896, "learning_rate": 6.208578529990019e-06, "loss": 0.3877, "step": 28362 }, { "epoch": 1.3015924005323298, "grad_norm": 0.4863899052143097, "learning_rate": 6.208340611159481e-06, "loss": 0.3604, "step": 28363 }, { "epoch": 1.3016382910375843, "grad_norm": 0.46089184284210205, "learning_rate": 6.208102689423265e-06, "loss": 0.3423, "step": 28364 }, { "epoch": 1.3016841815428388, "grad_norm": 0.5719490051269531, "learning_rate": 6.2078647647819435e-06, "loss": 0.48, "step": 28365 }, { "epoch": 1.3017300720480933, "grad_norm": 0.475108802318573, "learning_rate": 6.207626837236088e-06, "loss": 0.3777, "step": 28366 }, { "epoch": 1.3017759625533478, "grad_norm": 0.4528774917125702, "learning_rate": 6.2073889067862705e-06, "loss": 0.3712, "step": 28367 }, { "epoch": 1.301821853058602, "grad_norm": 0.4743521809577942, "learning_rate": 6.207150973433065e-06, "loss": 0.4073, "step": 28368 }, { "epoch": 1.3018677435638566, "grad_norm": 0.4361277222633362, "learning_rate": 6.206913037177042e-06, "loss": 0.301, "step": 28369 }, { "epoch": 1.301913634069111, "grad_norm": 0.5007175207138062, "learning_rate": 6.206675098018774e-06, "loss": 0.4149, "step": 28370 }, { "epoch": 1.3019595245743656, "grad_norm": 0.4634784460067749, "learning_rate": 6.206437155958834e-06, "loss": 0.3695, "step": 28371 }, { "epoch": 1.30200541507962, "grad_norm": 0.46629971265792847, "learning_rate": 6.206199210997792e-06, "loss": 0.3505, "step": 28372 }, { "epoch": 1.3020513055848744, "grad_norm": 0.42915579676628113, "learning_rate": 6.205961263136222e-06, "loss": 0.3249, "step": 28373 }, { "epoch": 1.3020971960901289, "grad_norm": 0.4242452085018158, "learning_rate": 6.2057233123746965e-06, "loss": 0.3291, "step": 28374 }, { "epoch": 1.3021430865953834, "grad_norm": 0.46684548258781433, "learning_rate": 6.205485358713788e-06, "loss": 0.3923, "step": 28375 }, { "epoch": 1.3021889771006379, "grad_norm": 0.46860218048095703, "learning_rate": 6.205247402154064e-06, "loss": 0.3548, "step": 28376 }, { "epoch": 1.3022348676058924, "grad_norm": 0.44261425733566284, "learning_rate": 6.2050094426961036e-06, "loss": 0.3258, "step": 28377 }, { "epoch": 1.3022807581111469, "grad_norm": 0.45004382729530334, "learning_rate": 6.204771480340474e-06, "loss": 0.3323, "step": 28378 }, { "epoch": 1.3023266486164014, "grad_norm": 0.42827728390693665, "learning_rate": 6.20453351508775e-06, "loss": 0.2778, "step": 28379 }, { "epoch": 1.3023725391216558, "grad_norm": 0.4929460287094116, "learning_rate": 6.204295546938505e-06, "loss": 0.4184, "step": 28380 }, { "epoch": 1.3024184296269101, "grad_norm": 0.4692342281341553, "learning_rate": 6.204057575893307e-06, "loss": 0.3864, "step": 28381 }, { "epoch": 1.3024643201321646, "grad_norm": 0.41919299960136414, "learning_rate": 6.203819601952731e-06, "loss": 0.2552, "step": 28382 }, { "epoch": 1.3025102106374191, "grad_norm": 0.48206856846809387, "learning_rate": 6.203581625117348e-06, "loss": 0.3526, "step": 28383 }, { "epoch": 1.3025561011426736, "grad_norm": 0.44326165318489075, "learning_rate": 6.203343645387731e-06, "loss": 0.2985, "step": 28384 }, { "epoch": 1.3026019916479281, "grad_norm": 0.4822196066379547, "learning_rate": 6.2031056627644535e-06, "loss": 0.3871, "step": 28385 }, { "epoch": 1.3026478821531824, "grad_norm": 0.44136297702789307, "learning_rate": 6.2028676772480865e-06, "loss": 0.285, "step": 28386 }, { "epoch": 1.3026937726584369, "grad_norm": 0.4884640872478485, "learning_rate": 6.202629688839201e-06, "loss": 0.3419, "step": 28387 }, { "epoch": 1.3027396631636914, "grad_norm": 0.43173038959503174, "learning_rate": 6.202391697538373e-06, "loss": 0.2726, "step": 28388 }, { "epoch": 1.3027855536689459, "grad_norm": 0.45792195200920105, "learning_rate": 6.202153703346171e-06, "loss": 0.3467, "step": 28389 }, { "epoch": 1.3028314441742004, "grad_norm": 0.49218297004699707, "learning_rate": 6.201915706263169e-06, "loss": 0.328, "step": 28390 }, { "epoch": 1.3028773346794549, "grad_norm": 0.4530586302280426, "learning_rate": 6.201677706289939e-06, "loss": 0.3038, "step": 28391 }, { "epoch": 1.3029232251847094, "grad_norm": 0.44972261786460876, "learning_rate": 6.201439703427054e-06, "loss": 0.3281, "step": 28392 }, { "epoch": 1.3029691156899639, "grad_norm": 0.5540485978126526, "learning_rate": 6.201201697675083e-06, "loss": 0.37, "step": 28393 }, { "epoch": 1.3030150061952182, "grad_norm": 0.4657600224018097, "learning_rate": 6.200963689034605e-06, "loss": 0.3585, "step": 28394 }, { "epoch": 1.3030608967004726, "grad_norm": 0.48026615381240845, "learning_rate": 6.200725677506186e-06, "loss": 0.3738, "step": 28395 }, { "epoch": 1.3031067872057271, "grad_norm": 0.44920414686203003, "learning_rate": 6.200487663090401e-06, "loss": 0.3532, "step": 28396 }, { "epoch": 1.3031526777109816, "grad_norm": 0.4611596465110779, "learning_rate": 6.200249645787823e-06, "loss": 0.3191, "step": 28397 }, { "epoch": 1.3031985682162361, "grad_norm": 0.44115522503852844, "learning_rate": 6.200011625599022e-06, "loss": 0.332, "step": 28398 }, { "epoch": 1.3032444587214904, "grad_norm": 0.4785827398300171, "learning_rate": 6.199773602524573e-06, "loss": 0.3534, "step": 28399 }, { "epoch": 1.303290349226745, "grad_norm": 0.5511216521263123, "learning_rate": 6.1995355765650455e-06, "loss": 0.449, "step": 28400 }, { "epoch": 1.3033362397319994, "grad_norm": 0.508884072303772, "learning_rate": 6.1992975477210145e-06, "loss": 0.4017, "step": 28401 }, { "epoch": 1.303382130237254, "grad_norm": 0.48739388585090637, "learning_rate": 6.199059515993052e-06, "loss": 0.396, "step": 28402 }, { "epoch": 1.3034280207425084, "grad_norm": 0.49885162711143494, "learning_rate": 6.198821481381729e-06, "loss": 0.3899, "step": 28403 }, { "epoch": 1.303473911247763, "grad_norm": 0.47940605878829956, "learning_rate": 6.198583443887619e-06, "loss": 0.3374, "step": 28404 }, { "epoch": 1.3035198017530174, "grad_norm": 0.49607333540916443, "learning_rate": 6.1983454035112946e-06, "loss": 0.4084, "step": 28405 }, { "epoch": 1.303565692258272, "grad_norm": 0.44045209884643555, "learning_rate": 6.198107360253326e-06, "loss": 0.3291, "step": 28406 }, { "epoch": 1.3036115827635262, "grad_norm": 0.5088791847229004, "learning_rate": 6.197869314114288e-06, "loss": 0.4133, "step": 28407 }, { "epoch": 1.3036574732687807, "grad_norm": 0.4871130883693695, "learning_rate": 6.197631265094754e-06, "loss": 0.4436, "step": 28408 }, { "epoch": 1.3037033637740352, "grad_norm": 0.530781090259552, "learning_rate": 6.1973932131952945e-06, "loss": 0.4311, "step": 28409 }, { "epoch": 1.3037492542792897, "grad_norm": 0.42919108271598816, "learning_rate": 6.19715515841648e-06, "loss": 0.2838, "step": 28410 }, { "epoch": 1.303795144784544, "grad_norm": 0.4410487413406372, "learning_rate": 6.196917100758887e-06, "loss": 0.3307, "step": 28411 }, { "epoch": 1.3038410352897984, "grad_norm": 0.4688359200954437, "learning_rate": 6.196679040223086e-06, "loss": 0.3708, "step": 28412 }, { "epoch": 1.303886925795053, "grad_norm": 0.47724464535713196, "learning_rate": 6.19644097680965e-06, "loss": 0.3128, "step": 28413 }, { "epoch": 1.3039328163003074, "grad_norm": 0.456680566072464, "learning_rate": 6.19620291051915e-06, "loss": 0.3219, "step": 28414 }, { "epoch": 1.303978706805562, "grad_norm": 0.47553175687789917, "learning_rate": 6.195964841352159e-06, "loss": 0.364, "step": 28415 }, { "epoch": 1.3040245973108164, "grad_norm": 0.45054730772972107, "learning_rate": 6.1957267693092516e-06, "loss": 0.319, "step": 28416 }, { "epoch": 1.304070487816071, "grad_norm": 0.4467466473579407, "learning_rate": 6.195488694390997e-06, "loss": 0.2866, "step": 28417 }, { "epoch": 1.3041163783213254, "grad_norm": 0.5217620730400085, "learning_rate": 6.195250616597972e-06, "loss": 0.4796, "step": 28418 }, { "epoch": 1.3041622688265797, "grad_norm": 0.5000486969947815, "learning_rate": 6.195012535930745e-06, "loss": 0.4225, "step": 28419 }, { "epoch": 1.3042081593318342, "grad_norm": 0.49872806668281555, "learning_rate": 6.194774452389889e-06, "loss": 0.3938, "step": 28420 }, { "epoch": 1.3042540498370887, "grad_norm": 0.4550837278366089, "learning_rate": 6.194536365975979e-06, "loss": 0.3249, "step": 28421 }, { "epoch": 1.3042999403423432, "grad_norm": 0.4734867513179779, "learning_rate": 6.194298276689585e-06, "loss": 0.3488, "step": 28422 }, { "epoch": 1.3043458308475977, "grad_norm": 0.4773116707801819, "learning_rate": 6.194060184531281e-06, "loss": 0.3632, "step": 28423 }, { "epoch": 1.304391721352852, "grad_norm": 0.4427802562713623, "learning_rate": 6.193822089501639e-06, "loss": 0.3135, "step": 28424 }, { "epoch": 1.3044376118581065, "grad_norm": 0.5037828087806702, "learning_rate": 6.193583991601232e-06, "loss": 0.3806, "step": 28425 }, { "epoch": 1.304483502363361, "grad_norm": 0.48367759585380554, "learning_rate": 6.193345890830632e-06, "loss": 0.3272, "step": 28426 }, { "epoch": 1.3045293928686155, "grad_norm": 0.4710841476917267, "learning_rate": 6.193107787190412e-06, "loss": 0.3728, "step": 28427 }, { "epoch": 1.30457528337387, "grad_norm": 0.48471495509147644, "learning_rate": 6.192869680681144e-06, "loss": 0.3808, "step": 28428 }, { "epoch": 1.3046211738791245, "grad_norm": 0.4482499957084656, "learning_rate": 6.192631571303401e-06, "loss": 0.3245, "step": 28429 }, { "epoch": 1.304667064384379, "grad_norm": 0.4741309583187103, "learning_rate": 6.192393459057756e-06, "loss": 0.3609, "step": 28430 }, { "epoch": 1.3047129548896335, "grad_norm": 0.4384614825248718, "learning_rate": 6.19215534394478e-06, "loss": 0.3385, "step": 28431 }, { "epoch": 1.3047588453948877, "grad_norm": 0.5107391476631165, "learning_rate": 6.191917225965048e-06, "loss": 0.4438, "step": 28432 }, { "epoch": 1.3048047359001422, "grad_norm": 0.44049322605133057, "learning_rate": 6.1916791051191295e-06, "loss": 0.2815, "step": 28433 }, { "epoch": 1.3048506264053967, "grad_norm": 0.43820860981941223, "learning_rate": 6.191440981407601e-06, "loss": 0.2984, "step": 28434 }, { "epoch": 1.3048965169106512, "grad_norm": 0.4667779803276062, "learning_rate": 6.191202854831031e-06, "loss": 0.3383, "step": 28435 }, { "epoch": 1.3049424074159057, "grad_norm": 0.47031718492507935, "learning_rate": 6.1909647253899955e-06, "loss": 0.3415, "step": 28436 }, { "epoch": 1.30498829792116, "grad_norm": 0.47843238711357117, "learning_rate": 6.190726593085063e-06, "loss": 0.3401, "step": 28437 }, { "epoch": 1.3050341884264145, "grad_norm": 0.49746066331863403, "learning_rate": 6.190488457916811e-06, "loss": 0.4139, "step": 28438 }, { "epoch": 1.305080078931669, "grad_norm": 0.47530797123908997, "learning_rate": 6.190250319885811e-06, "loss": 0.3762, "step": 28439 }, { "epoch": 1.3051259694369235, "grad_norm": 0.4877783954143524, "learning_rate": 6.190012178992634e-06, "loss": 0.4019, "step": 28440 }, { "epoch": 1.305171859942178, "grad_norm": 0.46580812335014343, "learning_rate": 6.189774035237852e-06, "loss": 0.369, "step": 28441 }, { "epoch": 1.3052177504474325, "grad_norm": 0.46480369567871094, "learning_rate": 6.18953588862204e-06, "loss": 0.3474, "step": 28442 }, { "epoch": 1.305263640952687, "grad_norm": 0.47290506958961487, "learning_rate": 6.18929773914577e-06, "loss": 0.3566, "step": 28443 }, { "epoch": 1.3053095314579415, "grad_norm": 0.43707790970802307, "learning_rate": 6.189059586809614e-06, "loss": 0.3216, "step": 28444 }, { "epoch": 1.3053554219631958, "grad_norm": 0.48596128821372986, "learning_rate": 6.188821431614145e-06, "loss": 0.3701, "step": 28445 }, { "epoch": 1.3054013124684503, "grad_norm": 0.46648699045181274, "learning_rate": 6.188583273559935e-06, "loss": 0.3376, "step": 28446 }, { "epoch": 1.3054472029737048, "grad_norm": 0.4911003112792969, "learning_rate": 6.188345112647558e-06, "loss": 0.4069, "step": 28447 }, { "epoch": 1.3054930934789593, "grad_norm": 0.4514539837837219, "learning_rate": 6.188106948877585e-06, "loss": 0.3508, "step": 28448 }, { "epoch": 1.3055389839842135, "grad_norm": 0.4720689654350281, "learning_rate": 6.18786878225059e-06, "loss": 0.3627, "step": 28449 }, { "epoch": 1.305584874489468, "grad_norm": 0.5006113648414612, "learning_rate": 6.187630612767146e-06, "loss": 0.379, "step": 28450 }, { "epoch": 1.3056307649947225, "grad_norm": 0.4580785036087036, "learning_rate": 6.187392440427826e-06, "loss": 0.3312, "step": 28451 }, { "epoch": 1.305676655499977, "grad_norm": 0.4630391597747803, "learning_rate": 6.1871542652332e-06, "loss": 0.3858, "step": 28452 }, { "epoch": 1.3057225460052315, "grad_norm": 0.4813069999217987, "learning_rate": 6.1869160871838456e-06, "loss": 0.3974, "step": 28453 }, { "epoch": 1.305768436510486, "grad_norm": 0.49546414613723755, "learning_rate": 6.1866779062803295e-06, "loss": 0.3618, "step": 28454 }, { "epoch": 1.3058143270157405, "grad_norm": 0.4998624324798584, "learning_rate": 6.186439722523227e-06, "loss": 0.3496, "step": 28455 }, { "epoch": 1.305860217520995, "grad_norm": 0.4507425129413605, "learning_rate": 6.186201535913115e-06, "loss": 0.3686, "step": 28456 }, { "epoch": 1.3059061080262493, "grad_norm": 0.3999234437942505, "learning_rate": 6.185963346450561e-06, "loss": 0.2334, "step": 28457 }, { "epoch": 1.3059519985315038, "grad_norm": 0.4932142198085785, "learning_rate": 6.18572515413614e-06, "loss": 0.3719, "step": 28458 }, { "epoch": 1.3059978890367583, "grad_norm": 0.45936155319213867, "learning_rate": 6.1854869589704234e-06, "loss": 0.3537, "step": 28459 }, { "epoch": 1.3060437795420128, "grad_norm": 0.4460750222206116, "learning_rate": 6.185248760953985e-06, "loss": 0.2843, "step": 28460 }, { "epoch": 1.3060896700472673, "grad_norm": 0.4730258584022522, "learning_rate": 6.185010560087398e-06, "loss": 0.3895, "step": 28461 }, { "epoch": 1.3061355605525216, "grad_norm": 0.46346402168273926, "learning_rate": 6.184772356371234e-06, "loss": 0.3535, "step": 28462 }, { "epoch": 1.306181451057776, "grad_norm": 0.46151021122932434, "learning_rate": 6.184534149806066e-06, "loss": 0.3312, "step": 28463 }, { "epoch": 1.3062273415630306, "grad_norm": 0.515189528465271, "learning_rate": 6.184295940392469e-06, "loss": 0.3874, "step": 28464 }, { "epoch": 1.306273232068285, "grad_norm": 0.4880011975765228, "learning_rate": 6.184057728131011e-06, "loss": 0.4162, "step": 28465 }, { "epoch": 1.3063191225735395, "grad_norm": 0.44706106185913086, "learning_rate": 6.183819513022269e-06, "loss": 0.3099, "step": 28466 }, { "epoch": 1.306365013078794, "grad_norm": 0.48643556237220764, "learning_rate": 6.183581295066817e-06, "loss": 0.3412, "step": 28467 }, { "epoch": 1.3064109035840485, "grad_norm": 0.4929741322994232, "learning_rate": 6.183343074265224e-06, "loss": 0.3529, "step": 28468 }, { "epoch": 1.306456794089303, "grad_norm": 0.47997915744781494, "learning_rate": 6.183104850618064e-06, "loss": 0.3008, "step": 28469 }, { "epoch": 1.3065026845945573, "grad_norm": 0.505718469619751, "learning_rate": 6.1828666241259095e-06, "loss": 0.3989, "step": 28470 }, { "epoch": 1.3065485750998118, "grad_norm": 0.4884539544582367, "learning_rate": 6.182628394789335e-06, "loss": 0.4117, "step": 28471 }, { "epoch": 1.3065944656050663, "grad_norm": 0.4639051854610443, "learning_rate": 6.182390162608913e-06, "loss": 0.3284, "step": 28472 }, { "epoch": 1.3066403561103208, "grad_norm": 0.4939505159854889, "learning_rate": 6.1821519275852156e-06, "loss": 0.412, "step": 28473 }, { "epoch": 1.3066862466155753, "grad_norm": 0.4454679787158966, "learning_rate": 6.181913689718815e-06, "loss": 0.3201, "step": 28474 }, { "epoch": 1.3067321371208296, "grad_norm": 0.4373287260532379, "learning_rate": 6.1816754490102845e-06, "loss": 0.2965, "step": 28475 }, { "epoch": 1.306778027626084, "grad_norm": 0.4283183515071869, "learning_rate": 6.1814372054602e-06, "loss": 0.277, "step": 28476 }, { "epoch": 1.3068239181313386, "grad_norm": 0.43781235814094543, "learning_rate": 6.18119895906913e-06, "loss": 0.309, "step": 28477 }, { "epoch": 1.306869808636593, "grad_norm": 0.4613669514656067, "learning_rate": 6.1809607098376515e-06, "loss": 0.3414, "step": 28478 }, { "epoch": 1.3069156991418476, "grad_norm": 0.44764384627342224, "learning_rate": 6.180722457766332e-06, "loss": 0.3411, "step": 28479 }, { "epoch": 1.306961589647102, "grad_norm": 0.47301849722862244, "learning_rate": 6.18048420285575e-06, "loss": 0.3546, "step": 28480 }, { "epoch": 1.3070074801523566, "grad_norm": 0.4886377453804016, "learning_rate": 6.180245945106475e-06, "loss": 0.326, "step": 28481 }, { "epoch": 1.307053370657611, "grad_norm": 0.4973951578140259, "learning_rate": 6.18000768451908e-06, "loss": 0.4175, "step": 28482 }, { "epoch": 1.3070992611628653, "grad_norm": 0.5406638979911804, "learning_rate": 6.179769421094141e-06, "loss": 0.4031, "step": 28483 }, { "epoch": 1.3071451516681198, "grad_norm": 0.4691663384437561, "learning_rate": 6.179531154832229e-06, "loss": 0.3605, "step": 28484 }, { "epoch": 1.3071910421733743, "grad_norm": 0.4627991318702698, "learning_rate": 6.179292885733915e-06, "loss": 0.3176, "step": 28485 }, { "epoch": 1.3072369326786288, "grad_norm": 0.4395746886730194, "learning_rate": 6.179054613799774e-06, "loss": 0.2944, "step": 28486 }, { "epoch": 1.3072828231838833, "grad_norm": 0.4376395344734192, "learning_rate": 6.17881633903038e-06, "loss": 0.2796, "step": 28487 }, { "epoch": 1.3073287136891376, "grad_norm": 0.4390043616294861, "learning_rate": 6.178578061426304e-06, "loss": 0.3046, "step": 28488 }, { "epoch": 1.307374604194392, "grad_norm": 0.4263704717159271, "learning_rate": 6.17833978098812e-06, "loss": 0.2943, "step": 28489 }, { "epoch": 1.3074204946996466, "grad_norm": 0.4778251349925995, "learning_rate": 6.178101497716402e-06, "loss": 0.3582, "step": 28490 }, { "epoch": 1.307466385204901, "grad_norm": 0.5198782682418823, "learning_rate": 6.17786321161172e-06, "loss": 0.4067, "step": 28491 }, { "epoch": 1.3075122757101556, "grad_norm": 0.4476759433746338, "learning_rate": 6.177624922674648e-06, "loss": 0.3016, "step": 28492 }, { "epoch": 1.30755816621541, "grad_norm": 0.4724082052707672, "learning_rate": 6.1773866309057615e-06, "loss": 0.3988, "step": 28493 }, { "epoch": 1.3076040567206646, "grad_norm": 0.49371281266212463, "learning_rate": 6.177148336305632e-06, "loss": 0.3919, "step": 28494 }, { "epoch": 1.307649947225919, "grad_norm": 0.4505222737789154, "learning_rate": 6.176910038874831e-06, "loss": 0.3482, "step": 28495 }, { "epoch": 1.3076958377311734, "grad_norm": 0.4916074275970459, "learning_rate": 6.176671738613933e-06, "loss": 0.3962, "step": 28496 }, { "epoch": 1.3077417282364279, "grad_norm": 0.471711665391922, "learning_rate": 6.17643343552351e-06, "loss": 0.3532, "step": 28497 }, { "epoch": 1.3077876187416824, "grad_norm": 0.46106070280075073, "learning_rate": 6.176195129604138e-06, "loss": 0.3353, "step": 28498 }, { "epoch": 1.3078335092469369, "grad_norm": 0.44245418906211853, "learning_rate": 6.175956820856386e-06, "loss": 0.3278, "step": 28499 }, { "epoch": 1.3078793997521911, "grad_norm": 0.47627493739128113, "learning_rate": 6.1757185092808305e-06, "loss": 0.3779, "step": 28500 }, { "epoch": 1.3079252902574456, "grad_norm": 0.45083844661712646, "learning_rate": 6.175480194878043e-06, "loss": 0.3726, "step": 28501 }, { "epoch": 1.3079711807627001, "grad_norm": 0.4742313325405121, "learning_rate": 6.175241877648596e-06, "loss": 0.3789, "step": 28502 }, { "epoch": 1.3080170712679546, "grad_norm": 0.5404680967330933, "learning_rate": 6.175003557593062e-06, "loss": 0.3533, "step": 28503 }, { "epoch": 1.3080629617732091, "grad_norm": 0.5208746790885925, "learning_rate": 6.174765234712017e-06, "loss": 0.4807, "step": 28504 }, { "epoch": 1.3081088522784636, "grad_norm": 0.4549843966960907, "learning_rate": 6.174526909006033e-06, "loss": 0.3604, "step": 28505 }, { "epoch": 1.3081547427837181, "grad_norm": 0.45503559708595276, "learning_rate": 6.17428858047568e-06, "loss": 0.3889, "step": 28506 }, { "epoch": 1.3082006332889726, "grad_norm": 0.43390995264053345, "learning_rate": 6.174050249121535e-06, "loss": 0.2902, "step": 28507 }, { "epoch": 1.308246523794227, "grad_norm": 0.49847468733787537, "learning_rate": 6.17381191494417e-06, "loss": 0.4126, "step": 28508 }, { "epoch": 1.3082924142994814, "grad_norm": 0.44737550616264343, "learning_rate": 6.173573577944158e-06, "loss": 0.3372, "step": 28509 }, { "epoch": 1.308338304804736, "grad_norm": 0.47744810581207275, "learning_rate": 6.17333523812207e-06, "loss": 0.4259, "step": 28510 }, { "epoch": 1.3083841953099904, "grad_norm": 0.5481723546981812, "learning_rate": 6.1730968954784835e-06, "loss": 0.5268, "step": 28511 }, { "epoch": 1.308430085815245, "grad_norm": 0.477098286151886, "learning_rate": 6.172858550013968e-06, "loss": 0.3497, "step": 28512 }, { "epoch": 1.3084759763204992, "grad_norm": 0.47116851806640625, "learning_rate": 6.172620201729097e-06, "loss": 0.3119, "step": 28513 }, { "epoch": 1.3085218668257537, "grad_norm": 0.4341926872730255, "learning_rate": 6.172381850624445e-06, "loss": 0.2937, "step": 28514 }, { "epoch": 1.3085677573310082, "grad_norm": 0.5140688419342041, "learning_rate": 6.172143496700587e-06, "loss": 0.4393, "step": 28515 }, { "epoch": 1.3086136478362627, "grad_norm": 0.714664101600647, "learning_rate": 6.171905139958092e-06, "loss": 0.3828, "step": 28516 }, { "epoch": 1.3086595383415172, "grad_norm": 0.48204636573791504, "learning_rate": 6.171666780397534e-06, "loss": 0.3353, "step": 28517 }, { "epoch": 1.3087054288467717, "grad_norm": 0.4299159348011017, "learning_rate": 6.171428418019488e-06, "loss": 0.3335, "step": 28518 }, { "epoch": 1.3087513193520262, "grad_norm": 0.49862587451934814, "learning_rate": 6.171190052824526e-06, "loss": 0.4326, "step": 28519 }, { "epoch": 1.3087972098572807, "grad_norm": 0.4644327461719513, "learning_rate": 6.170951684813222e-06, "loss": 0.3636, "step": 28520 }, { "epoch": 1.308843100362535, "grad_norm": 0.4814310073852539, "learning_rate": 6.17071331398615e-06, "loss": 0.3797, "step": 28521 }, { "epoch": 1.3088889908677894, "grad_norm": 0.49443548917770386, "learning_rate": 6.17047494034388e-06, "loss": 0.3874, "step": 28522 }, { "epoch": 1.308934881373044, "grad_norm": 0.4782508611679077, "learning_rate": 6.170236563886988e-06, "loss": 0.3733, "step": 28523 }, { "epoch": 1.3089807718782984, "grad_norm": 0.47029030323028564, "learning_rate": 6.1699981846160475e-06, "loss": 0.3413, "step": 28524 }, { "epoch": 1.309026662383553, "grad_norm": 0.47686097025871277, "learning_rate": 6.169759802531629e-06, "loss": 0.3726, "step": 28525 }, { "epoch": 1.3090725528888072, "grad_norm": 0.4316127598285675, "learning_rate": 6.1695214176343074e-06, "loss": 0.3175, "step": 28526 }, { "epoch": 1.3091184433940617, "grad_norm": 0.503305971622467, "learning_rate": 6.169283029924657e-06, "loss": 0.4216, "step": 28527 }, { "epoch": 1.3091643338993162, "grad_norm": 0.48264917731285095, "learning_rate": 6.169044639403249e-06, "loss": 0.3673, "step": 28528 }, { "epoch": 1.3092102244045707, "grad_norm": 0.4756075143814087, "learning_rate": 6.168806246070659e-06, "loss": 0.3806, "step": 28529 }, { "epoch": 1.3092561149098252, "grad_norm": 0.518028974533081, "learning_rate": 6.168567849927457e-06, "loss": 0.4301, "step": 28530 }, { "epoch": 1.3093020054150797, "grad_norm": 0.46427789330482483, "learning_rate": 6.168329450974217e-06, "loss": 0.3651, "step": 28531 }, { "epoch": 1.3093478959203342, "grad_norm": 0.4986291527748108, "learning_rate": 6.168091049211516e-06, "loss": 0.4542, "step": 28532 }, { "epoch": 1.3093937864255887, "grad_norm": 0.41481804847717285, "learning_rate": 6.167852644639925e-06, "loss": 0.2913, "step": 28533 }, { "epoch": 1.309439676930843, "grad_norm": 0.4598534405231476, "learning_rate": 6.167614237260015e-06, "loss": 0.3372, "step": 28534 }, { "epoch": 1.3094855674360975, "grad_norm": 0.44803857803344727, "learning_rate": 6.167375827072364e-06, "loss": 0.3402, "step": 28535 }, { "epoch": 1.309531457941352, "grad_norm": 0.4834659993648529, "learning_rate": 6.16713741407754e-06, "loss": 0.3986, "step": 28536 }, { "epoch": 1.3095773484466064, "grad_norm": 0.4776255488395691, "learning_rate": 6.166898998276121e-06, "loss": 0.3446, "step": 28537 }, { "epoch": 1.3096232389518607, "grad_norm": 0.4730452299118042, "learning_rate": 6.166660579668677e-06, "loss": 0.4089, "step": 28538 }, { "epoch": 1.3096691294571152, "grad_norm": 0.4686093330383301, "learning_rate": 6.166422158255781e-06, "loss": 0.3704, "step": 28539 }, { "epoch": 1.3097150199623697, "grad_norm": 0.5005902051925659, "learning_rate": 6.166183734038009e-06, "loss": 0.3907, "step": 28540 }, { "epoch": 1.3097609104676242, "grad_norm": 0.45917049050331116, "learning_rate": 6.165945307015933e-06, "loss": 0.3282, "step": 28541 }, { "epoch": 1.3098068009728787, "grad_norm": 0.4347369074821472, "learning_rate": 6.165706877190127e-06, "loss": 0.3175, "step": 28542 }, { "epoch": 1.3098526914781332, "grad_norm": 0.461350679397583, "learning_rate": 6.165468444561164e-06, "loss": 0.3034, "step": 28543 }, { "epoch": 1.3098985819833877, "grad_norm": 0.476781964302063, "learning_rate": 6.165230009129618e-06, "loss": 0.3833, "step": 28544 }, { "epoch": 1.3099444724886422, "grad_norm": 0.4625754654407501, "learning_rate": 6.164991570896061e-06, "loss": 0.3656, "step": 28545 }, { "epoch": 1.3099903629938965, "grad_norm": 0.4508261978626251, "learning_rate": 6.164753129861066e-06, "loss": 0.3136, "step": 28546 }, { "epoch": 1.310036253499151, "grad_norm": 0.4516075551509857, "learning_rate": 6.164514686025208e-06, "loss": 0.3564, "step": 28547 }, { "epoch": 1.3100821440044055, "grad_norm": 0.484607458114624, "learning_rate": 6.1642762393890595e-06, "loss": 0.3464, "step": 28548 }, { "epoch": 1.31012803450966, "grad_norm": 0.4777067005634308, "learning_rate": 6.164037789953195e-06, "loss": 0.3728, "step": 28549 }, { "epoch": 1.3101739250149145, "grad_norm": 0.4741908013820648, "learning_rate": 6.163799337718186e-06, "loss": 0.3439, "step": 28550 }, { "epoch": 1.3102198155201688, "grad_norm": 0.5139142274856567, "learning_rate": 6.163560882684606e-06, "loss": 0.4746, "step": 28551 }, { "epoch": 1.3102657060254232, "grad_norm": 0.4546709358692169, "learning_rate": 6.163322424853031e-06, "loss": 0.3308, "step": 28552 }, { "epoch": 1.3103115965306777, "grad_norm": 0.4857752025127411, "learning_rate": 6.163083964224031e-06, "loss": 0.3868, "step": 28553 }, { "epoch": 1.3103574870359322, "grad_norm": 0.47456473112106323, "learning_rate": 6.162845500798184e-06, "loss": 0.36, "step": 28554 }, { "epoch": 1.3104033775411867, "grad_norm": 0.49496662616729736, "learning_rate": 6.1626070345760595e-06, "loss": 0.4222, "step": 28555 }, { "epoch": 1.3104492680464412, "grad_norm": 0.4268829822540283, "learning_rate": 6.16236856555823e-06, "loss": 0.3263, "step": 28556 }, { "epoch": 1.3104951585516957, "grad_norm": 0.5098237991333008, "learning_rate": 6.1621300937452726e-06, "loss": 0.3907, "step": 28557 }, { "epoch": 1.3105410490569502, "grad_norm": 0.44447413086891174, "learning_rate": 6.1618916191377585e-06, "loss": 0.3339, "step": 28558 }, { "epoch": 1.3105869395622045, "grad_norm": 0.5054704546928406, "learning_rate": 6.161653141736264e-06, "loss": 0.4602, "step": 28559 }, { "epoch": 1.310632830067459, "grad_norm": 0.461489737033844, "learning_rate": 6.161414661541358e-06, "loss": 0.3764, "step": 28560 }, { "epoch": 1.3106787205727135, "grad_norm": 0.48149654269218445, "learning_rate": 6.161176178553616e-06, "loss": 0.3654, "step": 28561 }, { "epoch": 1.310724611077968, "grad_norm": 0.44392064213752747, "learning_rate": 6.160937692773612e-06, "loss": 0.3485, "step": 28562 }, { "epoch": 1.3107705015832225, "grad_norm": 0.4637071490287781, "learning_rate": 6.160699204201921e-06, "loss": 0.3375, "step": 28563 }, { "epoch": 1.3108163920884768, "grad_norm": 0.46568265557289124, "learning_rate": 6.160460712839113e-06, "loss": 0.319, "step": 28564 }, { "epoch": 1.3108622825937313, "grad_norm": 0.45059555768966675, "learning_rate": 6.160222218685763e-06, "loss": 0.3324, "step": 28565 }, { "epoch": 1.3109081730989858, "grad_norm": 0.47354385256767273, "learning_rate": 6.1599837217424464e-06, "loss": 0.3738, "step": 28566 }, { "epoch": 1.3109540636042403, "grad_norm": 0.4755570590496063, "learning_rate": 6.159745222009733e-06, "loss": 0.3454, "step": 28567 }, { "epoch": 1.3109999541094948, "grad_norm": 0.4850596785545349, "learning_rate": 6.1595067194882e-06, "loss": 0.4403, "step": 28568 }, { "epoch": 1.3110458446147493, "grad_norm": 0.49915286898612976, "learning_rate": 6.159268214178419e-06, "loss": 0.4438, "step": 28569 }, { "epoch": 1.3110917351200038, "grad_norm": 0.48258668184280396, "learning_rate": 6.1590297060809634e-06, "loss": 0.3903, "step": 28570 }, { "epoch": 1.3111376256252583, "grad_norm": 0.44674497842788696, "learning_rate": 6.158791195196407e-06, "loss": 0.3466, "step": 28571 }, { "epoch": 1.3111835161305125, "grad_norm": 0.47380977869033813, "learning_rate": 6.158552681525325e-06, "loss": 0.3686, "step": 28572 }, { "epoch": 1.311229406635767, "grad_norm": 0.44130846858024597, "learning_rate": 6.158314165068287e-06, "loss": 0.3436, "step": 28573 }, { "epoch": 1.3112752971410215, "grad_norm": 0.4399886727333069, "learning_rate": 6.15807564582587e-06, "loss": 0.3195, "step": 28574 }, { "epoch": 1.311321187646276, "grad_norm": 0.47643277049064636, "learning_rate": 6.157837123798648e-06, "loss": 0.3699, "step": 28575 }, { "epoch": 1.3113670781515305, "grad_norm": 0.4628371596336365, "learning_rate": 6.157598598987193e-06, "loss": 0.3854, "step": 28576 }, { "epoch": 1.3114129686567848, "grad_norm": 0.46979060769081116, "learning_rate": 6.1573600713920765e-06, "loss": 0.3697, "step": 28577 }, { "epoch": 1.3114588591620393, "grad_norm": 0.43809646368026733, "learning_rate": 6.157121541013875e-06, "loss": 0.2975, "step": 28578 }, { "epoch": 1.3115047496672938, "grad_norm": 0.48673689365386963, "learning_rate": 6.156883007853161e-06, "loss": 0.3443, "step": 28579 }, { "epoch": 1.3115506401725483, "grad_norm": 0.4550205171108246, "learning_rate": 6.156644471910511e-06, "loss": 0.3474, "step": 28580 }, { "epoch": 1.3115965306778028, "grad_norm": 0.46326082944869995, "learning_rate": 6.156405933186494e-06, "loss": 0.3179, "step": 28581 }, { "epoch": 1.3116424211830573, "grad_norm": 0.4759811460971832, "learning_rate": 6.156167391681687e-06, "loss": 0.3256, "step": 28582 }, { "epoch": 1.3116883116883118, "grad_norm": 0.47733089327812195, "learning_rate": 6.155928847396661e-06, "loss": 0.3304, "step": 28583 }, { "epoch": 1.3117342021935663, "grad_norm": 0.4848475158214569, "learning_rate": 6.155690300331991e-06, "loss": 0.3442, "step": 28584 }, { "epoch": 1.3117800926988206, "grad_norm": 0.4695901572704315, "learning_rate": 6.15545175048825e-06, "loss": 0.3261, "step": 28585 }, { "epoch": 1.311825983204075, "grad_norm": 0.4677128493785858, "learning_rate": 6.155213197866014e-06, "loss": 0.3208, "step": 28586 }, { "epoch": 1.3118718737093296, "grad_norm": 0.478785902261734, "learning_rate": 6.154974642465855e-06, "loss": 0.3797, "step": 28587 }, { "epoch": 1.311917764214584, "grad_norm": 0.43242666125297546, "learning_rate": 6.154736084288345e-06, "loss": 0.3033, "step": 28588 }, { "epoch": 1.3119636547198383, "grad_norm": 0.5031163692474365, "learning_rate": 6.154497523334061e-06, "loss": 0.3918, "step": 28589 }, { "epoch": 1.3120095452250928, "grad_norm": 0.4640864431858063, "learning_rate": 6.154258959603573e-06, "loss": 0.3663, "step": 28590 }, { "epoch": 1.3120554357303473, "grad_norm": 0.4770459234714508, "learning_rate": 6.1540203930974575e-06, "loss": 0.3563, "step": 28591 }, { "epoch": 1.3121013262356018, "grad_norm": 0.48488062620162964, "learning_rate": 6.1537818238162874e-06, "loss": 0.418, "step": 28592 }, { "epoch": 1.3121472167408563, "grad_norm": 0.4416406452655792, "learning_rate": 6.153543251760634e-06, "loss": 0.3619, "step": 28593 }, { "epoch": 1.3121931072461108, "grad_norm": 0.505152702331543, "learning_rate": 6.153304676931075e-06, "loss": 0.436, "step": 28594 }, { "epoch": 1.3122389977513653, "grad_norm": 0.4532804489135742, "learning_rate": 6.1530660993281824e-06, "loss": 0.33, "step": 28595 }, { "epoch": 1.3122848882566198, "grad_norm": 0.4559321701526642, "learning_rate": 6.152827518952529e-06, "loss": 0.339, "step": 28596 }, { "epoch": 1.312330778761874, "grad_norm": 0.47121185064315796, "learning_rate": 6.15258893580469e-06, "loss": 0.3846, "step": 28597 }, { "epoch": 1.3123766692671286, "grad_norm": 0.4465380907058716, "learning_rate": 6.1523503498852375e-06, "loss": 0.3448, "step": 28598 }, { "epoch": 1.312422559772383, "grad_norm": 0.43431711196899414, "learning_rate": 6.152111761194747e-06, "loss": 0.2726, "step": 28599 }, { "epoch": 1.3124684502776376, "grad_norm": 0.470115065574646, "learning_rate": 6.1518731697337905e-06, "loss": 0.3722, "step": 28600 }, { "epoch": 1.312514340782892, "grad_norm": 0.4716876447200775, "learning_rate": 6.151634575502943e-06, "loss": 0.3637, "step": 28601 }, { "epoch": 1.3125602312881464, "grad_norm": 0.4862874150276184, "learning_rate": 6.1513959785027764e-06, "loss": 0.3013, "step": 28602 }, { "epoch": 1.3126061217934009, "grad_norm": 0.42651960253715515, "learning_rate": 6.15115737873387e-06, "loss": 0.3022, "step": 28603 }, { "epoch": 1.3126520122986554, "grad_norm": 0.46749043464660645, "learning_rate": 6.150918776196789e-06, "loss": 0.3815, "step": 28604 }, { "epoch": 1.3126979028039099, "grad_norm": 0.46974918246269226, "learning_rate": 6.150680170892113e-06, "loss": 0.3741, "step": 28605 }, { "epoch": 1.3127437933091644, "grad_norm": 0.49506789445877075, "learning_rate": 6.150441562820415e-06, "loss": 0.3798, "step": 28606 }, { "epoch": 1.3127896838144189, "grad_norm": 0.4572712481021881, "learning_rate": 6.150202951982267e-06, "loss": 0.3512, "step": 28607 }, { "epoch": 1.3128355743196733, "grad_norm": 0.4933372437953949, "learning_rate": 6.149964338378245e-06, "loss": 0.3827, "step": 28608 }, { "epoch": 1.3128814648249278, "grad_norm": 0.42919716238975525, "learning_rate": 6.149725722008922e-06, "loss": 0.3089, "step": 28609 }, { "epoch": 1.3129273553301821, "grad_norm": 0.4745995104312897, "learning_rate": 6.14948710287487e-06, "loss": 0.3309, "step": 28610 }, { "epoch": 1.3129732458354366, "grad_norm": 0.5293872952461243, "learning_rate": 6.149248480976666e-06, "loss": 0.4771, "step": 28611 }, { "epoch": 1.3130191363406911, "grad_norm": 0.4576182961463928, "learning_rate": 6.14900985631488e-06, "loss": 0.3734, "step": 28612 }, { "epoch": 1.3130650268459456, "grad_norm": 0.4510018229484558, "learning_rate": 6.148771228890089e-06, "loss": 0.3272, "step": 28613 }, { "epoch": 1.3131109173512001, "grad_norm": 0.4822823405265808, "learning_rate": 6.1485325987028675e-06, "loss": 0.4001, "step": 28614 }, { "epoch": 1.3131568078564544, "grad_norm": 0.479226291179657, "learning_rate": 6.148293965753786e-06, "loss": 0.3757, "step": 28615 }, { "epoch": 1.3132026983617089, "grad_norm": 0.47885024547576904, "learning_rate": 6.148055330043419e-06, "loss": 0.422, "step": 28616 }, { "epoch": 1.3132485888669634, "grad_norm": 0.5850602984428406, "learning_rate": 6.147816691572342e-06, "loss": 0.3542, "step": 28617 }, { "epoch": 1.3132944793722179, "grad_norm": 0.5081523656845093, "learning_rate": 6.147578050341129e-06, "loss": 0.4153, "step": 28618 }, { "epoch": 1.3133403698774724, "grad_norm": 0.4890824258327484, "learning_rate": 6.147339406350352e-06, "loss": 0.3518, "step": 28619 }, { "epoch": 1.3133862603827269, "grad_norm": 0.5079331994056702, "learning_rate": 6.147100759600587e-06, "loss": 0.3607, "step": 28620 }, { "epoch": 1.3134321508879814, "grad_norm": 0.47907009720802307, "learning_rate": 6.146862110092405e-06, "loss": 0.4094, "step": 28621 }, { "epoch": 1.3134780413932359, "grad_norm": 0.4969707727432251, "learning_rate": 6.146623457826382e-06, "loss": 0.3658, "step": 28622 }, { "epoch": 1.3135239318984901, "grad_norm": 0.4608663320541382, "learning_rate": 6.146384802803093e-06, "loss": 0.3578, "step": 28623 }, { "epoch": 1.3135698224037446, "grad_norm": 0.455472469329834, "learning_rate": 6.146146145023108e-06, "loss": 0.3626, "step": 28624 }, { "epoch": 1.3136157129089991, "grad_norm": 0.4790176749229431, "learning_rate": 6.145907484487006e-06, "loss": 0.3742, "step": 28625 }, { "epoch": 1.3136616034142536, "grad_norm": 0.4668760597705841, "learning_rate": 6.145668821195356e-06, "loss": 0.3469, "step": 28626 }, { "epoch": 1.313707493919508, "grad_norm": 0.48299357295036316, "learning_rate": 6.145430155148735e-06, "loss": 0.3808, "step": 28627 }, { "epoch": 1.3137533844247624, "grad_norm": 0.4882251024246216, "learning_rate": 6.145191486347715e-06, "loss": 0.388, "step": 28628 }, { "epoch": 1.313799274930017, "grad_norm": 0.4279222786426544, "learning_rate": 6.144952814792872e-06, "loss": 0.2825, "step": 28629 }, { "epoch": 1.3138451654352714, "grad_norm": 0.5230405926704407, "learning_rate": 6.144714140484779e-06, "loss": 0.3894, "step": 28630 }, { "epoch": 1.313891055940526, "grad_norm": 0.4978478252887726, "learning_rate": 6.144475463424009e-06, "loss": 0.3642, "step": 28631 }, { "epoch": 1.3139369464457804, "grad_norm": 0.47039347887039185, "learning_rate": 6.144236783611137e-06, "loss": 0.3923, "step": 28632 }, { "epoch": 1.313982836951035, "grad_norm": 0.4302489757537842, "learning_rate": 6.143998101046736e-06, "loss": 0.2861, "step": 28633 }, { "epoch": 1.3140287274562894, "grad_norm": 0.46030721068382263, "learning_rate": 6.143759415731382e-06, "loss": 0.3553, "step": 28634 }, { "epoch": 1.3140746179615437, "grad_norm": 0.44281432032585144, "learning_rate": 6.143520727665646e-06, "loss": 0.2706, "step": 28635 }, { "epoch": 1.3141205084667982, "grad_norm": 0.5107075572013855, "learning_rate": 6.143282036850105e-06, "loss": 0.4007, "step": 28636 }, { "epoch": 1.3141663989720527, "grad_norm": 0.46805277466773987, "learning_rate": 6.143043343285331e-06, "loss": 0.4281, "step": 28637 }, { "epoch": 1.3142122894773072, "grad_norm": 0.48218128085136414, "learning_rate": 6.142804646971898e-06, "loss": 0.3595, "step": 28638 }, { "epoch": 1.3142581799825617, "grad_norm": 0.5096114873886108, "learning_rate": 6.1425659479103805e-06, "loss": 0.4215, "step": 28639 }, { "epoch": 1.314304070487816, "grad_norm": 0.4957711398601532, "learning_rate": 6.142327246101354e-06, "loss": 0.4389, "step": 28640 }, { "epoch": 1.3143499609930704, "grad_norm": 0.45786359906196594, "learning_rate": 6.14208854154539e-06, "loss": 0.3458, "step": 28641 }, { "epoch": 1.314395851498325, "grad_norm": 0.4453641176223755, "learning_rate": 6.141849834243063e-06, "loss": 0.3449, "step": 28642 }, { "epoch": 1.3144417420035794, "grad_norm": 0.5161198973655701, "learning_rate": 6.141611124194948e-06, "loss": 0.3953, "step": 28643 }, { "epoch": 1.314487632508834, "grad_norm": 0.4899829626083374, "learning_rate": 6.141372411401618e-06, "loss": 0.3986, "step": 28644 }, { "epoch": 1.3145335230140884, "grad_norm": 0.4501853287220001, "learning_rate": 6.1411336958636495e-06, "loss": 0.3136, "step": 28645 }, { "epoch": 1.314579413519343, "grad_norm": 0.47929906845092773, "learning_rate": 6.140894977581612e-06, "loss": 0.3416, "step": 28646 }, { "epoch": 1.3146253040245974, "grad_norm": 0.46178215742111206, "learning_rate": 6.140656256556083e-06, "loss": 0.3903, "step": 28647 }, { "epoch": 1.3146711945298517, "grad_norm": 0.4913558065891266, "learning_rate": 6.140417532787637e-06, "loss": 0.3957, "step": 28648 }, { "epoch": 1.3147170850351062, "grad_norm": 0.45453307032585144, "learning_rate": 6.140178806276846e-06, "loss": 0.3552, "step": 28649 }, { "epoch": 1.3147629755403607, "grad_norm": 0.4590317904949188, "learning_rate": 6.1399400770242835e-06, "loss": 0.3189, "step": 28650 }, { "epoch": 1.3148088660456152, "grad_norm": 0.47262808680534363, "learning_rate": 6.139701345030526e-06, "loss": 0.3828, "step": 28651 }, { "epoch": 1.3148547565508697, "grad_norm": 0.46079179644584656, "learning_rate": 6.139462610296147e-06, "loss": 0.3459, "step": 28652 }, { "epoch": 1.314900647056124, "grad_norm": 0.48321473598480225, "learning_rate": 6.13922387282172e-06, "loss": 0.3425, "step": 28653 }, { "epoch": 1.3149465375613785, "grad_norm": 0.41569674015045166, "learning_rate": 6.138985132607819e-06, "loss": 0.2977, "step": 28654 }, { "epoch": 1.314992428066633, "grad_norm": 0.4783664047718048, "learning_rate": 6.138746389655017e-06, "loss": 0.3426, "step": 28655 }, { "epoch": 1.3150383185718875, "grad_norm": 0.5073363184928894, "learning_rate": 6.138507643963891e-06, "loss": 0.4288, "step": 28656 }, { "epoch": 1.315084209077142, "grad_norm": 0.5021705627441406, "learning_rate": 6.138268895535012e-06, "loss": 0.3827, "step": 28657 }, { "epoch": 1.3151300995823965, "grad_norm": 0.4829600155353546, "learning_rate": 6.138030144368957e-06, "loss": 0.4518, "step": 28658 }, { "epoch": 1.315175990087651, "grad_norm": 0.44453486800193787, "learning_rate": 6.137791390466299e-06, "loss": 0.316, "step": 28659 }, { "epoch": 1.3152218805929055, "grad_norm": 0.43608519434928894, "learning_rate": 6.1375526338276096e-06, "loss": 0.3149, "step": 28660 }, { "epoch": 1.3152677710981597, "grad_norm": 0.48824161291122437, "learning_rate": 6.137313874453466e-06, "loss": 0.3727, "step": 28661 }, { "epoch": 1.3153136616034142, "grad_norm": 0.46130818128585815, "learning_rate": 6.137075112344441e-06, "loss": 0.381, "step": 28662 }, { "epoch": 1.3153595521086687, "grad_norm": 0.4756026566028595, "learning_rate": 6.1368363475011116e-06, "loss": 0.3186, "step": 28663 }, { "epoch": 1.3154054426139232, "grad_norm": 0.506165087223053, "learning_rate": 6.136597579924047e-06, "loss": 0.3983, "step": 28664 }, { "epoch": 1.3154513331191777, "grad_norm": 0.4536078870296478, "learning_rate": 6.136358809613826e-06, "loss": 0.3175, "step": 28665 }, { "epoch": 1.315497223624432, "grad_norm": 0.43774083256721497, "learning_rate": 6.136120036571019e-06, "loss": 0.3168, "step": 28666 }, { "epoch": 1.3155431141296865, "grad_norm": 0.4618346691131592, "learning_rate": 6.135881260796202e-06, "loss": 0.3758, "step": 28667 }, { "epoch": 1.315589004634941, "grad_norm": 0.4773160219192505, "learning_rate": 6.1356424822899494e-06, "loss": 0.354, "step": 28668 }, { "epoch": 1.3156348951401955, "grad_norm": 0.4365794062614441, "learning_rate": 6.135403701052836e-06, "loss": 0.3176, "step": 28669 }, { "epoch": 1.31568078564545, "grad_norm": 0.5028301477432251, "learning_rate": 6.135164917085433e-06, "loss": 0.4069, "step": 28670 }, { "epoch": 1.3157266761507045, "grad_norm": 0.47298485040664673, "learning_rate": 6.134926130388319e-06, "loss": 0.3439, "step": 28671 }, { "epoch": 1.315772566655959, "grad_norm": 0.47862347960472107, "learning_rate": 6.134687340962064e-06, "loss": 0.375, "step": 28672 }, { "epoch": 1.3158184571612133, "grad_norm": 0.4909436106681824, "learning_rate": 6.134448548807246e-06, "loss": 0.3979, "step": 28673 }, { "epoch": 1.3158643476664678, "grad_norm": 0.47980645298957825, "learning_rate": 6.134209753924436e-06, "loss": 0.3455, "step": 28674 }, { "epoch": 1.3159102381717223, "grad_norm": 0.4563957452774048, "learning_rate": 6.133970956314209e-06, "loss": 0.3092, "step": 28675 }, { "epoch": 1.3159561286769768, "grad_norm": 0.46782323718070984, "learning_rate": 6.1337321559771415e-06, "loss": 0.3481, "step": 28676 }, { "epoch": 1.3160020191822313, "grad_norm": 0.4639793038368225, "learning_rate": 6.133493352913804e-06, "loss": 0.3211, "step": 28677 }, { "epoch": 1.3160479096874855, "grad_norm": 0.45938003063201904, "learning_rate": 6.133254547124773e-06, "loss": 0.3195, "step": 28678 }, { "epoch": 1.31609380019274, "grad_norm": 0.4461861252784729, "learning_rate": 6.133015738610622e-06, "loss": 0.3176, "step": 28679 }, { "epoch": 1.3161396906979945, "grad_norm": 0.4675120711326599, "learning_rate": 6.132776927371929e-06, "loss": 0.3625, "step": 28680 }, { "epoch": 1.316185581203249, "grad_norm": 0.4619564414024353, "learning_rate": 6.132538113409261e-06, "loss": 0.3193, "step": 28681 }, { "epoch": 1.3162314717085035, "grad_norm": 0.46000364422798157, "learning_rate": 6.132299296723199e-06, "loss": 0.3481, "step": 28682 }, { "epoch": 1.316277362213758, "grad_norm": 0.4760548174381256, "learning_rate": 6.132060477314313e-06, "loss": 0.3845, "step": 28683 }, { "epoch": 1.3163232527190125, "grad_norm": 0.47714975476264954, "learning_rate": 6.13182165518318e-06, "loss": 0.4089, "step": 28684 }, { "epoch": 1.316369143224267, "grad_norm": 0.514667809009552, "learning_rate": 6.131582830330373e-06, "loss": 0.4341, "step": 28685 }, { "epoch": 1.3164150337295213, "grad_norm": 0.47793710231781006, "learning_rate": 6.131344002756465e-06, "loss": 0.3515, "step": 28686 }, { "epoch": 1.3164609242347758, "grad_norm": 0.4991830587387085, "learning_rate": 6.131105172462033e-06, "loss": 0.367, "step": 28687 }, { "epoch": 1.3165068147400303, "grad_norm": 0.4995472729206085, "learning_rate": 6.13086633944765e-06, "loss": 0.4261, "step": 28688 }, { "epoch": 1.3165527052452848, "grad_norm": 0.4544256329536438, "learning_rate": 6.130627503713889e-06, "loss": 0.3499, "step": 28689 }, { "epoch": 1.3165985957505393, "grad_norm": 0.4764747619628906, "learning_rate": 6.130388665261328e-06, "loss": 0.3816, "step": 28690 }, { "epoch": 1.3166444862557936, "grad_norm": 0.481368750333786, "learning_rate": 6.130149824090538e-06, "loss": 0.3675, "step": 28691 }, { "epoch": 1.316690376761048, "grad_norm": 0.48215124011039734, "learning_rate": 6.1299109802020936e-06, "loss": 0.3744, "step": 28692 }, { "epoch": 1.3167362672663026, "grad_norm": 0.45092716813087463, "learning_rate": 6.129672133596572e-06, "loss": 0.3616, "step": 28693 }, { "epoch": 1.316782157771557, "grad_norm": 0.5013589859008789, "learning_rate": 6.129433284274543e-06, "loss": 0.3956, "step": 28694 }, { "epoch": 1.3168280482768115, "grad_norm": 0.4437345862388611, "learning_rate": 6.129194432236585e-06, "loss": 0.3213, "step": 28695 }, { "epoch": 1.316873938782066, "grad_norm": 0.5237861275672913, "learning_rate": 6.12895557748327e-06, "loss": 0.4029, "step": 28696 }, { "epoch": 1.3169198292873205, "grad_norm": 0.44656768441200256, "learning_rate": 6.128716720015173e-06, "loss": 0.3255, "step": 28697 }, { "epoch": 1.316965719792575, "grad_norm": 0.46499359607696533, "learning_rate": 6.128477859832869e-06, "loss": 0.3676, "step": 28698 }, { "epoch": 1.3170116102978293, "grad_norm": 0.46420058608055115, "learning_rate": 6.1282389969369316e-06, "loss": 0.3626, "step": 28699 }, { "epoch": 1.3170575008030838, "grad_norm": 0.46368592977523804, "learning_rate": 6.128000131327935e-06, "loss": 0.3572, "step": 28700 }, { "epoch": 1.3171033913083383, "grad_norm": 0.4811917841434479, "learning_rate": 6.127761263006455e-06, "loss": 0.3889, "step": 28701 }, { "epoch": 1.3171492818135928, "grad_norm": 0.45587828755378723, "learning_rate": 6.127522391973065e-06, "loss": 0.3427, "step": 28702 }, { "epoch": 1.3171951723188473, "grad_norm": 0.48763564229011536, "learning_rate": 6.12728351822834e-06, "loss": 0.3771, "step": 28703 }, { "epoch": 1.3172410628241016, "grad_norm": 0.4185521900653839, "learning_rate": 6.127044641772851e-06, "loss": 0.3012, "step": 28704 }, { "epoch": 1.317286953329356, "grad_norm": 0.4987112879753113, "learning_rate": 6.1268057626071785e-06, "loss": 0.4733, "step": 28705 }, { "epoch": 1.3173328438346106, "grad_norm": 0.48191723227500916, "learning_rate": 6.126566880731893e-06, "loss": 0.42, "step": 28706 }, { "epoch": 1.317378734339865, "grad_norm": 0.5103091597557068, "learning_rate": 6.1263279961475705e-06, "loss": 0.4178, "step": 28707 }, { "epoch": 1.3174246248451196, "grad_norm": 0.45616957545280457, "learning_rate": 6.126089108854784e-06, "loss": 0.3446, "step": 28708 }, { "epoch": 1.317470515350374, "grad_norm": 0.4339410662651062, "learning_rate": 6.125850218854107e-06, "loss": 0.3436, "step": 28709 }, { "epoch": 1.3175164058556286, "grad_norm": 0.4546510577201843, "learning_rate": 6.125611326146117e-06, "loss": 0.3351, "step": 28710 }, { "epoch": 1.317562296360883, "grad_norm": 0.4705520272254944, "learning_rate": 6.125372430731387e-06, "loss": 0.3167, "step": 28711 }, { "epoch": 1.3176081868661373, "grad_norm": 0.44667354226112366, "learning_rate": 6.125133532610492e-06, "loss": 0.3184, "step": 28712 }, { "epoch": 1.3176540773713918, "grad_norm": 0.482857882976532, "learning_rate": 6.124894631784005e-06, "loss": 0.348, "step": 28713 }, { "epoch": 1.3176999678766463, "grad_norm": 0.45483601093292236, "learning_rate": 6.124655728252502e-06, "loss": 0.2971, "step": 28714 }, { "epoch": 1.3177458583819008, "grad_norm": 0.4621037542819977, "learning_rate": 6.124416822016556e-06, "loss": 0.3932, "step": 28715 }, { "epoch": 1.3177917488871551, "grad_norm": 0.49478116631507874, "learning_rate": 6.124177913076743e-06, "loss": 0.3786, "step": 28716 }, { "epoch": 1.3178376393924096, "grad_norm": 0.45734739303588867, "learning_rate": 6.123939001433637e-06, "loss": 0.3459, "step": 28717 }, { "epoch": 1.317883529897664, "grad_norm": 0.5198931097984314, "learning_rate": 6.123700087087813e-06, "loss": 0.4695, "step": 28718 }, { "epoch": 1.3179294204029186, "grad_norm": 0.5072784423828125, "learning_rate": 6.123461170039844e-06, "loss": 0.4385, "step": 28719 }, { "epoch": 1.317975310908173, "grad_norm": 0.4468470513820648, "learning_rate": 6.123222250290306e-06, "loss": 0.3, "step": 28720 }, { "epoch": 1.3180212014134276, "grad_norm": 0.47366008162498474, "learning_rate": 6.122983327839772e-06, "loss": 0.3349, "step": 28721 }, { "epoch": 1.318067091918682, "grad_norm": 0.4719436764717102, "learning_rate": 6.122744402688818e-06, "loss": 0.4105, "step": 28722 }, { "epoch": 1.3181129824239366, "grad_norm": 0.4298560917377472, "learning_rate": 6.12250547483802e-06, "loss": 0.2977, "step": 28723 }, { "epoch": 1.3181588729291909, "grad_norm": 0.43682661652565, "learning_rate": 6.122266544287949e-06, "loss": 0.3409, "step": 28724 }, { "epoch": 1.3182047634344454, "grad_norm": 0.46767759323120117, "learning_rate": 6.122027611039181e-06, "loss": 0.3625, "step": 28725 }, { "epoch": 1.3182506539396999, "grad_norm": 0.4883808493614197, "learning_rate": 6.12178867509229e-06, "loss": 0.3525, "step": 28726 }, { "epoch": 1.3182965444449544, "grad_norm": 0.4817774295806885, "learning_rate": 6.121549736447852e-06, "loss": 0.4221, "step": 28727 }, { "epoch": 1.3183424349502089, "grad_norm": 0.5234060883522034, "learning_rate": 6.121310795106442e-06, "loss": 0.4346, "step": 28728 }, { "epoch": 1.3183883254554631, "grad_norm": 0.45140326023101807, "learning_rate": 6.121071851068632e-06, "loss": 0.3188, "step": 28729 }, { "epoch": 1.3184342159607176, "grad_norm": 0.46804988384246826, "learning_rate": 6.120832904334999e-06, "loss": 0.3647, "step": 28730 }, { "epoch": 1.3184801064659721, "grad_norm": 0.5010818839073181, "learning_rate": 6.120593954906115e-06, "loss": 0.4082, "step": 28731 }, { "epoch": 1.3185259969712266, "grad_norm": 0.5314507484436035, "learning_rate": 6.1203550027825576e-06, "loss": 0.3899, "step": 28732 }, { "epoch": 1.3185718874764811, "grad_norm": 0.4837670624256134, "learning_rate": 6.1201160479649005e-06, "loss": 0.3857, "step": 28733 }, { "epoch": 1.3186177779817356, "grad_norm": 0.4791371524333954, "learning_rate": 6.119877090453718e-06, "loss": 0.3845, "step": 28734 }, { "epoch": 1.3186636684869901, "grad_norm": 0.42712026834487915, "learning_rate": 6.119638130249582e-06, "loss": 0.2916, "step": 28735 }, { "epoch": 1.3187095589922446, "grad_norm": 0.4438757300376892, "learning_rate": 6.1193991673530716e-06, "loss": 0.3434, "step": 28736 }, { "epoch": 1.318755449497499, "grad_norm": 0.45712098479270935, "learning_rate": 6.119160201764759e-06, "loss": 0.3206, "step": 28737 }, { "epoch": 1.3188013400027534, "grad_norm": 0.5130568146705627, "learning_rate": 6.11892123348522e-06, "loss": 0.439, "step": 28738 }, { "epoch": 1.318847230508008, "grad_norm": 0.44837361574172974, "learning_rate": 6.118682262515028e-06, "loss": 0.3406, "step": 28739 }, { "epoch": 1.3188931210132624, "grad_norm": 0.4731118977069855, "learning_rate": 6.118443288854759e-06, "loss": 0.3967, "step": 28740 }, { "epoch": 1.318939011518517, "grad_norm": 0.46926721930503845, "learning_rate": 6.118204312504985e-06, "loss": 0.3784, "step": 28741 }, { "epoch": 1.3189849020237712, "grad_norm": 0.47100934386253357, "learning_rate": 6.117965333466283e-06, "loss": 0.3429, "step": 28742 }, { "epoch": 1.3190307925290257, "grad_norm": 0.4595562517642975, "learning_rate": 6.117726351739228e-06, "loss": 0.3985, "step": 28743 }, { "epoch": 1.3190766830342802, "grad_norm": 0.4745666980743408, "learning_rate": 6.117487367324394e-06, "loss": 0.3658, "step": 28744 }, { "epoch": 1.3191225735395347, "grad_norm": 0.4708336889743805, "learning_rate": 6.117248380222356e-06, "loss": 0.3359, "step": 28745 }, { "epoch": 1.3191684640447892, "grad_norm": 0.4649316370487213, "learning_rate": 6.117009390433687e-06, "loss": 0.3601, "step": 28746 }, { "epoch": 1.3192143545500437, "grad_norm": 0.49175596237182617, "learning_rate": 6.116770397958965e-06, "loss": 0.3862, "step": 28747 }, { "epoch": 1.3192602450552982, "grad_norm": 0.4427841901779175, "learning_rate": 6.11653140279876e-06, "loss": 0.3127, "step": 28748 }, { "epoch": 1.3193061355605527, "grad_norm": 0.3980049788951874, "learning_rate": 6.116292404953651e-06, "loss": 0.2641, "step": 28749 }, { "epoch": 1.319352026065807, "grad_norm": 0.523370623588562, "learning_rate": 6.116053404424212e-06, "loss": 0.4591, "step": 28750 }, { "epoch": 1.3193979165710614, "grad_norm": 0.47151267528533936, "learning_rate": 6.1158144012110145e-06, "loss": 0.3701, "step": 28751 }, { "epoch": 1.319443807076316, "grad_norm": 0.49633678793907166, "learning_rate": 6.115575395314636e-06, "loss": 0.3905, "step": 28752 }, { "epoch": 1.3194896975815704, "grad_norm": 0.48400455713272095, "learning_rate": 6.115336386735651e-06, "loss": 0.3985, "step": 28753 }, { "epoch": 1.319535588086825, "grad_norm": 0.48351743817329407, "learning_rate": 6.115097375474634e-06, "loss": 0.3817, "step": 28754 }, { "epoch": 1.3195814785920792, "grad_norm": 0.4967452883720398, "learning_rate": 6.11485836153216e-06, "loss": 0.3872, "step": 28755 }, { "epoch": 1.3196273690973337, "grad_norm": 0.46331241726875305, "learning_rate": 6.114619344908804e-06, "loss": 0.375, "step": 28756 }, { "epoch": 1.3196732596025882, "grad_norm": 0.5013239979743958, "learning_rate": 6.114380325605139e-06, "loss": 0.4031, "step": 28757 }, { "epoch": 1.3197191501078427, "grad_norm": 0.45188966393470764, "learning_rate": 6.114141303621743e-06, "loss": 0.3618, "step": 28758 }, { "epoch": 1.3197650406130972, "grad_norm": 0.52292799949646, "learning_rate": 6.113902278959187e-06, "loss": 0.4792, "step": 28759 }, { "epoch": 1.3198109311183517, "grad_norm": 0.45787322521209717, "learning_rate": 6.113663251618048e-06, "loss": 0.3082, "step": 28760 }, { "epoch": 1.3198568216236062, "grad_norm": 0.5172033309936523, "learning_rate": 6.113424221598902e-06, "loss": 0.4238, "step": 28761 }, { "epoch": 1.3199027121288605, "grad_norm": 0.47714874148368835, "learning_rate": 6.11318518890232e-06, "loss": 0.3141, "step": 28762 }, { "epoch": 1.319948602634115, "grad_norm": 0.49230214953422546, "learning_rate": 6.1129461535288785e-06, "loss": 0.3783, "step": 28763 }, { "epoch": 1.3199944931393695, "grad_norm": 0.49386778473854065, "learning_rate": 6.112707115479155e-06, "loss": 0.3934, "step": 28764 }, { "epoch": 1.320040383644624, "grad_norm": 0.4632430076599121, "learning_rate": 6.11246807475372e-06, "loss": 0.3342, "step": 28765 }, { "epoch": 1.3200862741498784, "grad_norm": 0.48206397891044617, "learning_rate": 6.112229031353151e-06, "loss": 0.3254, "step": 28766 }, { "epoch": 1.3201321646551327, "grad_norm": 0.43939170241355896, "learning_rate": 6.111989985278024e-06, "loss": 0.3064, "step": 28767 }, { "epoch": 1.3201780551603872, "grad_norm": 0.4627799093723297, "learning_rate": 6.11175093652891e-06, "loss": 0.325, "step": 28768 }, { "epoch": 1.3202239456656417, "grad_norm": 0.5178590416908264, "learning_rate": 6.111511885106386e-06, "loss": 0.3238, "step": 28769 }, { "epoch": 1.3202698361708962, "grad_norm": 0.4612278640270233, "learning_rate": 6.111272831011027e-06, "loss": 0.348, "step": 28770 }, { "epoch": 1.3203157266761507, "grad_norm": 0.448106586933136, "learning_rate": 6.111033774243409e-06, "loss": 0.3062, "step": 28771 }, { "epoch": 1.3203616171814052, "grad_norm": 0.47459936141967773, "learning_rate": 6.1107947148041046e-06, "loss": 0.3743, "step": 28772 }, { "epoch": 1.3204075076866597, "grad_norm": 0.4853750467300415, "learning_rate": 6.110555652693688e-06, "loss": 0.3799, "step": 28773 }, { "epoch": 1.3204533981919142, "grad_norm": 0.5146495699882507, "learning_rate": 6.1103165879127355e-06, "loss": 0.4869, "step": 28774 }, { "epoch": 1.3204992886971685, "grad_norm": 0.4930976927280426, "learning_rate": 6.110077520461824e-06, "loss": 0.2907, "step": 28775 }, { "epoch": 1.320545179202423, "grad_norm": 0.49036678671836853, "learning_rate": 6.109838450341525e-06, "loss": 0.4013, "step": 28776 }, { "epoch": 1.3205910697076775, "grad_norm": 0.47645196318626404, "learning_rate": 6.109599377552417e-06, "loss": 0.3734, "step": 28777 }, { "epoch": 1.320636960212932, "grad_norm": 0.45227330923080444, "learning_rate": 6.109360302095071e-06, "loss": 0.3111, "step": 28778 }, { "epoch": 1.3206828507181865, "grad_norm": 0.5247656106948853, "learning_rate": 6.109121223970063e-06, "loss": 0.4283, "step": 28779 }, { "epoch": 1.3207287412234407, "grad_norm": 0.40907084941864014, "learning_rate": 6.108882143177969e-06, "loss": 0.2603, "step": 28780 }, { "epoch": 1.3207746317286952, "grad_norm": 0.4714578688144684, "learning_rate": 6.108643059719364e-06, "loss": 0.3833, "step": 28781 }, { "epoch": 1.3208205222339497, "grad_norm": 0.45785200595855713, "learning_rate": 6.108403973594822e-06, "loss": 0.341, "step": 28782 }, { "epoch": 1.3208664127392042, "grad_norm": 0.47247743606567383, "learning_rate": 6.108164884804918e-06, "loss": 0.3735, "step": 28783 }, { "epoch": 1.3209123032444587, "grad_norm": 0.4861489534378052, "learning_rate": 6.107925793350228e-06, "loss": 0.3951, "step": 28784 }, { "epoch": 1.3209581937497132, "grad_norm": 0.4769836664199829, "learning_rate": 6.107686699231325e-06, "loss": 0.3886, "step": 28785 }, { "epoch": 1.3210040842549677, "grad_norm": 0.4841054677963257, "learning_rate": 6.107447602448784e-06, "loss": 0.4031, "step": 28786 }, { "epoch": 1.3210499747602222, "grad_norm": 0.4911634027957916, "learning_rate": 6.107208503003184e-06, "loss": 0.3792, "step": 28787 }, { "epoch": 1.3210958652654765, "grad_norm": 0.4687567949295044, "learning_rate": 6.106969400895095e-06, "loss": 0.3489, "step": 28788 }, { "epoch": 1.321141755770731, "grad_norm": 0.47050556540489197, "learning_rate": 6.106730296125096e-06, "loss": 0.3502, "step": 28789 }, { "epoch": 1.3211876462759855, "grad_norm": 0.550982654094696, "learning_rate": 6.106491188693759e-06, "loss": 0.4271, "step": 28790 }, { "epoch": 1.32123353678124, "grad_norm": 0.478751540184021, "learning_rate": 6.106252078601658e-06, "loss": 0.3733, "step": 28791 }, { "epoch": 1.3212794272864945, "grad_norm": 0.45403558015823364, "learning_rate": 6.106012965849372e-06, "loss": 0.3514, "step": 28792 }, { "epoch": 1.3213253177917488, "grad_norm": 0.5096951723098755, "learning_rate": 6.105773850437473e-06, "loss": 0.4177, "step": 28793 }, { "epoch": 1.3213712082970033, "grad_norm": 0.4999421536922455, "learning_rate": 6.105534732366537e-06, "loss": 0.4599, "step": 28794 }, { "epoch": 1.3214170988022578, "grad_norm": 0.44926419854164124, "learning_rate": 6.10529561163714e-06, "loss": 0.3015, "step": 28795 }, { "epoch": 1.3214629893075123, "grad_norm": 0.48875126242637634, "learning_rate": 6.105056488249854e-06, "loss": 0.3779, "step": 28796 }, { "epoch": 1.3215088798127668, "grad_norm": 0.5126500725746155, "learning_rate": 6.104817362205255e-06, "loss": 0.4675, "step": 28797 }, { "epoch": 1.3215547703180213, "grad_norm": 0.4920293688774109, "learning_rate": 6.104578233503922e-06, "loss": 0.37, "step": 28798 }, { "epoch": 1.3216006608232758, "grad_norm": 0.4848904013633728, "learning_rate": 6.104339102146426e-06, "loss": 0.3925, "step": 28799 }, { "epoch": 1.3216465513285303, "grad_norm": 0.4011046886444092, "learning_rate": 6.104099968133342e-06, "loss": 0.2607, "step": 28800 }, { "epoch": 1.3216924418337845, "grad_norm": 0.48108533024787903, "learning_rate": 6.103860831465248e-06, "loss": 0.3626, "step": 28801 }, { "epoch": 1.321738332339039, "grad_norm": 0.4518919289112091, "learning_rate": 6.103621692142715e-06, "loss": 0.3445, "step": 28802 }, { "epoch": 1.3217842228442935, "grad_norm": 0.43407300114631653, "learning_rate": 6.103382550166321e-06, "loss": 0.3009, "step": 28803 }, { "epoch": 1.321830113349548, "grad_norm": 0.4407515823841095, "learning_rate": 6.103143405536641e-06, "loss": 0.3267, "step": 28804 }, { "epoch": 1.3218760038548023, "grad_norm": 0.4812732934951782, "learning_rate": 6.1029042582542476e-06, "loss": 0.3565, "step": 28805 }, { "epoch": 1.3219218943600568, "grad_norm": 0.4357263147830963, "learning_rate": 6.1026651083197195e-06, "loss": 0.3004, "step": 28806 }, { "epoch": 1.3219677848653113, "grad_norm": 0.4157315790653229, "learning_rate": 6.102425955733628e-06, "loss": 0.2891, "step": 28807 }, { "epoch": 1.3220136753705658, "grad_norm": 0.49445128440856934, "learning_rate": 6.10218680049655e-06, "loss": 0.4284, "step": 28808 }, { "epoch": 1.3220595658758203, "grad_norm": 0.4910227358341217, "learning_rate": 6.101947642609062e-06, "loss": 0.4165, "step": 28809 }, { "epoch": 1.3221054563810748, "grad_norm": 0.48263034224510193, "learning_rate": 6.101708482071737e-06, "loss": 0.3931, "step": 28810 }, { "epoch": 1.3221513468863293, "grad_norm": 0.47633007168769836, "learning_rate": 6.10146931888515e-06, "loss": 0.3549, "step": 28811 }, { "epoch": 1.3221972373915838, "grad_norm": 0.4339072108268738, "learning_rate": 6.101230153049879e-06, "loss": 0.2901, "step": 28812 }, { "epoch": 1.322243127896838, "grad_norm": 0.45886462926864624, "learning_rate": 6.100990984566495e-06, "loss": 0.3436, "step": 28813 }, { "epoch": 1.3222890184020926, "grad_norm": 0.4765680730342865, "learning_rate": 6.100751813435576e-06, "loss": 0.3428, "step": 28814 }, { "epoch": 1.322334908907347, "grad_norm": 0.44993728399276733, "learning_rate": 6.1005126396576974e-06, "loss": 0.3583, "step": 28815 }, { "epoch": 1.3223807994126016, "grad_norm": 0.4260273873806, "learning_rate": 6.100273463233431e-06, "loss": 0.2781, "step": 28816 }, { "epoch": 1.322426689917856, "grad_norm": 0.48214298486709595, "learning_rate": 6.100034284163356e-06, "loss": 0.3684, "step": 28817 }, { "epoch": 1.3224725804231103, "grad_norm": 0.43614086508750916, "learning_rate": 6.099795102448045e-06, "loss": 0.338, "step": 28818 }, { "epoch": 1.3225184709283648, "grad_norm": 0.45613259077072144, "learning_rate": 6.099555918088074e-06, "loss": 0.3464, "step": 28819 }, { "epoch": 1.3225643614336193, "grad_norm": 0.4602162837982178, "learning_rate": 6.099316731084017e-06, "loss": 0.3385, "step": 28820 }, { "epoch": 1.3226102519388738, "grad_norm": 0.4515390992164612, "learning_rate": 6.099077541436452e-06, "loss": 0.3431, "step": 28821 }, { "epoch": 1.3226561424441283, "grad_norm": 0.46693888306617737, "learning_rate": 6.0988383491459514e-06, "loss": 0.3618, "step": 28822 }, { "epoch": 1.3227020329493828, "grad_norm": 0.5288559198379517, "learning_rate": 6.098599154213092e-06, "loss": 0.4536, "step": 28823 }, { "epoch": 1.3227479234546373, "grad_norm": 0.5215124487876892, "learning_rate": 6.098359956638447e-06, "loss": 0.4181, "step": 28824 }, { "epoch": 1.3227938139598918, "grad_norm": 0.4563755989074707, "learning_rate": 6.098120756422593e-06, "loss": 0.3309, "step": 28825 }, { "epoch": 1.322839704465146, "grad_norm": 0.505741536617279, "learning_rate": 6.0978815535661085e-06, "loss": 0.471, "step": 28826 }, { "epoch": 1.3228855949704006, "grad_norm": 0.4916747212409973, "learning_rate": 6.0976423480695615e-06, "loss": 0.3445, "step": 28827 }, { "epoch": 1.322931485475655, "grad_norm": 0.4388931691646576, "learning_rate": 6.097403139933532e-06, "loss": 0.3083, "step": 28828 }, { "epoch": 1.3229773759809096, "grad_norm": 0.4685806334018707, "learning_rate": 6.097163929158595e-06, "loss": 0.3806, "step": 28829 }, { "epoch": 1.323023266486164, "grad_norm": 0.4984460771083832, "learning_rate": 6.096924715745325e-06, "loss": 0.3961, "step": 28830 }, { "epoch": 1.3230691569914184, "grad_norm": 0.4493517577648163, "learning_rate": 6.096685499694297e-06, "loss": 0.3435, "step": 28831 }, { "epoch": 1.3231150474966729, "grad_norm": 0.48422810435295105, "learning_rate": 6.096446281006088e-06, "loss": 0.3803, "step": 28832 }, { "epoch": 1.3231609380019274, "grad_norm": 0.49591103196144104, "learning_rate": 6.096207059681268e-06, "loss": 0.4613, "step": 28833 }, { "epoch": 1.3232068285071819, "grad_norm": 0.4480378329753876, "learning_rate": 6.0959678357204185e-06, "loss": 0.305, "step": 28834 }, { "epoch": 1.3232527190124364, "grad_norm": 0.43708014488220215, "learning_rate": 6.095728609124112e-06, "loss": 0.33, "step": 28835 }, { "epoch": 1.3232986095176908, "grad_norm": 0.457009881734848, "learning_rate": 6.0954893798929235e-06, "loss": 0.3434, "step": 28836 }, { "epoch": 1.3233445000229453, "grad_norm": 0.47787559032440186, "learning_rate": 6.095250148027432e-06, "loss": 0.3901, "step": 28837 }, { "epoch": 1.3233903905281998, "grad_norm": 0.46493804454803467, "learning_rate": 6.095010913528206e-06, "loss": 0.3186, "step": 28838 }, { "epoch": 1.3234362810334541, "grad_norm": 0.47897231578826904, "learning_rate": 6.094771676395824e-06, "loss": 0.2995, "step": 28839 }, { "epoch": 1.3234821715387086, "grad_norm": 0.4451633095741272, "learning_rate": 6.0945324366308636e-06, "loss": 0.3042, "step": 28840 }, { "epoch": 1.3235280620439631, "grad_norm": 0.4683212339878082, "learning_rate": 6.094293194233897e-06, "loss": 0.336, "step": 28841 }, { "epoch": 1.3235739525492176, "grad_norm": 0.46995723247528076, "learning_rate": 6.0940539492055016e-06, "loss": 0.3423, "step": 28842 }, { "epoch": 1.3236198430544721, "grad_norm": 0.46864521503448486, "learning_rate": 6.093814701546251e-06, "loss": 0.3549, "step": 28843 }, { "epoch": 1.3236657335597264, "grad_norm": 0.4793601334095001, "learning_rate": 6.093575451256722e-06, "loss": 0.3764, "step": 28844 }, { "epoch": 1.3237116240649809, "grad_norm": 0.4651247262954712, "learning_rate": 6.0933361983374876e-06, "loss": 0.36, "step": 28845 }, { "epoch": 1.3237575145702354, "grad_norm": 0.48365968465805054, "learning_rate": 6.093096942789126e-06, "loss": 0.3849, "step": 28846 }, { "epoch": 1.3238034050754899, "grad_norm": 0.4886632561683655, "learning_rate": 6.09285768461221e-06, "loss": 0.3727, "step": 28847 }, { "epoch": 1.3238492955807444, "grad_norm": 0.4789888560771942, "learning_rate": 6.0926184238073185e-06, "loss": 0.3777, "step": 28848 }, { "epoch": 1.3238951860859989, "grad_norm": 0.44817668199539185, "learning_rate": 6.092379160375024e-06, "loss": 0.3378, "step": 28849 }, { "epoch": 1.3239410765912534, "grad_norm": 0.5150731801986694, "learning_rate": 6.092139894315901e-06, "loss": 0.405, "step": 28850 }, { "epoch": 1.3239869670965076, "grad_norm": 0.5056014060974121, "learning_rate": 6.0919006256305255e-06, "loss": 0.4361, "step": 28851 }, { "epoch": 1.3240328576017621, "grad_norm": 0.5149839520454407, "learning_rate": 6.091661354319476e-06, "loss": 0.4439, "step": 28852 }, { "epoch": 1.3240787481070166, "grad_norm": 0.4365101158618927, "learning_rate": 6.091422080383324e-06, "loss": 0.3577, "step": 28853 }, { "epoch": 1.3241246386122711, "grad_norm": 0.4697846472263336, "learning_rate": 6.0911828038226475e-06, "loss": 0.3603, "step": 28854 }, { "epoch": 1.3241705291175256, "grad_norm": 0.4836871027946472, "learning_rate": 6.09094352463802e-06, "loss": 0.3759, "step": 28855 }, { "epoch": 1.32421641962278, "grad_norm": 0.5125709176063538, "learning_rate": 6.090704242830017e-06, "loss": 0.4308, "step": 28856 }, { "epoch": 1.3242623101280344, "grad_norm": 0.4375849664211273, "learning_rate": 6.090464958399216e-06, "loss": 0.3054, "step": 28857 }, { "epoch": 1.324308200633289, "grad_norm": 0.4840388298034668, "learning_rate": 6.090225671346189e-06, "loss": 0.3877, "step": 28858 }, { "epoch": 1.3243540911385434, "grad_norm": 0.4650684595108032, "learning_rate": 6.089986381671516e-06, "loss": 0.4002, "step": 28859 }, { "epoch": 1.324399981643798, "grad_norm": 0.4824790954589844, "learning_rate": 6.089747089375768e-06, "loss": 0.4424, "step": 28860 }, { "epoch": 1.3244458721490524, "grad_norm": 0.4908510446548462, "learning_rate": 6.089507794459521e-06, "loss": 0.3785, "step": 28861 }, { "epoch": 1.324491762654307, "grad_norm": 0.4910569489002228, "learning_rate": 6.089268496923353e-06, "loss": 0.3366, "step": 28862 }, { "epoch": 1.3245376531595614, "grad_norm": 0.46018582582473755, "learning_rate": 6.089029196767838e-06, "loss": 0.3545, "step": 28863 }, { "epoch": 1.3245835436648157, "grad_norm": 0.50713050365448, "learning_rate": 6.088789893993553e-06, "loss": 0.3568, "step": 28864 }, { "epoch": 1.3246294341700702, "grad_norm": 0.5026248097419739, "learning_rate": 6.08855058860107e-06, "loss": 0.4023, "step": 28865 }, { "epoch": 1.3246753246753247, "grad_norm": 0.4650111794471741, "learning_rate": 6.088311280590967e-06, "loss": 0.3418, "step": 28866 }, { "epoch": 1.3247212151805792, "grad_norm": 0.42097482085227966, "learning_rate": 6.088071969963818e-06, "loss": 0.3022, "step": 28867 }, { "epoch": 1.3247671056858337, "grad_norm": 0.45232096314430237, "learning_rate": 6.087832656720199e-06, "loss": 0.3284, "step": 28868 }, { "epoch": 1.324812996191088, "grad_norm": 0.4476914405822754, "learning_rate": 6.087593340860688e-06, "loss": 0.2881, "step": 28869 }, { "epoch": 1.3248588866963424, "grad_norm": 0.4717225134372711, "learning_rate": 6.087354022385857e-06, "loss": 0.3248, "step": 28870 }, { "epoch": 1.324904777201597, "grad_norm": 0.42621830105781555, "learning_rate": 6.087114701296284e-06, "loss": 0.3406, "step": 28871 }, { "epoch": 1.3249506677068514, "grad_norm": 0.5103136897087097, "learning_rate": 6.086875377592542e-06, "loss": 0.322, "step": 28872 }, { "epoch": 1.324996558212106, "grad_norm": 0.48601871728897095, "learning_rate": 6.086636051275207e-06, "loss": 0.3731, "step": 28873 }, { "epoch": 1.3250424487173604, "grad_norm": 0.44567441940307617, "learning_rate": 6.086396722344856e-06, "loss": 0.308, "step": 28874 }, { "epoch": 1.325088339222615, "grad_norm": 0.4981589913368225, "learning_rate": 6.086157390802064e-06, "loss": 0.4166, "step": 28875 }, { "epoch": 1.3251342297278694, "grad_norm": 0.4742518961429596, "learning_rate": 6.085918056647407e-06, "loss": 0.3054, "step": 28876 }, { "epoch": 1.3251801202331237, "grad_norm": 0.46937453746795654, "learning_rate": 6.0856787198814585e-06, "loss": 0.3399, "step": 28877 }, { "epoch": 1.3252260107383782, "grad_norm": 0.5208996534347534, "learning_rate": 6.085439380504795e-06, "loss": 0.3902, "step": 28878 }, { "epoch": 1.3252719012436327, "grad_norm": 0.528114914894104, "learning_rate": 6.085200038517993e-06, "loss": 0.4347, "step": 28879 }, { "epoch": 1.3253177917488872, "grad_norm": 0.5162122845649719, "learning_rate": 6.0849606939216275e-06, "loss": 0.4032, "step": 28880 }, { "epoch": 1.3253636822541417, "grad_norm": 0.43470171093940735, "learning_rate": 6.0847213467162744e-06, "loss": 0.2754, "step": 28881 }, { "epoch": 1.325409572759396, "grad_norm": 0.4528656005859375, "learning_rate": 6.084481996902508e-06, "loss": 0.3063, "step": 28882 }, { "epoch": 1.3254554632646505, "grad_norm": 0.4595014154911041, "learning_rate": 6.0842426444809066e-06, "loss": 0.3483, "step": 28883 }, { "epoch": 1.325501353769905, "grad_norm": 0.5022663474082947, "learning_rate": 6.08400328945204e-06, "loss": 0.3372, "step": 28884 }, { "epoch": 1.3255472442751595, "grad_norm": 0.4986295700073242, "learning_rate": 6.083763931816492e-06, "loss": 0.4221, "step": 28885 }, { "epoch": 1.325593134780414, "grad_norm": 0.419863760471344, "learning_rate": 6.083524571574832e-06, "loss": 0.2692, "step": 28886 }, { "epoch": 1.3256390252856685, "grad_norm": 0.5348986983299255, "learning_rate": 6.0832852087276364e-06, "loss": 0.4852, "step": 28887 }, { "epoch": 1.325684915790923, "grad_norm": 0.459759384393692, "learning_rate": 6.0830458432754835e-06, "loss": 0.328, "step": 28888 }, { "epoch": 1.3257308062961775, "grad_norm": 0.4817798137664795, "learning_rate": 6.082806475218946e-06, "loss": 0.4039, "step": 28889 }, { "epoch": 1.3257766968014317, "grad_norm": 0.4742611348628998, "learning_rate": 6.082567104558599e-06, "loss": 0.3765, "step": 28890 }, { "epoch": 1.3258225873066862, "grad_norm": 0.526427686214447, "learning_rate": 6.0823277312950225e-06, "loss": 0.4169, "step": 28891 }, { "epoch": 1.3258684778119407, "grad_norm": 0.4859611392021179, "learning_rate": 6.082088355428789e-06, "loss": 0.3967, "step": 28892 }, { "epoch": 1.3259143683171952, "grad_norm": 0.45125874876976013, "learning_rate": 6.081848976960472e-06, "loss": 0.3414, "step": 28893 }, { "epoch": 1.3259602588224495, "grad_norm": 0.47578346729278564, "learning_rate": 6.081609595890653e-06, "loss": 0.356, "step": 28894 }, { "epoch": 1.326006149327704, "grad_norm": 0.4551136791706085, "learning_rate": 6.081370212219902e-06, "loss": 0.3339, "step": 28895 }, { "epoch": 1.3260520398329585, "grad_norm": 0.42970919609069824, "learning_rate": 6.081130825948799e-06, "loss": 0.2987, "step": 28896 }, { "epoch": 1.326097930338213, "grad_norm": 0.4999580681324005, "learning_rate": 6.080891437077917e-06, "loss": 0.3882, "step": 28897 }, { "epoch": 1.3261438208434675, "grad_norm": 0.46115192770957947, "learning_rate": 6.080652045607831e-06, "loss": 0.3756, "step": 28898 }, { "epoch": 1.326189711348722, "grad_norm": 0.49412643909454346, "learning_rate": 6.080412651539118e-06, "loss": 0.3947, "step": 28899 }, { "epoch": 1.3262356018539765, "grad_norm": 0.4687947928905487, "learning_rate": 6.0801732548723546e-06, "loss": 0.3754, "step": 28900 }, { "epoch": 1.326281492359231, "grad_norm": 0.471955806016922, "learning_rate": 6.079933855608114e-06, "loss": 0.3467, "step": 28901 }, { "epoch": 1.3263273828644853, "grad_norm": 0.45274701714515686, "learning_rate": 6.079694453746975e-06, "loss": 0.3632, "step": 28902 }, { "epoch": 1.3263732733697398, "grad_norm": 0.47636544704437256, "learning_rate": 6.079455049289512e-06, "loss": 0.3725, "step": 28903 }, { "epoch": 1.3264191638749943, "grad_norm": 0.49059930443763733, "learning_rate": 6.0792156422362984e-06, "loss": 0.3961, "step": 28904 }, { "epoch": 1.3264650543802488, "grad_norm": 0.4730585217475891, "learning_rate": 6.078976232587913e-06, "loss": 0.3731, "step": 28905 }, { "epoch": 1.3265109448855033, "grad_norm": 0.4916800558567047, "learning_rate": 6.07873682034493e-06, "loss": 0.3604, "step": 28906 }, { "epoch": 1.3265568353907575, "grad_norm": 0.5163220763206482, "learning_rate": 6.078497405507925e-06, "loss": 0.453, "step": 28907 }, { "epoch": 1.326602725896012, "grad_norm": 0.48936569690704346, "learning_rate": 6.078257988077475e-06, "loss": 0.372, "step": 28908 }, { "epoch": 1.3266486164012665, "grad_norm": 0.5114070177078247, "learning_rate": 6.078018568054154e-06, "loss": 0.3726, "step": 28909 }, { "epoch": 1.326694506906521, "grad_norm": 0.4688384532928467, "learning_rate": 6.077779145438538e-06, "loss": 0.3663, "step": 28910 }, { "epoch": 1.3267403974117755, "grad_norm": 0.488557368516922, "learning_rate": 6.077539720231204e-06, "loss": 0.3996, "step": 28911 }, { "epoch": 1.32678628791703, "grad_norm": 0.4941939413547516, "learning_rate": 6.077300292432727e-06, "loss": 0.3945, "step": 28912 }, { "epoch": 1.3268321784222845, "grad_norm": 0.4762782156467438, "learning_rate": 6.0770608620436835e-06, "loss": 0.3735, "step": 28913 }, { "epoch": 1.326878068927539, "grad_norm": 0.5439191460609436, "learning_rate": 6.076821429064649e-06, "loss": 0.4726, "step": 28914 }, { "epoch": 1.3269239594327933, "grad_norm": 0.47040224075317383, "learning_rate": 6.076581993496197e-06, "loss": 0.3459, "step": 28915 }, { "epoch": 1.3269698499380478, "grad_norm": 0.4570206105709076, "learning_rate": 6.0763425553389056e-06, "loss": 0.3575, "step": 28916 }, { "epoch": 1.3270157404433023, "grad_norm": 0.4774021506309509, "learning_rate": 6.07610311459335e-06, "loss": 0.3853, "step": 28917 }, { "epoch": 1.3270616309485568, "grad_norm": 0.4381569027900696, "learning_rate": 6.0758636712601075e-06, "loss": 0.3545, "step": 28918 }, { "epoch": 1.3271075214538113, "grad_norm": 0.5099719166755676, "learning_rate": 6.075624225339751e-06, "loss": 0.3976, "step": 28919 }, { "epoch": 1.3271534119590656, "grad_norm": 0.45369580388069153, "learning_rate": 6.075384776832858e-06, "loss": 0.3607, "step": 28920 }, { "epoch": 1.32719930246432, "grad_norm": 0.4879549443721771, "learning_rate": 6.075145325740004e-06, "loss": 0.3616, "step": 28921 }, { "epoch": 1.3272451929695745, "grad_norm": 0.4932876527309418, "learning_rate": 6.074905872061765e-06, "loss": 0.3684, "step": 28922 }, { "epoch": 1.327291083474829, "grad_norm": 0.46634000539779663, "learning_rate": 6.074666415798716e-06, "loss": 0.3865, "step": 28923 }, { "epoch": 1.3273369739800835, "grad_norm": 0.4864497184753418, "learning_rate": 6.074426956951435e-06, "loss": 0.3999, "step": 28924 }, { "epoch": 1.327382864485338, "grad_norm": 0.4584881365299225, "learning_rate": 6.074187495520496e-06, "loss": 0.3126, "step": 28925 }, { "epoch": 1.3274287549905925, "grad_norm": 0.6558611392974854, "learning_rate": 6.073948031506474e-06, "loss": 0.3467, "step": 28926 }, { "epoch": 1.327474645495847, "grad_norm": 0.4778306186199188, "learning_rate": 6.073708564909945e-06, "loss": 0.3927, "step": 28927 }, { "epoch": 1.3275205360011013, "grad_norm": 0.43326690793037415, "learning_rate": 6.073469095731488e-06, "loss": 0.3394, "step": 28928 }, { "epoch": 1.3275664265063558, "grad_norm": 0.46001869440078735, "learning_rate": 6.073229623971676e-06, "loss": 0.3159, "step": 28929 }, { "epoch": 1.3276123170116103, "grad_norm": 0.5068168044090271, "learning_rate": 6.072990149631084e-06, "loss": 0.3515, "step": 28930 }, { "epoch": 1.3276582075168648, "grad_norm": 0.4732169806957245, "learning_rate": 6.072750672710292e-06, "loss": 0.3656, "step": 28931 }, { "epoch": 1.3277040980221193, "grad_norm": 0.47115200757980347, "learning_rate": 6.07251119320987e-06, "loss": 0.3693, "step": 28932 }, { "epoch": 1.3277499885273736, "grad_norm": 0.457491010427475, "learning_rate": 6.072271711130399e-06, "loss": 0.3201, "step": 28933 }, { "epoch": 1.327795879032628, "grad_norm": 0.4461002051830292, "learning_rate": 6.072032226472453e-06, "loss": 0.2987, "step": 28934 }, { "epoch": 1.3278417695378826, "grad_norm": 0.5364105105400085, "learning_rate": 6.0717927392366086e-06, "loss": 0.4464, "step": 28935 }, { "epoch": 1.327887660043137, "grad_norm": 0.4862484931945801, "learning_rate": 6.07155324942344e-06, "loss": 0.399, "step": 28936 }, { "epoch": 1.3279335505483916, "grad_norm": 0.5264572501182556, "learning_rate": 6.0713137570335235e-06, "loss": 0.4773, "step": 28937 }, { "epoch": 1.327979441053646, "grad_norm": 0.4510900676250458, "learning_rate": 6.071074262067434e-06, "loss": 0.3387, "step": 28938 }, { "epoch": 1.3280253315589006, "grad_norm": 0.509158730506897, "learning_rate": 6.070834764525752e-06, "loss": 0.4329, "step": 28939 }, { "epoch": 1.3280712220641548, "grad_norm": 0.49007681012153625, "learning_rate": 6.07059526440905e-06, "loss": 0.3629, "step": 28940 }, { "epoch": 1.3281171125694093, "grad_norm": 0.46558547019958496, "learning_rate": 6.070355761717903e-06, "loss": 0.371, "step": 28941 }, { "epoch": 1.3281630030746638, "grad_norm": 0.47143054008483887, "learning_rate": 6.070116256452889e-06, "loss": 0.41, "step": 28942 }, { "epoch": 1.3282088935799183, "grad_norm": 0.5403587818145752, "learning_rate": 6.069876748614583e-06, "loss": 0.4573, "step": 28943 }, { "epoch": 1.3282547840851728, "grad_norm": 0.4359770119190216, "learning_rate": 6.06963723820356e-06, "loss": 0.2848, "step": 28944 }, { "epoch": 1.328300674590427, "grad_norm": 0.4860462546348572, "learning_rate": 6.069397725220399e-06, "loss": 0.396, "step": 28945 }, { "epoch": 1.3283465650956816, "grad_norm": 0.48685625195503235, "learning_rate": 6.069158209665673e-06, "loss": 0.4343, "step": 28946 }, { "epoch": 1.328392455600936, "grad_norm": 0.41911035776138306, "learning_rate": 6.068918691539958e-06, "loss": 0.2684, "step": 28947 }, { "epoch": 1.3284383461061906, "grad_norm": 0.4337615668773651, "learning_rate": 6.068679170843832e-06, "loss": 0.3004, "step": 28948 }, { "epoch": 1.328484236611445, "grad_norm": 0.5006253719329834, "learning_rate": 6.068439647577869e-06, "loss": 0.4114, "step": 28949 }, { "epoch": 1.3285301271166996, "grad_norm": 0.4485139548778534, "learning_rate": 6.068200121742647e-06, "loss": 0.3132, "step": 28950 }, { "epoch": 1.328576017621954, "grad_norm": 0.45289793610572815, "learning_rate": 6.067960593338741e-06, "loss": 0.3396, "step": 28951 }, { "epoch": 1.3286219081272086, "grad_norm": 0.4437621831893921, "learning_rate": 6.067721062366726e-06, "loss": 0.3356, "step": 28952 }, { "epoch": 1.3286677986324629, "grad_norm": 0.4823054075241089, "learning_rate": 6.067481528827179e-06, "loss": 0.3408, "step": 28953 }, { "epoch": 1.3287136891377174, "grad_norm": 0.4142948389053345, "learning_rate": 6.067241992720676e-06, "loss": 0.2937, "step": 28954 }, { "epoch": 1.3287595796429719, "grad_norm": 0.4394124448299408, "learning_rate": 6.0670024540477925e-06, "loss": 0.3239, "step": 28955 }, { "epoch": 1.3288054701482264, "grad_norm": 0.41455572843551636, "learning_rate": 6.066762912809106e-06, "loss": 0.258, "step": 28956 }, { "epoch": 1.3288513606534809, "grad_norm": 0.49027204513549805, "learning_rate": 6.06652336900519e-06, "loss": 0.3854, "step": 28957 }, { "epoch": 1.3288972511587351, "grad_norm": 0.4961969554424286, "learning_rate": 6.066283822636621e-06, "loss": 0.4236, "step": 28958 }, { "epoch": 1.3289431416639896, "grad_norm": 0.4746111035346985, "learning_rate": 6.066044273703978e-06, "loss": 0.34, "step": 28959 }, { "epoch": 1.3289890321692441, "grad_norm": 0.4441932737827301, "learning_rate": 6.065804722207834e-06, "loss": 0.3281, "step": 28960 }, { "epoch": 1.3290349226744986, "grad_norm": 0.45794716477394104, "learning_rate": 6.065565168148767e-06, "loss": 0.3418, "step": 28961 }, { "epoch": 1.3290808131797531, "grad_norm": 0.4718148112297058, "learning_rate": 6.065325611527352e-06, "loss": 0.3599, "step": 28962 }, { "epoch": 1.3291267036850076, "grad_norm": 0.4929414689540863, "learning_rate": 6.0650860523441634e-06, "loss": 0.4058, "step": 28963 }, { "epoch": 1.3291725941902621, "grad_norm": 0.46294182538986206, "learning_rate": 6.06484649059978e-06, "loss": 0.3041, "step": 28964 }, { "epoch": 1.3292184846955166, "grad_norm": 0.458987832069397, "learning_rate": 6.064606926294777e-06, "loss": 0.3618, "step": 28965 }, { "epoch": 1.329264375200771, "grad_norm": 0.44872206449508667, "learning_rate": 6.06436735942973e-06, "loss": 0.3237, "step": 28966 }, { "epoch": 1.3293102657060254, "grad_norm": 0.4783572852611542, "learning_rate": 6.064127790005216e-06, "loss": 0.3904, "step": 28967 }, { "epoch": 1.32935615621128, "grad_norm": 0.43776899576187134, "learning_rate": 6.063888218021811e-06, "loss": 0.339, "step": 28968 }, { "epoch": 1.3294020467165344, "grad_norm": 0.457673043012619, "learning_rate": 6.063648643480089e-06, "loss": 0.3797, "step": 28969 }, { "epoch": 1.329447937221789, "grad_norm": 0.4363172948360443, "learning_rate": 6.0634090663806286e-06, "loss": 0.2934, "step": 28970 }, { "epoch": 1.3294938277270432, "grad_norm": 0.4343571662902832, "learning_rate": 6.063169486724004e-06, "loss": 0.3189, "step": 28971 }, { "epoch": 1.3295397182322977, "grad_norm": 0.4812133014202118, "learning_rate": 6.062929904510795e-06, "loss": 0.3641, "step": 28972 }, { "epoch": 1.3295856087375522, "grad_norm": 0.47825053334236145, "learning_rate": 6.062690319741573e-06, "loss": 0.3338, "step": 28973 }, { "epoch": 1.3296314992428067, "grad_norm": 0.4277571439743042, "learning_rate": 6.062450732416916e-06, "loss": 0.2822, "step": 28974 }, { "epoch": 1.3296773897480612, "grad_norm": 0.5003933310508728, "learning_rate": 6.062211142537401e-06, "loss": 0.4247, "step": 28975 }, { "epoch": 1.3297232802533157, "grad_norm": 0.4528597891330719, "learning_rate": 6.061971550103603e-06, "loss": 0.331, "step": 28976 }, { "epoch": 1.3297691707585702, "grad_norm": 0.474589079618454, "learning_rate": 6.061731955116099e-06, "loss": 0.375, "step": 28977 }, { "epoch": 1.3298150612638246, "grad_norm": 0.48176315426826477, "learning_rate": 6.0614923575754645e-06, "loss": 0.3957, "step": 28978 }, { "epoch": 1.329860951769079, "grad_norm": 0.4220033586025238, "learning_rate": 6.061252757482276e-06, "loss": 0.294, "step": 28979 }, { "epoch": 1.3299068422743334, "grad_norm": 0.6418368220329285, "learning_rate": 6.06101315483711e-06, "loss": 0.4326, "step": 28980 }, { "epoch": 1.329952732779588, "grad_norm": 0.4186230003833771, "learning_rate": 6.060773549640541e-06, "loss": 0.2793, "step": 28981 }, { "epoch": 1.3299986232848424, "grad_norm": 0.46027758717536926, "learning_rate": 6.060533941893147e-06, "loss": 0.3611, "step": 28982 }, { "epoch": 1.3300445137900967, "grad_norm": 0.4906064569950104, "learning_rate": 6.060294331595504e-06, "loss": 0.3965, "step": 28983 }, { "epoch": 1.3300904042953512, "grad_norm": 0.46489042043685913, "learning_rate": 6.0600547187481885e-06, "loss": 0.2833, "step": 28984 }, { "epoch": 1.3301362948006057, "grad_norm": 0.5202928781509399, "learning_rate": 6.059815103351775e-06, "loss": 0.4042, "step": 28985 }, { "epoch": 1.3301821853058602, "grad_norm": 0.4678945243358612, "learning_rate": 6.05957548540684e-06, "loss": 0.3884, "step": 28986 }, { "epoch": 1.3302280758111147, "grad_norm": 0.42390763759613037, "learning_rate": 6.0593358649139615e-06, "loss": 0.2863, "step": 28987 }, { "epoch": 1.3302739663163692, "grad_norm": 0.4540550410747528, "learning_rate": 6.059096241873713e-06, "loss": 0.3131, "step": 28988 }, { "epoch": 1.3303198568216237, "grad_norm": 0.47083231806755066, "learning_rate": 6.058856616286675e-06, "loss": 0.3589, "step": 28989 }, { "epoch": 1.3303657473268782, "grad_norm": 0.4826127886772156, "learning_rate": 6.05861698815342e-06, "loss": 0.3811, "step": 28990 }, { "epoch": 1.3304116378321325, "grad_norm": 0.5192810297012329, "learning_rate": 6.058377357474523e-06, "loss": 0.4256, "step": 28991 }, { "epoch": 1.330457528337387, "grad_norm": 0.48335832357406616, "learning_rate": 6.058137724250563e-06, "loss": 0.3457, "step": 28992 }, { "epoch": 1.3305034188426414, "grad_norm": 0.481261283159256, "learning_rate": 6.057898088482118e-06, "loss": 0.3784, "step": 28993 }, { "epoch": 1.330549309347896, "grad_norm": 0.4477469027042389, "learning_rate": 6.057658450169761e-06, "loss": 0.3632, "step": 28994 }, { "epoch": 1.3305951998531504, "grad_norm": 0.4719911515712738, "learning_rate": 6.057418809314069e-06, "loss": 0.4089, "step": 28995 }, { "epoch": 1.3306410903584047, "grad_norm": 0.4626944661140442, "learning_rate": 6.057179165915619e-06, "loss": 0.3487, "step": 28996 }, { "epoch": 1.3306869808636592, "grad_norm": 0.450916588306427, "learning_rate": 6.056939519974986e-06, "loss": 0.3277, "step": 28997 }, { "epoch": 1.3307328713689137, "grad_norm": 0.42776787281036377, "learning_rate": 6.056699871492747e-06, "loss": 0.2854, "step": 28998 }, { "epoch": 1.3307787618741682, "grad_norm": 0.44800302386283875, "learning_rate": 6.056460220469479e-06, "loss": 0.3298, "step": 28999 }, { "epoch": 1.3308246523794227, "grad_norm": 0.46218302845954895, "learning_rate": 6.056220566905759e-06, "loss": 0.3484, "step": 29000 }, { "epoch": 1.3308705428846772, "grad_norm": 0.4502412974834442, "learning_rate": 6.055980910802159e-06, "loss": 0.3195, "step": 29001 }, { "epoch": 1.3309164333899317, "grad_norm": 0.4486466348171234, "learning_rate": 6.05574125215926e-06, "loss": 0.3761, "step": 29002 }, { "epoch": 1.3309623238951862, "grad_norm": 0.4678104519844055, "learning_rate": 6.055501590977635e-06, "loss": 0.3584, "step": 29003 }, { "epoch": 1.3310082144004405, "grad_norm": 0.4770292341709137, "learning_rate": 6.055261927257863e-06, "loss": 0.3524, "step": 29004 }, { "epoch": 1.331054104905695, "grad_norm": 0.48011305928230286, "learning_rate": 6.055022261000519e-06, "loss": 0.3169, "step": 29005 }, { "epoch": 1.3310999954109495, "grad_norm": 0.4528268873691559, "learning_rate": 6.054782592206178e-06, "loss": 0.322, "step": 29006 }, { "epoch": 1.331145885916204, "grad_norm": 0.4829457104206085, "learning_rate": 6.0545429208754205e-06, "loss": 0.3836, "step": 29007 }, { "epoch": 1.3311917764214585, "grad_norm": 0.47240519523620605, "learning_rate": 6.054303247008818e-06, "loss": 0.3461, "step": 29008 }, { "epoch": 1.3312376669267127, "grad_norm": 0.5094849467277527, "learning_rate": 6.05406357060695e-06, "loss": 0.429, "step": 29009 }, { "epoch": 1.3312835574319672, "grad_norm": 0.48100197315216064, "learning_rate": 6.053823891670391e-06, "loss": 0.3358, "step": 29010 }, { "epoch": 1.3313294479372217, "grad_norm": 0.5158067941665649, "learning_rate": 6.0535842101997186e-06, "loss": 0.4049, "step": 29011 }, { "epoch": 1.3313753384424762, "grad_norm": 0.4862521290779114, "learning_rate": 6.053344526195508e-06, "loss": 0.4207, "step": 29012 }, { "epoch": 1.3314212289477307, "grad_norm": 0.5173901915550232, "learning_rate": 6.053104839658337e-06, "loss": 0.3751, "step": 29013 }, { "epoch": 1.3314671194529852, "grad_norm": 0.45692718029022217, "learning_rate": 6.05286515058878e-06, "loss": 0.3559, "step": 29014 }, { "epoch": 1.3315130099582397, "grad_norm": 0.4491845369338989, "learning_rate": 6.052625458987415e-06, "loss": 0.322, "step": 29015 }, { "epoch": 1.3315589004634942, "grad_norm": 0.4488137364387512, "learning_rate": 6.052385764854821e-06, "loss": 0.3175, "step": 29016 }, { "epoch": 1.3316047909687485, "grad_norm": 0.5227344036102295, "learning_rate": 6.052146068191569e-06, "loss": 0.4253, "step": 29017 }, { "epoch": 1.331650681474003, "grad_norm": 0.5366879105567932, "learning_rate": 6.051906368998238e-06, "loss": 0.4597, "step": 29018 }, { "epoch": 1.3316965719792575, "grad_norm": 0.4673616290092468, "learning_rate": 6.051666667275403e-06, "loss": 0.3125, "step": 29019 }, { "epoch": 1.331742462484512, "grad_norm": 0.5072886943817139, "learning_rate": 6.051426963023642e-06, "loss": 0.4252, "step": 29020 }, { "epoch": 1.3317883529897665, "grad_norm": 0.5162843465805054, "learning_rate": 6.051187256243532e-06, "loss": 0.4086, "step": 29021 }, { "epoch": 1.3318342434950208, "grad_norm": 0.4688956141471863, "learning_rate": 6.050947546935648e-06, "loss": 0.3503, "step": 29022 }, { "epoch": 1.3318801340002753, "grad_norm": 0.500285804271698, "learning_rate": 6.050707835100567e-06, "loss": 0.4001, "step": 29023 }, { "epoch": 1.3319260245055298, "grad_norm": 0.4660649299621582, "learning_rate": 6.050468120738866e-06, "loss": 0.3053, "step": 29024 }, { "epoch": 1.3319719150107843, "grad_norm": 0.4897279143333435, "learning_rate": 6.050228403851118e-06, "loss": 0.3962, "step": 29025 }, { "epoch": 1.3320178055160388, "grad_norm": 0.4715103209018707, "learning_rate": 6.049988684437904e-06, "loss": 0.3629, "step": 29026 }, { "epoch": 1.3320636960212933, "grad_norm": 0.45087769627571106, "learning_rate": 6.0497489624998e-06, "loss": 0.3286, "step": 29027 }, { "epoch": 1.3321095865265478, "grad_norm": 0.45602095127105713, "learning_rate": 6.049509238037379e-06, "loss": 0.3176, "step": 29028 }, { "epoch": 1.332155477031802, "grad_norm": 0.5036959052085876, "learning_rate": 6.04926951105122e-06, "loss": 0.4548, "step": 29029 }, { "epoch": 1.3322013675370565, "grad_norm": 0.44432955980300903, "learning_rate": 6.0490297815419e-06, "loss": 0.3067, "step": 29030 }, { "epoch": 1.332247258042311, "grad_norm": 0.48377448320388794, "learning_rate": 6.048790049509992e-06, "loss": 0.4027, "step": 29031 }, { "epoch": 1.3322931485475655, "grad_norm": 0.4680689573287964, "learning_rate": 6.048550314956077e-06, "loss": 0.3543, "step": 29032 }, { "epoch": 1.33233903905282, "grad_norm": 0.47265562415122986, "learning_rate": 6.0483105778807295e-06, "loss": 0.3232, "step": 29033 }, { "epoch": 1.3323849295580743, "grad_norm": 0.4647786617279053, "learning_rate": 6.048070838284524e-06, "loss": 0.3292, "step": 29034 }, { "epoch": 1.3324308200633288, "grad_norm": 0.4825431704521179, "learning_rate": 6.047831096168042e-06, "loss": 0.4088, "step": 29035 }, { "epoch": 1.3324767105685833, "grad_norm": 0.4414055645465851, "learning_rate": 6.047591351531854e-06, "loss": 0.3186, "step": 29036 }, { "epoch": 1.3325226010738378, "grad_norm": 0.4920535385608673, "learning_rate": 6.04735160437654e-06, "loss": 0.3769, "step": 29037 }, { "epoch": 1.3325684915790923, "grad_norm": 0.44899627566337585, "learning_rate": 6.047111854702678e-06, "loss": 0.3417, "step": 29038 }, { "epoch": 1.3326143820843468, "grad_norm": 0.44957560300827026, "learning_rate": 6.04687210251084e-06, "loss": 0.3325, "step": 29039 }, { "epoch": 1.3326602725896013, "grad_norm": 0.48307403922080994, "learning_rate": 6.046632347801606e-06, "loss": 0.3824, "step": 29040 }, { "epoch": 1.3327061630948558, "grad_norm": 0.471717894077301, "learning_rate": 6.046392590575552e-06, "loss": 0.3361, "step": 29041 }, { "epoch": 1.33275205360011, "grad_norm": 0.4740654528141022, "learning_rate": 6.046152830833253e-06, "loss": 0.393, "step": 29042 }, { "epoch": 1.3327979441053646, "grad_norm": 0.5043904781341553, "learning_rate": 6.045913068575287e-06, "loss": 0.4055, "step": 29043 }, { "epoch": 1.332843834610619, "grad_norm": 0.4397449493408203, "learning_rate": 6.045673303802232e-06, "loss": 0.309, "step": 29044 }, { "epoch": 1.3328897251158736, "grad_norm": 0.489668071269989, "learning_rate": 6.04543353651466e-06, "loss": 0.3688, "step": 29045 }, { "epoch": 1.332935615621128, "grad_norm": 0.4514453113079071, "learning_rate": 6.045193766713151e-06, "loss": 0.3236, "step": 29046 }, { "epoch": 1.3329815061263823, "grad_norm": 0.47197476029396057, "learning_rate": 6.044953994398281e-06, "loss": 0.3832, "step": 29047 }, { "epoch": 1.3330273966316368, "grad_norm": 0.4888337254524231, "learning_rate": 6.0447142195706265e-06, "loss": 0.4154, "step": 29048 }, { "epoch": 1.3330732871368913, "grad_norm": 0.4410566985607147, "learning_rate": 6.044474442230765e-06, "loss": 0.3098, "step": 29049 }, { "epoch": 1.3331191776421458, "grad_norm": 0.46776658296585083, "learning_rate": 6.044234662379271e-06, "loss": 0.382, "step": 29050 }, { "epoch": 1.3331650681474003, "grad_norm": 0.47066453099250793, "learning_rate": 6.043994880016722e-06, "loss": 0.3828, "step": 29051 }, { "epoch": 1.3332109586526548, "grad_norm": 0.5110417008399963, "learning_rate": 6.0437550951436965e-06, "loss": 0.3837, "step": 29052 }, { "epoch": 1.3332568491579093, "grad_norm": 0.4114798605442047, "learning_rate": 6.043515307760768e-06, "loss": 0.2627, "step": 29053 }, { "epoch": 1.3333027396631638, "grad_norm": 0.46696728467941284, "learning_rate": 6.043275517868516e-06, "loss": 0.3637, "step": 29054 }, { "epoch": 1.333348630168418, "grad_norm": 0.4680844247341156, "learning_rate": 6.043035725467514e-06, "loss": 0.3729, "step": 29055 }, { "epoch": 1.3333945206736726, "grad_norm": 0.4294092655181885, "learning_rate": 6.042795930558341e-06, "loss": 0.2926, "step": 29056 }, { "epoch": 1.333440411178927, "grad_norm": 0.5240764021873474, "learning_rate": 6.042556133141572e-06, "loss": 0.4671, "step": 29057 }, { "epoch": 1.3334863016841816, "grad_norm": 0.44590306282043457, "learning_rate": 6.0423163332177845e-06, "loss": 0.3209, "step": 29058 }, { "epoch": 1.333532192189436, "grad_norm": 0.46013468503952026, "learning_rate": 6.0420765307875555e-06, "loss": 0.3272, "step": 29059 }, { "epoch": 1.3335780826946904, "grad_norm": 0.4538838267326355, "learning_rate": 6.041836725851462e-06, "loss": 0.3525, "step": 29060 }, { "epoch": 1.3336239731999449, "grad_norm": 0.46814173460006714, "learning_rate": 6.041596918410081e-06, "loss": 0.3442, "step": 29061 }, { "epoch": 1.3336698637051994, "grad_norm": 0.4819115400314331, "learning_rate": 6.041357108463985e-06, "loss": 0.3698, "step": 29062 }, { "epoch": 1.3337157542104539, "grad_norm": 0.48723849654197693, "learning_rate": 6.0411172960137555e-06, "loss": 0.361, "step": 29063 }, { "epoch": 1.3337616447157083, "grad_norm": 0.48952552676200867, "learning_rate": 6.040877481059968e-06, "loss": 0.3999, "step": 29064 }, { "epoch": 1.3338075352209628, "grad_norm": 0.49114885926246643, "learning_rate": 6.0406376636031994e-06, "loss": 0.3813, "step": 29065 }, { "epoch": 1.3338534257262173, "grad_norm": 0.4573996961116791, "learning_rate": 6.040397843644025e-06, "loss": 0.3697, "step": 29066 }, { "epoch": 1.3338993162314718, "grad_norm": 0.4565688371658325, "learning_rate": 6.040158021183021e-06, "loss": 0.354, "step": 29067 }, { "epoch": 1.3339452067367261, "grad_norm": 0.47614648938179016, "learning_rate": 6.0399181962207655e-06, "loss": 0.3609, "step": 29068 }, { "epoch": 1.3339910972419806, "grad_norm": 0.491591215133667, "learning_rate": 6.0396783687578356e-06, "loss": 0.3959, "step": 29069 }, { "epoch": 1.3340369877472351, "grad_norm": 0.4400084912776947, "learning_rate": 6.0394385387948066e-06, "loss": 0.3126, "step": 29070 }, { "epoch": 1.3340828782524896, "grad_norm": 0.4673841893672943, "learning_rate": 6.039198706332256e-06, "loss": 0.3864, "step": 29071 }, { "epoch": 1.3341287687577439, "grad_norm": 0.46085745096206665, "learning_rate": 6.038958871370763e-06, "loss": 0.3372, "step": 29072 }, { "epoch": 1.3341746592629984, "grad_norm": 0.48850929737091064, "learning_rate": 6.038719033910898e-06, "loss": 0.4339, "step": 29073 }, { "epoch": 1.3342205497682529, "grad_norm": 0.4771660268306732, "learning_rate": 6.038479193953244e-06, "loss": 0.4049, "step": 29074 }, { "epoch": 1.3342664402735074, "grad_norm": 0.47540417313575745, "learning_rate": 6.038239351498374e-06, "loss": 0.3767, "step": 29075 }, { "epoch": 1.3343123307787619, "grad_norm": 0.5048332810401917, "learning_rate": 6.0379995065468675e-06, "loss": 0.428, "step": 29076 }, { "epoch": 1.3343582212840164, "grad_norm": 0.48667386174201965, "learning_rate": 6.037759659099298e-06, "loss": 0.3817, "step": 29077 }, { "epoch": 1.3344041117892709, "grad_norm": 0.41540807485580444, "learning_rate": 6.037519809156246e-06, "loss": 0.2676, "step": 29078 }, { "epoch": 1.3344500022945254, "grad_norm": 0.5446277260780334, "learning_rate": 6.037279956718285e-06, "loss": 0.4665, "step": 29079 }, { "epoch": 1.3344958927997796, "grad_norm": 0.4949190616607666, "learning_rate": 6.037040101785993e-06, "loss": 0.4343, "step": 29080 }, { "epoch": 1.3345417833050341, "grad_norm": 0.4788063168525696, "learning_rate": 6.0368002443599475e-06, "loss": 0.3714, "step": 29081 }, { "epoch": 1.3345876738102886, "grad_norm": 0.5107227563858032, "learning_rate": 6.036560384440725e-06, "loss": 0.3988, "step": 29082 }, { "epoch": 1.3346335643155431, "grad_norm": 0.4969682991504669, "learning_rate": 6.0363205220289014e-06, "loss": 0.4347, "step": 29083 }, { "epoch": 1.3346794548207976, "grad_norm": 0.4305925965309143, "learning_rate": 6.036080657125054e-06, "loss": 0.2783, "step": 29084 }, { "epoch": 1.334725345326052, "grad_norm": 0.4621310234069824, "learning_rate": 6.035840789729759e-06, "loss": 0.39, "step": 29085 }, { "epoch": 1.3347712358313064, "grad_norm": 0.49027860164642334, "learning_rate": 6.035600919843594e-06, "loss": 0.4084, "step": 29086 }, { "epoch": 1.334817126336561, "grad_norm": 0.4194851517677307, "learning_rate": 6.035361047467137e-06, "loss": 0.2912, "step": 29087 }, { "epoch": 1.3348630168418154, "grad_norm": 0.4698309004306793, "learning_rate": 6.035121172600962e-06, "loss": 0.3502, "step": 29088 }, { "epoch": 1.33490890734707, "grad_norm": 0.5209037661552429, "learning_rate": 6.034881295245648e-06, "loss": 0.4788, "step": 29089 }, { "epoch": 1.3349547978523244, "grad_norm": 0.4469691514968872, "learning_rate": 6.03464141540177e-06, "loss": 0.3633, "step": 29090 }, { "epoch": 1.335000688357579, "grad_norm": 0.4826221466064453, "learning_rate": 6.034401533069907e-06, "loss": 0.3918, "step": 29091 }, { "epoch": 1.3350465788628334, "grad_norm": 0.48209673166275024, "learning_rate": 6.034161648250635e-06, "loss": 0.3297, "step": 29092 }, { "epoch": 1.3350924693680877, "grad_norm": 0.46209263801574707, "learning_rate": 6.03392176094453e-06, "loss": 0.3968, "step": 29093 }, { "epoch": 1.3351383598733422, "grad_norm": 0.4549507200717926, "learning_rate": 6.03368187115217e-06, "loss": 0.3401, "step": 29094 }, { "epoch": 1.3351842503785967, "grad_norm": 0.46352294087409973, "learning_rate": 6.0334419788741305e-06, "loss": 0.3601, "step": 29095 }, { "epoch": 1.3352301408838512, "grad_norm": 0.4696026146411896, "learning_rate": 6.03320208411099e-06, "loss": 0.3645, "step": 29096 }, { "epoch": 1.3352760313891057, "grad_norm": 0.43401142954826355, "learning_rate": 6.032962186863325e-06, "loss": 0.2856, "step": 29097 }, { "epoch": 1.33532192189436, "grad_norm": 0.4843392074108124, "learning_rate": 6.032722287131712e-06, "loss": 0.4358, "step": 29098 }, { "epoch": 1.3353678123996144, "grad_norm": 0.5364860892295837, "learning_rate": 6.032482384916726e-06, "loss": 0.4417, "step": 29099 }, { "epoch": 1.335413702904869, "grad_norm": 0.4722587466239929, "learning_rate": 6.032242480218947e-06, "loss": 0.3723, "step": 29100 }, { "epoch": 1.3354595934101234, "grad_norm": 0.4423934519290924, "learning_rate": 6.03200257303895e-06, "loss": 0.2997, "step": 29101 }, { "epoch": 1.335505483915378, "grad_norm": 0.4923318028450012, "learning_rate": 6.031762663377312e-06, "loss": 0.4, "step": 29102 }, { "epoch": 1.3355513744206324, "grad_norm": 0.7345426082611084, "learning_rate": 6.031522751234612e-06, "loss": 0.4046, "step": 29103 }, { "epoch": 1.335597264925887, "grad_norm": 0.4803784191608429, "learning_rate": 6.031282836611425e-06, "loss": 0.3589, "step": 29104 }, { "epoch": 1.3356431554311414, "grad_norm": 0.4763603210449219, "learning_rate": 6.031042919508327e-06, "loss": 0.3919, "step": 29105 }, { "epoch": 1.3356890459363957, "grad_norm": 0.43661871552467346, "learning_rate": 6.030802999925898e-06, "loss": 0.333, "step": 29106 }, { "epoch": 1.3357349364416502, "grad_norm": 0.48853445053100586, "learning_rate": 6.0305630778647115e-06, "loss": 0.3877, "step": 29107 }, { "epoch": 1.3357808269469047, "grad_norm": 0.46471449732780457, "learning_rate": 6.030323153325348e-06, "loss": 0.3201, "step": 29108 }, { "epoch": 1.3358267174521592, "grad_norm": 0.47538265585899353, "learning_rate": 6.030083226308381e-06, "loss": 0.3635, "step": 29109 }, { "epoch": 1.3358726079574137, "grad_norm": 0.5639995336532593, "learning_rate": 6.02984329681439e-06, "loss": 0.4227, "step": 29110 }, { "epoch": 1.335918498462668, "grad_norm": 0.49205997586250305, "learning_rate": 6.029603364843949e-06, "loss": 0.3726, "step": 29111 }, { "epoch": 1.3359643889679225, "grad_norm": 0.4508155882358551, "learning_rate": 6.029363430397638e-06, "loss": 0.3507, "step": 29112 }, { "epoch": 1.336010279473177, "grad_norm": 0.4871511161327362, "learning_rate": 6.0291234934760325e-06, "loss": 0.338, "step": 29113 }, { "epoch": 1.3360561699784315, "grad_norm": 0.49588778614997864, "learning_rate": 6.028883554079711e-06, "loss": 0.3799, "step": 29114 }, { "epoch": 1.336102060483686, "grad_norm": 0.4879857301712036, "learning_rate": 6.028643612209248e-06, "loss": 0.3989, "step": 29115 }, { "epoch": 1.3361479509889405, "grad_norm": 0.46847161650657654, "learning_rate": 6.028403667865223e-06, "loss": 0.3592, "step": 29116 }, { "epoch": 1.336193841494195, "grad_norm": 0.4904146194458008, "learning_rate": 6.02816372104821e-06, "loss": 0.3626, "step": 29117 }, { "epoch": 1.3362397319994492, "grad_norm": 0.46270591020584106, "learning_rate": 6.027923771758788e-06, "loss": 0.3229, "step": 29118 }, { "epoch": 1.3362856225047037, "grad_norm": 0.486944317817688, "learning_rate": 6.027683819997535e-06, "loss": 0.3783, "step": 29119 }, { "epoch": 1.3363315130099582, "grad_norm": 0.6919149160385132, "learning_rate": 6.027443865765027e-06, "loss": 0.3677, "step": 29120 }, { "epoch": 1.3363774035152127, "grad_norm": 0.48998329043388367, "learning_rate": 6.0272039090618395e-06, "loss": 0.3782, "step": 29121 }, { "epoch": 1.3364232940204672, "grad_norm": 0.48060932755470276, "learning_rate": 6.026963949888552e-06, "loss": 0.3729, "step": 29122 }, { "epoch": 1.3364691845257215, "grad_norm": 0.5158085227012634, "learning_rate": 6.026723988245738e-06, "loss": 0.4264, "step": 29123 }, { "epoch": 1.336515075030976, "grad_norm": 0.46642041206359863, "learning_rate": 6.026484024133978e-06, "loss": 0.3654, "step": 29124 }, { "epoch": 1.3365609655362305, "grad_norm": 0.45447760820388794, "learning_rate": 6.0262440575538495e-06, "loss": 0.3594, "step": 29125 }, { "epoch": 1.336606856041485, "grad_norm": 0.4749845266342163, "learning_rate": 6.026004088505927e-06, "loss": 0.4262, "step": 29126 }, { "epoch": 1.3366527465467395, "grad_norm": 0.45667627453804016, "learning_rate": 6.025764116990788e-06, "loss": 0.3211, "step": 29127 }, { "epoch": 1.336698637051994, "grad_norm": 0.503494381904602, "learning_rate": 6.025524143009009e-06, "loss": 0.4426, "step": 29128 }, { "epoch": 1.3367445275572485, "grad_norm": 0.4467994272708893, "learning_rate": 6.0252841665611704e-06, "loss": 0.3122, "step": 29129 }, { "epoch": 1.336790418062503, "grad_norm": 0.5051474571228027, "learning_rate": 6.025044187647846e-06, "loss": 0.3807, "step": 29130 }, { "epoch": 1.3368363085677573, "grad_norm": 0.46390625834465027, "learning_rate": 6.0248042062696145e-06, "loss": 0.3113, "step": 29131 }, { "epoch": 1.3368821990730118, "grad_norm": 0.5040279626846313, "learning_rate": 6.024564222427052e-06, "loss": 0.4103, "step": 29132 }, { "epoch": 1.3369280895782663, "grad_norm": 0.42201948165893555, "learning_rate": 6.024324236120735e-06, "loss": 0.2983, "step": 29133 }, { "epoch": 1.3369739800835208, "grad_norm": 0.4583841562271118, "learning_rate": 6.0240842473512415e-06, "loss": 0.3565, "step": 29134 }, { "epoch": 1.3370198705887752, "grad_norm": 0.4819902181625366, "learning_rate": 6.023844256119149e-06, "loss": 0.3512, "step": 29135 }, { "epoch": 1.3370657610940295, "grad_norm": 0.47656092047691345, "learning_rate": 6.023604262425035e-06, "loss": 0.3525, "step": 29136 }, { "epoch": 1.337111651599284, "grad_norm": 0.5208480358123779, "learning_rate": 6.0233642662694755e-06, "loss": 0.4017, "step": 29137 }, { "epoch": 1.3371575421045385, "grad_norm": 0.4388142228126526, "learning_rate": 6.023124267653046e-06, "loss": 0.2908, "step": 29138 }, { "epoch": 1.337203432609793, "grad_norm": 0.43300411105155945, "learning_rate": 6.022884266576328e-06, "loss": 0.2763, "step": 29139 }, { "epoch": 1.3372493231150475, "grad_norm": 0.4286147654056549, "learning_rate": 6.022644263039895e-06, "loss": 0.3175, "step": 29140 }, { "epoch": 1.337295213620302, "grad_norm": 0.47516924142837524, "learning_rate": 6.022404257044327e-06, "loss": 0.3727, "step": 29141 }, { "epoch": 1.3373411041255565, "grad_norm": 0.4633125066757202, "learning_rate": 6.022164248590197e-06, "loss": 0.3797, "step": 29142 }, { "epoch": 1.337386994630811, "grad_norm": 0.4670012295246124, "learning_rate": 6.021924237678085e-06, "loss": 0.3713, "step": 29143 }, { "epoch": 1.3374328851360653, "grad_norm": 0.4662891924381256, "learning_rate": 6.021684224308568e-06, "loss": 0.3578, "step": 29144 }, { "epoch": 1.3374787756413198, "grad_norm": 0.43505269289016724, "learning_rate": 6.021444208482223e-06, "loss": 0.3147, "step": 29145 }, { "epoch": 1.3375246661465743, "grad_norm": 0.4830741584300995, "learning_rate": 6.021204190199627e-06, "loss": 0.3963, "step": 29146 }, { "epoch": 1.3375705566518288, "grad_norm": 0.4690886437892914, "learning_rate": 6.020964169461359e-06, "loss": 0.3139, "step": 29147 }, { "epoch": 1.3376164471570833, "grad_norm": 0.4609993100166321, "learning_rate": 6.0207241462679935e-06, "loss": 0.3112, "step": 29148 }, { "epoch": 1.3376623376623376, "grad_norm": 0.49577590823173523, "learning_rate": 6.020484120620107e-06, "loss": 0.3925, "step": 29149 }, { "epoch": 1.337708228167592, "grad_norm": 0.529504120349884, "learning_rate": 6.020244092518278e-06, "loss": 0.3966, "step": 29150 }, { "epoch": 1.3377541186728465, "grad_norm": 0.4904326796531677, "learning_rate": 6.020004061963086e-06, "loss": 0.384, "step": 29151 }, { "epoch": 1.337800009178101, "grad_norm": 0.48486241698265076, "learning_rate": 6.019764028955107e-06, "loss": 0.3826, "step": 29152 }, { "epoch": 1.3378458996833555, "grad_norm": 0.46342960000038147, "learning_rate": 6.019523993494914e-06, "loss": 0.313, "step": 29153 }, { "epoch": 1.33789179018861, "grad_norm": 0.44884222745895386, "learning_rate": 6.0192839555830895e-06, "loss": 0.3525, "step": 29154 }, { "epoch": 1.3379376806938645, "grad_norm": 0.43570676445961, "learning_rate": 6.019043915220209e-06, "loss": 0.3099, "step": 29155 }, { "epoch": 1.337983571199119, "grad_norm": 0.4415733218193054, "learning_rate": 6.018803872406847e-06, "loss": 0.3582, "step": 29156 }, { "epoch": 1.3380294617043733, "grad_norm": 0.46555572748184204, "learning_rate": 6.018563827143586e-06, "loss": 0.3564, "step": 29157 }, { "epoch": 1.3380753522096278, "grad_norm": 0.4606015086174011, "learning_rate": 6.018323779431001e-06, "loss": 0.3365, "step": 29158 }, { "epoch": 1.3381212427148823, "grad_norm": 0.49656912684440613, "learning_rate": 6.018083729269667e-06, "loss": 0.4424, "step": 29159 }, { "epoch": 1.3381671332201368, "grad_norm": 0.5200998187065125, "learning_rate": 6.017843676660163e-06, "loss": 0.452, "step": 29160 }, { "epoch": 1.338213023725391, "grad_norm": 0.4752213656902313, "learning_rate": 6.017603621603066e-06, "loss": 0.4025, "step": 29161 }, { "epoch": 1.3382589142306456, "grad_norm": 0.5262995958328247, "learning_rate": 6.017363564098954e-06, "loss": 0.5039, "step": 29162 }, { "epoch": 1.3383048047359, "grad_norm": 0.4242538511753082, "learning_rate": 6.017123504148405e-06, "loss": 0.281, "step": 29163 }, { "epoch": 1.3383506952411546, "grad_norm": 0.4679216146469116, "learning_rate": 6.016883441751994e-06, "loss": 0.3451, "step": 29164 }, { "epoch": 1.338396585746409, "grad_norm": 0.4858410358428955, "learning_rate": 6.0166433769102995e-06, "loss": 0.4013, "step": 29165 }, { "epoch": 1.3384424762516636, "grad_norm": 0.4601886570453644, "learning_rate": 6.016403309623897e-06, "loss": 0.3516, "step": 29166 }, { "epoch": 1.338488366756918, "grad_norm": 0.5062465071678162, "learning_rate": 6.016163239893366e-06, "loss": 0.3948, "step": 29167 }, { "epoch": 1.3385342572621726, "grad_norm": 0.47910308837890625, "learning_rate": 6.0159231677192844e-06, "loss": 0.3026, "step": 29168 }, { "epoch": 1.3385801477674268, "grad_norm": 0.4425312578678131, "learning_rate": 6.015683093102227e-06, "loss": 0.2365, "step": 29169 }, { "epoch": 1.3386260382726813, "grad_norm": 0.3974357545375824, "learning_rate": 6.015443016042772e-06, "loss": 0.2507, "step": 29170 }, { "epoch": 1.3386719287779358, "grad_norm": 0.42777663469314575, "learning_rate": 6.015202936541499e-06, "loss": 0.2879, "step": 29171 }, { "epoch": 1.3387178192831903, "grad_norm": 0.49514320492744446, "learning_rate": 6.014962854598981e-06, "loss": 0.3383, "step": 29172 }, { "epoch": 1.3387637097884448, "grad_norm": 0.4531586468219757, "learning_rate": 6.0147227702158e-06, "loss": 0.3209, "step": 29173 }, { "epoch": 1.338809600293699, "grad_norm": 0.48989927768707275, "learning_rate": 6.014482683392529e-06, "loss": 0.458, "step": 29174 }, { "epoch": 1.3388554907989536, "grad_norm": 0.4495933949947357, "learning_rate": 6.014242594129748e-06, "loss": 0.3322, "step": 29175 }, { "epoch": 1.338901381304208, "grad_norm": 0.44451335072517395, "learning_rate": 6.014002502428033e-06, "loss": 0.3086, "step": 29176 }, { "epoch": 1.3389472718094626, "grad_norm": 0.5433889627456665, "learning_rate": 6.013762408287963e-06, "loss": 0.3257, "step": 29177 }, { "epoch": 1.338993162314717, "grad_norm": 0.4271731674671173, "learning_rate": 6.013522311710114e-06, "loss": 0.3107, "step": 29178 }, { "epoch": 1.3390390528199716, "grad_norm": 0.44732773303985596, "learning_rate": 6.013282212695064e-06, "loss": 0.3194, "step": 29179 }, { "epoch": 1.339084943325226, "grad_norm": 0.4863176941871643, "learning_rate": 6.0130421112433905e-06, "loss": 0.3817, "step": 29180 }, { "epoch": 1.3391308338304806, "grad_norm": 0.4189088046550751, "learning_rate": 6.01280200735567e-06, "loss": 0.2958, "step": 29181 }, { "epoch": 1.3391767243357349, "grad_norm": 0.45848438143730164, "learning_rate": 6.01256190103248e-06, "loss": 0.3663, "step": 29182 }, { "epoch": 1.3392226148409894, "grad_norm": 0.461305171251297, "learning_rate": 6.0123217922743975e-06, "loss": 0.3468, "step": 29183 }, { "epoch": 1.3392685053462439, "grad_norm": 0.4286729395389557, "learning_rate": 6.012081681082002e-06, "loss": 0.3124, "step": 29184 }, { "epoch": 1.3393143958514984, "grad_norm": 0.46640273928642273, "learning_rate": 6.011841567455869e-06, "loss": 0.3347, "step": 29185 }, { "epoch": 1.3393602863567529, "grad_norm": 0.45960262417793274, "learning_rate": 6.011601451396576e-06, "loss": 0.3709, "step": 29186 }, { "epoch": 1.3394061768620071, "grad_norm": 0.5025110840797424, "learning_rate": 6.0113613329046995e-06, "loss": 0.4115, "step": 29187 }, { "epoch": 1.3394520673672616, "grad_norm": 0.43345943093299866, "learning_rate": 6.011121211980821e-06, "loss": 0.3528, "step": 29188 }, { "epoch": 1.3394979578725161, "grad_norm": 0.47966325283050537, "learning_rate": 6.010881088625513e-06, "loss": 0.3672, "step": 29189 }, { "epoch": 1.3395438483777706, "grad_norm": 0.4822651445865631, "learning_rate": 6.010640962839356e-06, "loss": 0.4286, "step": 29190 }, { "epoch": 1.3395897388830251, "grad_norm": 0.46695074439048767, "learning_rate": 6.010400834622927e-06, "loss": 0.3477, "step": 29191 }, { "epoch": 1.3396356293882796, "grad_norm": 0.4966851770877838, "learning_rate": 6.010160703976802e-06, "loss": 0.3702, "step": 29192 }, { "epoch": 1.3396815198935341, "grad_norm": 0.511989951133728, "learning_rate": 6.009920570901559e-06, "loss": 0.4057, "step": 29193 }, { "epoch": 1.3397274103987886, "grad_norm": 0.4585323631763458, "learning_rate": 6.0096804353977765e-06, "loss": 0.3958, "step": 29194 }, { "epoch": 1.339773300904043, "grad_norm": 0.43832558393478394, "learning_rate": 6.009440297466031e-06, "loss": 0.3008, "step": 29195 }, { "epoch": 1.3398191914092974, "grad_norm": 0.48879435658454895, "learning_rate": 6.009200157106901e-06, "loss": 0.3257, "step": 29196 }, { "epoch": 1.339865081914552, "grad_norm": 0.4715830385684967, "learning_rate": 6.0089600143209615e-06, "loss": 0.3845, "step": 29197 }, { "epoch": 1.3399109724198064, "grad_norm": 0.4752826988697052, "learning_rate": 6.008719869108793e-06, "loss": 0.3562, "step": 29198 }, { "epoch": 1.3399568629250609, "grad_norm": 0.4479648768901825, "learning_rate": 6.00847972147097e-06, "loss": 0.3128, "step": 29199 }, { "epoch": 1.3400027534303152, "grad_norm": 0.514366090297699, "learning_rate": 6.0082395714080725e-06, "loss": 0.444, "step": 29200 }, { "epoch": 1.3400486439355697, "grad_norm": 0.47742584347724915, "learning_rate": 6.007999418920678e-06, "loss": 0.3997, "step": 29201 }, { "epoch": 1.3400945344408242, "grad_norm": 0.47242453694343567, "learning_rate": 6.007759264009362e-06, "loss": 0.4068, "step": 29202 }, { "epoch": 1.3401404249460787, "grad_norm": 0.47234049439430237, "learning_rate": 6.0075191066747025e-06, "loss": 0.3423, "step": 29203 }, { "epoch": 1.3401863154513332, "grad_norm": 0.5121144652366638, "learning_rate": 6.007278946917278e-06, "loss": 0.4467, "step": 29204 }, { "epoch": 1.3402322059565877, "grad_norm": 0.45934152603149414, "learning_rate": 6.007038784737665e-06, "loss": 0.3231, "step": 29205 }, { "epoch": 1.3402780964618421, "grad_norm": 0.4620693027973175, "learning_rate": 6.006798620136444e-06, "loss": 0.3653, "step": 29206 }, { "epoch": 1.3403239869670964, "grad_norm": 0.44651469588279724, "learning_rate": 6.006558453114187e-06, "loss": 0.3069, "step": 29207 }, { "epoch": 1.340369877472351, "grad_norm": 0.48637107014656067, "learning_rate": 6.006318283671478e-06, "loss": 0.3881, "step": 29208 }, { "epoch": 1.3404157679776054, "grad_norm": 0.4704943895339966, "learning_rate": 6.0060781118088875e-06, "loss": 0.3223, "step": 29209 }, { "epoch": 1.34046165848286, "grad_norm": 0.4406324326992035, "learning_rate": 6.005837937526997e-06, "loss": 0.308, "step": 29210 }, { "epoch": 1.3405075489881144, "grad_norm": 0.4804038405418396, "learning_rate": 6.005597760826386e-06, "loss": 0.4133, "step": 29211 }, { "epoch": 1.3405534394933687, "grad_norm": 0.45972132682800293, "learning_rate": 6.0053575817076294e-06, "loss": 0.3278, "step": 29212 }, { "epoch": 1.3405993299986232, "grad_norm": 0.46383020281791687, "learning_rate": 6.005117400171304e-06, "loss": 0.3179, "step": 29213 }, { "epoch": 1.3406452205038777, "grad_norm": 0.5002734065055847, "learning_rate": 6.004877216217989e-06, "loss": 0.3718, "step": 29214 }, { "epoch": 1.3406911110091322, "grad_norm": 0.42621147632598877, "learning_rate": 6.004637029848261e-06, "loss": 0.295, "step": 29215 }, { "epoch": 1.3407370015143867, "grad_norm": 0.4734596908092499, "learning_rate": 6.004396841062699e-06, "loss": 0.3351, "step": 29216 }, { "epoch": 1.3407828920196412, "grad_norm": 0.4835848808288574, "learning_rate": 6.004156649861879e-06, "loss": 0.3252, "step": 29217 }, { "epoch": 1.3408287825248957, "grad_norm": 0.4600129723548889, "learning_rate": 6.003916456246378e-06, "loss": 0.3439, "step": 29218 }, { "epoch": 1.3408746730301502, "grad_norm": 0.5287603139877319, "learning_rate": 6.0036762602167766e-06, "loss": 0.4619, "step": 29219 }, { "epoch": 1.3409205635354045, "grad_norm": 0.45106521248817444, "learning_rate": 6.00343606177365e-06, "loss": 0.3235, "step": 29220 }, { "epoch": 1.340966454040659, "grad_norm": 0.4701556861400604, "learning_rate": 6.003195860917576e-06, "loss": 0.3741, "step": 29221 }, { "epoch": 1.3410123445459134, "grad_norm": 0.5195840001106262, "learning_rate": 6.0029556576491325e-06, "loss": 0.3878, "step": 29222 }, { "epoch": 1.341058235051168, "grad_norm": 0.4832955598831177, "learning_rate": 6.002715451968898e-06, "loss": 0.3408, "step": 29223 }, { "epoch": 1.3411041255564224, "grad_norm": 0.6319329738616943, "learning_rate": 6.002475243877448e-06, "loss": 0.371, "step": 29224 }, { "epoch": 1.3411500160616767, "grad_norm": 0.47997426986694336, "learning_rate": 6.002235033375363e-06, "loss": 0.3273, "step": 29225 }, { "epoch": 1.3411959065669312, "grad_norm": 0.46783602237701416, "learning_rate": 6.0019948204632175e-06, "loss": 0.3058, "step": 29226 }, { "epoch": 1.3412417970721857, "grad_norm": 0.46668142080307007, "learning_rate": 6.001754605141591e-06, "loss": 0.3735, "step": 29227 }, { "epoch": 1.3412876875774402, "grad_norm": 0.4915667474269867, "learning_rate": 6.001514387411062e-06, "loss": 0.4122, "step": 29228 }, { "epoch": 1.3413335780826947, "grad_norm": 0.46938446164131165, "learning_rate": 6.001274167272206e-06, "loss": 0.3416, "step": 29229 }, { "epoch": 1.3413794685879492, "grad_norm": 0.45874449610710144, "learning_rate": 6.001033944725602e-06, "loss": 0.3811, "step": 29230 }, { "epoch": 1.3414253590932037, "grad_norm": 0.4958639144897461, "learning_rate": 6.0007937197718266e-06, "loss": 0.4073, "step": 29231 }, { "epoch": 1.3414712495984582, "grad_norm": 0.4950929284095764, "learning_rate": 6.00055349241146e-06, "loss": 0.3752, "step": 29232 }, { "epoch": 1.3415171401037125, "grad_norm": 0.5208605527877808, "learning_rate": 6.000313262645077e-06, "loss": 0.4061, "step": 29233 }, { "epoch": 1.341563030608967, "grad_norm": 0.4799065887928009, "learning_rate": 6.000073030473256e-06, "loss": 0.3618, "step": 29234 }, { "epoch": 1.3416089211142215, "grad_norm": 0.47568821907043457, "learning_rate": 5.999832795896574e-06, "loss": 0.3414, "step": 29235 }, { "epoch": 1.341654811619476, "grad_norm": 0.473205029964447, "learning_rate": 5.9995925589156126e-06, "loss": 0.3282, "step": 29236 }, { "epoch": 1.3417007021247305, "grad_norm": 0.45543763041496277, "learning_rate": 5.999352319530943e-06, "loss": 0.3263, "step": 29237 }, { "epoch": 1.3417465926299847, "grad_norm": 0.491754949092865, "learning_rate": 5.999112077743148e-06, "loss": 0.4378, "step": 29238 }, { "epoch": 1.3417924831352392, "grad_norm": 0.5080037713050842, "learning_rate": 5.998871833552806e-06, "loss": 0.4605, "step": 29239 }, { "epoch": 1.3418383736404937, "grad_norm": 0.49499720335006714, "learning_rate": 5.998631586960489e-06, "loss": 0.4204, "step": 29240 }, { "epoch": 1.3418842641457482, "grad_norm": 0.4667193591594696, "learning_rate": 5.99839133796678e-06, "loss": 0.3662, "step": 29241 }, { "epoch": 1.3419301546510027, "grad_norm": 0.47726163268089294, "learning_rate": 5.998151086572255e-06, "loss": 0.4058, "step": 29242 }, { "epoch": 1.3419760451562572, "grad_norm": 0.45335137844085693, "learning_rate": 5.997910832777491e-06, "loss": 0.3492, "step": 29243 }, { "epoch": 1.3420219356615117, "grad_norm": 0.49384984374046326, "learning_rate": 5.997670576583066e-06, "loss": 0.3954, "step": 29244 }, { "epoch": 1.3420678261667662, "grad_norm": 0.4665132462978363, "learning_rate": 5.997430317989561e-06, "loss": 0.3708, "step": 29245 }, { "epoch": 1.3421137166720205, "grad_norm": 0.4168911278247833, "learning_rate": 5.997190056997548e-06, "loss": 0.289, "step": 29246 }, { "epoch": 1.342159607177275, "grad_norm": 0.5059893131256104, "learning_rate": 5.996949793607608e-06, "loss": 0.4336, "step": 29247 }, { "epoch": 1.3422054976825295, "grad_norm": 0.4751115143299103, "learning_rate": 5.996709527820319e-06, "loss": 0.3896, "step": 29248 }, { "epoch": 1.342251388187784, "grad_norm": 0.4761570990085602, "learning_rate": 5.996469259636257e-06, "loss": 0.3447, "step": 29249 }, { "epoch": 1.3422972786930383, "grad_norm": 0.46875420212745667, "learning_rate": 5.996228989056004e-06, "loss": 0.3623, "step": 29250 }, { "epoch": 1.3423431691982928, "grad_norm": 0.4587228298187256, "learning_rate": 5.995988716080131e-06, "loss": 0.3514, "step": 29251 }, { "epoch": 1.3423890597035473, "grad_norm": 0.5551150441169739, "learning_rate": 5.995748440709221e-06, "loss": 0.3841, "step": 29252 }, { "epoch": 1.3424349502088018, "grad_norm": 0.45440763235092163, "learning_rate": 5.995508162943851e-06, "loss": 0.3104, "step": 29253 }, { "epoch": 1.3424808407140563, "grad_norm": 0.441155344247818, "learning_rate": 5.995267882784597e-06, "loss": 0.3069, "step": 29254 }, { "epoch": 1.3425267312193108, "grad_norm": 0.4764816462993622, "learning_rate": 5.995027600232038e-06, "loss": 0.3589, "step": 29255 }, { "epoch": 1.3425726217245653, "grad_norm": 0.47266799211502075, "learning_rate": 5.994787315286753e-06, "loss": 0.3876, "step": 29256 }, { "epoch": 1.3426185122298198, "grad_norm": 0.46778780221939087, "learning_rate": 5.994547027949317e-06, "loss": 0.4038, "step": 29257 }, { "epoch": 1.342664402735074, "grad_norm": 0.4520222842693329, "learning_rate": 5.994306738220308e-06, "loss": 0.3226, "step": 29258 }, { "epoch": 1.3427102932403285, "grad_norm": 0.4519561231136322, "learning_rate": 5.9940664461003086e-06, "loss": 0.3265, "step": 29259 }, { "epoch": 1.342756183745583, "grad_norm": 0.46091336011886597, "learning_rate": 5.99382615158989e-06, "loss": 0.3535, "step": 29260 }, { "epoch": 1.3428020742508375, "grad_norm": 0.4783730208873749, "learning_rate": 5.993585854689637e-06, "loss": 0.3242, "step": 29261 }, { "epoch": 1.342847964756092, "grad_norm": 0.48416611552238464, "learning_rate": 5.99334555540012e-06, "loss": 0.3499, "step": 29262 }, { "epoch": 1.3428938552613463, "grad_norm": 0.4815567135810852, "learning_rate": 5.99310525372192e-06, "loss": 0.3739, "step": 29263 }, { "epoch": 1.3429397457666008, "grad_norm": 0.4653523862361908, "learning_rate": 5.9928649496556175e-06, "loss": 0.372, "step": 29264 }, { "epoch": 1.3429856362718553, "grad_norm": 0.45613953471183777, "learning_rate": 5.9926246432017875e-06, "loss": 0.3495, "step": 29265 }, { "epoch": 1.3430315267771098, "grad_norm": 0.49128955602645874, "learning_rate": 5.9923843343610085e-06, "loss": 0.3955, "step": 29266 }, { "epoch": 1.3430774172823643, "grad_norm": 0.44284576177597046, "learning_rate": 5.992144023133859e-06, "loss": 0.3216, "step": 29267 }, { "epoch": 1.3431233077876188, "grad_norm": 0.496848464012146, "learning_rate": 5.991903709520915e-06, "loss": 0.3998, "step": 29268 }, { "epoch": 1.3431691982928733, "grad_norm": 0.4777551293373108, "learning_rate": 5.991663393522755e-06, "loss": 0.3358, "step": 29269 }, { "epoch": 1.3432150887981278, "grad_norm": 0.5262637138366699, "learning_rate": 5.991423075139959e-06, "loss": 0.4132, "step": 29270 }, { "epoch": 1.343260979303382, "grad_norm": 0.48929035663604736, "learning_rate": 5.991182754373104e-06, "loss": 0.3423, "step": 29271 }, { "epoch": 1.3433068698086366, "grad_norm": 0.4848423898220062, "learning_rate": 5.990942431222766e-06, "loss": 0.3328, "step": 29272 }, { "epoch": 1.343352760313891, "grad_norm": 0.4752117991447449, "learning_rate": 5.990702105689525e-06, "loss": 0.412, "step": 29273 }, { "epoch": 1.3433986508191456, "grad_norm": 0.4373140335083008, "learning_rate": 5.990461777773958e-06, "loss": 0.3372, "step": 29274 }, { "epoch": 1.3434445413244, "grad_norm": 0.46440523862838745, "learning_rate": 5.990221447476642e-06, "loss": 0.359, "step": 29275 }, { "epoch": 1.3434904318296543, "grad_norm": 0.46621769666671753, "learning_rate": 5.989981114798157e-06, "loss": 0.4351, "step": 29276 }, { "epoch": 1.3435363223349088, "grad_norm": 0.48791220784187317, "learning_rate": 5.9897407797390805e-06, "loss": 0.3422, "step": 29277 }, { "epoch": 1.3435822128401633, "grad_norm": 0.4702817499637604, "learning_rate": 5.989500442299989e-06, "loss": 0.3847, "step": 29278 }, { "epoch": 1.3436281033454178, "grad_norm": 0.4191371500492096, "learning_rate": 5.989260102481461e-06, "loss": 0.2699, "step": 29279 }, { "epoch": 1.3436739938506723, "grad_norm": 0.4672558307647705, "learning_rate": 5.989019760284074e-06, "loss": 0.3237, "step": 29280 }, { "epoch": 1.3437198843559268, "grad_norm": 0.4731612503528595, "learning_rate": 5.988779415708408e-06, "loss": 0.3365, "step": 29281 }, { "epoch": 1.3437657748611813, "grad_norm": 0.4600823223590851, "learning_rate": 5.988539068755039e-06, "loss": 0.3741, "step": 29282 }, { "epoch": 1.3438116653664358, "grad_norm": 0.4978110194206238, "learning_rate": 5.988298719424546e-06, "loss": 0.4088, "step": 29283 }, { "epoch": 1.34385755587169, "grad_norm": 0.4753190875053406, "learning_rate": 5.988058367717506e-06, "loss": 0.3013, "step": 29284 }, { "epoch": 1.3439034463769446, "grad_norm": 0.46011069416999817, "learning_rate": 5.987818013634497e-06, "loss": 0.3252, "step": 29285 }, { "epoch": 1.343949336882199, "grad_norm": 0.4473492205142975, "learning_rate": 5.987577657176097e-06, "loss": 0.3192, "step": 29286 }, { "epoch": 1.3439952273874536, "grad_norm": 0.4538627862930298, "learning_rate": 5.9873372983428855e-06, "loss": 0.3417, "step": 29287 }, { "epoch": 1.344041117892708, "grad_norm": 0.5157557725906372, "learning_rate": 5.987096937135441e-06, "loss": 0.4165, "step": 29288 }, { "epoch": 1.3440870083979624, "grad_norm": 0.5188693404197693, "learning_rate": 5.9868565735543375e-06, "loss": 0.4651, "step": 29289 }, { "epoch": 1.3441328989032169, "grad_norm": 0.4536497890949249, "learning_rate": 5.9866162076001555e-06, "loss": 0.3454, "step": 29290 }, { "epoch": 1.3441787894084714, "grad_norm": 0.46054399013519287, "learning_rate": 5.986375839273474e-06, "loss": 0.3482, "step": 29291 }, { "epoch": 1.3442246799137259, "grad_norm": 0.4543088674545288, "learning_rate": 5.986135468574868e-06, "loss": 0.3568, "step": 29292 }, { "epoch": 1.3442705704189803, "grad_norm": 0.4450756907463074, "learning_rate": 5.98589509550492e-06, "loss": 0.3319, "step": 29293 }, { "epoch": 1.3443164609242348, "grad_norm": 0.4665926992893219, "learning_rate": 5.9856547200642045e-06, "loss": 0.3656, "step": 29294 }, { "epoch": 1.3443623514294893, "grad_norm": 0.47160178422927856, "learning_rate": 5.985414342253301e-06, "loss": 0.345, "step": 29295 }, { "epoch": 1.3444082419347436, "grad_norm": 0.47906485199928284, "learning_rate": 5.985173962072786e-06, "loss": 0.412, "step": 29296 }, { "epoch": 1.3444541324399981, "grad_norm": 0.49599015712738037, "learning_rate": 5.984933579523238e-06, "loss": 0.402, "step": 29297 }, { "epoch": 1.3445000229452526, "grad_norm": 0.43745845556259155, "learning_rate": 5.9846931946052365e-06, "loss": 0.3526, "step": 29298 }, { "epoch": 1.3445459134505071, "grad_norm": 0.4768558442592621, "learning_rate": 5.984452807319359e-06, "loss": 0.3544, "step": 29299 }, { "epoch": 1.3445918039557616, "grad_norm": 0.4561288058757782, "learning_rate": 5.984212417666182e-06, "loss": 0.3838, "step": 29300 }, { "epoch": 1.3446376944610159, "grad_norm": 0.4625436067581177, "learning_rate": 5.983972025646286e-06, "loss": 0.377, "step": 29301 }, { "epoch": 1.3446835849662704, "grad_norm": 0.5314272046089172, "learning_rate": 5.983731631260247e-06, "loss": 0.3961, "step": 29302 }, { "epoch": 1.3447294754715249, "grad_norm": 0.47937533259391785, "learning_rate": 5.983491234508644e-06, "loss": 0.3598, "step": 29303 }, { "epoch": 1.3447753659767794, "grad_norm": 0.5026845335960388, "learning_rate": 5.9832508353920556e-06, "loss": 0.4013, "step": 29304 }, { "epoch": 1.3448212564820339, "grad_norm": 0.48508208990097046, "learning_rate": 5.9830104339110595e-06, "loss": 0.3907, "step": 29305 }, { "epoch": 1.3448671469872884, "grad_norm": 0.45594292879104614, "learning_rate": 5.982770030066232e-06, "loss": 0.35, "step": 29306 }, { "epoch": 1.3449130374925429, "grad_norm": 0.4764085114002228, "learning_rate": 5.982529623858154e-06, "loss": 0.3926, "step": 29307 }, { "epoch": 1.3449589279977974, "grad_norm": 0.43037670850753784, "learning_rate": 5.982289215287402e-06, "loss": 0.2791, "step": 29308 }, { "epoch": 1.3450048185030516, "grad_norm": 0.47275522351264954, "learning_rate": 5.9820488043545545e-06, "loss": 0.3858, "step": 29309 }, { "epoch": 1.3450507090083061, "grad_norm": 0.44493111968040466, "learning_rate": 5.981808391060191e-06, "loss": 0.3403, "step": 29310 }, { "epoch": 1.3450965995135606, "grad_norm": 0.4585282802581787, "learning_rate": 5.981567975404885e-06, "loss": 0.3438, "step": 29311 }, { "epoch": 1.3451424900188151, "grad_norm": 0.43969306349754333, "learning_rate": 5.981327557389221e-06, "loss": 0.3205, "step": 29312 }, { "epoch": 1.3451883805240696, "grad_norm": 0.5262542963027954, "learning_rate": 5.9810871370137715e-06, "loss": 0.4454, "step": 29313 }, { "epoch": 1.345234271029324, "grad_norm": 0.435591459274292, "learning_rate": 5.980846714279118e-06, "loss": 0.3106, "step": 29314 }, { "epoch": 1.3452801615345784, "grad_norm": 0.45104286074638367, "learning_rate": 5.9806062891858376e-06, "loss": 0.3516, "step": 29315 }, { "epoch": 1.345326052039833, "grad_norm": 0.5893418788909912, "learning_rate": 5.98036586173451e-06, "loss": 0.4378, "step": 29316 }, { "epoch": 1.3453719425450874, "grad_norm": 0.45053625106811523, "learning_rate": 5.98012543192571e-06, "loss": 0.3369, "step": 29317 }, { "epoch": 1.345417833050342, "grad_norm": 0.4683852791786194, "learning_rate": 5.9798849997600185e-06, "loss": 0.3786, "step": 29318 }, { "epoch": 1.3454637235555964, "grad_norm": 0.4448968470096588, "learning_rate": 5.979644565238012e-06, "loss": 0.3393, "step": 29319 }, { "epoch": 1.345509614060851, "grad_norm": 0.4790360629558563, "learning_rate": 5.979404128360271e-06, "loss": 0.4087, "step": 29320 }, { "epoch": 1.3455555045661054, "grad_norm": 0.45943203568458557, "learning_rate": 5.979163689127372e-06, "loss": 0.3557, "step": 29321 }, { "epoch": 1.3456013950713597, "grad_norm": 0.4400697648525238, "learning_rate": 5.978923247539892e-06, "loss": 0.3112, "step": 29322 }, { "epoch": 1.3456472855766142, "grad_norm": 0.45252957940101624, "learning_rate": 5.97868280359841e-06, "loss": 0.3261, "step": 29323 }, { "epoch": 1.3456931760818687, "grad_norm": 0.4798428416252136, "learning_rate": 5.978442357303507e-06, "loss": 0.4099, "step": 29324 }, { "epoch": 1.3457390665871232, "grad_norm": 0.4590834379196167, "learning_rate": 5.978201908655757e-06, "loss": 0.3107, "step": 29325 }, { "epoch": 1.3457849570923777, "grad_norm": 0.4403301477432251, "learning_rate": 5.977961457655742e-06, "loss": 0.3357, "step": 29326 }, { "epoch": 1.345830847597632, "grad_norm": 0.46569088101387024, "learning_rate": 5.977721004304038e-06, "loss": 0.343, "step": 29327 }, { "epoch": 1.3458767381028864, "grad_norm": 0.4549632668495178, "learning_rate": 5.977480548601222e-06, "loss": 0.3545, "step": 29328 }, { "epoch": 1.345922628608141, "grad_norm": 0.4642695486545563, "learning_rate": 5.977240090547876e-06, "loss": 0.3385, "step": 29329 }, { "epoch": 1.3459685191133954, "grad_norm": 0.49309054017066956, "learning_rate": 5.976999630144574e-06, "loss": 0.3843, "step": 29330 }, { "epoch": 1.34601440961865, "grad_norm": 0.44496285915374756, "learning_rate": 5.9767591673918975e-06, "loss": 0.3335, "step": 29331 }, { "epoch": 1.3460603001239044, "grad_norm": 0.4439498484134674, "learning_rate": 5.976518702290424e-06, "loss": 0.3199, "step": 29332 }, { "epoch": 1.346106190629159, "grad_norm": 0.4783628284931183, "learning_rate": 5.976278234840729e-06, "loss": 0.359, "step": 29333 }, { "epoch": 1.3461520811344134, "grad_norm": 0.4711025059223175, "learning_rate": 5.976037765043394e-06, "loss": 0.3652, "step": 29334 }, { "epoch": 1.3461979716396677, "grad_norm": 0.441425621509552, "learning_rate": 5.975797292898997e-06, "loss": 0.3146, "step": 29335 }, { "epoch": 1.3462438621449222, "grad_norm": 0.47010689973831177, "learning_rate": 5.975556818408115e-06, "loss": 0.3297, "step": 29336 }, { "epoch": 1.3462897526501767, "grad_norm": 0.4925490617752075, "learning_rate": 5.975316341571326e-06, "loss": 0.3939, "step": 29337 }, { "epoch": 1.3463356431554312, "grad_norm": 0.5271863341331482, "learning_rate": 5.9750758623892105e-06, "loss": 0.3532, "step": 29338 }, { "epoch": 1.3463815336606855, "grad_norm": 0.5285645723342896, "learning_rate": 5.974835380862344e-06, "loss": 0.4372, "step": 29339 }, { "epoch": 1.34642742416594, "grad_norm": 0.5414443612098694, "learning_rate": 5.974594896991306e-06, "loss": 0.4078, "step": 29340 }, { "epoch": 1.3464733146711945, "grad_norm": 0.4740733206272125, "learning_rate": 5.974354410776676e-06, "loss": 0.3625, "step": 29341 }, { "epoch": 1.346519205176449, "grad_norm": 0.48613595962524414, "learning_rate": 5.974113922219031e-06, "loss": 0.4014, "step": 29342 }, { "epoch": 1.3465650956817035, "grad_norm": 0.4630837142467499, "learning_rate": 5.97387343131895e-06, "loss": 0.3284, "step": 29343 }, { "epoch": 1.346610986186958, "grad_norm": 0.4600975215435028, "learning_rate": 5.9736329380770085e-06, "loss": 0.3571, "step": 29344 }, { "epoch": 1.3466568766922125, "grad_norm": 0.42264437675476074, "learning_rate": 5.973392442493789e-06, "loss": 0.2779, "step": 29345 }, { "epoch": 1.346702767197467, "grad_norm": 0.44337841868400574, "learning_rate": 5.973151944569866e-06, "loss": 0.3246, "step": 29346 }, { "epoch": 1.3467486577027212, "grad_norm": 0.5059216022491455, "learning_rate": 5.972911444305821e-06, "loss": 0.4, "step": 29347 }, { "epoch": 1.3467945482079757, "grad_norm": 0.4399208426475525, "learning_rate": 5.972670941702231e-06, "loss": 0.3285, "step": 29348 }, { "epoch": 1.3468404387132302, "grad_norm": 0.4736819565296173, "learning_rate": 5.9724304367596755e-06, "loss": 0.3099, "step": 29349 }, { "epoch": 1.3468863292184847, "grad_norm": 0.4642826318740845, "learning_rate": 5.97218992947873e-06, "loss": 0.3158, "step": 29350 }, { "epoch": 1.3469322197237392, "grad_norm": 0.464168518781662, "learning_rate": 5.971949419859975e-06, "loss": 0.3976, "step": 29351 }, { "epoch": 1.3469781102289935, "grad_norm": 0.46343088150024414, "learning_rate": 5.9717089079039885e-06, "loss": 0.3739, "step": 29352 }, { "epoch": 1.347024000734248, "grad_norm": 0.5674736499786377, "learning_rate": 5.971468393611349e-06, "loss": 0.3085, "step": 29353 }, { "epoch": 1.3470698912395025, "grad_norm": 0.5191990733146667, "learning_rate": 5.971227876982634e-06, "loss": 0.4908, "step": 29354 }, { "epoch": 1.347115781744757, "grad_norm": 0.4946841299533844, "learning_rate": 5.970987358018423e-06, "loss": 0.3823, "step": 29355 }, { "epoch": 1.3471616722500115, "grad_norm": 0.4473111927509308, "learning_rate": 5.970746836719293e-06, "loss": 0.3305, "step": 29356 }, { "epoch": 1.347207562755266, "grad_norm": 0.47448456287384033, "learning_rate": 5.970506313085824e-06, "loss": 0.3733, "step": 29357 }, { "epoch": 1.3472534532605205, "grad_norm": 0.4469602108001709, "learning_rate": 5.970265787118595e-06, "loss": 0.3276, "step": 29358 }, { "epoch": 1.347299343765775, "grad_norm": 0.5028883814811707, "learning_rate": 5.970025258818181e-06, "loss": 0.4114, "step": 29359 }, { "epoch": 1.3473452342710293, "grad_norm": 0.47064855694770813, "learning_rate": 5.969784728185164e-06, "loss": 0.315, "step": 29360 }, { "epoch": 1.3473911247762838, "grad_norm": 0.5612934231758118, "learning_rate": 5.969544195220118e-06, "loss": 0.3678, "step": 29361 }, { "epoch": 1.3474370152815383, "grad_norm": 0.6244404911994934, "learning_rate": 5.969303659923626e-06, "loss": 0.3793, "step": 29362 }, { "epoch": 1.3474829057867928, "grad_norm": 0.4776374101638794, "learning_rate": 5.969063122296265e-06, "loss": 0.3273, "step": 29363 }, { "epoch": 1.3475287962920472, "grad_norm": 0.5199674367904663, "learning_rate": 5.968822582338614e-06, "loss": 0.3704, "step": 29364 }, { "epoch": 1.3475746867973015, "grad_norm": 0.45372694730758667, "learning_rate": 5.968582040051247e-06, "loss": 0.3394, "step": 29365 }, { "epoch": 1.347620577302556, "grad_norm": 0.6402072906494141, "learning_rate": 5.968341495434749e-06, "loss": 0.4561, "step": 29366 }, { "epoch": 1.3476664678078105, "grad_norm": 0.5149707794189453, "learning_rate": 5.9681009484896944e-06, "loss": 0.4432, "step": 29367 }, { "epoch": 1.347712358313065, "grad_norm": 0.5099077224731445, "learning_rate": 5.967860399216662e-06, "loss": 0.4173, "step": 29368 }, { "epoch": 1.3477582488183195, "grad_norm": 0.46264445781707764, "learning_rate": 5.967619847616232e-06, "loss": 0.3654, "step": 29369 }, { "epoch": 1.347804139323574, "grad_norm": 0.4301794469356537, "learning_rate": 5.967379293688981e-06, "loss": 0.2864, "step": 29370 }, { "epoch": 1.3478500298288285, "grad_norm": 0.4316639006137848, "learning_rate": 5.967138737435487e-06, "loss": 0.2683, "step": 29371 }, { "epoch": 1.347895920334083, "grad_norm": 0.49088308215141296, "learning_rate": 5.966898178856332e-06, "loss": 0.3861, "step": 29372 }, { "epoch": 1.3479418108393373, "grad_norm": 0.4735565483570099, "learning_rate": 5.9666576179520895e-06, "loss": 0.3481, "step": 29373 }, { "epoch": 1.3479877013445918, "grad_norm": 0.4919901490211487, "learning_rate": 5.966417054723343e-06, "loss": 0.3967, "step": 29374 }, { "epoch": 1.3480335918498463, "grad_norm": 0.45784762501716614, "learning_rate": 5.9661764891706664e-06, "loss": 0.3617, "step": 29375 }, { "epoch": 1.3480794823551008, "grad_norm": 0.46149709820747375, "learning_rate": 5.965935921294641e-06, "loss": 0.3855, "step": 29376 }, { "epoch": 1.3481253728603553, "grad_norm": 0.4719356298446655, "learning_rate": 5.965695351095845e-06, "loss": 0.3653, "step": 29377 }, { "epoch": 1.3481712633656096, "grad_norm": 0.48319998383522034, "learning_rate": 5.965454778574855e-06, "loss": 0.3695, "step": 29378 }, { "epoch": 1.348217153870864, "grad_norm": 0.4846431314945221, "learning_rate": 5.965214203732252e-06, "loss": 0.3842, "step": 29379 }, { "epoch": 1.3482630443761185, "grad_norm": 0.5122337341308594, "learning_rate": 5.964973626568614e-06, "loss": 0.4154, "step": 29380 }, { "epoch": 1.348308934881373, "grad_norm": 0.47419509291648865, "learning_rate": 5.964733047084519e-06, "loss": 0.4186, "step": 29381 }, { "epoch": 1.3483548253866275, "grad_norm": 0.472273588180542, "learning_rate": 5.964492465280544e-06, "loss": 0.3713, "step": 29382 }, { "epoch": 1.348400715891882, "grad_norm": 0.46414420008659363, "learning_rate": 5.964251881157271e-06, "loss": 0.3691, "step": 29383 }, { "epoch": 1.3484466063971365, "grad_norm": 0.4281494915485382, "learning_rate": 5.964011294715275e-06, "loss": 0.3025, "step": 29384 }, { "epoch": 1.3484924969023908, "grad_norm": 0.47918418049812317, "learning_rate": 5.9637707059551375e-06, "loss": 0.3895, "step": 29385 }, { "epoch": 1.3485383874076453, "grad_norm": 0.47423022985458374, "learning_rate": 5.963530114877435e-06, "loss": 0.3758, "step": 29386 }, { "epoch": 1.3485842779128998, "grad_norm": 0.5077845454216003, "learning_rate": 5.963289521482746e-06, "loss": 0.4373, "step": 29387 }, { "epoch": 1.3486301684181543, "grad_norm": 0.4876904785633087, "learning_rate": 5.963048925771649e-06, "loss": 0.3778, "step": 29388 }, { "epoch": 1.3486760589234088, "grad_norm": 0.42401590943336487, "learning_rate": 5.962808327744726e-06, "loss": 0.2791, "step": 29389 }, { "epoch": 1.348721949428663, "grad_norm": 0.47485730051994324, "learning_rate": 5.9625677274025505e-06, "loss": 0.3777, "step": 29390 }, { "epoch": 1.3487678399339176, "grad_norm": 0.4733969271183014, "learning_rate": 5.9623271247457035e-06, "loss": 0.3516, "step": 29391 }, { "epoch": 1.348813730439172, "grad_norm": 0.44413211941719055, "learning_rate": 5.962086519774766e-06, "loss": 0.3498, "step": 29392 }, { "epoch": 1.3488596209444266, "grad_norm": 0.43104058504104614, "learning_rate": 5.961845912490312e-06, "loss": 0.3161, "step": 29393 }, { "epoch": 1.348905511449681, "grad_norm": 0.4933432936668396, "learning_rate": 5.961605302892922e-06, "loss": 0.3813, "step": 29394 }, { "epoch": 1.3489514019549356, "grad_norm": 0.49444133043289185, "learning_rate": 5.961364690983175e-06, "loss": 0.397, "step": 29395 }, { "epoch": 1.34899729246019, "grad_norm": 0.4660189151763916, "learning_rate": 5.9611240767616496e-06, "loss": 0.3517, "step": 29396 }, { "epoch": 1.3490431829654446, "grad_norm": 0.4854762554168701, "learning_rate": 5.960883460228925e-06, "loss": 0.3957, "step": 29397 }, { "epoch": 1.3490890734706988, "grad_norm": 0.4800436496734619, "learning_rate": 5.960642841385576e-06, "loss": 0.3725, "step": 29398 }, { "epoch": 1.3491349639759533, "grad_norm": 0.47272545099258423, "learning_rate": 5.960402220232187e-06, "loss": 0.3704, "step": 29399 }, { "epoch": 1.3491808544812078, "grad_norm": 0.4724104404449463, "learning_rate": 5.960161596769333e-06, "loss": 0.3767, "step": 29400 }, { "epoch": 1.3492267449864623, "grad_norm": 0.4776090979576111, "learning_rate": 5.959920970997592e-06, "loss": 0.3849, "step": 29401 }, { "epoch": 1.3492726354917168, "grad_norm": 0.46469059586524963, "learning_rate": 5.959680342917547e-06, "loss": 0.364, "step": 29402 }, { "epoch": 1.349318525996971, "grad_norm": 0.4276329278945923, "learning_rate": 5.959439712529772e-06, "loss": 0.2984, "step": 29403 }, { "epoch": 1.3493644165022256, "grad_norm": 0.44224753975868225, "learning_rate": 5.959199079834847e-06, "loss": 0.3053, "step": 29404 }, { "epoch": 1.34941030700748, "grad_norm": 0.4662752151489258, "learning_rate": 5.9589584448333505e-06, "loss": 0.3575, "step": 29405 }, { "epoch": 1.3494561975127346, "grad_norm": 0.4530302882194519, "learning_rate": 5.958717807525862e-06, "loss": 0.3401, "step": 29406 }, { "epoch": 1.349502088017989, "grad_norm": 0.4537728428840637, "learning_rate": 5.958477167912961e-06, "loss": 0.3228, "step": 29407 }, { "epoch": 1.3495479785232436, "grad_norm": 0.46529683470726013, "learning_rate": 5.958236525995225e-06, "loss": 0.3664, "step": 29408 }, { "epoch": 1.349593869028498, "grad_norm": 0.5176889300346375, "learning_rate": 5.957995881773231e-06, "loss": 0.4395, "step": 29409 }, { "epoch": 1.3496397595337526, "grad_norm": 0.43422916531562805, "learning_rate": 5.957755235247558e-06, "loss": 0.2814, "step": 29410 }, { "epoch": 1.3496856500390069, "grad_norm": 0.5037030577659607, "learning_rate": 5.957514586418789e-06, "loss": 0.362, "step": 29411 }, { "epoch": 1.3497315405442614, "grad_norm": 0.45877185463905334, "learning_rate": 5.957273935287498e-06, "loss": 0.3481, "step": 29412 }, { "epoch": 1.3497774310495159, "grad_norm": 0.4633530080318451, "learning_rate": 5.957033281854266e-06, "loss": 0.3626, "step": 29413 }, { "epoch": 1.3498233215547704, "grad_norm": 0.4563622772693634, "learning_rate": 5.956792626119672e-06, "loss": 0.3554, "step": 29414 }, { "epoch": 1.3498692120600249, "grad_norm": 0.4340989589691162, "learning_rate": 5.956551968084291e-06, "loss": 0.3199, "step": 29415 }, { "epoch": 1.3499151025652791, "grad_norm": 0.4400111138820648, "learning_rate": 5.956311307748706e-06, "loss": 0.2877, "step": 29416 }, { "epoch": 1.3499609930705336, "grad_norm": 0.4630332887172699, "learning_rate": 5.9560706451134945e-06, "loss": 0.359, "step": 29417 }, { "epoch": 1.3500068835757881, "grad_norm": 0.4595993757247925, "learning_rate": 5.955829980179234e-06, "loss": 0.355, "step": 29418 }, { "epoch": 1.3500527740810426, "grad_norm": 0.4341086149215698, "learning_rate": 5.955589312946504e-06, "loss": 0.3327, "step": 29419 }, { "epoch": 1.3500986645862971, "grad_norm": 0.4963456690311432, "learning_rate": 5.955348643415884e-06, "loss": 0.3799, "step": 29420 }, { "epoch": 1.3501445550915516, "grad_norm": 0.4864499270915985, "learning_rate": 5.955107971587951e-06, "loss": 0.3732, "step": 29421 }, { "epoch": 1.3501904455968061, "grad_norm": 0.44777753949165344, "learning_rate": 5.9548672974632845e-06, "loss": 0.3223, "step": 29422 }, { "epoch": 1.3502363361020606, "grad_norm": 0.4858568608760834, "learning_rate": 5.954626621042466e-06, "loss": 0.4134, "step": 29423 }, { "epoch": 1.350282226607315, "grad_norm": 0.48121047019958496, "learning_rate": 5.95438594232607e-06, "loss": 0.4098, "step": 29424 }, { "epoch": 1.3503281171125694, "grad_norm": 0.4450834393501282, "learning_rate": 5.954145261314678e-06, "loss": 0.3636, "step": 29425 }, { "epoch": 1.350374007617824, "grad_norm": 0.49920767545700073, "learning_rate": 5.953904578008867e-06, "loss": 0.3959, "step": 29426 }, { "epoch": 1.3504198981230784, "grad_norm": 0.4793969988822937, "learning_rate": 5.953663892409215e-06, "loss": 0.4305, "step": 29427 }, { "epoch": 1.3504657886283327, "grad_norm": 0.4778003692626953, "learning_rate": 5.953423204516304e-06, "loss": 0.3457, "step": 29428 }, { "epoch": 1.3505116791335872, "grad_norm": 0.4819778800010681, "learning_rate": 5.953182514330712e-06, "loss": 0.3322, "step": 29429 }, { "epoch": 1.3505575696388417, "grad_norm": 0.512312114238739, "learning_rate": 5.952941821853016e-06, "loss": 0.4285, "step": 29430 }, { "epoch": 1.3506034601440962, "grad_norm": 0.454806923866272, "learning_rate": 5.9527011270837954e-06, "loss": 0.3441, "step": 29431 }, { "epoch": 1.3506493506493507, "grad_norm": 0.48443689942359924, "learning_rate": 5.952460430023629e-06, "loss": 0.3778, "step": 29432 }, { "epoch": 1.3506952411546052, "grad_norm": 0.4988158941268921, "learning_rate": 5.952219730673095e-06, "loss": 0.4215, "step": 29433 }, { "epoch": 1.3507411316598597, "grad_norm": 0.47955095767974854, "learning_rate": 5.951979029032775e-06, "loss": 0.3968, "step": 29434 }, { "epoch": 1.3507870221651141, "grad_norm": 0.4715772271156311, "learning_rate": 5.951738325103247e-06, "loss": 0.3514, "step": 29435 }, { "epoch": 1.3508329126703684, "grad_norm": 0.44676560163497925, "learning_rate": 5.951497618885086e-06, "loss": 0.3419, "step": 29436 }, { "epoch": 1.350878803175623, "grad_norm": 0.4500732719898224, "learning_rate": 5.951256910378875e-06, "loss": 0.3249, "step": 29437 }, { "epoch": 1.3509246936808774, "grad_norm": 0.43639427423477173, "learning_rate": 5.95101619958519e-06, "loss": 0.3406, "step": 29438 }, { "epoch": 1.350970584186132, "grad_norm": 0.4610595405101776, "learning_rate": 5.9507754865046115e-06, "loss": 0.349, "step": 29439 }, { "epoch": 1.3510164746913864, "grad_norm": 0.4489709138870239, "learning_rate": 5.95053477113772e-06, "loss": 0.3178, "step": 29440 }, { "epoch": 1.3510623651966407, "grad_norm": 0.4721061885356903, "learning_rate": 5.950294053485091e-06, "loss": 0.3394, "step": 29441 }, { "epoch": 1.3511082557018952, "grad_norm": 0.4574808180332184, "learning_rate": 5.9500533335473044e-06, "loss": 0.3189, "step": 29442 }, { "epoch": 1.3511541462071497, "grad_norm": 0.4466267228126526, "learning_rate": 5.949812611324939e-06, "loss": 0.3021, "step": 29443 }, { "epoch": 1.3512000367124042, "grad_norm": 0.495392769575119, "learning_rate": 5.949571886818575e-06, "loss": 0.3962, "step": 29444 }, { "epoch": 1.3512459272176587, "grad_norm": 0.44819188117980957, "learning_rate": 5.949331160028789e-06, "loss": 0.3047, "step": 29445 }, { "epoch": 1.3512918177229132, "grad_norm": 0.4276049733161926, "learning_rate": 5.9490904309561636e-06, "loss": 0.2894, "step": 29446 }, { "epoch": 1.3513377082281677, "grad_norm": 0.48284846544265747, "learning_rate": 5.948849699601274e-06, "loss": 0.3784, "step": 29447 }, { "epoch": 1.3513835987334222, "grad_norm": 0.41465261578559875, "learning_rate": 5.9486089659647e-06, "loss": 0.2739, "step": 29448 }, { "epoch": 1.3514294892386765, "grad_norm": 0.48336705565452576, "learning_rate": 5.94836823004702e-06, "loss": 0.3271, "step": 29449 }, { "epoch": 1.351475379743931, "grad_norm": 0.4380035996437073, "learning_rate": 5.948127491848814e-06, "loss": 0.3159, "step": 29450 }, { "epoch": 1.3515212702491854, "grad_norm": 0.4483572244644165, "learning_rate": 5.947886751370664e-06, "loss": 0.309, "step": 29451 }, { "epoch": 1.35156716075444, "grad_norm": 0.4572576880455017, "learning_rate": 5.947646008613141e-06, "loss": 0.3648, "step": 29452 }, { "epoch": 1.3516130512596944, "grad_norm": 0.43247950077056885, "learning_rate": 5.947405263576831e-06, "loss": 0.2872, "step": 29453 }, { "epoch": 1.3516589417649487, "grad_norm": 0.4656652510166168, "learning_rate": 5.947164516262309e-06, "loss": 0.3734, "step": 29454 }, { "epoch": 1.3517048322702032, "grad_norm": 0.4362700879573822, "learning_rate": 5.946923766670156e-06, "loss": 0.3457, "step": 29455 }, { "epoch": 1.3517507227754577, "grad_norm": 0.539270281791687, "learning_rate": 5.94668301480095e-06, "loss": 0.3342, "step": 29456 }, { "epoch": 1.3517966132807122, "grad_norm": 0.4721450209617615, "learning_rate": 5.9464422606552705e-06, "loss": 0.3427, "step": 29457 }, { "epoch": 1.3518425037859667, "grad_norm": 0.5854961276054382, "learning_rate": 5.946201504233696e-06, "loss": 0.4711, "step": 29458 }, { "epoch": 1.3518883942912212, "grad_norm": 0.4998513162136078, "learning_rate": 5.945960745536805e-06, "loss": 0.4166, "step": 29459 }, { "epoch": 1.3519342847964757, "grad_norm": 0.44695258140563965, "learning_rate": 5.945719984565177e-06, "loss": 0.3235, "step": 29460 }, { "epoch": 1.3519801753017302, "grad_norm": 0.4576665163040161, "learning_rate": 5.9454792213193915e-06, "loss": 0.3471, "step": 29461 }, { "epoch": 1.3520260658069845, "grad_norm": 0.48912128806114197, "learning_rate": 5.945238455800027e-06, "loss": 0.3753, "step": 29462 }, { "epoch": 1.352071956312239, "grad_norm": 0.5700365304946899, "learning_rate": 5.944997688007662e-06, "loss": 0.3534, "step": 29463 }, { "epoch": 1.3521178468174935, "grad_norm": 0.4673402011394501, "learning_rate": 5.944756917942875e-06, "loss": 0.3513, "step": 29464 }, { "epoch": 1.352163737322748, "grad_norm": 0.44554173946380615, "learning_rate": 5.944516145606247e-06, "loss": 0.2995, "step": 29465 }, { "epoch": 1.3522096278280025, "grad_norm": 0.4523986876010895, "learning_rate": 5.944275370998355e-06, "loss": 0.3462, "step": 29466 }, { "epoch": 1.3522555183332567, "grad_norm": 0.44060468673706055, "learning_rate": 5.94403459411978e-06, "loss": 0.2846, "step": 29467 }, { "epoch": 1.3523014088385112, "grad_norm": 0.459499716758728, "learning_rate": 5.943793814971098e-06, "loss": 0.3061, "step": 29468 }, { "epoch": 1.3523472993437657, "grad_norm": 0.5119299292564392, "learning_rate": 5.943553033552891e-06, "loss": 0.4215, "step": 29469 }, { "epoch": 1.3523931898490202, "grad_norm": 0.5038403272628784, "learning_rate": 5.943312249865736e-06, "loss": 0.4109, "step": 29470 }, { "epoch": 1.3524390803542747, "grad_norm": 0.48753291368484497, "learning_rate": 5.9430714639102125e-06, "loss": 0.3683, "step": 29471 }, { "epoch": 1.3524849708595292, "grad_norm": 0.4845438599586487, "learning_rate": 5.9428306756869e-06, "loss": 0.3898, "step": 29472 }, { "epoch": 1.3525308613647837, "grad_norm": 0.4997783303260803, "learning_rate": 5.942589885196378e-06, "loss": 0.4343, "step": 29473 }, { "epoch": 1.352576751870038, "grad_norm": 0.5134733319282532, "learning_rate": 5.942349092439224e-06, "loss": 0.4508, "step": 29474 }, { "epoch": 1.3526226423752925, "grad_norm": 0.4712375998497009, "learning_rate": 5.942108297416018e-06, "loss": 0.3401, "step": 29475 }, { "epoch": 1.352668532880547, "grad_norm": 0.48030731081962585, "learning_rate": 5.9418675001273405e-06, "loss": 0.3452, "step": 29476 }, { "epoch": 1.3527144233858015, "grad_norm": 0.5054637789726257, "learning_rate": 5.941626700573767e-06, "loss": 0.3961, "step": 29477 }, { "epoch": 1.352760313891056, "grad_norm": 0.5077686309814453, "learning_rate": 5.941385898755879e-06, "loss": 0.3498, "step": 29478 }, { "epoch": 1.3528062043963103, "grad_norm": 0.45082950592041016, "learning_rate": 5.941145094674257e-06, "loss": 0.3531, "step": 29479 }, { "epoch": 1.3528520949015648, "grad_norm": 0.4849626421928406, "learning_rate": 5.940904288329474e-06, "loss": 0.3496, "step": 29480 }, { "epoch": 1.3528979854068193, "grad_norm": 0.4552357494831085, "learning_rate": 5.940663479722115e-06, "loss": 0.3087, "step": 29481 }, { "epoch": 1.3529438759120738, "grad_norm": 0.49145278334617615, "learning_rate": 5.940422668852759e-06, "loss": 0.3996, "step": 29482 }, { "epoch": 1.3529897664173283, "grad_norm": 0.4758847653865814, "learning_rate": 5.940181855721981e-06, "loss": 0.3608, "step": 29483 }, { "epoch": 1.3530356569225828, "grad_norm": 0.5474574565887451, "learning_rate": 5.9399410403303635e-06, "loss": 0.4008, "step": 29484 }, { "epoch": 1.3530815474278373, "grad_norm": 0.5052220821380615, "learning_rate": 5.939700222678484e-06, "loss": 0.2988, "step": 29485 }, { "epoch": 1.3531274379330918, "grad_norm": 0.42659851908683777, "learning_rate": 5.939459402766922e-06, "loss": 0.2652, "step": 29486 }, { "epoch": 1.353173328438346, "grad_norm": 0.4713885486125946, "learning_rate": 5.939218580596255e-06, "loss": 0.3551, "step": 29487 }, { "epoch": 1.3532192189436005, "grad_norm": 0.46077069640159607, "learning_rate": 5.938977756167067e-06, "loss": 0.3247, "step": 29488 }, { "epoch": 1.353265109448855, "grad_norm": 0.4927977919578552, "learning_rate": 5.938736929479932e-06, "loss": 0.3669, "step": 29489 }, { "epoch": 1.3533109999541095, "grad_norm": 0.5094456672668457, "learning_rate": 5.938496100535432e-06, "loss": 0.4342, "step": 29490 }, { "epoch": 1.353356890459364, "grad_norm": 0.5088447332382202, "learning_rate": 5.938255269334144e-06, "loss": 0.4305, "step": 29491 }, { "epoch": 1.3534027809646183, "grad_norm": 0.4570750594139099, "learning_rate": 5.938014435876649e-06, "loss": 0.2986, "step": 29492 }, { "epoch": 1.3534486714698728, "grad_norm": 0.47599947452545166, "learning_rate": 5.937773600163526e-06, "loss": 0.3742, "step": 29493 }, { "epoch": 1.3534945619751273, "grad_norm": 0.47962313890457153, "learning_rate": 5.937532762195351e-06, "loss": 0.3622, "step": 29494 }, { "epoch": 1.3535404524803818, "grad_norm": 0.49812382459640503, "learning_rate": 5.937291921972708e-06, "loss": 0.3844, "step": 29495 }, { "epoch": 1.3535863429856363, "grad_norm": 0.4808807969093323, "learning_rate": 5.937051079496172e-06, "loss": 0.3754, "step": 29496 }, { "epoch": 1.3536322334908908, "grad_norm": 0.5198098421096802, "learning_rate": 5.936810234766324e-06, "loss": 0.3717, "step": 29497 }, { "epoch": 1.3536781239961453, "grad_norm": 0.47564035654067993, "learning_rate": 5.9365693877837436e-06, "loss": 0.4212, "step": 29498 }, { "epoch": 1.3537240145013998, "grad_norm": 0.49062061309814453, "learning_rate": 5.93632853854901e-06, "loss": 0.3774, "step": 29499 }, { "epoch": 1.353769905006654, "grad_norm": 0.453936368227005, "learning_rate": 5.936087687062701e-06, "loss": 0.3483, "step": 29500 }, { "epoch": 1.3538157955119086, "grad_norm": 0.49795588850975037, "learning_rate": 5.935846833325397e-06, "loss": 0.4334, "step": 29501 }, { "epoch": 1.353861686017163, "grad_norm": 0.4456612467765808, "learning_rate": 5.935605977337678e-06, "loss": 0.2917, "step": 29502 }, { "epoch": 1.3539075765224176, "grad_norm": 0.4585441052913666, "learning_rate": 5.9353651191001195e-06, "loss": 0.3053, "step": 29503 }, { "epoch": 1.353953467027672, "grad_norm": 0.49219778180122375, "learning_rate": 5.9351242586133025e-06, "loss": 0.4228, "step": 29504 }, { "epoch": 1.3539993575329263, "grad_norm": 0.42750632762908936, "learning_rate": 5.93488339587781e-06, "loss": 0.3157, "step": 29505 }, { "epoch": 1.3540452480381808, "grad_norm": 0.49682432413101196, "learning_rate": 5.934642530894216e-06, "loss": 0.3929, "step": 29506 }, { "epoch": 1.3540911385434353, "grad_norm": 0.4230867922306061, "learning_rate": 5.934401663663103e-06, "loss": 0.3016, "step": 29507 }, { "epoch": 1.3541370290486898, "grad_norm": 0.46849682927131653, "learning_rate": 5.934160794185047e-06, "loss": 0.3334, "step": 29508 }, { "epoch": 1.3541829195539443, "grad_norm": 0.48456355929374695, "learning_rate": 5.933919922460629e-06, "loss": 0.3727, "step": 29509 }, { "epoch": 1.3542288100591988, "grad_norm": 0.44948720932006836, "learning_rate": 5.93367904849043e-06, "loss": 0.3208, "step": 29510 }, { "epoch": 1.3542747005644533, "grad_norm": 0.5145674347877502, "learning_rate": 5.933438172275028e-06, "loss": 0.4301, "step": 29511 }, { "epoch": 1.3543205910697078, "grad_norm": 0.5029955506324768, "learning_rate": 5.933197293815e-06, "loss": 0.426, "step": 29512 }, { "epoch": 1.354366481574962, "grad_norm": 0.5060005187988281, "learning_rate": 5.932956413110928e-06, "loss": 0.3944, "step": 29513 }, { "epoch": 1.3544123720802166, "grad_norm": 0.47717639803886414, "learning_rate": 5.9327155301633895e-06, "loss": 0.3455, "step": 29514 }, { "epoch": 1.354458262585471, "grad_norm": 0.4631054103374481, "learning_rate": 5.932474644972965e-06, "loss": 0.3643, "step": 29515 }, { "epoch": 1.3545041530907256, "grad_norm": 0.47242268919944763, "learning_rate": 5.9322337575402335e-06, "loss": 0.3257, "step": 29516 }, { "epoch": 1.3545500435959799, "grad_norm": 0.43661004304885864, "learning_rate": 5.931992867865774e-06, "loss": 0.3186, "step": 29517 }, { "epoch": 1.3545959341012344, "grad_norm": 0.45031511783599854, "learning_rate": 5.9317519759501665e-06, "loss": 0.3043, "step": 29518 }, { "epoch": 1.3546418246064889, "grad_norm": 0.4612464904785156, "learning_rate": 5.931511081793988e-06, "loss": 0.3574, "step": 29519 }, { "epoch": 1.3546877151117434, "grad_norm": 0.4424362778663635, "learning_rate": 5.931270185397821e-06, "loss": 0.3251, "step": 29520 }, { "epoch": 1.3547336056169978, "grad_norm": 0.48220860958099365, "learning_rate": 5.931029286762242e-06, "loss": 0.3487, "step": 29521 }, { "epoch": 1.3547794961222523, "grad_norm": 0.4638567864894867, "learning_rate": 5.9307883858878326e-06, "loss": 0.3239, "step": 29522 }, { "epoch": 1.3548253866275068, "grad_norm": 0.4949064254760742, "learning_rate": 5.9305474827751684e-06, "loss": 0.3514, "step": 29523 }, { "epoch": 1.3548712771327613, "grad_norm": 0.4567302167415619, "learning_rate": 5.930306577424834e-06, "loss": 0.3228, "step": 29524 }, { "epoch": 1.3549171676380156, "grad_norm": 0.6611633896827698, "learning_rate": 5.930065669837404e-06, "loss": 0.2822, "step": 29525 }, { "epoch": 1.3549630581432701, "grad_norm": 0.5450873970985413, "learning_rate": 5.92982476001346e-06, "loss": 0.46, "step": 29526 }, { "epoch": 1.3550089486485246, "grad_norm": 0.5070503354072571, "learning_rate": 5.929583847953581e-06, "loss": 0.412, "step": 29527 }, { "epoch": 1.3550548391537791, "grad_norm": 0.4776357114315033, "learning_rate": 5.929342933658346e-06, "loss": 0.3829, "step": 29528 }, { "epoch": 1.3551007296590336, "grad_norm": 0.4866112172603607, "learning_rate": 5.929102017128335e-06, "loss": 0.3712, "step": 29529 }, { "epoch": 1.3551466201642879, "grad_norm": 0.47708427906036377, "learning_rate": 5.928861098364127e-06, "loss": 0.3641, "step": 29530 }, { "epoch": 1.3551925106695424, "grad_norm": 0.4511614739894867, "learning_rate": 5.928620177366301e-06, "loss": 0.3198, "step": 29531 }, { "epoch": 1.3552384011747969, "grad_norm": 0.4654328525066376, "learning_rate": 5.9283792541354365e-06, "loss": 0.3308, "step": 29532 }, { "epoch": 1.3552842916800514, "grad_norm": 0.478515088558197, "learning_rate": 5.928138328672114e-06, "loss": 0.3496, "step": 29533 }, { "epoch": 1.3553301821853059, "grad_norm": 0.48931899666786194, "learning_rate": 5.92789740097691e-06, "loss": 0.3723, "step": 29534 }, { "epoch": 1.3553760726905604, "grad_norm": 0.4498971402645111, "learning_rate": 5.927656471050406e-06, "loss": 0.3397, "step": 29535 }, { "epoch": 1.3554219631958149, "grad_norm": 0.47696053981781006, "learning_rate": 5.927415538893182e-06, "loss": 0.4142, "step": 29536 }, { "epoch": 1.3554678537010694, "grad_norm": 0.4853246808052063, "learning_rate": 5.927174604505815e-06, "loss": 0.4221, "step": 29537 }, { "epoch": 1.3555137442063236, "grad_norm": 0.4427390992641449, "learning_rate": 5.926933667888887e-06, "loss": 0.3133, "step": 29538 }, { "epoch": 1.3555596347115781, "grad_norm": 0.5313991904258728, "learning_rate": 5.926692729042976e-06, "loss": 0.5289, "step": 29539 }, { "epoch": 1.3556055252168326, "grad_norm": 0.5311051607131958, "learning_rate": 5.92645178796866e-06, "loss": 0.4189, "step": 29540 }, { "epoch": 1.3556514157220871, "grad_norm": 0.4435186982154846, "learning_rate": 5.926210844666521e-06, "loss": 0.2881, "step": 29541 }, { "epoch": 1.3556973062273416, "grad_norm": 0.43815866112709045, "learning_rate": 5.925969899137136e-06, "loss": 0.3191, "step": 29542 }, { "epoch": 1.355743196732596, "grad_norm": 0.4523143470287323, "learning_rate": 5.925728951381088e-06, "loss": 0.3452, "step": 29543 }, { "epoch": 1.3557890872378504, "grad_norm": 0.4735391438007355, "learning_rate": 5.925488001398953e-06, "loss": 0.3454, "step": 29544 }, { "epoch": 1.355834977743105, "grad_norm": 0.5045285224914551, "learning_rate": 5.92524704919131e-06, "loss": 0.3619, "step": 29545 }, { "epoch": 1.3558808682483594, "grad_norm": 0.5694239735603333, "learning_rate": 5.92500609475874e-06, "loss": 0.3457, "step": 29546 }, { "epoch": 1.355926758753614, "grad_norm": 0.49713921546936035, "learning_rate": 5.924765138101824e-06, "loss": 0.3771, "step": 29547 }, { "epoch": 1.3559726492588684, "grad_norm": 0.44682055711746216, "learning_rate": 5.924524179221139e-06, "loss": 0.3086, "step": 29548 }, { "epoch": 1.356018539764123, "grad_norm": 0.47251468896865845, "learning_rate": 5.924283218117265e-06, "loss": 0.289, "step": 29549 }, { "epoch": 1.3560644302693774, "grad_norm": 0.530010461807251, "learning_rate": 5.924042254790783e-06, "loss": 0.3939, "step": 29550 }, { "epoch": 1.3561103207746317, "grad_norm": 0.46580496430397034, "learning_rate": 5.923801289242268e-06, "loss": 0.3048, "step": 29551 }, { "epoch": 1.3561562112798862, "grad_norm": 0.456935316324234, "learning_rate": 5.923560321472304e-06, "loss": 0.3395, "step": 29552 }, { "epoch": 1.3562021017851407, "grad_norm": 0.47044870257377625, "learning_rate": 5.923319351481469e-06, "loss": 0.3686, "step": 29553 }, { "epoch": 1.3562479922903952, "grad_norm": 0.43659117817878723, "learning_rate": 5.923078379270343e-06, "loss": 0.2964, "step": 29554 }, { "epoch": 1.3562938827956494, "grad_norm": 0.4753674566745758, "learning_rate": 5.9228374048395045e-06, "loss": 0.3931, "step": 29555 }, { "epoch": 1.356339773300904, "grad_norm": 0.48780912160873413, "learning_rate": 5.922596428189533e-06, "loss": 0.3721, "step": 29556 }, { "epoch": 1.3563856638061584, "grad_norm": 0.4867079257965088, "learning_rate": 5.922355449321008e-06, "loss": 0.3509, "step": 29557 }, { "epoch": 1.356431554311413, "grad_norm": 0.4791640639305115, "learning_rate": 5.92211446823451e-06, "loss": 0.3913, "step": 29558 }, { "epoch": 1.3564774448166674, "grad_norm": 0.4498705267906189, "learning_rate": 5.9218734849306156e-06, "loss": 0.3041, "step": 29559 }, { "epoch": 1.356523335321922, "grad_norm": 0.46047019958496094, "learning_rate": 5.921632499409909e-06, "loss": 0.3717, "step": 29560 }, { "epoch": 1.3565692258271764, "grad_norm": 0.454988569021225, "learning_rate": 5.921391511672967e-06, "loss": 0.3474, "step": 29561 }, { "epoch": 1.356615116332431, "grad_norm": 0.4630346894264221, "learning_rate": 5.921150521720368e-06, "loss": 0.3679, "step": 29562 }, { "epoch": 1.3566610068376852, "grad_norm": 0.438899427652359, "learning_rate": 5.920909529552692e-06, "loss": 0.2669, "step": 29563 }, { "epoch": 1.3567068973429397, "grad_norm": 0.44538411498069763, "learning_rate": 5.920668535170522e-06, "loss": 0.3031, "step": 29564 }, { "epoch": 1.3567527878481942, "grad_norm": 0.49406489729881287, "learning_rate": 5.920427538574432e-06, "loss": 0.3619, "step": 29565 }, { "epoch": 1.3567986783534487, "grad_norm": 0.4674498736858368, "learning_rate": 5.920186539765006e-06, "loss": 0.3454, "step": 29566 }, { "epoch": 1.3568445688587032, "grad_norm": 0.45554405450820923, "learning_rate": 5.919945538742822e-06, "loss": 0.3053, "step": 29567 }, { "epoch": 1.3568904593639575, "grad_norm": 0.4519006907939911, "learning_rate": 5.919704535508457e-06, "loss": 0.3474, "step": 29568 }, { "epoch": 1.356936349869212, "grad_norm": 0.42680203914642334, "learning_rate": 5.9194635300624935e-06, "loss": 0.309, "step": 29569 }, { "epoch": 1.3569822403744665, "grad_norm": 0.46383142471313477, "learning_rate": 5.919222522405512e-06, "loss": 0.4276, "step": 29570 }, { "epoch": 1.357028130879721, "grad_norm": 0.4380016028881073, "learning_rate": 5.91898151253809e-06, "loss": 0.3086, "step": 29571 }, { "epoch": 1.3570740213849755, "grad_norm": 0.5110563039779663, "learning_rate": 5.918740500460808e-06, "loss": 0.4029, "step": 29572 }, { "epoch": 1.35711991189023, "grad_norm": 0.4521673321723938, "learning_rate": 5.918499486174244e-06, "loss": 0.3422, "step": 29573 }, { "epoch": 1.3571658023954845, "grad_norm": 0.5332672595977783, "learning_rate": 5.918258469678978e-06, "loss": 0.4414, "step": 29574 }, { "epoch": 1.357211692900739, "grad_norm": 0.45664867758750916, "learning_rate": 5.918017450975592e-06, "loss": 0.3519, "step": 29575 }, { "epoch": 1.3572575834059932, "grad_norm": 0.49565690755844116, "learning_rate": 5.917776430064663e-06, "loss": 0.3823, "step": 29576 }, { "epoch": 1.3573034739112477, "grad_norm": 0.47229939699172974, "learning_rate": 5.917535406946771e-06, "loss": 0.4284, "step": 29577 }, { "epoch": 1.3573493644165022, "grad_norm": 0.46170151233673096, "learning_rate": 5.917294381622496e-06, "loss": 0.3335, "step": 29578 }, { "epoch": 1.3573952549217567, "grad_norm": 0.44030386209487915, "learning_rate": 5.917053354092417e-06, "loss": 0.3291, "step": 29579 }, { "epoch": 1.3574411454270112, "grad_norm": 0.47472819685935974, "learning_rate": 5.916812324357114e-06, "loss": 0.3307, "step": 29580 }, { "epoch": 1.3574870359322655, "grad_norm": 0.44266200065612793, "learning_rate": 5.916571292417168e-06, "loss": 0.2984, "step": 29581 }, { "epoch": 1.35753292643752, "grad_norm": 0.4729927182197571, "learning_rate": 5.916330258273158e-06, "loss": 0.3533, "step": 29582 }, { "epoch": 1.3575788169427745, "grad_norm": 0.4902630150318146, "learning_rate": 5.91608922192566e-06, "loss": 0.385, "step": 29583 }, { "epoch": 1.357624707448029, "grad_norm": 0.443960577249527, "learning_rate": 5.915848183375259e-06, "loss": 0.3035, "step": 29584 }, { "epoch": 1.3576705979532835, "grad_norm": 0.5070707201957703, "learning_rate": 5.9156071426225315e-06, "loss": 0.402, "step": 29585 }, { "epoch": 1.357716488458538, "grad_norm": 0.4142516553401947, "learning_rate": 5.915366099668058e-06, "loss": 0.2721, "step": 29586 }, { "epoch": 1.3577623789637925, "grad_norm": 0.4407435357570648, "learning_rate": 5.9151250545124185e-06, "loss": 0.307, "step": 29587 }, { "epoch": 1.357808269469047, "grad_norm": 0.4919806122779846, "learning_rate": 5.9148840071561895e-06, "loss": 0.4126, "step": 29588 }, { "epoch": 1.3578541599743013, "grad_norm": 0.45244964957237244, "learning_rate": 5.914642957599957e-06, "loss": 0.3053, "step": 29589 }, { "epoch": 1.3579000504795558, "grad_norm": 0.4528164863586426, "learning_rate": 5.914401905844294e-06, "loss": 0.3477, "step": 29590 }, { "epoch": 1.3579459409848103, "grad_norm": 0.4564751088619232, "learning_rate": 5.914160851889783e-06, "loss": 0.3083, "step": 29591 }, { "epoch": 1.3579918314900647, "grad_norm": 0.4766659736633301, "learning_rate": 5.913919795737004e-06, "loss": 0.3948, "step": 29592 }, { "epoch": 1.3580377219953192, "grad_norm": 0.4663122892379761, "learning_rate": 5.913678737386538e-06, "loss": 0.3889, "step": 29593 }, { "epoch": 1.3580836125005735, "grad_norm": 0.4822918772697449, "learning_rate": 5.9134376768389614e-06, "loss": 0.3392, "step": 29594 }, { "epoch": 1.358129503005828, "grad_norm": 0.4525180459022522, "learning_rate": 5.913196614094856e-06, "loss": 0.3315, "step": 29595 }, { "epoch": 1.3581753935110825, "grad_norm": 0.46415045857429504, "learning_rate": 5.912955549154801e-06, "loss": 0.352, "step": 29596 }, { "epoch": 1.358221284016337, "grad_norm": 0.444278359413147, "learning_rate": 5.912714482019376e-06, "loss": 0.3063, "step": 29597 }, { "epoch": 1.3582671745215915, "grad_norm": 0.4749598503112793, "learning_rate": 5.912473412689161e-06, "loss": 0.3797, "step": 29598 }, { "epoch": 1.358313065026846, "grad_norm": 0.4539699852466583, "learning_rate": 5.912232341164735e-06, "loss": 0.2921, "step": 29599 }, { "epoch": 1.3583589555321005, "grad_norm": 0.5553392171859741, "learning_rate": 5.911991267446677e-06, "loss": 0.4689, "step": 29600 }, { "epoch": 1.358404846037355, "grad_norm": 0.708074152469635, "learning_rate": 5.911750191535569e-06, "loss": 0.311, "step": 29601 }, { "epoch": 1.3584507365426093, "grad_norm": 0.43390798568725586, "learning_rate": 5.911509113431989e-06, "loss": 0.2958, "step": 29602 }, { "epoch": 1.3584966270478638, "grad_norm": 0.496028333902359, "learning_rate": 5.911268033136519e-06, "loss": 0.3847, "step": 29603 }, { "epoch": 1.3585425175531183, "grad_norm": 0.5099213123321533, "learning_rate": 5.911026950649736e-06, "loss": 0.468, "step": 29604 }, { "epoch": 1.3585884080583728, "grad_norm": 0.47072237730026245, "learning_rate": 5.9107858659722194e-06, "loss": 0.3506, "step": 29605 }, { "epoch": 1.358634298563627, "grad_norm": 0.512626588344574, "learning_rate": 5.910544779104552e-06, "loss": 0.3802, "step": 29606 }, { "epoch": 1.3586801890688815, "grad_norm": 0.4535597264766693, "learning_rate": 5.910303690047311e-06, "loss": 0.3169, "step": 29607 }, { "epoch": 1.358726079574136, "grad_norm": 0.5390753149986267, "learning_rate": 5.910062598801078e-06, "loss": 0.4112, "step": 29608 }, { "epoch": 1.3587719700793905, "grad_norm": 0.488546222448349, "learning_rate": 5.909821505366431e-06, "loss": 0.3922, "step": 29609 }, { "epoch": 1.358817860584645, "grad_norm": 0.43779709935188293, "learning_rate": 5.909580409743949e-06, "loss": 0.3188, "step": 29610 }, { "epoch": 1.3588637510898995, "grad_norm": 0.4747777581214905, "learning_rate": 5.909339311934213e-06, "loss": 0.3962, "step": 29611 }, { "epoch": 1.358909641595154, "grad_norm": 0.4844985902309418, "learning_rate": 5.909098211937806e-06, "loss": 0.364, "step": 29612 }, { "epoch": 1.3589555321004085, "grad_norm": 0.4262472093105316, "learning_rate": 5.9088571097553014e-06, "loss": 0.3346, "step": 29613 }, { "epoch": 1.3590014226056628, "grad_norm": 0.4293331801891327, "learning_rate": 5.908616005387285e-06, "loss": 0.2985, "step": 29614 }, { "epoch": 1.3590473131109173, "grad_norm": 0.4899218678474426, "learning_rate": 5.908374898834334e-06, "loss": 0.4435, "step": 29615 }, { "epoch": 1.3590932036161718, "grad_norm": 0.7056658267974854, "learning_rate": 5.908133790097026e-06, "loss": 0.3994, "step": 29616 }, { "epoch": 1.3591390941214263, "grad_norm": 0.4775899052619934, "learning_rate": 5.907892679175943e-06, "loss": 0.3449, "step": 29617 }, { "epoch": 1.3591849846266808, "grad_norm": 0.460114449262619, "learning_rate": 5.907651566071665e-06, "loss": 0.3747, "step": 29618 }, { "epoch": 1.359230875131935, "grad_norm": 0.4267895221710205, "learning_rate": 5.907410450784773e-06, "loss": 0.2802, "step": 29619 }, { "epoch": 1.3592767656371896, "grad_norm": 0.4801914095878601, "learning_rate": 5.907169333315843e-06, "loss": 0.3716, "step": 29620 }, { "epoch": 1.359322656142444, "grad_norm": 0.43569689989089966, "learning_rate": 5.906928213665459e-06, "loss": 0.293, "step": 29621 }, { "epoch": 1.3593685466476986, "grad_norm": 0.5279867649078369, "learning_rate": 5.906687091834197e-06, "loss": 0.465, "step": 29622 }, { "epoch": 1.359414437152953, "grad_norm": 0.4872879683971405, "learning_rate": 5.90644596782264e-06, "loss": 0.3753, "step": 29623 }, { "epoch": 1.3594603276582076, "grad_norm": 0.47108331322669983, "learning_rate": 5.906204841631365e-06, "loss": 0.3701, "step": 29624 }, { "epoch": 1.359506218163462, "grad_norm": 0.4660903513431549, "learning_rate": 5.905963713260956e-06, "loss": 0.3214, "step": 29625 }, { "epoch": 1.3595521086687166, "grad_norm": 0.4406264126300812, "learning_rate": 5.905722582711989e-06, "loss": 0.2727, "step": 29626 }, { "epoch": 1.3595979991739708, "grad_norm": 0.5067930817604065, "learning_rate": 5.905481449985045e-06, "loss": 0.3687, "step": 29627 }, { "epoch": 1.3596438896792253, "grad_norm": 0.46533942222595215, "learning_rate": 5.905240315080702e-06, "loss": 0.3562, "step": 29628 }, { "epoch": 1.3596897801844798, "grad_norm": 0.4441923201084137, "learning_rate": 5.904999177999544e-06, "loss": 0.3271, "step": 29629 }, { "epoch": 1.3597356706897343, "grad_norm": 0.48101547360420227, "learning_rate": 5.9047580387421485e-06, "loss": 0.3724, "step": 29630 }, { "epoch": 1.3597815611949888, "grad_norm": 0.4518243074417114, "learning_rate": 5.904516897309095e-06, "loss": 0.363, "step": 29631 }, { "epoch": 1.359827451700243, "grad_norm": 0.460448294878006, "learning_rate": 5.904275753700965e-06, "loss": 0.358, "step": 29632 }, { "epoch": 1.3598733422054976, "grad_norm": 0.45445820689201355, "learning_rate": 5.904034607918335e-06, "loss": 0.318, "step": 29633 }, { "epoch": 1.359919232710752, "grad_norm": 0.5103985667228699, "learning_rate": 5.9037934599617875e-06, "loss": 0.4108, "step": 29634 }, { "epoch": 1.3599651232160066, "grad_norm": 0.4557051658630371, "learning_rate": 5.903552309831903e-06, "loss": 0.3411, "step": 29635 }, { "epoch": 1.360011013721261, "grad_norm": 0.4576733708381653, "learning_rate": 5.9033111575292605e-06, "loss": 0.3583, "step": 29636 }, { "epoch": 1.3600569042265156, "grad_norm": 0.45510566234588623, "learning_rate": 5.90307000305444e-06, "loss": 0.3898, "step": 29637 }, { "epoch": 1.36010279473177, "grad_norm": 0.518474280834198, "learning_rate": 5.9028288464080194e-06, "loss": 0.4034, "step": 29638 }, { "epoch": 1.3601486852370246, "grad_norm": 0.4567142724990845, "learning_rate": 5.902587687590581e-06, "loss": 0.3546, "step": 29639 }, { "epoch": 1.3601945757422789, "grad_norm": 0.4524565637111664, "learning_rate": 5.902346526602704e-06, "loss": 0.3621, "step": 29640 }, { "epoch": 1.3602404662475334, "grad_norm": 0.46177491545677185, "learning_rate": 5.90210536344497e-06, "loss": 0.3546, "step": 29641 }, { "epoch": 1.3602863567527879, "grad_norm": 0.48046839237213135, "learning_rate": 5.901864198117956e-06, "loss": 0.382, "step": 29642 }, { "epoch": 1.3603322472580424, "grad_norm": 0.4983239769935608, "learning_rate": 5.901623030622243e-06, "loss": 0.4085, "step": 29643 }, { "epoch": 1.3603781377632966, "grad_norm": 0.45995792746543884, "learning_rate": 5.901381860958412e-06, "loss": 0.3405, "step": 29644 }, { "epoch": 1.3604240282685511, "grad_norm": 0.5116739273071289, "learning_rate": 5.90114068912704e-06, "loss": 0.4462, "step": 29645 }, { "epoch": 1.3604699187738056, "grad_norm": 0.4350147247314453, "learning_rate": 5.9008995151287115e-06, "loss": 0.3149, "step": 29646 }, { "epoch": 1.3605158092790601, "grad_norm": 0.46195584535598755, "learning_rate": 5.9006583389640035e-06, "loss": 0.3256, "step": 29647 }, { "epoch": 1.3605616997843146, "grad_norm": 0.4836227595806122, "learning_rate": 5.900417160633496e-06, "loss": 0.4146, "step": 29648 }, { "epoch": 1.3606075902895691, "grad_norm": 0.46304672956466675, "learning_rate": 5.90017598013777e-06, "loss": 0.3899, "step": 29649 }, { "epoch": 1.3606534807948236, "grad_norm": 0.4790802299976349, "learning_rate": 5.899934797477405e-06, "loss": 0.3299, "step": 29650 }, { "epoch": 1.3606993713000781, "grad_norm": 0.4604453444480896, "learning_rate": 5.899693612652979e-06, "loss": 0.363, "step": 29651 }, { "epoch": 1.3607452618053324, "grad_norm": 0.47053855657577515, "learning_rate": 5.899452425665076e-06, "loss": 0.3866, "step": 29652 }, { "epoch": 1.360791152310587, "grad_norm": 0.44615164399147034, "learning_rate": 5.8992112365142725e-06, "loss": 0.3292, "step": 29653 }, { "epoch": 1.3608370428158414, "grad_norm": 0.45640429854393005, "learning_rate": 5.898970045201151e-06, "loss": 0.3932, "step": 29654 }, { "epoch": 1.3608829333210959, "grad_norm": 0.45141783356666565, "learning_rate": 5.898728851726289e-06, "loss": 0.3954, "step": 29655 }, { "epoch": 1.3609288238263504, "grad_norm": 0.4146559536457062, "learning_rate": 5.898487656090268e-06, "loss": 0.3107, "step": 29656 }, { "epoch": 1.3609747143316047, "grad_norm": 0.4813310205936432, "learning_rate": 5.898246458293668e-06, "loss": 0.4233, "step": 29657 }, { "epoch": 1.3610206048368592, "grad_norm": 0.4341275990009308, "learning_rate": 5.8980052583370695e-06, "loss": 0.2935, "step": 29658 }, { "epoch": 1.3610664953421137, "grad_norm": 0.45984718203544617, "learning_rate": 5.89776405622105e-06, "loss": 0.3363, "step": 29659 }, { "epoch": 1.3611123858473682, "grad_norm": 0.44823405146598816, "learning_rate": 5.897522851946193e-06, "loss": 0.3323, "step": 29660 }, { "epoch": 1.3611582763526227, "grad_norm": 0.4772595465183258, "learning_rate": 5.8972816455130755e-06, "loss": 0.4058, "step": 29661 }, { "epoch": 1.3612041668578772, "grad_norm": 0.5200003385543823, "learning_rate": 5.897040436922279e-06, "loss": 0.3876, "step": 29662 }, { "epoch": 1.3612500573631316, "grad_norm": 0.45474350452423096, "learning_rate": 5.896799226174386e-06, "loss": 0.329, "step": 29663 }, { "epoch": 1.3612959478683861, "grad_norm": 0.4122833013534546, "learning_rate": 5.896558013269971e-06, "loss": 0.2982, "step": 29664 }, { "epoch": 1.3613418383736404, "grad_norm": 0.47715505957603455, "learning_rate": 5.896316798209616e-06, "loss": 0.3595, "step": 29665 }, { "epoch": 1.361387728878895, "grad_norm": 0.4455989599227905, "learning_rate": 5.896075580993904e-06, "loss": 0.3235, "step": 29666 }, { "epoch": 1.3614336193841494, "grad_norm": 0.44369298219680786, "learning_rate": 5.895834361623413e-06, "loss": 0.3215, "step": 29667 }, { "epoch": 1.361479509889404, "grad_norm": 0.4578772783279419, "learning_rate": 5.895593140098723e-06, "loss": 0.361, "step": 29668 }, { "epoch": 1.3615254003946584, "grad_norm": 0.46442487835884094, "learning_rate": 5.895351916420415e-06, "loss": 0.3505, "step": 29669 }, { "epoch": 1.3615712908999127, "grad_norm": 0.4709234833717346, "learning_rate": 5.895110690589065e-06, "loss": 0.4239, "step": 29670 }, { "epoch": 1.3616171814051672, "grad_norm": 0.46745041012763977, "learning_rate": 5.89486946260526e-06, "loss": 0.3482, "step": 29671 }, { "epoch": 1.3616630719104217, "grad_norm": 0.4968077540397644, "learning_rate": 5.894628232469573e-06, "loss": 0.3262, "step": 29672 }, { "epoch": 1.3617089624156762, "grad_norm": 0.4447762370109558, "learning_rate": 5.89438700018259e-06, "loss": 0.3057, "step": 29673 }, { "epoch": 1.3617548529209307, "grad_norm": 0.4621894657611847, "learning_rate": 5.894145765744888e-06, "loss": 0.343, "step": 29674 }, { "epoch": 1.3618007434261852, "grad_norm": 0.5195560455322266, "learning_rate": 5.893904529157048e-06, "loss": 0.4563, "step": 29675 }, { "epoch": 1.3618466339314397, "grad_norm": 0.4150986075401306, "learning_rate": 5.893663290419648e-06, "loss": 0.2835, "step": 29676 }, { "epoch": 1.3618925244366942, "grad_norm": 0.46529433131217957, "learning_rate": 5.893422049533272e-06, "loss": 0.3803, "step": 29677 }, { "epoch": 1.3619384149419484, "grad_norm": 0.47696444392204285, "learning_rate": 5.8931808064984965e-06, "loss": 0.3699, "step": 29678 }, { "epoch": 1.361984305447203, "grad_norm": 0.48936840891838074, "learning_rate": 5.892939561315903e-06, "loss": 0.4168, "step": 29679 }, { "epoch": 1.3620301959524574, "grad_norm": 0.6252825260162354, "learning_rate": 5.892698313986073e-06, "loss": 0.4106, "step": 29680 }, { "epoch": 1.362076086457712, "grad_norm": 0.4199279248714447, "learning_rate": 5.8924570645095835e-06, "loss": 0.2695, "step": 29681 }, { "epoch": 1.3621219769629664, "grad_norm": 0.5053243637084961, "learning_rate": 5.892215812887017e-06, "loss": 0.4676, "step": 29682 }, { "epoch": 1.3621678674682207, "grad_norm": 0.46395283937454224, "learning_rate": 5.891974559118953e-06, "loss": 0.3683, "step": 29683 }, { "epoch": 1.3622137579734752, "grad_norm": 0.43673619627952576, "learning_rate": 5.891733303205973e-06, "loss": 0.3242, "step": 29684 }, { "epoch": 1.3622596484787297, "grad_norm": 0.491452157497406, "learning_rate": 5.891492045148656e-06, "loss": 0.3822, "step": 29685 }, { "epoch": 1.3623055389839842, "grad_norm": 0.4200022518634796, "learning_rate": 5.891250784947581e-06, "loss": 0.2983, "step": 29686 }, { "epoch": 1.3623514294892387, "grad_norm": 0.4812476933002472, "learning_rate": 5.8910095226033285e-06, "loss": 0.3579, "step": 29687 }, { "epoch": 1.3623973199944932, "grad_norm": 0.5228948593139648, "learning_rate": 5.890768258116481e-06, "loss": 0.4348, "step": 29688 }, { "epoch": 1.3624432104997477, "grad_norm": 0.49590861797332764, "learning_rate": 5.890526991487615e-06, "loss": 0.3451, "step": 29689 }, { "epoch": 1.3624891010050022, "grad_norm": 0.47714853286743164, "learning_rate": 5.890285722717314e-06, "loss": 0.4093, "step": 29690 }, { "epoch": 1.3625349915102565, "grad_norm": 0.4981129467487335, "learning_rate": 5.890044451806156e-06, "loss": 0.4124, "step": 29691 }, { "epoch": 1.362580882015511, "grad_norm": 0.48129430413246155, "learning_rate": 5.889803178754723e-06, "loss": 0.3713, "step": 29692 }, { "epoch": 1.3626267725207655, "grad_norm": 0.4568685293197632, "learning_rate": 5.889561903563592e-06, "loss": 0.3547, "step": 29693 }, { "epoch": 1.36267266302602, "grad_norm": 0.44387710094451904, "learning_rate": 5.889320626233348e-06, "loss": 0.3066, "step": 29694 }, { "epoch": 1.3627185535312742, "grad_norm": 0.48319244384765625, "learning_rate": 5.889079346764567e-06, "loss": 0.4501, "step": 29695 }, { "epoch": 1.3627644440365287, "grad_norm": 0.5225914120674133, "learning_rate": 5.888838065157832e-06, "loss": 0.4526, "step": 29696 }, { "epoch": 1.3628103345417832, "grad_norm": 0.490865558385849, "learning_rate": 5.888596781413721e-06, "loss": 0.344, "step": 29697 }, { "epoch": 1.3628562250470377, "grad_norm": 0.45328599214553833, "learning_rate": 5.8883554955328145e-06, "loss": 0.3836, "step": 29698 }, { "epoch": 1.3629021155522922, "grad_norm": 0.4989314675331116, "learning_rate": 5.888114207515694e-06, "loss": 0.4061, "step": 29699 }, { "epoch": 1.3629480060575467, "grad_norm": 0.47671636939048767, "learning_rate": 5.88787291736294e-06, "loss": 0.3781, "step": 29700 }, { "epoch": 1.3629938965628012, "grad_norm": 0.46445339918136597, "learning_rate": 5.887631625075132e-06, "loss": 0.3732, "step": 29701 }, { "epoch": 1.3630397870680557, "grad_norm": 0.5187601447105408, "learning_rate": 5.887390330652849e-06, "loss": 0.4223, "step": 29702 }, { "epoch": 1.36308567757331, "grad_norm": 0.46434056758880615, "learning_rate": 5.887149034096673e-06, "loss": 0.3205, "step": 29703 }, { "epoch": 1.3631315680785645, "grad_norm": 0.45827803015708923, "learning_rate": 5.886907735407182e-06, "loss": 0.3129, "step": 29704 }, { "epoch": 1.363177458583819, "grad_norm": 0.531427264213562, "learning_rate": 5.88666643458496e-06, "loss": 0.4066, "step": 29705 }, { "epoch": 1.3632233490890735, "grad_norm": 0.4810008108615875, "learning_rate": 5.8864251316305846e-06, "loss": 0.3667, "step": 29706 }, { "epoch": 1.363269239594328, "grad_norm": 0.4745352864265442, "learning_rate": 5.8861838265446355e-06, "loss": 0.414, "step": 29707 }, { "epoch": 1.3633151300995823, "grad_norm": 0.443050742149353, "learning_rate": 5.8859425193276955e-06, "loss": 0.3185, "step": 29708 }, { "epoch": 1.3633610206048368, "grad_norm": 0.48126769065856934, "learning_rate": 5.885701209980342e-06, "loss": 0.3842, "step": 29709 }, { "epoch": 1.3634069111100913, "grad_norm": 0.44451019167900085, "learning_rate": 5.885459898503156e-06, "loss": 0.3105, "step": 29710 }, { "epoch": 1.3634528016153458, "grad_norm": 0.5091874599456787, "learning_rate": 5.88521858489672e-06, "loss": 0.3324, "step": 29711 }, { "epoch": 1.3634986921206003, "grad_norm": 0.44315865635871887, "learning_rate": 5.884977269161614e-06, "loss": 0.2847, "step": 29712 }, { "epoch": 1.3635445826258548, "grad_norm": 0.4910751283168793, "learning_rate": 5.884735951298413e-06, "loss": 0.4062, "step": 29713 }, { "epoch": 1.3635904731311093, "grad_norm": 0.5021066665649414, "learning_rate": 5.884494631307705e-06, "loss": 0.4075, "step": 29714 }, { "epoch": 1.3636363636363638, "grad_norm": 0.4840486943721771, "learning_rate": 5.8842533091900645e-06, "loss": 0.3505, "step": 29715 }, { "epoch": 1.363682254141618, "grad_norm": 0.4852827787399292, "learning_rate": 5.884011984946073e-06, "loss": 0.3441, "step": 29716 }, { "epoch": 1.3637281446468725, "grad_norm": 0.4716375470161438, "learning_rate": 5.883770658576313e-06, "loss": 0.323, "step": 29717 }, { "epoch": 1.363774035152127, "grad_norm": 0.44765496253967285, "learning_rate": 5.883529330081364e-06, "loss": 0.3534, "step": 29718 }, { "epoch": 1.3638199256573815, "grad_norm": 0.44826802611351013, "learning_rate": 5.883287999461806e-06, "loss": 0.3351, "step": 29719 }, { "epoch": 1.363865816162636, "grad_norm": 0.45793578028678894, "learning_rate": 5.883046666718219e-06, "loss": 0.3574, "step": 29720 }, { "epoch": 1.3639117066678903, "grad_norm": 0.4719219207763672, "learning_rate": 5.882805331851182e-06, "loss": 0.4078, "step": 29721 }, { "epoch": 1.3639575971731448, "grad_norm": 0.46208590269088745, "learning_rate": 5.882563994861278e-06, "loss": 0.3109, "step": 29722 }, { "epoch": 1.3640034876783993, "grad_norm": 0.4673776626586914, "learning_rate": 5.882322655749086e-06, "loss": 0.3209, "step": 29723 }, { "epoch": 1.3640493781836538, "grad_norm": 0.4862824082374573, "learning_rate": 5.882081314515186e-06, "loss": 0.3479, "step": 29724 }, { "epoch": 1.3640952686889083, "grad_norm": 0.5807982087135315, "learning_rate": 5.881839971160159e-06, "loss": 0.4647, "step": 29725 }, { "epoch": 1.3641411591941628, "grad_norm": 0.46980398893356323, "learning_rate": 5.881598625684586e-06, "loss": 0.363, "step": 29726 }, { "epoch": 1.3641870496994173, "grad_norm": 0.4606328010559082, "learning_rate": 5.881357278089045e-06, "loss": 0.3701, "step": 29727 }, { "epoch": 1.3642329402046718, "grad_norm": 0.4825633764266968, "learning_rate": 5.881115928374119e-06, "loss": 0.3775, "step": 29728 }, { "epoch": 1.364278830709926, "grad_norm": 0.5654458403587341, "learning_rate": 5.8808745765403875e-06, "loss": 0.4849, "step": 29729 }, { "epoch": 1.3643247212151806, "grad_norm": 0.4712643027305603, "learning_rate": 5.88063322258843e-06, "loss": 0.3819, "step": 29730 }, { "epoch": 1.364370611720435, "grad_norm": 0.4865855276584625, "learning_rate": 5.880391866518828e-06, "loss": 0.3717, "step": 29731 }, { "epoch": 1.3644165022256896, "grad_norm": 0.45772209763526917, "learning_rate": 5.880150508332161e-06, "loss": 0.3569, "step": 29732 }, { "epoch": 1.3644623927309438, "grad_norm": 0.46473848819732666, "learning_rate": 5.879909148029011e-06, "loss": 0.3002, "step": 29733 }, { "epoch": 1.3645082832361983, "grad_norm": 0.478754460811615, "learning_rate": 5.879667785609956e-06, "loss": 0.323, "step": 29734 }, { "epoch": 1.3645541737414528, "grad_norm": 0.48935818672180176, "learning_rate": 5.8794264210755765e-06, "loss": 0.365, "step": 29735 }, { "epoch": 1.3646000642467073, "grad_norm": 0.509634792804718, "learning_rate": 5.879185054426457e-06, "loss": 0.3662, "step": 29736 }, { "epoch": 1.3646459547519618, "grad_norm": 0.5099175572395325, "learning_rate": 5.878943685663172e-06, "loss": 0.4368, "step": 29737 }, { "epoch": 1.3646918452572163, "grad_norm": 0.48028722405433655, "learning_rate": 5.8787023147863065e-06, "loss": 0.3536, "step": 29738 }, { "epoch": 1.3647377357624708, "grad_norm": 0.6040860414505005, "learning_rate": 5.878460941796439e-06, "loss": 0.4118, "step": 29739 }, { "epoch": 1.3647836262677253, "grad_norm": 0.47918400168418884, "learning_rate": 5.87821956669415e-06, "loss": 0.4021, "step": 29740 }, { "epoch": 1.3648295167729796, "grad_norm": 0.45733773708343506, "learning_rate": 5.87797818948002e-06, "loss": 0.3205, "step": 29741 }, { "epoch": 1.364875407278234, "grad_norm": 0.47505882382392883, "learning_rate": 5.877736810154631e-06, "loss": 0.3556, "step": 29742 }, { "epoch": 1.3649212977834886, "grad_norm": 0.47317835688591003, "learning_rate": 5.877495428718559e-06, "loss": 0.3331, "step": 29743 }, { "epoch": 1.364967188288743, "grad_norm": 0.4912436008453369, "learning_rate": 5.87725404517239e-06, "loss": 0.4306, "step": 29744 }, { "epoch": 1.3650130787939976, "grad_norm": 0.5024107694625854, "learning_rate": 5.877012659516701e-06, "loss": 0.397, "step": 29745 }, { "epoch": 1.3650589692992519, "grad_norm": 0.49367591738700867, "learning_rate": 5.876771271752073e-06, "loss": 0.356, "step": 29746 }, { "epoch": 1.3651048598045064, "grad_norm": 0.45909377932548523, "learning_rate": 5.876529881879087e-06, "loss": 0.3503, "step": 29747 }, { "epoch": 1.3651507503097609, "grad_norm": 0.49692976474761963, "learning_rate": 5.876288489898324e-06, "loss": 0.3965, "step": 29748 }, { "epoch": 1.3651966408150153, "grad_norm": 0.4792816638946533, "learning_rate": 5.876047095810362e-06, "loss": 0.3616, "step": 29749 }, { "epoch": 1.3652425313202698, "grad_norm": 0.5081164240837097, "learning_rate": 5.8758056996157855e-06, "loss": 0.3832, "step": 29750 }, { "epoch": 1.3652884218255243, "grad_norm": 0.46000030636787415, "learning_rate": 5.875564301315172e-06, "loss": 0.3368, "step": 29751 }, { "epoch": 1.3653343123307788, "grad_norm": 0.4886743128299713, "learning_rate": 5.875322900909102e-06, "loss": 0.3676, "step": 29752 }, { "epoch": 1.3653802028360333, "grad_norm": 0.41099125146865845, "learning_rate": 5.8750814983981575e-06, "loss": 0.2865, "step": 29753 }, { "epoch": 1.3654260933412876, "grad_norm": 0.48294857144355774, "learning_rate": 5.8748400937829165e-06, "loss": 0.2894, "step": 29754 }, { "epoch": 1.3654719838465421, "grad_norm": 0.45478665828704834, "learning_rate": 5.874598687063964e-06, "loss": 0.3403, "step": 29755 }, { "epoch": 1.3655178743517966, "grad_norm": 0.477312296628952, "learning_rate": 5.874357278241876e-06, "loss": 0.4335, "step": 29756 }, { "epoch": 1.365563764857051, "grad_norm": 0.4832588732242584, "learning_rate": 5.874115867317234e-06, "loss": 0.3504, "step": 29757 }, { "epoch": 1.3656096553623056, "grad_norm": 0.5276463031768799, "learning_rate": 5.873874454290619e-06, "loss": 0.3717, "step": 29758 }, { "epoch": 1.3656555458675599, "grad_norm": 0.46185624599456787, "learning_rate": 5.873633039162614e-06, "loss": 0.3658, "step": 29759 }, { "epoch": 1.3657014363728144, "grad_norm": 0.4658348858356476, "learning_rate": 5.873391621933795e-06, "loss": 0.354, "step": 29760 }, { "epoch": 1.3657473268780689, "grad_norm": 0.46875959634780884, "learning_rate": 5.873150202604746e-06, "loss": 0.3708, "step": 29761 }, { "epoch": 1.3657932173833234, "grad_norm": 0.44907090067863464, "learning_rate": 5.872908781176046e-06, "loss": 0.3418, "step": 29762 }, { "epoch": 1.3658391078885779, "grad_norm": 0.43456974625587463, "learning_rate": 5.8726673576482764e-06, "loss": 0.3284, "step": 29763 }, { "epoch": 1.3658849983938324, "grad_norm": 0.470121830701828, "learning_rate": 5.8724259320220164e-06, "loss": 0.3493, "step": 29764 }, { "epoch": 1.3659308888990869, "grad_norm": 0.4517599046230316, "learning_rate": 5.872184504297849e-06, "loss": 0.3339, "step": 29765 }, { "epoch": 1.3659767794043414, "grad_norm": 0.5214707851409912, "learning_rate": 5.871943074476352e-06, "loss": 0.4562, "step": 29766 }, { "epoch": 1.3660226699095956, "grad_norm": 0.4566161036491394, "learning_rate": 5.8717016425581065e-06, "loss": 0.3468, "step": 29767 }, { "epoch": 1.3660685604148501, "grad_norm": 0.49475225806236267, "learning_rate": 5.8714602085436945e-06, "loss": 0.4085, "step": 29768 }, { "epoch": 1.3661144509201046, "grad_norm": 0.47037360072135925, "learning_rate": 5.871218772433694e-06, "loss": 0.3535, "step": 29769 }, { "epoch": 1.3661603414253591, "grad_norm": 0.5339037179946899, "learning_rate": 5.87097733422869e-06, "loss": 0.4859, "step": 29770 }, { "epoch": 1.3662062319306136, "grad_norm": 0.45391660928726196, "learning_rate": 5.870735893929258e-06, "loss": 0.3311, "step": 29771 }, { "epoch": 1.366252122435868, "grad_norm": 0.4685741066932678, "learning_rate": 5.870494451535983e-06, "loss": 0.3349, "step": 29772 }, { "epoch": 1.3662980129411224, "grad_norm": 0.4549446105957031, "learning_rate": 5.870253007049444e-06, "loss": 0.3371, "step": 29773 }, { "epoch": 1.366343903446377, "grad_norm": 0.48662102222442627, "learning_rate": 5.870011560470219e-06, "loss": 0.3696, "step": 29774 }, { "epoch": 1.3663897939516314, "grad_norm": 0.5081114768981934, "learning_rate": 5.869770111798892e-06, "loss": 0.4235, "step": 29775 }, { "epoch": 1.366435684456886, "grad_norm": 0.4568287432193756, "learning_rate": 5.869528661036041e-06, "loss": 0.3325, "step": 29776 }, { "epoch": 1.3664815749621404, "grad_norm": 0.4743413031101227, "learning_rate": 5.86928720818225e-06, "loss": 0.3693, "step": 29777 }, { "epoch": 1.366527465467395, "grad_norm": 0.43583944439888, "learning_rate": 5.869045753238096e-06, "loss": 0.3159, "step": 29778 }, { "epoch": 1.3665733559726494, "grad_norm": 0.5159749388694763, "learning_rate": 5.868804296204163e-06, "loss": 0.3859, "step": 29779 }, { "epoch": 1.3666192464779037, "grad_norm": 0.48418113589286804, "learning_rate": 5.868562837081028e-06, "loss": 0.3567, "step": 29780 }, { "epoch": 1.3666651369831582, "grad_norm": 0.48067766427993774, "learning_rate": 5.868321375869274e-06, "loss": 0.3796, "step": 29781 }, { "epoch": 1.3667110274884127, "grad_norm": 0.47048845887184143, "learning_rate": 5.868079912569482e-06, "loss": 0.3388, "step": 29782 }, { "epoch": 1.3667569179936672, "grad_norm": 0.44692379236221313, "learning_rate": 5.867838447182232e-06, "loss": 0.3078, "step": 29783 }, { "epoch": 1.3668028084989214, "grad_norm": 0.40212884545326233, "learning_rate": 5.867596979708102e-06, "loss": 0.2538, "step": 29784 }, { "epoch": 1.366848699004176, "grad_norm": 0.45300817489624023, "learning_rate": 5.867355510147677e-06, "loss": 0.2937, "step": 29785 }, { "epoch": 1.3668945895094304, "grad_norm": 0.47108668088912964, "learning_rate": 5.867114038501535e-06, "loss": 0.3774, "step": 29786 }, { "epoch": 1.366940480014685, "grad_norm": 0.44795137643814087, "learning_rate": 5.866872564770258e-06, "loss": 0.307, "step": 29787 }, { "epoch": 1.3669863705199394, "grad_norm": 0.4745928645133972, "learning_rate": 5.866631088954427e-06, "loss": 0.3765, "step": 29788 }, { "epoch": 1.367032261025194, "grad_norm": 0.4997924268245697, "learning_rate": 5.86638961105462e-06, "loss": 0.4638, "step": 29789 }, { "epoch": 1.3670781515304484, "grad_norm": 0.4842694103717804, "learning_rate": 5.866148131071421e-06, "loss": 0.4274, "step": 29790 }, { "epoch": 1.367124042035703, "grad_norm": 0.4479700028896332, "learning_rate": 5.865906649005408e-06, "loss": 0.3866, "step": 29791 }, { "epoch": 1.3671699325409572, "grad_norm": 0.4480702877044678, "learning_rate": 5.8656651648571625e-06, "loss": 0.3303, "step": 29792 }, { "epoch": 1.3672158230462117, "grad_norm": 0.46636658906936646, "learning_rate": 5.865423678627266e-06, "loss": 0.3187, "step": 29793 }, { "epoch": 1.3672617135514662, "grad_norm": 0.5072179436683655, "learning_rate": 5.8651821903163e-06, "loss": 0.2762, "step": 29794 }, { "epoch": 1.3673076040567207, "grad_norm": 0.4815240502357483, "learning_rate": 5.864940699924841e-06, "loss": 0.3339, "step": 29795 }, { "epoch": 1.3673534945619752, "grad_norm": 0.45585671067237854, "learning_rate": 5.864699207453476e-06, "loss": 0.392, "step": 29796 }, { "epoch": 1.3673993850672295, "grad_norm": 0.50896817445755, "learning_rate": 5.864457712902779e-06, "loss": 0.4446, "step": 29797 }, { "epoch": 1.367445275572484, "grad_norm": 0.45998165011405945, "learning_rate": 5.864216216273335e-06, "loss": 0.3547, "step": 29798 }, { "epoch": 1.3674911660777385, "grad_norm": 0.46132102608680725, "learning_rate": 5.863974717565727e-06, "loss": 0.3513, "step": 29799 }, { "epoch": 1.367537056582993, "grad_norm": 0.4766498804092407, "learning_rate": 5.863733216780529e-06, "loss": 0.3686, "step": 29800 }, { "epoch": 1.3675829470882475, "grad_norm": 0.4720718264579773, "learning_rate": 5.863491713918327e-06, "loss": 0.3643, "step": 29801 }, { "epoch": 1.367628837593502, "grad_norm": 0.5083315372467041, "learning_rate": 5.863250208979697e-06, "loss": 0.4043, "step": 29802 }, { "epoch": 1.3676747280987565, "grad_norm": 0.5806950330734253, "learning_rate": 5.863008701965225e-06, "loss": 0.3719, "step": 29803 }, { "epoch": 1.367720618604011, "grad_norm": 0.4860505163669586, "learning_rate": 5.8627671928754894e-06, "loss": 0.3574, "step": 29804 }, { "epoch": 1.3677665091092652, "grad_norm": 0.49794402718544006, "learning_rate": 5.862525681711071e-06, "loss": 0.4519, "step": 29805 }, { "epoch": 1.3678123996145197, "grad_norm": 0.4721835255622864, "learning_rate": 5.8622841684725494e-06, "loss": 0.384, "step": 29806 }, { "epoch": 1.3678582901197742, "grad_norm": 0.44490474462509155, "learning_rate": 5.862042653160509e-06, "loss": 0.3403, "step": 29807 }, { "epoch": 1.3679041806250287, "grad_norm": 0.4682622253894806, "learning_rate": 5.8618011357755255e-06, "loss": 0.3894, "step": 29808 }, { "epoch": 1.3679500711302832, "grad_norm": 0.47407492995262146, "learning_rate": 5.861559616318182e-06, "loss": 0.3896, "step": 29809 }, { "epoch": 1.3679959616355375, "grad_norm": 0.4773600995540619, "learning_rate": 5.861318094789064e-06, "loss": 0.3764, "step": 29810 }, { "epoch": 1.368041852140792, "grad_norm": 0.47023552656173706, "learning_rate": 5.861076571188743e-06, "loss": 0.3695, "step": 29811 }, { "epoch": 1.3680877426460465, "grad_norm": 0.45501163601875305, "learning_rate": 5.8608350455178055e-06, "loss": 0.3199, "step": 29812 }, { "epoch": 1.368133633151301, "grad_norm": 0.4484744071960449, "learning_rate": 5.860593517776833e-06, "loss": 0.3663, "step": 29813 }, { "epoch": 1.3681795236565555, "grad_norm": 0.48460522294044495, "learning_rate": 5.860351987966403e-06, "loss": 0.4191, "step": 29814 }, { "epoch": 1.36822541416181, "grad_norm": 0.4628541171550751, "learning_rate": 5.860110456087099e-06, "loss": 0.3599, "step": 29815 }, { "epoch": 1.3682713046670645, "grad_norm": 0.4088127613067627, "learning_rate": 5.859868922139501e-06, "loss": 0.2847, "step": 29816 }, { "epoch": 1.368317195172319, "grad_norm": 0.441959023475647, "learning_rate": 5.859627386124189e-06, "loss": 0.3252, "step": 29817 }, { "epoch": 1.3683630856775733, "grad_norm": 0.46995848417282104, "learning_rate": 5.859385848041746e-06, "loss": 0.3228, "step": 29818 }, { "epoch": 1.3684089761828278, "grad_norm": 0.49330806732177734, "learning_rate": 5.859144307892749e-06, "loss": 0.407, "step": 29819 }, { "epoch": 1.3684548666880822, "grad_norm": 0.4692334532737732, "learning_rate": 5.858902765677781e-06, "loss": 0.2939, "step": 29820 }, { "epoch": 1.3685007571933367, "grad_norm": 0.48432621359825134, "learning_rate": 5.8586612213974255e-06, "loss": 0.3504, "step": 29821 }, { "epoch": 1.368546647698591, "grad_norm": 0.4794270098209381, "learning_rate": 5.858419675052259e-06, "loss": 0.3324, "step": 29822 }, { "epoch": 1.3685925382038455, "grad_norm": 0.436763197183609, "learning_rate": 5.858178126642864e-06, "loss": 0.2853, "step": 29823 }, { "epoch": 1.3686384287091, "grad_norm": 0.46534958481788635, "learning_rate": 5.8579365761698225e-06, "loss": 0.3231, "step": 29824 }, { "epoch": 1.3686843192143545, "grad_norm": 0.4873819649219513, "learning_rate": 5.857695023633713e-06, "loss": 0.3292, "step": 29825 }, { "epoch": 1.368730209719609, "grad_norm": 0.4816637635231018, "learning_rate": 5.857453469035118e-06, "loss": 0.3632, "step": 29826 }, { "epoch": 1.3687761002248635, "grad_norm": 0.5007820129394531, "learning_rate": 5.857211912374619e-06, "loss": 0.3911, "step": 29827 }, { "epoch": 1.368821990730118, "grad_norm": 0.48338058590888977, "learning_rate": 5.856970353652794e-06, "loss": 0.3919, "step": 29828 }, { "epoch": 1.3688678812353725, "grad_norm": 0.4476061463356018, "learning_rate": 5.856728792870228e-06, "loss": 0.3197, "step": 29829 }, { "epoch": 1.3689137717406268, "grad_norm": 0.44476377964019775, "learning_rate": 5.856487230027499e-06, "loss": 0.3127, "step": 29830 }, { "epoch": 1.3689596622458813, "grad_norm": 0.43014779686927795, "learning_rate": 5.856245665125186e-06, "loss": 0.3216, "step": 29831 }, { "epoch": 1.3690055527511358, "grad_norm": 0.4995499849319458, "learning_rate": 5.856004098163877e-06, "loss": 0.404, "step": 29832 }, { "epoch": 1.3690514432563903, "grad_norm": 0.4835204482078552, "learning_rate": 5.855762529144145e-06, "loss": 0.4245, "step": 29833 }, { "epoch": 1.3690973337616448, "grad_norm": 0.4401048421859741, "learning_rate": 5.855520958066575e-06, "loss": 0.2939, "step": 29834 }, { "epoch": 1.369143224266899, "grad_norm": 0.4981495440006256, "learning_rate": 5.855279384931748e-06, "loss": 0.4341, "step": 29835 }, { "epoch": 1.3691891147721535, "grad_norm": 0.46675142645835876, "learning_rate": 5.8550378097402425e-06, "loss": 0.3068, "step": 29836 }, { "epoch": 1.369235005277408, "grad_norm": 0.429499089717865, "learning_rate": 5.854796232492642e-06, "loss": 0.2771, "step": 29837 }, { "epoch": 1.3692808957826625, "grad_norm": 0.46417662501335144, "learning_rate": 5.854554653189526e-06, "loss": 0.3314, "step": 29838 }, { "epoch": 1.369326786287917, "grad_norm": 0.48090627789497375, "learning_rate": 5.854313071831475e-06, "loss": 0.4058, "step": 29839 }, { "epoch": 1.3693726767931715, "grad_norm": 0.46481871604919434, "learning_rate": 5.854071488419071e-06, "loss": 0.3419, "step": 29840 }, { "epoch": 1.369418567298426, "grad_norm": 0.47211334109306335, "learning_rate": 5.853829902952895e-06, "loss": 0.3705, "step": 29841 }, { "epoch": 1.3694644578036805, "grad_norm": 0.4566355049610138, "learning_rate": 5.853588315433529e-06, "loss": 0.4015, "step": 29842 }, { "epoch": 1.3695103483089348, "grad_norm": 0.42488816380500793, "learning_rate": 5.853346725861551e-06, "loss": 0.2772, "step": 29843 }, { "epoch": 1.3695562388141893, "grad_norm": 0.45572802424430847, "learning_rate": 5.853105134237544e-06, "loss": 0.3232, "step": 29844 }, { "epoch": 1.3696021293194438, "grad_norm": 0.46379485726356506, "learning_rate": 5.8528635405620875e-06, "loss": 0.3482, "step": 29845 }, { "epoch": 1.3696480198246983, "grad_norm": 0.42551612854003906, "learning_rate": 5.852621944835763e-06, "loss": 0.2731, "step": 29846 }, { "epoch": 1.3696939103299528, "grad_norm": 0.44556811451911926, "learning_rate": 5.852380347059153e-06, "loss": 0.3596, "step": 29847 }, { "epoch": 1.369739800835207, "grad_norm": 0.49066853523254395, "learning_rate": 5.852138747232837e-06, "loss": 0.3527, "step": 29848 }, { "epoch": 1.3697856913404616, "grad_norm": 0.45625752210617065, "learning_rate": 5.851897145357396e-06, "loss": 0.3358, "step": 29849 }, { "epoch": 1.369831581845716, "grad_norm": 0.5072566270828247, "learning_rate": 5.851655541433412e-06, "loss": 0.4343, "step": 29850 }, { "epoch": 1.3698774723509706, "grad_norm": 0.5686078071594238, "learning_rate": 5.851413935461464e-06, "loss": 0.39, "step": 29851 }, { "epoch": 1.369923362856225, "grad_norm": 0.4519713819026947, "learning_rate": 5.851172327442135e-06, "loss": 0.3125, "step": 29852 }, { "epoch": 1.3699692533614796, "grad_norm": 0.5110547542572021, "learning_rate": 5.850930717376004e-06, "loss": 0.3631, "step": 29853 }, { "epoch": 1.370015143866734, "grad_norm": 0.468814492225647, "learning_rate": 5.850689105263655e-06, "loss": 0.3577, "step": 29854 }, { "epoch": 1.3700610343719886, "grad_norm": 0.4704006612300873, "learning_rate": 5.850447491105668e-06, "loss": 0.3431, "step": 29855 }, { "epoch": 1.3701069248772428, "grad_norm": 0.5195923447608948, "learning_rate": 5.850205874902621e-06, "loss": 0.4473, "step": 29856 }, { "epoch": 1.3701528153824973, "grad_norm": 0.42556577920913696, "learning_rate": 5.849964256655096e-06, "loss": 0.3121, "step": 29857 }, { "epoch": 1.3701987058877518, "grad_norm": 0.4783497452735901, "learning_rate": 5.8497226363636774e-06, "loss": 0.3121, "step": 29858 }, { "epoch": 1.3702445963930063, "grad_norm": 0.48916274309158325, "learning_rate": 5.849481014028945e-06, "loss": 0.3588, "step": 29859 }, { "epoch": 1.3702904868982608, "grad_norm": 0.4638911783695221, "learning_rate": 5.849239389651476e-06, "loss": 0.3267, "step": 29860 }, { "epoch": 1.370336377403515, "grad_norm": 0.4423004388809204, "learning_rate": 5.8489977632318565e-06, "loss": 0.3103, "step": 29861 }, { "epoch": 1.3703822679087696, "grad_norm": 0.5200011134147644, "learning_rate": 5.848756134770665e-06, "loss": 0.408, "step": 29862 }, { "epoch": 1.370428158414024, "grad_norm": 0.4414168894290924, "learning_rate": 5.848514504268481e-06, "loss": 0.3257, "step": 29863 }, { "epoch": 1.3704740489192786, "grad_norm": 0.45072516798973083, "learning_rate": 5.848272871725889e-06, "loss": 0.3265, "step": 29864 }, { "epoch": 1.370519939424533, "grad_norm": 0.47062426805496216, "learning_rate": 5.848031237143469e-06, "loss": 0.3443, "step": 29865 }, { "epoch": 1.3705658299297876, "grad_norm": 0.4317375123500824, "learning_rate": 5.847789600521802e-06, "loss": 0.3, "step": 29866 }, { "epoch": 1.370611720435042, "grad_norm": 0.4936336278915405, "learning_rate": 5.8475479618614665e-06, "loss": 0.4043, "step": 29867 }, { "epoch": 1.3706576109402966, "grad_norm": 0.43565696477890015, "learning_rate": 5.8473063211630465e-06, "loss": 0.3354, "step": 29868 }, { "epoch": 1.3707035014455509, "grad_norm": 0.5324808955192566, "learning_rate": 5.847064678427123e-06, "loss": 0.2976, "step": 29869 }, { "epoch": 1.3707493919508054, "grad_norm": 0.47710222005844116, "learning_rate": 5.846823033654275e-06, "loss": 0.3659, "step": 29870 }, { "epoch": 1.3707952824560599, "grad_norm": 0.4863049387931824, "learning_rate": 5.846581386845086e-06, "loss": 0.4032, "step": 29871 }, { "epoch": 1.3708411729613144, "grad_norm": 0.45570310950279236, "learning_rate": 5.846339738000135e-06, "loss": 0.3436, "step": 29872 }, { "epoch": 1.3708870634665686, "grad_norm": 0.44967561960220337, "learning_rate": 5.846098087120004e-06, "loss": 0.3041, "step": 29873 }, { "epoch": 1.3709329539718231, "grad_norm": 0.46124783158302307, "learning_rate": 5.8458564342052746e-06, "loss": 0.3523, "step": 29874 }, { "epoch": 1.3709788444770776, "grad_norm": 0.5000905990600586, "learning_rate": 5.845614779256527e-06, "loss": 0.373, "step": 29875 }, { "epoch": 1.3710247349823321, "grad_norm": 0.5006259083747864, "learning_rate": 5.845373122274345e-06, "loss": 0.4085, "step": 29876 }, { "epoch": 1.3710706254875866, "grad_norm": 0.4693033993244171, "learning_rate": 5.845131463259305e-06, "loss": 0.3331, "step": 29877 }, { "epoch": 1.3711165159928411, "grad_norm": 0.47834503650665283, "learning_rate": 5.844889802211992e-06, "loss": 0.336, "step": 29878 }, { "epoch": 1.3711624064980956, "grad_norm": 0.48930051922798157, "learning_rate": 5.8446481391329835e-06, "loss": 0.3078, "step": 29879 }, { "epoch": 1.3712082970033501, "grad_norm": 0.46694740653038025, "learning_rate": 5.844406474022865e-06, "loss": 0.3557, "step": 29880 }, { "epoch": 1.3712541875086044, "grad_norm": 0.4398307800292969, "learning_rate": 5.844164806882216e-06, "loss": 0.3517, "step": 29881 }, { "epoch": 1.371300078013859, "grad_norm": 0.4710296392440796, "learning_rate": 5.843923137711613e-06, "loss": 0.364, "step": 29882 }, { "epoch": 1.3713459685191134, "grad_norm": 0.5010955333709717, "learning_rate": 5.843681466511646e-06, "loss": 0.3987, "step": 29883 }, { "epoch": 1.3713918590243679, "grad_norm": 0.4906069040298462, "learning_rate": 5.843439793282888e-06, "loss": 0.4369, "step": 29884 }, { "epoch": 1.3714377495296224, "grad_norm": 0.48778411746025085, "learning_rate": 5.843198118025925e-06, "loss": 0.3644, "step": 29885 }, { "epoch": 1.3714836400348767, "grad_norm": 0.5281990170478821, "learning_rate": 5.842956440741336e-06, "loss": 0.4228, "step": 29886 }, { "epoch": 1.3715295305401312, "grad_norm": 0.46646177768707275, "learning_rate": 5.8427147614297045e-06, "loss": 0.3872, "step": 29887 }, { "epoch": 1.3715754210453857, "grad_norm": 0.5040508508682251, "learning_rate": 5.842473080091608e-06, "loss": 0.4155, "step": 29888 }, { "epoch": 1.3716213115506402, "grad_norm": 0.45189303159713745, "learning_rate": 5.84223139672763e-06, "loss": 0.3162, "step": 29889 }, { "epoch": 1.3716672020558947, "grad_norm": 0.4873679578304291, "learning_rate": 5.841989711338351e-06, "loss": 0.3594, "step": 29890 }, { "epoch": 1.3717130925611491, "grad_norm": 0.4925362467765808, "learning_rate": 5.841748023924354e-06, "loss": 0.3764, "step": 29891 }, { "epoch": 1.3717589830664036, "grad_norm": 0.4965896010398865, "learning_rate": 5.8415063344862174e-06, "loss": 0.3616, "step": 29892 }, { "epoch": 1.3718048735716581, "grad_norm": 0.48598533868789673, "learning_rate": 5.841264643024524e-06, "loss": 0.3819, "step": 29893 }, { "epoch": 1.3718507640769124, "grad_norm": 0.454355925321579, "learning_rate": 5.841022949539854e-06, "loss": 0.3312, "step": 29894 }, { "epoch": 1.371896654582167, "grad_norm": 0.5029056668281555, "learning_rate": 5.84078125403279e-06, "loss": 0.4331, "step": 29895 }, { "epoch": 1.3719425450874214, "grad_norm": 0.4234856069087982, "learning_rate": 5.840539556503913e-06, "loss": 0.2954, "step": 29896 }, { "epoch": 1.371988435592676, "grad_norm": 0.4185897409915924, "learning_rate": 5.840297856953803e-06, "loss": 0.2829, "step": 29897 }, { "epoch": 1.3720343260979304, "grad_norm": 0.47505441308021545, "learning_rate": 5.840056155383042e-06, "loss": 0.3716, "step": 29898 }, { "epoch": 1.3720802166031847, "grad_norm": 0.4962650239467621, "learning_rate": 5.839814451792211e-06, "loss": 0.3718, "step": 29899 }, { "epoch": 1.3721261071084392, "grad_norm": 0.4963543117046356, "learning_rate": 5.839572746181891e-06, "loss": 0.3934, "step": 29900 }, { "epoch": 1.3721719976136937, "grad_norm": 0.4675716459751129, "learning_rate": 5.839331038552664e-06, "loss": 0.3784, "step": 29901 }, { "epoch": 1.3722178881189482, "grad_norm": 0.44355955719947815, "learning_rate": 5.83908932890511e-06, "loss": 0.3691, "step": 29902 }, { "epoch": 1.3722637786242027, "grad_norm": 0.4902159571647644, "learning_rate": 5.838847617239812e-06, "loss": 0.3804, "step": 29903 }, { "epoch": 1.3723096691294572, "grad_norm": 0.46450403332710266, "learning_rate": 5.83860590355735e-06, "loss": 0.3261, "step": 29904 }, { "epoch": 1.3723555596347117, "grad_norm": 0.43805646896362305, "learning_rate": 5.838364187858304e-06, "loss": 0.3408, "step": 29905 }, { "epoch": 1.3724014501399662, "grad_norm": 0.43881484866142273, "learning_rate": 5.838122470143258e-06, "loss": 0.3719, "step": 29906 }, { "epoch": 1.3724473406452204, "grad_norm": 0.45728597044944763, "learning_rate": 5.8378807504127925e-06, "loss": 0.3676, "step": 29907 }, { "epoch": 1.372493231150475, "grad_norm": 0.47228026390075684, "learning_rate": 5.837639028667488e-06, "loss": 0.3451, "step": 29908 }, { "epoch": 1.3725391216557294, "grad_norm": 0.4513946771621704, "learning_rate": 5.837397304907926e-06, "loss": 0.3381, "step": 29909 }, { "epoch": 1.372585012160984, "grad_norm": 0.5032436847686768, "learning_rate": 5.837155579134686e-06, "loss": 0.4114, "step": 29910 }, { "epoch": 1.3726309026662382, "grad_norm": 0.5276680588722229, "learning_rate": 5.836913851348353e-06, "loss": 0.3682, "step": 29911 }, { "epoch": 1.3726767931714927, "grad_norm": 0.5179101824760437, "learning_rate": 5.8366721215495045e-06, "loss": 0.3855, "step": 29912 }, { "epoch": 1.3727226836767472, "grad_norm": 0.4972294867038727, "learning_rate": 5.836430389738727e-06, "loss": 0.4112, "step": 29913 }, { "epoch": 1.3727685741820017, "grad_norm": 0.46858328580856323, "learning_rate": 5.836188655916595e-06, "loss": 0.3092, "step": 29914 }, { "epoch": 1.3728144646872562, "grad_norm": 0.49752476811408997, "learning_rate": 5.835946920083695e-06, "loss": 0.4202, "step": 29915 }, { "epoch": 1.3728603551925107, "grad_norm": 0.4393417239189148, "learning_rate": 5.835705182240605e-06, "loss": 0.3146, "step": 29916 }, { "epoch": 1.3729062456977652, "grad_norm": 0.48807623982429504, "learning_rate": 5.835463442387909e-06, "loss": 0.3482, "step": 29917 }, { "epoch": 1.3729521362030197, "grad_norm": 0.5008551478385925, "learning_rate": 5.835221700526187e-06, "loss": 0.4159, "step": 29918 }, { "epoch": 1.372998026708274, "grad_norm": 0.43300560116767883, "learning_rate": 5.834979956656021e-06, "loss": 0.2896, "step": 29919 }, { "epoch": 1.3730439172135285, "grad_norm": 0.4733290374279022, "learning_rate": 5.834738210777992e-06, "loss": 0.3731, "step": 29920 }, { "epoch": 1.373089807718783, "grad_norm": 0.47431910037994385, "learning_rate": 5.834496462892678e-06, "loss": 0.3775, "step": 29921 }, { "epoch": 1.3731356982240375, "grad_norm": 0.4977334141731262, "learning_rate": 5.8342547130006655e-06, "loss": 0.3799, "step": 29922 }, { "epoch": 1.373181588729292, "grad_norm": 0.47961804270744324, "learning_rate": 5.8340129611025345e-06, "loss": 0.3457, "step": 29923 }, { "epoch": 1.3732274792345462, "grad_norm": 0.46254727244377136, "learning_rate": 5.833771207198865e-06, "loss": 0.3383, "step": 29924 }, { "epoch": 1.3732733697398007, "grad_norm": 0.4781051576137543, "learning_rate": 5.833529451290237e-06, "loss": 0.3606, "step": 29925 }, { "epoch": 1.3733192602450552, "grad_norm": 0.48764723539352417, "learning_rate": 5.833287693377236e-06, "loss": 0.3223, "step": 29926 }, { "epoch": 1.3733651507503097, "grad_norm": 0.45057082176208496, "learning_rate": 5.833045933460441e-06, "loss": 0.3641, "step": 29927 }, { "epoch": 1.3734110412555642, "grad_norm": 0.44263991713523865, "learning_rate": 5.832804171540432e-06, "loss": 0.3003, "step": 29928 }, { "epoch": 1.3734569317608187, "grad_norm": 0.47315505146980286, "learning_rate": 5.832562407617792e-06, "loss": 0.384, "step": 29929 }, { "epoch": 1.3735028222660732, "grad_norm": 0.4774218201637268, "learning_rate": 5.832320641693104e-06, "loss": 0.3602, "step": 29930 }, { "epoch": 1.3735487127713277, "grad_norm": 0.44227614998817444, "learning_rate": 5.832078873766946e-06, "loss": 0.3091, "step": 29931 }, { "epoch": 1.373594603276582, "grad_norm": 0.49108612537384033, "learning_rate": 5.831837103839901e-06, "loss": 0.398, "step": 29932 }, { "epoch": 1.3736404937818365, "grad_norm": 0.5225463509559631, "learning_rate": 5.83159533191255e-06, "loss": 0.4523, "step": 29933 }, { "epoch": 1.373686384287091, "grad_norm": 0.44263342022895813, "learning_rate": 5.831353557985476e-06, "loss": 0.3365, "step": 29934 }, { "epoch": 1.3737322747923455, "grad_norm": 0.5050567388534546, "learning_rate": 5.83111178205926e-06, "loss": 0.4021, "step": 29935 }, { "epoch": 1.3737781652976, "grad_norm": 0.4229198098182678, "learning_rate": 5.8308700041344804e-06, "loss": 0.2815, "step": 29936 }, { "epoch": 1.3738240558028543, "grad_norm": 0.5005334615707397, "learning_rate": 5.8306282242117206e-06, "loss": 0.3399, "step": 29937 }, { "epoch": 1.3738699463081088, "grad_norm": 0.4750683903694153, "learning_rate": 5.830386442291562e-06, "loss": 0.3955, "step": 29938 }, { "epoch": 1.3739158368133633, "grad_norm": 0.4602978825569153, "learning_rate": 5.830144658374585e-06, "loss": 0.3259, "step": 29939 }, { "epoch": 1.3739617273186178, "grad_norm": 0.47690650820732117, "learning_rate": 5.829902872461376e-06, "loss": 0.3891, "step": 29940 }, { "epoch": 1.3740076178238723, "grad_norm": 0.5023176074028015, "learning_rate": 5.82966108455251e-06, "loss": 0.3992, "step": 29941 }, { "epoch": 1.3740535083291268, "grad_norm": 0.5280569195747375, "learning_rate": 5.829419294648571e-06, "loss": 0.4455, "step": 29942 }, { "epoch": 1.3740993988343813, "grad_norm": 0.48837098479270935, "learning_rate": 5.829177502750141e-06, "loss": 0.3954, "step": 29943 }, { "epoch": 1.3741452893396358, "grad_norm": 0.4593237638473511, "learning_rate": 5.8289357088578e-06, "loss": 0.3378, "step": 29944 }, { "epoch": 1.37419117984489, "grad_norm": 0.4931478202342987, "learning_rate": 5.828693912972131e-06, "loss": 0.3545, "step": 29945 }, { "epoch": 1.3742370703501445, "grad_norm": 0.43119341135025024, "learning_rate": 5.828452115093714e-06, "loss": 0.3037, "step": 29946 }, { "epoch": 1.374282960855399, "grad_norm": 0.46201884746551514, "learning_rate": 5.828210315223132e-06, "loss": 0.3064, "step": 29947 }, { "epoch": 1.3743288513606535, "grad_norm": 0.49133220314979553, "learning_rate": 5.827968513360965e-06, "loss": 0.3737, "step": 29948 }, { "epoch": 1.374374741865908, "grad_norm": 0.4481653869152069, "learning_rate": 5.827726709507795e-06, "loss": 0.322, "step": 29949 }, { "epoch": 1.3744206323711623, "grad_norm": 0.5017622709274292, "learning_rate": 5.827484903664202e-06, "loss": 0.3499, "step": 29950 }, { "epoch": 1.3744665228764168, "grad_norm": 0.5003820061683655, "learning_rate": 5.8272430958307725e-06, "loss": 0.4117, "step": 29951 }, { "epoch": 1.3745124133816713, "grad_norm": 0.41425642371177673, "learning_rate": 5.8270012860080825e-06, "loss": 0.2387, "step": 29952 }, { "epoch": 1.3745583038869258, "grad_norm": 0.47807297110557556, "learning_rate": 5.826759474196715e-06, "loss": 0.3733, "step": 29953 }, { "epoch": 1.3746041943921803, "grad_norm": 0.49599745869636536, "learning_rate": 5.826517660397253e-06, "loss": 0.452, "step": 29954 }, { "epoch": 1.3746500848974348, "grad_norm": 0.4883154630661011, "learning_rate": 5.826275844610275e-06, "loss": 0.4042, "step": 29955 }, { "epoch": 1.3746959754026893, "grad_norm": 0.44748809933662415, "learning_rate": 5.826034026836366e-06, "loss": 0.3406, "step": 29956 }, { "epoch": 1.3747418659079438, "grad_norm": 0.5185078382492065, "learning_rate": 5.825792207076106e-06, "loss": 0.44, "step": 29957 }, { "epoch": 1.374787756413198, "grad_norm": 0.4805600643157959, "learning_rate": 5.825550385330076e-06, "loss": 0.3756, "step": 29958 }, { "epoch": 1.3748336469184526, "grad_norm": 0.428946852684021, "learning_rate": 5.825308561598856e-06, "loss": 0.2906, "step": 29959 }, { "epoch": 1.374879537423707, "grad_norm": 0.45514506101608276, "learning_rate": 5.8250667358830315e-06, "loss": 0.3388, "step": 29960 }, { "epoch": 1.3749254279289616, "grad_norm": 0.48010337352752686, "learning_rate": 5.824824908183181e-06, "loss": 0.356, "step": 29961 }, { "epoch": 1.3749713184342158, "grad_norm": 0.4652189314365387, "learning_rate": 5.824583078499888e-06, "loss": 0.4319, "step": 29962 }, { "epoch": 1.3750172089394703, "grad_norm": 0.49779486656188965, "learning_rate": 5.824341246833732e-06, "loss": 0.3602, "step": 29963 }, { "epoch": 1.3750630994447248, "grad_norm": 0.5026893019676208, "learning_rate": 5.824099413185295e-06, "loss": 0.4436, "step": 29964 }, { "epoch": 1.3751089899499793, "grad_norm": 0.5119026899337769, "learning_rate": 5.823857577555161e-06, "loss": 0.4486, "step": 29965 }, { "epoch": 1.3751548804552338, "grad_norm": 0.48339319229125977, "learning_rate": 5.823615739943909e-06, "loss": 0.4345, "step": 29966 }, { "epoch": 1.3752007709604883, "grad_norm": 0.49107396602630615, "learning_rate": 5.82337390035212e-06, "loss": 0.3742, "step": 29967 }, { "epoch": 1.3752466614657428, "grad_norm": 0.5385260581970215, "learning_rate": 5.823132058780378e-06, "loss": 0.4199, "step": 29968 }, { "epoch": 1.3752925519709973, "grad_norm": 0.4572819471359253, "learning_rate": 5.822890215229261e-06, "loss": 0.3115, "step": 29969 }, { "epoch": 1.3753384424762516, "grad_norm": 0.45857003331184387, "learning_rate": 5.822648369699353e-06, "loss": 0.3324, "step": 29970 }, { "epoch": 1.375384332981506, "grad_norm": 0.522088885307312, "learning_rate": 5.822406522191237e-06, "loss": 0.4261, "step": 29971 }, { "epoch": 1.3754302234867606, "grad_norm": 0.4853614866733551, "learning_rate": 5.822164672705492e-06, "loss": 0.335, "step": 29972 }, { "epoch": 1.375476113992015, "grad_norm": 0.5195671916007996, "learning_rate": 5.8219228212427e-06, "loss": 0.4521, "step": 29973 }, { "epoch": 1.3755220044972696, "grad_norm": 0.4595966935157776, "learning_rate": 5.821680967803444e-06, "loss": 0.3161, "step": 29974 }, { "epoch": 1.3755678950025239, "grad_norm": 0.48904651403427124, "learning_rate": 5.821439112388305e-06, "loss": 0.3889, "step": 29975 }, { "epoch": 1.3756137855077784, "grad_norm": 0.4679660499095917, "learning_rate": 5.821197254997862e-06, "loss": 0.3426, "step": 29976 }, { "epoch": 1.3756596760130329, "grad_norm": 0.42805859446525574, "learning_rate": 5.820955395632701e-06, "loss": 0.2938, "step": 29977 }, { "epoch": 1.3757055665182873, "grad_norm": 0.4697129726409912, "learning_rate": 5.8207135342934006e-06, "loss": 0.3226, "step": 29978 }, { "epoch": 1.3757514570235418, "grad_norm": 0.4855487644672394, "learning_rate": 5.8204716709805435e-06, "loss": 0.3762, "step": 29979 }, { "epoch": 1.3757973475287963, "grad_norm": 0.4444408416748047, "learning_rate": 5.82022980569471e-06, "loss": 0.3467, "step": 29980 }, { "epoch": 1.3758432380340508, "grad_norm": 0.4726026654243469, "learning_rate": 5.819987938436483e-06, "loss": 0.3746, "step": 29981 }, { "epoch": 1.3758891285393053, "grad_norm": 0.4688367545604706, "learning_rate": 5.819746069206445e-06, "loss": 0.3572, "step": 29982 }, { "epoch": 1.3759350190445596, "grad_norm": 0.5091916918754578, "learning_rate": 5.819504198005175e-06, "loss": 0.4536, "step": 29983 }, { "epoch": 1.3759809095498141, "grad_norm": 0.48533016443252563, "learning_rate": 5.819262324833257e-06, "loss": 0.3619, "step": 29984 }, { "epoch": 1.3760268000550686, "grad_norm": 0.4556562602519989, "learning_rate": 5.819020449691272e-06, "loss": 0.3351, "step": 29985 }, { "epoch": 1.376072690560323, "grad_norm": 0.4366835057735443, "learning_rate": 5.8187785725798e-06, "loss": 0.3352, "step": 29986 }, { "epoch": 1.3761185810655776, "grad_norm": 0.4405767023563385, "learning_rate": 5.818536693499424e-06, "loss": 0.3346, "step": 29987 }, { "epoch": 1.3761644715708319, "grad_norm": 0.47440850734710693, "learning_rate": 5.818294812450727e-06, "loss": 0.4103, "step": 29988 }, { "epoch": 1.3762103620760864, "grad_norm": 0.44162923097610474, "learning_rate": 5.818052929434288e-06, "loss": 0.2736, "step": 29989 }, { "epoch": 1.3762562525813409, "grad_norm": 0.49869540333747864, "learning_rate": 5.8178110444506896e-06, "loss": 0.4282, "step": 29990 }, { "epoch": 1.3763021430865954, "grad_norm": 0.42945775389671326, "learning_rate": 5.817569157500516e-06, "loss": 0.2881, "step": 29991 }, { "epoch": 1.3763480335918499, "grad_norm": 0.45787739753723145, "learning_rate": 5.817327268584344e-06, "loss": 0.3653, "step": 29992 }, { "epoch": 1.3763939240971044, "grad_norm": 0.4477265179157257, "learning_rate": 5.817085377702758e-06, "loss": 0.3217, "step": 29993 }, { "epoch": 1.3764398146023589, "grad_norm": 0.5165705680847168, "learning_rate": 5.816843484856341e-06, "loss": 0.451, "step": 29994 }, { "epoch": 1.3764857051076134, "grad_norm": 0.4712737500667572, "learning_rate": 5.816601590045673e-06, "loss": 0.3996, "step": 29995 }, { "epoch": 1.3765315956128676, "grad_norm": 0.5163758993148804, "learning_rate": 5.816359693271335e-06, "loss": 0.443, "step": 29996 }, { "epoch": 1.3765774861181221, "grad_norm": 0.4562827944755554, "learning_rate": 5.816117794533911e-06, "loss": 0.3281, "step": 29997 }, { "epoch": 1.3766233766233766, "grad_norm": 0.44356945157051086, "learning_rate": 5.8158758938339795e-06, "loss": 0.3463, "step": 29998 }, { "epoch": 1.3766692671286311, "grad_norm": 0.47437626123428345, "learning_rate": 5.815633991172125e-06, "loss": 0.4258, "step": 29999 }, { "epoch": 1.3767151576338854, "grad_norm": 0.5096777081489563, "learning_rate": 5.815392086548929e-06, "loss": 0.3751, "step": 30000 }, { "epoch": 1.37676104813914, "grad_norm": 0.4830615520477295, "learning_rate": 5.81515017996497e-06, "loss": 0.3185, "step": 30001 }, { "epoch": 1.3768069386443944, "grad_norm": 0.4878954589366913, "learning_rate": 5.814908271420835e-06, "loss": 0.3561, "step": 30002 }, { "epoch": 1.376852829149649, "grad_norm": 0.3972504734992981, "learning_rate": 5.814666360917101e-06, "loss": 0.2538, "step": 30003 }, { "epoch": 1.3768987196549034, "grad_norm": 0.5065438747406006, "learning_rate": 5.81442444845435e-06, "loss": 0.4021, "step": 30004 }, { "epoch": 1.376944610160158, "grad_norm": 0.49227166175842285, "learning_rate": 5.814182534033168e-06, "loss": 0.3427, "step": 30005 }, { "epoch": 1.3769905006654124, "grad_norm": 0.5042336583137512, "learning_rate": 5.813940617654134e-06, "loss": 0.3602, "step": 30006 }, { "epoch": 1.377036391170667, "grad_norm": 0.41764673590660095, "learning_rate": 5.813698699317828e-06, "loss": 0.2805, "step": 30007 }, { "epoch": 1.3770822816759212, "grad_norm": 0.4617655277252197, "learning_rate": 5.8134567790248345e-06, "loss": 0.3692, "step": 30008 }, { "epoch": 1.3771281721811757, "grad_norm": 0.4986257553100586, "learning_rate": 5.813214856775733e-06, "loss": 0.3811, "step": 30009 }, { "epoch": 1.3771740626864302, "grad_norm": 0.4710755944252014, "learning_rate": 5.8129729325711075e-06, "loss": 0.3512, "step": 30010 }, { "epoch": 1.3772199531916847, "grad_norm": 0.4673018455505371, "learning_rate": 5.81273100641154e-06, "loss": 0.3545, "step": 30011 }, { "epoch": 1.3772658436969392, "grad_norm": 0.5035912394523621, "learning_rate": 5.812489078297609e-06, "loss": 0.4364, "step": 30012 }, { "epoch": 1.3773117342021934, "grad_norm": 0.42631104588508606, "learning_rate": 5.812247148229898e-06, "loss": 0.2956, "step": 30013 }, { "epoch": 1.377357624707448, "grad_norm": 0.4278251826763153, "learning_rate": 5.812005216208989e-06, "loss": 0.309, "step": 30014 }, { "epoch": 1.3774035152127024, "grad_norm": 0.4798485040664673, "learning_rate": 5.8117632822354646e-06, "loss": 0.3443, "step": 30015 }, { "epoch": 1.377449405717957, "grad_norm": 0.4846261143684387, "learning_rate": 5.811521346309905e-06, "loss": 0.3293, "step": 30016 }, { "epoch": 1.3774952962232114, "grad_norm": 0.4743522107601166, "learning_rate": 5.8112794084328936e-06, "loss": 0.3447, "step": 30017 }, { "epoch": 1.377541186728466, "grad_norm": 0.4767338037490845, "learning_rate": 5.811037468605011e-06, "loss": 0.3243, "step": 30018 }, { "epoch": 1.3775870772337204, "grad_norm": 0.5213478803634644, "learning_rate": 5.810795526826839e-06, "loss": 0.4027, "step": 30019 }, { "epoch": 1.377632967738975, "grad_norm": 0.49750325083732605, "learning_rate": 5.810553583098959e-06, "loss": 0.3683, "step": 30020 }, { "epoch": 1.3776788582442292, "grad_norm": 0.49233412742614746, "learning_rate": 5.8103116374219535e-06, "loss": 0.3644, "step": 30021 }, { "epoch": 1.3777247487494837, "grad_norm": 0.5257917642593384, "learning_rate": 5.810069689796407e-06, "loss": 0.4044, "step": 30022 }, { "epoch": 1.3777706392547382, "grad_norm": 0.4458836615085602, "learning_rate": 5.809827740222895e-06, "loss": 0.3248, "step": 30023 }, { "epoch": 1.3778165297599927, "grad_norm": 0.45984727144241333, "learning_rate": 5.809585788702004e-06, "loss": 0.3303, "step": 30024 }, { "epoch": 1.3778624202652472, "grad_norm": 0.4440291225910187, "learning_rate": 5.809343835234316e-06, "loss": 0.3194, "step": 30025 }, { "epoch": 1.3779083107705015, "grad_norm": 0.5417717099189758, "learning_rate": 5.809101879820409e-06, "loss": 0.4939, "step": 30026 }, { "epoch": 1.377954201275756, "grad_norm": 0.49395695328712463, "learning_rate": 5.80885992246087e-06, "loss": 0.4245, "step": 30027 }, { "epoch": 1.3780000917810105, "grad_norm": 0.4934785068035126, "learning_rate": 5.808617963156277e-06, "loss": 0.3919, "step": 30028 }, { "epoch": 1.378045982286265, "grad_norm": 0.4426972270011902, "learning_rate": 5.808376001907212e-06, "loss": 0.2934, "step": 30029 }, { "epoch": 1.3780918727915195, "grad_norm": 0.5040740370750427, "learning_rate": 5.80813403871426e-06, "loss": 0.4, "step": 30030 }, { "epoch": 1.378137763296774, "grad_norm": 0.4542422592639923, "learning_rate": 5.807892073577999e-06, "loss": 0.3041, "step": 30031 }, { "epoch": 1.3781836538020285, "grad_norm": 0.5049039721488953, "learning_rate": 5.807650106499012e-06, "loss": 0.4533, "step": 30032 }, { "epoch": 1.378229544307283, "grad_norm": 0.4647717773914337, "learning_rate": 5.807408137477885e-06, "loss": 0.333, "step": 30033 }, { "epoch": 1.3782754348125372, "grad_norm": 0.48151805996894836, "learning_rate": 5.807166166515192e-06, "loss": 0.363, "step": 30034 }, { "epoch": 1.3783213253177917, "grad_norm": 0.5103830695152283, "learning_rate": 5.806924193611521e-06, "loss": 0.4319, "step": 30035 }, { "epoch": 1.3783672158230462, "grad_norm": 0.4830034673213959, "learning_rate": 5.806682218767452e-06, "loss": 0.3676, "step": 30036 }, { "epoch": 1.3784131063283007, "grad_norm": 0.4696045517921448, "learning_rate": 5.806440241983566e-06, "loss": 0.3781, "step": 30037 }, { "epoch": 1.3784589968335552, "grad_norm": 0.4406489431858063, "learning_rate": 5.806198263260447e-06, "loss": 0.3138, "step": 30038 }, { "epoch": 1.3785048873388095, "grad_norm": 0.4668993651866913, "learning_rate": 5.805956282598676e-06, "loss": 0.3482, "step": 30039 }, { "epoch": 1.378550777844064, "grad_norm": 0.4303893446922302, "learning_rate": 5.805714299998833e-06, "loss": 0.3006, "step": 30040 }, { "epoch": 1.3785966683493185, "grad_norm": 0.47575873136520386, "learning_rate": 5.805472315461501e-06, "loss": 0.3584, "step": 30041 }, { "epoch": 1.378642558854573, "grad_norm": 0.5100101828575134, "learning_rate": 5.805230328987265e-06, "loss": 0.4079, "step": 30042 }, { "epoch": 1.3786884493598275, "grad_norm": 0.4731379449367523, "learning_rate": 5.8049883405767015e-06, "loss": 0.3598, "step": 30043 }, { "epoch": 1.378734339865082, "grad_norm": 0.44419899582862854, "learning_rate": 5.804746350230396e-06, "loss": 0.3162, "step": 30044 }, { "epoch": 1.3787802303703365, "grad_norm": 0.4433366656303406, "learning_rate": 5.80450435794893e-06, "loss": 0.313, "step": 30045 }, { "epoch": 1.378826120875591, "grad_norm": 0.4854978919029236, "learning_rate": 5.804262363732885e-06, "loss": 0.4027, "step": 30046 }, { "epoch": 1.3788720113808453, "grad_norm": 0.44491836428642273, "learning_rate": 5.804020367582842e-06, "loss": 0.3308, "step": 30047 }, { "epoch": 1.3789179018860998, "grad_norm": 0.5081619620323181, "learning_rate": 5.8037783694993835e-06, "loss": 0.4722, "step": 30048 }, { "epoch": 1.3789637923913542, "grad_norm": 0.44249504804611206, "learning_rate": 5.803536369483092e-06, "loss": 0.3117, "step": 30049 }, { "epoch": 1.3790096828966087, "grad_norm": 0.4390316307544708, "learning_rate": 5.803294367534551e-06, "loss": 0.3155, "step": 30050 }, { "epoch": 1.379055573401863, "grad_norm": 0.44082918763160706, "learning_rate": 5.803052363654338e-06, "loss": 0.3115, "step": 30051 }, { "epoch": 1.3791014639071175, "grad_norm": 0.44473499059677124, "learning_rate": 5.802810357843038e-06, "loss": 0.2948, "step": 30052 }, { "epoch": 1.379147354412372, "grad_norm": 0.4447748363018036, "learning_rate": 5.802568350101234e-06, "loss": 0.351, "step": 30053 }, { "epoch": 1.3791932449176265, "grad_norm": 0.4902257025241852, "learning_rate": 5.802326340429505e-06, "loss": 0.4176, "step": 30054 }, { "epoch": 1.379239135422881, "grad_norm": 0.4404298961162567, "learning_rate": 5.8020843288284344e-06, "loss": 0.3164, "step": 30055 }, { "epoch": 1.3792850259281355, "grad_norm": 0.47950705885887146, "learning_rate": 5.801842315298606e-06, "loss": 0.3564, "step": 30056 }, { "epoch": 1.37933091643339, "grad_norm": 0.4463609457015991, "learning_rate": 5.801600299840597e-06, "loss": 0.3132, "step": 30057 }, { "epoch": 1.3793768069386445, "grad_norm": 0.4774281680583954, "learning_rate": 5.801358282454994e-06, "loss": 0.3508, "step": 30058 }, { "epoch": 1.3794226974438988, "grad_norm": 0.5036218166351318, "learning_rate": 5.8011162631423765e-06, "loss": 0.4499, "step": 30059 }, { "epoch": 1.3794685879491533, "grad_norm": 0.46814975142478943, "learning_rate": 5.800874241903329e-06, "loss": 0.3515, "step": 30060 }, { "epoch": 1.3795144784544078, "grad_norm": 0.4841305911540985, "learning_rate": 5.80063221873843e-06, "loss": 0.3357, "step": 30061 }, { "epoch": 1.3795603689596623, "grad_norm": 0.5619818568229675, "learning_rate": 5.800390193648265e-06, "loss": 0.3162, "step": 30062 }, { "epoch": 1.3796062594649168, "grad_norm": 0.5121968388557434, "learning_rate": 5.8001481666334114e-06, "loss": 0.431, "step": 30063 }, { "epoch": 1.379652149970171, "grad_norm": 0.5101747512817383, "learning_rate": 5.799906137694455e-06, "loss": 0.4058, "step": 30064 }, { "epoch": 1.3796980404754255, "grad_norm": 0.583008348941803, "learning_rate": 5.799664106831977e-06, "loss": 0.3597, "step": 30065 }, { "epoch": 1.37974393098068, "grad_norm": 0.5467517971992493, "learning_rate": 5.79942207404656e-06, "loss": 0.4141, "step": 30066 }, { "epoch": 1.3797898214859345, "grad_norm": 0.5037644505500793, "learning_rate": 5.799180039338786e-06, "loss": 0.4007, "step": 30067 }, { "epoch": 1.379835711991189, "grad_norm": 0.5072501301765442, "learning_rate": 5.7989380027092335e-06, "loss": 0.4386, "step": 30068 }, { "epoch": 1.3798816024964435, "grad_norm": 0.5426491498947144, "learning_rate": 5.798695964158487e-06, "loss": 0.4431, "step": 30069 }, { "epoch": 1.379927493001698, "grad_norm": 0.4687478840351105, "learning_rate": 5.798453923687132e-06, "loss": 0.363, "step": 30070 }, { "epoch": 1.3799733835069525, "grad_norm": 0.4715091586112976, "learning_rate": 5.798211881295746e-06, "loss": 0.3161, "step": 30071 }, { "epoch": 1.3800192740122068, "grad_norm": 0.47338390350341797, "learning_rate": 5.797969836984911e-06, "loss": 0.3861, "step": 30072 }, { "epoch": 1.3800651645174613, "grad_norm": 0.5061212182044983, "learning_rate": 5.797727790755212e-06, "loss": 0.3869, "step": 30073 }, { "epoch": 1.3801110550227158, "grad_norm": 0.4940020740032196, "learning_rate": 5.797485742607229e-06, "loss": 0.3861, "step": 30074 }, { "epoch": 1.3801569455279703, "grad_norm": 0.4310090243816376, "learning_rate": 5.7972436925415436e-06, "loss": 0.3, "step": 30075 }, { "epoch": 1.3802028360332248, "grad_norm": 0.5196375846862793, "learning_rate": 5.797001640558739e-06, "loss": 0.4616, "step": 30076 }, { "epoch": 1.380248726538479, "grad_norm": 0.4669770896434784, "learning_rate": 5.796759586659398e-06, "loss": 0.3147, "step": 30077 }, { "epoch": 1.3802946170437336, "grad_norm": 0.4549065828323364, "learning_rate": 5.7965175308441015e-06, "loss": 0.3422, "step": 30078 }, { "epoch": 1.380340507548988, "grad_norm": 0.42525434494018555, "learning_rate": 5.796275473113432e-06, "loss": 0.2884, "step": 30079 }, { "epoch": 1.3803863980542426, "grad_norm": 0.4323212802410126, "learning_rate": 5.79603341346797e-06, "loss": 0.312, "step": 30080 }, { "epoch": 1.380432288559497, "grad_norm": 0.4833287000656128, "learning_rate": 5.7957913519083e-06, "loss": 0.3692, "step": 30081 }, { "epoch": 1.3804781790647516, "grad_norm": 0.4570082128047943, "learning_rate": 5.795549288435003e-06, "loss": 0.3071, "step": 30082 }, { "epoch": 1.380524069570006, "grad_norm": 0.46827077865600586, "learning_rate": 5.795307223048659e-06, "loss": 0.3557, "step": 30083 }, { "epoch": 1.3805699600752606, "grad_norm": 0.48298442363739014, "learning_rate": 5.795065155749855e-06, "loss": 0.3215, "step": 30084 }, { "epoch": 1.3806158505805148, "grad_norm": 0.4812818169593811, "learning_rate": 5.794823086539168e-06, "loss": 0.4099, "step": 30085 }, { "epoch": 1.3806617410857693, "grad_norm": 0.4831850826740265, "learning_rate": 5.794581015417182e-06, "loss": 0.3506, "step": 30086 }, { "epoch": 1.3807076315910238, "grad_norm": 0.41549769043922424, "learning_rate": 5.794338942384482e-06, "loss": 0.2626, "step": 30087 }, { "epoch": 1.3807535220962783, "grad_norm": 0.4679988920688629, "learning_rate": 5.794096867441646e-06, "loss": 0.3475, "step": 30088 }, { "epoch": 1.3807994126015326, "grad_norm": 0.46672746539115906, "learning_rate": 5.793854790589257e-06, "loss": 0.3828, "step": 30089 }, { "epoch": 1.380845303106787, "grad_norm": 0.48221760988235474, "learning_rate": 5.793612711827899e-06, "loss": 0.4281, "step": 30090 }, { "epoch": 1.3808911936120416, "grad_norm": 0.4793669283390045, "learning_rate": 5.793370631158153e-06, "loss": 0.3571, "step": 30091 }, { "epoch": 1.380937084117296, "grad_norm": 0.4874010980129242, "learning_rate": 5.793128548580601e-06, "loss": 0.3814, "step": 30092 }, { "epoch": 1.3809829746225506, "grad_norm": 0.4862196445465088, "learning_rate": 5.792886464095825e-06, "loss": 0.341, "step": 30093 }, { "epoch": 1.381028865127805, "grad_norm": 0.48247984051704407, "learning_rate": 5.792644377704406e-06, "loss": 0.3304, "step": 30094 }, { "epoch": 1.3810747556330596, "grad_norm": 0.4899463653564453, "learning_rate": 5.79240228940693e-06, "loss": 0.3884, "step": 30095 }, { "epoch": 1.381120646138314, "grad_norm": 0.48217883706092834, "learning_rate": 5.792160199203974e-06, "loss": 0.3745, "step": 30096 }, { "epoch": 1.3811665366435684, "grad_norm": 0.48121970891952515, "learning_rate": 5.791918107096124e-06, "loss": 0.3768, "step": 30097 }, { "epoch": 1.3812124271488229, "grad_norm": 0.4415617883205414, "learning_rate": 5.79167601308396e-06, "loss": 0.3633, "step": 30098 }, { "epoch": 1.3812583176540774, "grad_norm": 0.46626052260398865, "learning_rate": 5.791433917168067e-06, "loss": 0.3114, "step": 30099 }, { "epoch": 1.3813042081593319, "grad_norm": 0.47387996315956116, "learning_rate": 5.791191819349022e-06, "loss": 0.3188, "step": 30100 }, { "epoch": 1.3813500986645864, "grad_norm": 0.4824138879776001, "learning_rate": 5.790949719627414e-06, "loss": 0.3922, "step": 30101 }, { "epoch": 1.3813959891698406, "grad_norm": 0.4569844901561737, "learning_rate": 5.790707618003819e-06, "loss": 0.3071, "step": 30102 }, { "epoch": 1.3814418796750951, "grad_norm": 0.4678370952606201, "learning_rate": 5.790465514478823e-06, "loss": 0.3386, "step": 30103 }, { "epoch": 1.3814877701803496, "grad_norm": 0.46363189816474915, "learning_rate": 5.790223409053007e-06, "loss": 0.342, "step": 30104 }, { "epoch": 1.3815336606856041, "grad_norm": 0.4456811249256134, "learning_rate": 5.789981301726953e-06, "loss": 0.3305, "step": 30105 }, { "epoch": 1.3815795511908586, "grad_norm": 0.44500625133514404, "learning_rate": 5.789739192501242e-06, "loss": 0.3297, "step": 30106 }, { "epoch": 1.3816254416961131, "grad_norm": 0.5086700916290283, "learning_rate": 5.789497081376458e-06, "loss": 0.4421, "step": 30107 }, { "epoch": 1.3816713322013676, "grad_norm": 0.469319224357605, "learning_rate": 5.789254968353183e-06, "loss": 0.3695, "step": 30108 }, { "epoch": 1.3817172227066221, "grad_norm": 0.4956066608428955, "learning_rate": 5.7890128534319986e-06, "loss": 0.3949, "step": 30109 }, { "epoch": 1.3817631132118764, "grad_norm": 0.47333189845085144, "learning_rate": 5.788770736613489e-06, "loss": 0.3892, "step": 30110 }, { "epoch": 1.381809003717131, "grad_norm": 0.44093748927116394, "learning_rate": 5.788528617898232e-06, "loss": 0.3378, "step": 30111 }, { "epoch": 1.3818548942223854, "grad_norm": 0.5100197196006775, "learning_rate": 5.788286497286816e-06, "loss": 0.4026, "step": 30112 }, { "epoch": 1.3819007847276399, "grad_norm": 0.4195316433906555, "learning_rate": 5.788044374779816e-06, "loss": 0.272, "step": 30113 }, { "epoch": 1.3819466752328944, "grad_norm": 0.493714839220047, "learning_rate": 5.787802250377821e-06, "loss": 0.4634, "step": 30114 }, { "epoch": 1.3819925657381487, "grad_norm": 0.4993947446346283, "learning_rate": 5.787560124081409e-06, "loss": 0.3796, "step": 30115 }, { "epoch": 1.3820384562434032, "grad_norm": 0.44466516375541687, "learning_rate": 5.787317995891163e-06, "loss": 0.3132, "step": 30116 }, { "epoch": 1.3820843467486577, "grad_norm": 0.4807978570461273, "learning_rate": 5.7870758658076654e-06, "loss": 0.343, "step": 30117 }, { "epoch": 1.3821302372539122, "grad_norm": 0.47198548913002014, "learning_rate": 5.7868337338315006e-06, "loss": 0.3975, "step": 30118 }, { "epoch": 1.3821761277591667, "grad_norm": 0.4122215211391449, "learning_rate": 5.786591599963247e-06, "loss": 0.2694, "step": 30119 }, { "epoch": 1.3822220182644211, "grad_norm": 0.43830570578575134, "learning_rate": 5.78634946420349e-06, "loss": 0.3405, "step": 30120 }, { "epoch": 1.3822679087696756, "grad_norm": 0.5233800411224365, "learning_rate": 5.786107326552811e-06, "loss": 0.4738, "step": 30121 }, { "epoch": 1.3823137992749301, "grad_norm": 0.4553917646408081, "learning_rate": 5.785865187011791e-06, "loss": 0.3148, "step": 30122 }, { "epoch": 1.3823596897801844, "grad_norm": 0.4600346088409424, "learning_rate": 5.785623045581013e-06, "loss": 0.3581, "step": 30123 }, { "epoch": 1.382405580285439, "grad_norm": 0.44866496324539185, "learning_rate": 5.785380902261061e-06, "loss": 0.3316, "step": 30124 }, { "epoch": 1.3824514707906934, "grad_norm": 0.42523252964019775, "learning_rate": 5.785138757052515e-06, "loss": 0.3097, "step": 30125 }, { "epoch": 1.382497361295948, "grad_norm": 0.4859412908554077, "learning_rate": 5.784896609955958e-06, "loss": 0.4136, "step": 30126 }, { "epoch": 1.3825432518012024, "grad_norm": 0.5036506652832031, "learning_rate": 5.784654460971972e-06, "loss": 0.3529, "step": 30127 }, { "epoch": 1.3825891423064567, "grad_norm": 0.48324862122535706, "learning_rate": 5.78441231010114e-06, "loss": 0.4217, "step": 30128 }, { "epoch": 1.3826350328117112, "grad_norm": 0.4600822925567627, "learning_rate": 5.784170157344044e-06, "loss": 0.3407, "step": 30129 }, { "epoch": 1.3826809233169657, "grad_norm": 0.44016775488853455, "learning_rate": 5.7839280027012666e-06, "loss": 0.3281, "step": 30130 }, { "epoch": 1.3827268138222202, "grad_norm": 0.4400809407234192, "learning_rate": 5.78368584617339e-06, "loss": 0.3033, "step": 30131 }, { "epoch": 1.3827727043274747, "grad_norm": 0.41949546337127686, "learning_rate": 5.783443687760997e-06, "loss": 0.2674, "step": 30132 }, { "epoch": 1.3828185948327292, "grad_norm": 0.4590534269809723, "learning_rate": 5.783201527464667e-06, "loss": 0.3461, "step": 30133 }, { "epoch": 1.3828644853379837, "grad_norm": 0.4224918782711029, "learning_rate": 5.7829593652849845e-06, "loss": 0.2772, "step": 30134 }, { "epoch": 1.3829103758432382, "grad_norm": 0.437712699174881, "learning_rate": 5.782717201222533e-06, "loss": 0.304, "step": 30135 }, { "epoch": 1.3829562663484924, "grad_norm": 0.48794978857040405, "learning_rate": 5.782475035277895e-06, "loss": 0.3941, "step": 30136 }, { "epoch": 1.383002156853747, "grad_norm": 0.4472852945327759, "learning_rate": 5.782232867451649e-06, "loss": 0.3613, "step": 30137 }, { "epoch": 1.3830480473590014, "grad_norm": 0.47399502992630005, "learning_rate": 5.7819906977443805e-06, "loss": 0.3418, "step": 30138 }, { "epoch": 1.383093937864256, "grad_norm": 0.46424493193626404, "learning_rate": 5.7817485261566716e-06, "loss": 0.324, "step": 30139 }, { "epoch": 1.3831398283695102, "grad_norm": 0.43304571509361267, "learning_rate": 5.7815063526891034e-06, "loss": 0.3182, "step": 30140 }, { "epoch": 1.3831857188747647, "grad_norm": 0.46367359161376953, "learning_rate": 5.781264177342261e-06, "loss": 0.3248, "step": 30141 }, { "epoch": 1.3832316093800192, "grad_norm": 0.482255220413208, "learning_rate": 5.781022000116724e-06, "loss": 0.3747, "step": 30142 }, { "epoch": 1.3832774998852737, "grad_norm": 0.4829370677471161, "learning_rate": 5.780779821013074e-06, "loss": 0.365, "step": 30143 }, { "epoch": 1.3833233903905282, "grad_norm": 0.4703187346458435, "learning_rate": 5.780537640031897e-06, "loss": 0.3641, "step": 30144 }, { "epoch": 1.3833692808957827, "grad_norm": 0.47962355613708496, "learning_rate": 5.7802954571737725e-06, "loss": 0.3876, "step": 30145 }, { "epoch": 1.3834151714010372, "grad_norm": 0.4452257454395294, "learning_rate": 5.780053272439283e-06, "loss": 0.3185, "step": 30146 }, { "epoch": 1.3834610619062917, "grad_norm": 0.44089433550834656, "learning_rate": 5.779811085829014e-06, "loss": 0.3576, "step": 30147 }, { "epoch": 1.383506952411546, "grad_norm": 0.465387225151062, "learning_rate": 5.779568897343543e-06, "loss": 0.3876, "step": 30148 }, { "epoch": 1.3835528429168005, "grad_norm": 0.4795728027820587, "learning_rate": 5.779326706983456e-06, "loss": 0.3551, "step": 30149 }, { "epoch": 1.383598733422055, "grad_norm": 0.4223843812942505, "learning_rate": 5.779084514749333e-06, "loss": 0.3172, "step": 30150 }, { "epoch": 1.3836446239273095, "grad_norm": 0.49916842579841614, "learning_rate": 5.778842320641758e-06, "loss": 0.4036, "step": 30151 }, { "epoch": 1.383690514432564, "grad_norm": 0.4564020037651062, "learning_rate": 5.778600124661313e-06, "loss": 0.3218, "step": 30152 }, { "epoch": 1.3837364049378182, "grad_norm": 0.44122374057769775, "learning_rate": 5.778357926808582e-06, "loss": 0.3078, "step": 30153 }, { "epoch": 1.3837822954430727, "grad_norm": 0.4379919171333313, "learning_rate": 5.778115727084144e-06, "loss": 0.3266, "step": 30154 }, { "epoch": 1.3838281859483272, "grad_norm": 0.4965982437133789, "learning_rate": 5.7778735254885845e-06, "loss": 0.3704, "step": 30155 }, { "epoch": 1.3838740764535817, "grad_norm": 0.4463886320590973, "learning_rate": 5.777631322022483e-06, "loss": 0.3167, "step": 30156 }, { "epoch": 1.3839199669588362, "grad_norm": 0.4470362961292267, "learning_rate": 5.777389116686426e-06, "loss": 0.3436, "step": 30157 }, { "epoch": 1.3839658574640907, "grad_norm": 0.4523601531982422, "learning_rate": 5.777146909480993e-06, "loss": 0.3049, "step": 30158 }, { "epoch": 1.3840117479693452, "grad_norm": 0.4852578938007355, "learning_rate": 5.776904700406765e-06, "loss": 0.4004, "step": 30159 }, { "epoch": 1.3840576384745997, "grad_norm": 0.44050097465515137, "learning_rate": 5.776662489464327e-06, "loss": 0.3283, "step": 30160 }, { "epoch": 1.384103528979854, "grad_norm": 0.4994003474712372, "learning_rate": 5.776420276654262e-06, "loss": 0.4142, "step": 30161 }, { "epoch": 1.3841494194851085, "grad_norm": 0.43885889649391174, "learning_rate": 5.7761780619771505e-06, "loss": 0.3022, "step": 30162 }, { "epoch": 1.384195309990363, "grad_norm": 0.4919586479663849, "learning_rate": 5.775935845433575e-06, "loss": 0.4391, "step": 30163 }, { "epoch": 1.3842412004956175, "grad_norm": 0.4547795057296753, "learning_rate": 5.775693627024121e-06, "loss": 0.3732, "step": 30164 }, { "epoch": 1.384287091000872, "grad_norm": 0.42514944076538086, "learning_rate": 5.775451406749367e-06, "loss": 0.2962, "step": 30165 }, { "epoch": 1.3843329815061263, "grad_norm": 0.47637006640434265, "learning_rate": 5.775209184609898e-06, "loss": 0.3486, "step": 30166 }, { "epoch": 1.3843788720113808, "grad_norm": 0.4872959554195404, "learning_rate": 5.7749669606062945e-06, "loss": 0.4134, "step": 30167 }, { "epoch": 1.3844247625166353, "grad_norm": 0.5105438232421875, "learning_rate": 5.774724734739141e-06, "loss": 0.4293, "step": 30168 }, { "epoch": 1.3844706530218898, "grad_norm": 0.44854724407196045, "learning_rate": 5.77448250700902e-06, "loss": 0.3299, "step": 30169 }, { "epoch": 1.3845165435271443, "grad_norm": 0.4842880964279175, "learning_rate": 5.77424027741651e-06, "loss": 0.3974, "step": 30170 }, { "epoch": 1.3845624340323988, "grad_norm": 0.4838508367538452, "learning_rate": 5.773998045962197e-06, "loss": 0.3725, "step": 30171 }, { "epoch": 1.3846083245376533, "grad_norm": 0.467365026473999, "learning_rate": 5.7737558126466655e-06, "loss": 0.3704, "step": 30172 }, { "epoch": 1.3846542150429078, "grad_norm": 0.5171970129013062, "learning_rate": 5.773513577470493e-06, "loss": 0.3323, "step": 30173 }, { "epoch": 1.384700105548162, "grad_norm": 0.5004127025604248, "learning_rate": 5.773271340434265e-06, "loss": 0.4512, "step": 30174 }, { "epoch": 1.3847459960534165, "grad_norm": 0.4914829432964325, "learning_rate": 5.773029101538566e-06, "loss": 0.394, "step": 30175 }, { "epoch": 1.384791886558671, "grad_norm": 0.5040169954299927, "learning_rate": 5.772786860783972e-06, "loss": 0.3659, "step": 30176 }, { "epoch": 1.3848377770639255, "grad_norm": 0.4755440354347229, "learning_rate": 5.772544618171072e-06, "loss": 0.3515, "step": 30177 }, { "epoch": 1.3848836675691798, "grad_norm": 0.48394501209259033, "learning_rate": 5.772302373700446e-06, "loss": 0.3935, "step": 30178 }, { "epoch": 1.3849295580744343, "grad_norm": 0.4634353220462799, "learning_rate": 5.772060127372676e-06, "loss": 0.3548, "step": 30179 }, { "epoch": 1.3849754485796888, "grad_norm": 0.4337379038333893, "learning_rate": 5.771817879188345e-06, "loss": 0.3236, "step": 30180 }, { "epoch": 1.3850213390849433, "grad_norm": 0.5086374282836914, "learning_rate": 5.7715756291480355e-06, "loss": 0.4427, "step": 30181 }, { "epoch": 1.3850672295901978, "grad_norm": 0.46645882725715637, "learning_rate": 5.77133337725233e-06, "loss": 0.3494, "step": 30182 }, { "epoch": 1.3851131200954523, "grad_norm": 0.4445298910140991, "learning_rate": 5.771091123501812e-06, "loss": 0.3159, "step": 30183 }, { "epoch": 1.3851590106007068, "grad_norm": 0.45073261857032776, "learning_rate": 5.770848867897062e-06, "loss": 0.3553, "step": 30184 }, { "epoch": 1.3852049011059613, "grad_norm": 0.48292627930641174, "learning_rate": 5.770606610438665e-06, "loss": 0.3594, "step": 30185 }, { "epoch": 1.3852507916112156, "grad_norm": 0.6269731521606445, "learning_rate": 5.770364351127202e-06, "loss": 0.3412, "step": 30186 }, { "epoch": 1.38529668211647, "grad_norm": 0.45274046063423157, "learning_rate": 5.770122089963255e-06, "loss": 0.3418, "step": 30187 }, { "epoch": 1.3853425726217246, "grad_norm": 0.4876083433628082, "learning_rate": 5.769879826947406e-06, "loss": 0.3592, "step": 30188 }, { "epoch": 1.385388463126979, "grad_norm": 0.46288368105888367, "learning_rate": 5.769637562080242e-06, "loss": 0.3374, "step": 30189 }, { "epoch": 1.3854343536322336, "grad_norm": 0.469482958316803, "learning_rate": 5.7693952953623435e-06, "loss": 0.3391, "step": 30190 }, { "epoch": 1.3854802441374878, "grad_norm": 0.476589560508728, "learning_rate": 5.7691530267942884e-06, "loss": 0.2171, "step": 30191 }, { "epoch": 1.3855261346427423, "grad_norm": 0.47293582558631897, "learning_rate": 5.768910756376666e-06, "loss": 0.3792, "step": 30192 }, { "epoch": 1.3855720251479968, "grad_norm": 0.4677846133708954, "learning_rate": 5.768668484110054e-06, "loss": 0.3554, "step": 30193 }, { "epoch": 1.3856179156532513, "grad_norm": 0.5350527167320251, "learning_rate": 5.768426209995039e-06, "loss": 0.4428, "step": 30194 }, { "epoch": 1.3856638061585058, "grad_norm": 0.4820314943790436, "learning_rate": 5.768183934032199e-06, "loss": 0.4073, "step": 30195 }, { "epoch": 1.3857096966637603, "grad_norm": 0.48067232966423035, "learning_rate": 5.767941656222121e-06, "loss": 0.3594, "step": 30196 }, { "epoch": 1.3857555871690148, "grad_norm": 0.46545538306236267, "learning_rate": 5.7676993765653845e-06, "loss": 0.3498, "step": 30197 }, { "epoch": 1.3858014776742693, "grad_norm": 0.4139038622379303, "learning_rate": 5.767457095062573e-06, "loss": 0.2658, "step": 30198 }, { "epoch": 1.3858473681795236, "grad_norm": 0.4817889332771301, "learning_rate": 5.76721481171427e-06, "loss": 0.3786, "step": 30199 }, { "epoch": 1.385893258684778, "grad_norm": 0.4949606657028198, "learning_rate": 5.766972526521059e-06, "loss": 0.3678, "step": 30200 }, { "epoch": 1.3859391491900326, "grad_norm": 0.44682562351226807, "learning_rate": 5.766730239483519e-06, "loss": 0.2934, "step": 30201 }, { "epoch": 1.385985039695287, "grad_norm": 0.45298531651496887, "learning_rate": 5.766487950602236e-06, "loss": 0.3326, "step": 30202 }, { "epoch": 1.3860309302005416, "grad_norm": 0.4452357590198517, "learning_rate": 5.766245659877791e-06, "loss": 0.3343, "step": 30203 }, { "epoch": 1.3860768207057959, "grad_norm": 0.4115660488605499, "learning_rate": 5.766003367310767e-06, "loss": 0.298, "step": 30204 }, { "epoch": 1.3861227112110504, "grad_norm": 0.5188589692115784, "learning_rate": 5.7657610729017455e-06, "loss": 0.4691, "step": 30205 }, { "epoch": 1.3861686017163048, "grad_norm": 0.520616888999939, "learning_rate": 5.765518776651311e-06, "loss": 0.4486, "step": 30206 }, { "epoch": 1.3862144922215593, "grad_norm": 0.434212863445282, "learning_rate": 5.7652764785600464e-06, "loss": 0.3102, "step": 30207 }, { "epoch": 1.3862603827268138, "grad_norm": 0.4577532112598419, "learning_rate": 5.765034178628532e-06, "loss": 0.3422, "step": 30208 }, { "epoch": 1.3863062732320683, "grad_norm": 0.4571246802806854, "learning_rate": 5.764791876857354e-06, "loss": 0.3516, "step": 30209 }, { "epoch": 1.3863521637373228, "grad_norm": 0.5221338272094727, "learning_rate": 5.764549573247091e-06, "loss": 0.4845, "step": 30210 }, { "epoch": 1.3863980542425773, "grad_norm": 0.46580061316490173, "learning_rate": 5.764307267798328e-06, "loss": 0.3506, "step": 30211 }, { "epoch": 1.3864439447478316, "grad_norm": 0.4762353003025055, "learning_rate": 5.764064960511648e-06, "loss": 0.371, "step": 30212 }, { "epoch": 1.3864898352530861, "grad_norm": 0.4462132751941681, "learning_rate": 5.763822651387632e-06, "loss": 0.3251, "step": 30213 }, { "epoch": 1.3865357257583406, "grad_norm": 0.46403101086616516, "learning_rate": 5.763580340426864e-06, "loss": 0.3493, "step": 30214 }, { "epoch": 1.386581616263595, "grad_norm": 0.477291464805603, "learning_rate": 5.763338027629925e-06, "loss": 0.3676, "step": 30215 }, { "epoch": 1.3866275067688496, "grad_norm": 0.44286972284317017, "learning_rate": 5.7630957129974e-06, "loss": 0.3256, "step": 30216 }, { "epoch": 1.3866733972741039, "grad_norm": 0.4290679693222046, "learning_rate": 5.762853396529872e-06, "loss": 0.271, "step": 30217 }, { "epoch": 1.3867192877793584, "grad_norm": 0.46706992387771606, "learning_rate": 5.7626110782279224e-06, "loss": 0.3235, "step": 30218 }, { "epoch": 1.3867651782846129, "grad_norm": 0.4916428029537201, "learning_rate": 5.762368758092131e-06, "loss": 0.402, "step": 30219 }, { "epoch": 1.3868110687898674, "grad_norm": 0.44531404972076416, "learning_rate": 5.762126436123086e-06, "loss": 0.3371, "step": 30220 }, { "epoch": 1.3868569592951219, "grad_norm": 0.4945046603679657, "learning_rate": 5.761884112321365e-06, "loss": 0.422, "step": 30221 }, { "epoch": 1.3869028498003764, "grad_norm": 0.5105440020561218, "learning_rate": 5.761641786687555e-06, "loss": 0.4286, "step": 30222 }, { "epoch": 1.3869487403056309, "grad_norm": 0.48724180459976196, "learning_rate": 5.761399459222238e-06, "loss": 0.4184, "step": 30223 }, { "epoch": 1.3869946308108854, "grad_norm": 0.4847036302089691, "learning_rate": 5.761157129925992e-06, "loss": 0.3592, "step": 30224 }, { "epoch": 1.3870405213161396, "grad_norm": 0.44796520471572876, "learning_rate": 5.760914798799407e-06, "loss": 0.3212, "step": 30225 }, { "epoch": 1.3870864118213941, "grad_norm": 0.48685550689697266, "learning_rate": 5.7606724658430605e-06, "loss": 0.3882, "step": 30226 }, { "epoch": 1.3871323023266486, "grad_norm": 0.4488457143306732, "learning_rate": 5.760430131057535e-06, "loss": 0.2855, "step": 30227 }, { "epoch": 1.3871781928319031, "grad_norm": 0.5084857940673828, "learning_rate": 5.760187794443418e-06, "loss": 0.42, "step": 30228 }, { "epoch": 1.3872240833371574, "grad_norm": 0.47038644552230835, "learning_rate": 5.759945456001288e-06, "loss": 0.3737, "step": 30229 }, { "epoch": 1.387269973842412, "grad_norm": 0.5137220025062561, "learning_rate": 5.7597031157317285e-06, "loss": 0.4094, "step": 30230 }, { "epoch": 1.3873158643476664, "grad_norm": 0.4809383451938629, "learning_rate": 5.759460773635323e-06, "loss": 0.3597, "step": 30231 }, { "epoch": 1.387361754852921, "grad_norm": 0.4539553225040436, "learning_rate": 5.759218429712655e-06, "loss": 0.3278, "step": 30232 }, { "epoch": 1.3874076453581754, "grad_norm": 0.49070674180984497, "learning_rate": 5.758976083964305e-06, "loss": 0.3976, "step": 30233 }, { "epoch": 1.38745353586343, "grad_norm": 0.4861520528793335, "learning_rate": 5.758733736390859e-06, "loss": 0.3549, "step": 30234 }, { "epoch": 1.3874994263686844, "grad_norm": 0.4564046859741211, "learning_rate": 5.758491386992896e-06, "loss": 0.3725, "step": 30235 }, { "epoch": 1.387545316873939, "grad_norm": 0.6546860337257385, "learning_rate": 5.758249035771001e-06, "loss": 0.4455, "step": 30236 }, { "epoch": 1.3875912073791932, "grad_norm": 0.47364097833633423, "learning_rate": 5.7580066827257565e-06, "loss": 0.3232, "step": 30237 }, { "epoch": 1.3876370978844477, "grad_norm": 0.4762275815010071, "learning_rate": 5.757764327857745e-06, "loss": 0.3477, "step": 30238 }, { "epoch": 1.3876829883897022, "grad_norm": 0.49329712986946106, "learning_rate": 5.7575219711675504e-06, "loss": 0.4184, "step": 30239 }, { "epoch": 1.3877288788949567, "grad_norm": 0.47955426573753357, "learning_rate": 5.757279612655754e-06, "loss": 0.3294, "step": 30240 }, { "epoch": 1.3877747694002112, "grad_norm": 0.46467211842536926, "learning_rate": 5.757037252322939e-06, "loss": 0.3467, "step": 30241 }, { "epoch": 1.3878206599054654, "grad_norm": 0.48041290044784546, "learning_rate": 5.756794890169688e-06, "loss": 0.3194, "step": 30242 }, { "epoch": 1.38786655041072, "grad_norm": 0.5120606422424316, "learning_rate": 5.756552526196584e-06, "loss": 0.34, "step": 30243 }, { "epoch": 1.3879124409159744, "grad_norm": 0.4717693328857422, "learning_rate": 5.756310160404209e-06, "loss": 0.3314, "step": 30244 }, { "epoch": 1.387958331421229, "grad_norm": 0.5047916769981384, "learning_rate": 5.7560677927931505e-06, "loss": 0.3913, "step": 30245 }, { "epoch": 1.3880042219264834, "grad_norm": 0.44254550337791443, "learning_rate": 5.755825423363984e-06, "loss": 0.288, "step": 30246 }, { "epoch": 1.388050112431738, "grad_norm": 0.4101138114929199, "learning_rate": 5.7555830521172975e-06, "loss": 0.2498, "step": 30247 }, { "epoch": 1.3880960029369924, "grad_norm": 0.4936481714248657, "learning_rate": 5.755340679053671e-06, "loss": 0.4302, "step": 30248 }, { "epoch": 1.388141893442247, "grad_norm": 0.49934646487236023, "learning_rate": 5.75509830417369e-06, "loss": 0.4434, "step": 30249 }, { "epoch": 1.3881877839475012, "grad_norm": 0.6422379016876221, "learning_rate": 5.754855927477936e-06, "loss": 0.451, "step": 30250 }, { "epoch": 1.3882336744527557, "grad_norm": 0.4839049279689789, "learning_rate": 5.754613548966991e-06, "loss": 0.3668, "step": 30251 }, { "epoch": 1.3882795649580102, "grad_norm": 0.47374632954597473, "learning_rate": 5.7543711686414385e-06, "loss": 0.378, "step": 30252 }, { "epoch": 1.3883254554632647, "grad_norm": 0.48342493176460266, "learning_rate": 5.75412878650186e-06, "loss": 0.3828, "step": 30253 }, { "epoch": 1.3883713459685192, "grad_norm": 0.475362628698349, "learning_rate": 5.753886402548843e-06, "loss": 0.3243, "step": 30254 }, { "epoch": 1.3884172364737735, "grad_norm": 0.46324992179870605, "learning_rate": 5.753644016782964e-06, "loss": 0.3022, "step": 30255 }, { "epoch": 1.388463126979028, "grad_norm": 0.5234882235527039, "learning_rate": 5.7534016292048114e-06, "loss": 0.4761, "step": 30256 }, { "epoch": 1.3885090174842825, "grad_norm": 0.42796188592910767, "learning_rate": 5.753159239814965e-06, "loss": 0.3046, "step": 30257 }, { "epoch": 1.388554907989537, "grad_norm": 0.41637393832206726, "learning_rate": 5.752916848614007e-06, "loss": 0.2803, "step": 30258 }, { "epoch": 1.3886007984947915, "grad_norm": 0.46950602531433105, "learning_rate": 5.7526744556025235e-06, "loss": 0.3731, "step": 30259 }, { "epoch": 1.388646689000046, "grad_norm": 0.45282888412475586, "learning_rate": 5.752432060781093e-06, "loss": 0.3263, "step": 30260 }, { "epoch": 1.3886925795053005, "grad_norm": 0.4992215037345886, "learning_rate": 5.752189664150304e-06, "loss": 0.4165, "step": 30261 }, { "epoch": 1.388738470010555, "grad_norm": 0.4870789051055908, "learning_rate": 5.751947265710736e-06, "loss": 0.3835, "step": 30262 }, { "epoch": 1.3887843605158092, "grad_norm": 0.4718581736087799, "learning_rate": 5.751704865462969e-06, "loss": 0.3965, "step": 30263 }, { "epoch": 1.3888302510210637, "grad_norm": 0.4494836628437042, "learning_rate": 5.751462463407591e-06, "loss": 0.3158, "step": 30264 }, { "epoch": 1.3888761415263182, "grad_norm": 0.5015130043029785, "learning_rate": 5.751220059545183e-06, "loss": 0.3793, "step": 30265 }, { "epoch": 1.3889220320315727, "grad_norm": 0.4693112075328827, "learning_rate": 5.750977653876327e-06, "loss": 0.3912, "step": 30266 }, { "epoch": 1.388967922536827, "grad_norm": 0.46782997250556946, "learning_rate": 5.750735246401609e-06, "loss": 0.3707, "step": 30267 }, { "epoch": 1.3890138130420815, "grad_norm": 0.4616799056529999, "learning_rate": 5.750492837121608e-06, "loss": 0.3452, "step": 30268 }, { "epoch": 1.389059703547336, "grad_norm": 0.46908581256866455, "learning_rate": 5.750250426036909e-06, "loss": 0.3781, "step": 30269 }, { "epoch": 1.3891055940525905, "grad_norm": 0.46836769580841064, "learning_rate": 5.750008013148093e-06, "loss": 0.3188, "step": 30270 }, { "epoch": 1.389151484557845, "grad_norm": 0.44338929653167725, "learning_rate": 5.7497655984557456e-06, "loss": 0.3233, "step": 30271 }, { "epoch": 1.3891973750630995, "grad_norm": 0.5079687237739563, "learning_rate": 5.749523181960449e-06, "loss": 0.445, "step": 30272 }, { "epoch": 1.389243265568354, "grad_norm": 0.4805227518081665, "learning_rate": 5.7492807636627854e-06, "loss": 0.365, "step": 30273 }, { "epoch": 1.3892891560736085, "grad_norm": 0.4633045494556427, "learning_rate": 5.749038343563338e-06, "loss": 0.3541, "step": 30274 }, { "epoch": 1.3893350465788628, "grad_norm": 0.45519891381263733, "learning_rate": 5.74879592166269e-06, "loss": 0.3108, "step": 30275 }, { "epoch": 1.3893809370841173, "grad_norm": 0.4301750957965851, "learning_rate": 5.748553497961424e-06, "loss": 0.324, "step": 30276 }, { "epoch": 1.3894268275893717, "grad_norm": 0.4371340572834015, "learning_rate": 5.748311072460122e-06, "loss": 0.3569, "step": 30277 }, { "epoch": 1.3894727180946262, "grad_norm": 0.48944544792175293, "learning_rate": 5.748068645159371e-06, "loss": 0.3631, "step": 30278 }, { "epoch": 1.3895186085998807, "grad_norm": 0.5014194846153259, "learning_rate": 5.7478262160597485e-06, "loss": 0.34, "step": 30279 }, { "epoch": 1.389564499105135, "grad_norm": 0.5651270151138306, "learning_rate": 5.74758378516184e-06, "loss": 0.4017, "step": 30280 }, { "epoch": 1.3896103896103895, "grad_norm": 0.4608399271965027, "learning_rate": 5.747341352466228e-06, "loss": 0.341, "step": 30281 }, { "epoch": 1.389656280115644, "grad_norm": 0.42807790637016296, "learning_rate": 5.747098917973498e-06, "loss": 0.3006, "step": 30282 }, { "epoch": 1.3897021706208985, "grad_norm": 0.45664551854133606, "learning_rate": 5.74685648168423e-06, "loss": 0.3467, "step": 30283 }, { "epoch": 1.389748061126153, "grad_norm": 0.4479440152645111, "learning_rate": 5.746614043599007e-06, "loss": 0.3274, "step": 30284 }, { "epoch": 1.3897939516314075, "grad_norm": 0.4336516857147217, "learning_rate": 5.7463716037184146e-06, "loss": 0.2725, "step": 30285 }, { "epoch": 1.389839842136662, "grad_norm": 0.486752450466156, "learning_rate": 5.746129162043032e-06, "loss": 0.3679, "step": 30286 }, { "epoch": 1.3898857326419165, "grad_norm": 0.4881468713283539, "learning_rate": 5.7458867185734445e-06, "loss": 0.3937, "step": 30287 }, { "epoch": 1.3899316231471708, "grad_norm": 0.4904804527759552, "learning_rate": 5.745644273310236e-06, "loss": 0.3533, "step": 30288 }, { "epoch": 1.3899775136524253, "grad_norm": 0.4749709665775299, "learning_rate": 5.745401826253989e-06, "loss": 0.342, "step": 30289 }, { "epoch": 1.3900234041576798, "grad_norm": 0.45477306842803955, "learning_rate": 5.745159377405284e-06, "loss": 0.332, "step": 30290 }, { "epoch": 1.3900692946629343, "grad_norm": 0.4279290437698364, "learning_rate": 5.744916926764708e-06, "loss": 0.2988, "step": 30291 }, { "epoch": 1.3901151851681888, "grad_norm": 0.5339832305908203, "learning_rate": 5.744674474332839e-06, "loss": 0.5179, "step": 30292 }, { "epoch": 1.390161075673443, "grad_norm": 0.4888162314891815, "learning_rate": 5.744432020110265e-06, "loss": 0.4337, "step": 30293 }, { "epoch": 1.3902069661786975, "grad_norm": 0.4568067491054535, "learning_rate": 5.744189564097566e-06, "loss": 0.3553, "step": 30294 }, { "epoch": 1.390252856683952, "grad_norm": 0.45538386702537537, "learning_rate": 5.743947106295327e-06, "loss": 0.3066, "step": 30295 }, { "epoch": 1.3902987471892065, "grad_norm": 0.46293267607688904, "learning_rate": 5.743704646704129e-06, "loss": 0.3931, "step": 30296 }, { "epoch": 1.390344637694461, "grad_norm": 0.4840135872364044, "learning_rate": 5.7434621853245564e-06, "loss": 0.4031, "step": 30297 }, { "epoch": 1.3903905281997155, "grad_norm": 0.46634942293167114, "learning_rate": 5.743219722157192e-06, "loss": 0.3561, "step": 30298 }, { "epoch": 1.39043641870497, "grad_norm": 0.45144978165626526, "learning_rate": 5.742977257202619e-06, "loss": 0.3306, "step": 30299 }, { "epoch": 1.3904823092102245, "grad_norm": 0.48121321201324463, "learning_rate": 5.74273479046142e-06, "loss": 0.3892, "step": 30300 }, { "epoch": 1.3905281997154788, "grad_norm": 0.4638969600200653, "learning_rate": 5.7424923219341775e-06, "loss": 0.3483, "step": 30301 }, { "epoch": 1.3905740902207333, "grad_norm": 0.4213906228542328, "learning_rate": 5.742249851621476e-06, "loss": 0.2578, "step": 30302 }, { "epoch": 1.3906199807259878, "grad_norm": 0.45766401290893555, "learning_rate": 5.7420073795238975e-06, "loss": 0.3611, "step": 30303 }, { "epoch": 1.3906658712312423, "grad_norm": 0.4397094249725342, "learning_rate": 5.741764905642026e-06, "loss": 0.3303, "step": 30304 }, { "epoch": 1.3907117617364968, "grad_norm": 0.4559842348098755, "learning_rate": 5.741522429976445e-06, "loss": 0.3247, "step": 30305 }, { "epoch": 1.390757652241751, "grad_norm": 0.4601464569568634, "learning_rate": 5.741279952527734e-06, "loss": 0.3472, "step": 30306 }, { "epoch": 1.3908035427470056, "grad_norm": 0.4247780442237854, "learning_rate": 5.74103747329648e-06, "loss": 0.2693, "step": 30307 }, { "epoch": 1.39084943325226, "grad_norm": 0.4816422462463379, "learning_rate": 5.740794992283265e-06, "loss": 0.3354, "step": 30308 }, { "epoch": 1.3908953237575146, "grad_norm": 0.479301393032074, "learning_rate": 5.74055250948867e-06, "loss": 0.394, "step": 30309 }, { "epoch": 1.390941214262769, "grad_norm": 0.45645663142204285, "learning_rate": 5.740310024913283e-06, "loss": 0.3593, "step": 30310 }, { "epoch": 1.3909871047680236, "grad_norm": 0.48338621854782104, "learning_rate": 5.740067538557683e-06, "loss": 0.3991, "step": 30311 }, { "epoch": 1.391032995273278, "grad_norm": 0.5078062415122986, "learning_rate": 5.739825050422453e-06, "loss": 0.4444, "step": 30312 }, { "epoch": 1.3910788857785326, "grad_norm": 0.45488956570625305, "learning_rate": 5.739582560508179e-06, "loss": 0.3236, "step": 30313 }, { "epoch": 1.3911247762837868, "grad_norm": 0.47081366181373596, "learning_rate": 5.73934006881544e-06, "loss": 0.3398, "step": 30314 }, { "epoch": 1.3911706667890413, "grad_norm": 0.42333802580833435, "learning_rate": 5.739097575344823e-06, "loss": 0.3013, "step": 30315 }, { "epoch": 1.3912165572942958, "grad_norm": 0.4540907144546509, "learning_rate": 5.73885508009691e-06, "loss": 0.3266, "step": 30316 }, { "epoch": 1.3912624477995503, "grad_norm": 0.4784020185470581, "learning_rate": 5.738612583072282e-06, "loss": 0.3964, "step": 30317 }, { "epoch": 1.3913083383048046, "grad_norm": 0.47597798705101013, "learning_rate": 5.738370084271525e-06, "loss": 0.3313, "step": 30318 }, { "epoch": 1.391354228810059, "grad_norm": 0.4556713104248047, "learning_rate": 5.738127583695223e-06, "loss": 0.3367, "step": 30319 }, { "epoch": 1.3914001193153136, "grad_norm": 0.46421167254447937, "learning_rate": 5.737885081343953e-06, "loss": 0.34, "step": 30320 }, { "epoch": 1.391446009820568, "grad_norm": 0.46209293603897095, "learning_rate": 5.7376425772183055e-06, "loss": 0.3206, "step": 30321 }, { "epoch": 1.3914919003258226, "grad_norm": 0.4941738247871399, "learning_rate": 5.737400071318859e-06, "loss": 0.3648, "step": 30322 }, { "epoch": 1.391537790831077, "grad_norm": 0.47378218173980713, "learning_rate": 5.737157563646198e-06, "loss": 0.3431, "step": 30323 }, { "epoch": 1.3915836813363316, "grad_norm": 0.4601399600505829, "learning_rate": 5.736915054200907e-06, "loss": 0.372, "step": 30324 }, { "epoch": 1.391629571841586, "grad_norm": 0.41920557618141174, "learning_rate": 5.7366725429835655e-06, "loss": 0.2705, "step": 30325 }, { "epoch": 1.3916754623468404, "grad_norm": 0.443963885307312, "learning_rate": 5.736430029994761e-06, "loss": 0.3202, "step": 30326 }, { "epoch": 1.3917213528520949, "grad_norm": 0.4723348021507263, "learning_rate": 5.7361875152350745e-06, "loss": 0.391, "step": 30327 }, { "epoch": 1.3917672433573494, "grad_norm": 0.45168864727020264, "learning_rate": 5.735944998705088e-06, "loss": 0.3293, "step": 30328 }, { "epoch": 1.3918131338626039, "grad_norm": 0.4439443349838257, "learning_rate": 5.735702480405387e-06, "loss": 0.3344, "step": 30329 }, { "epoch": 1.3918590243678584, "grad_norm": 0.453037291765213, "learning_rate": 5.735459960336555e-06, "loss": 0.3353, "step": 30330 }, { "epoch": 1.3919049148731126, "grad_norm": 0.5007305145263672, "learning_rate": 5.735217438499171e-06, "loss": 0.3852, "step": 30331 }, { "epoch": 1.3919508053783671, "grad_norm": 0.4656926393508911, "learning_rate": 5.7349749148938235e-06, "loss": 0.3038, "step": 30332 }, { "epoch": 1.3919966958836216, "grad_norm": 0.495773047208786, "learning_rate": 5.734732389521094e-06, "loss": 0.3622, "step": 30333 }, { "epoch": 1.3920425863888761, "grad_norm": 0.4706609845161438, "learning_rate": 5.734489862381563e-06, "loss": 0.3788, "step": 30334 }, { "epoch": 1.3920884768941306, "grad_norm": 0.47529056668281555, "learning_rate": 5.734247333475815e-06, "loss": 0.3956, "step": 30335 }, { "epoch": 1.3921343673993851, "grad_norm": 0.4827890694141388, "learning_rate": 5.734004802804436e-06, "loss": 0.379, "step": 30336 }, { "epoch": 1.3921802579046396, "grad_norm": 0.4604039192199707, "learning_rate": 5.733762270368006e-06, "loss": 0.3466, "step": 30337 }, { "epoch": 1.3922261484098941, "grad_norm": 0.48655077815055847, "learning_rate": 5.733519736167109e-06, "loss": 0.384, "step": 30338 }, { "epoch": 1.3922720389151484, "grad_norm": 0.4173821806907654, "learning_rate": 5.733277200202329e-06, "loss": 0.281, "step": 30339 }, { "epoch": 1.3923179294204029, "grad_norm": 0.814814031124115, "learning_rate": 5.733034662474249e-06, "loss": 0.3498, "step": 30340 }, { "epoch": 1.3923638199256574, "grad_norm": 0.4419386088848114, "learning_rate": 5.732792122983452e-06, "loss": 0.3552, "step": 30341 }, { "epoch": 1.3924097104309119, "grad_norm": 0.4876553416252136, "learning_rate": 5.732549581730521e-06, "loss": 0.3726, "step": 30342 }, { "epoch": 1.3924556009361664, "grad_norm": 0.5092206001281738, "learning_rate": 5.732307038716041e-06, "loss": 0.4212, "step": 30343 }, { "epoch": 1.3925014914414207, "grad_norm": 0.41890591382980347, "learning_rate": 5.732064493940592e-06, "loss": 0.2814, "step": 30344 }, { "epoch": 1.3925473819466752, "grad_norm": 0.5132268667221069, "learning_rate": 5.731821947404759e-06, "loss": 0.4234, "step": 30345 }, { "epoch": 1.3925932724519297, "grad_norm": 0.4383687973022461, "learning_rate": 5.731579399109124e-06, "loss": 0.2965, "step": 30346 }, { "epoch": 1.3926391629571842, "grad_norm": 0.5155216455459595, "learning_rate": 5.7313368490542735e-06, "loss": 0.439, "step": 30347 }, { "epoch": 1.3926850534624386, "grad_norm": 0.47807371616363525, "learning_rate": 5.731094297240789e-06, "loss": 0.3535, "step": 30348 }, { "epoch": 1.3927309439676931, "grad_norm": 0.4623567461967468, "learning_rate": 5.730851743669251e-06, "loss": 0.3075, "step": 30349 }, { "epoch": 1.3927768344729476, "grad_norm": 0.5822451114654541, "learning_rate": 5.730609188340247e-06, "loss": 0.4935, "step": 30350 }, { "epoch": 1.3928227249782021, "grad_norm": 0.4866446852684021, "learning_rate": 5.730366631254358e-06, "loss": 0.3808, "step": 30351 }, { "epoch": 1.3928686154834564, "grad_norm": 0.4441816806793213, "learning_rate": 5.7301240724121676e-06, "loss": 0.3067, "step": 30352 }, { "epoch": 1.392914505988711, "grad_norm": 0.4770102798938751, "learning_rate": 5.729881511814261e-06, "loss": 0.3764, "step": 30353 }, { "epoch": 1.3929603964939654, "grad_norm": 0.5705597996711731, "learning_rate": 5.729638949461218e-06, "loss": 0.4614, "step": 30354 }, { "epoch": 1.39300628699922, "grad_norm": 0.5419921875, "learning_rate": 5.729396385353622e-06, "loss": 0.4295, "step": 30355 }, { "epoch": 1.3930521775044742, "grad_norm": 0.4608101546764374, "learning_rate": 5.729153819492061e-06, "loss": 0.3168, "step": 30356 }, { "epoch": 1.3930980680097287, "grad_norm": 0.4511256217956543, "learning_rate": 5.728911251877114e-06, "loss": 0.3604, "step": 30357 }, { "epoch": 1.3931439585149832, "grad_norm": 0.46918269991874695, "learning_rate": 5.728668682509366e-06, "loss": 0.3906, "step": 30358 }, { "epoch": 1.3931898490202377, "grad_norm": 0.4779897630214691, "learning_rate": 5.728426111389399e-06, "loss": 0.3544, "step": 30359 }, { "epoch": 1.3932357395254922, "grad_norm": 0.4973537027835846, "learning_rate": 5.728183538517798e-06, "loss": 0.3654, "step": 30360 }, { "epoch": 1.3932816300307467, "grad_norm": 0.4682236909866333, "learning_rate": 5.727940963895145e-06, "loss": 0.3087, "step": 30361 }, { "epoch": 1.3933275205360012, "grad_norm": 0.4664536118507385, "learning_rate": 5.727698387522023e-06, "loss": 0.352, "step": 30362 }, { "epoch": 1.3933734110412557, "grad_norm": 0.4596472680568695, "learning_rate": 5.727455809399015e-06, "loss": 0.3148, "step": 30363 }, { "epoch": 1.39341930154651, "grad_norm": 0.433256059885025, "learning_rate": 5.727213229526708e-06, "loss": 0.3233, "step": 30364 }, { "epoch": 1.3934651920517644, "grad_norm": 0.46075862646102905, "learning_rate": 5.726970647905682e-06, "loss": 0.3163, "step": 30365 }, { "epoch": 1.393511082557019, "grad_norm": 0.44754964113235474, "learning_rate": 5.72672806453652e-06, "loss": 0.3334, "step": 30366 }, { "epoch": 1.3935569730622734, "grad_norm": 0.4604916572570801, "learning_rate": 5.726485479419808e-06, "loss": 0.3162, "step": 30367 }, { "epoch": 1.393602863567528, "grad_norm": 0.43873023986816406, "learning_rate": 5.726242892556125e-06, "loss": 0.2928, "step": 30368 }, { "epoch": 1.3936487540727822, "grad_norm": 0.4664902091026306, "learning_rate": 5.72600030394606e-06, "loss": 0.3309, "step": 30369 }, { "epoch": 1.3936946445780367, "grad_norm": 0.4777691662311554, "learning_rate": 5.725757713590192e-06, "loss": 0.3391, "step": 30370 }, { "epoch": 1.3937405350832912, "grad_norm": 0.4571942985057831, "learning_rate": 5.725515121489106e-06, "loss": 0.3316, "step": 30371 }, { "epoch": 1.3937864255885457, "grad_norm": 0.4715847074985504, "learning_rate": 5.725272527643384e-06, "loss": 0.3414, "step": 30372 }, { "epoch": 1.3938323160938002, "grad_norm": 0.4987230896949768, "learning_rate": 5.725029932053612e-06, "loss": 0.3017, "step": 30373 }, { "epoch": 1.3938782065990547, "grad_norm": 0.4543333947658539, "learning_rate": 5.724787334720371e-06, "loss": 0.3415, "step": 30374 }, { "epoch": 1.3939240971043092, "grad_norm": 0.47051939368247986, "learning_rate": 5.724544735644247e-06, "loss": 0.3669, "step": 30375 }, { "epoch": 1.3939699876095637, "grad_norm": 0.46173739433288574, "learning_rate": 5.72430213482582e-06, "loss": 0.3717, "step": 30376 }, { "epoch": 1.394015878114818, "grad_norm": 0.44817879796028137, "learning_rate": 5.7240595322656746e-06, "loss": 0.3331, "step": 30377 }, { "epoch": 1.3940617686200725, "grad_norm": 0.5127765536308289, "learning_rate": 5.7238169279643955e-06, "loss": 0.416, "step": 30378 }, { "epoch": 1.394107659125327, "grad_norm": 0.45980104804039, "learning_rate": 5.723574321922564e-06, "loss": 0.3607, "step": 30379 }, { "epoch": 1.3941535496305815, "grad_norm": 0.41525402665138245, "learning_rate": 5.723331714140765e-06, "loss": 0.2849, "step": 30380 }, { "epoch": 1.394199440135836, "grad_norm": 0.4880722463130951, "learning_rate": 5.723089104619583e-06, "loss": 0.3629, "step": 30381 }, { "epoch": 1.3942453306410902, "grad_norm": 0.5014582276344299, "learning_rate": 5.722846493359598e-06, "loss": 0.4393, "step": 30382 }, { "epoch": 1.3942912211463447, "grad_norm": 0.47979748249053955, "learning_rate": 5.722603880361396e-06, "loss": 0.3569, "step": 30383 }, { "epoch": 1.3943371116515992, "grad_norm": 0.4602370262145996, "learning_rate": 5.72236126562556e-06, "loss": 0.3135, "step": 30384 }, { "epoch": 1.3943830021568537, "grad_norm": 0.434594064950943, "learning_rate": 5.722118649152673e-06, "loss": 0.2995, "step": 30385 }, { "epoch": 1.3944288926621082, "grad_norm": 0.47161078453063965, "learning_rate": 5.7218760309433185e-06, "loss": 0.4037, "step": 30386 }, { "epoch": 1.3944747831673627, "grad_norm": 0.430989533662796, "learning_rate": 5.7216334109980795e-06, "loss": 0.2803, "step": 30387 }, { "epoch": 1.3945206736726172, "grad_norm": 0.5023766160011292, "learning_rate": 5.72139078931754e-06, "loss": 0.4191, "step": 30388 }, { "epoch": 1.3945665641778717, "grad_norm": 0.46908140182495117, "learning_rate": 5.7211481659022835e-06, "loss": 0.3875, "step": 30389 }, { "epoch": 1.394612454683126, "grad_norm": 0.4324429929256439, "learning_rate": 5.7209055407528945e-06, "loss": 0.3171, "step": 30390 }, { "epoch": 1.3946583451883805, "grad_norm": 0.4551837146282196, "learning_rate": 5.720662913869954e-06, "loss": 0.3637, "step": 30391 }, { "epoch": 1.394704235693635, "grad_norm": 0.5051344037055969, "learning_rate": 5.720420285254047e-06, "loss": 0.2904, "step": 30392 }, { "epoch": 1.3947501261988895, "grad_norm": 0.4744405448436737, "learning_rate": 5.720177654905756e-06, "loss": 0.424, "step": 30393 }, { "epoch": 1.394796016704144, "grad_norm": 0.46851974725723267, "learning_rate": 5.719935022825666e-06, "loss": 0.3601, "step": 30394 }, { "epoch": 1.3948419072093983, "grad_norm": 0.4627271890640259, "learning_rate": 5.719692389014359e-06, "loss": 0.3651, "step": 30395 }, { "epoch": 1.3948877977146528, "grad_norm": 0.49190598726272583, "learning_rate": 5.7194497534724184e-06, "loss": 0.3825, "step": 30396 }, { "epoch": 1.3949336882199073, "grad_norm": 0.43936887383461, "learning_rate": 5.7192071162004285e-06, "loss": 0.3002, "step": 30397 }, { "epoch": 1.3949795787251618, "grad_norm": 0.44454583525657654, "learning_rate": 5.718964477198973e-06, "loss": 0.3083, "step": 30398 }, { "epoch": 1.3950254692304163, "grad_norm": 0.41476377844810486, "learning_rate": 5.718721836468634e-06, "loss": 0.3055, "step": 30399 }, { "epoch": 1.3950713597356708, "grad_norm": 0.5318158268928528, "learning_rate": 5.718479194009996e-06, "loss": 0.421, "step": 30400 }, { "epoch": 1.3951172502409253, "grad_norm": 0.47496166825294495, "learning_rate": 5.718236549823642e-06, "loss": 0.3457, "step": 30401 }, { "epoch": 1.3951631407461798, "grad_norm": 0.49589404463768005, "learning_rate": 5.717993903910157e-06, "loss": 0.394, "step": 30402 }, { "epoch": 1.395209031251434, "grad_norm": 0.537624180316925, "learning_rate": 5.717751256270123e-06, "loss": 0.4489, "step": 30403 }, { "epoch": 1.3952549217566885, "grad_norm": 0.46196869015693665, "learning_rate": 5.717508606904124e-06, "loss": 0.322, "step": 30404 }, { "epoch": 1.395300812261943, "grad_norm": 0.4932910203933716, "learning_rate": 5.717265955812742e-06, "loss": 0.4482, "step": 30405 }, { "epoch": 1.3953467027671975, "grad_norm": 0.46956688165664673, "learning_rate": 5.717023302996563e-06, "loss": 0.3515, "step": 30406 }, { "epoch": 1.3953925932724518, "grad_norm": 0.462916761636734, "learning_rate": 5.716780648456167e-06, "loss": 0.3531, "step": 30407 }, { "epoch": 1.3954384837777063, "grad_norm": 0.4539948105812073, "learning_rate": 5.716537992192143e-06, "loss": 0.3351, "step": 30408 }, { "epoch": 1.3954843742829608, "grad_norm": 0.45970776677131653, "learning_rate": 5.71629533420507e-06, "loss": 0.3227, "step": 30409 }, { "epoch": 1.3955302647882153, "grad_norm": 0.48853880167007446, "learning_rate": 5.7160526744955315e-06, "loss": 0.3674, "step": 30410 }, { "epoch": 1.3955761552934698, "grad_norm": 0.43366432189941406, "learning_rate": 5.715810013064112e-06, "loss": 0.3086, "step": 30411 }, { "epoch": 1.3956220457987243, "grad_norm": 0.4934180676937103, "learning_rate": 5.715567349911398e-06, "loss": 0.405, "step": 30412 }, { "epoch": 1.3956679363039788, "grad_norm": 0.4571032226085663, "learning_rate": 5.71532468503797e-06, "loss": 0.3462, "step": 30413 }, { "epoch": 1.3957138268092333, "grad_norm": 0.4463425874710083, "learning_rate": 5.71508201844441e-06, "loss": 0.3429, "step": 30414 }, { "epoch": 1.3957597173144876, "grad_norm": 0.48190680146217346, "learning_rate": 5.714839350131305e-06, "loss": 0.3521, "step": 30415 }, { "epoch": 1.395805607819742, "grad_norm": 0.48486289381980896, "learning_rate": 5.714596680099236e-06, "loss": 0.336, "step": 30416 }, { "epoch": 1.3958514983249966, "grad_norm": 0.48747485876083374, "learning_rate": 5.714354008348787e-06, "loss": 0.411, "step": 30417 }, { "epoch": 1.395897388830251, "grad_norm": 0.4783124327659607, "learning_rate": 5.714111334880543e-06, "loss": 0.3824, "step": 30418 }, { "epoch": 1.3959432793355055, "grad_norm": 0.4417360723018646, "learning_rate": 5.713868659695087e-06, "loss": 0.3566, "step": 30419 }, { "epoch": 1.3959891698407598, "grad_norm": 0.4791170358657837, "learning_rate": 5.713625982793001e-06, "loss": 0.3778, "step": 30420 }, { "epoch": 1.3960350603460143, "grad_norm": 0.4776018559932709, "learning_rate": 5.713383304174871e-06, "loss": 0.3683, "step": 30421 }, { "epoch": 1.3960809508512688, "grad_norm": 0.48346200585365295, "learning_rate": 5.713140623841278e-06, "loss": 0.312, "step": 30422 }, { "epoch": 1.3961268413565233, "grad_norm": 0.48635196685791016, "learning_rate": 5.712897941792808e-06, "loss": 0.3903, "step": 30423 }, { "epoch": 1.3961727318617778, "grad_norm": 0.49332302808761597, "learning_rate": 5.712655258030043e-06, "loss": 0.4068, "step": 30424 }, { "epoch": 1.3962186223670323, "grad_norm": 0.4469403624534607, "learning_rate": 5.712412572553567e-06, "loss": 0.3602, "step": 30425 }, { "epoch": 1.3962645128722868, "grad_norm": 0.47400718927383423, "learning_rate": 5.712169885363964e-06, "loss": 0.375, "step": 30426 }, { "epoch": 1.3963104033775413, "grad_norm": 0.43531715869903564, "learning_rate": 5.711927196461816e-06, "loss": 0.312, "step": 30427 }, { "epoch": 1.3963562938827956, "grad_norm": 0.4611225128173828, "learning_rate": 5.711684505847709e-06, "loss": 0.3157, "step": 30428 }, { "epoch": 1.39640218438805, "grad_norm": 0.444152295589447, "learning_rate": 5.711441813522225e-06, "loss": 0.3063, "step": 30429 }, { "epoch": 1.3964480748933046, "grad_norm": 0.4856477975845337, "learning_rate": 5.711199119485948e-06, "loss": 0.3835, "step": 30430 }, { "epoch": 1.396493965398559, "grad_norm": 0.4897124767303467, "learning_rate": 5.710956423739462e-06, "loss": 0.3477, "step": 30431 }, { "epoch": 1.3965398559038136, "grad_norm": 0.44401347637176514, "learning_rate": 5.71071372628335e-06, "loss": 0.2994, "step": 30432 }, { "epoch": 1.3965857464090679, "grad_norm": 0.4477541744709015, "learning_rate": 5.710471027118195e-06, "loss": 0.317, "step": 30433 }, { "epoch": 1.3966316369143223, "grad_norm": 0.4925900995731354, "learning_rate": 5.710228326244581e-06, "loss": 0.3822, "step": 30434 }, { "epoch": 1.3966775274195768, "grad_norm": 0.4776746332645416, "learning_rate": 5.709985623663094e-06, "loss": 0.406, "step": 30435 }, { "epoch": 1.3967234179248313, "grad_norm": 0.4376874268054962, "learning_rate": 5.709742919374315e-06, "loss": 0.2961, "step": 30436 }, { "epoch": 1.3967693084300858, "grad_norm": 0.4580034911632538, "learning_rate": 5.709500213378827e-06, "loss": 0.3446, "step": 30437 }, { "epoch": 1.3968151989353403, "grad_norm": 0.43879827857017517, "learning_rate": 5.709257505677217e-06, "loss": 0.3004, "step": 30438 }, { "epoch": 1.3968610894405948, "grad_norm": 0.450599730014801, "learning_rate": 5.709014796270065e-06, "loss": 0.3382, "step": 30439 }, { "epoch": 1.3969069799458493, "grad_norm": 0.45005470514297485, "learning_rate": 5.708772085157956e-06, "loss": 0.3343, "step": 30440 }, { "epoch": 1.3969528704511036, "grad_norm": 0.47958171367645264, "learning_rate": 5.708529372341476e-06, "loss": 0.3449, "step": 30441 }, { "epoch": 1.396998760956358, "grad_norm": 0.48262518644332886, "learning_rate": 5.708286657821204e-06, "loss": 0.4306, "step": 30442 }, { "epoch": 1.3970446514616126, "grad_norm": 0.46672382950782776, "learning_rate": 5.708043941597728e-06, "loss": 0.2987, "step": 30443 }, { "epoch": 1.397090541966867, "grad_norm": 0.45540308952331543, "learning_rate": 5.707801223671627e-06, "loss": 0.3599, "step": 30444 }, { "epoch": 1.3971364324721214, "grad_norm": 0.4659494161605835, "learning_rate": 5.707558504043489e-06, "loss": 0.3434, "step": 30445 }, { "epoch": 1.3971823229773759, "grad_norm": 0.4493085443973541, "learning_rate": 5.707315782713898e-06, "loss": 0.3248, "step": 30446 }, { "epoch": 1.3972282134826304, "grad_norm": 0.4677808880805969, "learning_rate": 5.707073059683433e-06, "loss": 0.3942, "step": 30447 }, { "epoch": 1.3972741039878849, "grad_norm": 0.4951171278953552, "learning_rate": 5.706830334952681e-06, "loss": 0.3819, "step": 30448 }, { "epoch": 1.3973199944931394, "grad_norm": 0.5140604972839355, "learning_rate": 5.706587608522227e-06, "loss": 0.4249, "step": 30449 }, { "epoch": 1.3973658849983939, "grad_norm": 0.4192191958427429, "learning_rate": 5.70634488039265e-06, "loss": 0.2682, "step": 30450 }, { "epoch": 1.3974117755036484, "grad_norm": 0.451386421918869, "learning_rate": 5.706102150564539e-06, "loss": 0.328, "step": 30451 }, { "epoch": 1.3974576660089029, "grad_norm": 0.4777073264122009, "learning_rate": 5.705859419038475e-06, "loss": 0.3629, "step": 30452 }, { "epoch": 1.3975035565141571, "grad_norm": 0.48843997716903687, "learning_rate": 5.705616685815041e-06, "loss": 0.3306, "step": 30453 }, { "epoch": 1.3975494470194116, "grad_norm": 0.47370651364326477, "learning_rate": 5.70537395089482e-06, "loss": 0.3506, "step": 30454 }, { "epoch": 1.3975953375246661, "grad_norm": 0.48216167092323303, "learning_rate": 5.7051312142784e-06, "loss": 0.3271, "step": 30455 }, { "epoch": 1.3976412280299206, "grad_norm": 0.46302226185798645, "learning_rate": 5.7048884759663604e-06, "loss": 0.3428, "step": 30456 }, { "epoch": 1.3976871185351751, "grad_norm": 0.4696814715862274, "learning_rate": 5.704645735959288e-06, "loss": 0.3673, "step": 30457 }, { "epoch": 1.3977330090404294, "grad_norm": 0.4597233235836029, "learning_rate": 5.704402994257764e-06, "loss": 0.3265, "step": 30458 }, { "epoch": 1.397778899545684, "grad_norm": 0.476111501455307, "learning_rate": 5.704160250862372e-06, "loss": 0.4011, "step": 30459 }, { "epoch": 1.3978247900509384, "grad_norm": 0.44585949182510376, "learning_rate": 5.7039175057736985e-06, "loss": 0.3282, "step": 30460 }, { "epoch": 1.397870680556193, "grad_norm": 0.4638659358024597, "learning_rate": 5.703674758992324e-06, "loss": 0.3734, "step": 30461 }, { "epoch": 1.3979165710614474, "grad_norm": 0.4854170083999634, "learning_rate": 5.703432010518836e-06, "loss": 0.4127, "step": 30462 }, { "epoch": 1.397962461566702, "grad_norm": 0.44458135962486267, "learning_rate": 5.7031892603538144e-06, "loss": 0.3023, "step": 30463 }, { "epoch": 1.3980083520719564, "grad_norm": 0.4612308740615845, "learning_rate": 5.702946508497844e-06, "loss": 0.3905, "step": 30464 }, { "epoch": 1.398054242577211, "grad_norm": 0.5073607563972473, "learning_rate": 5.7027037549515106e-06, "loss": 0.4023, "step": 30465 }, { "epoch": 1.3981001330824652, "grad_norm": 0.48198723793029785, "learning_rate": 5.702460999715396e-06, "loss": 0.3604, "step": 30466 }, { "epoch": 1.3981460235877197, "grad_norm": 0.4853844940662384, "learning_rate": 5.702218242790084e-06, "loss": 0.3888, "step": 30467 }, { "epoch": 1.3981919140929742, "grad_norm": 0.5036321878433228, "learning_rate": 5.7019754841761585e-06, "loss": 0.4339, "step": 30468 }, { "epoch": 1.3982378045982287, "grad_norm": 0.48087015748023987, "learning_rate": 5.701732723874205e-06, "loss": 0.3836, "step": 30469 }, { "epoch": 1.3982836951034832, "grad_norm": 0.45086729526519775, "learning_rate": 5.701489961884804e-06, "loss": 0.3054, "step": 30470 }, { "epoch": 1.3983295856087374, "grad_norm": 0.46627333760261536, "learning_rate": 5.701247198208543e-06, "loss": 0.4083, "step": 30471 }, { "epoch": 1.398375476113992, "grad_norm": 0.5135111808776855, "learning_rate": 5.701004432846002e-06, "loss": 0.4369, "step": 30472 }, { "epoch": 1.3984213666192464, "grad_norm": 0.47604984045028687, "learning_rate": 5.700761665797767e-06, "loss": 0.3637, "step": 30473 }, { "epoch": 1.398467257124501, "grad_norm": 0.47560882568359375, "learning_rate": 5.700518897064423e-06, "loss": 0.3434, "step": 30474 }, { "epoch": 1.3985131476297554, "grad_norm": 0.4974267780780792, "learning_rate": 5.70027612664655e-06, "loss": 0.4142, "step": 30475 }, { "epoch": 1.39855903813501, "grad_norm": 0.47538191080093384, "learning_rate": 5.700033354544734e-06, "loss": 0.3365, "step": 30476 }, { "epoch": 1.3986049286402644, "grad_norm": 0.5176913142204285, "learning_rate": 5.69979058075956e-06, "loss": 0.3822, "step": 30477 }, { "epoch": 1.398650819145519, "grad_norm": 0.45130783319473267, "learning_rate": 5.69954780529161e-06, "loss": 0.332, "step": 30478 }, { "epoch": 1.3986967096507732, "grad_norm": 0.47908908128738403, "learning_rate": 5.699305028141469e-06, "loss": 0.359, "step": 30479 }, { "epoch": 1.3987426001560277, "grad_norm": 0.4527153968811035, "learning_rate": 5.699062249309719e-06, "loss": 0.3823, "step": 30480 }, { "epoch": 1.3987884906612822, "grad_norm": 0.45467329025268555, "learning_rate": 5.698819468796945e-06, "loss": 0.3252, "step": 30481 }, { "epoch": 1.3988343811665367, "grad_norm": 0.45557960867881775, "learning_rate": 5.69857668660373e-06, "loss": 0.3653, "step": 30482 }, { "epoch": 1.3988802716717912, "grad_norm": 0.45011451840400696, "learning_rate": 5.69833390273066e-06, "loss": 0.3077, "step": 30483 }, { "epoch": 1.3989261621770455, "grad_norm": 0.4902711808681488, "learning_rate": 5.698091117178317e-06, "loss": 0.3942, "step": 30484 }, { "epoch": 1.3989720526823, "grad_norm": 0.4374467432498932, "learning_rate": 5.697848329947285e-06, "loss": 0.2941, "step": 30485 }, { "epoch": 1.3990179431875545, "grad_norm": 0.4423973262310028, "learning_rate": 5.6976055410381495e-06, "loss": 0.3232, "step": 30486 }, { "epoch": 1.399063833692809, "grad_norm": 0.4958360493183136, "learning_rate": 5.697362750451489e-06, "loss": 0.3902, "step": 30487 }, { "epoch": 1.3991097241980635, "grad_norm": 0.44962233304977417, "learning_rate": 5.6971199581878954e-06, "loss": 0.3173, "step": 30488 }, { "epoch": 1.399155614703318, "grad_norm": 0.4194338023662567, "learning_rate": 5.696877164247946e-06, "loss": 0.2835, "step": 30489 }, { "epoch": 1.3992015052085724, "grad_norm": 0.46637168526649475, "learning_rate": 5.696634368632228e-06, "loss": 0.3657, "step": 30490 }, { "epoch": 1.399247395713827, "grad_norm": 0.4376131594181061, "learning_rate": 5.696391571341324e-06, "loss": 0.3076, "step": 30491 }, { "epoch": 1.3992932862190812, "grad_norm": 0.4737323820590973, "learning_rate": 5.6961487723758165e-06, "loss": 0.3699, "step": 30492 }, { "epoch": 1.3993391767243357, "grad_norm": 0.48057979345321655, "learning_rate": 5.6959059717362916e-06, "loss": 0.361, "step": 30493 }, { "epoch": 1.3993850672295902, "grad_norm": 0.46984413266181946, "learning_rate": 5.6956631694233344e-06, "loss": 0.3396, "step": 30494 }, { "epoch": 1.3994309577348447, "grad_norm": 0.47452211380004883, "learning_rate": 5.695420365437526e-06, "loss": 0.3804, "step": 30495 }, { "epoch": 1.399476848240099, "grad_norm": 0.5063121914863586, "learning_rate": 5.6951775597794504e-06, "loss": 0.3905, "step": 30496 }, { "epoch": 1.3995227387453535, "grad_norm": 0.4413262903690338, "learning_rate": 5.6949347524496925e-06, "loss": 0.2889, "step": 30497 }, { "epoch": 1.399568629250608, "grad_norm": 0.5324788689613342, "learning_rate": 5.694691943448834e-06, "loss": 0.3573, "step": 30498 }, { "epoch": 1.3996145197558625, "grad_norm": 0.47010743618011475, "learning_rate": 5.694449132777463e-06, "loss": 0.3626, "step": 30499 }, { "epoch": 1.399660410261117, "grad_norm": 0.48463723063468933, "learning_rate": 5.694206320436161e-06, "loss": 0.4044, "step": 30500 }, { "epoch": 1.3997063007663715, "grad_norm": 0.4952201843261719, "learning_rate": 5.693963506425512e-06, "loss": 0.3539, "step": 30501 }, { "epoch": 1.399752191271626, "grad_norm": 0.4416428804397583, "learning_rate": 5.693720690746098e-06, "loss": 0.2946, "step": 30502 }, { "epoch": 1.3997980817768805, "grad_norm": 0.47428685426712036, "learning_rate": 5.6934778733985064e-06, "loss": 0.3646, "step": 30503 }, { "epoch": 1.3998439722821348, "grad_norm": 0.47907447814941406, "learning_rate": 5.693235054383318e-06, "loss": 0.4347, "step": 30504 }, { "epoch": 1.3998898627873892, "grad_norm": 0.3950011730194092, "learning_rate": 5.692992233701119e-06, "loss": 0.2598, "step": 30505 }, { "epoch": 1.3999357532926437, "grad_norm": 0.49776071310043335, "learning_rate": 5.692749411352493e-06, "loss": 0.3811, "step": 30506 }, { "epoch": 1.3999816437978982, "grad_norm": 0.4954083263874054, "learning_rate": 5.692506587338022e-06, "loss": 0.4057, "step": 30507 }, { "epoch": 1.4000275343031527, "grad_norm": 0.46958890557289124, "learning_rate": 5.692263761658292e-06, "loss": 0.4367, "step": 30508 }, { "epoch": 1.400073424808407, "grad_norm": 0.4891435205936432, "learning_rate": 5.692020934313886e-06, "loss": 0.3689, "step": 30509 }, { "epoch": 1.4001193153136615, "grad_norm": 0.469858855009079, "learning_rate": 5.691778105305387e-06, "loss": 0.3772, "step": 30510 }, { "epoch": 1.400165205818916, "grad_norm": 0.4417387545108795, "learning_rate": 5.691535274633381e-06, "loss": 0.2982, "step": 30511 }, { "epoch": 1.4002110963241705, "grad_norm": 0.4273539185523987, "learning_rate": 5.691292442298451e-06, "loss": 0.3181, "step": 30512 }, { "epoch": 1.400256986829425, "grad_norm": 0.4822871685028076, "learning_rate": 5.69104960830118e-06, "loss": 0.3987, "step": 30513 }, { "epoch": 1.4003028773346795, "grad_norm": 0.4981745779514313, "learning_rate": 5.6908067726421545e-06, "loss": 0.4462, "step": 30514 }, { "epoch": 1.400348767839934, "grad_norm": 0.4563388228416443, "learning_rate": 5.690563935321954e-06, "loss": 0.3496, "step": 30515 }, { "epoch": 1.4003946583451885, "grad_norm": 0.48132598400115967, "learning_rate": 5.690321096341168e-06, "loss": 0.362, "step": 30516 }, { "epoch": 1.4004405488504428, "grad_norm": 0.45878010988235474, "learning_rate": 5.690078255700377e-06, "loss": 0.3385, "step": 30517 }, { "epoch": 1.4004864393556973, "grad_norm": 0.47036391496658325, "learning_rate": 5.689835413400164e-06, "loss": 0.3601, "step": 30518 }, { "epoch": 1.4005323298609518, "grad_norm": 0.4814489781856537, "learning_rate": 5.6895925694411154e-06, "loss": 0.3484, "step": 30519 }, { "epoch": 1.4005782203662063, "grad_norm": 0.47754746675491333, "learning_rate": 5.689349723823816e-06, "loss": 0.3896, "step": 30520 }, { "epoch": 1.4006241108714608, "grad_norm": 0.47672638297080994, "learning_rate": 5.689106876548845e-06, "loss": 0.406, "step": 30521 }, { "epoch": 1.400670001376715, "grad_norm": 0.4592185914516449, "learning_rate": 5.6888640276167915e-06, "loss": 0.3492, "step": 30522 }, { "epoch": 1.4007158918819695, "grad_norm": 0.4650770425796509, "learning_rate": 5.6886211770282385e-06, "loss": 0.3439, "step": 30523 }, { "epoch": 1.400761782387224, "grad_norm": 0.47770148515701294, "learning_rate": 5.6883783247837665e-06, "loss": 0.3803, "step": 30524 }, { "epoch": 1.4008076728924785, "grad_norm": 0.4395122230052948, "learning_rate": 5.6881354708839635e-06, "loss": 0.3212, "step": 30525 }, { "epoch": 1.400853563397733, "grad_norm": 0.4546579420566559, "learning_rate": 5.6878926153294105e-06, "loss": 0.3717, "step": 30526 }, { "epoch": 1.4008994539029875, "grad_norm": 0.47088274359703064, "learning_rate": 5.687649758120694e-06, "loss": 0.3025, "step": 30527 }, { "epoch": 1.400945344408242, "grad_norm": 0.4586959481239319, "learning_rate": 5.687406899258396e-06, "loss": 0.3827, "step": 30528 }, { "epoch": 1.4009912349134965, "grad_norm": 0.4332188367843628, "learning_rate": 5.6871640387431026e-06, "loss": 0.3234, "step": 30529 }, { "epoch": 1.4010371254187508, "grad_norm": 0.5075878500938416, "learning_rate": 5.686921176575395e-06, "loss": 0.3696, "step": 30530 }, { "epoch": 1.4010830159240053, "grad_norm": 0.4826306104660034, "learning_rate": 5.68667831275586e-06, "loss": 0.3764, "step": 30531 }, { "epoch": 1.4011289064292598, "grad_norm": 0.4077684283256531, "learning_rate": 5.686435447285079e-06, "loss": 0.2897, "step": 30532 }, { "epoch": 1.4011747969345143, "grad_norm": 0.4237208068370819, "learning_rate": 5.686192580163639e-06, "loss": 0.2835, "step": 30533 }, { "epoch": 1.4012206874397686, "grad_norm": 0.4674409031867981, "learning_rate": 5.6859497113921225e-06, "loss": 0.3727, "step": 30534 }, { "epoch": 1.401266577945023, "grad_norm": 0.4776623249053955, "learning_rate": 5.685706840971111e-06, "loss": 0.3822, "step": 30535 }, { "epoch": 1.4013124684502776, "grad_norm": 0.46475669741630554, "learning_rate": 5.685463968901193e-06, "loss": 0.3488, "step": 30536 }, { "epoch": 1.401358358955532, "grad_norm": 0.4328990876674652, "learning_rate": 5.68522109518295e-06, "loss": 0.2959, "step": 30537 }, { "epoch": 1.4014042494607866, "grad_norm": 0.46911895275115967, "learning_rate": 5.684978219816968e-06, "loss": 0.3437, "step": 30538 }, { "epoch": 1.401450139966041, "grad_norm": 0.4853106737136841, "learning_rate": 5.684735342803828e-06, "loss": 0.3793, "step": 30539 }, { "epoch": 1.4014960304712956, "grad_norm": 0.44736698269844055, "learning_rate": 5.684492464144116e-06, "loss": 0.3057, "step": 30540 }, { "epoch": 1.40154192097655, "grad_norm": 0.4573589861392975, "learning_rate": 5.684249583838414e-06, "loss": 0.3831, "step": 30541 }, { "epoch": 1.4015878114818043, "grad_norm": 0.44389578700065613, "learning_rate": 5.68400670188731e-06, "loss": 0.3423, "step": 30542 }, { "epoch": 1.4016337019870588, "grad_norm": 0.45849719643592834, "learning_rate": 5.683763818291383e-06, "loss": 0.3039, "step": 30543 }, { "epoch": 1.4016795924923133, "grad_norm": 0.4361306130886078, "learning_rate": 5.683520933051223e-06, "loss": 0.2926, "step": 30544 }, { "epoch": 1.4017254829975678, "grad_norm": 0.49260056018829346, "learning_rate": 5.683278046167411e-06, "loss": 0.3785, "step": 30545 }, { "epoch": 1.4017713735028223, "grad_norm": 0.46770408749580383, "learning_rate": 5.683035157640529e-06, "loss": 0.3693, "step": 30546 }, { "epoch": 1.4018172640080766, "grad_norm": 0.4235199987888336, "learning_rate": 5.682792267471162e-06, "loss": 0.287, "step": 30547 }, { "epoch": 1.401863154513331, "grad_norm": 0.4623138904571533, "learning_rate": 5.682549375659897e-06, "loss": 0.3785, "step": 30548 }, { "epoch": 1.4019090450185856, "grad_norm": 0.47290629148483276, "learning_rate": 5.6823064822073175e-06, "loss": 0.3849, "step": 30549 }, { "epoch": 1.40195493552384, "grad_norm": 0.48674890398979187, "learning_rate": 5.682063587114004e-06, "loss": 0.4225, "step": 30550 }, { "epoch": 1.4020008260290946, "grad_norm": 0.44226357340812683, "learning_rate": 5.681820690380544e-06, "loss": 0.3243, "step": 30551 }, { "epoch": 1.402046716534349, "grad_norm": 0.44181811809539795, "learning_rate": 5.681577792007519e-06, "loss": 0.3169, "step": 30552 }, { "epoch": 1.4020926070396036, "grad_norm": 0.495848149061203, "learning_rate": 5.681334891995516e-06, "loss": 0.3747, "step": 30553 }, { "epoch": 1.402138497544858, "grad_norm": 0.48354411125183105, "learning_rate": 5.681091990345116e-06, "loss": 0.3536, "step": 30554 }, { "epoch": 1.4021843880501124, "grad_norm": 0.4623820185661316, "learning_rate": 5.680849087056907e-06, "loss": 0.3622, "step": 30555 }, { "epoch": 1.4022302785553669, "grad_norm": 0.4764816462993622, "learning_rate": 5.68060618213147e-06, "loss": 0.3797, "step": 30556 }, { "epoch": 1.4022761690606214, "grad_norm": 0.45645153522491455, "learning_rate": 5.680363275569388e-06, "loss": 0.3657, "step": 30557 }, { "epoch": 1.4023220595658759, "grad_norm": 0.4388754665851593, "learning_rate": 5.680120367371248e-06, "loss": 0.335, "step": 30558 }, { "epoch": 1.4023679500711304, "grad_norm": 0.5064692497253418, "learning_rate": 5.679877457537635e-06, "loss": 0.3976, "step": 30559 }, { "epoch": 1.4024138405763846, "grad_norm": 0.5059183835983276, "learning_rate": 5.679634546069129e-06, "loss": 0.3498, "step": 30560 }, { "epoch": 1.4024597310816391, "grad_norm": 0.5017886757850647, "learning_rate": 5.6793916329663176e-06, "loss": 0.4308, "step": 30561 }, { "epoch": 1.4025056215868936, "grad_norm": 0.4760131239891052, "learning_rate": 5.679148718229784e-06, "loss": 0.3618, "step": 30562 }, { "epoch": 1.4025515120921481, "grad_norm": 0.44018077850341797, "learning_rate": 5.6789058018601104e-06, "loss": 0.3218, "step": 30563 }, { "epoch": 1.4025974025974026, "grad_norm": 0.4927705228328705, "learning_rate": 5.678662883857884e-06, "loss": 0.3612, "step": 30564 }, { "epoch": 1.4026432931026571, "grad_norm": 0.4663553535938263, "learning_rate": 5.678419964223688e-06, "loss": 0.3448, "step": 30565 }, { "epoch": 1.4026891836079116, "grad_norm": 0.4771214723587036, "learning_rate": 5.6781770429581066e-06, "loss": 0.3763, "step": 30566 }, { "epoch": 1.4027350741131661, "grad_norm": 0.4704783260822296, "learning_rate": 5.677934120061721e-06, "loss": 0.3892, "step": 30567 }, { "epoch": 1.4027809646184204, "grad_norm": 0.4526841938495636, "learning_rate": 5.67769119553512e-06, "loss": 0.3691, "step": 30568 }, { "epoch": 1.4028268551236749, "grad_norm": 0.4877703785896301, "learning_rate": 5.6774482693788835e-06, "loss": 0.4102, "step": 30569 }, { "epoch": 1.4028727456289294, "grad_norm": 0.517266035079956, "learning_rate": 5.6772053415936e-06, "loss": 0.3549, "step": 30570 }, { "epoch": 1.4029186361341839, "grad_norm": 0.4898463785648346, "learning_rate": 5.676962412179851e-06, "loss": 0.4367, "step": 30571 }, { "epoch": 1.4029645266394384, "grad_norm": 0.456590861082077, "learning_rate": 5.67671948113822e-06, "loss": 0.3689, "step": 30572 }, { "epoch": 1.4030104171446927, "grad_norm": 0.491571307182312, "learning_rate": 5.676476548469293e-06, "loss": 0.3672, "step": 30573 }, { "epoch": 1.4030563076499472, "grad_norm": 0.4826657772064209, "learning_rate": 5.676233614173652e-06, "loss": 0.3896, "step": 30574 }, { "epoch": 1.4031021981552017, "grad_norm": 0.4708399772644043, "learning_rate": 5.675990678251884e-06, "loss": 0.374, "step": 30575 }, { "epoch": 1.4031480886604561, "grad_norm": 0.5163946747779846, "learning_rate": 5.675747740704571e-06, "loss": 0.4693, "step": 30576 }, { "epoch": 1.4031939791657106, "grad_norm": 0.486020028591156, "learning_rate": 5.675504801532299e-06, "loss": 0.4305, "step": 30577 }, { "epoch": 1.4032398696709651, "grad_norm": 0.4489801824092865, "learning_rate": 5.6752618607356505e-06, "loss": 0.3307, "step": 30578 }, { "epoch": 1.4032857601762196, "grad_norm": 0.49930375814437866, "learning_rate": 5.675018918315211e-06, "loss": 0.3732, "step": 30579 }, { "epoch": 1.4033316506814741, "grad_norm": 0.4781452417373657, "learning_rate": 5.674775974271561e-06, "loss": 0.3717, "step": 30580 }, { "epoch": 1.4033775411867284, "grad_norm": 0.5085635185241699, "learning_rate": 5.674533028605291e-06, "loss": 0.4462, "step": 30581 }, { "epoch": 1.403423431691983, "grad_norm": 0.4564163386821747, "learning_rate": 5.674290081316982e-06, "loss": 0.3163, "step": 30582 }, { "epoch": 1.4034693221972374, "grad_norm": 0.5115766525268555, "learning_rate": 5.674047132407217e-06, "loss": 0.3875, "step": 30583 }, { "epoch": 1.403515212702492, "grad_norm": 0.4586496651172638, "learning_rate": 5.673804181876581e-06, "loss": 0.3294, "step": 30584 }, { "epoch": 1.4035611032077462, "grad_norm": 0.4568221867084503, "learning_rate": 5.673561229725659e-06, "loss": 0.3358, "step": 30585 }, { "epoch": 1.4036069937130007, "grad_norm": 0.49388402700424194, "learning_rate": 5.6733182759550354e-06, "loss": 0.4421, "step": 30586 }, { "epoch": 1.4036528842182552, "grad_norm": 0.4518837630748749, "learning_rate": 5.673075320565293e-06, "loss": 0.332, "step": 30587 }, { "epoch": 1.4036987747235097, "grad_norm": 0.4668740928173065, "learning_rate": 5.672832363557018e-06, "loss": 0.349, "step": 30588 }, { "epoch": 1.4037446652287642, "grad_norm": 0.5386171936988831, "learning_rate": 5.6725894049307924e-06, "loss": 0.5308, "step": 30589 }, { "epoch": 1.4037905557340187, "grad_norm": 0.46664607524871826, "learning_rate": 5.672346444687202e-06, "loss": 0.3385, "step": 30590 }, { "epoch": 1.4038364462392732, "grad_norm": 0.48087185621261597, "learning_rate": 5.67210348282683e-06, "loss": 0.4125, "step": 30591 }, { "epoch": 1.4038823367445277, "grad_norm": 0.4826028645038605, "learning_rate": 5.671860519350263e-06, "loss": 0.3186, "step": 30592 }, { "epoch": 1.403928227249782, "grad_norm": 0.4580307900905609, "learning_rate": 5.671617554258083e-06, "loss": 0.318, "step": 30593 }, { "epoch": 1.4039741177550364, "grad_norm": 0.450836181640625, "learning_rate": 5.6713745875508745e-06, "loss": 0.3475, "step": 30594 }, { "epoch": 1.404020008260291, "grad_norm": 0.4636431038379669, "learning_rate": 5.67113161922922e-06, "loss": 0.3135, "step": 30595 }, { "epoch": 1.4040658987655454, "grad_norm": 0.48314210772514343, "learning_rate": 5.670888649293709e-06, "loss": 0.344, "step": 30596 }, { "epoch": 1.4041117892708, "grad_norm": 0.4551754891872406, "learning_rate": 5.670645677744919e-06, "loss": 0.3151, "step": 30597 }, { "epoch": 1.4041576797760542, "grad_norm": 0.4140892028808594, "learning_rate": 5.670402704583441e-06, "loss": 0.2445, "step": 30598 }, { "epoch": 1.4042035702813087, "grad_norm": 0.4736423194408417, "learning_rate": 5.670159729809855e-06, "loss": 0.3405, "step": 30599 }, { "epoch": 1.4042494607865632, "grad_norm": 0.49598556756973267, "learning_rate": 5.669916753424745e-06, "loss": 0.3872, "step": 30600 }, { "epoch": 1.4042953512918177, "grad_norm": 0.4861445128917694, "learning_rate": 5.669673775428698e-06, "loss": 0.4017, "step": 30601 }, { "epoch": 1.4043412417970722, "grad_norm": 0.466854989528656, "learning_rate": 5.669430795822297e-06, "loss": 0.3338, "step": 30602 }, { "epoch": 1.4043871323023267, "grad_norm": 0.5214053392410278, "learning_rate": 5.6691878146061265e-06, "loss": 0.3376, "step": 30603 }, { "epoch": 1.4044330228075812, "grad_norm": 0.5264842510223389, "learning_rate": 5.66894483178077e-06, "loss": 0.442, "step": 30604 }, { "epoch": 1.4044789133128357, "grad_norm": 0.4776712954044342, "learning_rate": 5.668701847346813e-06, "loss": 0.3482, "step": 30605 }, { "epoch": 1.40452480381809, "grad_norm": 0.5053424835205078, "learning_rate": 5.668458861304838e-06, "loss": 0.4322, "step": 30606 }, { "epoch": 1.4045706943233445, "grad_norm": 0.5037473440170288, "learning_rate": 5.66821587365543e-06, "loss": 0.396, "step": 30607 }, { "epoch": 1.404616584828599, "grad_norm": 0.4608955383300781, "learning_rate": 5.6679728843991745e-06, "loss": 0.3676, "step": 30608 }, { "epoch": 1.4046624753338535, "grad_norm": 0.49622461199760437, "learning_rate": 5.667729893536655e-06, "loss": 0.408, "step": 30609 }, { "epoch": 1.404708365839108, "grad_norm": 0.4593330919742584, "learning_rate": 5.667486901068457e-06, "loss": 0.3604, "step": 30610 }, { "epoch": 1.4047542563443622, "grad_norm": 0.46884584426879883, "learning_rate": 5.6672439069951624e-06, "loss": 0.3118, "step": 30611 }, { "epoch": 1.4048001468496167, "grad_norm": 0.47514376044273376, "learning_rate": 5.667000911317357e-06, "loss": 0.3358, "step": 30612 }, { "epoch": 1.4048460373548712, "grad_norm": 0.5239102244377136, "learning_rate": 5.666757914035625e-06, "loss": 0.3936, "step": 30613 }, { "epoch": 1.4048919278601257, "grad_norm": 0.48989957571029663, "learning_rate": 5.666514915150551e-06, "loss": 0.353, "step": 30614 }, { "epoch": 1.4049378183653802, "grad_norm": 0.449582576751709, "learning_rate": 5.6662719146627176e-06, "loss": 0.3497, "step": 30615 }, { "epoch": 1.4049837088706347, "grad_norm": 0.507612943649292, "learning_rate": 5.666028912572712e-06, "loss": 0.4163, "step": 30616 }, { "epoch": 1.4050295993758892, "grad_norm": 0.4762733578681946, "learning_rate": 5.665785908881116e-06, "loss": 0.3628, "step": 30617 }, { "epoch": 1.4050754898811437, "grad_norm": 0.477347195148468, "learning_rate": 5.665542903588516e-06, "loss": 0.3802, "step": 30618 }, { "epoch": 1.405121380386398, "grad_norm": 0.5213756561279297, "learning_rate": 5.665299896695494e-06, "loss": 0.4043, "step": 30619 }, { "epoch": 1.4051672708916525, "grad_norm": 0.4820301830768585, "learning_rate": 5.665056888202637e-06, "loss": 0.4263, "step": 30620 }, { "epoch": 1.405213161396907, "grad_norm": 0.48144298791885376, "learning_rate": 5.664813878110529e-06, "loss": 0.343, "step": 30621 }, { "epoch": 1.4052590519021615, "grad_norm": 0.4894770681858063, "learning_rate": 5.6645708664197505e-06, "loss": 0.3812, "step": 30622 }, { "epoch": 1.4053049424074158, "grad_norm": 0.4747690260410309, "learning_rate": 5.66432785313089e-06, "loss": 0.3432, "step": 30623 }, { "epoch": 1.4053508329126703, "grad_norm": 0.4587125778198242, "learning_rate": 5.6640848382445324e-06, "loss": 0.2986, "step": 30624 }, { "epoch": 1.4053967234179248, "grad_norm": 0.4678109586238861, "learning_rate": 5.663841821761259e-06, "loss": 0.3418, "step": 30625 }, { "epoch": 1.4054426139231793, "grad_norm": 0.4801413118839264, "learning_rate": 5.663598803681656e-06, "loss": 0.3682, "step": 30626 }, { "epoch": 1.4054885044284338, "grad_norm": 0.46379968523979187, "learning_rate": 5.6633557840063066e-06, "loss": 0.3162, "step": 30627 }, { "epoch": 1.4055343949336883, "grad_norm": 0.4812333285808563, "learning_rate": 5.663112762735796e-06, "loss": 0.3591, "step": 30628 }, { "epoch": 1.4055802854389428, "grad_norm": 0.44547685980796814, "learning_rate": 5.6628697398707076e-06, "loss": 0.3447, "step": 30629 }, { "epoch": 1.4056261759441973, "grad_norm": 0.5347281098365784, "learning_rate": 5.662626715411629e-06, "loss": 0.4761, "step": 30630 }, { "epoch": 1.4056720664494515, "grad_norm": 0.4757799506187439, "learning_rate": 5.662383689359141e-06, "loss": 0.3481, "step": 30631 }, { "epoch": 1.405717956954706, "grad_norm": 0.46899133920669556, "learning_rate": 5.662140661713829e-06, "loss": 0.36, "step": 30632 }, { "epoch": 1.4057638474599605, "grad_norm": 0.45681729912757874, "learning_rate": 5.661897632476279e-06, "loss": 0.3167, "step": 30633 }, { "epoch": 1.405809737965215, "grad_norm": 0.43412426114082336, "learning_rate": 5.661654601647072e-06, "loss": 0.3392, "step": 30634 }, { "epoch": 1.4058556284704695, "grad_norm": 0.46264445781707764, "learning_rate": 5.661411569226797e-06, "loss": 0.32, "step": 30635 }, { "epoch": 1.4059015189757238, "grad_norm": 0.6127398014068604, "learning_rate": 5.661168535216035e-06, "loss": 0.2972, "step": 30636 }, { "epoch": 1.4059474094809783, "grad_norm": 0.4390941262245178, "learning_rate": 5.660925499615369e-06, "loss": 0.3139, "step": 30637 }, { "epoch": 1.4059932999862328, "grad_norm": 0.43178120255470276, "learning_rate": 5.660682462425388e-06, "loss": 0.2964, "step": 30638 }, { "epoch": 1.4060391904914873, "grad_norm": 0.48744189739227295, "learning_rate": 5.6604394236466745e-06, "loss": 0.3671, "step": 30639 }, { "epoch": 1.4060850809967418, "grad_norm": 0.47400999069213867, "learning_rate": 5.660196383279811e-06, "loss": 0.3113, "step": 30640 }, { "epoch": 1.4061309715019963, "grad_norm": 0.5465152859687805, "learning_rate": 5.659953341325386e-06, "loss": 0.5219, "step": 30641 }, { "epoch": 1.4061768620072508, "grad_norm": 0.46050935983657837, "learning_rate": 5.6597102977839804e-06, "loss": 0.3637, "step": 30642 }, { "epoch": 1.4062227525125053, "grad_norm": 0.48843950033187866, "learning_rate": 5.659467252656179e-06, "loss": 0.3873, "step": 30643 }, { "epoch": 1.4062686430177596, "grad_norm": 0.49041104316711426, "learning_rate": 5.659224205942568e-06, "loss": 0.3621, "step": 30644 }, { "epoch": 1.406314533523014, "grad_norm": 0.46887555718421936, "learning_rate": 5.6589811576437295e-06, "loss": 0.3447, "step": 30645 }, { "epoch": 1.4063604240282686, "grad_norm": 0.47729310393333435, "learning_rate": 5.6587381077602485e-06, "loss": 0.3407, "step": 30646 }, { "epoch": 1.406406314533523, "grad_norm": 0.4840472638607025, "learning_rate": 5.658495056292714e-06, "loss": 0.3925, "step": 30647 }, { "epoch": 1.4064522050387775, "grad_norm": 0.43364202976226807, "learning_rate": 5.658252003241704e-06, "loss": 0.3, "step": 30648 }, { "epoch": 1.4064980955440318, "grad_norm": 0.5036024451255798, "learning_rate": 5.658008948607806e-06, "loss": 0.4112, "step": 30649 }, { "epoch": 1.4065439860492863, "grad_norm": 0.507006824016571, "learning_rate": 5.657765892391604e-06, "loss": 0.4343, "step": 30650 }, { "epoch": 1.4065898765545408, "grad_norm": 0.4379733204841614, "learning_rate": 5.657522834593683e-06, "loss": 0.3271, "step": 30651 }, { "epoch": 1.4066357670597953, "grad_norm": 0.484536349773407, "learning_rate": 5.657279775214628e-06, "loss": 0.3578, "step": 30652 }, { "epoch": 1.4066816575650498, "grad_norm": 0.47118768095970154, "learning_rate": 5.657036714255022e-06, "loss": 0.427, "step": 30653 }, { "epoch": 1.4067275480703043, "grad_norm": 0.474059522151947, "learning_rate": 5.656793651715449e-06, "loss": 0.3944, "step": 30654 }, { "epoch": 1.4067734385755588, "grad_norm": 0.4684872329235077, "learning_rate": 5.656550587596496e-06, "loss": 0.3495, "step": 30655 }, { "epoch": 1.4068193290808133, "grad_norm": 0.45506152510643005, "learning_rate": 5.656307521898744e-06, "loss": 0.2978, "step": 30656 }, { "epoch": 1.4068652195860676, "grad_norm": 0.45992323756217957, "learning_rate": 5.656064454622781e-06, "loss": 0.3611, "step": 30657 }, { "epoch": 1.406911110091322, "grad_norm": 0.44445157051086426, "learning_rate": 5.6558213857691916e-06, "loss": 0.3235, "step": 30658 }, { "epoch": 1.4069570005965766, "grad_norm": 0.5436306595802307, "learning_rate": 5.6555783153385564e-06, "loss": 0.495, "step": 30659 }, { "epoch": 1.407002891101831, "grad_norm": 0.4242517054080963, "learning_rate": 5.6553352433314626e-06, "loss": 0.3088, "step": 30660 }, { "epoch": 1.4070487816070856, "grad_norm": 0.4567176103591919, "learning_rate": 5.655092169748495e-06, "loss": 0.3595, "step": 30661 }, { "epoch": 1.4070946721123399, "grad_norm": 0.4350859522819519, "learning_rate": 5.654849094590237e-06, "loss": 0.2966, "step": 30662 }, { "epoch": 1.4071405626175943, "grad_norm": 0.4829665720462799, "learning_rate": 5.654606017857274e-06, "loss": 0.3867, "step": 30663 }, { "epoch": 1.4071864531228488, "grad_norm": 0.4963776469230652, "learning_rate": 5.654362939550191e-06, "loss": 0.3994, "step": 30664 }, { "epoch": 1.4072323436281033, "grad_norm": 0.445199579000473, "learning_rate": 5.654119859669569e-06, "loss": 0.3409, "step": 30665 }, { "epoch": 1.4072782341333578, "grad_norm": 0.4864162802696228, "learning_rate": 5.653876778215996e-06, "loss": 0.3871, "step": 30666 }, { "epoch": 1.4073241246386123, "grad_norm": 0.48371148109436035, "learning_rate": 5.6536336951900576e-06, "loss": 0.3984, "step": 30667 }, { "epoch": 1.4073700151438668, "grad_norm": 0.47050681710243225, "learning_rate": 5.653390610592336e-06, "loss": 0.3548, "step": 30668 }, { "epoch": 1.4074159056491213, "grad_norm": 0.46876898407936096, "learning_rate": 5.653147524423416e-06, "loss": 0.2987, "step": 30669 }, { "epoch": 1.4074617961543756, "grad_norm": 0.5129265189170837, "learning_rate": 5.652904436683882e-06, "loss": 0.4216, "step": 30670 }, { "epoch": 1.40750768665963, "grad_norm": 0.45877495408058167, "learning_rate": 5.652661347374317e-06, "loss": 0.3037, "step": 30671 }, { "epoch": 1.4075535771648846, "grad_norm": 0.47052499651908875, "learning_rate": 5.65241825649531e-06, "loss": 0.3779, "step": 30672 }, { "epoch": 1.407599467670139, "grad_norm": 0.4405198395252228, "learning_rate": 5.652175164047442e-06, "loss": 0.3179, "step": 30673 }, { "epoch": 1.4076453581753934, "grad_norm": 0.4603905975818634, "learning_rate": 5.651932070031298e-06, "loss": 0.358, "step": 30674 }, { "epoch": 1.4076912486806479, "grad_norm": 0.48367437720298767, "learning_rate": 5.651688974447466e-06, "loss": 0.3433, "step": 30675 }, { "epoch": 1.4077371391859024, "grad_norm": 0.48867806792259216, "learning_rate": 5.651445877296524e-06, "loss": 0.3574, "step": 30676 }, { "epoch": 1.4077830296911569, "grad_norm": 0.47831350564956665, "learning_rate": 5.651202778579061e-06, "loss": 0.3936, "step": 30677 }, { "epoch": 1.4078289201964114, "grad_norm": 0.4969753623008728, "learning_rate": 5.650959678295663e-06, "loss": 0.3591, "step": 30678 }, { "epoch": 1.4078748107016659, "grad_norm": 0.4149496853351593, "learning_rate": 5.650716576446911e-06, "loss": 0.2859, "step": 30679 }, { "epoch": 1.4079207012069204, "grad_norm": 0.49920201301574707, "learning_rate": 5.650473473033391e-06, "loss": 0.4488, "step": 30680 }, { "epoch": 1.4079665917121749, "grad_norm": 0.4971385598182678, "learning_rate": 5.650230368055689e-06, "loss": 0.3979, "step": 30681 }, { "epoch": 1.4080124822174291, "grad_norm": 0.4720684885978699, "learning_rate": 5.649987261514386e-06, "loss": 0.3284, "step": 30682 }, { "epoch": 1.4080583727226836, "grad_norm": 0.48297712206840515, "learning_rate": 5.64974415341007e-06, "loss": 0.3899, "step": 30683 }, { "epoch": 1.4081042632279381, "grad_norm": 0.4882252514362335, "learning_rate": 5.649501043743324e-06, "loss": 0.4061, "step": 30684 }, { "epoch": 1.4081501537331926, "grad_norm": 0.4824000298976898, "learning_rate": 5.649257932514733e-06, "loss": 0.3629, "step": 30685 }, { "epoch": 1.4081960442384471, "grad_norm": 0.4858934283256531, "learning_rate": 5.6490148197248824e-06, "loss": 0.3759, "step": 30686 }, { "epoch": 1.4082419347437014, "grad_norm": 0.4833122491836548, "learning_rate": 5.648771705374355e-06, "loss": 0.2909, "step": 30687 }, { "epoch": 1.408287825248956, "grad_norm": 0.4304989278316498, "learning_rate": 5.648528589463736e-06, "loss": 0.296, "step": 30688 }, { "epoch": 1.4083337157542104, "grad_norm": 0.4500770568847656, "learning_rate": 5.648285471993611e-06, "loss": 0.2979, "step": 30689 }, { "epoch": 1.408379606259465, "grad_norm": 0.4763033092021942, "learning_rate": 5.648042352964564e-06, "loss": 0.3892, "step": 30690 }, { "epoch": 1.4084254967647194, "grad_norm": 0.49361369013786316, "learning_rate": 5.6477992323771805e-06, "loss": 0.3684, "step": 30691 }, { "epoch": 1.408471387269974, "grad_norm": 0.5281588435173035, "learning_rate": 5.647556110232044e-06, "loss": 0.4647, "step": 30692 }, { "epoch": 1.4085172777752284, "grad_norm": 0.4348994195461273, "learning_rate": 5.647312986529738e-06, "loss": 0.2558, "step": 30693 }, { "epoch": 1.408563168280483, "grad_norm": 0.4772498607635498, "learning_rate": 5.647069861270849e-06, "loss": 0.4025, "step": 30694 }, { "epoch": 1.4086090587857372, "grad_norm": 0.5009322166442871, "learning_rate": 5.6468267344559626e-06, "loss": 0.4053, "step": 30695 }, { "epoch": 1.4086549492909917, "grad_norm": 0.458627313375473, "learning_rate": 5.646583606085662e-06, "loss": 0.3244, "step": 30696 }, { "epoch": 1.4087008397962462, "grad_norm": 0.4330346882343292, "learning_rate": 5.64634047616053e-06, "loss": 0.3047, "step": 30697 }, { "epoch": 1.4087467303015007, "grad_norm": 0.48149943351745605, "learning_rate": 5.646097344681155e-06, "loss": 0.3571, "step": 30698 }, { "epoch": 1.4087926208067552, "grad_norm": 0.46391624212265015, "learning_rate": 5.645854211648119e-06, "loss": 0.332, "step": 30699 }, { "epoch": 1.4088385113120094, "grad_norm": 0.47340190410614014, "learning_rate": 5.645611077062008e-06, "loss": 0.3722, "step": 30700 }, { "epoch": 1.408884401817264, "grad_norm": 0.4731406271457672, "learning_rate": 5.645367940923405e-06, "loss": 0.3216, "step": 30701 }, { "epoch": 1.4089302923225184, "grad_norm": 0.4492392838001251, "learning_rate": 5.645124803232897e-06, "loss": 0.3504, "step": 30702 }, { "epoch": 1.408976182827773, "grad_norm": 0.4633786082267761, "learning_rate": 5.6448816639910675e-06, "loss": 0.3331, "step": 30703 }, { "epoch": 1.4090220733330274, "grad_norm": 0.4437253177165985, "learning_rate": 5.6446385231985e-06, "loss": 0.3103, "step": 30704 }, { "epoch": 1.409067963838282, "grad_norm": 0.48084208369255066, "learning_rate": 5.64439538085578e-06, "loss": 0.3622, "step": 30705 }, { "epoch": 1.4091138543435364, "grad_norm": 0.46057942509651184, "learning_rate": 5.644152236963494e-06, "loss": 0.325, "step": 30706 }, { "epoch": 1.409159744848791, "grad_norm": 0.43466031551361084, "learning_rate": 5.643909091522225e-06, "loss": 0.3006, "step": 30707 }, { "epoch": 1.4092056353540452, "grad_norm": 0.4857945740222931, "learning_rate": 5.643665944532557e-06, "loss": 0.4165, "step": 30708 }, { "epoch": 1.4092515258592997, "grad_norm": 0.4735317528247833, "learning_rate": 5.643422795995076e-06, "loss": 0.4095, "step": 30709 }, { "epoch": 1.4092974163645542, "grad_norm": 0.4613579213619232, "learning_rate": 5.643179645910365e-06, "loss": 0.355, "step": 30710 }, { "epoch": 1.4093433068698087, "grad_norm": 0.4570719301700592, "learning_rate": 5.642936494279011e-06, "loss": 0.3289, "step": 30711 }, { "epoch": 1.409389197375063, "grad_norm": 0.4825561046600342, "learning_rate": 5.642693341101598e-06, "loss": 0.3876, "step": 30712 }, { "epoch": 1.4094350878803175, "grad_norm": 0.5210810899734497, "learning_rate": 5.642450186378711e-06, "loss": 0.3863, "step": 30713 }, { "epoch": 1.409480978385572, "grad_norm": 0.4759581983089447, "learning_rate": 5.642207030110932e-06, "loss": 0.382, "step": 30714 }, { "epoch": 1.4095268688908265, "grad_norm": 0.46867358684539795, "learning_rate": 5.64196387229885e-06, "loss": 0.3337, "step": 30715 }, { "epoch": 1.409572759396081, "grad_norm": 0.46694397926330566, "learning_rate": 5.641720712943045e-06, "loss": 0.3879, "step": 30716 }, { "epoch": 1.4096186499013355, "grad_norm": 0.46989521384239197, "learning_rate": 5.641477552044107e-06, "loss": 0.3545, "step": 30717 }, { "epoch": 1.40966454040659, "grad_norm": 0.4884621798992157, "learning_rate": 5.641234389602617e-06, "loss": 0.3638, "step": 30718 }, { "epoch": 1.4097104309118444, "grad_norm": 0.5010775923728943, "learning_rate": 5.640991225619159e-06, "loss": 0.3973, "step": 30719 }, { "epoch": 1.4097563214170987, "grad_norm": 0.48933959007263184, "learning_rate": 5.640748060094322e-06, "loss": 0.3903, "step": 30720 }, { "epoch": 1.4098022119223532, "grad_norm": 0.48443931341171265, "learning_rate": 5.640504893028687e-06, "loss": 0.3875, "step": 30721 }, { "epoch": 1.4098481024276077, "grad_norm": 0.4882057011127472, "learning_rate": 5.640261724422838e-06, "loss": 0.3985, "step": 30722 }, { "epoch": 1.4098939929328622, "grad_norm": 0.4784524142742157, "learning_rate": 5.640018554277365e-06, "loss": 0.3646, "step": 30723 }, { "epoch": 1.4099398834381167, "grad_norm": 0.44518914818763733, "learning_rate": 5.639775382592847e-06, "loss": 0.3191, "step": 30724 }, { "epoch": 1.409985773943371, "grad_norm": 0.48890867829322815, "learning_rate": 5.6395322093698716e-06, "loss": 0.3787, "step": 30725 }, { "epoch": 1.4100316644486255, "grad_norm": 0.49693554639816284, "learning_rate": 5.639289034609024e-06, "loss": 0.3962, "step": 30726 }, { "epoch": 1.41007755495388, "grad_norm": 0.4443364143371582, "learning_rate": 5.639045858310886e-06, "loss": 0.3268, "step": 30727 }, { "epoch": 1.4101234454591345, "grad_norm": 0.4881758391857147, "learning_rate": 5.638802680476047e-06, "loss": 0.3997, "step": 30728 }, { "epoch": 1.410169335964389, "grad_norm": 0.4745207130908966, "learning_rate": 5.638559501105087e-06, "loss": 0.359, "step": 30729 }, { "epoch": 1.4102152264696435, "grad_norm": 0.4794755280017853, "learning_rate": 5.638316320198594e-06, "loss": 0.3756, "step": 30730 }, { "epoch": 1.410261116974898, "grad_norm": 0.4616711735725403, "learning_rate": 5.638073137757151e-06, "loss": 0.3417, "step": 30731 }, { "epoch": 1.4103070074801525, "grad_norm": 0.43901970982551575, "learning_rate": 5.637829953781344e-06, "loss": 0.281, "step": 30732 }, { "epoch": 1.4103528979854068, "grad_norm": 0.4389268755912781, "learning_rate": 5.637586768271758e-06, "loss": 0.3257, "step": 30733 }, { "epoch": 1.4103987884906612, "grad_norm": 0.46237727999687195, "learning_rate": 5.637343581228976e-06, "loss": 0.3599, "step": 30734 }, { "epoch": 1.4104446789959157, "grad_norm": 0.4500422775745392, "learning_rate": 5.637100392653585e-06, "loss": 0.341, "step": 30735 }, { "epoch": 1.4104905695011702, "grad_norm": 0.48443707823753357, "learning_rate": 5.636857202546167e-06, "loss": 0.4267, "step": 30736 }, { "epoch": 1.4105364600064247, "grad_norm": 0.4469890892505646, "learning_rate": 5.63661401090731e-06, "loss": 0.3115, "step": 30737 }, { "epoch": 1.410582350511679, "grad_norm": 0.5021235346794128, "learning_rate": 5.636370817737596e-06, "loss": 0.4222, "step": 30738 }, { "epoch": 1.4106282410169335, "grad_norm": 0.48171862959861755, "learning_rate": 5.636127623037612e-06, "loss": 0.3703, "step": 30739 }, { "epoch": 1.410674131522188, "grad_norm": 0.47213053703308105, "learning_rate": 5.635884426807941e-06, "loss": 0.3496, "step": 30740 }, { "epoch": 1.4107200220274425, "grad_norm": 0.4211561977863312, "learning_rate": 5.635641229049169e-06, "loss": 0.2877, "step": 30741 }, { "epoch": 1.410765912532697, "grad_norm": 0.4373224079608917, "learning_rate": 5.6353980297618805e-06, "loss": 0.3303, "step": 30742 }, { "epoch": 1.4108118030379515, "grad_norm": 0.4817468822002411, "learning_rate": 5.635154828946661e-06, "loss": 0.3652, "step": 30743 }, { "epoch": 1.410857693543206, "grad_norm": 0.46277809143066406, "learning_rate": 5.6349116266040925e-06, "loss": 0.362, "step": 30744 }, { "epoch": 1.4109035840484605, "grad_norm": 0.44873908162117004, "learning_rate": 5.6346684227347634e-06, "loss": 0.3154, "step": 30745 }, { "epoch": 1.4109494745537148, "grad_norm": 0.48887234926223755, "learning_rate": 5.634425217339258e-06, "loss": 0.4277, "step": 30746 }, { "epoch": 1.4109953650589693, "grad_norm": 0.463225781917572, "learning_rate": 5.634182010418158e-06, "loss": 0.3524, "step": 30747 }, { "epoch": 1.4110412555642238, "grad_norm": 0.46243152022361755, "learning_rate": 5.633938801972051e-06, "loss": 0.3589, "step": 30748 }, { "epoch": 1.4110871460694783, "grad_norm": 0.4806060194969177, "learning_rate": 5.633695592001522e-06, "loss": 0.3603, "step": 30749 }, { "epoch": 1.4111330365747328, "grad_norm": 0.4719722867012024, "learning_rate": 5.633452380507155e-06, "loss": 0.3512, "step": 30750 }, { "epoch": 1.411178927079987, "grad_norm": 0.4405514597892761, "learning_rate": 5.633209167489536e-06, "loss": 0.355, "step": 30751 }, { "epoch": 1.4112248175852415, "grad_norm": 0.47409629821777344, "learning_rate": 5.632965952949246e-06, "loss": 0.3436, "step": 30752 }, { "epoch": 1.411270708090496, "grad_norm": 0.4613935947418213, "learning_rate": 5.632722736886875e-06, "loss": 0.3478, "step": 30753 }, { "epoch": 1.4113165985957505, "grad_norm": 0.45856964588165283, "learning_rate": 5.632479519303005e-06, "loss": 0.3378, "step": 30754 }, { "epoch": 1.411362489101005, "grad_norm": 0.4496823847293854, "learning_rate": 5.63223630019822e-06, "loss": 0.3174, "step": 30755 }, { "epoch": 1.4114083796062595, "grad_norm": 0.44037240743637085, "learning_rate": 5.631993079573108e-06, "loss": 0.2915, "step": 30756 }, { "epoch": 1.411454270111514, "grad_norm": 0.444921612739563, "learning_rate": 5.631749857428253e-06, "loss": 0.3347, "step": 30757 }, { "epoch": 1.4115001606167685, "grad_norm": 0.4797303378582001, "learning_rate": 5.6315066337642355e-06, "loss": 0.3707, "step": 30758 }, { "epoch": 1.4115460511220228, "grad_norm": 0.5092024207115173, "learning_rate": 5.631263408581646e-06, "loss": 0.4175, "step": 30759 }, { "epoch": 1.4115919416272773, "grad_norm": 0.5355422496795654, "learning_rate": 5.631020181881067e-06, "loss": 0.3838, "step": 30760 }, { "epoch": 1.4116378321325318, "grad_norm": 0.4820060729980469, "learning_rate": 5.630776953663084e-06, "loss": 0.3458, "step": 30761 }, { "epoch": 1.4116837226377863, "grad_norm": 0.4782482385635376, "learning_rate": 5.6305337239282805e-06, "loss": 0.4286, "step": 30762 }, { "epoch": 1.4117296131430406, "grad_norm": 0.4851239025592804, "learning_rate": 5.630290492677244e-06, "loss": 0.3507, "step": 30763 }, { "epoch": 1.411775503648295, "grad_norm": 0.4915924072265625, "learning_rate": 5.630047259910556e-06, "loss": 0.3632, "step": 30764 }, { "epoch": 1.4118213941535496, "grad_norm": 0.44733747839927673, "learning_rate": 5.629804025628804e-06, "loss": 0.2955, "step": 30765 }, { "epoch": 1.411867284658804, "grad_norm": 0.43608808517456055, "learning_rate": 5.629560789832572e-06, "loss": 0.318, "step": 30766 }, { "epoch": 1.4119131751640586, "grad_norm": 0.5141822099685669, "learning_rate": 5.629317552522446e-06, "loss": 0.4107, "step": 30767 }, { "epoch": 1.411959065669313, "grad_norm": 0.48339518904685974, "learning_rate": 5.629074313699009e-06, "loss": 0.3796, "step": 30768 }, { "epoch": 1.4120049561745676, "grad_norm": 0.441035658121109, "learning_rate": 5.6288310733628475e-06, "loss": 0.3165, "step": 30769 }, { "epoch": 1.412050846679822, "grad_norm": 0.45699751377105713, "learning_rate": 5.628587831514544e-06, "loss": 0.3244, "step": 30770 }, { "epoch": 1.4120967371850763, "grad_norm": 0.4463905096054077, "learning_rate": 5.628344588154686e-06, "loss": 0.3185, "step": 30771 }, { "epoch": 1.4121426276903308, "grad_norm": 0.5068268179893494, "learning_rate": 5.628101343283858e-06, "loss": 0.4086, "step": 30772 }, { "epoch": 1.4121885181955853, "grad_norm": 0.49815142154693604, "learning_rate": 5.6278580969026444e-06, "loss": 0.376, "step": 30773 }, { "epoch": 1.4122344087008398, "grad_norm": 0.4651508927345276, "learning_rate": 5.6276148490116316e-06, "loss": 0.3421, "step": 30774 }, { "epoch": 1.4122802992060943, "grad_norm": 0.45601505041122437, "learning_rate": 5.6273715996114e-06, "loss": 0.3526, "step": 30775 }, { "epoch": 1.4123261897113486, "grad_norm": 0.4383566379547119, "learning_rate": 5.627128348702538e-06, "loss": 0.3144, "step": 30776 }, { "epoch": 1.412372080216603, "grad_norm": 0.4120192527770996, "learning_rate": 5.626885096285633e-06, "loss": 0.3014, "step": 30777 }, { "epoch": 1.4124179707218576, "grad_norm": 0.44541439414024353, "learning_rate": 5.626641842361267e-06, "loss": 0.2891, "step": 30778 }, { "epoch": 1.412463861227112, "grad_norm": 0.4743247926235199, "learning_rate": 5.626398586930023e-06, "loss": 0.347, "step": 30779 }, { "epoch": 1.4125097517323666, "grad_norm": 0.4687708616256714, "learning_rate": 5.62615532999249e-06, "loss": 0.3129, "step": 30780 }, { "epoch": 1.412555642237621, "grad_norm": 0.41564494371414185, "learning_rate": 5.625912071549248e-06, "loss": 0.2624, "step": 30781 }, { "epoch": 1.4126015327428756, "grad_norm": 0.43975740671157837, "learning_rate": 5.625668811600888e-06, "loss": 0.2809, "step": 30782 }, { "epoch": 1.41264742324813, "grad_norm": 0.4882320165634155, "learning_rate": 5.625425550147993e-06, "loss": 0.3371, "step": 30783 }, { "epoch": 1.4126933137533844, "grad_norm": 0.451289564371109, "learning_rate": 5.625182287191145e-06, "loss": 0.3021, "step": 30784 }, { "epoch": 1.4127392042586389, "grad_norm": 0.46049511432647705, "learning_rate": 5.624939022730931e-06, "loss": 0.3398, "step": 30785 }, { "epoch": 1.4127850947638934, "grad_norm": 0.4688246250152588, "learning_rate": 5.624695756767935e-06, "loss": 0.3753, "step": 30786 }, { "epoch": 1.4128309852691479, "grad_norm": 0.4541654586791992, "learning_rate": 5.624452489302743e-06, "loss": 0.3402, "step": 30787 }, { "epoch": 1.4128768757744024, "grad_norm": 0.5195730924606323, "learning_rate": 5.624209220335941e-06, "loss": 0.4128, "step": 30788 }, { "epoch": 1.4129227662796566, "grad_norm": 0.5064204335212708, "learning_rate": 5.623965949868113e-06, "loss": 0.4544, "step": 30789 }, { "epoch": 1.4129686567849111, "grad_norm": 0.4945853650569916, "learning_rate": 5.623722677899842e-06, "loss": 0.3834, "step": 30790 }, { "epoch": 1.4130145472901656, "grad_norm": 0.45019251108169556, "learning_rate": 5.623479404431717e-06, "loss": 0.3206, "step": 30791 }, { "epoch": 1.4130604377954201, "grad_norm": 0.48744407296180725, "learning_rate": 5.623236129464319e-06, "loss": 0.427, "step": 30792 }, { "epoch": 1.4131063283006746, "grad_norm": 0.44211477041244507, "learning_rate": 5.622992852998237e-06, "loss": 0.3379, "step": 30793 }, { "epoch": 1.4131522188059291, "grad_norm": 0.4443802535533905, "learning_rate": 5.622749575034052e-06, "loss": 0.282, "step": 30794 }, { "epoch": 1.4131981093111836, "grad_norm": 0.4637661576271057, "learning_rate": 5.622506295572351e-06, "loss": 0.3568, "step": 30795 }, { "epoch": 1.4132439998164381, "grad_norm": 0.45468705892562866, "learning_rate": 5.622263014613719e-06, "loss": 0.3247, "step": 30796 }, { "epoch": 1.4132898903216924, "grad_norm": 0.4522338807582855, "learning_rate": 5.622019732158741e-06, "loss": 0.3029, "step": 30797 }, { "epoch": 1.4133357808269469, "grad_norm": 0.45286786556243896, "learning_rate": 5.621776448208002e-06, "loss": 0.3419, "step": 30798 }, { "epoch": 1.4133816713322014, "grad_norm": 0.5250926613807678, "learning_rate": 5.621533162762086e-06, "loss": 0.3537, "step": 30799 }, { "epoch": 1.4134275618374559, "grad_norm": 0.4815371036529541, "learning_rate": 5.621289875821581e-06, "loss": 0.3945, "step": 30800 }, { "epoch": 1.4134734523427102, "grad_norm": 0.4450767934322357, "learning_rate": 5.6210465873870675e-06, "loss": 0.3141, "step": 30801 }, { "epoch": 1.4135193428479647, "grad_norm": 0.45520344376564026, "learning_rate": 5.620803297459134e-06, "loss": 0.3443, "step": 30802 }, { "epoch": 1.4135652333532192, "grad_norm": 0.4648921489715576, "learning_rate": 5.620560006038365e-06, "loss": 0.3363, "step": 30803 }, { "epoch": 1.4136111238584737, "grad_norm": 0.43136176466941833, "learning_rate": 5.620316713125344e-06, "loss": 0.2792, "step": 30804 }, { "epoch": 1.4136570143637281, "grad_norm": 0.4508459270000458, "learning_rate": 5.620073418720659e-06, "loss": 0.3164, "step": 30805 }, { "epoch": 1.4137029048689826, "grad_norm": 0.4485633075237274, "learning_rate": 5.61983012282489e-06, "loss": 0.355, "step": 30806 }, { "epoch": 1.4137487953742371, "grad_norm": 0.4914606511592865, "learning_rate": 5.619586825438628e-06, "loss": 0.3541, "step": 30807 }, { "epoch": 1.4137946858794916, "grad_norm": 0.4921012818813324, "learning_rate": 5.619343526562454e-06, "loss": 0.418, "step": 30808 }, { "epoch": 1.413840576384746, "grad_norm": 0.5061198472976685, "learning_rate": 5.619100226196954e-06, "loss": 0.4026, "step": 30809 }, { "epoch": 1.4138864668900004, "grad_norm": 0.5178914070129395, "learning_rate": 5.6188569243427135e-06, "loss": 0.4659, "step": 30810 }, { "epoch": 1.413932357395255, "grad_norm": 0.4768790006637573, "learning_rate": 5.618613621000319e-06, "loss": 0.3339, "step": 30811 }, { "epoch": 1.4139782479005094, "grad_norm": 0.49426522850990295, "learning_rate": 5.618370316170352e-06, "loss": 0.4321, "step": 30812 }, { "epoch": 1.414024138405764, "grad_norm": 0.4731158912181854, "learning_rate": 5.618127009853401e-06, "loss": 0.383, "step": 30813 }, { "epoch": 1.4140700289110182, "grad_norm": 0.48745959997177124, "learning_rate": 5.617883702050047e-06, "loss": 0.4198, "step": 30814 }, { "epoch": 1.4141159194162727, "grad_norm": 0.44403496384620667, "learning_rate": 5.6176403927608814e-06, "loss": 0.3322, "step": 30815 }, { "epoch": 1.4141618099215272, "grad_norm": 0.4661827087402344, "learning_rate": 5.617397081986484e-06, "loss": 0.3352, "step": 30816 }, { "epoch": 1.4142077004267817, "grad_norm": 0.4513093829154968, "learning_rate": 5.61715376972744e-06, "loss": 0.3203, "step": 30817 }, { "epoch": 1.4142535909320362, "grad_norm": 0.4757383167743683, "learning_rate": 5.616910455984337e-06, "loss": 0.4375, "step": 30818 }, { "epoch": 1.4142994814372907, "grad_norm": 0.4529770612716675, "learning_rate": 5.61666714075776e-06, "loss": 0.3456, "step": 30819 }, { "epoch": 1.4143453719425452, "grad_norm": 0.49846771359443665, "learning_rate": 5.616423824048291e-06, "loss": 0.4144, "step": 30820 }, { "epoch": 1.4143912624477997, "grad_norm": 0.46628206968307495, "learning_rate": 5.61618050585652e-06, "loss": 0.375, "step": 30821 }, { "epoch": 1.414437152953054, "grad_norm": 0.46206483244895935, "learning_rate": 5.615937186183029e-06, "loss": 0.3679, "step": 30822 }, { "epoch": 1.4144830434583084, "grad_norm": 0.48857581615448, "learning_rate": 5.615693865028402e-06, "loss": 0.3742, "step": 30823 }, { "epoch": 1.414528933963563, "grad_norm": 0.46295231580734253, "learning_rate": 5.615450542393225e-06, "loss": 0.2921, "step": 30824 }, { "epoch": 1.4145748244688174, "grad_norm": 0.49245187640190125, "learning_rate": 5.615207218278085e-06, "loss": 0.3819, "step": 30825 }, { "epoch": 1.414620714974072, "grad_norm": 0.449907124042511, "learning_rate": 5.614963892683567e-06, "loss": 0.3027, "step": 30826 }, { "epoch": 1.4146666054793262, "grad_norm": 0.4931392967700958, "learning_rate": 5.614720565610252e-06, "loss": 0.3888, "step": 30827 }, { "epoch": 1.4147124959845807, "grad_norm": 0.47935810685157776, "learning_rate": 5.61447723705873e-06, "loss": 0.3843, "step": 30828 }, { "epoch": 1.4147583864898352, "grad_norm": 0.48280128836631775, "learning_rate": 5.614233907029583e-06, "loss": 0.4114, "step": 30829 }, { "epoch": 1.4148042769950897, "grad_norm": 0.5052943229675293, "learning_rate": 5.613990575523398e-06, "loss": 0.4056, "step": 30830 }, { "epoch": 1.4148501675003442, "grad_norm": 0.48071202635765076, "learning_rate": 5.6137472425407604e-06, "loss": 0.3942, "step": 30831 }, { "epoch": 1.4148960580055987, "grad_norm": 0.4723033010959625, "learning_rate": 5.613503908082254e-06, "loss": 0.2998, "step": 30832 }, { "epoch": 1.4149419485108532, "grad_norm": 0.6170492172241211, "learning_rate": 5.6132605721484635e-06, "loss": 0.4795, "step": 30833 }, { "epoch": 1.4149878390161077, "grad_norm": 0.45418781042099, "learning_rate": 5.613017234739975e-06, "loss": 0.3166, "step": 30834 }, { "epoch": 1.415033729521362, "grad_norm": 0.44432970881462097, "learning_rate": 5.612773895857373e-06, "loss": 0.3242, "step": 30835 }, { "epoch": 1.4150796200266165, "grad_norm": 0.48921099305152893, "learning_rate": 5.6125305555012445e-06, "loss": 0.4322, "step": 30836 }, { "epoch": 1.415125510531871, "grad_norm": 0.47543594241142273, "learning_rate": 5.612287213672173e-06, "loss": 0.3817, "step": 30837 }, { "epoch": 1.4151714010371255, "grad_norm": 0.48448383808135986, "learning_rate": 5.612043870370744e-06, "loss": 0.358, "step": 30838 }, { "epoch": 1.41521729154238, "grad_norm": 0.47895464301109314, "learning_rate": 5.611800525597541e-06, "loss": 0.3935, "step": 30839 }, { "epoch": 1.4152631820476342, "grad_norm": 0.44931748509407043, "learning_rate": 5.611557179353153e-06, "loss": 0.3232, "step": 30840 }, { "epoch": 1.4153090725528887, "grad_norm": 0.5104110240936279, "learning_rate": 5.611313831638161e-06, "loss": 0.387, "step": 30841 }, { "epoch": 1.4153549630581432, "grad_norm": 0.47373420000076294, "learning_rate": 5.611070482453154e-06, "loss": 0.4025, "step": 30842 }, { "epoch": 1.4154008535633977, "grad_norm": 0.487661749124527, "learning_rate": 5.610827131798715e-06, "loss": 0.3783, "step": 30843 }, { "epoch": 1.4154467440686522, "grad_norm": 0.46824395656585693, "learning_rate": 5.610583779675429e-06, "loss": 0.3652, "step": 30844 }, { "epoch": 1.4154926345739067, "grad_norm": 0.49245521426200867, "learning_rate": 5.610340426083882e-06, "loss": 0.3817, "step": 30845 }, { "epoch": 1.4155385250791612, "grad_norm": 0.4526039659976959, "learning_rate": 5.610097071024658e-06, "loss": 0.3703, "step": 30846 }, { "epoch": 1.4155844155844157, "grad_norm": 0.48301881551742554, "learning_rate": 5.609853714498346e-06, "loss": 0.375, "step": 30847 }, { "epoch": 1.41563030608967, "grad_norm": 0.4459870457649231, "learning_rate": 5.609610356505526e-06, "loss": 0.2999, "step": 30848 }, { "epoch": 1.4156761965949245, "grad_norm": 0.47432318329811096, "learning_rate": 5.609366997046785e-06, "loss": 0.3177, "step": 30849 }, { "epoch": 1.415722087100179, "grad_norm": 0.43323707580566406, "learning_rate": 5.60912363612271e-06, "loss": 0.3027, "step": 30850 }, { "epoch": 1.4157679776054335, "grad_norm": 0.47800540924072266, "learning_rate": 5.6088802737338834e-06, "loss": 0.3604, "step": 30851 }, { "epoch": 1.4158138681106878, "grad_norm": 0.46721842885017395, "learning_rate": 5.608636909880892e-06, "loss": 0.3639, "step": 30852 }, { "epoch": 1.4158597586159423, "grad_norm": 0.46183836460113525, "learning_rate": 5.608393544564322e-06, "loss": 0.3257, "step": 30853 }, { "epoch": 1.4159056491211968, "grad_norm": 0.4921756684780121, "learning_rate": 5.608150177784758e-06, "loss": 0.3916, "step": 30854 }, { "epoch": 1.4159515396264513, "grad_norm": 0.4852762818336487, "learning_rate": 5.6079068095427835e-06, "loss": 0.3868, "step": 30855 }, { "epoch": 1.4159974301317058, "grad_norm": 0.44553765654563904, "learning_rate": 5.607663439838986e-06, "loss": 0.3186, "step": 30856 }, { "epoch": 1.4160433206369603, "grad_norm": 0.47788533568382263, "learning_rate": 5.607420068673949e-06, "loss": 0.3654, "step": 30857 }, { "epoch": 1.4160892111422148, "grad_norm": 0.45754408836364746, "learning_rate": 5.6071766960482585e-06, "loss": 0.3741, "step": 30858 }, { "epoch": 1.4161351016474693, "grad_norm": 0.498123437166214, "learning_rate": 5.606933321962501e-06, "loss": 0.3773, "step": 30859 }, { "epoch": 1.4161809921527235, "grad_norm": 0.4949764907360077, "learning_rate": 5.606689946417258e-06, "loss": 0.4187, "step": 30860 }, { "epoch": 1.416226882657978, "grad_norm": 0.47138383984565735, "learning_rate": 5.606446569413118e-06, "loss": 0.3677, "step": 30861 }, { "epoch": 1.4162727731632325, "grad_norm": 0.48738282918930054, "learning_rate": 5.606203190950667e-06, "loss": 0.4039, "step": 30862 }, { "epoch": 1.416318663668487, "grad_norm": 0.4584967792034149, "learning_rate": 5.605959811030487e-06, "loss": 0.3289, "step": 30863 }, { "epoch": 1.4163645541737415, "grad_norm": 0.4889296293258667, "learning_rate": 5.605716429653166e-06, "loss": 0.3874, "step": 30864 }, { "epoch": 1.4164104446789958, "grad_norm": 0.47574689984321594, "learning_rate": 5.605473046819287e-06, "loss": 0.2948, "step": 30865 }, { "epoch": 1.4164563351842503, "grad_norm": 0.4602375328540802, "learning_rate": 5.6052296625294375e-06, "loss": 0.2953, "step": 30866 }, { "epoch": 1.4165022256895048, "grad_norm": 0.44930317997932434, "learning_rate": 5.604986276784201e-06, "loss": 0.362, "step": 30867 }, { "epoch": 1.4165481161947593, "grad_norm": 0.46380865573883057, "learning_rate": 5.604742889584164e-06, "loss": 0.3139, "step": 30868 }, { "epoch": 1.4165940067000138, "grad_norm": 0.49410802125930786, "learning_rate": 5.60449950092991e-06, "loss": 0.4068, "step": 30869 }, { "epoch": 1.4166398972052683, "grad_norm": 0.49883976578712463, "learning_rate": 5.604256110822027e-06, "loss": 0.3873, "step": 30870 }, { "epoch": 1.4166857877105228, "grad_norm": 0.47129490971565247, "learning_rate": 5.604012719261098e-06, "loss": 0.3455, "step": 30871 }, { "epoch": 1.4167316782157773, "grad_norm": 0.45425158739089966, "learning_rate": 5.603769326247709e-06, "loss": 0.3247, "step": 30872 }, { "epoch": 1.4167775687210316, "grad_norm": 0.5123706459999084, "learning_rate": 5.603525931782445e-06, "loss": 0.3211, "step": 30873 }, { "epoch": 1.416823459226286, "grad_norm": 0.68637615442276, "learning_rate": 5.603282535865891e-06, "loss": 0.4701, "step": 30874 }, { "epoch": 1.4168693497315406, "grad_norm": 0.4929622411727905, "learning_rate": 5.603039138498635e-06, "loss": 0.3703, "step": 30875 }, { "epoch": 1.416915240236795, "grad_norm": 0.49409058690071106, "learning_rate": 5.602795739681259e-06, "loss": 0.3932, "step": 30876 }, { "epoch": 1.4169611307420495, "grad_norm": 0.4800538122653961, "learning_rate": 5.60255233941435e-06, "loss": 0.3435, "step": 30877 }, { "epoch": 1.4170070212473038, "grad_norm": 0.47643357515335083, "learning_rate": 5.602308937698492e-06, "loss": 0.3738, "step": 30878 }, { "epoch": 1.4170529117525583, "grad_norm": 0.5361646413803101, "learning_rate": 5.602065534534273e-06, "loss": 0.4618, "step": 30879 }, { "epoch": 1.4170988022578128, "grad_norm": 0.5524381399154663, "learning_rate": 5.601822129922273e-06, "loss": 0.4279, "step": 30880 }, { "epoch": 1.4171446927630673, "grad_norm": 0.5012064576148987, "learning_rate": 5.601578723863085e-06, "loss": 0.3724, "step": 30881 }, { "epoch": 1.4171905832683218, "grad_norm": 0.49646779894828796, "learning_rate": 5.601335316357286e-06, "loss": 0.3688, "step": 30882 }, { "epoch": 1.4172364737735763, "grad_norm": 0.46183857321739197, "learning_rate": 5.601091907405467e-06, "loss": 0.3285, "step": 30883 }, { "epoch": 1.4172823642788308, "grad_norm": 0.4562946856021881, "learning_rate": 5.600848497008212e-06, "loss": 0.3202, "step": 30884 }, { "epoch": 1.4173282547840853, "grad_norm": 0.5227342844009399, "learning_rate": 5.600605085166106e-06, "loss": 0.4564, "step": 30885 }, { "epoch": 1.4173741452893396, "grad_norm": 0.45754769444465637, "learning_rate": 5.600361671879734e-06, "loss": 0.316, "step": 30886 }, { "epoch": 1.417420035794594, "grad_norm": 0.5234241485595703, "learning_rate": 5.600118257149681e-06, "loss": 0.4744, "step": 30887 }, { "epoch": 1.4174659262998486, "grad_norm": 0.4899563491344452, "learning_rate": 5.599874840976534e-06, "loss": 0.383, "step": 30888 }, { "epoch": 1.417511816805103, "grad_norm": 0.449245423078537, "learning_rate": 5.5996314233608766e-06, "loss": 0.339, "step": 30889 }, { "epoch": 1.4175577073103574, "grad_norm": 0.4816667139530182, "learning_rate": 5.599388004303296e-06, "loss": 0.3604, "step": 30890 }, { "epoch": 1.4176035978156118, "grad_norm": 0.46153369545936584, "learning_rate": 5.599144583804375e-06, "loss": 0.3398, "step": 30891 }, { "epoch": 1.4176494883208663, "grad_norm": 0.5026823878288269, "learning_rate": 5.598901161864701e-06, "loss": 0.4169, "step": 30892 }, { "epoch": 1.4176953788261208, "grad_norm": 0.5115267038345337, "learning_rate": 5.5986577384848595e-06, "loss": 0.4208, "step": 30893 }, { "epoch": 1.4177412693313753, "grad_norm": 0.49240148067474365, "learning_rate": 5.5984143136654335e-06, "loss": 0.3377, "step": 30894 }, { "epoch": 1.4177871598366298, "grad_norm": 0.43318167328834534, "learning_rate": 5.59817088740701e-06, "loss": 0.3134, "step": 30895 }, { "epoch": 1.4178330503418843, "grad_norm": 0.46684059500694275, "learning_rate": 5.597927459710176e-06, "loss": 0.35, "step": 30896 }, { "epoch": 1.4178789408471388, "grad_norm": 0.4637692868709564, "learning_rate": 5.597684030575515e-06, "loss": 0.4069, "step": 30897 }, { "epoch": 1.4179248313523931, "grad_norm": 0.4730170667171478, "learning_rate": 5.597440600003612e-06, "loss": 0.3423, "step": 30898 }, { "epoch": 1.4179707218576476, "grad_norm": 0.4782581925392151, "learning_rate": 5.597197167995052e-06, "loss": 0.3814, "step": 30899 }, { "epoch": 1.418016612362902, "grad_norm": 0.5052204132080078, "learning_rate": 5.596953734550422e-06, "loss": 0.4053, "step": 30900 }, { "epoch": 1.4180625028681566, "grad_norm": 0.5245323777198792, "learning_rate": 5.5967102996703075e-06, "loss": 0.4144, "step": 30901 }, { "epoch": 1.418108393373411, "grad_norm": 0.508316159248352, "learning_rate": 5.596466863355291e-06, "loss": 0.3997, "step": 30902 }, { "epoch": 1.4181542838786654, "grad_norm": 0.47295576333999634, "learning_rate": 5.596223425605962e-06, "loss": 0.349, "step": 30903 }, { "epoch": 1.4182001743839199, "grad_norm": 0.4769926965236664, "learning_rate": 5.5959799864229035e-06, "loss": 0.3725, "step": 30904 }, { "epoch": 1.4182460648891744, "grad_norm": 0.5032974481582642, "learning_rate": 5.5957365458067e-06, "loss": 0.4556, "step": 30905 }, { "epoch": 1.4182919553944289, "grad_norm": 0.45182308554649353, "learning_rate": 5.595493103757939e-06, "loss": 0.3608, "step": 30906 }, { "epoch": 1.4183378458996834, "grad_norm": 0.4422386586666107, "learning_rate": 5.595249660277205e-06, "loss": 0.3116, "step": 30907 }, { "epoch": 1.4183837364049379, "grad_norm": 0.44333624839782715, "learning_rate": 5.595006215365084e-06, "loss": 0.3214, "step": 30908 }, { "epoch": 1.4184296269101924, "grad_norm": 0.47620120644569397, "learning_rate": 5.594762769022159e-06, "loss": 0.3668, "step": 30909 }, { "epoch": 1.4184755174154469, "grad_norm": 0.44359755516052246, "learning_rate": 5.594519321249019e-06, "loss": 0.3179, "step": 30910 }, { "epoch": 1.4185214079207011, "grad_norm": 0.4887865483760834, "learning_rate": 5.594275872046247e-06, "loss": 0.3646, "step": 30911 }, { "epoch": 1.4185672984259556, "grad_norm": 0.5266855955123901, "learning_rate": 5.5940324214144286e-06, "loss": 0.3124, "step": 30912 }, { "epoch": 1.4186131889312101, "grad_norm": 0.47665849328041077, "learning_rate": 5.59378896935415e-06, "loss": 0.3569, "step": 30913 }, { "epoch": 1.4186590794364646, "grad_norm": 0.4883550703525543, "learning_rate": 5.593545515865998e-06, "loss": 0.3918, "step": 30914 }, { "epoch": 1.4187049699417191, "grad_norm": 0.4549596309661865, "learning_rate": 5.593302060950555e-06, "loss": 0.2765, "step": 30915 }, { "epoch": 1.4187508604469734, "grad_norm": 0.4398154318332672, "learning_rate": 5.593058604608408e-06, "loss": 0.3291, "step": 30916 }, { "epoch": 1.418796750952228, "grad_norm": 0.5812874436378479, "learning_rate": 5.592815146840141e-06, "loss": 0.5129, "step": 30917 }, { "epoch": 1.4188426414574824, "grad_norm": 0.4330728054046631, "learning_rate": 5.592571687646342e-06, "loss": 0.3089, "step": 30918 }, { "epoch": 1.418888531962737, "grad_norm": 0.46022188663482666, "learning_rate": 5.592328227027596e-06, "loss": 0.2851, "step": 30919 }, { "epoch": 1.4189344224679914, "grad_norm": 0.4766254425048828, "learning_rate": 5.592084764984486e-06, "loss": 0.34, "step": 30920 }, { "epoch": 1.418980312973246, "grad_norm": 0.48329663276672363, "learning_rate": 5.5918413015176e-06, "loss": 0.3755, "step": 30921 }, { "epoch": 1.4190262034785004, "grad_norm": 0.42892536520957947, "learning_rate": 5.59159783662752e-06, "loss": 0.287, "step": 30922 }, { "epoch": 1.419072093983755, "grad_norm": 0.5095816850662231, "learning_rate": 5.591354370314835e-06, "loss": 0.3925, "step": 30923 }, { "epoch": 1.4191179844890092, "grad_norm": 0.5774825811386108, "learning_rate": 5.591110902580132e-06, "loss": 0.4593, "step": 30924 }, { "epoch": 1.4191638749942637, "grad_norm": 0.47800755500793457, "learning_rate": 5.590867433423992e-06, "loss": 0.382, "step": 30925 }, { "epoch": 1.4192097654995182, "grad_norm": 0.5057810544967651, "learning_rate": 5.590623962847001e-06, "loss": 0.3699, "step": 30926 }, { "epoch": 1.4192556560047727, "grad_norm": 0.4692411422729492, "learning_rate": 5.5903804908497486e-06, "loss": 0.3623, "step": 30927 }, { "epoch": 1.4193015465100272, "grad_norm": 0.43551987409591675, "learning_rate": 5.590137017432815e-06, "loss": 0.314, "step": 30928 }, { "epoch": 1.4193474370152814, "grad_norm": 0.48797404766082764, "learning_rate": 5.589893542596789e-06, "loss": 0.4336, "step": 30929 }, { "epoch": 1.419393327520536, "grad_norm": 0.4378354549407959, "learning_rate": 5.589650066342256e-06, "loss": 0.3055, "step": 30930 }, { "epoch": 1.4194392180257904, "grad_norm": 0.47631052136421204, "learning_rate": 5.5894065886698e-06, "loss": 0.3635, "step": 30931 }, { "epoch": 1.419485108531045, "grad_norm": 0.47139057517051697, "learning_rate": 5.589163109580007e-06, "loss": 0.3739, "step": 30932 }, { "epoch": 1.4195309990362994, "grad_norm": 0.44874078035354614, "learning_rate": 5.588919629073462e-06, "loss": 0.3272, "step": 30933 }, { "epoch": 1.419576889541554, "grad_norm": 0.4764311611652374, "learning_rate": 5.588676147150752e-06, "loss": 0.3375, "step": 30934 }, { "epoch": 1.4196227800468084, "grad_norm": 0.45631274580955505, "learning_rate": 5.588432663812462e-06, "loss": 0.3332, "step": 30935 }, { "epoch": 1.419668670552063, "grad_norm": 0.456581711769104, "learning_rate": 5.588189179059177e-06, "loss": 0.3428, "step": 30936 }, { "epoch": 1.4197145610573172, "grad_norm": 0.46663832664489746, "learning_rate": 5.587945692891482e-06, "loss": 0.3339, "step": 30937 }, { "epoch": 1.4197604515625717, "grad_norm": 0.4635070264339447, "learning_rate": 5.587702205309964e-06, "loss": 0.3667, "step": 30938 }, { "epoch": 1.4198063420678262, "grad_norm": 0.4456779360771179, "learning_rate": 5.587458716315207e-06, "loss": 0.3137, "step": 30939 }, { "epoch": 1.4198522325730807, "grad_norm": 0.4748457670211792, "learning_rate": 5.587215225907798e-06, "loss": 0.3526, "step": 30940 }, { "epoch": 1.419898123078335, "grad_norm": 0.4397049844264984, "learning_rate": 5.586971734088321e-06, "loss": 0.2946, "step": 30941 }, { "epoch": 1.4199440135835895, "grad_norm": 0.5057998895645142, "learning_rate": 5.586728240857362e-06, "loss": 0.3774, "step": 30942 }, { "epoch": 1.419989904088844, "grad_norm": 0.4631330668926239, "learning_rate": 5.586484746215507e-06, "loss": 0.3484, "step": 30943 }, { "epoch": 1.4200357945940985, "grad_norm": 0.4738018810749054, "learning_rate": 5.586241250163342e-06, "loss": 0.3793, "step": 30944 }, { "epoch": 1.420081685099353, "grad_norm": 0.4386574923992157, "learning_rate": 5.585997752701451e-06, "loss": 0.3163, "step": 30945 }, { "epoch": 1.4201275756046075, "grad_norm": 0.4675652086734772, "learning_rate": 5.585754253830421e-06, "loss": 0.3708, "step": 30946 }, { "epoch": 1.420173466109862, "grad_norm": 0.45826563239097595, "learning_rate": 5.585510753550837e-06, "loss": 0.3425, "step": 30947 }, { "epoch": 1.4202193566151164, "grad_norm": 0.3999793529510498, "learning_rate": 5.585267251863284e-06, "loss": 0.2985, "step": 30948 }, { "epoch": 1.4202652471203707, "grad_norm": 0.4877398610115051, "learning_rate": 5.585023748768348e-06, "loss": 0.349, "step": 30949 }, { "epoch": 1.4203111376256252, "grad_norm": 0.48880425095558167, "learning_rate": 5.584780244266613e-06, "loss": 0.3755, "step": 30950 }, { "epoch": 1.4203570281308797, "grad_norm": 0.47869059443473816, "learning_rate": 5.584536738358668e-06, "loss": 0.3465, "step": 30951 }, { "epoch": 1.4204029186361342, "grad_norm": 0.5100029110908508, "learning_rate": 5.584293231045097e-06, "loss": 0.3831, "step": 30952 }, { "epoch": 1.4204488091413887, "grad_norm": 0.46094179153442383, "learning_rate": 5.584049722326484e-06, "loss": 0.334, "step": 30953 }, { "epoch": 1.420494699646643, "grad_norm": 0.47298967838287354, "learning_rate": 5.583806212203416e-06, "loss": 0.3424, "step": 30954 }, { "epoch": 1.4205405901518975, "grad_norm": 0.4833521842956543, "learning_rate": 5.583562700676478e-06, "loss": 0.3494, "step": 30955 }, { "epoch": 1.420586480657152, "grad_norm": 0.440070778131485, "learning_rate": 5.583319187746255e-06, "loss": 0.336, "step": 30956 }, { "epoch": 1.4206323711624065, "grad_norm": 0.5047367811203003, "learning_rate": 5.583075673413336e-06, "loss": 0.3632, "step": 30957 }, { "epoch": 1.420678261667661, "grad_norm": 0.47020480036735535, "learning_rate": 5.582832157678303e-06, "loss": 0.347, "step": 30958 }, { "epoch": 1.4207241521729155, "grad_norm": 0.48552921414375305, "learning_rate": 5.582588640541742e-06, "loss": 0.3683, "step": 30959 }, { "epoch": 1.42077004267817, "grad_norm": 0.48459237813949585, "learning_rate": 5.582345122004239e-06, "loss": 0.4275, "step": 30960 }, { "epoch": 1.4208159331834245, "grad_norm": 0.46578142046928406, "learning_rate": 5.582101602066381e-06, "loss": 0.3575, "step": 30961 }, { "epoch": 1.4208618236886787, "grad_norm": 0.48758113384246826, "learning_rate": 5.581858080728752e-06, "loss": 0.3432, "step": 30962 }, { "epoch": 1.4209077141939332, "grad_norm": 0.4580640494823456, "learning_rate": 5.581614557991938e-06, "loss": 0.3834, "step": 30963 }, { "epoch": 1.4209536046991877, "grad_norm": 0.4301185607910156, "learning_rate": 5.581371033856524e-06, "loss": 0.2891, "step": 30964 }, { "epoch": 1.4209994952044422, "grad_norm": 0.4643731415271759, "learning_rate": 5.581127508323096e-06, "loss": 0.3061, "step": 30965 }, { "epoch": 1.4210453857096967, "grad_norm": 0.46613380312919617, "learning_rate": 5.58088398139224e-06, "loss": 0.3435, "step": 30966 }, { "epoch": 1.421091276214951, "grad_norm": 0.46103614568710327, "learning_rate": 5.580640453064541e-06, "loss": 0.3541, "step": 30967 }, { "epoch": 1.4211371667202055, "grad_norm": 0.4556327760219574, "learning_rate": 5.580396923340586e-06, "loss": 0.3281, "step": 30968 }, { "epoch": 1.42118305722546, "grad_norm": 0.4736635982990265, "learning_rate": 5.580153392220959e-06, "loss": 0.3556, "step": 30969 }, { "epoch": 1.4212289477307145, "grad_norm": 0.46170416474342346, "learning_rate": 5.579909859706246e-06, "loss": 0.3558, "step": 30970 }, { "epoch": 1.421274838235969, "grad_norm": 0.507839560508728, "learning_rate": 5.579666325797033e-06, "loss": 0.3766, "step": 30971 }, { "epoch": 1.4213207287412235, "grad_norm": 0.4694567322731018, "learning_rate": 5.579422790493906e-06, "loss": 0.3368, "step": 30972 }, { "epoch": 1.421366619246478, "grad_norm": 0.44808900356292725, "learning_rate": 5.57917925379745e-06, "loss": 0.3117, "step": 30973 }, { "epoch": 1.4214125097517325, "grad_norm": 0.48612546920776367, "learning_rate": 5.578935715708249e-06, "loss": 0.3724, "step": 30974 }, { "epoch": 1.4214584002569868, "grad_norm": 0.4376981854438782, "learning_rate": 5.578692176226892e-06, "loss": 0.2955, "step": 30975 }, { "epoch": 1.4215042907622413, "grad_norm": 0.44477537274360657, "learning_rate": 5.578448635353962e-06, "loss": 0.3214, "step": 30976 }, { "epoch": 1.4215501812674958, "grad_norm": 0.4642540216445923, "learning_rate": 5.5782050930900455e-06, "loss": 0.3536, "step": 30977 }, { "epoch": 1.4215960717727503, "grad_norm": 0.4852762520313263, "learning_rate": 5.577961549435729e-06, "loss": 0.372, "step": 30978 }, { "epoch": 1.4216419622780045, "grad_norm": 0.44843393564224243, "learning_rate": 5.577718004391598e-06, "loss": 0.3458, "step": 30979 }, { "epoch": 1.421687852783259, "grad_norm": 0.46489760279655457, "learning_rate": 5.577474457958238e-06, "loss": 0.4139, "step": 30980 }, { "epoch": 1.4217337432885135, "grad_norm": 0.48675012588500977, "learning_rate": 5.577230910136231e-06, "loss": 0.4099, "step": 30981 }, { "epoch": 1.421779633793768, "grad_norm": 0.4463357627391815, "learning_rate": 5.576987360926167e-06, "loss": 0.3154, "step": 30982 }, { "epoch": 1.4218255242990225, "grad_norm": 0.4637179374694824, "learning_rate": 5.576743810328632e-06, "loss": 0.3538, "step": 30983 }, { "epoch": 1.421871414804277, "grad_norm": 0.4994390606880188, "learning_rate": 5.576500258344211e-06, "loss": 0.4532, "step": 30984 }, { "epoch": 1.4219173053095315, "grad_norm": 0.43484872579574585, "learning_rate": 5.5762567049734864e-06, "loss": 0.2709, "step": 30985 }, { "epoch": 1.421963195814786, "grad_norm": 0.45192837715148926, "learning_rate": 5.576013150217046e-06, "loss": 0.3246, "step": 30986 }, { "epoch": 1.4220090863200403, "grad_norm": 0.48731496930122375, "learning_rate": 5.575769594075477e-06, "loss": 0.4002, "step": 30987 }, { "epoch": 1.4220549768252948, "grad_norm": 0.45242056250572205, "learning_rate": 5.575526036549364e-06, "loss": 0.3529, "step": 30988 }, { "epoch": 1.4221008673305493, "grad_norm": 0.4376673400402069, "learning_rate": 5.575282477639292e-06, "loss": 0.2943, "step": 30989 }, { "epoch": 1.4221467578358038, "grad_norm": 0.4633028209209442, "learning_rate": 5.575038917345848e-06, "loss": 0.356, "step": 30990 }, { "epoch": 1.4221926483410583, "grad_norm": 0.46812039613723755, "learning_rate": 5.574795355669616e-06, "loss": 0.3322, "step": 30991 }, { "epoch": 1.4222385388463126, "grad_norm": 0.44561320543289185, "learning_rate": 5.574551792611184e-06, "loss": 0.3323, "step": 30992 }, { "epoch": 1.422284429351567, "grad_norm": 0.4743368923664093, "learning_rate": 5.574308228171134e-06, "loss": 0.3734, "step": 30993 }, { "epoch": 1.4223303198568216, "grad_norm": 0.4766879975795746, "learning_rate": 5.574064662350055e-06, "loss": 0.3529, "step": 30994 }, { "epoch": 1.422376210362076, "grad_norm": 0.4752628803253174, "learning_rate": 5.573821095148533e-06, "loss": 0.3583, "step": 30995 }, { "epoch": 1.4224221008673306, "grad_norm": 0.4821039140224457, "learning_rate": 5.573577526567151e-06, "loss": 0.403, "step": 30996 }, { "epoch": 1.422467991372585, "grad_norm": 0.4664444327354431, "learning_rate": 5.573333956606497e-06, "loss": 0.3419, "step": 30997 }, { "epoch": 1.4225138818778396, "grad_norm": 0.4838399291038513, "learning_rate": 5.5730903852671536e-06, "loss": 0.3796, "step": 30998 }, { "epoch": 1.422559772383094, "grad_norm": 0.48404639959335327, "learning_rate": 5.57284681254971e-06, "loss": 0.3687, "step": 30999 }, { "epoch": 1.4226056628883483, "grad_norm": 0.5291709899902344, "learning_rate": 5.5726032384547516e-06, "loss": 0.4816, "step": 31000 }, { "epoch": 1.4226515533936028, "grad_norm": 0.490590900182724, "learning_rate": 5.572359662982862e-06, "loss": 0.3345, "step": 31001 }, { "epoch": 1.4226974438988573, "grad_norm": 0.48723292350769043, "learning_rate": 5.572116086134628e-06, "loss": 0.4069, "step": 31002 }, { "epoch": 1.4227433344041118, "grad_norm": 0.44026732444763184, "learning_rate": 5.571872507910636e-06, "loss": 0.2971, "step": 31003 }, { "epoch": 1.4227892249093663, "grad_norm": 0.5076603293418884, "learning_rate": 5.571628928311471e-06, "loss": 0.4092, "step": 31004 }, { "epoch": 1.4228351154146206, "grad_norm": 0.49760347604751587, "learning_rate": 5.571385347337718e-06, "loss": 0.4052, "step": 31005 }, { "epoch": 1.422881005919875, "grad_norm": 0.46346408128738403, "learning_rate": 5.571141764989966e-06, "loss": 0.3899, "step": 31006 }, { "epoch": 1.4229268964251296, "grad_norm": 0.465949147939682, "learning_rate": 5.570898181268795e-06, "loss": 0.3499, "step": 31007 }, { "epoch": 1.422972786930384, "grad_norm": 0.44134849309921265, "learning_rate": 5.570654596174796e-06, "loss": 0.3274, "step": 31008 }, { "epoch": 1.4230186774356386, "grad_norm": 0.48117873072624207, "learning_rate": 5.570411009708553e-06, "loss": 0.3799, "step": 31009 }, { "epoch": 1.423064567940893, "grad_norm": 0.4314451813697815, "learning_rate": 5.570167421870651e-06, "loss": 0.3018, "step": 31010 }, { "epoch": 1.4231104584461476, "grad_norm": 0.45402801036834717, "learning_rate": 5.569923832661676e-06, "loss": 0.3188, "step": 31011 }, { "epoch": 1.423156348951402, "grad_norm": 0.47225436568260193, "learning_rate": 5.569680242082215e-06, "loss": 0.3711, "step": 31012 }, { "epoch": 1.4232022394566564, "grad_norm": 0.4351010024547577, "learning_rate": 5.569436650132851e-06, "loss": 0.3159, "step": 31013 }, { "epoch": 1.4232481299619109, "grad_norm": 0.4244071841239929, "learning_rate": 5.569193056814174e-06, "loss": 0.2713, "step": 31014 }, { "epoch": 1.4232940204671654, "grad_norm": 0.4773484170436859, "learning_rate": 5.568949462126765e-06, "loss": 0.3774, "step": 31015 }, { "epoch": 1.4233399109724199, "grad_norm": 0.4773140251636505, "learning_rate": 5.568705866071212e-06, "loss": 0.3796, "step": 31016 }, { "epoch": 1.4233858014776744, "grad_norm": 0.458549439907074, "learning_rate": 5.568462268648104e-06, "loss": 0.3221, "step": 31017 }, { "epoch": 1.4234316919829286, "grad_norm": 0.4530276358127594, "learning_rate": 5.568218669858021e-06, "loss": 0.3012, "step": 31018 }, { "epoch": 1.4234775824881831, "grad_norm": 0.47206175327301025, "learning_rate": 5.5679750697015525e-06, "loss": 0.3511, "step": 31019 }, { "epoch": 1.4235234729934376, "grad_norm": 0.4554614722728729, "learning_rate": 5.5677314681792824e-06, "loss": 0.3371, "step": 31020 }, { "epoch": 1.4235693634986921, "grad_norm": 0.5060295462608337, "learning_rate": 5.567487865291798e-06, "loss": 0.3804, "step": 31021 }, { "epoch": 1.4236152540039466, "grad_norm": 0.4525175392627716, "learning_rate": 5.567244261039684e-06, "loss": 0.3095, "step": 31022 }, { "epoch": 1.4236611445092011, "grad_norm": 0.47621452808380127, "learning_rate": 5.567000655423528e-06, "loss": 0.3213, "step": 31023 }, { "epoch": 1.4237070350144556, "grad_norm": 0.5312054753303528, "learning_rate": 5.566757048443911e-06, "loss": 0.4759, "step": 31024 }, { "epoch": 1.4237529255197099, "grad_norm": 0.4629071056842804, "learning_rate": 5.566513440101424e-06, "loss": 0.3148, "step": 31025 }, { "epoch": 1.4237988160249644, "grad_norm": 0.4769214689731598, "learning_rate": 5.566269830396652e-06, "loss": 0.315, "step": 31026 }, { "epoch": 1.4238447065302189, "grad_norm": 0.4603554904460907, "learning_rate": 5.566026219330179e-06, "loss": 0.3008, "step": 31027 }, { "epoch": 1.4238905970354734, "grad_norm": 0.47094807028770447, "learning_rate": 5.565782606902593e-06, "loss": 0.3839, "step": 31028 }, { "epoch": 1.4239364875407279, "grad_norm": 0.47544899582862854, "learning_rate": 5.565538993114476e-06, "loss": 0.3589, "step": 31029 }, { "epoch": 1.4239823780459822, "grad_norm": 0.44159430265426636, "learning_rate": 5.5652953779664175e-06, "loss": 0.3506, "step": 31030 }, { "epoch": 1.4240282685512367, "grad_norm": 0.49555739760398865, "learning_rate": 5.565051761459001e-06, "loss": 0.4314, "step": 31031 }, { "epoch": 1.4240741590564912, "grad_norm": 0.49806538224220276, "learning_rate": 5.564808143592815e-06, "loss": 0.3826, "step": 31032 }, { "epoch": 1.4241200495617456, "grad_norm": 0.6368608474731445, "learning_rate": 5.564564524368443e-06, "loss": 0.4214, "step": 31033 }, { "epoch": 1.4241659400670001, "grad_norm": 0.4357253909111023, "learning_rate": 5.564320903786471e-06, "loss": 0.2951, "step": 31034 }, { "epoch": 1.4242118305722546, "grad_norm": 0.4655635356903076, "learning_rate": 5.564077281847485e-06, "loss": 0.3427, "step": 31035 }, { "epoch": 1.4242577210775091, "grad_norm": 0.4947997033596039, "learning_rate": 5.563833658552071e-06, "loss": 0.3758, "step": 31036 }, { "epoch": 1.4243036115827636, "grad_norm": 0.43814054131507874, "learning_rate": 5.563590033900816e-06, "loss": 0.3188, "step": 31037 }, { "epoch": 1.424349502088018, "grad_norm": 0.4973623752593994, "learning_rate": 5.563346407894304e-06, "loss": 0.4133, "step": 31038 }, { "epoch": 1.4243953925932724, "grad_norm": 0.46660125255584717, "learning_rate": 5.563102780533124e-06, "loss": 0.3931, "step": 31039 }, { "epoch": 1.424441283098527, "grad_norm": 0.4456760287284851, "learning_rate": 5.562859151817858e-06, "loss": 0.309, "step": 31040 }, { "epoch": 1.4244871736037814, "grad_norm": 0.4747001826763153, "learning_rate": 5.562615521749093e-06, "loss": 0.3349, "step": 31041 }, { "epoch": 1.424533064109036, "grad_norm": 0.4795725643634796, "learning_rate": 5.562371890327415e-06, "loss": 0.3747, "step": 31042 }, { "epoch": 1.4245789546142902, "grad_norm": 0.46141675114631653, "learning_rate": 5.562128257553411e-06, "loss": 0.3332, "step": 31043 }, { "epoch": 1.4246248451195447, "grad_norm": 0.4970603287220001, "learning_rate": 5.561884623427666e-06, "loss": 0.3582, "step": 31044 }, { "epoch": 1.4246707356247992, "grad_norm": 0.46043330430984497, "learning_rate": 5.561640987950765e-06, "loss": 0.3223, "step": 31045 }, { "epoch": 1.4247166261300537, "grad_norm": 0.46806102991104126, "learning_rate": 5.561397351123294e-06, "loss": 0.3244, "step": 31046 }, { "epoch": 1.4247625166353082, "grad_norm": 0.47579801082611084, "learning_rate": 5.56115371294584e-06, "loss": 0.4038, "step": 31047 }, { "epoch": 1.4248084071405627, "grad_norm": 0.48727986216545105, "learning_rate": 5.56091007341899e-06, "loss": 0.4001, "step": 31048 }, { "epoch": 1.4248542976458172, "grad_norm": 0.4433022439479828, "learning_rate": 5.560666432543327e-06, "loss": 0.3263, "step": 31049 }, { "epoch": 1.4249001881510717, "grad_norm": 0.4921528995037079, "learning_rate": 5.560422790319438e-06, "loss": 0.4102, "step": 31050 }, { "epoch": 1.424946078656326, "grad_norm": 0.47830930352211, "learning_rate": 5.560179146747911e-06, "loss": 0.3792, "step": 31051 }, { "epoch": 1.4249919691615804, "grad_norm": 0.48027366399765015, "learning_rate": 5.559935501829328e-06, "loss": 0.3962, "step": 31052 }, { "epoch": 1.425037859666835, "grad_norm": 0.4601075053215027, "learning_rate": 5.5596918555642765e-06, "loss": 0.3656, "step": 31053 }, { "epoch": 1.4250837501720894, "grad_norm": 0.482686847448349, "learning_rate": 5.559448207953344e-06, "loss": 0.3989, "step": 31054 }, { "epoch": 1.425129640677344, "grad_norm": 0.5216338634490967, "learning_rate": 5.559204558997115e-06, "loss": 0.4139, "step": 31055 }, { "epoch": 1.4251755311825982, "grad_norm": 0.5063871741294861, "learning_rate": 5.558960908696175e-06, "loss": 0.4114, "step": 31056 }, { "epoch": 1.4252214216878527, "grad_norm": 0.46411412954330444, "learning_rate": 5.558717257051111e-06, "loss": 0.3285, "step": 31057 }, { "epoch": 1.4252673121931072, "grad_norm": 0.4624720811843872, "learning_rate": 5.558473604062507e-06, "loss": 0.3206, "step": 31058 }, { "epoch": 1.4253132026983617, "grad_norm": 0.46868404746055603, "learning_rate": 5.558229949730951e-06, "loss": 0.3501, "step": 31059 }, { "epoch": 1.4253590932036162, "grad_norm": 0.49551478028297424, "learning_rate": 5.557986294057029e-06, "loss": 0.3578, "step": 31060 }, { "epoch": 1.4254049837088707, "grad_norm": 0.48992183804512024, "learning_rate": 5.557742637041326e-06, "loss": 0.4218, "step": 31061 }, { "epoch": 1.4254508742141252, "grad_norm": 0.46859657764434814, "learning_rate": 5.557498978684428e-06, "loss": 0.3568, "step": 31062 }, { "epoch": 1.4254967647193797, "grad_norm": 0.3955368101596832, "learning_rate": 5.55725531898692e-06, "loss": 0.2502, "step": 31063 }, { "epoch": 1.425542655224634, "grad_norm": 0.49963104724884033, "learning_rate": 5.557011657949388e-06, "loss": 0.4049, "step": 31064 }, { "epoch": 1.4255885457298885, "grad_norm": 0.45654359459877014, "learning_rate": 5.5567679955724206e-06, "loss": 0.3348, "step": 31065 }, { "epoch": 1.425634436235143, "grad_norm": 0.4742346405982971, "learning_rate": 5.5565243318566005e-06, "loss": 0.372, "step": 31066 }, { "epoch": 1.4256803267403975, "grad_norm": 0.48425766825675964, "learning_rate": 5.556280666802516e-06, "loss": 0.4301, "step": 31067 }, { "epoch": 1.4257262172456517, "grad_norm": 0.5147684812545776, "learning_rate": 5.556037000410752e-06, "loss": 0.3932, "step": 31068 }, { "epoch": 1.4257721077509062, "grad_norm": 0.4624393582344055, "learning_rate": 5.555793332681894e-06, "loss": 0.3342, "step": 31069 }, { "epoch": 1.4258179982561607, "grad_norm": 0.501838743686676, "learning_rate": 5.555549663616529e-06, "loss": 0.3893, "step": 31070 }, { "epoch": 1.4258638887614152, "grad_norm": 0.4835980534553528, "learning_rate": 5.555305993215242e-06, "loss": 0.3741, "step": 31071 }, { "epoch": 1.4259097792666697, "grad_norm": 0.47363075613975525, "learning_rate": 5.555062321478621e-06, "loss": 0.3694, "step": 31072 }, { "epoch": 1.4259556697719242, "grad_norm": 0.4904089868068695, "learning_rate": 5.554818648407247e-06, "loss": 0.3685, "step": 31073 }, { "epoch": 1.4260015602771787, "grad_norm": 0.4854147136211395, "learning_rate": 5.554574974001712e-06, "loss": 0.4037, "step": 31074 }, { "epoch": 1.4260474507824332, "grad_norm": 0.49199244379997253, "learning_rate": 5.554331298262597e-06, "loss": 0.3741, "step": 31075 }, { "epoch": 1.4260933412876875, "grad_norm": 0.4391774535179138, "learning_rate": 5.554087621190492e-06, "loss": 0.3082, "step": 31076 }, { "epoch": 1.426139231792942, "grad_norm": 0.4108867645263672, "learning_rate": 5.553843942785981e-06, "loss": 0.2799, "step": 31077 }, { "epoch": 1.4261851222981965, "grad_norm": 0.4756261110305786, "learning_rate": 5.55360026304965e-06, "loss": 0.3414, "step": 31078 }, { "epoch": 1.426231012803451, "grad_norm": 0.5060372352600098, "learning_rate": 5.553356581982085e-06, "loss": 0.4111, "step": 31079 }, { "epoch": 1.4262769033087055, "grad_norm": 0.48658743500709534, "learning_rate": 5.553112899583871e-06, "loss": 0.3694, "step": 31080 }, { "epoch": 1.4263227938139598, "grad_norm": 0.4890486001968384, "learning_rate": 5.552869215855595e-06, "loss": 0.383, "step": 31081 }, { "epoch": 1.4263686843192143, "grad_norm": 0.43703097105026245, "learning_rate": 5.552625530797845e-06, "loss": 0.2834, "step": 31082 }, { "epoch": 1.4264145748244688, "grad_norm": 0.43353942036628723, "learning_rate": 5.552381844411204e-06, "loss": 0.2827, "step": 31083 }, { "epoch": 1.4264604653297233, "grad_norm": 0.525862991809845, "learning_rate": 5.552138156696258e-06, "loss": 0.4573, "step": 31084 }, { "epoch": 1.4265063558349778, "grad_norm": 0.4623962938785553, "learning_rate": 5.551894467653596e-06, "loss": 0.3122, "step": 31085 }, { "epoch": 1.4265522463402323, "grad_norm": 0.41249749064445496, "learning_rate": 5.5516507772838e-06, "loss": 0.283, "step": 31086 }, { "epoch": 1.4265981368454868, "grad_norm": 0.44757911562919617, "learning_rate": 5.551407085587459e-06, "loss": 0.3486, "step": 31087 }, { "epoch": 1.4266440273507413, "grad_norm": 0.4539002776145935, "learning_rate": 5.551163392565159e-06, "loss": 0.285, "step": 31088 }, { "epoch": 1.4266899178559955, "grad_norm": 0.4802192449569702, "learning_rate": 5.550919698217483e-06, "loss": 0.3981, "step": 31089 }, { "epoch": 1.42673580836125, "grad_norm": 0.501649022102356, "learning_rate": 5.550676002545019e-06, "loss": 0.4167, "step": 31090 }, { "epoch": 1.4267816988665045, "grad_norm": 0.47513073682785034, "learning_rate": 5.550432305548355e-06, "loss": 0.3622, "step": 31091 }, { "epoch": 1.426827589371759, "grad_norm": 0.48464372754096985, "learning_rate": 5.5501886072280735e-06, "loss": 0.357, "step": 31092 }, { "epoch": 1.4268734798770135, "grad_norm": 0.4413989782333374, "learning_rate": 5.549944907584763e-06, "loss": 0.3072, "step": 31093 }, { "epoch": 1.4269193703822678, "grad_norm": 0.46637436747550964, "learning_rate": 5.549701206619008e-06, "loss": 0.3735, "step": 31094 }, { "epoch": 1.4269652608875223, "grad_norm": 0.4858378767967224, "learning_rate": 5.549457504331395e-06, "loss": 0.4421, "step": 31095 }, { "epoch": 1.4270111513927768, "grad_norm": 0.4464746415615082, "learning_rate": 5.549213800722511e-06, "loss": 0.3459, "step": 31096 }, { "epoch": 1.4270570418980313, "grad_norm": 0.4108494222164154, "learning_rate": 5.548970095792939e-06, "loss": 0.2617, "step": 31097 }, { "epoch": 1.4271029324032858, "grad_norm": 0.4329231083393097, "learning_rate": 5.54872638954327e-06, "loss": 0.2919, "step": 31098 }, { "epoch": 1.4271488229085403, "grad_norm": 0.43960508704185486, "learning_rate": 5.548482681974086e-06, "loss": 0.3043, "step": 31099 }, { "epoch": 1.4271947134137948, "grad_norm": 0.45808500051498413, "learning_rate": 5.548238973085973e-06, "loss": 0.3257, "step": 31100 }, { "epoch": 1.4272406039190493, "grad_norm": 0.5050986409187317, "learning_rate": 5.547995262879518e-06, "loss": 0.4834, "step": 31101 }, { "epoch": 1.4272864944243036, "grad_norm": 0.4525831341743469, "learning_rate": 5.547751551355311e-06, "loss": 0.3665, "step": 31102 }, { "epoch": 1.427332384929558, "grad_norm": 0.48764780163764954, "learning_rate": 5.54750783851393e-06, "loss": 0.3894, "step": 31103 }, { "epoch": 1.4273782754348125, "grad_norm": 0.48833557963371277, "learning_rate": 5.547264124355969e-06, "loss": 0.3572, "step": 31104 }, { "epoch": 1.427424165940067, "grad_norm": 0.4694660007953644, "learning_rate": 5.547020408882009e-06, "loss": 0.3097, "step": 31105 }, { "epoch": 1.4274700564453215, "grad_norm": 0.436752587556839, "learning_rate": 5.546776692092637e-06, "loss": 0.2971, "step": 31106 }, { "epoch": 1.4275159469505758, "grad_norm": 0.47643283009529114, "learning_rate": 5.54653297398844e-06, "loss": 0.3513, "step": 31107 }, { "epoch": 1.4275618374558303, "grad_norm": 0.4848998486995697, "learning_rate": 5.546289254570004e-06, "loss": 0.4039, "step": 31108 }, { "epoch": 1.4276077279610848, "grad_norm": 0.5048577785491943, "learning_rate": 5.546045533837915e-06, "loss": 0.4136, "step": 31109 }, { "epoch": 1.4276536184663393, "grad_norm": 0.5138494968414307, "learning_rate": 5.5458018117927595e-06, "loss": 0.412, "step": 31110 }, { "epoch": 1.4276995089715938, "grad_norm": 0.49820584058761597, "learning_rate": 5.545558088435121e-06, "loss": 0.45, "step": 31111 }, { "epoch": 1.4277453994768483, "grad_norm": 0.46815353631973267, "learning_rate": 5.545314363765588e-06, "loss": 0.3617, "step": 31112 }, { "epoch": 1.4277912899821028, "grad_norm": 0.46897268295288086, "learning_rate": 5.545070637784746e-06, "loss": 0.3874, "step": 31113 }, { "epoch": 1.427837180487357, "grad_norm": 0.4410501718521118, "learning_rate": 5.544826910493182e-06, "loss": 0.2994, "step": 31114 }, { "epoch": 1.4278830709926116, "grad_norm": 0.45134487748146057, "learning_rate": 5.544583181891481e-06, "loss": 0.3156, "step": 31115 }, { "epoch": 1.427928961497866, "grad_norm": 0.5071738958358765, "learning_rate": 5.544339451980229e-06, "loss": 0.4318, "step": 31116 }, { "epoch": 1.4279748520031206, "grad_norm": 0.4897975027561188, "learning_rate": 5.544095720760012e-06, "loss": 0.3989, "step": 31117 }, { "epoch": 1.428020742508375, "grad_norm": 0.4546639621257782, "learning_rate": 5.5438519882314165e-06, "loss": 0.3189, "step": 31118 }, { "epoch": 1.4280666330136293, "grad_norm": 0.47649550437927246, "learning_rate": 5.54360825439503e-06, "loss": 0.3675, "step": 31119 }, { "epoch": 1.4281125235188838, "grad_norm": 0.45490825176239014, "learning_rate": 5.543364519251437e-06, "loss": 0.3352, "step": 31120 }, { "epoch": 1.4281584140241383, "grad_norm": 0.42464736104011536, "learning_rate": 5.5431207828012226e-06, "loss": 0.3063, "step": 31121 }, { "epoch": 1.4282043045293928, "grad_norm": 0.503308892250061, "learning_rate": 5.542877045044975e-06, "loss": 0.4001, "step": 31122 }, { "epoch": 1.4282501950346473, "grad_norm": 0.4990735650062561, "learning_rate": 5.542633305983279e-06, "loss": 0.4327, "step": 31123 }, { "epoch": 1.4282960855399018, "grad_norm": 0.4514814615249634, "learning_rate": 5.54238956561672e-06, "loss": 0.328, "step": 31124 }, { "epoch": 1.4283419760451563, "grad_norm": 0.4717971682548523, "learning_rate": 5.542145823945888e-06, "loss": 0.3303, "step": 31125 }, { "epoch": 1.4283878665504108, "grad_norm": 0.4817515015602112, "learning_rate": 5.541902080971366e-06, "loss": 0.3877, "step": 31126 }, { "epoch": 1.428433757055665, "grad_norm": 0.47787919640541077, "learning_rate": 5.541658336693739e-06, "loss": 0.3638, "step": 31127 }, { "epoch": 1.4284796475609196, "grad_norm": 0.46937263011932373, "learning_rate": 5.541414591113595e-06, "loss": 0.3665, "step": 31128 }, { "epoch": 1.428525538066174, "grad_norm": 0.49982351064682007, "learning_rate": 5.54117084423152e-06, "loss": 0.3577, "step": 31129 }, { "epoch": 1.4285714285714286, "grad_norm": 0.4852502942085266, "learning_rate": 5.5409270960481e-06, "loss": 0.3897, "step": 31130 }, { "epoch": 1.428617319076683, "grad_norm": 0.43196403980255127, "learning_rate": 5.540683346563921e-06, "loss": 0.287, "step": 31131 }, { "epoch": 1.4286632095819374, "grad_norm": 0.4504820704460144, "learning_rate": 5.5404395957795686e-06, "loss": 0.3455, "step": 31132 }, { "epoch": 1.4287091000871919, "grad_norm": 0.4444904923439026, "learning_rate": 5.54019584369563e-06, "loss": 0.3208, "step": 31133 }, { "epoch": 1.4287549905924464, "grad_norm": 0.474321186542511, "learning_rate": 5.5399520903126915e-06, "loss": 0.3226, "step": 31134 }, { "epoch": 1.4288008810977009, "grad_norm": 0.4553077518939972, "learning_rate": 5.539708335631338e-06, "loss": 0.2677, "step": 31135 }, { "epoch": 1.4288467716029554, "grad_norm": 0.46007683873176575, "learning_rate": 5.5394645796521565e-06, "loss": 0.3416, "step": 31136 }, { "epoch": 1.4288926621082099, "grad_norm": 0.49282917380332947, "learning_rate": 5.5392208223757326e-06, "loss": 0.4046, "step": 31137 }, { "epoch": 1.4289385526134644, "grad_norm": 0.49968212842941284, "learning_rate": 5.538977063802653e-06, "loss": 0.4205, "step": 31138 }, { "epoch": 1.4289844431187189, "grad_norm": 0.4658733010292053, "learning_rate": 5.538733303933504e-06, "loss": 0.4071, "step": 31139 }, { "epoch": 1.4290303336239731, "grad_norm": 0.4665359556674957, "learning_rate": 5.538489542768871e-06, "loss": 0.3726, "step": 31140 }, { "epoch": 1.4290762241292276, "grad_norm": 0.4611313045024872, "learning_rate": 5.538245780309342e-06, "loss": 0.3474, "step": 31141 }, { "epoch": 1.4291221146344821, "grad_norm": 0.46552059054374695, "learning_rate": 5.5380020165555e-06, "loss": 0.3262, "step": 31142 }, { "epoch": 1.4291680051397366, "grad_norm": 0.5336297154426575, "learning_rate": 5.5377582515079335e-06, "loss": 0.4692, "step": 31143 }, { "epoch": 1.4292138956449911, "grad_norm": 0.4543493092060089, "learning_rate": 5.537514485167228e-06, "loss": 0.341, "step": 31144 }, { "epoch": 1.4292597861502454, "grad_norm": 0.4760379195213318, "learning_rate": 5.5372707175339694e-06, "loss": 0.3639, "step": 31145 }, { "epoch": 1.4293056766555, "grad_norm": 0.458626925945282, "learning_rate": 5.5370269486087435e-06, "loss": 0.345, "step": 31146 }, { "epoch": 1.4293515671607544, "grad_norm": 0.49291160702705383, "learning_rate": 5.53678317839214e-06, "loss": 0.4093, "step": 31147 }, { "epoch": 1.429397457666009, "grad_norm": 0.47705790400505066, "learning_rate": 5.5365394068847414e-06, "loss": 0.3618, "step": 31148 }, { "epoch": 1.4294433481712634, "grad_norm": 0.5001669526100159, "learning_rate": 5.536295634087133e-06, "loss": 0.3629, "step": 31149 }, { "epoch": 1.429489238676518, "grad_norm": 0.5112882852554321, "learning_rate": 5.536051859999906e-06, "loss": 0.3868, "step": 31150 }, { "epoch": 1.4295351291817724, "grad_norm": 0.5265322327613831, "learning_rate": 5.53580808462364e-06, "loss": 0.4315, "step": 31151 }, { "epoch": 1.4295810196870269, "grad_norm": 0.4835872948169708, "learning_rate": 5.535564307958927e-06, "loss": 0.3669, "step": 31152 }, { "epoch": 1.4296269101922812, "grad_norm": 0.5137470364570618, "learning_rate": 5.535320530006352e-06, "loss": 0.407, "step": 31153 }, { "epoch": 1.4296728006975357, "grad_norm": 0.4499948024749756, "learning_rate": 5.535076750766497e-06, "loss": 0.3054, "step": 31154 }, { "epoch": 1.4297186912027902, "grad_norm": 0.4675934314727783, "learning_rate": 5.534832970239952e-06, "loss": 0.3289, "step": 31155 }, { "epoch": 1.4297645817080447, "grad_norm": 0.4311208426952362, "learning_rate": 5.534589188427304e-06, "loss": 0.3085, "step": 31156 }, { "epoch": 1.429810472213299, "grad_norm": 0.45414820313453674, "learning_rate": 5.534345405329136e-06, "loss": 0.3176, "step": 31157 }, { "epoch": 1.4298563627185534, "grad_norm": 0.4811494052410126, "learning_rate": 5.534101620946037e-06, "loss": 0.3969, "step": 31158 }, { "epoch": 1.429902253223808, "grad_norm": 0.4965008795261383, "learning_rate": 5.5338578352785935e-06, "loss": 0.4224, "step": 31159 }, { "epoch": 1.4299481437290624, "grad_norm": 0.44886913895606995, "learning_rate": 5.533614048327387e-06, "loss": 0.2961, "step": 31160 }, { "epoch": 1.429994034234317, "grad_norm": 0.5075212121009827, "learning_rate": 5.53337026009301e-06, "loss": 0.4441, "step": 31161 }, { "epoch": 1.4300399247395714, "grad_norm": 0.489182710647583, "learning_rate": 5.533126470576044e-06, "loss": 0.3582, "step": 31162 }, { "epoch": 1.430085815244826, "grad_norm": 0.483430951833725, "learning_rate": 5.5328826797770786e-06, "loss": 0.3936, "step": 31163 }, { "epoch": 1.4301317057500804, "grad_norm": 0.4501614272594452, "learning_rate": 5.5326388876966985e-06, "loss": 0.3104, "step": 31164 }, { "epoch": 1.4301775962553347, "grad_norm": 0.5078386068344116, "learning_rate": 5.5323950943354885e-06, "loss": 0.41, "step": 31165 }, { "epoch": 1.4302234867605892, "grad_norm": 0.5015767216682434, "learning_rate": 5.532151299694037e-06, "loss": 0.3932, "step": 31166 }, { "epoch": 1.4302693772658437, "grad_norm": 0.4077334403991699, "learning_rate": 5.5319075037729296e-06, "loss": 0.2484, "step": 31167 }, { "epoch": 1.4303152677710982, "grad_norm": 0.4816370904445648, "learning_rate": 5.531663706572753e-06, "loss": 0.4079, "step": 31168 }, { "epoch": 1.4303611582763527, "grad_norm": 0.5312691926956177, "learning_rate": 5.531419908094093e-06, "loss": 0.4152, "step": 31169 }, { "epoch": 1.430407048781607, "grad_norm": 0.3909851312637329, "learning_rate": 5.531176108337536e-06, "loss": 0.2429, "step": 31170 }, { "epoch": 1.4304529392868615, "grad_norm": 0.45513197779655457, "learning_rate": 5.530932307303667e-06, "loss": 0.3629, "step": 31171 }, { "epoch": 1.430498829792116, "grad_norm": 0.46619272232055664, "learning_rate": 5.530688504993073e-06, "loss": 0.3643, "step": 31172 }, { "epoch": 1.4305447202973705, "grad_norm": 0.4528715908527374, "learning_rate": 5.530444701406342e-06, "loss": 0.3727, "step": 31173 }, { "epoch": 1.430590610802625, "grad_norm": 0.4613390564918518, "learning_rate": 5.530200896544059e-06, "loss": 0.3231, "step": 31174 }, { "epoch": 1.4306365013078794, "grad_norm": 0.45612916350364685, "learning_rate": 5.529957090406811e-06, "loss": 0.3004, "step": 31175 }, { "epoch": 1.430682391813134, "grad_norm": 0.48451218008995056, "learning_rate": 5.529713282995182e-06, "loss": 0.3987, "step": 31176 }, { "epoch": 1.4307282823183884, "grad_norm": 0.5055718421936035, "learning_rate": 5.5294694743097584e-06, "loss": 0.3429, "step": 31177 }, { "epoch": 1.4307741728236427, "grad_norm": 0.5270825028419495, "learning_rate": 5.52922566435113e-06, "loss": 0.4964, "step": 31178 }, { "epoch": 1.4308200633288972, "grad_norm": 0.48701563477516174, "learning_rate": 5.528981853119881e-06, "loss": 0.4508, "step": 31179 }, { "epoch": 1.4308659538341517, "grad_norm": 0.43516382575035095, "learning_rate": 5.528738040616596e-06, "loss": 0.2705, "step": 31180 }, { "epoch": 1.4309118443394062, "grad_norm": 0.49163514375686646, "learning_rate": 5.528494226841864e-06, "loss": 0.4035, "step": 31181 }, { "epoch": 1.4309577348446607, "grad_norm": 0.4601171612739563, "learning_rate": 5.52825041179627e-06, "loss": 0.4085, "step": 31182 }, { "epoch": 1.431003625349915, "grad_norm": 0.4760420024394989, "learning_rate": 5.5280065954803996e-06, "loss": 0.3558, "step": 31183 }, { "epoch": 1.4310495158551695, "grad_norm": 0.47134125232696533, "learning_rate": 5.527762777894841e-06, "loss": 0.3684, "step": 31184 }, { "epoch": 1.431095406360424, "grad_norm": 0.457643061876297, "learning_rate": 5.52751895904018e-06, "loss": 0.4, "step": 31185 }, { "epoch": 1.4311412968656785, "grad_norm": 0.4472745954990387, "learning_rate": 5.527275138917001e-06, "loss": 0.349, "step": 31186 }, { "epoch": 1.431187187370933, "grad_norm": 0.6127449870109558, "learning_rate": 5.527031317525893e-06, "loss": 0.3052, "step": 31187 }, { "epoch": 1.4312330778761875, "grad_norm": 0.44817066192626953, "learning_rate": 5.52678749486744e-06, "loss": 0.3489, "step": 31188 }, { "epoch": 1.431278968381442, "grad_norm": 0.45312389731407166, "learning_rate": 5.526543670942228e-06, "loss": 0.3376, "step": 31189 }, { "epoch": 1.4313248588866965, "grad_norm": 0.46881818771362305, "learning_rate": 5.526299845750847e-06, "loss": 0.3293, "step": 31190 }, { "epoch": 1.4313707493919507, "grad_norm": 0.4609656035900116, "learning_rate": 5.526056019293882e-06, "loss": 0.3058, "step": 31191 }, { "epoch": 1.4314166398972052, "grad_norm": 0.4606319069862366, "learning_rate": 5.525812191571916e-06, "loss": 0.363, "step": 31192 }, { "epoch": 1.4314625304024597, "grad_norm": 0.4652095139026642, "learning_rate": 5.525568362585538e-06, "loss": 0.3067, "step": 31193 }, { "epoch": 1.4315084209077142, "grad_norm": 0.5363459587097168, "learning_rate": 5.5253245323353335e-06, "loss": 0.3693, "step": 31194 }, { "epoch": 1.4315543114129687, "grad_norm": 0.46269989013671875, "learning_rate": 5.525080700821891e-06, "loss": 0.3779, "step": 31195 }, { "epoch": 1.431600201918223, "grad_norm": 0.49151402711868286, "learning_rate": 5.524836868045794e-06, "loss": 0.4102, "step": 31196 }, { "epoch": 1.4316460924234775, "grad_norm": 0.48053231835365295, "learning_rate": 5.5245930340076304e-06, "loss": 0.4145, "step": 31197 }, { "epoch": 1.431691982928732, "grad_norm": 0.5108349919319153, "learning_rate": 5.5243491987079854e-06, "loss": 0.4194, "step": 31198 }, { "epoch": 1.4317378734339865, "grad_norm": 0.4650034010410309, "learning_rate": 5.524105362147446e-06, "loss": 0.3513, "step": 31199 }, { "epoch": 1.431783763939241, "grad_norm": 0.521092414855957, "learning_rate": 5.523861524326599e-06, "loss": 0.4219, "step": 31200 }, { "epoch": 1.4318296544444955, "grad_norm": 0.4538710117340088, "learning_rate": 5.523617685246031e-06, "loss": 0.3436, "step": 31201 }, { "epoch": 1.43187554494975, "grad_norm": 0.44621777534484863, "learning_rate": 5.523373844906328e-06, "loss": 0.2826, "step": 31202 }, { "epoch": 1.4319214354550043, "grad_norm": 0.4550861418247223, "learning_rate": 5.5231300033080746e-06, "loss": 0.3303, "step": 31203 }, { "epoch": 1.4319673259602588, "grad_norm": 0.4539366066455841, "learning_rate": 5.522886160451861e-06, "loss": 0.3526, "step": 31204 }, { "epoch": 1.4320132164655133, "grad_norm": 0.4691370129585266, "learning_rate": 5.522642316338268e-06, "loss": 0.3471, "step": 31205 }, { "epoch": 1.4320591069707678, "grad_norm": 0.47325608134269714, "learning_rate": 5.5223984709678855e-06, "loss": 0.3606, "step": 31206 }, { "epoch": 1.4321049974760223, "grad_norm": 0.4400242567062378, "learning_rate": 5.522154624341303e-06, "loss": 0.2855, "step": 31207 }, { "epoch": 1.4321508879812765, "grad_norm": 0.4369378089904785, "learning_rate": 5.521910776459101e-06, "loss": 0.2835, "step": 31208 }, { "epoch": 1.432196778486531, "grad_norm": 0.514861524105072, "learning_rate": 5.521666927321868e-06, "loss": 0.401, "step": 31209 }, { "epoch": 1.4322426689917855, "grad_norm": 0.48621705174446106, "learning_rate": 5.521423076930192e-06, "loss": 0.3744, "step": 31210 }, { "epoch": 1.43228855949704, "grad_norm": 0.45788395404815674, "learning_rate": 5.5211792252846565e-06, "loss": 0.3367, "step": 31211 }, { "epoch": 1.4323344500022945, "grad_norm": 0.6031245589256287, "learning_rate": 5.52093537238585e-06, "loss": 0.3573, "step": 31212 }, { "epoch": 1.432380340507549, "grad_norm": 0.4817715287208557, "learning_rate": 5.520691518234359e-06, "loss": 0.3352, "step": 31213 }, { "epoch": 1.4324262310128035, "grad_norm": 0.5083053112030029, "learning_rate": 5.520447662830768e-06, "loss": 0.4267, "step": 31214 }, { "epoch": 1.432472121518058, "grad_norm": 0.4644688367843628, "learning_rate": 5.520203806175666e-06, "loss": 0.3051, "step": 31215 }, { "epoch": 1.4325180120233123, "grad_norm": 0.4444335699081421, "learning_rate": 5.519959948269637e-06, "loss": 0.3295, "step": 31216 }, { "epoch": 1.4325639025285668, "grad_norm": 0.5276976227760315, "learning_rate": 5.519716089113269e-06, "loss": 0.4076, "step": 31217 }, { "epoch": 1.4326097930338213, "grad_norm": 0.4655589461326599, "learning_rate": 5.519472228707149e-06, "loss": 0.3442, "step": 31218 }, { "epoch": 1.4326556835390758, "grad_norm": 0.47360605001449585, "learning_rate": 5.51922836705186e-06, "loss": 0.3694, "step": 31219 }, { "epoch": 1.4327015740443303, "grad_norm": 0.4911370277404785, "learning_rate": 5.518984504147991e-06, "loss": 0.3873, "step": 31220 }, { "epoch": 1.4327474645495846, "grad_norm": 0.481213241815567, "learning_rate": 5.518740639996129e-06, "loss": 0.3892, "step": 31221 }, { "epoch": 1.432793355054839, "grad_norm": 0.4377342462539673, "learning_rate": 5.5184967745968585e-06, "loss": 0.2878, "step": 31222 }, { "epoch": 1.4328392455600936, "grad_norm": 0.48144635558128357, "learning_rate": 5.518252907950769e-06, "loss": 0.3644, "step": 31223 }, { "epoch": 1.432885136065348, "grad_norm": 0.5083449482917786, "learning_rate": 5.518009040058443e-06, "loss": 0.4303, "step": 31224 }, { "epoch": 1.4329310265706026, "grad_norm": 0.48671770095825195, "learning_rate": 5.5177651709204694e-06, "loss": 0.3371, "step": 31225 }, { "epoch": 1.432976917075857, "grad_norm": 0.44579946994781494, "learning_rate": 5.517521300537434e-06, "loss": 0.3096, "step": 31226 }, { "epoch": 1.4330228075811116, "grad_norm": 0.6146912574768066, "learning_rate": 5.517277428909922e-06, "loss": 0.3723, "step": 31227 }, { "epoch": 1.433068698086366, "grad_norm": 0.4635695517063141, "learning_rate": 5.517033556038521e-06, "loss": 0.3495, "step": 31228 }, { "epoch": 1.4331145885916203, "grad_norm": 0.44822603464126587, "learning_rate": 5.5167896819238195e-06, "loss": 0.3015, "step": 31229 }, { "epoch": 1.4331604790968748, "grad_norm": 0.456224262714386, "learning_rate": 5.516545806566402e-06, "loss": 0.3287, "step": 31230 }, { "epoch": 1.4332063696021293, "grad_norm": 0.513614296913147, "learning_rate": 5.516301929966852e-06, "loss": 0.3896, "step": 31231 }, { "epoch": 1.4332522601073838, "grad_norm": 0.5015686750411987, "learning_rate": 5.516058052125761e-06, "loss": 0.3593, "step": 31232 }, { "epoch": 1.4332981506126383, "grad_norm": 0.4730696678161621, "learning_rate": 5.515814173043712e-06, "loss": 0.3399, "step": 31233 }, { "epoch": 1.4333440411178926, "grad_norm": 0.46964579820632935, "learning_rate": 5.515570292721294e-06, "loss": 0.3347, "step": 31234 }, { "epoch": 1.433389931623147, "grad_norm": 0.492269366979599, "learning_rate": 5.515326411159092e-06, "loss": 0.4147, "step": 31235 }, { "epoch": 1.4334358221284016, "grad_norm": 0.44471505284309387, "learning_rate": 5.515082528357693e-06, "loss": 0.2868, "step": 31236 }, { "epoch": 1.433481712633656, "grad_norm": 0.48176226019859314, "learning_rate": 5.5148386443176815e-06, "loss": 0.3311, "step": 31237 }, { "epoch": 1.4335276031389106, "grad_norm": 0.45948708057403564, "learning_rate": 5.514594759039646e-06, "loss": 0.3677, "step": 31238 }, { "epoch": 1.433573493644165, "grad_norm": 0.42645886540412903, "learning_rate": 5.5143508725241725e-06, "loss": 0.2767, "step": 31239 }, { "epoch": 1.4336193841494196, "grad_norm": 0.4710995852947235, "learning_rate": 5.51410698477185e-06, "loss": 0.373, "step": 31240 }, { "epoch": 1.433665274654674, "grad_norm": 0.4659832715988159, "learning_rate": 5.513863095783259e-06, "loss": 0.3272, "step": 31241 }, { "epoch": 1.4337111651599284, "grad_norm": 0.4803202450275421, "learning_rate": 5.513619205558991e-06, "loss": 0.3602, "step": 31242 }, { "epoch": 1.4337570556651829, "grad_norm": 0.4392968714237213, "learning_rate": 5.51337531409963e-06, "loss": 0.3137, "step": 31243 }, { "epoch": 1.4338029461704374, "grad_norm": 0.48227450251579285, "learning_rate": 5.513131421405764e-06, "loss": 0.3511, "step": 31244 }, { "epoch": 1.4338488366756919, "grad_norm": 0.529499351978302, "learning_rate": 5.51288752747798e-06, "loss": 0.4758, "step": 31245 }, { "epoch": 1.4338947271809461, "grad_norm": 0.48742902278900146, "learning_rate": 5.5126436323168634e-06, "loss": 0.4367, "step": 31246 }, { "epoch": 1.4339406176862006, "grad_norm": 0.4881955683231354, "learning_rate": 5.512399735922999e-06, "loss": 0.4018, "step": 31247 }, { "epoch": 1.4339865081914551, "grad_norm": 0.4701627790927887, "learning_rate": 5.5121558382969744e-06, "loss": 0.4219, "step": 31248 }, { "epoch": 1.4340323986967096, "grad_norm": 0.47178494930267334, "learning_rate": 5.511911939439378e-06, "loss": 0.3488, "step": 31249 }, { "epoch": 1.4340782892019641, "grad_norm": 0.4015968143939972, "learning_rate": 5.511668039350795e-06, "loss": 0.2792, "step": 31250 }, { "epoch": 1.4341241797072186, "grad_norm": 0.4519443213939667, "learning_rate": 5.5114241380318116e-06, "loss": 0.3211, "step": 31251 }, { "epoch": 1.4341700702124731, "grad_norm": 0.496487021446228, "learning_rate": 5.511180235483015e-06, "loss": 0.4518, "step": 31252 }, { "epoch": 1.4342159607177276, "grad_norm": 0.4863892197608948, "learning_rate": 5.51093633170499e-06, "loss": 0.3567, "step": 31253 }, { "epoch": 1.4342618512229819, "grad_norm": 0.46944448351860046, "learning_rate": 5.510692426698325e-06, "loss": 0.3489, "step": 31254 }, { "epoch": 1.4343077417282364, "grad_norm": 0.46215325593948364, "learning_rate": 5.510448520463607e-06, "loss": 0.3363, "step": 31255 }, { "epoch": 1.4343536322334909, "grad_norm": 0.4589986503124237, "learning_rate": 5.510204613001421e-06, "loss": 0.3232, "step": 31256 }, { "epoch": 1.4343995227387454, "grad_norm": 0.4844457507133484, "learning_rate": 5.509960704312354e-06, "loss": 0.4059, "step": 31257 }, { "epoch": 1.4344454132439999, "grad_norm": 0.4951876699924469, "learning_rate": 5.509716794396991e-06, "loss": 0.3688, "step": 31258 }, { "epoch": 1.4344913037492542, "grad_norm": 0.4735114276409149, "learning_rate": 5.5094728832559195e-06, "loss": 0.3922, "step": 31259 }, { "epoch": 1.4345371942545087, "grad_norm": 0.480099618434906, "learning_rate": 5.50922897088973e-06, "loss": 0.4053, "step": 31260 }, { "epoch": 1.4345830847597631, "grad_norm": 0.4813135862350464, "learning_rate": 5.508985057299002e-06, "loss": 0.3508, "step": 31261 }, { "epoch": 1.4346289752650176, "grad_norm": 0.44718798995018005, "learning_rate": 5.508741142484327e-06, "loss": 0.3548, "step": 31262 }, { "epoch": 1.4346748657702721, "grad_norm": 0.4617826044559479, "learning_rate": 5.508497226446291e-06, "loss": 0.38, "step": 31263 }, { "epoch": 1.4347207562755266, "grad_norm": 0.46927306056022644, "learning_rate": 5.508253309185478e-06, "loss": 0.3404, "step": 31264 }, { "epoch": 1.4347666467807811, "grad_norm": 0.46597418189048767, "learning_rate": 5.508009390702477e-06, "loss": 0.3936, "step": 31265 }, { "epoch": 1.4348125372860356, "grad_norm": 0.5003175735473633, "learning_rate": 5.507765470997875e-06, "loss": 0.3911, "step": 31266 }, { "epoch": 1.43485842779129, "grad_norm": 0.5048842430114746, "learning_rate": 5.507521550072256e-06, "loss": 0.4244, "step": 31267 }, { "epoch": 1.4349043182965444, "grad_norm": 0.47882556915283203, "learning_rate": 5.507277627926206e-06, "loss": 0.396, "step": 31268 }, { "epoch": 1.434950208801799, "grad_norm": 0.434061735868454, "learning_rate": 5.507033704560316e-06, "loss": 0.2819, "step": 31269 }, { "epoch": 1.4349960993070534, "grad_norm": 0.5054728388786316, "learning_rate": 5.506789779975169e-06, "loss": 0.4042, "step": 31270 }, { "epoch": 1.435041989812308, "grad_norm": 0.4878963530063629, "learning_rate": 5.5065458541713525e-06, "loss": 0.3871, "step": 31271 }, { "epoch": 1.4350878803175622, "grad_norm": 0.4776059091091156, "learning_rate": 5.506301927149453e-06, "loss": 0.3289, "step": 31272 }, { "epoch": 1.4351337708228167, "grad_norm": 0.4663805663585663, "learning_rate": 5.506057998910059e-06, "loss": 0.3422, "step": 31273 }, { "epoch": 1.4351796613280712, "grad_norm": 0.4405117332935333, "learning_rate": 5.505814069453755e-06, "loss": 0.3162, "step": 31274 }, { "epoch": 1.4352255518333257, "grad_norm": 0.5175184607505798, "learning_rate": 5.5055701387811255e-06, "loss": 0.4795, "step": 31275 }, { "epoch": 1.4352714423385802, "grad_norm": 0.5051531195640564, "learning_rate": 5.505326206892759e-06, "loss": 0.3496, "step": 31276 }, { "epoch": 1.4353173328438347, "grad_norm": 0.4543939232826233, "learning_rate": 5.505082273789245e-06, "loss": 0.3776, "step": 31277 }, { "epoch": 1.4353632233490892, "grad_norm": 0.4529600143432617, "learning_rate": 5.504838339471167e-06, "loss": 0.348, "step": 31278 }, { "epoch": 1.4354091138543437, "grad_norm": 0.5172673463821411, "learning_rate": 5.50459440393911e-06, "loss": 0.3801, "step": 31279 }, { "epoch": 1.435455004359598, "grad_norm": 0.46863535046577454, "learning_rate": 5.504350467193666e-06, "loss": 0.3913, "step": 31280 }, { "epoch": 1.4355008948648524, "grad_norm": 0.4577239155769348, "learning_rate": 5.504106529235416e-06, "loss": 0.3241, "step": 31281 }, { "epoch": 1.435546785370107, "grad_norm": 0.44955503940582275, "learning_rate": 5.503862590064949e-06, "loss": 0.3673, "step": 31282 }, { "epoch": 1.4355926758753614, "grad_norm": 0.4839995503425598, "learning_rate": 5.503618649682853e-06, "loss": 0.3756, "step": 31283 }, { "epoch": 1.435638566380616, "grad_norm": 0.44301360845565796, "learning_rate": 5.503374708089712e-06, "loss": 0.314, "step": 31284 }, { "epoch": 1.4356844568858702, "grad_norm": 0.5052185654640198, "learning_rate": 5.503130765286113e-06, "loss": 0.44, "step": 31285 }, { "epoch": 1.4357303473911247, "grad_norm": 0.4602861702442169, "learning_rate": 5.502886821272644e-06, "loss": 0.3258, "step": 31286 }, { "epoch": 1.4357762378963792, "grad_norm": 0.4827623963356018, "learning_rate": 5.502642876049891e-06, "loss": 0.3573, "step": 31287 }, { "epoch": 1.4358221284016337, "grad_norm": 0.47578364610671997, "learning_rate": 5.502398929618441e-06, "loss": 0.3967, "step": 31288 }, { "epoch": 1.4358680189068882, "grad_norm": 0.4548254609107971, "learning_rate": 5.50215498197888e-06, "loss": 0.3211, "step": 31289 }, { "epoch": 1.4359139094121427, "grad_norm": 0.4505936801433563, "learning_rate": 5.5019110331317936e-06, "loss": 0.3495, "step": 31290 }, { "epoch": 1.4359597999173972, "grad_norm": 0.4768708348274231, "learning_rate": 5.501667083077771e-06, "loss": 0.3276, "step": 31291 }, { "epoch": 1.4360056904226515, "grad_norm": 0.44940635561943054, "learning_rate": 5.501423131817396e-06, "loss": 0.3067, "step": 31292 }, { "epoch": 1.436051580927906, "grad_norm": 0.46306875348091125, "learning_rate": 5.501179179351257e-06, "loss": 0.3469, "step": 31293 }, { "epoch": 1.4360974714331605, "grad_norm": 0.4432179629802704, "learning_rate": 5.500935225679941e-06, "loss": 0.3394, "step": 31294 }, { "epoch": 1.436143361938415, "grad_norm": 0.48616695404052734, "learning_rate": 5.500691270804034e-06, "loss": 0.386, "step": 31295 }, { "epoch": 1.4361892524436695, "grad_norm": 0.45936718583106995, "learning_rate": 5.500447314724121e-06, "loss": 0.3246, "step": 31296 }, { "epoch": 1.4362351429489237, "grad_norm": 0.45469799637794495, "learning_rate": 5.500203357440792e-06, "loss": 0.3324, "step": 31297 }, { "epoch": 1.4362810334541782, "grad_norm": 0.4351957440376282, "learning_rate": 5.499959398954631e-06, "loss": 0.2883, "step": 31298 }, { "epoch": 1.4363269239594327, "grad_norm": 0.4529068171977997, "learning_rate": 5.499715439266226e-06, "loss": 0.3075, "step": 31299 }, { "epoch": 1.4363728144646872, "grad_norm": 0.48000335693359375, "learning_rate": 5.499471478376163e-06, "loss": 0.3785, "step": 31300 }, { "epoch": 1.4364187049699417, "grad_norm": 0.4580353796482086, "learning_rate": 5.4992275162850275e-06, "loss": 0.3354, "step": 31301 }, { "epoch": 1.4364645954751962, "grad_norm": 0.4898914694786072, "learning_rate": 5.498983552993408e-06, "loss": 0.411, "step": 31302 }, { "epoch": 1.4365104859804507, "grad_norm": 0.4478312134742737, "learning_rate": 5.49873958850189e-06, "loss": 0.3099, "step": 31303 }, { "epoch": 1.4365563764857052, "grad_norm": 0.46175700426101685, "learning_rate": 5.498495622811061e-06, "loss": 0.3741, "step": 31304 }, { "epoch": 1.4366022669909595, "grad_norm": 0.480903685092926, "learning_rate": 5.498251655921508e-06, "loss": 0.3552, "step": 31305 }, { "epoch": 1.436648157496214, "grad_norm": 0.4578835964202881, "learning_rate": 5.498007687833818e-06, "loss": 0.3696, "step": 31306 }, { "epoch": 1.4366940480014685, "grad_norm": 0.504242479801178, "learning_rate": 5.497763718548574e-06, "loss": 0.4595, "step": 31307 }, { "epoch": 1.436739938506723, "grad_norm": 0.462536096572876, "learning_rate": 5.497519748066368e-06, "loss": 0.336, "step": 31308 }, { "epoch": 1.4367858290119775, "grad_norm": 0.48290562629699707, "learning_rate": 5.497275776387782e-06, "loss": 0.4001, "step": 31309 }, { "epoch": 1.4368317195172318, "grad_norm": 0.44656771421432495, "learning_rate": 5.4970318035134065e-06, "loss": 0.2911, "step": 31310 }, { "epoch": 1.4368776100224863, "grad_norm": 0.46598732471466064, "learning_rate": 5.496787829443825e-06, "loss": 0.3483, "step": 31311 }, { "epoch": 1.4369235005277408, "grad_norm": 0.4733452796936035, "learning_rate": 5.496543854179626e-06, "loss": 0.3688, "step": 31312 }, { "epoch": 1.4369693910329953, "grad_norm": 0.4787349998950958, "learning_rate": 5.496299877721395e-06, "loss": 0.3962, "step": 31313 }, { "epoch": 1.4370152815382498, "grad_norm": 0.46761354804039, "learning_rate": 5.49605590006972e-06, "loss": 0.3196, "step": 31314 }, { "epoch": 1.4370611720435043, "grad_norm": 0.447812020778656, "learning_rate": 5.4958119212251874e-06, "loss": 0.3332, "step": 31315 }, { "epoch": 1.4371070625487588, "grad_norm": 0.4714793264865875, "learning_rate": 5.4955679411883835e-06, "loss": 0.3638, "step": 31316 }, { "epoch": 1.4371529530540132, "grad_norm": 0.5205245614051819, "learning_rate": 5.495323959959895e-06, "loss": 0.4365, "step": 31317 }, { "epoch": 1.4371988435592675, "grad_norm": 0.4729923903942108, "learning_rate": 5.4950799775403086e-06, "loss": 0.3774, "step": 31318 }, { "epoch": 1.437244734064522, "grad_norm": 0.519253671169281, "learning_rate": 5.494835993930211e-06, "loss": 0.4506, "step": 31319 }, { "epoch": 1.4372906245697765, "grad_norm": 0.4829270839691162, "learning_rate": 5.49459200913019e-06, "loss": 0.3955, "step": 31320 }, { "epoch": 1.437336515075031, "grad_norm": 0.44445738196372986, "learning_rate": 5.4943480231408305e-06, "loss": 0.3221, "step": 31321 }, { "epoch": 1.4373824055802855, "grad_norm": 0.4662395417690277, "learning_rate": 5.494104035962721e-06, "loss": 0.3613, "step": 31322 }, { "epoch": 1.4374282960855398, "grad_norm": 0.46019259095191956, "learning_rate": 5.4938600475964455e-06, "loss": 0.3238, "step": 31323 }, { "epoch": 1.4374741865907943, "grad_norm": 0.4775780737400055, "learning_rate": 5.493616058042592e-06, "loss": 0.3515, "step": 31324 }, { "epoch": 1.4375200770960488, "grad_norm": 0.49702268838882446, "learning_rate": 5.49337206730175e-06, "loss": 0.4333, "step": 31325 }, { "epoch": 1.4375659676013033, "grad_norm": 0.4497983455657959, "learning_rate": 5.493128075374503e-06, "loss": 0.3013, "step": 31326 }, { "epoch": 1.4376118581065578, "grad_norm": 0.4566693902015686, "learning_rate": 5.4928840822614384e-06, "loss": 0.3357, "step": 31327 }, { "epoch": 1.4376577486118123, "grad_norm": 0.449724942445755, "learning_rate": 5.492640087963143e-06, "loss": 0.3233, "step": 31328 }, { "epoch": 1.4377036391170668, "grad_norm": 0.5228531360626221, "learning_rate": 5.492396092480204e-06, "loss": 0.4378, "step": 31329 }, { "epoch": 1.4377495296223213, "grad_norm": 0.45829933881759644, "learning_rate": 5.492152095813208e-06, "loss": 0.3558, "step": 31330 }, { "epoch": 1.4377954201275756, "grad_norm": 0.44357579946517944, "learning_rate": 5.491908097962742e-06, "loss": 0.3203, "step": 31331 }, { "epoch": 1.43784131063283, "grad_norm": 0.444955438375473, "learning_rate": 5.491664098929391e-06, "loss": 0.2875, "step": 31332 }, { "epoch": 1.4378872011380845, "grad_norm": 0.5146957635879517, "learning_rate": 5.491420098713743e-06, "loss": 0.3701, "step": 31333 }, { "epoch": 1.437933091643339, "grad_norm": 0.46581801772117615, "learning_rate": 5.491176097316386e-06, "loss": 0.3622, "step": 31334 }, { "epoch": 1.4379789821485933, "grad_norm": 0.47365114092826843, "learning_rate": 5.490932094737904e-06, "loss": 0.3511, "step": 31335 }, { "epoch": 1.4380248726538478, "grad_norm": 0.4519864022731781, "learning_rate": 5.490688090978885e-06, "loss": 0.3157, "step": 31336 }, { "epoch": 1.4380707631591023, "grad_norm": 0.4523589611053467, "learning_rate": 5.490444086039918e-06, "loss": 0.3483, "step": 31337 }, { "epoch": 1.4381166536643568, "grad_norm": 0.47337013483047485, "learning_rate": 5.4902000799215875e-06, "loss": 0.3948, "step": 31338 }, { "epoch": 1.4381625441696113, "grad_norm": 0.47752612829208374, "learning_rate": 5.489956072624481e-06, "loss": 0.3795, "step": 31339 }, { "epoch": 1.4382084346748658, "grad_norm": 0.4637202024459839, "learning_rate": 5.489712064149182e-06, "loss": 0.35, "step": 31340 }, { "epoch": 1.4382543251801203, "grad_norm": 0.4736252427101135, "learning_rate": 5.489468054496281e-06, "loss": 0.334, "step": 31341 }, { "epoch": 1.4383002156853748, "grad_norm": 0.4821629822254181, "learning_rate": 5.489224043666366e-06, "loss": 0.3967, "step": 31342 }, { "epoch": 1.438346106190629, "grad_norm": 0.4589228928089142, "learning_rate": 5.4889800316600195e-06, "loss": 0.3323, "step": 31343 }, { "epoch": 1.4383919966958836, "grad_norm": 0.5332475900650024, "learning_rate": 5.48873601847783e-06, "loss": 0.5006, "step": 31344 }, { "epoch": 1.438437887201138, "grad_norm": 0.5547856092453003, "learning_rate": 5.488492004120387e-06, "loss": 0.4859, "step": 31345 }, { "epoch": 1.4384837777063926, "grad_norm": 0.44414642453193665, "learning_rate": 5.488247988588272e-06, "loss": 0.3392, "step": 31346 }, { "epoch": 1.438529668211647, "grad_norm": 0.4363952577114105, "learning_rate": 5.488003971882076e-06, "loss": 0.3314, "step": 31347 }, { "epoch": 1.4385755587169013, "grad_norm": 0.43330708146095276, "learning_rate": 5.487759954002385e-06, "loss": 0.2811, "step": 31348 }, { "epoch": 1.4386214492221558, "grad_norm": 0.4809993803501129, "learning_rate": 5.487515934949786e-06, "loss": 0.3886, "step": 31349 }, { "epoch": 1.4386673397274103, "grad_norm": 0.495016872882843, "learning_rate": 5.487271914724863e-06, "loss": 0.4226, "step": 31350 }, { "epoch": 1.4387132302326648, "grad_norm": 0.4973916709423065, "learning_rate": 5.487027893328206e-06, "loss": 0.3914, "step": 31351 }, { "epoch": 1.4387591207379193, "grad_norm": 0.4934927821159363, "learning_rate": 5.486783870760399e-06, "loss": 0.4294, "step": 31352 }, { "epoch": 1.4388050112431738, "grad_norm": 0.4328587055206299, "learning_rate": 5.486539847022033e-06, "loss": 0.3206, "step": 31353 }, { "epoch": 1.4388509017484283, "grad_norm": 0.45245832204818726, "learning_rate": 5.486295822113692e-06, "loss": 0.351, "step": 31354 }, { "epoch": 1.4388967922536828, "grad_norm": 0.5030629634857178, "learning_rate": 5.486051796035961e-06, "loss": 0.4261, "step": 31355 }, { "epoch": 1.438942682758937, "grad_norm": 0.4464128315448761, "learning_rate": 5.485807768789431e-06, "loss": 0.3272, "step": 31356 }, { "epoch": 1.4389885732641916, "grad_norm": 0.41742241382598877, "learning_rate": 5.485563740374684e-06, "loss": 0.2826, "step": 31357 }, { "epoch": 1.439034463769446, "grad_norm": 0.4848165214061737, "learning_rate": 5.485319710792311e-06, "loss": 0.3989, "step": 31358 }, { "epoch": 1.4390803542747006, "grad_norm": 0.43734264373779297, "learning_rate": 5.485075680042897e-06, "loss": 0.286, "step": 31359 }, { "epoch": 1.439126244779955, "grad_norm": 0.497589111328125, "learning_rate": 5.48483164812703e-06, "loss": 0.4194, "step": 31360 }, { "epoch": 1.4391721352852094, "grad_norm": 0.4430268406867981, "learning_rate": 5.484587615045295e-06, "loss": 0.3282, "step": 31361 }, { "epoch": 1.4392180257904639, "grad_norm": 0.5103000402450562, "learning_rate": 5.4843435807982805e-06, "loss": 0.4599, "step": 31362 }, { "epoch": 1.4392639162957184, "grad_norm": 0.4488638639450073, "learning_rate": 5.484099545386571e-06, "loss": 0.3391, "step": 31363 }, { "epoch": 1.4393098068009729, "grad_norm": 0.4482279121875763, "learning_rate": 5.483855508810756e-06, "loss": 0.339, "step": 31364 }, { "epoch": 1.4393556973062274, "grad_norm": 0.46682676672935486, "learning_rate": 5.483611471071422e-06, "loss": 0.3513, "step": 31365 }, { "epoch": 1.4394015878114819, "grad_norm": 0.4459417462348938, "learning_rate": 5.483367432169154e-06, "loss": 0.3308, "step": 31366 }, { "epoch": 1.4394474783167364, "grad_norm": 0.49146759510040283, "learning_rate": 5.483123392104539e-06, "loss": 0.3956, "step": 31367 }, { "epoch": 1.4394933688219909, "grad_norm": 0.48244786262512207, "learning_rate": 5.482879350878165e-06, "loss": 0.3767, "step": 31368 }, { "epoch": 1.4395392593272451, "grad_norm": 0.5042799115180969, "learning_rate": 5.482635308490619e-06, "loss": 0.4157, "step": 31369 }, { "epoch": 1.4395851498324996, "grad_norm": 0.4869748055934906, "learning_rate": 5.482391264942488e-06, "loss": 0.3814, "step": 31370 }, { "epoch": 1.4396310403377541, "grad_norm": 0.46865376830101013, "learning_rate": 5.48214722023436e-06, "loss": 0.3993, "step": 31371 }, { "epoch": 1.4396769308430086, "grad_norm": 0.4360770583152771, "learning_rate": 5.481903174366816e-06, "loss": 0.3036, "step": 31372 }, { "epoch": 1.4397228213482631, "grad_norm": 0.46867313981056213, "learning_rate": 5.48165912734045e-06, "loss": 0.3226, "step": 31373 }, { "epoch": 1.4397687118535174, "grad_norm": 0.5339978337287903, "learning_rate": 5.481415079155844e-06, "loss": 0.4624, "step": 31374 }, { "epoch": 1.439814602358772, "grad_norm": 0.5256969332695007, "learning_rate": 5.481171029813588e-06, "loss": 0.3014, "step": 31375 }, { "epoch": 1.4398604928640264, "grad_norm": 0.49928709864616394, "learning_rate": 5.480926979314267e-06, "loss": 0.401, "step": 31376 }, { "epoch": 1.439906383369281, "grad_norm": 0.4604565501213074, "learning_rate": 5.4806829276584685e-06, "loss": 0.3206, "step": 31377 }, { "epoch": 1.4399522738745354, "grad_norm": 0.46290624141693115, "learning_rate": 5.480438874846778e-06, "loss": 0.343, "step": 31378 }, { "epoch": 1.43999816437979, "grad_norm": 0.5096548795700073, "learning_rate": 5.480194820879786e-06, "loss": 0.3708, "step": 31379 }, { "epoch": 1.4400440548850444, "grad_norm": 0.4929756820201874, "learning_rate": 5.479950765758075e-06, "loss": 0.3529, "step": 31380 }, { "epoch": 1.4400899453902987, "grad_norm": 0.48528236150741577, "learning_rate": 5.479706709482235e-06, "loss": 0.353, "step": 31381 }, { "epoch": 1.4401358358955532, "grad_norm": 0.4715818166732788, "learning_rate": 5.479462652052853e-06, "loss": 0.3413, "step": 31382 }, { "epoch": 1.4401817264008077, "grad_norm": 0.45701804757118225, "learning_rate": 5.479218593470512e-06, "loss": 0.3133, "step": 31383 }, { "epoch": 1.4402276169060622, "grad_norm": 0.4825209975242615, "learning_rate": 5.478974533735802e-06, "loss": 0.3372, "step": 31384 }, { "epoch": 1.4402735074113167, "grad_norm": 0.5008915066719055, "learning_rate": 5.478730472849311e-06, "loss": 0.4343, "step": 31385 }, { "epoch": 1.440319397916571, "grad_norm": 0.4563448131084442, "learning_rate": 5.478486410811623e-06, "loss": 0.3627, "step": 31386 }, { "epoch": 1.4403652884218254, "grad_norm": 0.4518575966358185, "learning_rate": 5.478242347623327e-06, "loss": 0.3132, "step": 31387 }, { "epoch": 1.44041117892708, "grad_norm": 0.49911078810691833, "learning_rate": 5.4779982832850085e-06, "loss": 0.4573, "step": 31388 }, { "epoch": 1.4404570694323344, "grad_norm": 0.4592586159706116, "learning_rate": 5.477754217797254e-06, "loss": 0.3367, "step": 31389 }, { "epoch": 1.440502959937589, "grad_norm": 0.5126767754554749, "learning_rate": 5.477510151160653e-06, "loss": 0.455, "step": 31390 }, { "epoch": 1.4405488504428434, "grad_norm": 0.4522346556186676, "learning_rate": 5.4772660833757895e-06, "loss": 0.3122, "step": 31391 }, { "epoch": 1.440594740948098, "grad_norm": 0.45370566844940186, "learning_rate": 5.477022014443254e-06, "loss": 0.3567, "step": 31392 }, { "epoch": 1.4406406314533524, "grad_norm": 0.4503306746482849, "learning_rate": 5.476777944363629e-06, "loss": 0.3383, "step": 31393 }, { "epoch": 1.4406865219586067, "grad_norm": 0.46526479721069336, "learning_rate": 5.476533873137504e-06, "loss": 0.35, "step": 31394 }, { "epoch": 1.4407324124638612, "grad_norm": 0.4870149493217468, "learning_rate": 5.476289800765465e-06, "loss": 0.3774, "step": 31395 }, { "epoch": 1.4407783029691157, "grad_norm": 0.5725382566452026, "learning_rate": 5.4760457272481e-06, "loss": 0.454, "step": 31396 }, { "epoch": 1.4408241934743702, "grad_norm": 0.4231787323951721, "learning_rate": 5.475801652585997e-06, "loss": 0.3002, "step": 31397 }, { "epoch": 1.4408700839796247, "grad_norm": 0.45858556032180786, "learning_rate": 5.475557576779739e-06, "loss": 0.3516, "step": 31398 }, { "epoch": 1.440915974484879, "grad_norm": 0.478611558675766, "learning_rate": 5.475313499829915e-06, "loss": 0.3871, "step": 31399 }, { "epoch": 1.4409618649901335, "grad_norm": 0.4636435806751251, "learning_rate": 5.475069421737113e-06, "loss": 0.3555, "step": 31400 }, { "epoch": 1.441007755495388, "grad_norm": 0.484809547662735, "learning_rate": 5.474825342501917e-06, "loss": 0.3671, "step": 31401 }, { "epoch": 1.4410536460006425, "grad_norm": 0.457040935754776, "learning_rate": 5.474581262124918e-06, "loss": 0.3306, "step": 31402 }, { "epoch": 1.441099536505897, "grad_norm": 0.46439191699028015, "learning_rate": 5.474337180606702e-06, "loss": 0.3337, "step": 31403 }, { "epoch": 1.4411454270111514, "grad_norm": 0.46279504895210266, "learning_rate": 5.474093097947852e-06, "loss": 0.3326, "step": 31404 }, { "epoch": 1.441191317516406, "grad_norm": 0.4923481345176697, "learning_rate": 5.4738490141489585e-06, "loss": 0.3981, "step": 31405 }, { "epoch": 1.4412372080216604, "grad_norm": 0.4916951656341553, "learning_rate": 5.473604929210607e-06, "loss": 0.3998, "step": 31406 }, { "epoch": 1.4412830985269147, "grad_norm": 0.44359204173088074, "learning_rate": 5.473360843133388e-06, "loss": 0.3152, "step": 31407 }, { "epoch": 1.4413289890321692, "grad_norm": 0.458746999502182, "learning_rate": 5.473116755917883e-06, "loss": 0.3676, "step": 31408 }, { "epoch": 1.4413748795374237, "grad_norm": 0.5041618943214417, "learning_rate": 5.472872667564681e-06, "loss": 0.3937, "step": 31409 }, { "epoch": 1.4414207700426782, "grad_norm": 0.4622243046760559, "learning_rate": 5.472628578074373e-06, "loss": 0.3614, "step": 31410 }, { "epoch": 1.4414666605479327, "grad_norm": 0.4347517788410187, "learning_rate": 5.472384487447539e-06, "loss": 0.3264, "step": 31411 }, { "epoch": 1.441512551053187, "grad_norm": 0.4699138402938843, "learning_rate": 5.47214039568477e-06, "loss": 0.3739, "step": 31412 }, { "epoch": 1.4415584415584415, "grad_norm": 0.4471346139907837, "learning_rate": 5.4718963027866535e-06, "loss": 0.3192, "step": 31413 }, { "epoch": 1.441604332063696, "grad_norm": 0.4525623321533203, "learning_rate": 5.4716522087537745e-06, "loss": 0.3429, "step": 31414 }, { "epoch": 1.4416502225689505, "grad_norm": 0.4725325107574463, "learning_rate": 5.47140811358672e-06, "loss": 0.3374, "step": 31415 }, { "epoch": 1.441696113074205, "grad_norm": 0.4657646417617798, "learning_rate": 5.47116401728608e-06, "loss": 0.3482, "step": 31416 }, { "epoch": 1.4417420035794595, "grad_norm": 0.48165014386177063, "learning_rate": 5.470919919852437e-06, "loss": 0.382, "step": 31417 }, { "epoch": 1.441787894084714, "grad_norm": 0.4800724387168884, "learning_rate": 5.470675821286381e-06, "loss": 0.3851, "step": 31418 }, { "epoch": 1.4418337845899685, "grad_norm": 0.452603816986084, "learning_rate": 5.470431721588499e-06, "loss": 0.3103, "step": 31419 }, { "epoch": 1.4418796750952227, "grad_norm": 0.46072864532470703, "learning_rate": 5.470187620759377e-06, "loss": 0.3573, "step": 31420 }, { "epoch": 1.4419255656004772, "grad_norm": 0.4626932144165039, "learning_rate": 5.469943518799602e-06, "loss": 0.2985, "step": 31421 }, { "epoch": 1.4419714561057317, "grad_norm": 0.46423834562301636, "learning_rate": 5.469699415709761e-06, "loss": 0.3322, "step": 31422 }, { "epoch": 1.4420173466109862, "grad_norm": 0.49719539284706116, "learning_rate": 5.469455311490441e-06, "loss": 0.3844, "step": 31423 }, { "epoch": 1.4420632371162405, "grad_norm": 0.4634324014186859, "learning_rate": 5.469211206142229e-06, "loss": 0.3389, "step": 31424 }, { "epoch": 1.442109127621495, "grad_norm": 0.44302046298980713, "learning_rate": 5.468967099665712e-06, "loss": 0.3508, "step": 31425 }, { "epoch": 1.4421550181267495, "grad_norm": 0.4605623483657837, "learning_rate": 5.468722992061477e-06, "loss": 0.3658, "step": 31426 }, { "epoch": 1.442200908632004, "grad_norm": 0.47131460905075073, "learning_rate": 5.4684788833301124e-06, "loss": 0.3663, "step": 31427 }, { "epoch": 1.4422467991372585, "grad_norm": 0.4544435739517212, "learning_rate": 5.468234773472203e-06, "loss": 0.299, "step": 31428 }, { "epoch": 1.442292689642513, "grad_norm": 0.44661569595336914, "learning_rate": 5.467990662488336e-06, "loss": 0.2662, "step": 31429 }, { "epoch": 1.4423385801477675, "grad_norm": 0.44164687395095825, "learning_rate": 5.467746550379101e-06, "loss": 0.319, "step": 31430 }, { "epoch": 1.442384470653022, "grad_norm": 0.48393216729164124, "learning_rate": 5.467502437145082e-06, "loss": 0.3513, "step": 31431 }, { "epoch": 1.4424303611582763, "grad_norm": 0.5150102972984314, "learning_rate": 5.467258322786867e-06, "loss": 0.451, "step": 31432 }, { "epoch": 1.4424762516635308, "grad_norm": 0.496931791305542, "learning_rate": 5.4670142073050436e-06, "loss": 0.4072, "step": 31433 }, { "epoch": 1.4425221421687853, "grad_norm": 0.4517005383968353, "learning_rate": 5.4667700907001986e-06, "loss": 0.3274, "step": 31434 }, { "epoch": 1.4425680326740398, "grad_norm": 0.4690845310688019, "learning_rate": 5.466525972972919e-06, "loss": 0.372, "step": 31435 }, { "epoch": 1.4426139231792943, "grad_norm": 0.4751775562763214, "learning_rate": 5.4662818541237914e-06, "loss": 0.35, "step": 31436 }, { "epoch": 1.4426598136845485, "grad_norm": 0.4207060933113098, "learning_rate": 5.466037734153403e-06, "loss": 0.309, "step": 31437 }, { "epoch": 1.442705704189803, "grad_norm": 0.5052752494812012, "learning_rate": 5.4657936130623415e-06, "loss": 0.4932, "step": 31438 }, { "epoch": 1.4427515946950575, "grad_norm": 0.42586788535118103, "learning_rate": 5.465549490851192e-06, "loss": 0.2952, "step": 31439 }, { "epoch": 1.442797485200312, "grad_norm": 0.4814181923866272, "learning_rate": 5.465305367520544e-06, "loss": 0.3829, "step": 31440 }, { "epoch": 1.4428433757055665, "grad_norm": 0.4755573868751526, "learning_rate": 5.465061243070984e-06, "loss": 0.34, "step": 31441 }, { "epoch": 1.442889266210821, "grad_norm": 0.41971951723098755, "learning_rate": 5.464817117503098e-06, "loss": 0.3047, "step": 31442 }, { "epoch": 1.4429351567160755, "grad_norm": 0.48504501581192017, "learning_rate": 5.464572990817471e-06, "loss": 0.3744, "step": 31443 }, { "epoch": 1.44298104722133, "grad_norm": 0.4887520372867584, "learning_rate": 5.4643288630146966e-06, "loss": 0.3717, "step": 31444 }, { "epoch": 1.4430269377265843, "grad_norm": 0.48134979605674744, "learning_rate": 5.464084734095355e-06, "loss": 0.3691, "step": 31445 }, { "epoch": 1.4430728282318388, "grad_norm": 0.46565812826156616, "learning_rate": 5.463840604060038e-06, "loss": 0.3518, "step": 31446 }, { "epoch": 1.4431187187370933, "grad_norm": 0.41107267141342163, "learning_rate": 5.46359647290933e-06, "loss": 0.2712, "step": 31447 }, { "epoch": 1.4431646092423478, "grad_norm": 0.4478841722011566, "learning_rate": 5.463352340643818e-06, "loss": 0.318, "step": 31448 }, { "epoch": 1.4432104997476023, "grad_norm": 0.47210371494293213, "learning_rate": 5.4631082072640905e-06, "loss": 0.3609, "step": 31449 }, { "epoch": 1.4432563902528566, "grad_norm": 0.43305447697639465, "learning_rate": 5.462864072770734e-06, "loss": 0.2593, "step": 31450 }, { "epoch": 1.443302280758111, "grad_norm": 0.5071636438369751, "learning_rate": 5.462619937164335e-06, "loss": 0.4478, "step": 31451 }, { "epoch": 1.4433481712633656, "grad_norm": 0.4485457241535187, "learning_rate": 5.462375800445483e-06, "loss": 0.3333, "step": 31452 }, { "epoch": 1.44339406176862, "grad_norm": 0.4690832495689392, "learning_rate": 5.462131662614762e-06, "loss": 0.3258, "step": 31453 }, { "epoch": 1.4434399522738746, "grad_norm": 0.4451025128364563, "learning_rate": 5.461887523672758e-06, "loss": 0.3338, "step": 31454 }, { "epoch": 1.443485842779129, "grad_norm": 0.4559609889984131, "learning_rate": 5.4616433836200636e-06, "loss": 0.3326, "step": 31455 }, { "epoch": 1.4435317332843836, "grad_norm": 0.4676848351955414, "learning_rate": 5.46139924245726e-06, "loss": 0.3385, "step": 31456 }, { "epoch": 1.443577623789638, "grad_norm": 0.5128927230834961, "learning_rate": 5.461155100184938e-06, "loss": 0.3918, "step": 31457 }, { "epoch": 1.4436235142948923, "grad_norm": 0.47096022963523865, "learning_rate": 5.460910956803684e-06, "loss": 0.3908, "step": 31458 }, { "epoch": 1.4436694048001468, "grad_norm": 0.42631372809410095, "learning_rate": 5.460666812314084e-06, "loss": 0.2618, "step": 31459 }, { "epoch": 1.4437152953054013, "grad_norm": 0.4620649814605713, "learning_rate": 5.460422666716725e-06, "loss": 0.3385, "step": 31460 }, { "epoch": 1.4437611858106558, "grad_norm": 0.4816322326660156, "learning_rate": 5.460178520012196e-06, "loss": 0.3824, "step": 31461 }, { "epoch": 1.4438070763159103, "grad_norm": 0.47888118028640747, "learning_rate": 5.459934372201082e-06, "loss": 0.3892, "step": 31462 }, { "epoch": 1.4438529668211646, "grad_norm": 0.4660278260707855, "learning_rate": 5.459690223283972e-06, "loss": 0.359, "step": 31463 }, { "epoch": 1.443898857326419, "grad_norm": 0.45937272906303406, "learning_rate": 5.4594460732614515e-06, "loss": 0.3214, "step": 31464 }, { "epoch": 1.4439447478316736, "grad_norm": 0.46098822355270386, "learning_rate": 5.459201922134108e-06, "loss": 0.3603, "step": 31465 }, { "epoch": 1.443990638336928, "grad_norm": 0.4859407842159271, "learning_rate": 5.458957769902529e-06, "loss": 0.3799, "step": 31466 }, { "epoch": 1.4440365288421826, "grad_norm": 0.4993225038051605, "learning_rate": 5.458713616567301e-06, "loss": 0.3992, "step": 31467 }, { "epoch": 1.444082419347437, "grad_norm": 0.47318485379219055, "learning_rate": 5.458469462129013e-06, "loss": 0.3283, "step": 31468 }, { "epoch": 1.4441283098526916, "grad_norm": 0.44621405005455017, "learning_rate": 5.458225306588251e-06, "loss": 0.3268, "step": 31469 }, { "epoch": 1.4441742003579459, "grad_norm": 0.43175822496414185, "learning_rate": 5.457981149945599e-06, "loss": 0.3309, "step": 31470 }, { "epoch": 1.4442200908632004, "grad_norm": 0.46265751123428345, "learning_rate": 5.457736992201648e-06, "loss": 0.3633, "step": 31471 }, { "epoch": 1.4442659813684549, "grad_norm": 0.4149712920188904, "learning_rate": 5.457492833356986e-06, "loss": 0.2723, "step": 31472 }, { "epoch": 1.4443118718737094, "grad_norm": 0.4646666646003723, "learning_rate": 5.457248673412194e-06, "loss": 0.3783, "step": 31473 }, { "epoch": 1.4443577623789638, "grad_norm": 0.4596672058105469, "learning_rate": 5.457004512367867e-06, "loss": 0.3479, "step": 31474 }, { "epoch": 1.4444036528842181, "grad_norm": 0.4868861138820648, "learning_rate": 5.456760350224588e-06, "loss": 0.3715, "step": 31475 }, { "epoch": 1.4444495433894726, "grad_norm": 0.4642239511013031, "learning_rate": 5.456516186982943e-06, "loss": 0.3445, "step": 31476 }, { "epoch": 1.4444954338947271, "grad_norm": 0.4699695110321045, "learning_rate": 5.456272022643521e-06, "loss": 0.3801, "step": 31477 }, { "epoch": 1.4445413243999816, "grad_norm": 0.4731456935405731, "learning_rate": 5.4560278572069095e-06, "loss": 0.3329, "step": 31478 }, { "epoch": 1.4445872149052361, "grad_norm": 0.4280276596546173, "learning_rate": 5.455783690673694e-06, "loss": 0.3311, "step": 31479 }, { "epoch": 1.4446331054104906, "grad_norm": 0.4406444728374481, "learning_rate": 5.455539523044463e-06, "loss": 0.3103, "step": 31480 }, { "epoch": 1.4446789959157451, "grad_norm": 0.4373532235622406, "learning_rate": 5.455295354319803e-06, "loss": 0.3202, "step": 31481 }, { "epoch": 1.4447248864209996, "grad_norm": 0.4357433617115021, "learning_rate": 5.455051184500302e-06, "loss": 0.2938, "step": 31482 }, { "epoch": 1.4447707769262539, "grad_norm": 0.45730534195899963, "learning_rate": 5.454807013586545e-06, "loss": 0.3648, "step": 31483 }, { "epoch": 1.4448166674315084, "grad_norm": 0.46353477239608765, "learning_rate": 5.454562841579122e-06, "loss": 0.32, "step": 31484 }, { "epoch": 1.4448625579367629, "grad_norm": 0.4984106719493866, "learning_rate": 5.45431866847862e-06, "loss": 0.4263, "step": 31485 }, { "epoch": 1.4449084484420174, "grad_norm": 0.4432547986507416, "learning_rate": 5.454074494285624e-06, "loss": 0.2921, "step": 31486 }, { "epoch": 1.4449543389472719, "grad_norm": 0.4568859040737152, "learning_rate": 5.4538303190007206e-06, "loss": 0.3262, "step": 31487 }, { "epoch": 1.4450002294525262, "grad_norm": 0.4592471420764923, "learning_rate": 5.4535861426244984e-06, "loss": 0.3128, "step": 31488 }, { "epoch": 1.4450461199577807, "grad_norm": 0.4604605734348297, "learning_rate": 5.453341965157547e-06, "loss": 0.3492, "step": 31489 }, { "epoch": 1.4450920104630351, "grad_norm": 0.43592938780784607, "learning_rate": 5.453097786600451e-06, "loss": 0.3406, "step": 31490 }, { "epoch": 1.4451379009682896, "grad_norm": 0.4726647436618805, "learning_rate": 5.452853606953796e-06, "loss": 0.3332, "step": 31491 }, { "epoch": 1.4451837914735441, "grad_norm": 0.4880056381225586, "learning_rate": 5.452609426218173e-06, "loss": 0.3687, "step": 31492 }, { "epoch": 1.4452296819787986, "grad_norm": 0.47249162197113037, "learning_rate": 5.452365244394165e-06, "loss": 0.3713, "step": 31493 }, { "epoch": 1.4452755724840531, "grad_norm": 0.45968177914619446, "learning_rate": 5.452121061482363e-06, "loss": 0.3794, "step": 31494 }, { "epoch": 1.4453214629893076, "grad_norm": 0.4668162763118744, "learning_rate": 5.451876877483352e-06, "loss": 0.3412, "step": 31495 }, { "epoch": 1.445367353494562, "grad_norm": 0.41201239824295044, "learning_rate": 5.45163269239772e-06, "loss": 0.2639, "step": 31496 }, { "epoch": 1.4454132439998164, "grad_norm": 0.497307687997818, "learning_rate": 5.451388506226054e-06, "loss": 0.4233, "step": 31497 }, { "epoch": 1.445459134505071, "grad_norm": 0.4361111521720886, "learning_rate": 5.451144318968941e-06, "loss": 0.2984, "step": 31498 }, { "epoch": 1.4455050250103254, "grad_norm": 0.4693741798400879, "learning_rate": 5.450900130626967e-06, "loss": 0.3479, "step": 31499 }, { "epoch": 1.44555091551558, "grad_norm": 0.5227579474449158, "learning_rate": 5.450655941200722e-06, "loss": 0.4007, "step": 31500 }, { "epoch": 1.4455968060208342, "grad_norm": 0.5005751848220825, "learning_rate": 5.450411750690793e-06, "loss": 0.3959, "step": 31501 }, { "epoch": 1.4456426965260887, "grad_norm": 0.4914509057998657, "learning_rate": 5.450167559097763e-06, "loss": 0.3707, "step": 31502 }, { "epoch": 1.4456885870313432, "grad_norm": 0.46135854721069336, "learning_rate": 5.449923366422224e-06, "loss": 0.364, "step": 31503 }, { "epoch": 1.4457344775365977, "grad_norm": 0.4501906931400299, "learning_rate": 5.44967917266476e-06, "loss": 0.3078, "step": 31504 }, { "epoch": 1.4457803680418522, "grad_norm": 0.46237432956695557, "learning_rate": 5.44943497782596e-06, "loss": 0.3332, "step": 31505 }, { "epoch": 1.4458262585471067, "grad_norm": 0.48096054792404175, "learning_rate": 5.4491907819064106e-06, "loss": 0.3919, "step": 31506 }, { "epoch": 1.4458721490523612, "grad_norm": 0.45157212018966675, "learning_rate": 5.4489465849067005e-06, "loss": 0.3137, "step": 31507 }, { "epoch": 1.4459180395576157, "grad_norm": 0.47216635942459106, "learning_rate": 5.448702386827414e-06, "loss": 0.3668, "step": 31508 }, { "epoch": 1.44596393006287, "grad_norm": 0.47318604588508606, "learning_rate": 5.44845818766914e-06, "loss": 0.3304, "step": 31509 }, { "epoch": 1.4460098205681244, "grad_norm": 0.4905325174331665, "learning_rate": 5.448213987432466e-06, "loss": 0.4142, "step": 31510 }, { "epoch": 1.446055711073379, "grad_norm": 0.4934482276439667, "learning_rate": 5.4479697861179795e-06, "loss": 0.3646, "step": 31511 }, { "epoch": 1.4461016015786334, "grad_norm": 0.43877291679382324, "learning_rate": 5.447725583726267e-06, "loss": 0.3135, "step": 31512 }, { "epoch": 1.4461474920838877, "grad_norm": 0.44059619307518005, "learning_rate": 5.4474813802579145e-06, "loss": 0.3216, "step": 31513 }, { "epoch": 1.4461933825891422, "grad_norm": 0.48452332615852356, "learning_rate": 5.44723717571351e-06, "loss": 0.3824, "step": 31514 }, { "epoch": 1.4462392730943967, "grad_norm": 0.49147099256515503, "learning_rate": 5.446992970093643e-06, "loss": 0.3841, "step": 31515 }, { "epoch": 1.4462851635996512, "grad_norm": 0.49230459332466125, "learning_rate": 5.446748763398897e-06, "loss": 0.3908, "step": 31516 }, { "epoch": 1.4463310541049057, "grad_norm": 0.45573940873146057, "learning_rate": 5.446504555629864e-06, "loss": 0.357, "step": 31517 }, { "epoch": 1.4463769446101602, "grad_norm": 0.4334750473499298, "learning_rate": 5.446260346787126e-06, "loss": 0.3074, "step": 31518 }, { "epoch": 1.4464228351154147, "grad_norm": 0.4894486367702484, "learning_rate": 5.4460161368712735e-06, "loss": 0.3793, "step": 31519 }, { "epoch": 1.4464687256206692, "grad_norm": 0.5001718401908875, "learning_rate": 5.445771925882895e-06, "loss": 0.381, "step": 31520 }, { "epoch": 1.4465146161259235, "grad_norm": 0.48809054493904114, "learning_rate": 5.445527713822573e-06, "loss": 0.3563, "step": 31521 }, { "epoch": 1.446560506631178, "grad_norm": 0.482994019985199, "learning_rate": 5.4452835006908986e-06, "loss": 0.3875, "step": 31522 }, { "epoch": 1.4466063971364325, "grad_norm": 0.5072069764137268, "learning_rate": 5.4450392864884584e-06, "loss": 0.3783, "step": 31523 }, { "epoch": 1.446652287641687, "grad_norm": 0.4639210104942322, "learning_rate": 5.444795071215837e-06, "loss": 0.3638, "step": 31524 }, { "epoch": 1.4466981781469415, "grad_norm": 0.43786078691482544, "learning_rate": 5.444550854873625e-06, "loss": 0.3097, "step": 31525 }, { "epoch": 1.4467440686521957, "grad_norm": 0.45528531074523926, "learning_rate": 5.44430663746241e-06, "loss": 0.3035, "step": 31526 }, { "epoch": 1.4467899591574502, "grad_norm": 0.4688328504562378, "learning_rate": 5.444062418982775e-06, "loss": 0.3823, "step": 31527 }, { "epoch": 1.4468358496627047, "grad_norm": 0.4502474069595337, "learning_rate": 5.443818199435312e-06, "loss": 0.3171, "step": 31528 }, { "epoch": 1.4468817401679592, "grad_norm": 0.4974532723426819, "learning_rate": 5.4435739788206065e-06, "loss": 0.4246, "step": 31529 }, { "epoch": 1.4469276306732137, "grad_norm": 0.7088391184806824, "learning_rate": 5.443329757139245e-06, "loss": 0.2902, "step": 31530 }, { "epoch": 1.4469735211784682, "grad_norm": 0.5362744927406311, "learning_rate": 5.4430855343918144e-06, "loss": 0.5075, "step": 31531 }, { "epoch": 1.4470194116837227, "grad_norm": 0.4511568248271942, "learning_rate": 5.442841310578905e-06, "loss": 0.3422, "step": 31532 }, { "epoch": 1.4470653021889772, "grad_norm": 0.5412117838859558, "learning_rate": 5.4425970857011016e-06, "loss": 0.4104, "step": 31533 }, { "epoch": 1.4471111926942315, "grad_norm": 0.4334595203399658, "learning_rate": 5.442352859758991e-06, "loss": 0.2889, "step": 31534 }, { "epoch": 1.447157083199486, "grad_norm": 0.4764520525932312, "learning_rate": 5.4421086327531604e-06, "loss": 0.3629, "step": 31535 }, { "epoch": 1.4472029737047405, "grad_norm": 0.4808928966522217, "learning_rate": 5.441864404684198e-06, "loss": 0.3618, "step": 31536 }, { "epoch": 1.447248864209995, "grad_norm": 0.44103720784187317, "learning_rate": 5.441620175552693e-06, "loss": 0.3268, "step": 31537 }, { "epoch": 1.4472947547152495, "grad_norm": 0.5004603266716003, "learning_rate": 5.4413759453592305e-06, "loss": 0.4154, "step": 31538 }, { "epoch": 1.4473406452205038, "grad_norm": 0.45953577756881714, "learning_rate": 5.4411317141043984e-06, "loss": 0.3117, "step": 31539 }, { "epoch": 1.4473865357257583, "grad_norm": 0.4854617714881897, "learning_rate": 5.440887481788784e-06, "loss": 0.359, "step": 31540 }, { "epoch": 1.4474324262310128, "grad_norm": 0.4850221574306488, "learning_rate": 5.440643248412972e-06, "loss": 0.4112, "step": 31541 }, { "epoch": 1.4474783167362673, "grad_norm": 0.4714001715183258, "learning_rate": 5.440399013977553e-06, "loss": 0.3725, "step": 31542 }, { "epoch": 1.4475242072415218, "grad_norm": 0.4845878481864929, "learning_rate": 5.4401547784831155e-06, "loss": 0.4192, "step": 31543 }, { "epoch": 1.4475700977467763, "grad_norm": 0.514161229133606, "learning_rate": 5.439910541930243e-06, "loss": 0.4018, "step": 31544 }, { "epoch": 1.4476159882520307, "grad_norm": 0.48714280128479004, "learning_rate": 5.439666304319524e-06, "loss": 0.3926, "step": 31545 }, { "epoch": 1.4476618787572852, "grad_norm": 0.4493511915206909, "learning_rate": 5.439422065651547e-06, "loss": 0.3108, "step": 31546 }, { "epoch": 1.4477077692625395, "grad_norm": 0.5395213961601257, "learning_rate": 5.439177825926899e-06, "loss": 0.4787, "step": 31547 }, { "epoch": 1.447753659767794, "grad_norm": 0.4332595765590668, "learning_rate": 5.438933585146166e-06, "loss": 0.3108, "step": 31548 }, { "epoch": 1.4477995502730485, "grad_norm": 0.45727410912513733, "learning_rate": 5.438689343309937e-06, "loss": 0.3276, "step": 31549 }, { "epoch": 1.447845440778303, "grad_norm": 0.5744962096214294, "learning_rate": 5.4384451004187985e-06, "loss": 0.3841, "step": 31550 }, { "epoch": 1.4478913312835575, "grad_norm": 0.43793392181396484, "learning_rate": 5.438200856473339e-06, "loss": 0.2916, "step": 31551 }, { "epoch": 1.4479372217888118, "grad_norm": 0.4581752121448517, "learning_rate": 5.4379566114741425e-06, "loss": 0.3608, "step": 31552 }, { "epoch": 1.4479831122940663, "grad_norm": 0.47944313287734985, "learning_rate": 5.437712365421799e-06, "loss": 0.3882, "step": 31553 }, { "epoch": 1.4480290027993208, "grad_norm": 0.4713372588157654, "learning_rate": 5.437468118316897e-06, "loss": 0.3672, "step": 31554 }, { "epoch": 1.4480748933045753, "grad_norm": 0.463824599981308, "learning_rate": 5.437223870160021e-06, "loss": 0.357, "step": 31555 }, { "epoch": 1.4481207838098298, "grad_norm": 0.47978249192237854, "learning_rate": 5.43697962095176e-06, "loss": 0.3291, "step": 31556 }, { "epoch": 1.4481666743150843, "grad_norm": 0.45111119747161865, "learning_rate": 5.4367353706927e-06, "loss": 0.31, "step": 31557 }, { "epoch": 1.4482125648203388, "grad_norm": 0.4915436804294586, "learning_rate": 5.43649111938343e-06, "loss": 0.3713, "step": 31558 }, { "epoch": 1.448258455325593, "grad_norm": 0.44064241647720337, "learning_rate": 5.4362468670245364e-06, "loss": 0.2947, "step": 31559 }, { "epoch": 1.4483043458308476, "grad_norm": 0.4814739525318146, "learning_rate": 5.436002613616608e-06, "loss": 0.3938, "step": 31560 }, { "epoch": 1.448350236336102, "grad_norm": 0.5456748604774475, "learning_rate": 5.435758359160231e-06, "loss": 0.4717, "step": 31561 }, { "epoch": 1.4483961268413565, "grad_norm": 0.4593081474304199, "learning_rate": 5.43551410365599e-06, "loss": 0.3437, "step": 31562 }, { "epoch": 1.448442017346611, "grad_norm": 0.48476284742355347, "learning_rate": 5.435269847104477e-06, "loss": 0.4118, "step": 31563 }, { "epoch": 1.4484879078518653, "grad_norm": 0.4382380247116089, "learning_rate": 5.435025589506277e-06, "loss": 0.3213, "step": 31564 }, { "epoch": 1.4485337983571198, "grad_norm": 0.5273840427398682, "learning_rate": 5.434781330861978e-06, "loss": 0.4325, "step": 31565 }, { "epoch": 1.4485796888623743, "grad_norm": 0.43704164028167725, "learning_rate": 5.434537071172167e-06, "loss": 0.3017, "step": 31566 }, { "epoch": 1.4486255793676288, "grad_norm": 0.4042474627494812, "learning_rate": 5.4342928104374304e-06, "loss": 0.2748, "step": 31567 }, { "epoch": 1.4486714698728833, "grad_norm": 0.47312068939208984, "learning_rate": 5.434048548658358e-06, "loss": 0.384, "step": 31568 }, { "epoch": 1.4487173603781378, "grad_norm": 0.42987897992134094, "learning_rate": 5.433804285835536e-06, "loss": 0.2709, "step": 31569 }, { "epoch": 1.4487632508833923, "grad_norm": 0.4516170024871826, "learning_rate": 5.433560021969551e-06, "loss": 0.3073, "step": 31570 }, { "epoch": 1.4488091413886468, "grad_norm": 0.47726088762283325, "learning_rate": 5.433315757060991e-06, "loss": 0.3984, "step": 31571 }, { "epoch": 1.448855031893901, "grad_norm": 0.44944024085998535, "learning_rate": 5.4330714911104435e-06, "loss": 0.3167, "step": 31572 }, { "epoch": 1.4489009223991556, "grad_norm": 0.46468833088874817, "learning_rate": 5.432827224118495e-06, "loss": 0.3506, "step": 31573 }, { "epoch": 1.44894681290441, "grad_norm": 0.44470399618148804, "learning_rate": 5.432582956085735e-06, "loss": 0.2963, "step": 31574 }, { "epoch": 1.4489927034096646, "grad_norm": 0.44895508885383606, "learning_rate": 5.432338687012748e-06, "loss": 0.3317, "step": 31575 }, { "epoch": 1.449038593914919, "grad_norm": 0.5199688673019409, "learning_rate": 5.432094416900124e-06, "loss": 0.42, "step": 31576 }, { "epoch": 1.4490844844201733, "grad_norm": 0.43362748622894287, "learning_rate": 5.43185014574845e-06, "loss": 0.2725, "step": 31577 }, { "epoch": 1.4491303749254278, "grad_norm": 0.45270612835884094, "learning_rate": 5.431605873558311e-06, "loss": 0.363, "step": 31578 }, { "epoch": 1.4491762654306823, "grad_norm": 0.49447566270828247, "learning_rate": 5.431361600330296e-06, "loss": 0.4202, "step": 31579 }, { "epoch": 1.4492221559359368, "grad_norm": 0.40922412276268005, "learning_rate": 5.4311173260649945e-06, "loss": 0.2857, "step": 31580 }, { "epoch": 1.4492680464411913, "grad_norm": 0.49631357192993164, "learning_rate": 5.430873050762989e-06, "loss": 0.3942, "step": 31581 }, { "epoch": 1.4493139369464458, "grad_norm": 0.4973682463169098, "learning_rate": 5.430628774424873e-06, "loss": 0.4185, "step": 31582 }, { "epoch": 1.4493598274517003, "grad_norm": 0.4822652041912079, "learning_rate": 5.43038449705123e-06, "loss": 0.4198, "step": 31583 }, { "epoch": 1.4494057179569548, "grad_norm": 0.5054507851600647, "learning_rate": 5.430140218642646e-06, "loss": 0.4229, "step": 31584 }, { "epoch": 1.449451608462209, "grad_norm": 0.484998881816864, "learning_rate": 5.429895939199712e-06, "loss": 0.3969, "step": 31585 }, { "epoch": 1.4494974989674636, "grad_norm": 0.46955549716949463, "learning_rate": 5.429651658723014e-06, "loss": 0.349, "step": 31586 }, { "epoch": 1.449543389472718, "grad_norm": 0.44023317098617554, "learning_rate": 5.42940737721314e-06, "loss": 0.3327, "step": 31587 }, { "epoch": 1.4495892799779726, "grad_norm": 0.4595310688018799, "learning_rate": 5.429163094670675e-06, "loss": 0.3281, "step": 31588 }, { "epoch": 1.449635170483227, "grad_norm": 0.41622549295425415, "learning_rate": 5.42891881109621e-06, "loss": 0.2747, "step": 31589 }, { "epoch": 1.4496810609884814, "grad_norm": 0.47133758664131165, "learning_rate": 5.428674526490328e-06, "loss": 0.4077, "step": 31590 }, { "epoch": 1.4497269514937359, "grad_norm": 0.4732748568058014, "learning_rate": 5.428430240853621e-06, "loss": 0.3376, "step": 31591 }, { "epoch": 1.4497728419989904, "grad_norm": 0.47463351488113403, "learning_rate": 5.428185954186675e-06, "loss": 0.3604, "step": 31592 }, { "epoch": 1.4498187325042449, "grad_norm": 0.4565639793872833, "learning_rate": 5.427941666490077e-06, "loss": 0.3554, "step": 31593 }, { "epoch": 1.4498646230094994, "grad_norm": 0.43262723088264465, "learning_rate": 5.427697377764414e-06, "loss": 0.3041, "step": 31594 }, { "epoch": 1.4499105135147539, "grad_norm": 0.4597509205341339, "learning_rate": 5.427453088010273e-06, "loss": 0.3314, "step": 31595 }, { "epoch": 1.4499564040200084, "grad_norm": 0.4963235855102539, "learning_rate": 5.4272087972282414e-06, "loss": 0.3883, "step": 31596 }, { "epoch": 1.4500022945252629, "grad_norm": 0.497183620929718, "learning_rate": 5.426964505418909e-06, "loss": 0.4004, "step": 31597 }, { "epoch": 1.4500481850305171, "grad_norm": 0.537064790725708, "learning_rate": 5.426720212582862e-06, "loss": 0.4417, "step": 31598 }, { "epoch": 1.4500940755357716, "grad_norm": 0.5430310964584351, "learning_rate": 5.426475918720686e-06, "loss": 0.3434, "step": 31599 }, { "epoch": 1.4501399660410261, "grad_norm": 0.499609112739563, "learning_rate": 5.426231623832971e-06, "loss": 0.4599, "step": 31600 }, { "epoch": 1.4501858565462806, "grad_norm": 0.4501103460788727, "learning_rate": 5.4259873279203035e-06, "loss": 0.321, "step": 31601 }, { "epoch": 1.450231747051535, "grad_norm": 0.4918352961540222, "learning_rate": 5.4257430309832714e-06, "loss": 0.3589, "step": 31602 }, { "epoch": 1.4502776375567894, "grad_norm": 0.45906490087509155, "learning_rate": 5.42549873302246e-06, "loss": 0.3443, "step": 31603 }, { "epoch": 1.450323528062044, "grad_norm": 0.4886839687824249, "learning_rate": 5.4252544340384604e-06, "loss": 0.3787, "step": 31604 }, { "epoch": 1.4503694185672984, "grad_norm": 0.47225892543792725, "learning_rate": 5.425010134031858e-06, "loss": 0.3265, "step": 31605 }, { "epoch": 1.450415309072553, "grad_norm": 0.4561483561992645, "learning_rate": 5.424765833003239e-06, "loss": 0.2961, "step": 31606 }, { "epoch": 1.4504611995778074, "grad_norm": 0.4830673336982727, "learning_rate": 5.424521530953193e-06, "loss": 0.4097, "step": 31607 }, { "epoch": 1.450507090083062, "grad_norm": 0.4718051552772522, "learning_rate": 5.424277227882307e-06, "loss": 0.3467, "step": 31608 }, { "epoch": 1.4505529805883164, "grad_norm": 0.46575626730918884, "learning_rate": 5.424032923791168e-06, "loss": 0.3914, "step": 31609 }, { "epoch": 1.4505988710935707, "grad_norm": 0.4473998248577118, "learning_rate": 5.423788618680364e-06, "loss": 0.3269, "step": 31610 }, { "epoch": 1.4506447615988252, "grad_norm": 0.4723009765148163, "learning_rate": 5.423544312550481e-06, "loss": 0.4364, "step": 31611 }, { "epoch": 1.4506906521040797, "grad_norm": 0.5076924562454224, "learning_rate": 5.423300005402108e-06, "loss": 0.4052, "step": 31612 }, { "epoch": 1.4507365426093342, "grad_norm": 0.4647425413131714, "learning_rate": 5.423055697235832e-06, "loss": 0.3597, "step": 31613 }, { "epoch": 1.4507824331145887, "grad_norm": 0.46228864789009094, "learning_rate": 5.42281138805224e-06, "loss": 0.2995, "step": 31614 }, { "epoch": 1.450828323619843, "grad_norm": 0.47446712851524353, "learning_rate": 5.422567077851922e-06, "loss": 0.3526, "step": 31615 }, { "epoch": 1.4508742141250974, "grad_norm": 0.4463285207748413, "learning_rate": 5.422322766635463e-06, "loss": 0.3148, "step": 31616 }, { "epoch": 1.450920104630352, "grad_norm": 0.4402204751968384, "learning_rate": 5.4220784544034495e-06, "loss": 0.3053, "step": 31617 }, { "epoch": 1.4509659951356064, "grad_norm": 0.5054766535758972, "learning_rate": 5.421834141156471e-06, "loss": 0.4464, "step": 31618 }, { "epoch": 1.451011885640861, "grad_norm": 0.47673550248146057, "learning_rate": 5.4215898268951165e-06, "loss": 0.3626, "step": 31619 }, { "epoch": 1.4510577761461154, "grad_norm": 0.5053106546401978, "learning_rate": 5.42134551161997e-06, "loss": 0.3768, "step": 31620 }, { "epoch": 1.45110366665137, "grad_norm": 0.49903103709220886, "learning_rate": 5.421101195331619e-06, "loss": 0.4187, "step": 31621 }, { "epoch": 1.4511495571566244, "grad_norm": 0.5214585065841675, "learning_rate": 5.420856878030655e-06, "loss": 0.4405, "step": 31622 }, { "epoch": 1.4511954476618787, "grad_norm": 0.4655395746231079, "learning_rate": 5.420612559717662e-06, "loss": 0.3906, "step": 31623 }, { "epoch": 1.4512413381671332, "grad_norm": 0.47630926966667175, "learning_rate": 5.420368240393228e-06, "loss": 0.3315, "step": 31624 }, { "epoch": 1.4512872286723877, "grad_norm": 0.4782724380493164, "learning_rate": 5.4201239200579425e-06, "loss": 0.3705, "step": 31625 }, { "epoch": 1.4513331191776422, "grad_norm": 0.47093111276626587, "learning_rate": 5.419879598712391e-06, "loss": 0.3573, "step": 31626 }, { "epoch": 1.4513790096828967, "grad_norm": 0.4574928283691406, "learning_rate": 5.41963527635716e-06, "loss": 0.3442, "step": 31627 }, { "epoch": 1.451424900188151, "grad_norm": 0.45991677045822144, "learning_rate": 5.4193909529928416e-06, "loss": 0.3417, "step": 31628 }, { "epoch": 1.4514707906934055, "grad_norm": 0.46784305572509766, "learning_rate": 5.419146628620018e-06, "loss": 0.3367, "step": 31629 }, { "epoch": 1.45151668119866, "grad_norm": 0.4615319073200226, "learning_rate": 5.418902303239279e-06, "loss": 0.4017, "step": 31630 }, { "epoch": 1.4515625717039145, "grad_norm": 0.5220027565956116, "learning_rate": 5.418657976851213e-06, "loss": 0.455, "step": 31631 }, { "epoch": 1.451608462209169, "grad_norm": 0.4927975535392761, "learning_rate": 5.418413649456406e-06, "loss": 0.3506, "step": 31632 }, { "epoch": 1.4516543527144234, "grad_norm": 0.4587079882621765, "learning_rate": 5.4181693210554485e-06, "loss": 0.3428, "step": 31633 }, { "epoch": 1.451700243219678, "grad_norm": 0.44178688526153564, "learning_rate": 5.417924991648923e-06, "loss": 0.3466, "step": 31634 }, { "epoch": 1.4517461337249324, "grad_norm": 0.47204509377479553, "learning_rate": 5.4176806612374215e-06, "loss": 0.3405, "step": 31635 }, { "epoch": 1.4517920242301867, "grad_norm": 0.4837011992931366, "learning_rate": 5.417436329821528e-06, "loss": 0.4131, "step": 31636 }, { "epoch": 1.4518379147354412, "grad_norm": 0.48836588859558105, "learning_rate": 5.417191997401834e-06, "loss": 0.4062, "step": 31637 }, { "epoch": 1.4518838052406957, "grad_norm": 0.45542967319488525, "learning_rate": 5.416947663978924e-06, "loss": 0.3178, "step": 31638 }, { "epoch": 1.4519296957459502, "grad_norm": 0.42953771352767944, "learning_rate": 5.416703329553387e-06, "loss": 0.322, "step": 31639 }, { "epoch": 1.4519755862512047, "grad_norm": 0.46427950263023376, "learning_rate": 5.416458994125808e-06, "loss": 0.3411, "step": 31640 }, { "epoch": 1.452021476756459, "grad_norm": 0.4875447452068329, "learning_rate": 5.416214657696778e-06, "loss": 0.3666, "step": 31641 }, { "epoch": 1.4520673672617135, "grad_norm": 0.513375461101532, "learning_rate": 5.415970320266884e-06, "loss": 0.4459, "step": 31642 }, { "epoch": 1.452113257766968, "grad_norm": 0.4543962776660919, "learning_rate": 5.4157259818367125e-06, "loss": 0.366, "step": 31643 }, { "epoch": 1.4521591482722225, "grad_norm": 0.45241397619247437, "learning_rate": 5.415481642406849e-06, "loss": 0.3233, "step": 31644 }, { "epoch": 1.452205038777477, "grad_norm": 0.5078842043876648, "learning_rate": 5.415237301977886e-06, "loss": 0.4535, "step": 31645 }, { "epoch": 1.4522509292827315, "grad_norm": 0.4627383053302765, "learning_rate": 5.414992960550406e-06, "loss": 0.3397, "step": 31646 }, { "epoch": 1.452296819787986, "grad_norm": 0.5406371355056763, "learning_rate": 5.414748618125001e-06, "loss": 0.4739, "step": 31647 }, { "epoch": 1.4523427102932402, "grad_norm": 0.4885638356208801, "learning_rate": 5.414504274702256e-06, "loss": 0.3873, "step": 31648 }, { "epoch": 1.4523886007984947, "grad_norm": 0.5156583786010742, "learning_rate": 5.414259930282758e-06, "loss": 0.415, "step": 31649 }, { "epoch": 1.4524344913037492, "grad_norm": 0.44994184374809265, "learning_rate": 5.414015584867096e-06, "loss": 0.3301, "step": 31650 }, { "epoch": 1.4524803818090037, "grad_norm": 0.45658576488494873, "learning_rate": 5.413771238455857e-06, "loss": 0.3248, "step": 31651 }, { "epoch": 1.4525262723142582, "grad_norm": 0.5044172406196594, "learning_rate": 5.413526891049629e-06, "loss": 0.3986, "step": 31652 }, { "epoch": 1.4525721628195125, "grad_norm": 0.4753064215183258, "learning_rate": 5.413282542649001e-06, "loss": 0.3965, "step": 31653 }, { "epoch": 1.452618053324767, "grad_norm": 0.4664745628833771, "learning_rate": 5.413038193254556e-06, "loss": 0.3506, "step": 31654 }, { "epoch": 1.4526639438300215, "grad_norm": 0.4888538122177124, "learning_rate": 5.412793842866886e-06, "loss": 0.3906, "step": 31655 }, { "epoch": 1.452709834335276, "grad_norm": 0.43374478816986084, "learning_rate": 5.4125494914865776e-06, "loss": 0.2834, "step": 31656 }, { "epoch": 1.4527557248405305, "grad_norm": 0.4544423520565033, "learning_rate": 5.412305139114216e-06, "loss": 0.3292, "step": 31657 }, { "epoch": 1.452801615345785, "grad_norm": 0.4773028492927551, "learning_rate": 5.412060785750393e-06, "loss": 0.3658, "step": 31658 }, { "epoch": 1.4528475058510395, "grad_norm": 0.47520604729652405, "learning_rate": 5.4118164313956936e-06, "loss": 0.3688, "step": 31659 }, { "epoch": 1.452893396356294, "grad_norm": 0.4983828663825989, "learning_rate": 5.411572076050704e-06, "loss": 0.3931, "step": 31660 }, { "epoch": 1.4529392868615483, "grad_norm": 0.4865405559539795, "learning_rate": 5.411327719716013e-06, "loss": 0.346, "step": 31661 }, { "epoch": 1.4529851773668028, "grad_norm": 0.4596133828163147, "learning_rate": 5.411083362392211e-06, "loss": 0.323, "step": 31662 }, { "epoch": 1.4530310678720573, "grad_norm": 0.463183730840683, "learning_rate": 5.4108390040798815e-06, "loss": 0.3429, "step": 31663 }, { "epoch": 1.4530769583773118, "grad_norm": 0.4940626621246338, "learning_rate": 5.410594644779615e-06, "loss": 0.3925, "step": 31664 }, { "epoch": 1.4531228488825663, "grad_norm": 0.4246724247932434, "learning_rate": 5.410350284491998e-06, "loss": 0.3208, "step": 31665 }, { "epoch": 1.4531687393878205, "grad_norm": 0.45821335911750793, "learning_rate": 5.410105923217617e-06, "loss": 0.3585, "step": 31666 }, { "epoch": 1.453214629893075, "grad_norm": 0.4330706298351288, "learning_rate": 5.409861560957062e-06, "loss": 0.2908, "step": 31667 }, { "epoch": 1.4532605203983295, "grad_norm": 0.44777190685272217, "learning_rate": 5.409617197710918e-06, "loss": 0.3318, "step": 31668 }, { "epoch": 1.453306410903584, "grad_norm": 0.44717422127723694, "learning_rate": 5.409372833479775e-06, "loss": 0.3287, "step": 31669 }, { "epoch": 1.4533523014088385, "grad_norm": 0.4453507959842682, "learning_rate": 5.40912846826422e-06, "loss": 0.2963, "step": 31670 }, { "epoch": 1.453398191914093, "grad_norm": 0.4331429600715637, "learning_rate": 5.408884102064837e-06, "loss": 0.2897, "step": 31671 }, { "epoch": 1.4534440824193475, "grad_norm": 0.48981642723083496, "learning_rate": 5.408639734882219e-06, "loss": 0.3694, "step": 31672 }, { "epoch": 1.453489972924602, "grad_norm": 0.4838477373123169, "learning_rate": 5.408395366716952e-06, "loss": 0.3563, "step": 31673 }, { "epoch": 1.4535358634298563, "grad_norm": 0.4682222902774811, "learning_rate": 5.408150997569622e-06, "loss": 0.366, "step": 31674 }, { "epoch": 1.4535817539351108, "grad_norm": 0.42475154995918274, "learning_rate": 5.407906627440819e-06, "loss": 0.2972, "step": 31675 }, { "epoch": 1.4536276444403653, "grad_norm": 0.4580738842487335, "learning_rate": 5.407662256331129e-06, "loss": 0.3021, "step": 31676 }, { "epoch": 1.4536735349456198, "grad_norm": 0.48073810338974, "learning_rate": 5.407417884241139e-06, "loss": 0.3635, "step": 31677 }, { "epoch": 1.4537194254508743, "grad_norm": 0.5158578753471375, "learning_rate": 5.407173511171436e-06, "loss": 0.4804, "step": 31678 }, { "epoch": 1.4537653159561286, "grad_norm": 0.4345202147960663, "learning_rate": 5.406929137122611e-06, "loss": 0.315, "step": 31679 }, { "epoch": 1.453811206461383, "grad_norm": 0.48770198225975037, "learning_rate": 5.406684762095251e-06, "loss": 0.3861, "step": 31680 }, { "epoch": 1.4538570969666376, "grad_norm": 0.4548685848712921, "learning_rate": 5.406440386089942e-06, "loss": 0.3156, "step": 31681 }, { "epoch": 1.453902987471892, "grad_norm": 0.4704999029636383, "learning_rate": 5.406196009107272e-06, "loss": 0.3198, "step": 31682 }, { "epoch": 1.4539488779771466, "grad_norm": 0.44958823919296265, "learning_rate": 5.4059516311478276e-06, "loss": 0.3172, "step": 31683 }, { "epoch": 1.453994768482401, "grad_norm": 0.46236634254455566, "learning_rate": 5.405707252212199e-06, "loss": 0.3178, "step": 31684 }, { "epoch": 1.4540406589876556, "grad_norm": 0.4707605540752411, "learning_rate": 5.4054628723009705e-06, "loss": 0.3904, "step": 31685 }, { "epoch": 1.45408654949291, "grad_norm": 0.43966299295425415, "learning_rate": 5.405218491414734e-06, "loss": 0.3162, "step": 31686 }, { "epoch": 1.4541324399981643, "grad_norm": 0.5429790616035461, "learning_rate": 5.4049741095540755e-06, "loss": 0.3775, "step": 31687 }, { "epoch": 1.4541783305034188, "grad_norm": 0.5067269802093506, "learning_rate": 5.40472972671958e-06, "loss": 0.4017, "step": 31688 }, { "epoch": 1.4542242210086733, "grad_norm": 0.48501306772232056, "learning_rate": 5.404485342911837e-06, "loss": 0.3803, "step": 31689 }, { "epoch": 1.4542701115139278, "grad_norm": 0.4812372326850891, "learning_rate": 5.404240958131436e-06, "loss": 0.3623, "step": 31690 }, { "epoch": 1.454316002019182, "grad_norm": 0.4632980525493622, "learning_rate": 5.403996572378963e-06, "loss": 0.3815, "step": 31691 }, { "epoch": 1.4543618925244366, "grad_norm": 0.42121750116348267, "learning_rate": 5.403752185655005e-06, "loss": 0.2948, "step": 31692 }, { "epoch": 1.454407783029691, "grad_norm": 0.5320842266082764, "learning_rate": 5.40350779796015e-06, "loss": 0.4286, "step": 31693 }, { "epoch": 1.4544536735349456, "grad_norm": 0.47411030530929565, "learning_rate": 5.403263409294986e-06, "loss": 0.3627, "step": 31694 }, { "epoch": 1.4544995640402, "grad_norm": 0.46880394220352173, "learning_rate": 5.4030190196601015e-06, "loss": 0.3701, "step": 31695 }, { "epoch": 1.4545454545454546, "grad_norm": 0.4768487215042114, "learning_rate": 5.402774629056084e-06, "loss": 0.388, "step": 31696 }, { "epoch": 1.454591345050709, "grad_norm": 0.486568808555603, "learning_rate": 5.402530237483521e-06, "loss": 0.3951, "step": 31697 }, { "epoch": 1.4546372355559636, "grad_norm": 0.42959803342819214, "learning_rate": 5.402285844942999e-06, "loss": 0.2645, "step": 31698 }, { "epoch": 1.4546831260612179, "grad_norm": 0.46651342511177063, "learning_rate": 5.402041451435106e-06, "loss": 0.4098, "step": 31699 }, { "epoch": 1.4547290165664724, "grad_norm": 0.5194833874702454, "learning_rate": 5.4017970569604295e-06, "loss": 0.4452, "step": 31700 }, { "epoch": 1.4547749070717269, "grad_norm": 0.48392850160598755, "learning_rate": 5.40155266151956e-06, "loss": 0.3941, "step": 31701 }, { "epoch": 1.4548207975769814, "grad_norm": 0.48023393750190735, "learning_rate": 5.401308265113082e-06, "loss": 0.3736, "step": 31702 }, { "epoch": 1.4548666880822358, "grad_norm": 0.4706224203109741, "learning_rate": 5.401063867741584e-06, "loss": 0.3447, "step": 31703 }, { "epoch": 1.4549125785874901, "grad_norm": 0.46051982045173645, "learning_rate": 5.4008194694056545e-06, "loss": 0.3691, "step": 31704 }, { "epoch": 1.4549584690927446, "grad_norm": 0.45851796865463257, "learning_rate": 5.40057507010588e-06, "loss": 0.3666, "step": 31705 }, { "epoch": 1.4550043595979991, "grad_norm": 0.4726441502571106, "learning_rate": 5.400330669842849e-06, "loss": 0.3435, "step": 31706 }, { "epoch": 1.4550502501032536, "grad_norm": 0.4611124098300934, "learning_rate": 5.400086268617151e-06, "loss": 0.3733, "step": 31707 }, { "epoch": 1.4550961406085081, "grad_norm": 0.4720187783241272, "learning_rate": 5.39984186642937e-06, "loss": 0.3392, "step": 31708 }, { "epoch": 1.4551420311137626, "grad_norm": 0.43098124861717224, "learning_rate": 5.399597463280095e-06, "loss": 0.3112, "step": 31709 }, { "epoch": 1.455187921619017, "grad_norm": 0.452161580324173, "learning_rate": 5.399353059169916e-06, "loss": 0.3045, "step": 31710 }, { "epoch": 1.4552338121242716, "grad_norm": 0.4700487554073334, "learning_rate": 5.399108654099418e-06, "loss": 0.3659, "step": 31711 }, { "epoch": 1.4552797026295259, "grad_norm": 0.4566735625267029, "learning_rate": 5.398864248069189e-06, "loss": 0.3804, "step": 31712 }, { "epoch": 1.4553255931347804, "grad_norm": 0.5041570663452148, "learning_rate": 5.398619841079818e-06, "loss": 0.417, "step": 31713 }, { "epoch": 1.4553714836400349, "grad_norm": 0.5136959552764893, "learning_rate": 5.3983754331318915e-06, "loss": 0.4331, "step": 31714 }, { "epoch": 1.4554173741452894, "grad_norm": 0.40874212980270386, "learning_rate": 5.398131024226e-06, "loss": 0.2639, "step": 31715 }, { "epoch": 1.4554632646505439, "grad_norm": 0.46759033203125, "learning_rate": 5.397886614362726e-06, "loss": 0.3704, "step": 31716 }, { "epoch": 1.4555091551557982, "grad_norm": 0.47300708293914795, "learning_rate": 5.3976422035426615e-06, "loss": 0.3936, "step": 31717 }, { "epoch": 1.4555550456610526, "grad_norm": 0.45352593064308167, "learning_rate": 5.397397791766393e-06, "loss": 0.3832, "step": 31718 }, { "epoch": 1.4556009361663071, "grad_norm": 0.5446633100509644, "learning_rate": 5.39715337903451e-06, "loss": 0.4925, "step": 31719 }, { "epoch": 1.4556468266715616, "grad_norm": 0.4448098838329315, "learning_rate": 5.396908965347596e-06, "loss": 0.2956, "step": 31720 }, { "epoch": 1.4556927171768161, "grad_norm": 0.44262444972991943, "learning_rate": 5.396664550706242e-06, "loss": 0.3123, "step": 31721 }, { "epoch": 1.4557386076820706, "grad_norm": 0.4554646611213684, "learning_rate": 5.396420135111035e-06, "loss": 0.3376, "step": 31722 }, { "epoch": 1.4557844981873251, "grad_norm": 0.4622972309589386, "learning_rate": 5.396175718562563e-06, "loss": 0.3306, "step": 31723 }, { "epoch": 1.4558303886925796, "grad_norm": 0.4878419041633606, "learning_rate": 5.3959313010614135e-06, "loss": 0.3912, "step": 31724 }, { "epoch": 1.455876279197834, "grad_norm": 0.44706910848617554, "learning_rate": 5.395686882608173e-06, "loss": 0.316, "step": 31725 }, { "epoch": 1.4559221697030884, "grad_norm": 0.44238710403442383, "learning_rate": 5.395442463203431e-06, "loss": 0.3369, "step": 31726 }, { "epoch": 1.455968060208343, "grad_norm": 0.4819006621837616, "learning_rate": 5.395198042847776e-06, "loss": 0.3574, "step": 31727 }, { "epoch": 1.4560139507135974, "grad_norm": 0.5023280382156372, "learning_rate": 5.394953621541792e-06, "loss": 0.3969, "step": 31728 }, { "epoch": 1.456059841218852, "grad_norm": 0.5112073421478271, "learning_rate": 5.394709199286071e-06, "loss": 0.4437, "step": 31729 }, { "epoch": 1.4561057317241062, "grad_norm": 0.49302011728286743, "learning_rate": 5.3944647760812e-06, "loss": 0.3514, "step": 31730 }, { "epoch": 1.4561516222293607, "grad_norm": 0.45961514115333557, "learning_rate": 5.394220351927763e-06, "loss": 0.3285, "step": 31731 }, { "epoch": 1.4561975127346152, "grad_norm": 0.4241871237754822, "learning_rate": 5.393975926826352e-06, "loss": 0.3173, "step": 31732 }, { "epoch": 1.4562434032398697, "grad_norm": 0.46221402287483215, "learning_rate": 5.3937315007775526e-06, "loss": 0.3309, "step": 31733 }, { "epoch": 1.4562892937451242, "grad_norm": 0.44979017972946167, "learning_rate": 5.393487073781953e-06, "loss": 0.3167, "step": 31734 }, { "epoch": 1.4563351842503787, "grad_norm": 0.4798332154750824, "learning_rate": 5.393242645840143e-06, "loss": 0.3991, "step": 31735 }, { "epoch": 1.4563810747556332, "grad_norm": 0.4460448622703552, "learning_rate": 5.392998216952706e-06, "loss": 0.3168, "step": 31736 }, { "epoch": 1.4564269652608874, "grad_norm": 0.4373162090778351, "learning_rate": 5.392753787120234e-06, "loss": 0.2747, "step": 31737 }, { "epoch": 1.456472855766142, "grad_norm": 0.49573132395744324, "learning_rate": 5.392509356343313e-06, "loss": 0.398, "step": 31738 }, { "epoch": 1.4565187462713964, "grad_norm": 0.4487608075141907, "learning_rate": 5.39226492462253e-06, "loss": 0.3212, "step": 31739 }, { "epoch": 1.456564636776651, "grad_norm": 0.482695609331131, "learning_rate": 5.392020491958474e-06, "loss": 0.3853, "step": 31740 }, { "epoch": 1.4566105272819054, "grad_norm": 0.47517502307891846, "learning_rate": 5.391776058351735e-06, "loss": 0.3778, "step": 31741 }, { "epoch": 1.4566564177871597, "grad_norm": 0.46444737911224365, "learning_rate": 5.391531623802893e-06, "loss": 0.3443, "step": 31742 }, { "epoch": 1.4567023082924142, "grad_norm": 0.5038636326789856, "learning_rate": 5.391287188312544e-06, "loss": 0.4268, "step": 31743 }, { "epoch": 1.4567481987976687, "grad_norm": 0.4671814441680908, "learning_rate": 5.391042751881274e-06, "loss": 0.3648, "step": 31744 }, { "epoch": 1.4567940893029232, "grad_norm": 0.5118071436882019, "learning_rate": 5.390798314509669e-06, "loss": 0.4689, "step": 31745 }, { "epoch": 1.4568399798081777, "grad_norm": 0.4481141269207001, "learning_rate": 5.3905538761983155e-06, "loss": 0.3426, "step": 31746 }, { "epoch": 1.4568858703134322, "grad_norm": 0.4869726300239563, "learning_rate": 5.390309436947805e-06, "loss": 0.399, "step": 31747 }, { "epoch": 1.4569317608186867, "grad_norm": 0.45881137251853943, "learning_rate": 5.390064996758723e-06, "loss": 0.3683, "step": 31748 }, { "epoch": 1.4569776513239412, "grad_norm": 0.49615198373794556, "learning_rate": 5.389820555631658e-06, "loss": 0.3428, "step": 31749 }, { "epoch": 1.4570235418291955, "grad_norm": 0.463264137506485, "learning_rate": 5.389576113567196e-06, "loss": 0.3802, "step": 31750 }, { "epoch": 1.45706943233445, "grad_norm": 0.5372828245162964, "learning_rate": 5.389331670565928e-06, "loss": 0.453, "step": 31751 }, { "epoch": 1.4571153228397045, "grad_norm": 0.4853935241699219, "learning_rate": 5.38908722662844e-06, "loss": 0.393, "step": 31752 }, { "epoch": 1.457161213344959, "grad_norm": 0.4778353273868561, "learning_rate": 5.3888427817553205e-06, "loss": 0.3854, "step": 31753 }, { "epoch": 1.4572071038502135, "grad_norm": 0.4788605570793152, "learning_rate": 5.388598335947155e-06, "loss": 0.3577, "step": 31754 }, { "epoch": 1.4572529943554677, "grad_norm": 0.580269992351532, "learning_rate": 5.3883538892045354e-06, "loss": 0.3927, "step": 31755 }, { "epoch": 1.4572988848607222, "grad_norm": 0.43341192603111267, "learning_rate": 5.388109441528046e-06, "loss": 0.3225, "step": 31756 }, { "epoch": 1.4573447753659767, "grad_norm": 0.4422518312931061, "learning_rate": 5.387864992918276e-06, "loss": 0.2914, "step": 31757 }, { "epoch": 1.4573906658712312, "grad_norm": 0.5456418991088867, "learning_rate": 5.387620543375814e-06, "loss": 0.3489, "step": 31758 }, { "epoch": 1.4574365563764857, "grad_norm": 0.454874187707901, "learning_rate": 5.387376092901245e-06, "loss": 0.3548, "step": 31759 }, { "epoch": 1.4574824468817402, "grad_norm": 0.5220717787742615, "learning_rate": 5.387131641495159e-06, "loss": 0.4282, "step": 31760 }, { "epoch": 1.4575283373869947, "grad_norm": 0.5027706623077393, "learning_rate": 5.386887189158145e-06, "loss": 0.4412, "step": 31761 }, { "epoch": 1.4575742278922492, "grad_norm": 0.5147600769996643, "learning_rate": 5.386642735890791e-06, "loss": 0.3795, "step": 31762 }, { "epoch": 1.4576201183975035, "grad_norm": 0.5040339827537537, "learning_rate": 5.38639828169368e-06, "loss": 0.4402, "step": 31763 }, { "epoch": 1.457666008902758, "grad_norm": 0.4829963445663452, "learning_rate": 5.386153826567404e-06, "loss": 0.3577, "step": 31764 }, { "epoch": 1.4577118994080125, "grad_norm": 0.49548453092575073, "learning_rate": 5.38590937051255e-06, "loss": 0.4109, "step": 31765 }, { "epoch": 1.457757789913267, "grad_norm": 0.4876136779785156, "learning_rate": 5.385664913529706e-06, "loss": 0.3866, "step": 31766 }, { "epoch": 1.4578036804185215, "grad_norm": 0.46445268392562866, "learning_rate": 5.38542045561946e-06, "loss": 0.3495, "step": 31767 }, { "epoch": 1.4578495709237758, "grad_norm": 0.4918076694011688, "learning_rate": 5.385175996782399e-06, "loss": 0.3771, "step": 31768 }, { "epoch": 1.4578954614290303, "grad_norm": 0.46131467819213867, "learning_rate": 5.384931537019111e-06, "loss": 0.3647, "step": 31769 }, { "epoch": 1.4579413519342848, "grad_norm": 0.43847426772117615, "learning_rate": 5.384687076330184e-06, "loss": 0.2933, "step": 31770 }, { "epoch": 1.4579872424395393, "grad_norm": 0.47957944869995117, "learning_rate": 5.384442614716206e-06, "loss": 0.3892, "step": 31771 }, { "epoch": 1.4580331329447938, "grad_norm": 0.48010751605033875, "learning_rate": 5.384198152177765e-06, "loss": 0.4048, "step": 31772 }, { "epoch": 1.4580790234500483, "grad_norm": 0.44766220450401306, "learning_rate": 5.38395368871545e-06, "loss": 0.3355, "step": 31773 }, { "epoch": 1.4581249139553027, "grad_norm": 0.4655357599258423, "learning_rate": 5.383709224329846e-06, "loss": 0.3722, "step": 31774 }, { "epoch": 1.4581708044605572, "grad_norm": 0.46345436573028564, "learning_rate": 5.383464759021544e-06, "loss": 0.341, "step": 31775 }, { "epoch": 1.4582166949658115, "grad_norm": 0.4936167299747467, "learning_rate": 5.383220292791128e-06, "loss": 0.4237, "step": 31776 }, { "epoch": 1.458262585471066, "grad_norm": 0.46812498569488525, "learning_rate": 5.3829758256391895e-06, "loss": 0.34, "step": 31777 }, { "epoch": 1.4583084759763205, "grad_norm": 0.4803893566131592, "learning_rate": 5.3827313575663155e-06, "loss": 0.3865, "step": 31778 }, { "epoch": 1.458354366481575, "grad_norm": 0.44903597235679626, "learning_rate": 5.382486888573093e-06, "loss": 0.33, "step": 31779 }, { "epoch": 1.4584002569868293, "grad_norm": 0.4652249217033386, "learning_rate": 5.38224241866011e-06, "loss": 0.3265, "step": 31780 }, { "epoch": 1.4584461474920838, "grad_norm": 0.5255602598190308, "learning_rate": 5.381997947827954e-06, "loss": 0.4483, "step": 31781 }, { "epoch": 1.4584920379973383, "grad_norm": 0.4681764841079712, "learning_rate": 5.381753476077214e-06, "loss": 0.3246, "step": 31782 }, { "epoch": 1.4585379285025928, "grad_norm": 0.45895472168922424, "learning_rate": 5.381509003408477e-06, "loss": 0.3661, "step": 31783 }, { "epoch": 1.4585838190078473, "grad_norm": 0.48549172282218933, "learning_rate": 5.3812645298223335e-06, "loss": 0.4143, "step": 31784 }, { "epoch": 1.4586297095131018, "grad_norm": 0.4738878309726715, "learning_rate": 5.3810200553193655e-06, "loss": 0.3418, "step": 31785 }, { "epoch": 1.4586756000183563, "grad_norm": 0.47453609108924866, "learning_rate": 5.380775579900167e-06, "loss": 0.3822, "step": 31786 }, { "epoch": 1.4587214905236108, "grad_norm": 0.4774502217769623, "learning_rate": 5.380531103565323e-06, "loss": 0.4415, "step": 31787 }, { "epoch": 1.458767381028865, "grad_norm": 0.48874348402023315, "learning_rate": 5.380286626315422e-06, "loss": 0.3896, "step": 31788 }, { "epoch": 1.4588132715341195, "grad_norm": 0.48349034786224365, "learning_rate": 5.3800421481510515e-06, "loss": 0.3891, "step": 31789 }, { "epoch": 1.458859162039374, "grad_norm": 0.46257421374320984, "learning_rate": 5.379797669072798e-06, "loss": 0.3315, "step": 31790 }, { "epoch": 1.4589050525446285, "grad_norm": 0.4699471592903137, "learning_rate": 5.379553189081252e-06, "loss": 0.3937, "step": 31791 }, { "epoch": 1.458950943049883, "grad_norm": 0.4804019331932068, "learning_rate": 5.379308708177001e-06, "loss": 0.3654, "step": 31792 }, { "epoch": 1.4589968335551373, "grad_norm": 0.45206400752067566, "learning_rate": 5.379064226360632e-06, "loss": 0.3551, "step": 31793 }, { "epoch": 1.4590427240603918, "grad_norm": 0.4776676297187805, "learning_rate": 5.378819743632734e-06, "loss": 0.3538, "step": 31794 }, { "epoch": 1.4590886145656463, "grad_norm": 0.44597911834716797, "learning_rate": 5.378575259993893e-06, "loss": 0.3637, "step": 31795 }, { "epoch": 1.4591345050709008, "grad_norm": 0.4166402220726013, "learning_rate": 5.3783307754446976e-06, "loss": 0.2861, "step": 31796 }, { "epoch": 1.4591803955761553, "grad_norm": 0.4976336658000946, "learning_rate": 5.378086289985738e-06, "loss": 0.3887, "step": 31797 }, { "epoch": 1.4592262860814098, "grad_norm": 0.4735249876976013, "learning_rate": 5.3778418036175985e-06, "loss": 0.3545, "step": 31798 }, { "epoch": 1.4592721765866643, "grad_norm": 0.46378806233406067, "learning_rate": 5.377597316340869e-06, "loss": 0.3275, "step": 31799 }, { "epoch": 1.4593180670919188, "grad_norm": 0.44812238216400146, "learning_rate": 5.377352828156137e-06, "loss": 0.3366, "step": 31800 }, { "epoch": 1.459363957597173, "grad_norm": 0.5025234222412109, "learning_rate": 5.377108339063991e-06, "loss": 0.3653, "step": 31801 }, { "epoch": 1.4594098481024276, "grad_norm": 0.4662858843803406, "learning_rate": 5.376863849065017e-06, "loss": 0.4061, "step": 31802 }, { "epoch": 1.459455738607682, "grad_norm": 0.4641208052635193, "learning_rate": 5.376619358159807e-06, "loss": 0.3338, "step": 31803 }, { "epoch": 1.4595016291129366, "grad_norm": 0.4493691027164459, "learning_rate": 5.376374866348944e-06, "loss": 0.3231, "step": 31804 }, { "epoch": 1.459547519618191, "grad_norm": 0.47502127289772034, "learning_rate": 5.376130373633019e-06, "loss": 0.3631, "step": 31805 }, { "epoch": 1.4595934101234453, "grad_norm": 0.5112781524658203, "learning_rate": 5.37588588001262e-06, "loss": 0.439, "step": 31806 }, { "epoch": 1.4596393006286998, "grad_norm": 0.5035282969474792, "learning_rate": 5.375641385488333e-06, "loss": 0.4192, "step": 31807 }, { "epoch": 1.4596851911339543, "grad_norm": 0.4740073084831238, "learning_rate": 5.375396890060746e-06, "loss": 0.3395, "step": 31808 }, { "epoch": 1.4597310816392088, "grad_norm": 0.4311659336090088, "learning_rate": 5.375152393730451e-06, "loss": 0.322, "step": 31809 }, { "epoch": 1.4597769721444633, "grad_norm": 0.48666098713874817, "learning_rate": 5.3749078964980315e-06, "loss": 0.343, "step": 31810 }, { "epoch": 1.4598228626497178, "grad_norm": 0.44900816679000854, "learning_rate": 5.374663398364076e-06, "loss": 0.3264, "step": 31811 }, { "epoch": 1.4598687531549723, "grad_norm": 0.4609900414943695, "learning_rate": 5.374418899329175e-06, "loss": 0.3812, "step": 31812 }, { "epoch": 1.4599146436602268, "grad_norm": 0.45296239852905273, "learning_rate": 5.374174399393914e-06, "loss": 0.3575, "step": 31813 }, { "epoch": 1.459960534165481, "grad_norm": 0.4739902913570404, "learning_rate": 5.373929898558881e-06, "loss": 0.346, "step": 31814 }, { "epoch": 1.4600064246707356, "grad_norm": 0.4945509135723114, "learning_rate": 5.3736853968246635e-06, "loss": 0.384, "step": 31815 }, { "epoch": 1.46005231517599, "grad_norm": 0.4569809138774872, "learning_rate": 5.3734408941918525e-06, "loss": 0.3309, "step": 31816 }, { "epoch": 1.4600982056812446, "grad_norm": 0.4636821150779724, "learning_rate": 5.373196390661034e-06, "loss": 0.3383, "step": 31817 }, { "epoch": 1.460144096186499, "grad_norm": 0.4706510901451111, "learning_rate": 5.372951886232795e-06, "loss": 0.3125, "step": 31818 }, { "epoch": 1.4601899866917534, "grad_norm": 0.48894205689430237, "learning_rate": 5.372707380907725e-06, "loss": 0.4102, "step": 31819 }, { "epoch": 1.4602358771970079, "grad_norm": 0.4381929337978363, "learning_rate": 5.3724628746864105e-06, "loss": 0.2922, "step": 31820 }, { "epoch": 1.4602817677022624, "grad_norm": 0.5284765362739563, "learning_rate": 5.372218367569442e-06, "loss": 0.3897, "step": 31821 }, { "epoch": 1.4603276582075169, "grad_norm": 0.47020918130874634, "learning_rate": 5.371973859557404e-06, "loss": 0.3815, "step": 31822 }, { "epoch": 1.4603735487127714, "grad_norm": 0.4818968176841736, "learning_rate": 5.371729350650889e-06, "loss": 0.3741, "step": 31823 }, { "epoch": 1.4604194392180259, "grad_norm": 0.44925743341445923, "learning_rate": 5.371484840850478e-06, "loss": 0.301, "step": 31824 }, { "epoch": 1.4604653297232804, "grad_norm": 0.5215743184089661, "learning_rate": 5.371240330156766e-06, "loss": 0.3676, "step": 31825 }, { "epoch": 1.4605112202285346, "grad_norm": 0.5002943873405457, "learning_rate": 5.370995818570338e-06, "loss": 0.3457, "step": 31826 }, { "epoch": 1.4605571107337891, "grad_norm": 0.4734555184841156, "learning_rate": 5.3707513060917824e-06, "loss": 0.3988, "step": 31827 }, { "epoch": 1.4606030012390436, "grad_norm": 0.48917949199676514, "learning_rate": 5.370506792721687e-06, "loss": 0.3829, "step": 31828 }, { "epoch": 1.4606488917442981, "grad_norm": 0.5055498480796814, "learning_rate": 5.370262278460638e-06, "loss": 0.3936, "step": 31829 }, { "epoch": 1.4606947822495526, "grad_norm": 0.4865299165248871, "learning_rate": 5.370017763309225e-06, "loss": 0.3901, "step": 31830 }, { "epoch": 1.460740672754807, "grad_norm": 0.4380596876144409, "learning_rate": 5.369773247268038e-06, "loss": 0.3105, "step": 31831 }, { "epoch": 1.4607865632600614, "grad_norm": 0.46879491209983826, "learning_rate": 5.369528730337663e-06, "loss": 0.3504, "step": 31832 }, { "epoch": 1.460832453765316, "grad_norm": 0.4532281160354614, "learning_rate": 5.369284212518687e-06, "loss": 0.3356, "step": 31833 }, { "epoch": 1.4608783442705704, "grad_norm": 0.45346301794052124, "learning_rate": 5.3690396938116995e-06, "loss": 0.3074, "step": 31834 }, { "epoch": 1.460924234775825, "grad_norm": 0.45121511816978455, "learning_rate": 5.368795174217288e-06, "loss": 0.2993, "step": 31835 }, { "epoch": 1.4609701252810794, "grad_norm": 0.46691641211509705, "learning_rate": 5.36855065373604e-06, "loss": 0.3304, "step": 31836 }, { "epoch": 1.4610160157863339, "grad_norm": 0.46297675371170044, "learning_rate": 5.368306132368545e-06, "loss": 0.3425, "step": 31837 }, { "epoch": 1.4610619062915884, "grad_norm": 0.46319469809532166, "learning_rate": 5.368061610115389e-06, "loss": 0.335, "step": 31838 }, { "epoch": 1.4611077967968427, "grad_norm": 0.4951958656311035, "learning_rate": 5.367817086977161e-06, "loss": 0.3893, "step": 31839 }, { "epoch": 1.4611536873020972, "grad_norm": 0.47001132369041443, "learning_rate": 5.36757256295445e-06, "loss": 0.3771, "step": 31840 }, { "epoch": 1.4611995778073517, "grad_norm": 0.45216435194015503, "learning_rate": 5.367328038047841e-06, "loss": 0.3441, "step": 31841 }, { "epoch": 1.4612454683126062, "grad_norm": 0.4867473840713501, "learning_rate": 5.367083512257926e-06, "loss": 0.3918, "step": 31842 }, { "epoch": 1.4612913588178607, "grad_norm": 0.45032796263694763, "learning_rate": 5.366838985585291e-06, "loss": 0.3408, "step": 31843 }, { "epoch": 1.461337249323115, "grad_norm": 0.4190317392349243, "learning_rate": 5.366594458030523e-06, "loss": 0.2429, "step": 31844 }, { "epoch": 1.4613831398283694, "grad_norm": 0.5045154094696045, "learning_rate": 5.366349929594212e-06, "loss": 0.3539, "step": 31845 }, { "epoch": 1.461429030333624, "grad_norm": 0.5018970370292664, "learning_rate": 5.3661054002769445e-06, "loss": 0.3724, "step": 31846 }, { "epoch": 1.4614749208388784, "grad_norm": 0.7125783562660217, "learning_rate": 5.365860870079307e-06, "loss": 0.4157, "step": 31847 }, { "epoch": 1.461520811344133, "grad_norm": 0.4515393376350403, "learning_rate": 5.365616339001892e-06, "loss": 0.31, "step": 31848 }, { "epoch": 1.4615667018493874, "grad_norm": 0.4798808991909027, "learning_rate": 5.365371807045285e-06, "loss": 0.3497, "step": 31849 }, { "epoch": 1.461612592354642, "grad_norm": 0.4762817323207855, "learning_rate": 5.365127274210074e-06, "loss": 0.3379, "step": 31850 }, { "epoch": 1.4616584828598964, "grad_norm": 0.4323571026325226, "learning_rate": 5.364882740496847e-06, "loss": 0.277, "step": 31851 }, { "epoch": 1.4617043733651507, "grad_norm": 0.5357112288475037, "learning_rate": 5.364638205906192e-06, "loss": 0.4157, "step": 31852 }, { "epoch": 1.4617502638704052, "grad_norm": 0.4910373389720917, "learning_rate": 5.364393670438697e-06, "loss": 0.3624, "step": 31853 }, { "epoch": 1.4617961543756597, "grad_norm": 0.45407652854919434, "learning_rate": 5.364149134094951e-06, "loss": 0.3042, "step": 31854 }, { "epoch": 1.4618420448809142, "grad_norm": 0.5146250128746033, "learning_rate": 5.363904596875541e-06, "loss": 0.3831, "step": 31855 }, { "epoch": 1.4618879353861687, "grad_norm": 0.4933314621448517, "learning_rate": 5.363660058781054e-06, "loss": 0.3884, "step": 31856 }, { "epoch": 1.461933825891423, "grad_norm": 0.49251964688301086, "learning_rate": 5.36341551981208e-06, "loss": 0.332, "step": 31857 }, { "epoch": 1.4619797163966775, "grad_norm": 0.4987245798110962, "learning_rate": 5.363170979969207e-06, "loss": 0.4071, "step": 31858 }, { "epoch": 1.462025606901932, "grad_norm": 0.458042711019516, "learning_rate": 5.362926439253022e-06, "loss": 0.3153, "step": 31859 }, { "epoch": 1.4620714974071864, "grad_norm": 0.4696270823478699, "learning_rate": 5.362681897664115e-06, "loss": 0.3869, "step": 31860 }, { "epoch": 1.462117387912441, "grad_norm": 0.4788892865180969, "learning_rate": 5.362437355203071e-06, "loss": 0.3523, "step": 31861 }, { "epoch": 1.4621632784176954, "grad_norm": 0.43843215703964233, "learning_rate": 5.362192811870479e-06, "loss": 0.3214, "step": 31862 }, { "epoch": 1.46220916892295, "grad_norm": 0.4562893211841583, "learning_rate": 5.3619482676669285e-06, "loss": 0.3109, "step": 31863 }, { "epoch": 1.4622550594282044, "grad_norm": 0.45047619938850403, "learning_rate": 5.361703722593006e-06, "loss": 0.3431, "step": 31864 }, { "epoch": 1.4623009499334587, "grad_norm": 0.49364861845970154, "learning_rate": 5.361459176649302e-06, "loss": 0.4432, "step": 31865 }, { "epoch": 1.4623468404387132, "grad_norm": 0.46983715891838074, "learning_rate": 5.361214629836401e-06, "loss": 0.3794, "step": 31866 }, { "epoch": 1.4623927309439677, "grad_norm": 0.44851234555244446, "learning_rate": 5.360970082154892e-06, "loss": 0.3093, "step": 31867 }, { "epoch": 1.4624386214492222, "grad_norm": 0.45752227306365967, "learning_rate": 5.360725533605366e-06, "loss": 0.3596, "step": 31868 }, { "epoch": 1.4624845119544765, "grad_norm": 0.47107943892478943, "learning_rate": 5.3604809841884075e-06, "loss": 0.3419, "step": 31869 }, { "epoch": 1.462530402459731, "grad_norm": 0.45457693934440613, "learning_rate": 5.360236433904606e-06, "loss": 0.3498, "step": 31870 }, { "epoch": 1.4625762929649855, "grad_norm": 0.4456236958503723, "learning_rate": 5.359991882754552e-06, "loss": 0.3447, "step": 31871 }, { "epoch": 1.46262218347024, "grad_norm": 0.49727508425712585, "learning_rate": 5.359747330738828e-06, "loss": 0.3716, "step": 31872 }, { "epoch": 1.4626680739754945, "grad_norm": 0.4723261594772339, "learning_rate": 5.359502777858025e-06, "loss": 0.3799, "step": 31873 }, { "epoch": 1.462713964480749, "grad_norm": 0.48986056447029114, "learning_rate": 5.359258224112735e-06, "loss": 0.351, "step": 31874 }, { "epoch": 1.4627598549860035, "grad_norm": 0.49697962403297424, "learning_rate": 5.359013669503539e-06, "loss": 0.3706, "step": 31875 }, { "epoch": 1.462805745491258, "grad_norm": 0.4182332456111908, "learning_rate": 5.358769114031029e-06, "loss": 0.279, "step": 31876 }, { "epoch": 1.4628516359965122, "grad_norm": 0.4793785810470581, "learning_rate": 5.358524557695795e-06, "loss": 0.3492, "step": 31877 }, { "epoch": 1.4628975265017667, "grad_norm": 0.5213657021522522, "learning_rate": 5.358280000498419e-06, "loss": 0.419, "step": 31878 }, { "epoch": 1.4629434170070212, "grad_norm": 0.4321836531162262, "learning_rate": 5.3580354424394956e-06, "loss": 0.2958, "step": 31879 }, { "epoch": 1.4629893075122757, "grad_norm": 0.45759573578834534, "learning_rate": 5.357790883519608e-06, "loss": 0.3048, "step": 31880 }, { "epoch": 1.4630351980175302, "grad_norm": 0.5028917789459229, "learning_rate": 5.357546323739347e-06, "loss": 0.4082, "step": 31881 }, { "epoch": 1.4630810885227845, "grad_norm": 0.5087604522705078, "learning_rate": 5.357301763099301e-06, "loss": 0.451, "step": 31882 }, { "epoch": 1.463126979028039, "grad_norm": 0.47791752219200134, "learning_rate": 5.357057201600054e-06, "loss": 0.3806, "step": 31883 }, { "epoch": 1.4631728695332935, "grad_norm": 0.47763291001319885, "learning_rate": 5.3568126392422e-06, "loss": 0.3599, "step": 31884 }, { "epoch": 1.463218760038548, "grad_norm": 0.46538180112838745, "learning_rate": 5.356568076026323e-06, "loss": 0.3705, "step": 31885 }, { "epoch": 1.4632646505438025, "grad_norm": 0.45281895995140076, "learning_rate": 5.356323511953012e-06, "loss": 0.3423, "step": 31886 }, { "epoch": 1.463310541049057, "grad_norm": 0.49143868684768677, "learning_rate": 5.356078947022856e-06, "loss": 0.414, "step": 31887 }, { "epoch": 1.4633564315543115, "grad_norm": 0.5179584622383118, "learning_rate": 5.355834381236443e-06, "loss": 0.4076, "step": 31888 }, { "epoch": 1.463402322059566, "grad_norm": 0.4880094826221466, "learning_rate": 5.355589814594359e-06, "loss": 0.3787, "step": 31889 }, { "epoch": 1.4634482125648203, "grad_norm": 0.4866298735141754, "learning_rate": 5.355345247097194e-06, "loss": 0.3991, "step": 31890 }, { "epoch": 1.4634941030700748, "grad_norm": 0.4958963990211487, "learning_rate": 5.355100678745539e-06, "loss": 0.4125, "step": 31891 }, { "epoch": 1.4635399935753293, "grad_norm": 0.4967672526836395, "learning_rate": 5.354856109539977e-06, "loss": 0.3868, "step": 31892 }, { "epoch": 1.4635858840805838, "grad_norm": 0.4800032377243042, "learning_rate": 5.354611539481096e-06, "loss": 0.3916, "step": 31893 }, { "epoch": 1.4636317745858383, "grad_norm": 0.48168689012527466, "learning_rate": 5.354366968569489e-06, "loss": 0.3314, "step": 31894 }, { "epoch": 1.4636776650910925, "grad_norm": 0.456110417842865, "learning_rate": 5.354122396805739e-06, "loss": 0.342, "step": 31895 }, { "epoch": 1.463723555596347, "grad_norm": 0.48731985688209534, "learning_rate": 5.353877824190437e-06, "loss": 0.3985, "step": 31896 }, { "epoch": 1.4637694461016015, "grad_norm": 0.4670856297016144, "learning_rate": 5.353633250724171e-06, "loss": 0.3715, "step": 31897 }, { "epoch": 1.463815336606856, "grad_norm": 0.4790845513343811, "learning_rate": 5.353388676407529e-06, "loss": 0.3274, "step": 31898 }, { "epoch": 1.4638612271121105, "grad_norm": 0.4402669072151184, "learning_rate": 5.353144101241099e-06, "loss": 0.2907, "step": 31899 }, { "epoch": 1.463907117617365, "grad_norm": 0.49588459730148315, "learning_rate": 5.352899525225468e-06, "loss": 0.4017, "step": 31900 }, { "epoch": 1.4639530081226195, "grad_norm": 0.43539783358573914, "learning_rate": 5.352654948361224e-06, "loss": 0.2993, "step": 31901 }, { "epoch": 1.463998898627874, "grad_norm": 0.47025546431541443, "learning_rate": 5.3524103706489584e-06, "loss": 0.3658, "step": 31902 }, { "epoch": 1.4640447891331283, "grad_norm": 0.5211172103881836, "learning_rate": 5.352165792089257e-06, "loss": 0.4068, "step": 31903 }, { "epoch": 1.4640906796383828, "grad_norm": 0.44131729006767273, "learning_rate": 5.351921212682706e-06, "loss": 0.2993, "step": 31904 }, { "epoch": 1.4641365701436373, "grad_norm": 0.4939483106136322, "learning_rate": 5.351676632429897e-06, "loss": 0.4259, "step": 31905 }, { "epoch": 1.4641824606488918, "grad_norm": 0.48727333545684814, "learning_rate": 5.3514320513314165e-06, "loss": 0.3981, "step": 31906 }, { "epoch": 1.4642283511541463, "grad_norm": 0.42784011363983154, "learning_rate": 5.351187469387852e-06, "loss": 0.282, "step": 31907 }, { "epoch": 1.4642742416594006, "grad_norm": 0.46634697914123535, "learning_rate": 5.350942886599794e-06, "loss": 0.3432, "step": 31908 }, { "epoch": 1.464320132164655, "grad_norm": 0.49110129475593567, "learning_rate": 5.350698302967829e-06, "loss": 0.3756, "step": 31909 }, { "epoch": 1.4643660226699096, "grad_norm": 0.47671616077423096, "learning_rate": 5.3504537184925445e-06, "loss": 0.3842, "step": 31910 }, { "epoch": 1.464411913175164, "grad_norm": 0.49569159746170044, "learning_rate": 5.350209133174529e-06, "loss": 0.4218, "step": 31911 }, { "epoch": 1.4644578036804186, "grad_norm": 0.5032116770744324, "learning_rate": 5.3499645470143715e-06, "loss": 0.3871, "step": 31912 }, { "epoch": 1.464503694185673, "grad_norm": 0.4802033007144928, "learning_rate": 5.349719960012659e-06, "loss": 0.3735, "step": 31913 }, { "epoch": 1.4645495846909276, "grad_norm": 0.5009698867797852, "learning_rate": 5.349475372169982e-06, "loss": 0.4292, "step": 31914 }, { "epoch": 1.4645954751961818, "grad_norm": 0.47545239329338074, "learning_rate": 5.349230783486926e-06, "loss": 0.3496, "step": 31915 }, { "epoch": 1.4646413657014363, "grad_norm": 0.4688998758792877, "learning_rate": 5.348986193964081e-06, "loss": 0.378, "step": 31916 }, { "epoch": 1.4646872562066908, "grad_norm": 0.4508756101131439, "learning_rate": 5.348741603602033e-06, "loss": 0.3678, "step": 31917 }, { "epoch": 1.4647331467119453, "grad_norm": 0.5321968793869019, "learning_rate": 5.348497012401371e-06, "loss": 0.4689, "step": 31918 }, { "epoch": 1.4647790372171998, "grad_norm": 0.46053096652030945, "learning_rate": 5.348252420362686e-06, "loss": 0.3355, "step": 31919 }, { "epoch": 1.464824927722454, "grad_norm": 0.4595707654953003, "learning_rate": 5.348007827486563e-06, "loss": 0.3379, "step": 31920 }, { "epoch": 1.4648708182277086, "grad_norm": 0.6103337407112122, "learning_rate": 5.347763233773591e-06, "loss": 0.3984, "step": 31921 }, { "epoch": 1.464916708732963, "grad_norm": 0.4572864770889282, "learning_rate": 5.347518639224358e-06, "loss": 0.379, "step": 31922 }, { "epoch": 1.4649625992382176, "grad_norm": 0.582602858543396, "learning_rate": 5.347274043839451e-06, "loss": 0.4006, "step": 31923 }, { "epoch": 1.465008489743472, "grad_norm": 0.46633827686309814, "learning_rate": 5.347029447619462e-06, "loss": 0.3706, "step": 31924 }, { "epoch": 1.4650543802487266, "grad_norm": 0.48213115334510803, "learning_rate": 5.346784850564975e-06, "loss": 0.3948, "step": 31925 }, { "epoch": 1.465100270753981, "grad_norm": 0.5211972594261169, "learning_rate": 5.3465402526765806e-06, "loss": 0.3227, "step": 31926 }, { "epoch": 1.4651461612592356, "grad_norm": 0.41819635033607483, "learning_rate": 5.346295653954866e-06, "loss": 0.2801, "step": 31927 }, { "epoch": 1.4651920517644899, "grad_norm": 0.45224785804748535, "learning_rate": 5.34605105440042e-06, "loss": 0.3536, "step": 31928 }, { "epoch": 1.4652379422697444, "grad_norm": 0.4287731647491455, "learning_rate": 5.345806454013829e-06, "loss": 0.3155, "step": 31929 }, { "epoch": 1.4652838327749989, "grad_norm": 0.46453845500946045, "learning_rate": 5.345561852795684e-06, "loss": 0.3188, "step": 31930 }, { "epoch": 1.4653297232802533, "grad_norm": 0.5049861073493958, "learning_rate": 5.345317250746572e-06, "loss": 0.427, "step": 31931 }, { "epoch": 1.4653756137855078, "grad_norm": 0.44409817457199097, "learning_rate": 5.34507264786708e-06, "loss": 0.3505, "step": 31932 }, { "epoch": 1.4654215042907621, "grad_norm": 0.4532390236854553, "learning_rate": 5.3448280441577986e-06, "loss": 0.3195, "step": 31933 }, { "epoch": 1.4654673947960166, "grad_norm": 0.4819427728652954, "learning_rate": 5.3445834396193134e-06, "loss": 0.4109, "step": 31934 }, { "epoch": 1.4655132853012711, "grad_norm": 0.4371897876262665, "learning_rate": 5.344338834252214e-06, "loss": 0.3078, "step": 31935 }, { "epoch": 1.4655591758065256, "grad_norm": 0.45668089389801025, "learning_rate": 5.344094228057089e-06, "loss": 0.3386, "step": 31936 }, { "epoch": 1.4656050663117801, "grad_norm": 0.4692455530166626, "learning_rate": 5.343849621034525e-06, "loss": 0.3192, "step": 31937 }, { "epoch": 1.4656509568170346, "grad_norm": 0.47319069504737854, "learning_rate": 5.343605013185112e-06, "loss": 0.3565, "step": 31938 }, { "epoch": 1.465696847322289, "grad_norm": 0.4688185751438141, "learning_rate": 5.343360404509436e-06, "loss": 0.3426, "step": 31939 }, { "epoch": 1.4657427378275436, "grad_norm": 0.4668067693710327, "learning_rate": 5.343115795008088e-06, "loss": 0.3926, "step": 31940 }, { "epoch": 1.4657886283327979, "grad_norm": 0.46265363693237305, "learning_rate": 5.342871184681655e-06, "loss": 0.3471, "step": 31941 }, { "epoch": 1.4658345188380524, "grad_norm": 0.4943835735321045, "learning_rate": 5.342626573530725e-06, "loss": 0.3139, "step": 31942 }, { "epoch": 1.4658804093433069, "grad_norm": 0.49796488881111145, "learning_rate": 5.342381961555885e-06, "loss": 0.3771, "step": 31943 }, { "epoch": 1.4659262998485614, "grad_norm": 0.46749645471572876, "learning_rate": 5.342137348757726e-06, "loss": 0.3448, "step": 31944 }, { "epoch": 1.4659721903538159, "grad_norm": 0.49825426936149597, "learning_rate": 5.341892735136833e-06, "loss": 0.4084, "step": 31945 }, { "epoch": 1.4660180808590701, "grad_norm": 0.4281926453113556, "learning_rate": 5.341648120693796e-06, "loss": 0.2861, "step": 31946 }, { "epoch": 1.4660639713643246, "grad_norm": 0.4827999174594879, "learning_rate": 5.341403505429206e-06, "loss": 0.2663, "step": 31947 }, { "epoch": 1.4661098618695791, "grad_norm": 0.5395724177360535, "learning_rate": 5.341158889343645e-06, "loss": 0.4527, "step": 31948 }, { "epoch": 1.4661557523748336, "grad_norm": 0.472086101770401, "learning_rate": 5.340914272437705e-06, "loss": 0.3765, "step": 31949 }, { "epoch": 1.4662016428800881, "grad_norm": 0.4563523828983307, "learning_rate": 5.340669654711974e-06, "loss": 0.35, "step": 31950 }, { "epoch": 1.4662475333853426, "grad_norm": 0.4705981910228729, "learning_rate": 5.34042503616704e-06, "loss": 0.3167, "step": 31951 }, { "epoch": 1.4662934238905971, "grad_norm": 0.4338723421096802, "learning_rate": 5.340180416803492e-06, "loss": 0.3037, "step": 31952 }, { "epoch": 1.4663393143958516, "grad_norm": 0.47680768370628357, "learning_rate": 5.339935796621918e-06, "loss": 0.3752, "step": 31953 }, { "epoch": 1.466385204901106, "grad_norm": 0.4851015508174896, "learning_rate": 5.339691175622904e-06, "loss": 0.3885, "step": 31954 }, { "epoch": 1.4664310954063604, "grad_norm": 0.45983853936195374, "learning_rate": 5.339446553807041e-06, "loss": 0.3699, "step": 31955 }, { "epoch": 1.466476985911615, "grad_norm": 0.5084765553474426, "learning_rate": 5.339201931174916e-06, "loss": 0.3832, "step": 31956 }, { "epoch": 1.4665228764168694, "grad_norm": 0.5049951076507568, "learning_rate": 5.338957307727117e-06, "loss": 0.4217, "step": 31957 }, { "epoch": 1.4665687669221237, "grad_norm": 0.4427330195903778, "learning_rate": 5.338712683464233e-06, "loss": 0.3357, "step": 31958 }, { "epoch": 1.4666146574273782, "grad_norm": 0.5348069667816162, "learning_rate": 5.338468058386852e-06, "loss": 0.4284, "step": 31959 }, { "epoch": 1.4666605479326327, "grad_norm": 0.4667181074619293, "learning_rate": 5.338223432495562e-06, "loss": 0.3662, "step": 31960 }, { "epoch": 1.4667064384378872, "grad_norm": 0.49890953302383423, "learning_rate": 5.337978805790951e-06, "loss": 0.4072, "step": 31961 }, { "epoch": 1.4667523289431417, "grad_norm": 0.4647672772407532, "learning_rate": 5.337734178273608e-06, "loss": 0.4058, "step": 31962 }, { "epoch": 1.4667982194483962, "grad_norm": 0.46787896752357483, "learning_rate": 5.337489549944122e-06, "loss": 0.3638, "step": 31963 }, { "epoch": 1.4668441099536507, "grad_norm": 0.5214434266090393, "learning_rate": 5.3372449208030785e-06, "loss": 0.4037, "step": 31964 }, { "epoch": 1.4668900004589052, "grad_norm": 0.4922039806842804, "learning_rate": 5.337000290851067e-06, "loss": 0.3502, "step": 31965 }, { "epoch": 1.4669358909641594, "grad_norm": 0.43606099486351013, "learning_rate": 5.3367556600886775e-06, "loss": 0.3256, "step": 31966 }, { "epoch": 1.466981781469414, "grad_norm": 0.4347285032272339, "learning_rate": 5.336511028516496e-06, "loss": 0.3331, "step": 31967 }, { "epoch": 1.4670276719746684, "grad_norm": 0.5161684155464172, "learning_rate": 5.336266396135113e-06, "loss": 0.4209, "step": 31968 }, { "epoch": 1.467073562479923, "grad_norm": 0.4454857409000397, "learning_rate": 5.336021762945115e-06, "loss": 0.3337, "step": 31969 }, { "epoch": 1.4671194529851774, "grad_norm": 0.4700000286102295, "learning_rate": 5.33577712894709e-06, "loss": 0.351, "step": 31970 }, { "epoch": 1.4671653434904317, "grad_norm": 0.465406209230423, "learning_rate": 5.335532494141628e-06, "loss": 0.3446, "step": 31971 }, { "epoch": 1.4672112339956862, "grad_norm": 0.5046120882034302, "learning_rate": 5.3352878585293145e-06, "loss": 0.4171, "step": 31972 }, { "epoch": 1.4672571245009407, "grad_norm": 0.49165287613868713, "learning_rate": 5.33504322211074e-06, "loss": 0.3787, "step": 31973 }, { "epoch": 1.4673030150061952, "grad_norm": 0.45873063802719116, "learning_rate": 5.334798584886495e-06, "loss": 0.3318, "step": 31974 }, { "epoch": 1.4673489055114497, "grad_norm": 0.4731920063495636, "learning_rate": 5.334553946857162e-06, "loss": 0.3932, "step": 31975 }, { "epoch": 1.4673947960167042, "grad_norm": 0.4208911657333374, "learning_rate": 5.3343093080233334e-06, "loss": 0.3071, "step": 31976 }, { "epoch": 1.4674406865219587, "grad_norm": 0.45921406149864197, "learning_rate": 5.334064668385595e-06, "loss": 0.3685, "step": 31977 }, { "epoch": 1.4674865770272132, "grad_norm": 0.52915358543396, "learning_rate": 5.3338200279445396e-06, "loss": 0.3357, "step": 31978 }, { "epoch": 1.4675324675324675, "grad_norm": 0.4377219080924988, "learning_rate": 5.3335753867007516e-06, "loss": 0.3031, "step": 31979 }, { "epoch": 1.467578358037722, "grad_norm": 0.5103516578674316, "learning_rate": 5.333330744654817e-06, "loss": 0.3654, "step": 31980 }, { "epoch": 1.4676242485429765, "grad_norm": 0.4653784930706024, "learning_rate": 5.333086101807331e-06, "loss": 0.3675, "step": 31981 }, { "epoch": 1.467670139048231, "grad_norm": 0.5007166266441345, "learning_rate": 5.332841458158875e-06, "loss": 0.4159, "step": 31982 }, { "epoch": 1.4677160295534855, "grad_norm": 0.5113239884376526, "learning_rate": 5.332596813710041e-06, "loss": 0.3993, "step": 31983 }, { "epoch": 1.4677619200587397, "grad_norm": 0.4820045828819275, "learning_rate": 5.332352168461418e-06, "loss": 0.3886, "step": 31984 }, { "epoch": 1.4678078105639942, "grad_norm": 0.48079735040664673, "learning_rate": 5.332107522413594e-06, "loss": 0.3641, "step": 31985 }, { "epoch": 1.4678537010692487, "grad_norm": 0.4428570568561554, "learning_rate": 5.331862875567153e-06, "loss": 0.2904, "step": 31986 }, { "epoch": 1.4678995915745032, "grad_norm": 0.4756988286972046, "learning_rate": 5.331618227922688e-06, "loss": 0.3731, "step": 31987 }, { "epoch": 1.4679454820797577, "grad_norm": 0.458570271730423, "learning_rate": 5.3313735794807855e-06, "loss": 0.3698, "step": 31988 }, { "epoch": 1.4679913725850122, "grad_norm": 0.4607965052127838, "learning_rate": 5.331128930242036e-06, "loss": 0.3521, "step": 31989 }, { "epoch": 1.4680372630902667, "grad_norm": 0.4442620277404785, "learning_rate": 5.330884280207024e-06, "loss": 0.3381, "step": 31990 }, { "epoch": 1.4680831535955212, "grad_norm": 0.5599028468132019, "learning_rate": 5.330639629376339e-06, "loss": 0.4966, "step": 31991 }, { "epoch": 1.4681290441007755, "grad_norm": 0.5482249855995178, "learning_rate": 5.3303949777505725e-06, "loss": 0.386, "step": 31992 }, { "epoch": 1.46817493460603, "grad_norm": 0.5110315680503845, "learning_rate": 5.330150325330308e-06, "loss": 0.4475, "step": 31993 }, { "epoch": 1.4682208251112845, "grad_norm": 0.4702593684196472, "learning_rate": 5.3299056721161365e-06, "loss": 0.3766, "step": 31994 }, { "epoch": 1.468266715616539, "grad_norm": 0.41059210896492004, "learning_rate": 5.329661018108647e-06, "loss": 0.3244, "step": 31995 }, { "epoch": 1.4683126061217933, "grad_norm": 0.4426000714302063, "learning_rate": 5.329416363308427e-06, "loss": 0.3486, "step": 31996 }, { "epoch": 1.4683584966270478, "grad_norm": 0.44065266847610474, "learning_rate": 5.3291717077160636e-06, "loss": 0.3589, "step": 31997 }, { "epoch": 1.4684043871323023, "grad_norm": 0.4203678071498871, "learning_rate": 5.328927051332147e-06, "loss": 0.2945, "step": 31998 }, { "epoch": 1.4684502776375568, "grad_norm": 0.43769046664237976, "learning_rate": 5.328682394157264e-06, "loss": 0.2856, "step": 31999 }, { "epoch": 1.4684961681428113, "grad_norm": 0.4644850194454193, "learning_rate": 5.328437736192004e-06, "loss": 0.3368, "step": 32000 }, { "epoch": 1.4685420586480658, "grad_norm": 0.46735361218452454, "learning_rate": 5.328193077436955e-06, "loss": 0.3206, "step": 32001 }, { "epoch": 1.4685879491533202, "grad_norm": 0.5124819278717041, "learning_rate": 5.327948417892704e-06, "loss": 0.4113, "step": 32002 }, { "epoch": 1.4686338396585747, "grad_norm": 0.4593260586261749, "learning_rate": 5.327703757559841e-06, "loss": 0.3472, "step": 32003 }, { "epoch": 1.468679730163829, "grad_norm": 0.45220068097114563, "learning_rate": 5.327459096438955e-06, "loss": 0.3432, "step": 32004 }, { "epoch": 1.4687256206690835, "grad_norm": 0.47802838683128357, "learning_rate": 5.327214434530632e-06, "loss": 0.3327, "step": 32005 }, { "epoch": 1.468771511174338, "grad_norm": 0.4243192970752716, "learning_rate": 5.326969771835463e-06, "loss": 0.3045, "step": 32006 }, { "epoch": 1.4688174016795925, "grad_norm": 0.45395082235336304, "learning_rate": 5.326725108354035e-06, "loss": 0.3166, "step": 32007 }, { "epoch": 1.468863292184847, "grad_norm": 0.46591436862945557, "learning_rate": 5.326480444086935e-06, "loss": 0.3331, "step": 32008 }, { "epoch": 1.4689091826901013, "grad_norm": 0.5051455497741699, "learning_rate": 5.326235779034753e-06, "loss": 0.4232, "step": 32009 }, { "epoch": 1.4689550731953558, "grad_norm": 0.5273759365081787, "learning_rate": 5.325991113198076e-06, "loss": 0.44, "step": 32010 }, { "epoch": 1.4690009637006103, "grad_norm": 0.48473554849624634, "learning_rate": 5.325746446577494e-06, "loss": 0.3928, "step": 32011 }, { "epoch": 1.4690468542058648, "grad_norm": 0.49563896656036377, "learning_rate": 5.325501779173597e-06, "loss": 0.3685, "step": 32012 }, { "epoch": 1.4690927447111193, "grad_norm": 0.41852867603302, "learning_rate": 5.325257110986966e-06, "loss": 0.2782, "step": 32013 }, { "epoch": 1.4691386352163738, "grad_norm": 0.4839380085468292, "learning_rate": 5.325012442018197e-06, "loss": 0.3628, "step": 32014 }, { "epoch": 1.4691845257216283, "grad_norm": 0.44317543506622314, "learning_rate": 5.324767772267876e-06, "loss": 0.305, "step": 32015 }, { "epoch": 1.4692304162268828, "grad_norm": 0.4602982699871063, "learning_rate": 5.32452310173659e-06, "loss": 0.3765, "step": 32016 }, { "epoch": 1.469276306732137, "grad_norm": 0.447652131319046, "learning_rate": 5.324278430424929e-06, "loss": 0.2998, "step": 32017 }, { "epoch": 1.4693221972373915, "grad_norm": 0.4808535873889923, "learning_rate": 5.324033758333482e-06, "loss": 0.4026, "step": 32018 }, { "epoch": 1.469368087742646, "grad_norm": 0.4307735860347748, "learning_rate": 5.323789085462834e-06, "loss": 0.2991, "step": 32019 }, { "epoch": 1.4694139782479005, "grad_norm": 0.4704866111278534, "learning_rate": 5.323544411813576e-06, "loss": 0.3274, "step": 32020 }, { "epoch": 1.469459868753155, "grad_norm": 0.43960535526275635, "learning_rate": 5.323299737386296e-06, "loss": 0.3541, "step": 32021 }, { "epoch": 1.4695057592584093, "grad_norm": 0.4882011115550995, "learning_rate": 5.3230550621815824e-06, "loss": 0.3884, "step": 32022 }, { "epoch": 1.4695516497636638, "grad_norm": 0.46917080879211426, "learning_rate": 5.322810386200022e-06, "loss": 0.4322, "step": 32023 }, { "epoch": 1.4695975402689183, "grad_norm": 0.4688405394554138, "learning_rate": 5.322565709442207e-06, "loss": 0.3787, "step": 32024 }, { "epoch": 1.4696434307741728, "grad_norm": 0.4486179053783417, "learning_rate": 5.322321031908721e-06, "loss": 0.3274, "step": 32025 }, { "epoch": 1.4696893212794273, "grad_norm": 0.49341434240341187, "learning_rate": 5.322076353600156e-06, "loss": 0.3607, "step": 32026 }, { "epoch": 1.4697352117846818, "grad_norm": 0.4980112910270691, "learning_rate": 5.321831674517098e-06, "loss": 0.3966, "step": 32027 }, { "epoch": 1.4697811022899363, "grad_norm": 0.4310759902000427, "learning_rate": 5.321586994660137e-06, "loss": 0.2847, "step": 32028 }, { "epoch": 1.4698269927951908, "grad_norm": 0.4371199607849121, "learning_rate": 5.3213423140298615e-06, "loss": 0.3049, "step": 32029 }, { "epoch": 1.469872883300445, "grad_norm": 0.5193871259689331, "learning_rate": 5.321097632626858e-06, "loss": 0.338, "step": 32030 }, { "epoch": 1.4699187738056996, "grad_norm": 0.4821956753730774, "learning_rate": 5.320852950451716e-06, "loss": 0.3678, "step": 32031 }, { "epoch": 1.469964664310954, "grad_norm": 0.4550100862979889, "learning_rate": 5.320608267505024e-06, "loss": 0.3292, "step": 32032 }, { "epoch": 1.4700105548162086, "grad_norm": 0.4735218286514282, "learning_rate": 5.320363583787371e-06, "loss": 0.3515, "step": 32033 }, { "epoch": 1.470056445321463, "grad_norm": 0.3999912142753601, "learning_rate": 5.320118899299345e-06, "loss": 0.2634, "step": 32034 }, { "epoch": 1.4701023358267173, "grad_norm": 0.46924370527267456, "learning_rate": 5.319874214041533e-06, "loss": 0.3743, "step": 32035 }, { "epoch": 1.4701482263319718, "grad_norm": 0.5325684547424316, "learning_rate": 5.3196295280145235e-06, "loss": 0.4135, "step": 32036 }, { "epoch": 1.4701941168372263, "grad_norm": 0.4592367112636566, "learning_rate": 5.319384841218908e-06, "loss": 0.3383, "step": 32037 }, { "epoch": 1.4702400073424808, "grad_norm": 0.4844416677951813, "learning_rate": 5.319140153655272e-06, "loss": 0.3443, "step": 32038 }, { "epoch": 1.4702858978477353, "grad_norm": 0.45764854550361633, "learning_rate": 5.318895465324205e-06, "loss": 0.3134, "step": 32039 }, { "epoch": 1.4703317883529898, "grad_norm": 0.5871613025665283, "learning_rate": 5.318650776226294e-06, "loss": 0.3807, "step": 32040 }, { "epoch": 1.4703776788582443, "grad_norm": 0.46543848514556885, "learning_rate": 5.31840608636213e-06, "loss": 0.3446, "step": 32041 }, { "epoch": 1.4704235693634988, "grad_norm": 0.5003018975257874, "learning_rate": 5.318161395732299e-06, "loss": 0.357, "step": 32042 }, { "epoch": 1.470469459868753, "grad_norm": 0.4572242498397827, "learning_rate": 5.31791670433739e-06, "loss": 0.3339, "step": 32043 }, { "epoch": 1.4705153503740076, "grad_norm": 0.43412917852401733, "learning_rate": 5.317672012177993e-06, "loss": 0.3087, "step": 32044 }, { "epoch": 1.470561240879262, "grad_norm": 0.5248821973800659, "learning_rate": 5.3174273192546924e-06, "loss": 0.405, "step": 32045 }, { "epoch": 1.4706071313845166, "grad_norm": 0.4379834234714508, "learning_rate": 5.317182625568082e-06, "loss": 0.2952, "step": 32046 }, { "epoch": 1.4706530218897709, "grad_norm": 0.46651822328567505, "learning_rate": 5.316937931118745e-06, "loss": 0.3465, "step": 32047 }, { "epoch": 1.4706989123950254, "grad_norm": 0.4583413004875183, "learning_rate": 5.3166932359072735e-06, "loss": 0.3244, "step": 32048 }, { "epoch": 1.4707448029002799, "grad_norm": 0.4537734091281891, "learning_rate": 5.316448539934256e-06, "loss": 0.353, "step": 32049 }, { "epoch": 1.4707906934055344, "grad_norm": 0.5175872445106506, "learning_rate": 5.316203843200278e-06, "loss": 0.4384, "step": 32050 }, { "epoch": 1.4708365839107889, "grad_norm": 0.48653343319892883, "learning_rate": 5.31595914570593e-06, "loss": 0.4267, "step": 32051 }, { "epoch": 1.4708824744160434, "grad_norm": 0.4833705425262451, "learning_rate": 5.315714447451801e-06, "loss": 0.4135, "step": 32052 }, { "epoch": 1.4709283649212979, "grad_norm": 0.4492006301879883, "learning_rate": 5.315469748438477e-06, "loss": 0.3139, "step": 32053 }, { "epoch": 1.4709742554265524, "grad_norm": 0.5261073112487793, "learning_rate": 5.315225048666547e-06, "loss": 0.4262, "step": 32054 }, { "epoch": 1.4710201459318066, "grad_norm": 0.4762416183948517, "learning_rate": 5.314980348136602e-06, "loss": 0.3655, "step": 32055 }, { "epoch": 1.4710660364370611, "grad_norm": 0.5034158229827881, "learning_rate": 5.3147356468492285e-06, "loss": 0.3864, "step": 32056 }, { "epoch": 1.4711119269423156, "grad_norm": 0.46422988176345825, "learning_rate": 5.314490944805015e-06, "loss": 0.3773, "step": 32057 }, { "epoch": 1.4711578174475701, "grad_norm": 0.44940030574798584, "learning_rate": 5.314246242004549e-06, "loss": 0.3538, "step": 32058 }, { "epoch": 1.4712037079528246, "grad_norm": 0.5211067199707031, "learning_rate": 5.31400153844842e-06, "loss": 0.3886, "step": 32059 }, { "epoch": 1.471249598458079, "grad_norm": 0.45785287022590637, "learning_rate": 5.313756834137218e-06, "loss": 0.3139, "step": 32060 }, { "epoch": 1.4712954889633334, "grad_norm": 0.47487348318099976, "learning_rate": 5.31351212907153e-06, "loss": 0.4065, "step": 32061 }, { "epoch": 1.471341379468588, "grad_norm": 0.6778917908668518, "learning_rate": 5.313267423251942e-06, "loss": 0.2664, "step": 32062 }, { "epoch": 1.4713872699738424, "grad_norm": 0.4605705738067627, "learning_rate": 5.313022716679047e-06, "loss": 0.3435, "step": 32063 }, { "epoch": 1.471433160479097, "grad_norm": 0.4908944070339203, "learning_rate": 5.31277800935343e-06, "loss": 0.4296, "step": 32064 }, { "epoch": 1.4714790509843514, "grad_norm": 0.45814260840415955, "learning_rate": 5.3125333012756795e-06, "loss": 0.347, "step": 32065 }, { "epoch": 1.4715249414896059, "grad_norm": 0.43747884035110474, "learning_rate": 5.312288592446388e-06, "loss": 0.3082, "step": 32066 }, { "epoch": 1.4715708319948604, "grad_norm": 0.46185818314552307, "learning_rate": 5.312043882866139e-06, "loss": 0.3405, "step": 32067 }, { "epoch": 1.4716167225001147, "grad_norm": 0.46715840697288513, "learning_rate": 5.311799172535522e-06, "loss": 0.327, "step": 32068 }, { "epoch": 1.4716626130053692, "grad_norm": 0.46411746740341187, "learning_rate": 5.3115544614551286e-06, "loss": 0.3629, "step": 32069 }, { "epoch": 1.4717085035106237, "grad_norm": 0.456381231546402, "learning_rate": 5.311309749625544e-06, "loss": 0.3341, "step": 32070 }, { "epoch": 1.4717543940158782, "grad_norm": 0.4469074606895447, "learning_rate": 5.3110650370473584e-06, "loss": 0.3651, "step": 32071 }, { "epoch": 1.4718002845211327, "grad_norm": 0.45176440477371216, "learning_rate": 5.310820323721159e-06, "loss": 0.3439, "step": 32072 }, { "epoch": 1.471846175026387, "grad_norm": 0.47577545046806335, "learning_rate": 5.310575609647533e-06, "loss": 0.3474, "step": 32073 }, { "epoch": 1.4718920655316414, "grad_norm": 0.4674552083015442, "learning_rate": 5.310330894827074e-06, "loss": 0.3557, "step": 32074 }, { "epoch": 1.471937956036896, "grad_norm": 0.4747660160064697, "learning_rate": 5.310086179260365e-06, "loss": 0.4241, "step": 32075 }, { "epoch": 1.4719838465421504, "grad_norm": 0.45715874433517456, "learning_rate": 5.309841462947997e-06, "loss": 0.3199, "step": 32076 }, { "epoch": 1.472029737047405, "grad_norm": 0.5239123106002808, "learning_rate": 5.30959674589056e-06, "loss": 0.4113, "step": 32077 }, { "epoch": 1.4720756275526594, "grad_norm": 0.4803256094455719, "learning_rate": 5.309352028088638e-06, "loss": 0.3735, "step": 32078 }, { "epoch": 1.472121518057914, "grad_norm": 0.499620258808136, "learning_rate": 5.309107309542822e-06, "loss": 0.4205, "step": 32079 }, { "epoch": 1.4721674085631684, "grad_norm": 0.4901861250400543, "learning_rate": 5.308862590253702e-06, "loss": 0.3712, "step": 32080 }, { "epoch": 1.4722132990684227, "grad_norm": 0.483675479888916, "learning_rate": 5.3086178702218635e-06, "loss": 0.3499, "step": 32081 }, { "epoch": 1.4722591895736772, "grad_norm": 0.4482039511203766, "learning_rate": 5.308373149447897e-06, "loss": 0.2974, "step": 32082 }, { "epoch": 1.4723050800789317, "grad_norm": 0.4688926339149475, "learning_rate": 5.308128427932392e-06, "loss": 0.3238, "step": 32083 }, { "epoch": 1.4723509705841862, "grad_norm": 0.46346724033355713, "learning_rate": 5.307883705675932e-06, "loss": 0.3899, "step": 32084 }, { "epoch": 1.4723968610894405, "grad_norm": 0.47251787781715393, "learning_rate": 5.307638982679111e-06, "loss": 0.4171, "step": 32085 }, { "epoch": 1.472442751594695, "grad_norm": 0.43615666031837463, "learning_rate": 5.307394258942515e-06, "loss": 0.2724, "step": 32086 }, { "epoch": 1.4724886420999495, "grad_norm": 0.4318934381008148, "learning_rate": 5.307149534466734e-06, "loss": 0.2977, "step": 32087 }, { "epoch": 1.472534532605204, "grad_norm": 0.45368871092796326, "learning_rate": 5.306904809252355e-06, "loss": 0.3191, "step": 32088 }, { "epoch": 1.4725804231104584, "grad_norm": 0.49371564388275146, "learning_rate": 5.306660083299966e-06, "loss": 0.4144, "step": 32089 }, { "epoch": 1.472626313615713, "grad_norm": 0.45591971278190613, "learning_rate": 5.306415356610155e-06, "loss": 0.3639, "step": 32090 }, { "epoch": 1.4726722041209674, "grad_norm": 0.45543017983436584, "learning_rate": 5.306170629183513e-06, "loss": 0.3441, "step": 32091 }, { "epoch": 1.472718094626222, "grad_norm": 0.43691474199295044, "learning_rate": 5.305925901020628e-06, "loss": 0.3233, "step": 32092 }, { "epoch": 1.4727639851314762, "grad_norm": 0.4881291687488556, "learning_rate": 5.305681172122088e-06, "loss": 0.4285, "step": 32093 }, { "epoch": 1.4728098756367307, "grad_norm": 0.46096301078796387, "learning_rate": 5.30543644248848e-06, "loss": 0.3269, "step": 32094 }, { "epoch": 1.4728557661419852, "grad_norm": 0.46370741724967957, "learning_rate": 5.305191712120395e-06, "loss": 0.3652, "step": 32095 }, { "epoch": 1.4729016566472397, "grad_norm": 0.598700225353241, "learning_rate": 5.304946981018419e-06, "loss": 0.3996, "step": 32096 }, { "epoch": 1.4729475471524942, "grad_norm": 0.4296024739742279, "learning_rate": 5.3047022491831415e-06, "loss": 0.3206, "step": 32097 }, { "epoch": 1.4729934376577485, "grad_norm": 0.44607773423194885, "learning_rate": 5.304457516615151e-06, "loss": 0.3374, "step": 32098 }, { "epoch": 1.473039328163003, "grad_norm": 0.48436278104782104, "learning_rate": 5.304212783315038e-06, "loss": 0.4402, "step": 32099 }, { "epoch": 1.4730852186682575, "grad_norm": 0.44917213916778564, "learning_rate": 5.303968049283389e-06, "loss": 0.3144, "step": 32100 }, { "epoch": 1.473131109173512, "grad_norm": 0.48056715726852417, "learning_rate": 5.3037233145207925e-06, "loss": 0.3178, "step": 32101 }, { "epoch": 1.4731769996787665, "grad_norm": 0.49721914529800415, "learning_rate": 5.303478579027836e-06, "loss": 0.4529, "step": 32102 }, { "epoch": 1.473222890184021, "grad_norm": 0.46930742263793945, "learning_rate": 5.303233842805111e-06, "loss": 0.4143, "step": 32103 }, { "epoch": 1.4732687806892755, "grad_norm": 0.44961121678352356, "learning_rate": 5.302989105853203e-06, "loss": 0.3229, "step": 32104 }, { "epoch": 1.47331467119453, "grad_norm": 0.4435790777206421, "learning_rate": 5.302744368172703e-06, "loss": 0.3282, "step": 32105 }, { "epoch": 1.4733605616997842, "grad_norm": 0.5323469638824463, "learning_rate": 5.302499629764198e-06, "loss": 0.4331, "step": 32106 }, { "epoch": 1.4734064522050387, "grad_norm": 0.4732191562652588, "learning_rate": 5.302254890628275e-06, "loss": 0.3269, "step": 32107 }, { "epoch": 1.4734523427102932, "grad_norm": 0.4918588399887085, "learning_rate": 5.302010150765527e-06, "loss": 0.4352, "step": 32108 }, { "epoch": 1.4734982332155477, "grad_norm": 0.4445362091064453, "learning_rate": 5.301765410176539e-06, "loss": 0.3237, "step": 32109 }, { "epoch": 1.4735441237208022, "grad_norm": 0.4067014157772064, "learning_rate": 5.301520668861902e-06, "loss": 0.278, "step": 32110 }, { "epoch": 1.4735900142260565, "grad_norm": 0.4547867774963379, "learning_rate": 5.3012759268222e-06, "loss": 0.3225, "step": 32111 }, { "epoch": 1.473635904731311, "grad_norm": 0.44877687096595764, "learning_rate": 5.301031184058026e-06, "loss": 0.3078, "step": 32112 }, { "epoch": 1.4736817952365655, "grad_norm": 0.5194283127784729, "learning_rate": 5.300786440569966e-06, "loss": 0.4406, "step": 32113 }, { "epoch": 1.47372768574182, "grad_norm": 0.5044810175895691, "learning_rate": 5.300541696358611e-06, "loss": 0.4203, "step": 32114 }, { "epoch": 1.4737735762470745, "grad_norm": 0.4562418758869171, "learning_rate": 5.3002969514245485e-06, "loss": 0.2764, "step": 32115 }, { "epoch": 1.473819466752329, "grad_norm": 0.47908076643943787, "learning_rate": 5.300052205768364e-06, "loss": 0.3461, "step": 32116 }, { "epoch": 1.4738653572575835, "grad_norm": 0.4281238615512848, "learning_rate": 5.299807459390651e-06, "loss": 0.2408, "step": 32117 }, { "epoch": 1.473911247762838, "grad_norm": 0.44104140996932983, "learning_rate": 5.299562712291995e-06, "loss": 0.3247, "step": 32118 }, { "epoch": 1.4739571382680923, "grad_norm": 0.46291911602020264, "learning_rate": 5.299317964472984e-06, "loss": 0.3629, "step": 32119 }, { "epoch": 1.4740030287733468, "grad_norm": 0.47003173828125, "learning_rate": 5.299073215934209e-06, "loss": 0.3418, "step": 32120 }, { "epoch": 1.4740489192786013, "grad_norm": 0.46763402223587036, "learning_rate": 5.298828466676258e-06, "loss": 0.3611, "step": 32121 }, { "epoch": 1.4740948097838558, "grad_norm": 0.5117161273956299, "learning_rate": 5.298583716699718e-06, "loss": 0.4045, "step": 32122 }, { "epoch": 1.4741407002891103, "grad_norm": 0.4899384677410126, "learning_rate": 5.298338966005179e-06, "loss": 0.3735, "step": 32123 }, { "epoch": 1.4741865907943645, "grad_norm": 0.48555582761764526, "learning_rate": 5.298094214593227e-06, "loss": 0.4109, "step": 32124 }, { "epoch": 1.474232481299619, "grad_norm": 0.4935511350631714, "learning_rate": 5.297849462464455e-06, "loss": 0.3251, "step": 32125 }, { "epoch": 1.4742783718048735, "grad_norm": 0.48091232776641846, "learning_rate": 5.297604709619449e-06, "loss": 0.4142, "step": 32126 }, { "epoch": 1.474324262310128, "grad_norm": 0.4515378773212433, "learning_rate": 5.297359956058795e-06, "loss": 0.2981, "step": 32127 }, { "epoch": 1.4743701528153825, "grad_norm": 0.4305891990661621, "learning_rate": 5.297115201783086e-06, "loss": 0.2811, "step": 32128 }, { "epoch": 1.474416043320637, "grad_norm": 0.4781722128391266, "learning_rate": 5.296870446792907e-06, "loss": 0.3764, "step": 32129 }, { "epoch": 1.4744619338258915, "grad_norm": 0.5054376721382141, "learning_rate": 5.29662569108885e-06, "loss": 0.3978, "step": 32130 }, { "epoch": 1.474507824331146, "grad_norm": 0.43782874941825867, "learning_rate": 5.296380934671503e-06, "loss": 0.3156, "step": 32131 }, { "epoch": 1.4745537148364003, "grad_norm": 0.47763556241989136, "learning_rate": 5.296136177541451e-06, "loss": 0.4283, "step": 32132 }, { "epoch": 1.4745996053416548, "grad_norm": 0.4743375778198242, "learning_rate": 5.295891419699286e-06, "loss": 0.3578, "step": 32133 }, { "epoch": 1.4746454958469093, "grad_norm": 0.47007471323013306, "learning_rate": 5.295646661145596e-06, "loss": 0.3753, "step": 32134 }, { "epoch": 1.4746913863521638, "grad_norm": 0.4671744406223297, "learning_rate": 5.295401901880968e-06, "loss": 0.31, "step": 32135 }, { "epoch": 1.474737276857418, "grad_norm": 0.4932475984096527, "learning_rate": 5.295157141905992e-06, "loss": 0.3852, "step": 32136 }, { "epoch": 1.4747831673626726, "grad_norm": 0.46660348773002625, "learning_rate": 5.294912381221257e-06, "loss": 0.345, "step": 32137 }, { "epoch": 1.474829057867927, "grad_norm": 0.46788978576660156, "learning_rate": 5.2946676198273494e-06, "loss": 0.387, "step": 32138 }, { "epoch": 1.4748749483731816, "grad_norm": 0.4677716791629791, "learning_rate": 5.29442285772486e-06, "loss": 0.3948, "step": 32139 }, { "epoch": 1.474920838878436, "grad_norm": 0.44886189699172974, "learning_rate": 5.294178094914376e-06, "loss": 0.3123, "step": 32140 }, { "epoch": 1.4749667293836906, "grad_norm": 0.46136191487312317, "learning_rate": 5.293933331396487e-06, "loss": 0.3558, "step": 32141 }, { "epoch": 1.475012619888945, "grad_norm": 0.4687035083770752, "learning_rate": 5.293688567171781e-06, "loss": 0.3391, "step": 32142 }, { "epoch": 1.4750585103941996, "grad_norm": 0.4563540518283844, "learning_rate": 5.293443802240847e-06, "loss": 0.3216, "step": 32143 }, { "epoch": 1.4751044008994538, "grad_norm": 0.4705131947994232, "learning_rate": 5.293199036604273e-06, "loss": 0.3701, "step": 32144 }, { "epoch": 1.4751502914047083, "grad_norm": 0.5243456959724426, "learning_rate": 5.292954270262648e-06, "loss": 0.4167, "step": 32145 }, { "epoch": 1.4751961819099628, "grad_norm": 0.5293105244636536, "learning_rate": 5.29270950321656e-06, "loss": 0.4437, "step": 32146 }, { "epoch": 1.4752420724152173, "grad_norm": 0.44688811898231506, "learning_rate": 5.292464735466599e-06, "loss": 0.3426, "step": 32147 }, { "epoch": 1.4752879629204718, "grad_norm": 0.4468919336795807, "learning_rate": 5.292219967013353e-06, "loss": 0.3188, "step": 32148 }, { "epoch": 1.475333853425726, "grad_norm": 0.4285844564437866, "learning_rate": 5.291975197857408e-06, "loss": 0.302, "step": 32149 }, { "epoch": 1.4753797439309806, "grad_norm": 0.4776487350463867, "learning_rate": 5.291730427999355e-06, "loss": 0.399, "step": 32150 }, { "epoch": 1.475425634436235, "grad_norm": 0.47976982593536377, "learning_rate": 5.291485657439785e-06, "loss": 0.3555, "step": 32151 }, { "epoch": 1.4754715249414896, "grad_norm": 0.43079400062561035, "learning_rate": 5.291240886179281e-06, "loss": 0.3161, "step": 32152 }, { "epoch": 1.475517415446744, "grad_norm": 0.475788950920105, "learning_rate": 5.290996114218436e-06, "loss": 0.3519, "step": 32153 }, { "epoch": 1.4755633059519986, "grad_norm": 0.5033673644065857, "learning_rate": 5.290751341557838e-06, "loss": 0.4372, "step": 32154 }, { "epoch": 1.475609196457253, "grad_norm": 0.4571196734905243, "learning_rate": 5.290506568198074e-06, "loss": 0.3269, "step": 32155 }, { "epoch": 1.4756550869625076, "grad_norm": 0.43819674849510193, "learning_rate": 5.2902617941397334e-06, "loss": 0.3036, "step": 32156 }, { "epoch": 1.4757009774677619, "grad_norm": 0.4568757712841034, "learning_rate": 5.2900170193834046e-06, "loss": 0.3655, "step": 32157 }, { "epoch": 1.4757468679730164, "grad_norm": 0.4275202453136444, "learning_rate": 5.289772243929676e-06, "loss": 0.2739, "step": 32158 }, { "epoch": 1.4757927584782708, "grad_norm": 0.4915981590747833, "learning_rate": 5.289527467779139e-06, "loss": 0.4168, "step": 32159 }, { "epoch": 1.4758386489835253, "grad_norm": 0.4561948776245117, "learning_rate": 5.289282690932377e-06, "loss": 0.3607, "step": 32160 }, { "epoch": 1.4758845394887798, "grad_norm": 0.4902079999446869, "learning_rate": 5.289037913389982e-06, "loss": 0.3705, "step": 32161 }, { "epoch": 1.4759304299940341, "grad_norm": 0.45454102754592896, "learning_rate": 5.2887931351525424e-06, "loss": 0.2949, "step": 32162 }, { "epoch": 1.4759763204992886, "grad_norm": 0.4169740080833435, "learning_rate": 5.288548356220645e-06, "loss": 0.2931, "step": 32163 }, { "epoch": 1.4760222110045431, "grad_norm": 0.45563727617263794, "learning_rate": 5.288303576594882e-06, "loss": 0.3207, "step": 32164 }, { "epoch": 1.4760681015097976, "grad_norm": 0.45957526564598083, "learning_rate": 5.28805879627584e-06, "loss": 0.3259, "step": 32165 }, { "epoch": 1.4761139920150521, "grad_norm": 0.4953639507293701, "learning_rate": 5.287814015264107e-06, "loss": 0.3838, "step": 32166 }, { "epoch": 1.4761598825203066, "grad_norm": 0.4996055066585541, "learning_rate": 5.2875692335602705e-06, "loss": 0.4255, "step": 32167 }, { "epoch": 1.476205773025561, "grad_norm": 0.47862255573272705, "learning_rate": 5.2873244511649224e-06, "loss": 0.3787, "step": 32168 }, { "epoch": 1.4762516635308156, "grad_norm": 0.4911181926727295, "learning_rate": 5.2870796680786505e-06, "loss": 0.3761, "step": 32169 }, { "epoch": 1.4762975540360699, "grad_norm": 0.43438398838043213, "learning_rate": 5.286834884302041e-06, "loss": 0.3189, "step": 32170 }, { "epoch": 1.4763434445413244, "grad_norm": 0.49739471077919006, "learning_rate": 5.2865900998356845e-06, "loss": 0.3793, "step": 32171 }, { "epoch": 1.4763893350465789, "grad_norm": 0.48677533864974976, "learning_rate": 5.286345314680169e-06, "loss": 0.3802, "step": 32172 }, { "epoch": 1.4764352255518334, "grad_norm": 0.4675295650959015, "learning_rate": 5.2861005288360845e-06, "loss": 0.35, "step": 32173 }, { "epoch": 1.4764811160570877, "grad_norm": 0.4477384090423584, "learning_rate": 5.2858557423040165e-06, "loss": 0.3341, "step": 32174 }, { "epoch": 1.4765270065623421, "grad_norm": 0.41926464438438416, "learning_rate": 5.285610955084558e-06, "loss": 0.292, "step": 32175 }, { "epoch": 1.4765728970675966, "grad_norm": 0.4473084807395935, "learning_rate": 5.285366167178295e-06, "loss": 0.2846, "step": 32176 }, { "epoch": 1.4766187875728511, "grad_norm": 0.450969398021698, "learning_rate": 5.285121378585815e-06, "loss": 0.3367, "step": 32177 }, { "epoch": 1.4766646780781056, "grad_norm": 0.46748706698417664, "learning_rate": 5.284876589307708e-06, "loss": 0.3904, "step": 32178 }, { "epoch": 1.4767105685833601, "grad_norm": 0.4565579295158386, "learning_rate": 5.284631799344564e-06, "loss": 0.3298, "step": 32179 }, { "epoch": 1.4767564590886146, "grad_norm": 0.4720044434070587, "learning_rate": 5.284387008696971e-06, "loss": 0.3543, "step": 32180 }, { "epoch": 1.4768023495938691, "grad_norm": 0.4883100390434265, "learning_rate": 5.284142217365514e-06, "loss": 0.3532, "step": 32181 }, { "epoch": 1.4768482400991234, "grad_norm": 0.48219752311706543, "learning_rate": 5.283897425350788e-06, "loss": 0.3726, "step": 32182 }, { "epoch": 1.476894130604378, "grad_norm": 0.46640753746032715, "learning_rate": 5.283652632653376e-06, "loss": 0.324, "step": 32183 }, { "epoch": 1.4769400211096324, "grad_norm": 0.49780508875846863, "learning_rate": 5.283407839273869e-06, "loss": 0.4142, "step": 32184 }, { "epoch": 1.476985911614887, "grad_norm": 0.4580658972263336, "learning_rate": 5.283163045212857e-06, "loss": 0.3527, "step": 32185 }, { "epoch": 1.4770318021201414, "grad_norm": 0.47155308723449707, "learning_rate": 5.282918250470926e-06, "loss": 0.3651, "step": 32186 }, { "epoch": 1.4770776926253957, "grad_norm": 0.5855438709259033, "learning_rate": 5.2826734550486665e-06, "loss": 0.4308, "step": 32187 }, { "epoch": 1.4771235831306502, "grad_norm": 0.4865555465221405, "learning_rate": 5.282428658946667e-06, "loss": 0.421, "step": 32188 }, { "epoch": 1.4771694736359047, "grad_norm": 0.5320597887039185, "learning_rate": 5.282183862165514e-06, "loss": 0.3529, "step": 32189 }, { "epoch": 1.4772153641411592, "grad_norm": 0.47090551257133484, "learning_rate": 5.2819390647058e-06, "loss": 0.3681, "step": 32190 }, { "epoch": 1.4772612546464137, "grad_norm": 0.5162542462348938, "learning_rate": 5.281694266568112e-06, "loss": 0.4189, "step": 32191 }, { "epoch": 1.4773071451516682, "grad_norm": 0.48198291659355164, "learning_rate": 5.281449467753036e-06, "loss": 0.3873, "step": 32192 }, { "epoch": 1.4773530356569227, "grad_norm": 0.45941951870918274, "learning_rate": 5.281204668261164e-06, "loss": 0.3788, "step": 32193 }, { "epoch": 1.4773989261621772, "grad_norm": 0.44771191477775574, "learning_rate": 5.280959868093083e-06, "loss": 0.3224, "step": 32194 }, { "epoch": 1.4774448166674314, "grad_norm": 0.5035176873207092, "learning_rate": 5.2807150672493816e-06, "loss": 0.4214, "step": 32195 }, { "epoch": 1.477490707172686, "grad_norm": 0.4409049451351166, "learning_rate": 5.2804702657306495e-06, "loss": 0.2641, "step": 32196 }, { "epoch": 1.4775365976779404, "grad_norm": 0.48864734172821045, "learning_rate": 5.280225463537476e-06, "loss": 0.3558, "step": 32197 }, { "epoch": 1.477582488183195, "grad_norm": 0.50663822889328, "learning_rate": 5.279980660670448e-06, "loss": 0.3584, "step": 32198 }, { "epoch": 1.4776283786884494, "grad_norm": 0.45244652032852173, "learning_rate": 5.279735857130155e-06, "loss": 0.3552, "step": 32199 }, { "epoch": 1.4776742691937037, "grad_norm": 0.5134270191192627, "learning_rate": 5.279491052917185e-06, "loss": 0.4434, "step": 32200 }, { "epoch": 1.4777201596989582, "grad_norm": 0.441841185092926, "learning_rate": 5.279246248032127e-06, "loss": 0.3022, "step": 32201 }, { "epoch": 1.4777660502042127, "grad_norm": 0.455996572971344, "learning_rate": 5.279001442475572e-06, "loss": 0.3428, "step": 32202 }, { "epoch": 1.4778119407094672, "grad_norm": 0.49613645672798157, "learning_rate": 5.278756636248104e-06, "loss": 0.3667, "step": 32203 }, { "epoch": 1.4778578312147217, "grad_norm": 0.4697081744670868, "learning_rate": 5.278511829350316e-06, "loss": 0.3463, "step": 32204 }, { "epoch": 1.4779037217199762, "grad_norm": 0.44042932987213135, "learning_rate": 5.278267021782794e-06, "loss": 0.3127, "step": 32205 }, { "epoch": 1.4779496122252307, "grad_norm": 0.5432997941970825, "learning_rate": 5.278022213546128e-06, "loss": 0.5186, "step": 32206 }, { "epoch": 1.4779955027304852, "grad_norm": 0.5859554409980774, "learning_rate": 5.277777404640906e-06, "loss": 0.3792, "step": 32207 }, { "epoch": 1.4780413932357395, "grad_norm": 0.42461588978767395, "learning_rate": 5.277532595067718e-06, "loss": 0.2833, "step": 32208 }, { "epoch": 1.478087283740994, "grad_norm": 0.4361141622066498, "learning_rate": 5.2772877848271506e-06, "loss": 0.3243, "step": 32209 }, { "epoch": 1.4781331742462485, "grad_norm": 0.45226922631263733, "learning_rate": 5.277042973919795e-06, "loss": 0.3091, "step": 32210 }, { "epoch": 1.478179064751503, "grad_norm": 0.45459383726119995, "learning_rate": 5.2767981623462365e-06, "loss": 0.3537, "step": 32211 }, { "epoch": 1.4782249552567575, "grad_norm": 0.4496331810951233, "learning_rate": 5.276553350107066e-06, "loss": 0.3471, "step": 32212 }, { "epoch": 1.4782708457620117, "grad_norm": 0.49105075001716614, "learning_rate": 5.276308537202874e-06, "loss": 0.4306, "step": 32213 }, { "epoch": 1.4783167362672662, "grad_norm": 0.6723960041999817, "learning_rate": 5.276063723634246e-06, "loss": 0.353, "step": 32214 }, { "epoch": 1.4783626267725207, "grad_norm": 0.4893755316734314, "learning_rate": 5.275818909401771e-06, "loss": 0.3834, "step": 32215 }, { "epoch": 1.4784085172777752, "grad_norm": 0.5055813789367676, "learning_rate": 5.27557409450604e-06, "loss": 0.3832, "step": 32216 }, { "epoch": 1.4784544077830297, "grad_norm": 0.6979690194129944, "learning_rate": 5.275329278947638e-06, "loss": 0.3284, "step": 32217 }, { "epoch": 1.4785002982882842, "grad_norm": 0.4800167679786682, "learning_rate": 5.2750844627271594e-06, "loss": 0.3489, "step": 32218 }, { "epoch": 1.4785461887935387, "grad_norm": 0.5017383694648743, "learning_rate": 5.274839645845188e-06, "loss": 0.4234, "step": 32219 }, { "epoch": 1.4785920792987932, "grad_norm": 0.4398152232170105, "learning_rate": 5.2745948283023126e-06, "loss": 0.2997, "step": 32220 }, { "epoch": 1.4786379698040475, "grad_norm": 0.48357245326042175, "learning_rate": 5.274350010099125e-06, "loss": 0.4023, "step": 32221 }, { "epoch": 1.478683860309302, "grad_norm": 0.4752514660358429, "learning_rate": 5.27410519123621e-06, "loss": 0.3683, "step": 32222 }, { "epoch": 1.4787297508145565, "grad_norm": 0.44536012411117554, "learning_rate": 5.27386037171416e-06, "loss": 0.2829, "step": 32223 }, { "epoch": 1.478775641319811, "grad_norm": 0.46751123666763306, "learning_rate": 5.273615551533564e-06, "loss": 0.2962, "step": 32224 }, { "epoch": 1.4788215318250653, "grad_norm": 0.4844211935997009, "learning_rate": 5.273370730695008e-06, "loss": 0.3782, "step": 32225 }, { "epoch": 1.4788674223303198, "grad_norm": 0.4923376441001892, "learning_rate": 5.273125909199079e-06, "loss": 0.4338, "step": 32226 }, { "epoch": 1.4789133128355743, "grad_norm": 0.4800521433353424, "learning_rate": 5.272881087046372e-06, "loss": 0.3419, "step": 32227 }, { "epoch": 1.4789592033408288, "grad_norm": 0.5042484998703003, "learning_rate": 5.2726362642374695e-06, "loss": 0.3546, "step": 32228 }, { "epoch": 1.4790050938460833, "grad_norm": 0.4690392017364502, "learning_rate": 5.272391440772965e-06, "loss": 0.3362, "step": 32229 }, { "epoch": 1.4790509843513377, "grad_norm": 0.4888511002063751, "learning_rate": 5.272146616653444e-06, "loss": 0.3994, "step": 32230 }, { "epoch": 1.4790968748565922, "grad_norm": 0.4465472996234894, "learning_rate": 5.271901791879496e-06, "loss": 0.3252, "step": 32231 }, { "epoch": 1.4791427653618467, "grad_norm": 0.4621584415435791, "learning_rate": 5.271656966451709e-06, "loss": 0.3453, "step": 32232 }, { "epoch": 1.479188655867101, "grad_norm": 0.4941501319408417, "learning_rate": 5.271412140370674e-06, "loss": 0.3861, "step": 32233 }, { "epoch": 1.4792345463723555, "grad_norm": 0.4701978862285614, "learning_rate": 5.271167313636978e-06, "loss": 0.3554, "step": 32234 }, { "epoch": 1.47928043687761, "grad_norm": 0.44477006793022156, "learning_rate": 5.270922486251211e-06, "loss": 0.3368, "step": 32235 }, { "epoch": 1.4793263273828645, "grad_norm": 0.46687495708465576, "learning_rate": 5.270677658213961e-06, "loss": 0.3784, "step": 32236 }, { "epoch": 1.479372217888119, "grad_norm": 0.46737995743751526, "learning_rate": 5.270432829525816e-06, "loss": 0.3469, "step": 32237 }, { "epoch": 1.4794181083933733, "grad_norm": 0.4454015791416168, "learning_rate": 5.270188000187366e-06, "loss": 0.3089, "step": 32238 }, { "epoch": 1.4794639988986278, "grad_norm": 0.46454596519470215, "learning_rate": 5.269943170199197e-06, "loss": 0.3817, "step": 32239 }, { "epoch": 1.4795098894038823, "grad_norm": 0.4369686245918274, "learning_rate": 5.269698339561903e-06, "loss": 0.284, "step": 32240 }, { "epoch": 1.4795557799091368, "grad_norm": 0.4287276864051819, "learning_rate": 5.269453508276069e-06, "loss": 0.2818, "step": 32241 }, { "epoch": 1.4796016704143913, "grad_norm": 0.45772886276245117, "learning_rate": 5.269208676342283e-06, "loss": 0.3319, "step": 32242 }, { "epoch": 1.4796475609196458, "grad_norm": 0.47653254866600037, "learning_rate": 5.268963843761135e-06, "loss": 0.3294, "step": 32243 }, { "epoch": 1.4796934514249003, "grad_norm": 0.5394758582115173, "learning_rate": 5.268719010533214e-06, "loss": 0.3902, "step": 32244 }, { "epoch": 1.4797393419301548, "grad_norm": 0.45063939690589905, "learning_rate": 5.2684741766591085e-06, "loss": 0.3184, "step": 32245 }, { "epoch": 1.479785232435409, "grad_norm": 0.4706563353538513, "learning_rate": 5.268229342139408e-06, "loss": 0.4076, "step": 32246 }, { "epoch": 1.4798311229406635, "grad_norm": 0.5012007355690002, "learning_rate": 5.267984506974702e-06, "loss": 0.407, "step": 32247 }, { "epoch": 1.479877013445918, "grad_norm": 0.48534274101257324, "learning_rate": 5.267739671165576e-06, "loss": 0.3283, "step": 32248 }, { "epoch": 1.4799229039511725, "grad_norm": 0.4272443354129791, "learning_rate": 5.2674948347126195e-06, "loss": 0.2831, "step": 32249 }, { "epoch": 1.479968794456427, "grad_norm": 0.45840010046958923, "learning_rate": 5.267249997616425e-06, "loss": 0.3179, "step": 32250 }, { "epoch": 1.4800146849616813, "grad_norm": 0.4599660038948059, "learning_rate": 5.267005159877577e-06, "loss": 0.3739, "step": 32251 }, { "epoch": 1.4800605754669358, "grad_norm": 0.4887057840824127, "learning_rate": 5.266760321496666e-06, "loss": 0.3721, "step": 32252 }, { "epoch": 1.4801064659721903, "grad_norm": 0.4779162108898163, "learning_rate": 5.266515482474281e-06, "loss": 0.4151, "step": 32253 }, { "epoch": 1.4801523564774448, "grad_norm": 0.46435460448265076, "learning_rate": 5.26627064281101e-06, "loss": 0.3655, "step": 32254 }, { "epoch": 1.4801982469826993, "grad_norm": 0.4353535771369934, "learning_rate": 5.2660258025074425e-06, "loss": 0.2925, "step": 32255 }, { "epoch": 1.4802441374879538, "grad_norm": 0.4641478657722473, "learning_rate": 5.265780961564167e-06, "loss": 0.3714, "step": 32256 }, { "epoch": 1.4802900279932083, "grad_norm": 0.4852233827114105, "learning_rate": 5.265536119981772e-06, "loss": 0.3744, "step": 32257 }, { "epoch": 1.4803359184984628, "grad_norm": 0.42776840925216675, "learning_rate": 5.265291277760846e-06, "loss": 0.2918, "step": 32258 }, { "epoch": 1.480381809003717, "grad_norm": 0.45558902621269226, "learning_rate": 5.265046434901979e-06, "loss": 0.3256, "step": 32259 }, { "epoch": 1.4804276995089716, "grad_norm": 0.4512421190738678, "learning_rate": 5.264801591405757e-06, "loss": 0.331, "step": 32260 }, { "epoch": 1.480473590014226, "grad_norm": 0.4861771762371063, "learning_rate": 5.264556747272773e-06, "loss": 0.3928, "step": 32261 }, { "epoch": 1.4805194805194806, "grad_norm": 0.44187191128730774, "learning_rate": 5.2643119025036135e-06, "loss": 0.3047, "step": 32262 }, { "epoch": 1.4805653710247348, "grad_norm": 0.4591623842716217, "learning_rate": 5.264067057098866e-06, "loss": 0.3472, "step": 32263 }, { "epoch": 1.4806112615299893, "grad_norm": 0.46147510409355164, "learning_rate": 5.263822211059122e-06, "loss": 0.4002, "step": 32264 }, { "epoch": 1.4806571520352438, "grad_norm": 0.5028408765792847, "learning_rate": 5.2635773643849675e-06, "loss": 0.3535, "step": 32265 }, { "epoch": 1.4807030425404983, "grad_norm": 0.5251145958900452, "learning_rate": 5.263332517076992e-06, "loss": 0.446, "step": 32266 }, { "epoch": 1.4807489330457528, "grad_norm": 0.4643406569957733, "learning_rate": 5.263087669135787e-06, "loss": 0.3698, "step": 32267 }, { "epoch": 1.4807948235510073, "grad_norm": 0.4679146409034729, "learning_rate": 5.262842820561938e-06, "loss": 0.3567, "step": 32268 }, { "epoch": 1.4808407140562618, "grad_norm": 0.46273306012153625, "learning_rate": 5.262597971356035e-06, "loss": 0.3765, "step": 32269 }, { "epoch": 1.4808866045615163, "grad_norm": 0.42155322432518005, "learning_rate": 5.262353121518667e-06, "loss": 0.2734, "step": 32270 }, { "epoch": 1.4809324950667706, "grad_norm": 0.43986257910728455, "learning_rate": 5.262108271050422e-06, "loss": 0.3089, "step": 32271 }, { "epoch": 1.480978385572025, "grad_norm": 0.4873642325401306, "learning_rate": 5.261863419951891e-06, "loss": 0.3635, "step": 32272 }, { "epoch": 1.4810242760772796, "grad_norm": 0.502087414264679, "learning_rate": 5.261618568223661e-06, "loss": 0.3935, "step": 32273 }, { "epoch": 1.481070166582534, "grad_norm": 0.4453190565109253, "learning_rate": 5.261373715866319e-06, "loss": 0.3367, "step": 32274 }, { "epoch": 1.4811160570877886, "grad_norm": 0.4663076400756836, "learning_rate": 5.261128862880457e-06, "loss": 0.3405, "step": 32275 }, { "epoch": 1.4811619475930429, "grad_norm": 0.43091490864753723, "learning_rate": 5.260884009266661e-06, "loss": 0.2768, "step": 32276 }, { "epoch": 1.4812078380982974, "grad_norm": 0.4805290102958679, "learning_rate": 5.260639155025522e-06, "loss": 0.3561, "step": 32277 }, { "epoch": 1.4812537286035519, "grad_norm": 0.5315728187561035, "learning_rate": 5.26039430015763e-06, "loss": 0.4363, "step": 32278 }, { "epoch": 1.4812996191088064, "grad_norm": 0.49049949645996094, "learning_rate": 5.260149444663571e-06, "loss": 0.3421, "step": 32279 }, { "epoch": 1.4813455096140609, "grad_norm": 0.49210989475250244, "learning_rate": 5.259904588543933e-06, "loss": 0.3901, "step": 32280 }, { "epoch": 1.4813914001193154, "grad_norm": 0.4296182692050934, "learning_rate": 5.259659731799307e-06, "loss": 0.3098, "step": 32281 }, { "epoch": 1.4814372906245699, "grad_norm": 0.5041384696960449, "learning_rate": 5.259414874430282e-06, "loss": 0.41, "step": 32282 }, { "epoch": 1.4814831811298244, "grad_norm": 0.4752067029476166, "learning_rate": 5.259170016437446e-06, "loss": 0.3963, "step": 32283 }, { "epoch": 1.4815290716350786, "grad_norm": 0.5466283559799194, "learning_rate": 5.2589251578213884e-06, "loss": 0.2981, "step": 32284 }, { "epoch": 1.4815749621403331, "grad_norm": 0.4475005865097046, "learning_rate": 5.2586802985826965e-06, "loss": 0.3144, "step": 32285 }, { "epoch": 1.4816208526455876, "grad_norm": 0.5330756902694702, "learning_rate": 5.258435438721961e-06, "loss": 0.435, "step": 32286 }, { "epoch": 1.4816667431508421, "grad_norm": 0.48924750089645386, "learning_rate": 5.258190578239769e-06, "loss": 0.357, "step": 32287 }, { "epoch": 1.4817126336560966, "grad_norm": 0.4536020755767822, "learning_rate": 5.25794571713671e-06, "loss": 0.3328, "step": 32288 }, { "epoch": 1.481758524161351, "grad_norm": 0.6382410526275635, "learning_rate": 5.257700855413373e-06, "loss": 0.3403, "step": 32289 }, { "epoch": 1.4818044146666054, "grad_norm": 0.4127758741378784, "learning_rate": 5.257455993070349e-06, "loss": 0.2692, "step": 32290 }, { "epoch": 1.48185030517186, "grad_norm": 0.45960569381713867, "learning_rate": 5.257211130108222e-06, "loss": 0.3781, "step": 32291 }, { "epoch": 1.4818961956771144, "grad_norm": 0.4709983468055725, "learning_rate": 5.256966266527585e-06, "loss": 0.3833, "step": 32292 }, { "epoch": 1.481942086182369, "grad_norm": 0.4444618821144104, "learning_rate": 5.2567214023290235e-06, "loss": 0.3394, "step": 32293 }, { "epoch": 1.4819879766876234, "grad_norm": 0.457006573677063, "learning_rate": 5.25647653751313e-06, "loss": 0.317, "step": 32294 }, { "epoch": 1.4820338671928779, "grad_norm": 0.4663785696029663, "learning_rate": 5.2562316720804904e-06, "loss": 0.3268, "step": 32295 }, { "epoch": 1.4820797576981324, "grad_norm": 0.45807361602783203, "learning_rate": 5.255986806031694e-06, "loss": 0.3586, "step": 32296 }, { "epoch": 1.4821256482033867, "grad_norm": 0.5094810128211975, "learning_rate": 5.25574193936733e-06, "loss": 0.4504, "step": 32297 }, { "epoch": 1.4821715387086412, "grad_norm": 0.49613264203071594, "learning_rate": 5.255497072087989e-06, "loss": 0.3469, "step": 32298 }, { "epoch": 1.4822174292138957, "grad_norm": 0.4654496908187866, "learning_rate": 5.255252204194257e-06, "loss": 0.357, "step": 32299 }, { "epoch": 1.4822633197191502, "grad_norm": 0.4875797629356384, "learning_rate": 5.255007335686725e-06, "loss": 0.3411, "step": 32300 }, { "epoch": 1.4823092102244046, "grad_norm": 0.47078442573547363, "learning_rate": 5.25476246656598e-06, "loss": 0.391, "step": 32301 }, { "epoch": 1.482355100729659, "grad_norm": 0.46430617570877075, "learning_rate": 5.2545175968326115e-06, "loss": 0.4009, "step": 32302 }, { "epoch": 1.4824009912349134, "grad_norm": 0.45111218094825745, "learning_rate": 5.2542727264872085e-06, "loss": 0.3337, "step": 32303 }, { "epoch": 1.482446881740168, "grad_norm": 0.5206025838851929, "learning_rate": 5.25402785553036e-06, "loss": 0.4645, "step": 32304 }, { "epoch": 1.4824927722454224, "grad_norm": 0.4681684374809265, "learning_rate": 5.253782983962656e-06, "loss": 0.376, "step": 32305 }, { "epoch": 1.482538662750677, "grad_norm": 0.47294217348098755, "learning_rate": 5.2535381117846825e-06, "loss": 0.3636, "step": 32306 }, { "epoch": 1.4825845532559314, "grad_norm": 0.4847589135169983, "learning_rate": 5.253293238997029e-06, "loss": 0.4077, "step": 32307 }, { "epoch": 1.482630443761186, "grad_norm": 0.4365226626396179, "learning_rate": 5.253048365600286e-06, "loss": 0.3256, "step": 32308 }, { "epoch": 1.4826763342664404, "grad_norm": 0.45794156193733215, "learning_rate": 5.252803491595043e-06, "loss": 0.3799, "step": 32309 }, { "epoch": 1.4827222247716947, "grad_norm": 0.43100976943969727, "learning_rate": 5.252558616981886e-06, "loss": 0.3047, "step": 32310 }, { "epoch": 1.4827681152769492, "grad_norm": 0.5044086575508118, "learning_rate": 5.252313741761406e-06, "loss": 0.4274, "step": 32311 }, { "epoch": 1.4828140057822037, "grad_norm": 0.4636385142803192, "learning_rate": 5.25206886593419e-06, "loss": 0.3955, "step": 32312 }, { "epoch": 1.4828598962874582, "grad_norm": 0.4615718424320221, "learning_rate": 5.2518239895008285e-06, "loss": 0.3069, "step": 32313 }, { "epoch": 1.4829057867927125, "grad_norm": 0.4650508463382721, "learning_rate": 5.2515791124619085e-06, "loss": 0.3308, "step": 32314 }, { "epoch": 1.482951677297967, "grad_norm": 0.466871052980423, "learning_rate": 5.251334234818023e-06, "loss": 0.3862, "step": 32315 }, { "epoch": 1.4829975678032215, "grad_norm": 0.45182231068611145, "learning_rate": 5.251089356569756e-06, "loss": 0.356, "step": 32316 }, { "epoch": 1.483043458308476, "grad_norm": 0.441617876291275, "learning_rate": 5.2508444777176996e-06, "loss": 0.2881, "step": 32317 }, { "epoch": 1.4830893488137304, "grad_norm": 0.4877391755580902, "learning_rate": 5.25059959826244e-06, "loss": 0.3611, "step": 32318 }, { "epoch": 1.483135239318985, "grad_norm": 0.4494521915912628, "learning_rate": 5.250354718204568e-06, "loss": 0.3606, "step": 32319 }, { "epoch": 1.4831811298242394, "grad_norm": 0.4764392375946045, "learning_rate": 5.250109837544673e-06, "loss": 0.3567, "step": 32320 }, { "epoch": 1.483227020329494, "grad_norm": 0.48611682653427124, "learning_rate": 5.24986495628334e-06, "loss": 0.3997, "step": 32321 }, { "epoch": 1.4832729108347482, "grad_norm": 0.5012817978858948, "learning_rate": 5.249620074421164e-06, "loss": 0.4123, "step": 32322 }, { "epoch": 1.4833188013400027, "grad_norm": 0.4377935230731964, "learning_rate": 5.249375191958729e-06, "loss": 0.3091, "step": 32323 }, { "epoch": 1.4833646918452572, "grad_norm": 0.48370859026908875, "learning_rate": 5.249130308896624e-06, "loss": 0.288, "step": 32324 }, { "epoch": 1.4834105823505117, "grad_norm": 0.45328912138938904, "learning_rate": 5.24888542523544e-06, "loss": 0.3471, "step": 32325 }, { "epoch": 1.4834564728557662, "grad_norm": 0.47834107279777527, "learning_rate": 5.248640540975767e-06, "loss": 0.3894, "step": 32326 }, { "epoch": 1.4835023633610205, "grad_norm": 0.47861477732658386, "learning_rate": 5.248395656118191e-06, "loss": 0.3708, "step": 32327 }, { "epoch": 1.483548253866275, "grad_norm": 0.5039728879928589, "learning_rate": 5.2481507706633e-06, "loss": 0.4615, "step": 32328 }, { "epoch": 1.4835941443715295, "grad_norm": 0.4393763542175293, "learning_rate": 5.247905884611687e-06, "loss": 0.3145, "step": 32329 }, { "epoch": 1.483640034876784, "grad_norm": 0.4937411844730377, "learning_rate": 5.247660997963937e-06, "loss": 0.4024, "step": 32330 }, { "epoch": 1.4836859253820385, "grad_norm": 0.4825166165828705, "learning_rate": 5.24741611072064e-06, "loss": 0.4018, "step": 32331 }, { "epoch": 1.483731815887293, "grad_norm": 0.5424790978431702, "learning_rate": 5.247171222882388e-06, "loss": 0.4045, "step": 32332 }, { "epoch": 1.4837777063925475, "grad_norm": 0.41208282113075256, "learning_rate": 5.246926334449766e-06, "loss": 0.2672, "step": 32333 }, { "epoch": 1.483823596897802, "grad_norm": 0.45391663908958435, "learning_rate": 5.246681445423363e-06, "loss": 0.3338, "step": 32334 }, { "epoch": 1.4838694874030562, "grad_norm": 0.5171379446983337, "learning_rate": 5.24643655580377e-06, "loss": 0.436, "step": 32335 }, { "epoch": 1.4839153779083107, "grad_norm": 0.46104681491851807, "learning_rate": 5.246191665591574e-06, "loss": 0.3358, "step": 32336 }, { "epoch": 1.4839612684135652, "grad_norm": 0.4757278859615326, "learning_rate": 5.245946774787366e-06, "loss": 0.3098, "step": 32337 }, { "epoch": 1.4840071589188197, "grad_norm": 0.46621063351631165, "learning_rate": 5.245701883391734e-06, "loss": 0.3207, "step": 32338 }, { "epoch": 1.4840530494240742, "grad_norm": 0.4821682870388031, "learning_rate": 5.245456991405264e-06, "loss": 0.3918, "step": 32339 }, { "epoch": 1.4840989399293285, "grad_norm": 0.4943676292896271, "learning_rate": 5.2452120988285505e-06, "loss": 0.3877, "step": 32340 }, { "epoch": 1.484144830434583, "grad_norm": 0.45784708857536316, "learning_rate": 5.244967205662177e-06, "loss": 0.3254, "step": 32341 }, { "epoch": 1.4841907209398375, "grad_norm": 0.4543229341506958, "learning_rate": 5.2447223119067345e-06, "loss": 0.3261, "step": 32342 }, { "epoch": 1.484236611445092, "grad_norm": 0.45193830132484436, "learning_rate": 5.244477417562814e-06, "loss": 0.29, "step": 32343 }, { "epoch": 1.4842825019503465, "grad_norm": 0.5345727205276489, "learning_rate": 5.244232522631002e-06, "loss": 0.3467, "step": 32344 }, { "epoch": 1.484328392455601, "grad_norm": 0.48128750920295715, "learning_rate": 5.2439876271118865e-06, "loss": 0.3923, "step": 32345 }, { "epoch": 1.4843742829608555, "grad_norm": 0.45415621995925903, "learning_rate": 5.243742731006059e-06, "loss": 0.3143, "step": 32346 }, { "epoch": 1.48442017346611, "grad_norm": 0.45187845826148987, "learning_rate": 5.243497834314106e-06, "loss": 0.2751, "step": 32347 }, { "epoch": 1.4844660639713643, "grad_norm": 0.48964959383010864, "learning_rate": 5.243252937036619e-06, "loss": 0.3606, "step": 32348 }, { "epoch": 1.4845119544766188, "grad_norm": 0.47665393352508545, "learning_rate": 5.243008039174185e-06, "loss": 0.3824, "step": 32349 }, { "epoch": 1.4845578449818733, "grad_norm": 0.45537135004997253, "learning_rate": 5.242763140727393e-06, "loss": 0.3521, "step": 32350 }, { "epoch": 1.4846037354871278, "grad_norm": 0.5056536793708801, "learning_rate": 5.242518241696833e-06, "loss": 0.4333, "step": 32351 }, { "epoch": 1.484649625992382, "grad_norm": 0.46078360080718994, "learning_rate": 5.242273342083092e-06, "loss": 0.3357, "step": 32352 }, { "epoch": 1.4846955164976365, "grad_norm": 0.45424604415893555, "learning_rate": 5.24202844188676e-06, "loss": 0.3517, "step": 32353 }, { "epoch": 1.484741407002891, "grad_norm": 0.48669958114624023, "learning_rate": 5.241783541108427e-06, "loss": 0.3621, "step": 32354 }, { "epoch": 1.4847872975081455, "grad_norm": 0.46680521965026855, "learning_rate": 5.241538639748681e-06, "loss": 0.3291, "step": 32355 }, { "epoch": 1.4848331880134, "grad_norm": 0.47588977217674255, "learning_rate": 5.24129373780811e-06, "loss": 0.3283, "step": 32356 }, { "epoch": 1.4848790785186545, "grad_norm": 0.5736688375473022, "learning_rate": 5.241048835287305e-06, "loss": 0.3394, "step": 32357 }, { "epoch": 1.484924969023909, "grad_norm": 0.443052738904953, "learning_rate": 5.240803932186852e-06, "loss": 0.3372, "step": 32358 }, { "epoch": 1.4849708595291635, "grad_norm": 0.4627077877521515, "learning_rate": 5.2405590285073425e-06, "loss": 0.336, "step": 32359 }, { "epoch": 1.4850167500344178, "grad_norm": 0.48419806361198425, "learning_rate": 5.240314124249365e-06, "loss": 0.3496, "step": 32360 }, { "epoch": 1.4850626405396723, "grad_norm": 0.4560392200946808, "learning_rate": 5.2400692194135065e-06, "loss": 0.3324, "step": 32361 }, { "epoch": 1.4851085310449268, "grad_norm": 0.4768802225589752, "learning_rate": 5.239824314000357e-06, "loss": 0.3035, "step": 32362 }, { "epoch": 1.4851544215501813, "grad_norm": 0.4935522973537445, "learning_rate": 5.239579408010507e-06, "loss": 0.4155, "step": 32363 }, { "epoch": 1.4852003120554358, "grad_norm": 0.49052897095680237, "learning_rate": 5.239334501444543e-06, "loss": 0.4197, "step": 32364 }, { "epoch": 1.48524620256069, "grad_norm": 0.461058109998703, "learning_rate": 5.239089594303056e-06, "loss": 0.3499, "step": 32365 }, { "epoch": 1.4852920930659446, "grad_norm": 0.49561581015586853, "learning_rate": 5.238844686586635e-06, "loss": 0.3627, "step": 32366 }, { "epoch": 1.485337983571199, "grad_norm": 0.44844287633895874, "learning_rate": 5.238599778295865e-06, "loss": 0.3284, "step": 32367 }, { "epoch": 1.4853838740764536, "grad_norm": 0.5191134810447693, "learning_rate": 5.23835486943134e-06, "loss": 0.349, "step": 32368 }, { "epoch": 1.485429764581708, "grad_norm": 0.46778756380081177, "learning_rate": 5.238109959993646e-06, "loss": 0.3685, "step": 32369 }, { "epoch": 1.4854756550869626, "grad_norm": 0.517098605632782, "learning_rate": 5.237865049983374e-06, "loss": 0.4609, "step": 32370 }, { "epoch": 1.485521545592217, "grad_norm": 0.4473532736301422, "learning_rate": 5.237620139401111e-06, "loss": 0.3454, "step": 32371 }, { "epoch": 1.4855674360974715, "grad_norm": 0.4636293649673462, "learning_rate": 5.237375228247446e-06, "loss": 0.3634, "step": 32372 }, { "epoch": 1.4856133266027258, "grad_norm": 0.4639020562171936, "learning_rate": 5.237130316522967e-06, "loss": 0.3723, "step": 32373 }, { "epoch": 1.4856592171079803, "grad_norm": 0.5386571288108826, "learning_rate": 5.236885404228267e-06, "loss": 0.4864, "step": 32374 }, { "epoch": 1.4857051076132348, "grad_norm": 0.509666919708252, "learning_rate": 5.2366404913639315e-06, "loss": 0.4246, "step": 32375 }, { "epoch": 1.4857509981184893, "grad_norm": 0.4744151830673218, "learning_rate": 5.2363955779305495e-06, "loss": 0.3399, "step": 32376 }, { "epoch": 1.4857968886237438, "grad_norm": 0.478560209274292, "learning_rate": 5.2361506639287126e-06, "loss": 0.4371, "step": 32377 }, { "epoch": 1.485842779128998, "grad_norm": 0.44602057337760925, "learning_rate": 5.235905749359006e-06, "loss": 0.3476, "step": 32378 }, { "epoch": 1.4858886696342526, "grad_norm": 0.49468302726745605, "learning_rate": 5.235660834222021e-06, "loss": 0.3869, "step": 32379 }, { "epoch": 1.485934560139507, "grad_norm": 0.4631974399089813, "learning_rate": 5.235415918518346e-06, "loss": 0.3551, "step": 32380 }, { "epoch": 1.4859804506447616, "grad_norm": 0.4616166055202484, "learning_rate": 5.235171002248571e-06, "loss": 0.3298, "step": 32381 }, { "epoch": 1.486026341150016, "grad_norm": 0.4785248637199402, "learning_rate": 5.234926085413284e-06, "loss": 0.3504, "step": 32382 }, { "epoch": 1.4860722316552706, "grad_norm": 0.4794504940509796, "learning_rate": 5.234681168013073e-06, "loss": 0.4118, "step": 32383 }, { "epoch": 1.486118122160525, "grad_norm": 0.49514123797416687, "learning_rate": 5.234436250048528e-06, "loss": 0.3695, "step": 32384 }, { "epoch": 1.4861640126657796, "grad_norm": 0.462048202753067, "learning_rate": 5.234191331520237e-06, "loss": 0.3685, "step": 32385 }, { "epoch": 1.4862099031710339, "grad_norm": 0.4800395667552948, "learning_rate": 5.233946412428791e-06, "loss": 0.387, "step": 32386 }, { "epoch": 1.4862557936762884, "grad_norm": 0.7294582724571228, "learning_rate": 5.233701492774778e-06, "loss": 0.3456, "step": 32387 }, { "epoch": 1.4863016841815428, "grad_norm": 0.45245125889778137, "learning_rate": 5.233456572558786e-06, "loss": 0.3244, "step": 32388 }, { "epoch": 1.4863475746867973, "grad_norm": 0.4890178442001343, "learning_rate": 5.233211651781405e-06, "loss": 0.4162, "step": 32389 }, { "epoch": 1.4863934651920518, "grad_norm": 0.45102447271347046, "learning_rate": 5.232966730443222e-06, "loss": 0.2726, "step": 32390 }, { "epoch": 1.4864393556973061, "grad_norm": 0.4705100357532501, "learning_rate": 5.23272180854483e-06, "loss": 0.3299, "step": 32391 }, { "epoch": 1.4864852462025606, "grad_norm": 0.5006636381149292, "learning_rate": 5.232476886086815e-06, "loss": 0.3854, "step": 32392 }, { "epoch": 1.4865311367078151, "grad_norm": 0.48193633556365967, "learning_rate": 5.232231963069765e-06, "loss": 0.3778, "step": 32393 }, { "epoch": 1.4865770272130696, "grad_norm": 0.484199583530426, "learning_rate": 5.231987039494272e-06, "loss": 0.4163, "step": 32394 }, { "epoch": 1.486622917718324, "grad_norm": 0.5995803475379944, "learning_rate": 5.231742115360921e-06, "loss": 0.3724, "step": 32395 }, { "epoch": 1.4866688082235786, "grad_norm": 0.46093231439590454, "learning_rate": 5.231497190670305e-06, "loss": 0.3119, "step": 32396 }, { "epoch": 1.486714698728833, "grad_norm": 0.4827044904232025, "learning_rate": 5.231252265423011e-06, "loss": 0.3728, "step": 32397 }, { "epoch": 1.4867605892340876, "grad_norm": 0.47655683755874634, "learning_rate": 5.231007339619629e-06, "loss": 0.3582, "step": 32398 }, { "epoch": 1.4868064797393419, "grad_norm": 0.474122017621994, "learning_rate": 5.230762413260747e-06, "loss": 0.3698, "step": 32399 }, { "epoch": 1.4868523702445964, "grad_norm": 0.4862482249736786, "learning_rate": 5.230517486346954e-06, "loss": 0.3652, "step": 32400 }, { "epoch": 1.4868982607498509, "grad_norm": 0.5885858535766602, "learning_rate": 5.230272558878839e-06, "loss": 0.3548, "step": 32401 }, { "epoch": 1.4869441512551054, "grad_norm": 0.5089755058288574, "learning_rate": 5.230027630856992e-06, "loss": 0.422, "step": 32402 }, { "epoch": 1.4869900417603596, "grad_norm": 0.4889802038669586, "learning_rate": 5.2297827022820014e-06, "loss": 0.3282, "step": 32403 }, { "epoch": 1.4870359322656141, "grad_norm": 0.4968697130680084, "learning_rate": 5.229537773154454e-06, "loss": 0.4023, "step": 32404 }, { "epoch": 1.4870818227708686, "grad_norm": 0.4942907691001892, "learning_rate": 5.229292843474942e-06, "loss": 0.4031, "step": 32405 }, { "epoch": 1.4871277132761231, "grad_norm": 0.4551313519477844, "learning_rate": 5.229047913244052e-06, "loss": 0.3495, "step": 32406 }, { "epoch": 1.4871736037813776, "grad_norm": 0.43653157353401184, "learning_rate": 5.228802982462374e-06, "loss": 0.3077, "step": 32407 }, { "epoch": 1.4872194942866321, "grad_norm": 0.48374438285827637, "learning_rate": 5.228558051130499e-06, "loss": 0.3829, "step": 32408 }, { "epoch": 1.4872653847918866, "grad_norm": 0.5199885368347168, "learning_rate": 5.2283131192490135e-06, "loss": 0.411, "step": 32409 }, { "epoch": 1.4873112752971411, "grad_norm": 0.4609754681587219, "learning_rate": 5.228068186818505e-06, "loss": 0.3636, "step": 32410 }, { "epoch": 1.4873571658023954, "grad_norm": 0.4843488335609436, "learning_rate": 5.227823253839568e-06, "loss": 0.388, "step": 32411 }, { "epoch": 1.48740305630765, "grad_norm": 0.4751400351524353, "learning_rate": 5.227578320312785e-06, "loss": 0.3866, "step": 32412 }, { "epoch": 1.4874489468129044, "grad_norm": 0.4935431480407715, "learning_rate": 5.227333386238748e-06, "loss": 0.3811, "step": 32413 }, { "epoch": 1.487494837318159, "grad_norm": 0.42536357045173645, "learning_rate": 5.227088451618049e-06, "loss": 0.2898, "step": 32414 }, { "epoch": 1.4875407278234134, "grad_norm": 0.47140803933143616, "learning_rate": 5.226843516451271e-06, "loss": 0.3941, "step": 32415 }, { "epoch": 1.4875866183286677, "grad_norm": 0.4453476369380951, "learning_rate": 5.226598580739006e-06, "loss": 0.3107, "step": 32416 }, { "epoch": 1.4876325088339222, "grad_norm": 0.5224695801734924, "learning_rate": 5.2263536444818445e-06, "loss": 0.413, "step": 32417 }, { "epoch": 1.4876783993391767, "grad_norm": 0.46678829193115234, "learning_rate": 5.226108707680372e-06, "loss": 0.3622, "step": 32418 }, { "epoch": 1.4877242898444312, "grad_norm": 0.5104742050170898, "learning_rate": 5.225863770335181e-06, "loss": 0.429, "step": 32419 }, { "epoch": 1.4877701803496857, "grad_norm": 0.48558154702186584, "learning_rate": 5.225618832446859e-06, "loss": 0.3427, "step": 32420 }, { "epoch": 1.4878160708549402, "grad_norm": 0.4677046537399292, "learning_rate": 5.225373894015994e-06, "loss": 0.3886, "step": 32421 }, { "epoch": 1.4878619613601947, "grad_norm": 0.4536486268043518, "learning_rate": 5.225128955043176e-06, "loss": 0.3307, "step": 32422 }, { "epoch": 1.4879078518654492, "grad_norm": 0.45813390612602234, "learning_rate": 5.224884015528994e-06, "loss": 0.3514, "step": 32423 }, { "epoch": 1.4879537423707034, "grad_norm": 0.46142256259918213, "learning_rate": 5.224639075474036e-06, "loss": 0.3857, "step": 32424 }, { "epoch": 1.487999632875958, "grad_norm": 0.471737265586853, "learning_rate": 5.224394134878895e-06, "loss": 0.3903, "step": 32425 }, { "epoch": 1.4880455233812124, "grad_norm": 0.46197670698165894, "learning_rate": 5.224149193744155e-06, "loss": 0.3494, "step": 32426 }, { "epoch": 1.488091413886467, "grad_norm": 0.4563344717025757, "learning_rate": 5.223904252070406e-06, "loss": 0.4207, "step": 32427 }, { "epoch": 1.4881373043917214, "grad_norm": 0.46322908997535706, "learning_rate": 5.2236593098582385e-06, "loss": 0.3455, "step": 32428 }, { "epoch": 1.4881831948969757, "grad_norm": 0.4734077453613281, "learning_rate": 5.2234143671082415e-06, "loss": 0.3605, "step": 32429 }, { "epoch": 1.4882290854022302, "grad_norm": 0.4871661067008972, "learning_rate": 5.223169423821004e-06, "loss": 0.3812, "step": 32430 }, { "epoch": 1.4882749759074847, "grad_norm": 0.4972514808177948, "learning_rate": 5.222924479997114e-06, "loss": 0.4036, "step": 32431 }, { "epoch": 1.4883208664127392, "grad_norm": 0.464763879776001, "learning_rate": 5.222679535637159e-06, "loss": 0.3558, "step": 32432 }, { "epoch": 1.4883667569179937, "grad_norm": 0.472945898771286, "learning_rate": 5.222434590741733e-06, "loss": 0.3378, "step": 32433 }, { "epoch": 1.4884126474232482, "grad_norm": 0.468758761882782, "learning_rate": 5.22218964531142e-06, "loss": 0.3347, "step": 32434 }, { "epoch": 1.4884585379285027, "grad_norm": 0.5076015591621399, "learning_rate": 5.221944699346811e-06, "loss": 0.4377, "step": 32435 }, { "epoch": 1.4885044284337572, "grad_norm": 0.5030063986778259, "learning_rate": 5.221699752848498e-06, "loss": 0.434, "step": 32436 }, { "epoch": 1.4885503189390115, "grad_norm": 0.5112724900245667, "learning_rate": 5.221454805817063e-06, "loss": 0.4284, "step": 32437 }, { "epoch": 1.488596209444266, "grad_norm": 0.4544411599636078, "learning_rate": 5.221209858253101e-06, "loss": 0.3338, "step": 32438 }, { "epoch": 1.4886420999495205, "grad_norm": 0.566962718963623, "learning_rate": 5.2209649101572005e-06, "loss": 0.4587, "step": 32439 }, { "epoch": 1.488687990454775, "grad_norm": 0.4672989249229431, "learning_rate": 5.220719961529947e-06, "loss": 0.3463, "step": 32440 }, { "epoch": 1.4887338809600292, "grad_norm": 0.4605965316295624, "learning_rate": 5.220475012371932e-06, "loss": 0.3104, "step": 32441 }, { "epoch": 1.4887797714652837, "grad_norm": 0.42739707231521606, "learning_rate": 5.220230062683746e-06, "loss": 0.3049, "step": 32442 }, { "epoch": 1.4888256619705382, "grad_norm": 0.4944610893726349, "learning_rate": 5.2199851124659725e-06, "loss": 0.3941, "step": 32443 }, { "epoch": 1.4888715524757927, "grad_norm": 0.46612539887428284, "learning_rate": 5.219740161719207e-06, "loss": 0.3496, "step": 32444 }, { "epoch": 1.4889174429810472, "grad_norm": 0.4866597354412079, "learning_rate": 5.219495210444036e-06, "loss": 0.36, "step": 32445 }, { "epoch": 1.4889633334863017, "grad_norm": 0.4737932085990906, "learning_rate": 5.219250258641046e-06, "loss": 0.3687, "step": 32446 }, { "epoch": 1.4890092239915562, "grad_norm": 0.4782184958457947, "learning_rate": 5.2190053063108305e-06, "loss": 0.3499, "step": 32447 }, { "epoch": 1.4890551144968107, "grad_norm": 0.4779413938522339, "learning_rate": 5.218760353453975e-06, "loss": 0.3313, "step": 32448 }, { "epoch": 1.489101005002065, "grad_norm": 0.4625546634197235, "learning_rate": 5.218515400071071e-06, "loss": 0.3323, "step": 32449 }, { "epoch": 1.4891468955073195, "grad_norm": 0.4414535164833069, "learning_rate": 5.218270446162707e-06, "loss": 0.305, "step": 32450 }, { "epoch": 1.489192786012574, "grad_norm": 0.45431190729141235, "learning_rate": 5.218025491729469e-06, "loss": 0.3032, "step": 32451 }, { "epoch": 1.4892386765178285, "grad_norm": 0.4497077167034149, "learning_rate": 5.21778053677195e-06, "loss": 0.3209, "step": 32452 }, { "epoch": 1.489284567023083, "grad_norm": 0.4962162971496582, "learning_rate": 5.217535581290739e-06, "loss": 0.3764, "step": 32453 }, { "epoch": 1.4893304575283373, "grad_norm": 0.4543689489364624, "learning_rate": 5.217290625286421e-06, "loss": 0.3096, "step": 32454 }, { "epoch": 1.4893763480335918, "grad_norm": 0.46530142426490784, "learning_rate": 5.217045668759588e-06, "loss": 0.3354, "step": 32455 }, { "epoch": 1.4894222385388463, "grad_norm": 0.4765910804271698, "learning_rate": 5.2168007117108295e-06, "loss": 0.3676, "step": 32456 }, { "epoch": 1.4894681290441008, "grad_norm": 0.48544952273368835, "learning_rate": 5.216555754140734e-06, "loss": 0.3254, "step": 32457 }, { "epoch": 1.4895140195493553, "grad_norm": 0.4873560070991516, "learning_rate": 5.21631079604989e-06, "loss": 0.3587, "step": 32458 }, { "epoch": 1.4895599100546097, "grad_norm": 0.4649731516838074, "learning_rate": 5.216065837438887e-06, "loss": 0.3481, "step": 32459 }, { "epoch": 1.4896058005598642, "grad_norm": 0.4719788432121277, "learning_rate": 5.215820878308313e-06, "loss": 0.3656, "step": 32460 }, { "epoch": 1.4896516910651187, "grad_norm": 0.4643266499042511, "learning_rate": 5.215575918658758e-06, "loss": 0.369, "step": 32461 }, { "epoch": 1.489697581570373, "grad_norm": 0.5103459358215332, "learning_rate": 5.215330958490812e-06, "loss": 0.4008, "step": 32462 }, { "epoch": 1.4897434720756275, "grad_norm": 0.4633001685142517, "learning_rate": 5.215085997805063e-06, "loss": 0.3829, "step": 32463 }, { "epoch": 1.489789362580882, "grad_norm": 0.47449991106987, "learning_rate": 5.214841036602099e-06, "loss": 0.3571, "step": 32464 }, { "epoch": 1.4898352530861365, "grad_norm": 0.4497971534729004, "learning_rate": 5.214596074882511e-06, "loss": 0.3309, "step": 32465 }, { "epoch": 1.489881143591391, "grad_norm": 0.4497924745082855, "learning_rate": 5.214351112646886e-06, "loss": 0.3567, "step": 32466 }, { "epoch": 1.4899270340966453, "grad_norm": 0.47269895672798157, "learning_rate": 5.214106149895814e-06, "loss": 0.3398, "step": 32467 }, { "epoch": 1.4899729246018998, "grad_norm": 0.5033291578292847, "learning_rate": 5.213861186629886e-06, "loss": 0.3666, "step": 32468 }, { "epoch": 1.4900188151071543, "grad_norm": 0.47006574273109436, "learning_rate": 5.2136162228496885e-06, "loss": 0.3436, "step": 32469 }, { "epoch": 1.4900647056124088, "grad_norm": 0.47165316343307495, "learning_rate": 5.213371258555812e-06, "loss": 0.3542, "step": 32470 }, { "epoch": 1.4901105961176633, "grad_norm": 0.47586381435394287, "learning_rate": 5.213126293748844e-06, "loss": 0.3584, "step": 32471 }, { "epoch": 1.4901564866229178, "grad_norm": 0.48044487833976746, "learning_rate": 5.212881328429374e-06, "loss": 0.3645, "step": 32472 }, { "epoch": 1.4902023771281723, "grad_norm": 0.47151049971580505, "learning_rate": 5.212636362597994e-06, "loss": 0.3601, "step": 32473 }, { "epoch": 1.4902482676334268, "grad_norm": 0.48227182030677795, "learning_rate": 5.212391396255289e-06, "loss": 0.3594, "step": 32474 }, { "epoch": 1.490294158138681, "grad_norm": 0.4740387797355652, "learning_rate": 5.21214642940185e-06, "loss": 0.3979, "step": 32475 }, { "epoch": 1.4903400486439355, "grad_norm": 0.46698257327079773, "learning_rate": 5.211901462038266e-06, "loss": 0.3795, "step": 32476 }, { "epoch": 1.49038593914919, "grad_norm": 0.5277775526046753, "learning_rate": 5.211656494165125e-06, "loss": 0.378, "step": 32477 }, { "epoch": 1.4904318296544445, "grad_norm": 0.4319857656955719, "learning_rate": 5.211411525783017e-06, "loss": 0.3053, "step": 32478 }, { "epoch": 1.490477720159699, "grad_norm": 0.4771716594696045, "learning_rate": 5.211166556892532e-06, "loss": 0.3962, "step": 32479 }, { "epoch": 1.4905236106649533, "grad_norm": 0.48755839467048645, "learning_rate": 5.210921587494257e-06, "loss": 0.3585, "step": 32480 }, { "epoch": 1.4905695011702078, "grad_norm": 0.42155900597572327, "learning_rate": 5.210676617588782e-06, "loss": 0.2816, "step": 32481 }, { "epoch": 1.4906153916754623, "grad_norm": 0.4692254364490509, "learning_rate": 5.210431647176698e-06, "loss": 0.383, "step": 32482 }, { "epoch": 1.4906612821807168, "grad_norm": 0.45834192633628845, "learning_rate": 5.210186676258591e-06, "loss": 0.3391, "step": 32483 }, { "epoch": 1.4907071726859713, "grad_norm": 0.46294498443603516, "learning_rate": 5.209941704835052e-06, "loss": 0.334, "step": 32484 }, { "epoch": 1.4907530631912258, "grad_norm": 0.4737560451030731, "learning_rate": 5.20969673290667e-06, "loss": 0.3317, "step": 32485 }, { "epoch": 1.4907989536964803, "grad_norm": 0.45365896821022034, "learning_rate": 5.209451760474031e-06, "loss": 0.3242, "step": 32486 }, { "epoch": 1.4908448442017348, "grad_norm": 0.47143787145614624, "learning_rate": 5.20920678753773e-06, "loss": 0.3566, "step": 32487 }, { "epoch": 1.490890734706989, "grad_norm": 0.49440547823905945, "learning_rate": 5.20896181409835e-06, "loss": 0.34, "step": 32488 }, { "epoch": 1.4909366252122436, "grad_norm": 0.53519207239151, "learning_rate": 5.2087168401564824e-06, "loss": 0.4628, "step": 32489 }, { "epoch": 1.490982515717498, "grad_norm": 0.5015696883201599, "learning_rate": 5.208471865712719e-06, "loss": 0.4151, "step": 32490 }, { "epoch": 1.4910284062227526, "grad_norm": 0.4778095483779907, "learning_rate": 5.208226890767646e-06, "loss": 0.3479, "step": 32491 }, { "epoch": 1.4910742967280068, "grad_norm": 0.4882211685180664, "learning_rate": 5.207981915321853e-06, "loss": 0.3874, "step": 32492 }, { "epoch": 1.4911201872332613, "grad_norm": 0.5058813691139221, "learning_rate": 5.207736939375928e-06, "loss": 0.3914, "step": 32493 }, { "epoch": 1.4911660777385158, "grad_norm": 0.449704110622406, "learning_rate": 5.207491962930462e-06, "loss": 0.3366, "step": 32494 }, { "epoch": 1.4912119682437703, "grad_norm": 0.4503457546234131, "learning_rate": 5.207246985986043e-06, "loss": 0.3591, "step": 32495 }, { "epoch": 1.4912578587490248, "grad_norm": 0.4372318983078003, "learning_rate": 5.2070020085432615e-06, "loss": 0.3264, "step": 32496 }, { "epoch": 1.4913037492542793, "grad_norm": 0.460175484418869, "learning_rate": 5.206757030602705e-06, "loss": 0.333, "step": 32497 }, { "epoch": 1.4913496397595338, "grad_norm": 0.4229927659034729, "learning_rate": 5.2065120521649615e-06, "loss": 0.3136, "step": 32498 }, { "epoch": 1.4913955302647883, "grad_norm": 0.48736435174942017, "learning_rate": 5.206267073230624e-06, "loss": 0.4042, "step": 32499 }, { "epoch": 1.4914414207700426, "grad_norm": 0.4254434108734131, "learning_rate": 5.206022093800277e-06, "loss": 0.3126, "step": 32500 }, { "epoch": 1.491487311275297, "grad_norm": 0.46921733021736145, "learning_rate": 5.205777113874514e-06, "loss": 0.4057, "step": 32501 }, { "epoch": 1.4915332017805516, "grad_norm": 0.47904855012893677, "learning_rate": 5.205532133453922e-06, "loss": 0.386, "step": 32502 }, { "epoch": 1.491579092285806, "grad_norm": 0.45980754494667053, "learning_rate": 5.205287152539088e-06, "loss": 0.3579, "step": 32503 }, { "epoch": 1.4916249827910606, "grad_norm": 0.5059465169906616, "learning_rate": 5.205042171130606e-06, "loss": 0.4508, "step": 32504 }, { "epoch": 1.4916708732963149, "grad_norm": 0.4449034035205841, "learning_rate": 5.204797189229059e-06, "loss": 0.3738, "step": 32505 }, { "epoch": 1.4917167638015694, "grad_norm": 0.48877862095832825, "learning_rate": 5.204552206835042e-06, "loss": 0.3965, "step": 32506 }, { "epoch": 1.4917626543068239, "grad_norm": 0.474043071269989, "learning_rate": 5.204307223949141e-06, "loss": 0.3383, "step": 32507 }, { "epoch": 1.4918085448120784, "grad_norm": 0.47350358963012695, "learning_rate": 5.2040622405719456e-06, "loss": 0.3897, "step": 32508 }, { "epoch": 1.4918544353173329, "grad_norm": 0.459407776594162, "learning_rate": 5.203817256704044e-06, "loss": 0.3215, "step": 32509 }, { "epoch": 1.4919003258225874, "grad_norm": 0.4775444269180298, "learning_rate": 5.203572272346027e-06, "loss": 0.3631, "step": 32510 }, { "epoch": 1.4919462163278419, "grad_norm": 0.44606396555900574, "learning_rate": 5.203327287498482e-06, "loss": 0.342, "step": 32511 }, { "epoch": 1.4919921068330964, "grad_norm": 0.45047304034233093, "learning_rate": 5.203082302162e-06, "loss": 0.3272, "step": 32512 }, { "epoch": 1.4920379973383506, "grad_norm": 0.5457999110221863, "learning_rate": 5.202837316337169e-06, "loss": 0.3529, "step": 32513 }, { "epoch": 1.4920838878436051, "grad_norm": 0.4722496569156647, "learning_rate": 5.202592330024578e-06, "loss": 0.4096, "step": 32514 }, { "epoch": 1.4921297783488596, "grad_norm": 0.5037813782691956, "learning_rate": 5.202347343224817e-06, "loss": 0.4141, "step": 32515 }, { "epoch": 1.4921756688541141, "grad_norm": 0.5722838640213013, "learning_rate": 5.2021023559384734e-06, "loss": 0.4265, "step": 32516 }, { "epoch": 1.4922215593593686, "grad_norm": 0.48118695616722107, "learning_rate": 5.201857368166139e-06, "loss": 0.3208, "step": 32517 }, { "epoch": 1.492267449864623, "grad_norm": 0.48233741521835327, "learning_rate": 5.201612379908401e-06, "loss": 0.3852, "step": 32518 }, { "epoch": 1.4923133403698774, "grad_norm": 0.4746929705142975, "learning_rate": 5.201367391165847e-06, "loss": 0.3612, "step": 32519 }, { "epoch": 1.492359230875132, "grad_norm": 0.4887195825576782, "learning_rate": 5.201122401939069e-06, "loss": 0.4005, "step": 32520 }, { "epoch": 1.4924051213803864, "grad_norm": 0.4761020541191101, "learning_rate": 5.200877412228655e-06, "loss": 0.364, "step": 32521 }, { "epoch": 1.4924510118856409, "grad_norm": 0.4892801344394684, "learning_rate": 5.200632422035194e-06, "loss": 0.3459, "step": 32522 }, { "epoch": 1.4924969023908954, "grad_norm": 0.4638475477695465, "learning_rate": 5.2003874313592775e-06, "loss": 0.3397, "step": 32523 }, { "epoch": 1.4925427928961499, "grad_norm": 0.48983097076416016, "learning_rate": 5.200142440201491e-06, "loss": 0.3717, "step": 32524 }, { "epoch": 1.4925886834014044, "grad_norm": 0.4548947215080261, "learning_rate": 5.199897448562424e-06, "loss": 0.3735, "step": 32525 }, { "epoch": 1.4926345739066587, "grad_norm": 0.4728935956954956, "learning_rate": 5.199652456442668e-06, "loss": 0.3753, "step": 32526 }, { "epoch": 1.4926804644119132, "grad_norm": 0.5063821077346802, "learning_rate": 5.199407463842811e-06, "loss": 0.4168, "step": 32527 }, { "epoch": 1.4927263549171677, "grad_norm": 0.45788809657096863, "learning_rate": 5.1991624707634415e-06, "loss": 0.3297, "step": 32528 }, { "epoch": 1.4927722454224222, "grad_norm": 0.4555186331272125, "learning_rate": 5.198917477205149e-06, "loss": 0.3524, "step": 32529 }, { "epoch": 1.4928181359276764, "grad_norm": 0.4603189527988434, "learning_rate": 5.198672483168523e-06, "loss": 0.2956, "step": 32530 }, { "epoch": 1.492864026432931, "grad_norm": 0.4926038384437561, "learning_rate": 5.1984274886541515e-06, "loss": 0.3459, "step": 32531 }, { "epoch": 1.4929099169381854, "grad_norm": 0.46656593680381775, "learning_rate": 5.198182493662627e-06, "loss": 0.3778, "step": 32532 }, { "epoch": 1.49295580744344, "grad_norm": 0.4872576594352722, "learning_rate": 5.197937498194533e-06, "loss": 0.3497, "step": 32533 }, { "epoch": 1.4930016979486944, "grad_norm": 0.48604539036750793, "learning_rate": 5.197692502250465e-06, "loss": 0.3485, "step": 32534 }, { "epoch": 1.493047588453949, "grad_norm": 0.47751009464263916, "learning_rate": 5.197447505831007e-06, "loss": 0.3476, "step": 32535 }, { "epoch": 1.4930934789592034, "grad_norm": 0.46607398986816406, "learning_rate": 5.197202508936749e-06, "loss": 0.3471, "step": 32536 }, { "epoch": 1.493139369464458, "grad_norm": 0.4623022675514221, "learning_rate": 5.196957511568282e-06, "loss": 0.3566, "step": 32537 }, { "epoch": 1.4931852599697122, "grad_norm": 0.4550822377204895, "learning_rate": 5.196712513726195e-06, "loss": 0.3157, "step": 32538 }, { "epoch": 1.4932311504749667, "grad_norm": 0.541007399559021, "learning_rate": 5.196467515411078e-06, "loss": 0.461, "step": 32539 }, { "epoch": 1.4932770409802212, "grad_norm": 0.43232396245002747, "learning_rate": 5.196222516623516e-06, "loss": 0.3283, "step": 32540 }, { "epoch": 1.4933229314854757, "grad_norm": 0.4820578694343567, "learning_rate": 5.195977517364103e-06, "loss": 0.3694, "step": 32541 }, { "epoch": 1.4933688219907302, "grad_norm": 0.45731353759765625, "learning_rate": 5.1957325176334224e-06, "loss": 0.3422, "step": 32542 }, { "epoch": 1.4934147124959845, "grad_norm": 0.5556846261024475, "learning_rate": 5.19548751743207e-06, "loss": 0.4527, "step": 32543 }, { "epoch": 1.493460603001239, "grad_norm": 0.46742039918899536, "learning_rate": 5.19524251676063e-06, "loss": 0.4024, "step": 32544 }, { "epoch": 1.4935064935064934, "grad_norm": 0.44558605551719666, "learning_rate": 5.194997515619695e-06, "loss": 0.2982, "step": 32545 }, { "epoch": 1.493552384011748, "grad_norm": 0.49691998958587646, "learning_rate": 5.194752514009852e-06, "loss": 0.4825, "step": 32546 }, { "epoch": 1.4935982745170024, "grad_norm": 0.4831225275993347, "learning_rate": 5.19450751193169e-06, "loss": 0.4455, "step": 32547 }, { "epoch": 1.493644165022257, "grad_norm": 0.4995240271091461, "learning_rate": 5.194262509385799e-06, "loss": 0.3554, "step": 32548 }, { "epoch": 1.4936900555275114, "grad_norm": 0.4603930115699768, "learning_rate": 5.194017506372769e-06, "loss": 0.3691, "step": 32549 }, { "epoch": 1.493735946032766, "grad_norm": 0.5147192478179932, "learning_rate": 5.193772502893188e-06, "loss": 0.4557, "step": 32550 }, { "epoch": 1.4937818365380202, "grad_norm": 0.49076563119888306, "learning_rate": 5.1935274989476434e-06, "loss": 0.3789, "step": 32551 }, { "epoch": 1.4938277270432747, "grad_norm": 0.48677903413772583, "learning_rate": 5.1932824945367285e-06, "loss": 0.3984, "step": 32552 }, { "epoch": 1.4938736175485292, "grad_norm": 0.4535476863384247, "learning_rate": 5.193037489661029e-06, "loss": 0.317, "step": 32553 }, { "epoch": 1.4939195080537837, "grad_norm": 0.4989411234855652, "learning_rate": 5.192792484321134e-06, "loss": 0.3981, "step": 32554 }, { "epoch": 1.4939653985590382, "grad_norm": 0.47385919094085693, "learning_rate": 5.1925474785176366e-06, "loss": 0.3963, "step": 32555 }, { "epoch": 1.4940112890642925, "grad_norm": 0.46761974692344666, "learning_rate": 5.192302472251123e-06, "loss": 0.3861, "step": 32556 }, { "epoch": 1.494057179569547, "grad_norm": 0.4709824025630951, "learning_rate": 5.192057465522181e-06, "loss": 0.3264, "step": 32557 }, { "epoch": 1.4941030700748015, "grad_norm": 0.42888540029525757, "learning_rate": 5.1918124583314026e-06, "loss": 0.2885, "step": 32558 }, { "epoch": 1.494148960580056, "grad_norm": 0.46583297848701477, "learning_rate": 5.1915674506793745e-06, "loss": 0.3388, "step": 32559 }, { "epoch": 1.4941948510853105, "grad_norm": 0.45026281476020813, "learning_rate": 5.191322442566688e-06, "loss": 0.3339, "step": 32560 }, { "epoch": 1.494240741590565, "grad_norm": 0.46284499764442444, "learning_rate": 5.191077433993933e-06, "loss": 0.374, "step": 32561 }, { "epoch": 1.4942866320958195, "grad_norm": 0.4848873019218445, "learning_rate": 5.190832424961695e-06, "loss": 0.3413, "step": 32562 }, { "epoch": 1.494332522601074, "grad_norm": 0.48311084508895874, "learning_rate": 5.190587415470565e-06, "loss": 0.3538, "step": 32563 }, { "epoch": 1.4943784131063282, "grad_norm": 0.570075511932373, "learning_rate": 5.1903424055211324e-06, "loss": 0.3483, "step": 32564 }, { "epoch": 1.4944243036115827, "grad_norm": 0.4560588598251343, "learning_rate": 5.1900973951139865e-06, "loss": 0.3247, "step": 32565 }, { "epoch": 1.4944701941168372, "grad_norm": 0.5002174377441406, "learning_rate": 5.189852384249718e-06, "loss": 0.4003, "step": 32566 }, { "epoch": 1.4945160846220917, "grad_norm": 0.47151753306388855, "learning_rate": 5.189607372928914e-06, "loss": 0.3459, "step": 32567 }, { "epoch": 1.4945619751273462, "grad_norm": 0.4523603022098541, "learning_rate": 5.189362361152163e-06, "loss": 0.3667, "step": 32568 }, { "epoch": 1.4946078656326005, "grad_norm": 0.4433008134365082, "learning_rate": 5.189117348920056e-06, "loss": 0.3013, "step": 32569 }, { "epoch": 1.494653756137855, "grad_norm": 0.5028434991836548, "learning_rate": 5.1888723362331806e-06, "loss": 0.4026, "step": 32570 }, { "epoch": 1.4946996466431095, "grad_norm": 0.517037034034729, "learning_rate": 5.188627323092128e-06, "loss": 0.4529, "step": 32571 }, { "epoch": 1.494745537148364, "grad_norm": 0.4504440724849701, "learning_rate": 5.1883823094974864e-06, "loss": 0.3621, "step": 32572 }, { "epoch": 1.4947914276536185, "grad_norm": 0.4609016180038452, "learning_rate": 5.188137295449843e-06, "loss": 0.3703, "step": 32573 }, { "epoch": 1.494837318158873, "grad_norm": 0.5202422142028809, "learning_rate": 5.18789228094979e-06, "loss": 0.4068, "step": 32574 }, { "epoch": 1.4948832086641275, "grad_norm": 0.4575437307357788, "learning_rate": 5.187647265997916e-06, "loss": 0.3681, "step": 32575 }, { "epoch": 1.494929099169382, "grad_norm": 0.4750671088695526, "learning_rate": 5.1874022505948086e-06, "loss": 0.3486, "step": 32576 }, { "epoch": 1.4949749896746363, "grad_norm": 0.46575966477394104, "learning_rate": 5.187157234741058e-06, "loss": 0.3862, "step": 32577 }, { "epoch": 1.4950208801798908, "grad_norm": 0.48859214782714844, "learning_rate": 5.186912218437255e-06, "loss": 0.4036, "step": 32578 }, { "epoch": 1.4950667706851453, "grad_norm": 0.48272618651390076, "learning_rate": 5.186667201683984e-06, "loss": 0.354, "step": 32579 }, { "epoch": 1.4951126611903998, "grad_norm": 0.48283910751342773, "learning_rate": 5.186422184481839e-06, "loss": 0.39, "step": 32580 }, { "epoch": 1.495158551695654, "grad_norm": 0.45446208119392395, "learning_rate": 5.186177166831408e-06, "loss": 0.381, "step": 32581 }, { "epoch": 1.4952044422009085, "grad_norm": 0.48283690214157104, "learning_rate": 5.185932148733279e-06, "loss": 0.358, "step": 32582 }, { "epoch": 1.495250332706163, "grad_norm": 0.4367370903491974, "learning_rate": 5.185687130188042e-06, "loss": 0.2933, "step": 32583 }, { "epoch": 1.4952962232114175, "grad_norm": 0.4511030316352844, "learning_rate": 5.185442111196286e-06, "loss": 0.3017, "step": 32584 }, { "epoch": 1.495342113716672, "grad_norm": 0.49175339937210083, "learning_rate": 5.185197091758599e-06, "loss": 0.4095, "step": 32585 }, { "epoch": 1.4953880042219265, "grad_norm": 0.4730582535266876, "learning_rate": 5.184952071875573e-06, "loss": 0.3217, "step": 32586 }, { "epoch": 1.495433894727181, "grad_norm": 0.5141056776046753, "learning_rate": 5.184707051547795e-06, "loss": 0.4114, "step": 32587 }, { "epoch": 1.4954797852324355, "grad_norm": 0.5044583082199097, "learning_rate": 5.184462030775856e-06, "loss": 0.3971, "step": 32588 }, { "epoch": 1.4955256757376898, "grad_norm": 0.42000943422317505, "learning_rate": 5.184217009560342e-06, "loss": 0.303, "step": 32589 }, { "epoch": 1.4955715662429443, "grad_norm": 0.45065370202064514, "learning_rate": 5.183971987901846e-06, "loss": 0.3526, "step": 32590 }, { "epoch": 1.4956174567481988, "grad_norm": 0.46547064185142517, "learning_rate": 5.183726965800955e-06, "loss": 0.3551, "step": 32591 }, { "epoch": 1.4956633472534533, "grad_norm": 0.45704925060272217, "learning_rate": 5.1834819432582585e-06, "loss": 0.3515, "step": 32592 }, { "epoch": 1.4957092377587078, "grad_norm": 0.48265931010246277, "learning_rate": 5.183236920274346e-06, "loss": 0.3658, "step": 32593 }, { "epoch": 1.495755128263962, "grad_norm": 0.4727610647678375, "learning_rate": 5.182991896849806e-06, "loss": 0.3772, "step": 32594 }, { "epoch": 1.4958010187692166, "grad_norm": 0.4863734841346741, "learning_rate": 5.182746872985229e-06, "loss": 0.3668, "step": 32595 }, { "epoch": 1.495846909274471, "grad_norm": 1.7209581136703491, "learning_rate": 5.182501848681203e-06, "loss": 0.307, "step": 32596 }, { "epoch": 1.4958927997797256, "grad_norm": 0.45055389404296875, "learning_rate": 5.182256823938318e-06, "loss": 0.3202, "step": 32597 }, { "epoch": 1.49593869028498, "grad_norm": 0.46957501769065857, "learning_rate": 5.182011798757161e-06, "loss": 0.3666, "step": 32598 }, { "epoch": 1.4959845807902346, "grad_norm": 0.4440155625343323, "learning_rate": 5.181766773138326e-06, "loss": 0.2994, "step": 32599 }, { "epoch": 1.496030471295489, "grad_norm": 0.45915642380714417, "learning_rate": 5.181521747082397e-06, "loss": 0.3553, "step": 32600 }, { "epoch": 1.4960763618007435, "grad_norm": 0.43989524245262146, "learning_rate": 5.181276720589967e-06, "loss": 0.3188, "step": 32601 }, { "epoch": 1.4961222523059978, "grad_norm": 0.5142198204994202, "learning_rate": 5.181031693661622e-06, "loss": 0.4522, "step": 32602 }, { "epoch": 1.4961681428112523, "grad_norm": 0.46187636256217957, "learning_rate": 5.180786666297954e-06, "loss": 0.348, "step": 32603 }, { "epoch": 1.4962140333165068, "grad_norm": 0.5160707235336304, "learning_rate": 5.180541638499552e-06, "loss": 0.3872, "step": 32604 }, { "epoch": 1.4962599238217613, "grad_norm": 0.4476722776889801, "learning_rate": 5.180296610267003e-06, "loss": 0.3154, "step": 32605 }, { "epoch": 1.4963058143270158, "grad_norm": 0.5009312033653259, "learning_rate": 5.180051581600899e-06, "loss": 0.3848, "step": 32606 }, { "epoch": 1.49635170483227, "grad_norm": 0.4788958430290222, "learning_rate": 5.179806552501826e-06, "loss": 0.3441, "step": 32607 }, { "epoch": 1.4963975953375246, "grad_norm": 0.4858202338218689, "learning_rate": 5.179561522970376e-06, "loss": 0.3245, "step": 32608 }, { "epoch": 1.496443485842779, "grad_norm": 0.4013120234012604, "learning_rate": 5.179316493007136e-06, "loss": 0.2091, "step": 32609 }, { "epoch": 1.4964893763480336, "grad_norm": 0.4568566679954529, "learning_rate": 5.179071462612698e-06, "loss": 0.3044, "step": 32610 }, { "epoch": 1.496535266853288, "grad_norm": 0.44588610529899597, "learning_rate": 5.178826431787649e-06, "loss": 0.2768, "step": 32611 }, { "epoch": 1.4965811573585426, "grad_norm": 0.4667956233024597, "learning_rate": 5.17858140053258e-06, "loss": 0.3684, "step": 32612 }, { "epoch": 1.496627047863797, "grad_norm": 0.4453117549419403, "learning_rate": 5.178336368848077e-06, "loss": 0.3085, "step": 32613 }, { "epoch": 1.4966729383690516, "grad_norm": 0.45313918590545654, "learning_rate": 5.178091336734733e-06, "loss": 0.3313, "step": 32614 }, { "epoch": 1.4967188288743059, "grad_norm": 0.4657720923423767, "learning_rate": 5.1778463041931346e-06, "loss": 0.3481, "step": 32615 }, { "epoch": 1.4967647193795603, "grad_norm": 0.4799053370952606, "learning_rate": 5.177601271223872e-06, "loss": 0.3385, "step": 32616 }, { "epoch": 1.4968106098848148, "grad_norm": 0.4610821008682251, "learning_rate": 5.177356237827535e-06, "loss": 0.3312, "step": 32617 }, { "epoch": 1.4968565003900693, "grad_norm": 0.4326995313167572, "learning_rate": 5.177111204004711e-06, "loss": 0.3285, "step": 32618 }, { "epoch": 1.4969023908953236, "grad_norm": 0.46440601348876953, "learning_rate": 5.17686616975599e-06, "loss": 0.3228, "step": 32619 }, { "epoch": 1.4969482814005781, "grad_norm": 0.4918068051338196, "learning_rate": 5.176621135081965e-06, "loss": 0.3217, "step": 32620 }, { "epoch": 1.4969941719058326, "grad_norm": 0.4461206793785095, "learning_rate": 5.1763760999832205e-06, "loss": 0.3031, "step": 32621 }, { "epoch": 1.4970400624110871, "grad_norm": 0.5044770836830139, "learning_rate": 5.176131064460346e-06, "loss": 0.4036, "step": 32622 }, { "epoch": 1.4970859529163416, "grad_norm": 0.4721382260322571, "learning_rate": 5.175886028513933e-06, "loss": 0.3581, "step": 32623 }, { "epoch": 1.497131843421596, "grad_norm": 0.45932653546333313, "learning_rate": 5.175640992144568e-06, "loss": 0.3339, "step": 32624 }, { "epoch": 1.4971777339268506, "grad_norm": 0.5004351139068604, "learning_rate": 5.175395955352842e-06, "loss": 0.4254, "step": 32625 }, { "epoch": 1.497223624432105, "grad_norm": 0.4440430998802185, "learning_rate": 5.175150918139346e-06, "loss": 0.3442, "step": 32626 }, { "epoch": 1.4972695149373594, "grad_norm": 0.4274478554725647, "learning_rate": 5.174905880504666e-06, "loss": 0.3258, "step": 32627 }, { "epoch": 1.4973154054426139, "grad_norm": 0.48320508003234863, "learning_rate": 5.174660842449392e-06, "loss": 0.4057, "step": 32628 }, { "epoch": 1.4973612959478684, "grad_norm": 0.4785393476486206, "learning_rate": 5.174415803974116e-06, "loss": 0.3463, "step": 32629 }, { "epoch": 1.4974071864531229, "grad_norm": 0.442158043384552, "learning_rate": 5.174170765079423e-06, "loss": 0.3081, "step": 32630 }, { "epoch": 1.4974530769583774, "grad_norm": 0.4580605626106262, "learning_rate": 5.173925725765905e-06, "loss": 0.3671, "step": 32631 }, { "epoch": 1.4974989674636316, "grad_norm": 0.46944111585617065, "learning_rate": 5.173680686034151e-06, "loss": 0.372, "step": 32632 }, { "epoch": 1.4975448579688861, "grad_norm": 0.4412649869918823, "learning_rate": 5.173435645884749e-06, "loss": 0.3415, "step": 32633 }, { "epoch": 1.4975907484741406, "grad_norm": 0.44685855507850647, "learning_rate": 5.17319060531829e-06, "loss": 0.3098, "step": 32634 }, { "epoch": 1.4976366389793951, "grad_norm": 0.4748915433883667, "learning_rate": 5.172945564335361e-06, "loss": 0.4001, "step": 32635 }, { "epoch": 1.4976825294846496, "grad_norm": 0.48031970858573914, "learning_rate": 5.172700522936552e-06, "loss": 0.3513, "step": 32636 }, { "epoch": 1.4977284199899041, "grad_norm": 0.5012335777282715, "learning_rate": 5.172455481122456e-06, "loss": 0.4244, "step": 32637 }, { "epoch": 1.4977743104951586, "grad_norm": 0.43695971369743347, "learning_rate": 5.1722104388936555e-06, "loss": 0.3219, "step": 32638 }, { "epoch": 1.4978202010004131, "grad_norm": 0.5308743715286255, "learning_rate": 5.171965396250745e-06, "loss": 0.4515, "step": 32639 }, { "epoch": 1.4978660915056674, "grad_norm": 0.4448179006576538, "learning_rate": 5.1717203531943115e-06, "loss": 0.3204, "step": 32640 }, { "epoch": 1.497911982010922, "grad_norm": 0.517703652381897, "learning_rate": 5.171475309724944e-06, "loss": 0.5369, "step": 32641 }, { "epoch": 1.4979578725161764, "grad_norm": 0.43918561935424805, "learning_rate": 5.171230265843234e-06, "loss": 0.3329, "step": 32642 }, { "epoch": 1.498003763021431, "grad_norm": 0.467804491519928, "learning_rate": 5.170985221549769e-06, "loss": 0.3163, "step": 32643 }, { "epoch": 1.4980496535266854, "grad_norm": 0.43559709191322327, "learning_rate": 5.170740176845137e-06, "loss": 0.2675, "step": 32644 }, { "epoch": 1.4980955440319397, "grad_norm": 0.4466151297092438, "learning_rate": 5.170495131729929e-06, "loss": 0.331, "step": 32645 }, { "epoch": 1.4981414345371942, "grad_norm": 0.4450010657310486, "learning_rate": 5.170250086204735e-06, "loss": 0.3635, "step": 32646 }, { "epoch": 1.4981873250424487, "grad_norm": 0.4183286428451538, "learning_rate": 5.170005040270143e-06, "loss": 0.3092, "step": 32647 }, { "epoch": 1.4982332155477032, "grad_norm": 0.45541322231292725, "learning_rate": 5.169759993926744e-06, "loss": 0.3683, "step": 32648 }, { "epoch": 1.4982791060529577, "grad_norm": 0.49249210953712463, "learning_rate": 5.1695149471751236e-06, "loss": 0.3931, "step": 32649 }, { "epoch": 1.4983249965582122, "grad_norm": 0.45634743571281433, "learning_rate": 5.1692699000158745e-06, "loss": 0.3253, "step": 32650 }, { "epoch": 1.4983708870634667, "grad_norm": 0.47145089507102966, "learning_rate": 5.169024852449585e-06, "loss": 0.392, "step": 32651 }, { "epoch": 1.4984167775687212, "grad_norm": 0.4584760367870331, "learning_rate": 5.168779804476842e-06, "loss": 0.3571, "step": 32652 }, { "epoch": 1.4984626680739754, "grad_norm": 0.41557759046554565, "learning_rate": 5.168534756098238e-06, "loss": 0.2923, "step": 32653 }, { "epoch": 1.49850855857923, "grad_norm": 0.46119579672813416, "learning_rate": 5.168289707314362e-06, "loss": 0.2863, "step": 32654 }, { "epoch": 1.4985544490844844, "grad_norm": 0.4361475110054016, "learning_rate": 5.168044658125802e-06, "loss": 0.3395, "step": 32655 }, { "epoch": 1.498600339589739, "grad_norm": 0.46039947867393494, "learning_rate": 5.167799608533146e-06, "loss": 0.3845, "step": 32656 }, { "epoch": 1.4986462300949934, "grad_norm": 0.44175317883491516, "learning_rate": 5.167554558536987e-06, "loss": 0.3256, "step": 32657 }, { "epoch": 1.4986921206002477, "grad_norm": 0.4562526047229767, "learning_rate": 5.1673095081379095e-06, "loss": 0.3389, "step": 32658 }, { "epoch": 1.4987380111055022, "grad_norm": 0.5073937177658081, "learning_rate": 5.1670644573365094e-06, "loss": 0.3749, "step": 32659 }, { "epoch": 1.4987839016107567, "grad_norm": 0.4532459080219269, "learning_rate": 5.16681940613337e-06, "loss": 0.3883, "step": 32660 }, { "epoch": 1.4988297921160112, "grad_norm": 0.6352558135986328, "learning_rate": 5.166574354529081e-06, "loss": 0.4997, "step": 32661 }, { "epoch": 1.4988756826212657, "grad_norm": 0.4959786832332611, "learning_rate": 5.166329302524235e-06, "loss": 0.4019, "step": 32662 }, { "epoch": 1.4989215731265202, "grad_norm": 0.46843844652175903, "learning_rate": 5.16608425011942e-06, "loss": 0.3443, "step": 32663 }, { "epoch": 1.4989674636317747, "grad_norm": 0.4782419800758362, "learning_rate": 5.1658391973152245e-06, "loss": 0.4028, "step": 32664 }, { "epoch": 1.4990133541370292, "grad_norm": 0.4790678322315216, "learning_rate": 5.165594144112238e-06, "loss": 0.3774, "step": 32665 }, { "epoch": 1.4990592446422835, "grad_norm": 0.4578644037246704, "learning_rate": 5.165349090511048e-06, "loss": 0.3521, "step": 32666 }, { "epoch": 1.499105135147538, "grad_norm": 0.4387330114841461, "learning_rate": 5.165104036512247e-06, "loss": 0.32, "step": 32667 }, { "epoch": 1.4991510256527925, "grad_norm": 0.48777422308921814, "learning_rate": 5.164858982116423e-06, "loss": 0.4076, "step": 32668 }, { "epoch": 1.499196916158047, "grad_norm": 0.5067765116691589, "learning_rate": 5.1646139273241645e-06, "loss": 0.3739, "step": 32669 }, { "epoch": 1.4992428066633012, "grad_norm": 0.5063968896865845, "learning_rate": 5.164368872136063e-06, "loss": 0.3676, "step": 32670 }, { "epoch": 1.4992886971685557, "grad_norm": 0.5246476531028748, "learning_rate": 5.164123816552706e-06, "loss": 0.4213, "step": 32671 }, { "epoch": 1.4993345876738102, "grad_norm": 0.5739502310752869, "learning_rate": 5.163878760574682e-06, "loss": 0.4129, "step": 32672 }, { "epoch": 1.4993804781790647, "grad_norm": 0.4550324082374573, "learning_rate": 5.163633704202581e-06, "loss": 0.3495, "step": 32673 }, { "epoch": 1.4994263686843192, "grad_norm": 0.44659504294395447, "learning_rate": 5.163388647436993e-06, "loss": 0.3085, "step": 32674 }, { "epoch": 1.4994722591895737, "grad_norm": 0.4911981523036957, "learning_rate": 5.163143590278508e-06, "loss": 0.3894, "step": 32675 }, { "epoch": 1.4995181496948282, "grad_norm": 0.44962671399116516, "learning_rate": 5.162898532727713e-06, "loss": 0.3283, "step": 32676 }, { "epoch": 1.4995640402000827, "grad_norm": 0.4686223864555359, "learning_rate": 5.1626534747852e-06, "loss": 0.377, "step": 32677 }, { "epoch": 1.499609930705337, "grad_norm": 0.4423569142818451, "learning_rate": 5.162408416451554e-06, "loss": 0.3192, "step": 32678 }, { "epoch": 1.4996558212105915, "grad_norm": 0.4755551815032959, "learning_rate": 5.162163357727369e-06, "loss": 0.3216, "step": 32679 }, { "epoch": 1.499701711715846, "grad_norm": 0.4955936074256897, "learning_rate": 5.161918298613231e-06, "loss": 0.4336, "step": 32680 }, { "epoch": 1.4997476022211005, "grad_norm": 0.45997166633605957, "learning_rate": 5.161673239109732e-06, "loss": 0.3833, "step": 32681 }, { "epoch": 1.499793492726355, "grad_norm": 0.43228137493133545, "learning_rate": 5.1614281792174605e-06, "loss": 0.316, "step": 32682 }, { "epoch": 1.4998393832316093, "grad_norm": 0.460433691740036, "learning_rate": 5.161183118937003e-06, "loss": 0.3313, "step": 32683 }, { "epoch": 1.4998852737368638, "grad_norm": 0.6957261562347412, "learning_rate": 5.160938058268951e-06, "loss": 0.3829, "step": 32684 }, { "epoch": 1.4999311642421183, "grad_norm": 0.5120862126350403, "learning_rate": 5.160692997213896e-06, "loss": 0.4335, "step": 32685 }, { "epoch": 1.4999770547473728, "grad_norm": 0.4699173867702484, "learning_rate": 5.160447935772423e-06, "loss": 0.3048, "step": 32686 }, { "epoch": 1.5000229452526272, "grad_norm": 0.489084392786026, "learning_rate": 5.1602028739451235e-06, "loss": 0.3968, "step": 32687 }, { "epoch": 1.5000688357578817, "grad_norm": 0.48347148299217224, "learning_rate": 5.159957811732588e-06, "loss": 0.3644, "step": 32688 }, { "epoch": 1.5001147262631362, "grad_norm": 0.4803374409675598, "learning_rate": 5.159712749135404e-06, "loss": 0.3729, "step": 32689 }, { "epoch": 1.5001606167683907, "grad_norm": 0.44852668046951294, "learning_rate": 5.159467686154159e-06, "loss": 0.3284, "step": 32690 }, { "epoch": 1.5002065072736452, "grad_norm": 0.47035688161849976, "learning_rate": 5.159222622789448e-06, "loss": 0.3443, "step": 32691 }, { "epoch": 1.5002523977788995, "grad_norm": 0.47691479325294495, "learning_rate": 5.158977559041856e-06, "loss": 0.3743, "step": 32692 }, { "epoch": 1.500298288284154, "grad_norm": 0.5123530626296997, "learning_rate": 5.158732494911971e-06, "loss": 0.4253, "step": 32693 }, { "epoch": 1.5003441787894085, "grad_norm": 0.5266831517219543, "learning_rate": 5.158487430400386e-06, "loss": 0.3428, "step": 32694 }, { "epoch": 1.5003900692946628, "grad_norm": 0.42494234442710876, "learning_rate": 5.158242365507689e-06, "loss": 0.3002, "step": 32695 }, { "epoch": 1.5004359597999173, "grad_norm": 0.4968528151512146, "learning_rate": 5.157997300234468e-06, "loss": 0.4039, "step": 32696 }, { "epoch": 1.5004818503051718, "grad_norm": 0.47835028171539307, "learning_rate": 5.1577522345813155e-06, "loss": 0.3356, "step": 32697 }, { "epoch": 1.5005277408104263, "grad_norm": 0.5209726691246033, "learning_rate": 5.157507168548816e-06, "loss": 0.3967, "step": 32698 }, { "epoch": 1.5005736313156808, "grad_norm": 0.5262799859046936, "learning_rate": 5.157262102137564e-06, "loss": 0.4171, "step": 32699 }, { "epoch": 1.5006195218209353, "grad_norm": 0.5023545026779175, "learning_rate": 5.157017035348144e-06, "loss": 0.415, "step": 32700 }, { "epoch": 1.5006654123261898, "grad_norm": 0.4932379126548767, "learning_rate": 5.156771968181148e-06, "loss": 0.3946, "step": 32701 }, { "epoch": 1.5007113028314443, "grad_norm": 0.4738796055316925, "learning_rate": 5.156526900637166e-06, "loss": 0.3475, "step": 32702 }, { "epoch": 1.5007571933366988, "grad_norm": 0.4888899326324463, "learning_rate": 5.156281832716787e-06, "loss": 0.361, "step": 32703 }, { "epoch": 1.500803083841953, "grad_norm": 0.5066118836402893, "learning_rate": 5.156036764420597e-06, "loss": 0.4418, "step": 32704 }, { "epoch": 1.5008489743472075, "grad_norm": 0.4515625238418579, "learning_rate": 5.1557916957491895e-06, "loss": 0.3576, "step": 32705 }, { "epoch": 1.500894864852462, "grad_norm": 0.4855920672416687, "learning_rate": 5.15554662670315e-06, "loss": 0.3495, "step": 32706 }, { "epoch": 1.5009407553577165, "grad_norm": 0.4401746690273285, "learning_rate": 5.155301557283073e-06, "loss": 0.3074, "step": 32707 }, { "epoch": 1.5009866458629708, "grad_norm": 0.5005365014076233, "learning_rate": 5.155056487489544e-06, "loss": 0.3806, "step": 32708 }, { "epoch": 1.5010325363682253, "grad_norm": 0.5161315202713013, "learning_rate": 5.154811417323152e-06, "loss": 0.3768, "step": 32709 }, { "epoch": 1.5010784268734798, "grad_norm": 0.4449882209300995, "learning_rate": 5.1545663467844875e-06, "loss": 0.3072, "step": 32710 }, { "epoch": 1.5011243173787343, "grad_norm": 0.4637942612171173, "learning_rate": 5.15432127587414e-06, "loss": 0.3507, "step": 32711 }, { "epoch": 1.5011702078839888, "grad_norm": 0.4692681133747101, "learning_rate": 5.154076204592699e-06, "loss": 0.3544, "step": 32712 }, { "epoch": 1.5012160983892433, "grad_norm": 0.4487364888191223, "learning_rate": 5.153831132940753e-06, "loss": 0.3485, "step": 32713 }, { "epoch": 1.5012619888944978, "grad_norm": 0.4455161988735199, "learning_rate": 5.153586060918892e-06, "loss": 0.3514, "step": 32714 }, { "epoch": 1.5013078793997523, "grad_norm": 0.4687962234020233, "learning_rate": 5.153340988527704e-06, "loss": 0.3282, "step": 32715 }, { "epoch": 1.5013537699050068, "grad_norm": 0.46618759632110596, "learning_rate": 5.153095915767781e-06, "loss": 0.323, "step": 32716 }, { "epoch": 1.501399660410261, "grad_norm": 0.5234248042106628, "learning_rate": 5.1528508426397085e-06, "loss": 0.4018, "step": 32717 }, { "epoch": 1.5014455509155156, "grad_norm": 0.47964876890182495, "learning_rate": 5.15260576914408e-06, "loss": 0.4102, "step": 32718 }, { "epoch": 1.50149144142077, "grad_norm": 0.48703891038894653, "learning_rate": 5.152360695281482e-06, "loss": 0.3589, "step": 32719 }, { "epoch": 1.5015373319260243, "grad_norm": 0.480169415473938, "learning_rate": 5.152115621052503e-06, "loss": 0.4371, "step": 32720 }, { "epoch": 1.5015832224312788, "grad_norm": 0.46981653571128845, "learning_rate": 5.151870546457736e-06, "loss": 0.3737, "step": 32721 }, { "epoch": 1.5016291129365333, "grad_norm": 0.4615938663482666, "learning_rate": 5.151625471497768e-06, "loss": 0.3296, "step": 32722 }, { "epoch": 1.5016750034417878, "grad_norm": 0.43904951214790344, "learning_rate": 5.151380396173188e-06, "loss": 0.2751, "step": 32723 }, { "epoch": 1.5017208939470423, "grad_norm": 0.5143400430679321, "learning_rate": 5.151135320484586e-06, "loss": 0.377, "step": 32724 }, { "epoch": 1.5017667844522968, "grad_norm": 0.44622352719306946, "learning_rate": 5.1508902444325525e-06, "loss": 0.3144, "step": 32725 }, { "epoch": 1.5018126749575513, "grad_norm": 0.43638700246810913, "learning_rate": 5.150645168017674e-06, "loss": 0.3222, "step": 32726 }, { "epoch": 1.5018585654628058, "grad_norm": 0.4574262201786041, "learning_rate": 5.150400091240541e-06, "loss": 0.3954, "step": 32727 }, { "epoch": 1.5019044559680603, "grad_norm": 0.4736306965351105, "learning_rate": 5.150155014101744e-06, "loss": 0.3351, "step": 32728 }, { "epoch": 1.5019503464733148, "grad_norm": 0.4707596004009247, "learning_rate": 5.149909936601874e-06, "loss": 0.3608, "step": 32729 }, { "epoch": 1.501996236978569, "grad_norm": 0.42518213391304016, "learning_rate": 5.149664858741516e-06, "loss": 0.3002, "step": 32730 }, { "epoch": 1.5020421274838236, "grad_norm": 0.4604383707046509, "learning_rate": 5.149419780521259e-06, "loss": 0.3361, "step": 32731 }, { "epoch": 1.502088017989078, "grad_norm": 0.47936442494392395, "learning_rate": 5.149174701941696e-06, "loss": 0.363, "step": 32732 }, { "epoch": 1.5021339084943324, "grad_norm": 0.4596308171749115, "learning_rate": 5.148929623003416e-06, "loss": 0.3124, "step": 32733 }, { "epoch": 1.5021797989995869, "grad_norm": 0.4643476903438568, "learning_rate": 5.148684543707006e-06, "loss": 0.3332, "step": 32734 }, { "epoch": 1.5022256895048414, "grad_norm": 0.42815670371055603, "learning_rate": 5.148439464053059e-06, "loss": 0.2833, "step": 32735 }, { "epoch": 1.5022715800100959, "grad_norm": 0.4539550244808197, "learning_rate": 5.14819438404216e-06, "loss": 0.3238, "step": 32736 }, { "epoch": 1.5023174705153504, "grad_norm": 0.4566689133644104, "learning_rate": 5.1479493036749e-06, "loss": 0.3574, "step": 32737 }, { "epoch": 1.5023633610206049, "grad_norm": 0.4827084541320801, "learning_rate": 5.147704222951868e-06, "loss": 0.3854, "step": 32738 }, { "epoch": 1.5024092515258594, "grad_norm": 0.4791339337825775, "learning_rate": 5.147459141873656e-06, "loss": 0.4275, "step": 32739 }, { "epoch": 1.5024551420311139, "grad_norm": 0.49924468994140625, "learning_rate": 5.147214060440851e-06, "loss": 0.412, "step": 32740 }, { "epoch": 1.5025010325363684, "grad_norm": 0.45989498496055603, "learning_rate": 5.1469689786540415e-06, "loss": 0.3671, "step": 32741 }, { "epoch": 1.5025469230416229, "grad_norm": 0.5214325785636902, "learning_rate": 5.146723896513819e-06, "loss": 0.4592, "step": 32742 }, { "epoch": 1.5025928135468771, "grad_norm": 0.4800948202610016, "learning_rate": 5.146478814020771e-06, "loss": 0.369, "step": 32743 }, { "epoch": 1.5026387040521316, "grad_norm": 0.530949592590332, "learning_rate": 5.146233731175489e-06, "loss": 0.3479, "step": 32744 }, { "epoch": 1.5026845945573861, "grad_norm": 0.4928324222564697, "learning_rate": 5.145988647978559e-06, "loss": 0.3526, "step": 32745 }, { "epoch": 1.5027304850626404, "grad_norm": 0.5022740364074707, "learning_rate": 5.1457435644305746e-06, "loss": 0.3649, "step": 32746 }, { "epoch": 1.502776375567895, "grad_norm": 0.49258044362068176, "learning_rate": 5.145498480532122e-06, "loss": 0.3714, "step": 32747 }, { "epoch": 1.5028222660731494, "grad_norm": 0.47175848484039307, "learning_rate": 5.14525339628379e-06, "loss": 0.3502, "step": 32748 }, { "epoch": 1.502868156578404, "grad_norm": 0.4350002408027649, "learning_rate": 5.1450083116861704e-06, "loss": 0.2896, "step": 32749 }, { "epoch": 1.5029140470836584, "grad_norm": 0.45505622029304504, "learning_rate": 5.1447632267398524e-06, "loss": 0.3508, "step": 32750 }, { "epoch": 1.5029599375889129, "grad_norm": 0.4473058581352234, "learning_rate": 5.144518141445425e-06, "loss": 0.3339, "step": 32751 }, { "epoch": 1.5030058280941674, "grad_norm": 0.4679861068725586, "learning_rate": 5.1442730558034745e-06, "loss": 0.3637, "step": 32752 }, { "epoch": 1.5030517185994219, "grad_norm": 0.47852495312690735, "learning_rate": 5.144027969814595e-06, "loss": 0.3405, "step": 32753 }, { "epoch": 1.5030976091046764, "grad_norm": 0.4426574409008026, "learning_rate": 5.143782883479374e-06, "loss": 0.3279, "step": 32754 }, { "epoch": 1.5031434996099307, "grad_norm": 0.49324122071266174, "learning_rate": 5.143537796798398e-06, "loss": 0.3969, "step": 32755 }, { "epoch": 1.5031893901151852, "grad_norm": 0.4641503691673279, "learning_rate": 5.14329270977226e-06, "loss": 0.3379, "step": 32756 }, { "epoch": 1.5032352806204397, "grad_norm": 0.4565747380256653, "learning_rate": 5.14304762240155e-06, "loss": 0.3504, "step": 32757 }, { "epoch": 1.5032811711256941, "grad_norm": 0.4210914373397827, "learning_rate": 5.142802534686855e-06, "loss": 0.2787, "step": 32758 }, { "epoch": 1.5033270616309484, "grad_norm": 0.4932003319263458, "learning_rate": 5.142557446628764e-06, "loss": 0.3989, "step": 32759 }, { "epoch": 1.503372952136203, "grad_norm": 0.45669296383857727, "learning_rate": 5.142312358227868e-06, "loss": 0.4044, "step": 32760 }, { "epoch": 1.5034188426414574, "grad_norm": 0.47934290766716003, "learning_rate": 5.142067269484755e-06, "loss": 0.4095, "step": 32761 }, { "epoch": 1.503464733146712, "grad_norm": 0.41430023312568665, "learning_rate": 5.1418221804000164e-06, "loss": 0.2636, "step": 32762 }, { "epoch": 1.5035106236519664, "grad_norm": 0.49233314394950867, "learning_rate": 5.141577090974239e-06, "loss": 0.422, "step": 32763 }, { "epoch": 1.503556514157221, "grad_norm": 0.45914387702941895, "learning_rate": 5.141332001208014e-06, "loss": 0.3369, "step": 32764 }, { "epoch": 1.5036024046624754, "grad_norm": 0.509056568145752, "learning_rate": 5.141086911101929e-06, "loss": 0.4102, "step": 32765 }, { "epoch": 1.50364829516773, "grad_norm": 0.493757426738739, "learning_rate": 5.140841820656575e-06, "loss": 0.3962, "step": 32766 }, { "epoch": 1.5036941856729844, "grad_norm": 0.5295786261558533, "learning_rate": 5.140596729872541e-06, "loss": 0.4891, "step": 32767 }, { "epoch": 1.5037400761782387, "grad_norm": 0.4630822241306305, "learning_rate": 5.140351638750417e-06, "loss": 0.3274, "step": 32768 }, { "epoch": 1.5037859666834932, "grad_norm": 0.4026663601398468, "learning_rate": 5.140106547290791e-06, "loss": 0.2616, "step": 32769 }, { "epoch": 1.5038318571887477, "grad_norm": 0.4737588167190552, "learning_rate": 5.139861455494254e-06, "loss": 0.3581, "step": 32770 }, { "epoch": 1.503877747694002, "grad_norm": 0.4924485981464386, "learning_rate": 5.139616363361393e-06, "loss": 0.419, "step": 32771 }, { "epoch": 1.5039236381992565, "grad_norm": 0.48611074686050415, "learning_rate": 5.139371270892799e-06, "loss": 0.3675, "step": 32772 }, { "epoch": 1.503969528704511, "grad_norm": 0.4732033908367157, "learning_rate": 5.139126178089061e-06, "loss": 0.3647, "step": 32773 }, { "epoch": 1.5040154192097654, "grad_norm": 0.48148900270462036, "learning_rate": 5.138881084950769e-06, "loss": 0.3458, "step": 32774 }, { "epoch": 1.50406130971502, "grad_norm": 0.4908605217933655, "learning_rate": 5.13863599147851e-06, "loss": 0.3629, "step": 32775 }, { "epoch": 1.5041072002202744, "grad_norm": 0.43375730514526367, "learning_rate": 5.138390897672877e-06, "loss": 0.312, "step": 32776 }, { "epoch": 1.504153090725529, "grad_norm": 0.5111060738563538, "learning_rate": 5.138145803534457e-06, "loss": 0.4097, "step": 32777 }, { "epoch": 1.5041989812307834, "grad_norm": 0.43197059631347656, "learning_rate": 5.137900709063841e-06, "loss": 0.3071, "step": 32778 }, { "epoch": 1.504244871736038, "grad_norm": 0.49968311190605164, "learning_rate": 5.137655614261616e-06, "loss": 0.423, "step": 32779 }, { "epoch": 1.5042907622412924, "grad_norm": 0.4544350206851959, "learning_rate": 5.137410519128373e-06, "loss": 0.3163, "step": 32780 }, { "epoch": 1.5043366527465467, "grad_norm": 0.4515538215637207, "learning_rate": 5.137165423664702e-06, "loss": 0.2797, "step": 32781 }, { "epoch": 1.5043825432518012, "grad_norm": 0.4915199279785156, "learning_rate": 5.13692032787119e-06, "loss": 0.3675, "step": 32782 }, { "epoch": 1.5044284337570557, "grad_norm": 0.4520937502384186, "learning_rate": 5.136675231748429e-06, "loss": 0.3587, "step": 32783 }, { "epoch": 1.50447432426231, "grad_norm": 0.48954692482948303, "learning_rate": 5.136430135297007e-06, "loss": 0.3674, "step": 32784 }, { "epoch": 1.5045202147675645, "grad_norm": 0.43955543637275696, "learning_rate": 5.136185038517513e-06, "loss": 0.2879, "step": 32785 }, { "epoch": 1.504566105272819, "grad_norm": 0.46372300386428833, "learning_rate": 5.135939941410536e-06, "loss": 0.3749, "step": 32786 }, { "epoch": 1.5046119957780735, "grad_norm": 0.45749542117118835, "learning_rate": 5.135694843976669e-06, "loss": 0.3357, "step": 32787 }, { "epoch": 1.504657886283328, "grad_norm": 0.48393115401268005, "learning_rate": 5.135449746216496e-06, "loss": 0.2938, "step": 32788 }, { "epoch": 1.5047037767885825, "grad_norm": 0.4295254051685333, "learning_rate": 5.13520464813061e-06, "loss": 0.3198, "step": 32789 }, { "epoch": 1.504749667293837, "grad_norm": 0.4525389075279236, "learning_rate": 5.134959549719602e-06, "loss": 0.3462, "step": 32790 }, { "epoch": 1.5047955577990915, "grad_norm": 0.6302730441093445, "learning_rate": 5.134714450984055e-06, "loss": 0.309, "step": 32791 }, { "epoch": 1.504841448304346, "grad_norm": 0.48473483324050903, "learning_rate": 5.134469351924564e-06, "loss": 0.4143, "step": 32792 }, { "epoch": 1.5048873388096002, "grad_norm": 0.4580526649951935, "learning_rate": 5.134224252541718e-06, "loss": 0.3311, "step": 32793 }, { "epoch": 1.5049332293148547, "grad_norm": 0.4836895763874054, "learning_rate": 5.133979152836104e-06, "loss": 0.3791, "step": 32794 }, { "epoch": 1.5049791198201092, "grad_norm": 0.46281442046165466, "learning_rate": 5.1337340528083125e-06, "loss": 0.3135, "step": 32795 }, { "epoch": 1.5050250103253637, "grad_norm": 0.46044954657554626, "learning_rate": 5.1334889524589316e-06, "loss": 0.3254, "step": 32796 }, { "epoch": 1.505070900830618, "grad_norm": 0.48662540316581726, "learning_rate": 5.133243851788552e-06, "loss": 0.354, "step": 32797 }, { "epoch": 1.5051167913358725, "grad_norm": 0.44564583897590637, "learning_rate": 5.132998750797764e-06, "loss": 0.3536, "step": 32798 }, { "epoch": 1.505162681841127, "grad_norm": 0.48173239827156067, "learning_rate": 5.132753649487156e-06, "loss": 0.3926, "step": 32799 }, { "epoch": 1.5052085723463815, "grad_norm": 0.43214672803878784, "learning_rate": 5.132508547857317e-06, "loss": 0.2912, "step": 32800 }, { "epoch": 1.505254462851636, "grad_norm": 0.5324264168739319, "learning_rate": 5.132263445908837e-06, "loss": 0.4409, "step": 32801 }, { "epoch": 1.5053003533568905, "grad_norm": 0.45244595408439636, "learning_rate": 5.132018343642305e-06, "loss": 0.31, "step": 32802 }, { "epoch": 1.505346243862145, "grad_norm": 0.5400252342224121, "learning_rate": 5.13177324105831e-06, "loss": 0.342, "step": 32803 }, { "epoch": 1.5053921343673995, "grad_norm": 0.45666155219078064, "learning_rate": 5.131528138157443e-06, "loss": 0.3714, "step": 32804 }, { "epoch": 1.505438024872654, "grad_norm": 0.5679479241371155, "learning_rate": 5.131283034940292e-06, "loss": 0.468, "step": 32805 }, { "epoch": 1.5054839153779083, "grad_norm": 0.47452667355537415, "learning_rate": 5.131037931407447e-06, "loss": 0.3576, "step": 32806 }, { "epoch": 1.5055298058831628, "grad_norm": 0.4930656850337982, "learning_rate": 5.130792827559496e-06, "loss": 0.4241, "step": 32807 }, { "epoch": 1.5055756963884173, "grad_norm": 0.45299631357192993, "learning_rate": 5.1305477233970304e-06, "loss": 0.2887, "step": 32808 }, { "epoch": 1.5056215868936715, "grad_norm": 0.45337390899658203, "learning_rate": 5.130302618920637e-06, "loss": 0.3073, "step": 32809 }, { "epoch": 1.505667477398926, "grad_norm": 0.473055899143219, "learning_rate": 5.13005751413091e-06, "loss": 0.3737, "step": 32810 }, { "epoch": 1.5057133679041805, "grad_norm": 0.4456495940685272, "learning_rate": 5.129812409028434e-06, "loss": 0.3251, "step": 32811 }, { "epoch": 1.505759258409435, "grad_norm": 0.4496329128742218, "learning_rate": 5.129567303613801e-06, "loss": 0.3344, "step": 32812 }, { "epoch": 1.5058051489146895, "grad_norm": 0.47687768936157227, "learning_rate": 5.129322197887598e-06, "loss": 0.3531, "step": 32813 }, { "epoch": 1.505851039419944, "grad_norm": 0.4649117588996887, "learning_rate": 5.1290770918504174e-06, "loss": 0.4048, "step": 32814 }, { "epoch": 1.5058969299251985, "grad_norm": 0.4566599428653717, "learning_rate": 5.128831985502846e-06, "loss": 0.3753, "step": 32815 }, { "epoch": 1.505942820430453, "grad_norm": 0.46511006355285645, "learning_rate": 5.1285868788454765e-06, "loss": 0.3163, "step": 32816 }, { "epoch": 1.5059887109357075, "grad_norm": 0.4653542637825012, "learning_rate": 5.128341771878893e-06, "loss": 0.3611, "step": 32817 }, { "epoch": 1.506034601440962, "grad_norm": 0.46036452054977417, "learning_rate": 5.12809666460369e-06, "loss": 0.3316, "step": 32818 }, { "epoch": 1.5060804919462163, "grad_norm": 0.48076075315475464, "learning_rate": 5.127851557020454e-06, "loss": 0.3492, "step": 32819 }, { "epoch": 1.5061263824514708, "grad_norm": 0.5332815647125244, "learning_rate": 5.127606449129776e-06, "loss": 0.4524, "step": 32820 }, { "epoch": 1.5061722729567253, "grad_norm": 0.4529314339160919, "learning_rate": 5.1273613409322455e-06, "loss": 0.343, "step": 32821 }, { "epoch": 1.5062181634619796, "grad_norm": 0.48692092299461365, "learning_rate": 5.1271162324284505e-06, "loss": 0.3909, "step": 32822 }, { "epoch": 1.506264053967234, "grad_norm": 0.48001629114151, "learning_rate": 5.126871123618979e-06, "loss": 0.4067, "step": 32823 }, { "epoch": 1.5063099444724886, "grad_norm": 0.537373423576355, "learning_rate": 5.126626014504426e-06, "loss": 0.4757, "step": 32824 }, { "epoch": 1.506355834977743, "grad_norm": 0.4413423538208008, "learning_rate": 5.126380905085375e-06, "loss": 0.3317, "step": 32825 }, { "epoch": 1.5064017254829976, "grad_norm": 0.44824352860450745, "learning_rate": 5.1261357953624194e-06, "loss": 0.3212, "step": 32826 }, { "epoch": 1.506447615988252, "grad_norm": 0.5113248229026794, "learning_rate": 5.125890685336147e-06, "loss": 0.4355, "step": 32827 }, { "epoch": 1.5064935064935066, "grad_norm": 0.46396464109420776, "learning_rate": 5.125645575007145e-06, "loss": 0.3319, "step": 32828 }, { "epoch": 1.506539396998761, "grad_norm": 0.4569416344165802, "learning_rate": 5.125400464376007e-06, "loss": 0.3792, "step": 32829 }, { "epoch": 1.5065852875040155, "grad_norm": 0.5107754468917847, "learning_rate": 5.12515535344332e-06, "loss": 0.4297, "step": 32830 }, { "epoch": 1.50663117800927, "grad_norm": 0.45438775420188904, "learning_rate": 5.124910242209673e-06, "loss": 0.3419, "step": 32831 }, { "epoch": 1.5066770685145243, "grad_norm": 0.4571935832500458, "learning_rate": 5.124665130675658e-06, "loss": 0.3613, "step": 32832 }, { "epoch": 1.5067229590197788, "grad_norm": 0.4636681377887726, "learning_rate": 5.124420018841861e-06, "loss": 0.3202, "step": 32833 }, { "epoch": 1.5067688495250333, "grad_norm": 0.46161359548568726, "learning_rate": 5.1241749067088745e-06, "loss": 0.344, "step": 32834 }, { "epoch": 1.5068147400302876, "grad_norm": 0.4809042513370514, "learning_rate": 5.123929794277286e-06, "loss": 0.336, "step": 32835 }, { "epoch": 1.506860630535542, "grad_norm": 0.4723934233188629, "learning_rate": 5.123684681547685e-06, "loss": 0.3201, "step": 32836 }, { "epoch": 1.5069065210407966, "grad_norm": 0.462736576795578, "learning_rate": 5.123439568520661e-06, "loss": 0.3462, "step": 32837 }, { "epoch": 1.506952411546051, "grad_norm": 0.5029008388519287, "learning_rate": 5.123194455196806e-06, "loss": 0.4139, "step": 32838 }, { "epoch": 1.5069983020513056, "grad_norm": 0.49763917922973633, "learning_rate": 5.1229493415767065e-06, "loss": 0.3982, "step": 32839 }, { "epoch": 1.50704419255656, "grad_norm": 0.4570232033729553, "learning_rate": 5.12270422766095e-06, "loss": 0.3344, "step": 32840 }, { "epoch": 1.5070900830618146, "grad_norm": 0.44540923833847046, "learning_rate": 5.122459113450131e-06, "loss": 0.335, "step": 32841 }, { "epoch": 1.507135973567069, "grad_norm": 0.4682774841785431, "learning_rate": 5.1222139989448365e-06, "loss": 0.339, "step": 32842 }, { "epoch": 1.5071818640723236, "grad_norm": 0.4959350824356079, "learning_rate": 5.1219688841456546e-06, "loss": 0.4294, "step": 32843 }, { "epoch": 1.5072277545775778, "grad_norm": 0.445987343788147, "learning_rate": 5.121723769053178e-06, "loss": 0.2967, "step": 32844 }, { "epoch": 1.5072736450828323, "grad_norm": 0.4685248136520386, "learning_rate": 5.121478653667993e-06, "loss": 0.3371, "step": 32845 }, { "epoch": 1.5073195355880868, "grad_norm": 0.4719371199607849, "learning_rate": 5.121233537990691e-06, "loss": 0.3816, "step": 32846 }, { "epoch": 1.5073654260933413, "grad_norm": 0.4594067335128784, "learning_rate": 5.120988422021859e-06, "loss": 0.3772, "step": 32847 }, { "epoch": 1.5074113165985956, "grad_norm": 0.4767225682735443, "learning_rate": 5.120743305762088e-06, "loss": 0.3507, "step": 32848 }, { "epoch": 1.5074572071038501, "grad_norm": 0.4705440104007721, "learning_rate": 5.12049818921197e-06, "loss": 0.3273, "step": 32849 }, { "epoch": 1.5075030976091046, "grad_norm": 0.4943653643131256, "learning_rate": 5.120253072372089e-06, "loss": 0.3963, "step": 32850 }, { "epoch": 1.5075489881143591, "grad_norm": 0.46062716841697693, "learning_rate": 5.120007955243039e-06, "loss": 0.2931, "step": 32851 }, { "epoch": 1.5075948786196136, "grad_norm": 0.47785094380378723, "learning_rate": 5.119762837825407e-06, "loss": 0.3766, "step": 32852 }, { "epoch": 1.507640769124868, "grad_norm": 0.44761592149734497, "learning_rate": 5.119517720119782e-06, "loss": 0.2986, "step": 32853 }, { "epoch": 1.5076866596301226, "grad_norm": 0.4577309787273407, "learning_rate": 5.1192726021267566e-06, "loss": 0.3507, "step": 32854 }, { "epoch": 1.507732550135377, "grad_norm": 0.4630909562110901, "learning_rate": 5.119027483846919e-06, "loss": 0.3161, "step": 32855 }, { "epoch": 1.5077784406406316, "grad_norm": 0.4636571705341339, "learning_rate": 5.118782365280855e-06, "loss": 0.3431, "step": 32856 }, { "epoch": 1.5078243311458859, "grad_norm": 0.515229344367981, "learning_rate": 5.1185372464291586e-06, "loss": 0.451, "step": 32857 }, { "epoch": 1.5078702216511404, "grad_norm": 0.4891173243522644, "learning_rate": 5.118292127292417e-06, "loss": 0.3743, "step": 32858 }, { "epoch": 1.5079161121563949, "grad_norm": 0.49580472707748413, "learning_rate": 5.118047007871221e-06, "loss": 0.4029, "step": 32859 }, { "epoch": 1.5079620026616491, "grad_norm": 0.5243240594863892, "learning_rate": 5.1178018881661595e-06, "loss": 0.4444, "step": 32860 }, { "epoch": 1.5080078931669036, "grad_norm": 0.485748827457428, "learning_rate": 5.117556768177821e-06, "loss": 0.3937, "step": 32861 }, { "epoch": 1.5080537836721581, "grad_norm": 0.4666086733341217, "learning_rate": 5.117311647906794e-06, "loss": 0.3582, "step": 32862 }, { "epoch": 1.5080996741774126, "grad_norm": 0.4761119484901428, "learning_rate": 5.117066527353671e-06, "loss": 0.4144, "step": 32863 }, { "epoch": 1.5081455646826671, "grad_norm": 0.47545504570007324, "learning_rate": 5.11682140651904e-06, "loss": 0.3332, "step": 32864 }, { "epoch": 1.5081914551879216, "grad_norm": 0.47717857360839844, "learning_rate": 5.116576285403491e-06, "loss": 0.3713, "step": 32865 }, { "epoch": 1.5082373456931761, "grad_norm": 0.4473412334918976, "learning_rate": 5.116331164007612e-06, "loss": 0.2819, "step": 32866 }, { "epoch": 1.5082832361984306, "grad_norm": 0.43218520283699036, "learning_rate": 5.116086042331993e-06, "loss": 0.3104, "step": 32867 }, { "epoch": 1.5083291267036851, "grad_norm": 0.4776930510997772, "learning_rate": 5.115840920377223e-06, "loss": 0.3887, "step": 32868 }, { "epoch": 1.5083750172089396, "grad_norm": 0.46142086386680603, "learning_rate": 5.115595798143894e-06, "loss": 0.3332, "step": 32869 }, { "epoch": 1.508420907714194, "grad_norm": 0.4706965982913971, "learning_rate": 5.115350675632592e-06, "loss": 0.3651, "step": 32870 }, { "epoch": 1.5084667982194484, "grad_norm": 0.5128078460693359, "learning_rate": 5.11510555284391e-06, "loss": 0.4355, "step": 32871 }, { "epoch": 1.508512688724703, "grad_norm": 0.418459415435791, "learning_rate": 5.1148604297784346e-06, "loss": 0.2931, "step": 32872 }, { "epoch": 1.5085585792299572, "grad_norm": 0.48294806480407715, "learning_rate": 5.114615306436755e-06, "loss": 0.3655, "step": 32873 }, { "epoch": 1.5086044697352117, "grad_norm": 0.47281691431999207, "learning_rate": 5.114370182819461e-06, "loss": 0.3462, "step": 32874 }, { "epoch": 1.5086503602404662, "grad_norm": 0.4664763808250427, "learning_rate": 5.114125058927146e-06, "loss": 0.304, "step": 32875 }, { "epoch": 1.5086962507457207, "grad_norm": 0.4891469478607178, "learning_rate": 5.113879934760395e-06, "loss": 0.4151, "step": 32876 }, { "epoch": 1.5087421412509752, "grad_norm": 0.5529966950416565, "learning_rate": 5.1136348103197985e-06, "loss": 0.3969, "step": 32877 }, { "epoch": 1.5087880317562297, "grad_norm": 0.43017295002937317, "learning_rate": 5.113389685605946e-06, "loss": 0.274, "step": 32878 }, { "epoch": 1.5088339222614842, "grad_norm": 0.449070006608963, "learning_rate": 5.113144560619426e-06, "loss": 0.3108, "step": 32879 }, { "epoch": 1.5088798127667387, "grad_norm": 0.5158154964447021, "learning_rate": 5.1128994353608305e-06, "loss": 0.406, "step": 32880 }, { "epoch": 1.5089257032719932, "grad_norm": 0.520153284072876, "learning_rate": 5.1126543098307466e-06, "loss": 0.479, "step": 32881 }, { "epoch": 1.5089715937772474, "grad_norm": 0.481496125459671, "learning_rate": 5.112409184029766e-06, "loss": 0.3899, "step": 32882 }, { "epoch": 1.509017484282502, "grad_norm": 0.5117810368537903, "learning_rate": 5.1121640579584765e-06, "loss": 0.4212, "step": 32883 }, { "epoch": 1.5090633747877564, "grad_norm": 0.42805805802345276, "learning_rate": 5.111918931617467e-06, "loss": 0.2614, "step": 32884 }, { "epoch": 1.509109265293011, "grad_norm": 0.5474495887756348, "learning_rate": 5.111673805007326e-06, "loss": 0.5101, "step": 32885 }, { "epoch": 1.5091551557982652, "grad_norm": 0.4785231351852417, "learning_rate": 5.111428678128648e-06, "loss": 0.3519, "step": 32886 }, { "epoch": 1.5092010463035197, "grad_norm": 0.47195032238960266, "learning_rate": 5.111183550982019e-06, "loss": 0.3296, "step": 32887 }, { "epoch": 1.5092469368087742, "grad_norm": 0.49945446848869324, "learning_rate": 5.110938423568027e-06, "loss": 0.4029, "step": 32888 }, { "epoch": 1.5092928273140287, "grad_norm": 0.48852723836898804, "learning_rate": 5.110693295887264e-06, "loss": 0.3284, "step": 32889 }, { "epoch": 1.5093387178192832, "grad_norm": 0.4610905051231384, "learning_rate": 5.110448167940318e-06, "loss": 0.3132, "step": 32890 }, { "epoch": 1.5093846083245377, "grad_norm": 0.4688301384449005, "learning_rate": 5.110203039727779e-06, "loss": 0.3762, "step": 32891 }, { "epoch": 1.5094304988297922, "grad_norm": 0.48501455783843994, "learning_rate": 5.109957911250236e-06, "loss": 0.3383, "step": 32892 }, { "epoch": 1.5094763893350467, "grad_norm": 0.5033056735992432, "learning_rate": 5.10971278250828e-06, "loss": 0.4053, "step": 32893 }, { "epoch": 1.5095222798403012, "grad_norm": 0.4347902238368988, "learning_rate": 5.1094676535025e-06, "loss": 0.2833, "step": 32894 }, { "epoch": 1.5095681703455555, "grad_norm": 0.4654155671596527, "learning_rate": 5.109222524233484e-06, "loss": 0.3303, "step": 32895 }, { "epoch": 1.50961406085081, "grad_norm": 0.47166234254837036, "learning_rate": 5.1089773947018215e-06, "loss": 0.377, "step": 32896 }, { "epoch": 1.5096599513560645, "grad_norm": 0.48635968565940857, "learning_rate": 5.108732264908104e-06, "loss": 0.3967, "step": 32897 }, { "epoch": 1.5097058418613187, "grad_norm": 0.47026216983795166, "learning_rate": 5.108487134852919e-06, "loss": 0.3459, "step": 32898 }, { "epoch": 1.5097517323665732, "grad_norm": 0.43051543831825256, "learning_rate": 5.108242004536857e-06, "loss": 0.3146, "step": 32899 }, { "epoch": 1.5097976228718277, "grad_norm": 0.4713224172592163, "learning_rate": 5.107996873960508e-06, "loss": 0.3799, "step": 32900 }, { "epoch": 1.5098435133770822, "grad_norm": 0.4791678190231323, "learning_rate": 5.107751743124458e-06, "loss": 0.3621, "step": 32901 }, { "epoch": 1.5098894038823367, "grad_norm": 0.4685980975627899, "learning_rate": 5.1075066120293e-06, "loss": 0.4133, "step": 32902 }, { "epoch": 1.5099352943875912, "grad_norm": 0.4428599774837494, "learning_rate": 5.107261480675625e-06, "loss": 0.3247, "step": 32903 }, { "epoch": 1.5099811848928457, "grad_norm": 0.5112189054489136, "learning_rate": 5.107016349064018e-06, "loss": 0.3064, "step": 32904 }, { "epoch": 1.5100270753981002, "grad_norm": 0.549166202545166, "learning_rate": 5.10677121719507e-06, "loss": 0.4021, "step": 32905 }, { "epoch": 1.5100729659033547, "grad_norm": 0.45137226581573486, "learning_rate": 5.106526085069372e-06, "loss": 0.3501, "step": 32906 }, { "epoch": 1.5101188564086092, "grad_norm": 0.4434645473957062, "learning_rate": 5.106280952687512e-06, "loss": 0.2721, "step": 32907 }, { "epoch": 1.5101647469138635, "grad_norm": 0.4948899745941162, "learning_rate": 5.106035820050081e-06, "loss": 0.4279, "step": 32908 }, { "epoch": 1.510210637419118, "grad_norm": 0.4634902775287628, "learning_rate": 5.105790687157666e-06, "loss": 0.3354, "step": 32909 }, { "epoch": 1.5102565279243725, "grad_norm": 0.45112544298171997, "learning_rate": 5.105545554010858e-06, "loss": 0.3014, "step": 32910 }, { "epoch": 1.5103024184296268, "grad_norm": 0.5241010785102844, "learning_rate": 5.105300420610246e-06, "loss": 0.4362, "step": 32911 }, { "epoch": 1.5103483089348813, "grad_norm": 0.4839531183242798, "learning_rate": 5.10505528695642e-06, "loss": 0.3111, "step": 32912 }, { "epoch": 1.5103941994401358, "grad_norm": 0.5071544647216797, "learning_rate": 5.104810153049969e-06, "loss": 0.3804, "step": 32913 }, { "epoch": 1.5104400899453903, "grad_norm": 0.46169421076774597, "learning_rate": 5.104565018891482e-06, "loss": 0.3777, "step": 32914 }, { "epoch": 1.5104859804506447, "grad_norm": 0.44316917657852173, "learning_rate": 5.104319884481551e-06, "loss": 0.3275, "step": 32915 }, { "epoch": 1.5105318709558992, "grad_norm": 0.46173954010009766, "learning_rate": 5.104074749820762e-06, "loss": 0.3838, "step": 32916 }, { "epoch": 1.5105777614611537, "grad_norm": 0.46743935346603394, "learning_rate": 5.103829614909707e-06, "loss": 0.3278, "step": 32917 }, { "epoch": 1.5106236519664082, "grad_norm": 0.47058355808258057, "learning_rate": 5.1035844797489735e-06, "loss": 0.3732, "step": 32918 }, { "epoch": 1.5106695424716627, "grad_norm": 0.4932444095611572, "learning_rate": 5.103339344339153e-06, "loss": 0.3802, "step": 32919 }, { "epoch": 1.5107154329769172, "grad_norm": 0.47195911407470703, "learning_rate": 5.1030942086808346e-06, "loss": 0.3793, "step": 32920 }, { "epoch": 1.5107613234821715, "grad_norm": 0.4412645995616913, "learning_rate": 5.102849072774605e-06, "loss": 0.3213, "step": 32921 }, { "epoch": 1.510807213987426, "grad_norm": 0.4529319703578949, "learning_rate": 5.102603936621056e-06, "loss": 0.3323, "step": 32922 }, { "epoch": 1.5108531044926805, "grad_norm": 0.4406147599220276, "learning_rate": 5.102358800220779e-06, "loss": 0.3213, "step": 32923 }, { "epoch": 1.5108989949979348, "grad_norm": 0.4675256907939911, "learning_rate": 5.1021136635743594e-06, "loss": 0.3726, "step": 32924 }, { "epoch": 1.5109448855031893, "grad_norm": 0.4172089993953705, "learning_rate": 5.101868526682391e-06, "loss": 0.296, "step": 32925 }, { "epoch": 1.5109907760084438, "grad_norm": 0.4600245952606201, "learning_rate": 5.101623389545459e-06, "loss": 0.3414, "step": 32926 }, { "epoch": 1.5110366665136983, "grad_norm": 0.4454044699668884, "learning_rate": 5.101378252164155e-06, "loss": 0.3407, "step": 32927 }, { "epoch": 1.5110825570189528, "grad_norm": 0.49111026525497437, "learning_rate": 5.101133114539069e-06, "loss": 0.3794, "step": 32928 }, { "epoch": 1.5111284475242073, "grad_norm": 0.47979646921157837, "learning_rate": 5.10088797667079e-06, "loss": 0.3699, "step": 32929 }, { "epoch": 1.5111743380294618, "grad_norm": 0.4436599910259247, "learning_rate": 5.100642838559907e-06, "loss": 0.3358, "step": 32930 }, { "epoch": 1.5112202285347163, "grad_norm": 0.49318814277648926, "learning_rate": 5.100397700207009e-06, "loss": 0.4268, "step": 32931 }, { "epoch": 1.5112661190399708, "grad_norm": 0.4757654368877411, "learning_rate": 5.100152561612687e-06, "loss": 0.3318, "step": 32932 }, { "epoch": 1.511312009545225, "grad_norm": 0.47588858008384705, "learning_rate": 5.099907422777529e-06, "loss": 0.3609, "step": 32933 }, { "epoch": 1.5113579000504795, "grad_norm": 0.4523887634277344, "learning_rate": 5.099662283702126e-06, "loss": 0.3118, "step": 32934 }, { "epoch": 1.511403790555734, "grad_norm": 0.5391321778297424, "learning_rate": 5.0994171443870654e-06, "loss": 0.4016, "step": 32935 }, { "epoch": 1.5114496810609885, "grad_norm": 0.48915722966194153, "learning_rate": 5.09917200483294e-06, "loss": 0.4505, "step": 32936 }, { "epoch": 1.5114955715662428, "grad_norm": 0.4565589427947998, "learning_rate": 5.098926865040336e-06, "loss": 0.3421, "step": 32937 }, { "epoch": 1.5115414620714973, "grad_norm": 0.4710758626461029, "learning_rate": 5.098681725009843e-06, "loss": 0.3861, "step": 32938 }, { "epoch": 1.5115873525767518, "grad_norm": 0.5026084184646606, "learning_rate": 5.0984365847420524e-06, "loss": 0.4225, "step": 32939 }, { "epoch": 1.5116332430820063, "grad_norm": 0.4483088552951813, "learning_rate": 5.098191444237553e-06, "loss": 0.3192, "step": 32940 }, { "epoch": 1.5116791335872608, "grad_norm": 0.45624521374702454, "learning_rate": 5.0979463034969355e-06, "loss": 0.257, "step": 32941 }, { "epoch": 1.5117250240925153, "grad_norm": 0.4575149118900299, "learning_rate": 5.097701162520787e-06, "loss": 0.3217, "step": 32942 }, { "epoch": 1.5117709145977698, "grad_norm": 0.4449019134044647, "learning_rate": 5.097456021309698e-06, "loss": 0.3016, "step": 32943 }, { "epoch": 1.5118168051030243, "grad_norm": 0.5556275844573975, "learning_rate": 5.097210879864258e-06, "loss": 0.4079, "step": 32944 }, { "epoch": 1.5118626956082788, "grad_norm": 0.5077406764030457, "learning_rate": 5.096965738185056e-06, "loss": 0.3994, "step": 32945 }, { "epoch": 1.511908586113533, "grad_norm": 0.46166062355041504, "learning_rate": 5.0967205962726815e-06, "loss": 0.3027, "step": 32946 }, { "epoch": 1.5119544766187876, "grad_norm": 0.4965742528438568, "learning_rate": 5.096475454127726e-06, "loss": 0.3712, "step": 32947 }, { "epoch": 1.512000367124042, "grad_norm": 0.4740668535232544, "learning_rate": 5.096230311750779e-06, "loss": 0.3701, "step": 32948 }, { "epoch": 1.5120462576292963, "grad_norm": 0.4472162425518036, "learning_rate": 5.095985169142424e-06, "loss": 0.3122, "step": 32949 }, { "epoch": 1.5120921481345508, "grad_norm": 0.4320763349533081, "learning_rate": 5.095740026303258e-06, "loss": 0.3196, "step": 32950 }, { "epoch": 1.5121380386398053, "grad_norm": 0.44756749272346497, "learning_rate": 5.0954948832338666e-06, "loss": 0.3226, "step": 32951 }, { "epoch": 1.5121839291450598, "grad_norm": 0.5062116980552673, "learning_rate": 5.095249739934841e-06, "loss": 0.4349, "step": 32952 }, { "epoch": 1.5122298196503143, "grad_norm": 0.4963386356830597, "learning_rate": 5.095004596406769e-06, "loss": 0.3861, "step": 32953 }, { "epoch": 1.5122757101555688, "grad_norm": 0.49955645203590393, "learning_rate": 5.0947594526502416e-06, "loss": 0.4271, "step": 32954 }, { "epoch": 1.5123216006608233, "grad_norm": 0.4615998864173889, "learning_rate": 5.094514308665847e-06, "loss": 0.3515, "step": 32955 }, { "epoch": 1.5123674911660778, "grad_norm": 0.4534638822078705, "learning_rate": 5.094269164454174e-06, "loss": 0.3241, "step": 32956 }, { "epoch": 1.5124133816713323, "grad_norm": 0.4982343316078186, "learning_rate": 5.094024020015817e-06, "loss": 0.3808, "step": 32957 }, { "epoch": 1.5124592721765868, "grad_norm": 0.4697071611881256, "learning_rate": 5.093778875351359e-06, "loss": 0.3814, "step": 32958 }, { "epoch": 1.512505162681841, "grad_norm": 0.4335629940032959, "learning_rate": 5.093533730461394e-06, "loss": 0.3123, "step": 32959 }, { "epoch": 1.5125510531870956, "grad_norm": 0.4794343113899231, "learning_rate": 5.093288585346509e-06, "loss": 0.3598, "step": 32960 }, { "epoch": 1.51259694369235, "grad_norm": 0.4779466986656189, "learning_rate": 5.093043440007294e-06, "loss": 0.3088, "step": 32961 }, { "epoch": 1.5126428341976044, "grad_norm": 0.47442707419395447, "learning_rate": 5.09279829444434e-06, "loss": 0.3799, "step": 32962 }, { "epoch": 1.5126887247028589, "grad_norm": 0.4841737151145935, "learning_rate": 5.0925531486582344e-06, "loss": 0.3631, "step": 32963 }, { "epoch": 1.5127346152081134, "grad_norm": 0.44799405336380005, "learning_rate": 5.092308002649567e-06, "loss": 0.3415, "step": 32964 }, { "epoch": 1.5127805057133679, "grad_norm": 0.4868413209915161, "learning_rate": 5.09206285641893e-06, "loss": 0.3745, "step": 32965 }, { "epoch": 1.5128263962186224, "grad_norm": 0.4384042024612427, "learning_rate": 5.09181770996691e-06, "loss": 0.2923, "step": 32966 }, { "epoch": 1.5128722867238769, "grad_norm": 0.5177088379859924, "learning_rate": 5.091572563294096e-06, "loss": 0.453, "step": 32967 }, { "epoch": 1.5129181772291314, "grad_norm": 0.5143230557441711, "learning_rate": 5.091327416401082e-06, "loss": 0.4383, "step": 32968 }, { "epoch": 1.5129640677343859, "grad_norm": 0.45902279019355774, "learning_rate": 5.091082269288452e-06, "loss": 0.3324, "step": 32969 }, { "epoch": 1.5130099582396404, "grad_norm": 0.447106271982193, "learning_rate": 5.090837121956799e-06, "loss": 0.2901, "step": 32970 }, { "epoch": 1.5130558487448946, "grad_norm": 0.4388471245765686, "learning_rate": 5.090591974406711e-06, "loss": 0.3315, "step": 32971 }, { "epoch": 1.5131017392501491, "grad_norm": 0.43902021646499634, "learning_rate": 5.090346826638777e-06, "loss": 0.32, "step": 32972 }, { "epoch": 1.5131476297554036, "grad_norm": 0.47846272587776184, "learning_rate": 5.0901016786535885e-06, "loss": 0.3861, "step": 32973 }, { "epoch": 1.5131935202606581, "grad_norm": 0.4471953809261322, "learning_rate": 5.089856530451734e-06, "loss": 0.3023, "step": 32974 }, { "epoch": 1.5132394107659124, "grad_norm": 0.4588596522808075, "learning_rate": 5.089611382033802e-06, "loss": 0.3268, "step": 32975 }, { "epoch": 1.513285301271167, "grad_norm": 0.47428715229034424, "learning_rate": 5.089366233400384e-06, "loss": 0.38, "step": 32976 }, { "epoch": 1.5133311917764214, "grad_norm": 0.4670547842979431, "learning_rate": 5.089121084552066e-06, "loss": 0.3931, "step": 32977 }, { "epoch": 1.513377082281676, "grad_norm": 0.4484016001224518, "learning_rate": 5.088875935489442e-06, "loss": 0.3377, "step": 32978 }, { "epoch": 1.5134229727869304, "grad_norm": 0.4753055274486542, "learning_rate": 5.0886307862131e-06, "loss": 0.3814, "step": 32979 }, { "epoch": 1.5134688632921849, "grad_norm": 0.4561261832714081, "learning_rate": 5.088385636723628e-06, "loss": 0.3688, "step": 32980 }, { "epoch": 1.5135147537974394, "grad_norm": 0.4856032431125641, "learning_rate": 5.088140487021617e-06, "loss": 0.4027, "step": 32981 }, { "epoch": 1.5135606443026939, "grad_norm": 0.4726117253303528, "learning_rate": 5.087895337107656e-06, "loss": 0.3359, "step": 32982 }, { "epoch": 1.5136065348079484, "grad_norm": 0.4547535479068756, "learning_rate": 5.087650186982333e-06, "loss": 0.3491, "step": 32983 }, { "epoch": 1.5136524253132027, "grad_norm": 0.4858349859714508, "learning_rate": 5.08740503664624e-06, "loss": 0.3907, "step": 32984 }, { "epoch": 1.5136983158184572, "grad_norm": 0.4974118769168854, "learning_rate": 5.087159886099966e-06, "loss": 0.3962, "step": 32985 }, { "epoch": 1.5137442063237116, "grad_norm": 0.4718133211135864, "learning_rate": 5.086914735344099e-06, "loss": 0.339, "step": 32986 }, { "epoch": 1.513790096828966, "grad_norm": 0.46721309423446655, "learning_rate": 5.086669584379229e-06, "loss": 0.3667, "step": 32987 }, { "epoch": 1.5138359873342204, "grad_norm": 0.4504622220993042, "learning_rate": 5.086424433205948e-06, "loss": 0.3165, "step": 32988 }, { "epoch": 1.513881877839475, "grad_norm": 0.4800935387611389, "learning_rate": 5.086179281824841e-06, "loss": 0.346, "step": 32989 }, { "epoch": 1.5139277683447294, "grad_norm": 0.4713888466358185, "learning_rate": 5.0859341302365016e-06, "loss": 0.3486, "step": 32990 }, { "epoch": 1.513973658849984, "grad_norm": 0.5469886660575867, "learning_rate": 5.085688978441518e-06, "loss": 0.443, "step": 32991 }, { "epoch": 1.5140195493552384, "grad_norm": 0.4701731503009796, "learning_rate": 5.085443826440479e-06, "loss": 0.3332, "step": 32992 }, { "epoch": 1.514065439860493, "grad_norm": 0.4823015630245209, "learning_rate": 5.085198674233975e-06, "loss": 0.3761, "step": 32993 }, { "epoch": 1.5141113303657474, "grad_norm": 0.4821377992630005, "learning_rate": 5.084953521822594e-06, "loss": 0.3324, "step": 32994 }, { "epoch": 1.514157220871002, "grad_norm": 0.5180010795593262, "learning_rate": 5.0847083692069276e-06, "loss": 0.4829, "step": 32995 }, { "epoch": 1.5142031113762564, "grad_norm": 0.4725053012371063, "learning_rate": 5.084463216387565e-06, "loss": 0.378, "step": 32996 }, { "epoch": 1.5142490018815107, "grad_norm": 0.4603176712989807, "learning_rate": 5.084218063365093e-06, "loss": 0.3552, "step": 32997 }, { "epoch": 1.5142948923867652, "grad_norm": 0.47787052392959595, "learning_rate": 5.083972910140104e-06, "loss": 0.3515, "step": 32998 }, { "epoch": 1.5143407828920197, "grad_norm": 0.49880218505859375, "learning_rate": 5.083727756713187e-06, "loss": 0.427, "step": 32999 }, { "epoch": 1.514386673397274, "grad_norm": 0.4680313766002655, "learning_rate": 5.0834826030849305e-06, "loss": 0.349, "step": 33000 }, { "epoch": 1.5144325639025285, "grad_norm": 0.4542843997478485, "learning_rate": 5.083237449255926e-06, "loss": 0.3091, "step": 33001 }, { "epoch": 1.514478454407783, "grad_norm": 0.5088198184967041, "learning_rate": 5.082992295226761e-06, "loss": 0.4181, "step": 33002 }, { "epoch": 1.5145243449130374, "grad_norm": 0.45194828510284424, "learning_rate": 5.082747140998024e-06, "loss": 0.3156, "step": 33003 }, { "epoch": 1.514570235418292, "grad_norm": 0.46553921699523926, "learning_rate": 5.0825019865703075e-06, "loss": 0.3532, "step": 33004 }, { "epoch": 1.5146161259235464, "grad_norm": 0.49687710404396057, "learning_rate": 5.0822568319442e-06, "loss": 0.4048, "step": 33005 }, { "epoch": 1.514662016428801, "grad_norm": 0.42273738980293274, "learning_rate": 5.082011677120292e-06, "loss": 0.2688, "step": 33006 }, { "epoch": 1.5147079069340554, "grad_norm": 0.45425301790237427, "learning_rate": 5.081766522099171e-06, "loss": 0.3582, "step": 33007 }, { "epoch": 1.51475379743931, "grad_norm": 0.4508279860019684, "learning_rate": 5.081521366881425e-06, "loss": 0.3351, "step": 33008 }, { "epoch": 1.5147996879445644, "grad_norm": 0.5306543111801147, "learning_rate": 5.081276211467649e-06, "loss": 0.4035, "step": 33009 }, { "epoch": 1.5148455784498187, "grad_norm": 0.4281502962112427, "learning_rate": 5.081031055858428e-06, "loss": 0.2977, "step": 33010 }, { "epoch": 1.5148914689550732, "grad_norm": 0.5367514491081238, "learning_rate": 5.080785900054352e-06, "loss": 0.3218, "step": 33011 }, { "epoch": 1.5149373594603277, "grad_norm": 0.4951612651348114, "learning_rate": 5.080540744056013e-06, "loss": 0.3737, "step": 33012 }, { "epoch": 1.514983249965582, "grad_norm": 0.49979129433631897, "learning_rate": 5.0802955878639995e-06, "loss": 0.4201, "step": 33013 }, { "epoch": 1.5150291404708365, "grad_norm": 0.4428306519985199, "learning_rate": 5.080050431478898e-06, "loss": 0.3516, "step": 33014 }, { "epoch": 1.515075030976091, "grad_norm": 0.491031289100647, "learning_rate": 5.079805274901302e-06, "loss": 0.348, "step": 33015 }, { "epoch": 1.5151209214813455, "grad_norm": 0.4457719027996063, "learning_rate": 5.079560118131799e-06, "loss": 0.2981, "step": 33016 }, { "epoch": 1.5151668119866, "grad_norm": 0.4750492572784424, "learning_rate": 5.0793149611709806e-06, "loss": 0.3848, "step": 33017 }, { "epoch": 1.5152127024918545, "grad_norm": 0.47274166345596313, "learning_rate": 5.079069804019432e-06, "loss": 0.3604, "step": 33018 }, { "epoch": 1.515258592997109, "grad_norm": 0.48137491941452026, "learning_rate": 5.078824646677748e-06, "loss": 0.3937, "step": 33019 }, { "epoch": 1.5153044835023635, "grad_norm": 0.48941293358802795, "learning_rate": 5.078579489146513e-06, "loss": 0.3785, "step": 33020 }, { "epoch": 1.515350374007618, "grad_norm": 0.46263161301612854, "learning_rate": 5.078334331426321e-06, "loss": 0.316, "step": 33021 }, { "epoch": 1.5153962645128722, "grad_norm": 0.4745839834213257, "learning_rate": 5.078089173517759e-06, "loss": 0.3489, "step": 33022 }, { "epoch": 1.5154421550181267, "grad_norm": 0.4619411826133728, "learning_rate": 5.077844015421418e-06, "loss": 0.3348, "step": 33023 }, { "epoch": 1.5154880455233812, "grad_norm": 0.49199873208999634, "learning_rate": 5.0775988571378875e-06, "loss": 0.4442, "step": 33024 }, { "epoch": 1.5155339360286355, "grad_norm": 0.45103219151496887, "learning_rate": 5.0773536986677544e-06, "loss": 0.3184, "step": 33025 }, { "epoch": 1.51557982653389, "grad_norm": 0.4490686357021332, "learning_rate": 5.0771085400116096e-06, "loss": 0.3403, "step": 33026 }, { "epoch": 1.5156257170391445, "grad_norm": 0.4395928680896759, "learning_rate": 5.076863381170045e-06, "loss": 0.3235, "step": 33027 }, { "epoch": 1.515671607544399, "grad_norm": 0.45402446389198303, "learning_rate": 5.0766182221436475e-06, "loss": 0.3164, "step": 33028 }, { "epoch": 1.5157174980496535, "grad_norm": 0.545629620552063, "learning_rate": 5.076373062933007e-06, "loss": 0.3969, "step": 33029 }, { "epoch": 1.515763388554908, "grad_norm": 0.4608049988746643, "learning_rate": 5.0761279035387135e-06, "loss": 0.3336, "step": 33030 }, { "epoch": 1.5158092790601625, "grad_norm": 0.4742632806301117, "learning_rate": 5.075882743961357e-06, "loss": 0.3575, "step": 33031 }, { "epoch": 1.515855169565417, "grad_norm": 0.4876706898212433, "learning_rate": 5.0756375842015245e-06, "loss": 0.3916, "step": 33032 }, { "epoch": 1.5159010600706715, "grad_norm": 0.43666982650756836, "learning_rate": 5.07539242425981e-06, "loss": 0.3123, "step": 33033 }, { "epoch": 1.515946950575926, "grad_norm": 0.4772433936595917, "learning_rate": 5.0751472641367995e-06, "loss": 0.3999, "step": 33034 }, { "epoch": 1.5159928410811803, "grad_norm": 0.46065327525138855, "learning_rate": 5.074902103833082e-06, "loss": 0.3581, "step": 33035 }, { "epoch": 1.5160387315864348, "grad_norm": 0.44228842854499817, "learning_rate": 5.074656943349252e-06, "loss": 0.3082, "step": 33036 }, { "epoch": 1.5160846220916893, "grad_norm": 0.5365902781486511, "learning_rate": 5.074411782685893e-06, "loss": 0.4621, "step": 33037 }, { "epoch": 1.5161305125969435, "grad_norm": 0.46863114833831787, "learning_rate": 5.074166621843598e-06, "loss": 0.3259, "step": 33038 }, { "epoch": 1.516176403102198, "grad_norm": 0.45192092657089233, "learning_rate": 5.073921460822956e-06, "loss": 0.322, "step": 33039 }, { "epoch": 1.5162222936074525, "grad_norm": 0.47045275568962097, "learning_rate": 5.0736762996245546e-06, "loss": 0.3892, "step": 33040 }, { "epoch": 1.516268184112707, "grad_norm": 0.45325735211372375, "learning_rate": 5.0734311382489875e-06, "loss": 0.342, "step": 33041 }, { "epoch": 1.5163140746179615, "grad_norm": 0.46869683265686035, "learning_rate": 5.0731859766968395e-06, "loss": 0.3876, "step": 33042 }, { "epoch": 1.516359965123216, "grad_norm": 0.5243753790855408, "learning_rate": 5.072940814968702e-06, "loss": 0.4647, "step": 33043 }, { "epoch": 1.5164058556284705, "grad_norm": 0.4870217442512512, "learning_rate": 5.072695653065167e-06, "loss": 0.3562, "step": 33044 }, { "epoch": 1.516451746133725, "grad_norm": 0.4675758481025696, "learning_rate": 5.072450490986821e-06, "loss": 0.3736, "step": 33045 }, { "epoch": 1.5164976366389795, "grad_norm": 0.49644336104393005, "learning_rate": 5.072205328734254e-06, "loss": 0.3937, "step": 33046 }, { "epoch": 1.516543527144234, "grad_norm": 0.42883315682411194, "learning_rate": 5.071960166308056e-06, "loss": 0.2877, "step": 33047 }, { "epoch": 1.5165894176494883, "grad_norm": 0.4704356789588928, "learning_rate": 5.071715003708817e-06, "loss": 0.3175, "step": 33048 }, { "epoch": 1.5166353081547428, "grad_norm": 0.4845125377178192, "learning_rate": 5.071469840937125e-06, "loss": 0.3531, "step": 33049 }, { "epoch": 1.5166811986599973, "grad_norm": 0.4675728678703308, "learning_rate": 5.071224677993573e-06, "loss": 0.3955, "step": 33050 }, { "epoch": 1.5167270891652516, "grad_norm": 0.47753751277923584, "learning_rate": 5.070979514878747e-06, "loss": 0.3734, "step": 33051 }, { "epoch": 1.516772979670506, "grad_norm": 0.46507972478866577, "learning_rate": 5.070734351593236e-06, "loss": 0.3556, "step": 33052 }, { "epoch": 1.5168188701757606, "grad_norm": 0.4897947907447815, "learning_rate": 5.070489188137633e-06, "loss": 0.4292, "step": 33053 }, { "epoch": 1.516864760681015, "grad_norm": 0.46886664628982544, "learning_rate": 5.070244024512524e-06, "loss": 0.3781, "step": 33054 }, { "epoch": 1.5169106511862696, "grad_norm": 0.46459871530532837, "learning_rate": 5.069998860718503e-06, "loss": 0.3459, "step": 33055 }, { "epoch": 1.516956541691524, "grad_norm": 0.47577965259552, "learning_rate": 5.0697536967561555e-06, "loss": 0.3872, "step": 33056 }, { "epoch": 1.5170024321967785, "grad_norm": 0.4306287467479706, "learning_rate": 5.06950853262607e-06, "loss": 0.3193, "step": 33057 }, { "epoch": 1.517048322702033, "grad_norm": 0.44365522265434265, "learning_rate": 5.069263368328843e-06, "loss": 0.313, "step": 33058 }, { "epoch": 1.5170942132072875, "grad_norm": 0.45083296298980713, "learning_rate": 5.069018203865056e-06, "loss": 0.28, "step": 33059 }, { "epoch": 1.5171401037125418, "grad_norm": 0.4979355037212372, "learning_rate": 5.068773039235302e-06, "loss": 0.3927, "step": 33060 }, { "epoch": 1.5171859942177963, "grad_norm": 0.4147357940673828, "learning_rate": 5.068527874440173e-06, "loss": 0.2573, "step": 33061 }, { "epoch": 1.5172318847230508, "grad_norm": 0.46807530522346497, "learning_rate": 5.068282709480254e-06, "loss": 0.3594, "step": 33062 }, { "epoch": 1.5172777752283053, "grad_norm": 0.47481852769851685, "learning_rate": 5.068037544356137e-06, "loss": 0.3624, "step": 33063 }, { "epoch": 1.5173236657335596, "grad_norm": 0.4877435564994812, "learning_rate": 5.067792379068413e-06, "loss": 0.4044, "step": 33064 }, { "epoch": 1.517369556238814, "grad_norm": 0.46689414978027344, "learning_rate": 5.067547213617668e-06, "loss": 0.3507, "step": 33065 }, { "epoch": 1.5174154467440686, "grad_norm": 0.5162866711616516, "learning_rate": 5.067302048004493e-06, "loss": 0.3488, "step": 33066 }, { "epoch": 1.517461337249323, "grad_norm": 0.4684656262397766, "learning_rate": 5.067056882229478e-06, "loss": 0.3723, "step": 33067 }, { "epoch": 1.5175072277545776, "grad_norm": 0.4358789324760437, "learning_rate": 5.066811716293213e-06, "loss": 0.2943, "step": 33068 }, { "epoch": 1.517553118259832, "grad_norm": 0.5011264681816101, "learning_rate": 5.066566550196285e-06, "loss": 0.3924, "step": 33069 }, { "epoch": 1.5175990087650866, "grad_norm": 0.4692758619785309, "learning_rate": 5.066321383939288e-06, "loss": 0.3895, "step": 33070 }, { "epoch": 1.517644899270341, "grad_norm": 0.46320345997810364, "learning_rate": 5.0660762175228075e-06, "loss": 0.39, "step": 33071 }, { "epoch": 1.5176907897755956, "grad_norm": 0.46558186411857605, "learning_rate": 5.065831050947437e-06, "loss": 0.3546, "step": 33072 }, { "epoch": 1.5177366802808498, "grad_norm": 0.4773818850517273, "learning_rate": 5.065585884213761e-06, "loss": 0.3487, "step": 33073 }, { "epoch": 1.5177825707861043, "grad_norm": 0.4680628478527069, "learning_rate": 5.065340717322372e-06, "loss": 0.3576, "step": 33074 }, { "epoch": 1.5178284612913588, "grad_norm": 0.45360931754112244, "learning_rate": 5.0650955502738595e-06, "loss": 0.3144, "step": 33075 }, { "epoch": 1.5178743517966131, "grad_norm": 0.4445614516735077, "learning_rate": 5.064850383068813e-06, "loss": 0.3297, "step": 33076 }, { "epoch": 1.5179202423018676, "grad_norm": 0.48551374673843384, "learning_rate": 5.064605215707822e-06, "loss": 0.4392, "step": 33077 }, { "epoch": 1.5179661328071221, "grad_norm": 0.529626727104187, "learning_rate": 5.064360048191475e-06, "loss": 0.4724, "step": 33078 }, { "epoch": 1.5180120233123766, "grad_norm": 0.47567227482795715, "learning_rate": 5.064114880520363e-06, "loss": 0.3656, "step": 33079 }, { "epoch": 1.518057913817631, "grad_norm": 0.4787372052669525, "learning_rate": 5.0638697126950746e-06, "loss": 0.374, "step": 33080 }, { "epoch": 1.5181038043228856, "grad_norm": 0.44507479667663574, "learning_rate": 5.0636245447162004e-06, "loss": 0.3376, "step": 33081 }, { "epoch": 1.51814969482814, "grad_norm": 0.46953943371772766, "learning_rate": 5.063379376584328e-06, "loss": 0.3931, "step": 33082 }, { "epoch": 1.5181955853333946, "grad_norm": 0.4642258882522583, "learning_rate": 5.063134208300049e-06, "loss": 0.3238, "step": 33083 }, { "epoch": 1.518241475838649, "grad_norm": 0.45909154415130615, "learning_rate": 5.062889039863953e-06, "loss": 0.3199, "step": 33084 }, { "epoch": 1.5182873663439036, "grad_norm": 0.45715761184692383, "learning_rate": 5.062643871276627e-06, "loss": 0.3142, "step": 33085 }, { "epoch": 1.5183332568491579, "grad_norm": 0.481799840927124, "learning_rate": 5.062398702538662e-06, "loss": 0.4009, "step": 33086 }, { "epoch": 1.5183791473544124, "grad_norm": 0.4709456264972687, "learning_rate": 5.06215353365065e-06, "loss": 0.3932, "step": 33087 }, { "epoch": 1.5184250378596669, "grad_norm": 0.44289568066596985, "learning_rate": 5.061908364613177e-06, "loss": 0.2747, "step": 33088 }, { "epoch": 1.5184709283649211, "grad_norm": 0.4894202649593353, "learning_rate": 5.061663195426835e-06, "loss": 0.3492, "step": 33089 }, { "epoch": 1.5185168188701756, "grad_norm": 0.4981745183467865, "learning_rate": 5.061418026092211e-06, "loss": 0.4332, "step": 33090 }, { "epoch": 1.5185627093754301, "grad_norm": 0.4492800235748291, "learning_rate": 5.0611728566098965e-06, "loss": 0.3548, "step": 33091 }, { "epoch": 1.5186085998806846, "grad_norm": 0.46527099609375, "learning_rate": 5.06092768698048e-06, "loss": 0.3617, "step": 33092 }, { "epoch": 1.5186544903859391, "grad_norm": 0.49841877818107605, "learning_rate": 5.060682517204552e-06, "loss": 0.4057, "step": 33093 }, { "epoch": 1.5187003808911936, "grad_norm": 0.49266549944877625, "learning_rate": 5.060437347282703e-06, "loss": 0.3755, "step": 33094 }, { "epoch": 1.5187462713964481, "grad_norm": 0.4791688323020935, "learning_rate": 5.06019217721552e-06, "loss": 0.4298, "step": 33095 }, { "epoch": 1.5187921619017026, "grad_norm": 0.4628967344760895, "learning_rate": 5.059947007003595e-06, "loss": 0.3454, "step": 33096 }, { "epoch": 1.5188380524069571, "grad_norm": 0.4693751633167267, "learning_rate": 5.059701836647515e-06, "loss": 0.3902, "step": 33097 }, { "epoch": 1.5188839429122116, "grad_norm": 0.47565728425979614, "learning_rate": 5.059456666147871e-06, "loss": 0.372, "step": 33098 }, { "epoch": 1.518929833417466, "grad_norm": 0.47959795594215393, "learning_rate": 5.059211495505254e-06, "loss": 0.3895, "step": 33099 }, { "epoch": 1.5189757239227204, "grad_norm": 0.46603432297706604, "learning_rate": 5.05896632472025e-06, "loss": 0.3139, "step": 33100 }, { "epoch": 1.519021614427975, "grad_norm": 0.46787333488464355, "learning_rate": 5.0587211537934525e-06, "loss": 0.3288, "step": 33101 }, { "epoch": 1.5190675049332292, "grad_norm": 0.49200358986854553, "learning_rate": 5.058475982725447e-06, "loss": 0.3843, "step": 33102 }, { "epoch": 1.5191133954384837, "grad_norm": 0.466325044631958, "learning_rate": 5.058230811516827e-06, "loss": 0.3702, "step": 33103 }, { "epoch": 1.5191592859437382, "grad_norm": 0.4988346993923187, "learning_rate": 5.057985640168179e-06, "loss": 0.3754, "step": 33104 }, { "epoch": 1.5192051764489927, "grad_norm": 0.4740409851074219, "learning_rate": 5.057740468680096e-06, "loss": 0.4087, "step": 33105 }, { "epoch": 1.5192510669542472, "grad_norm": 0.4210430383682251, "learning_rate": 5.0574952970531645e-06, "loss": 0.2668, "step": 33106 }, { "epoch": 1.5192969574595017, "grad_norm": 0.46198585629463196, "learning_rate": 5.0572501252879735e-06, "loss": 0.3778, "step": 33107 }, { "epoch": 1.5193428479647562, "grad_norm": 0.49492454528808594, "learning_rate": 5.057004953385114e-06, "loss": 0.4157, "step": 33108 }, { "epoch": 1.5193887384700107, "grad_norm": 0.42376402020454407, "learning_rate": 5.056759781345177e-06, "loss": 0.3022, "step": 33109 }, { "epoch": 1.5194346289752652, "grad_norm": 0.4857349395751953, "learning_rate": 5.0565146091687516e-06, "loss": 0.4018, "step": 33110 }, { "epoch": 1.5194805194805194, "grad_norm": 0.42582279443740845, "learning_rate": 5.056269436856424e-06, "loss": 0.2865, "step": 33111 }, { "epoch": 1.519526409985774, "grad_norm": 0.48234376311302185, "learning_rate": 5.056024264408788e-06, "loss": 0.3801, "step": 33112 }, { "epoch": 1.5195723004910284, "grad_norm": 0.4814351499080658, "learning_rate": 5.055779091826429e-06, "loss": 0.3548, "step": 33113 }, { "epoch": 1.5196181909962827, "grad_norm": 0.4461040496826172, "learning_rate": 5.05553391910994e-06, "loss": 0.3313, "step": 33114 }, { "epoch": 1.5196640815015372, "grad_norm": 0.4580996036529541, "learning_rate": 5.05528874625991e-06, "loss": 0.3268, "step": 33115 }, { "epoch": 1.5197099720067917, "grad_norm": 0.5304268598556519, "learning_rate": 5.055043573276928e-06, "loss": 0.3692, "step": 33116 }, { "epoch": 1.5197558625120462, "grad_norm": 0.4541924297809601, "learning_rate": 5.054798400161584e-06, "loss": 0.3254, "step": 33117 }, { "epoch": 1.5198017530173007, "grad_norm": 0.4889978766441345, "learning_rate": 5.054553226914466e-06, "loss": 0.3762, "step": 33118 }, { "epoch": 1.5198476435225552, "grad_norm": 0.4738158881664276, "learning_rate": 5.054308053536165e-06, "loss": 0.3742, "step": 33119 }, { "epoch": 1.5198935340278097, "grad_norm": 0.4959319531917572, "learning_rate": 5.054062880027271e-06, "loss": 0.3216, "step": 33120 }, { "epoch": 1.5199394245330642, "grad_norm": 0.4813539683818817, "learning_rate": 5.053817706388372e-06, "loss": 0.3656, "step": 33121 }, { "epoch": 1.5199853150383187, "grad_norm": 0.4650638997554779, "learning_rate": 5.053572532620059e-06, "loss": 0.3225, "step": 33122 }, { "epoch": 1.5200312055435732, "grad_norm": 0.45099329948425293, "learning_rate": 5.0533273587229205e-06, "loss": 0.3139, "step": 33123 }, { "epoch": 1.5200770960488275, "grad_norm": 0.4505697190761566, "learning_rate": 5.053082184697547e-06, "loss": 0.3299, "step": 33124 }, { "epoch": 1.520122986554082, "grad_norm": 0.4831925332546234, "learning_rate": 5.052837010544526e-06, "loss": 0.3844, "step": 33125 }, { "epoch": 1.5201688770593365, "grad_norm": 0.472883939743042, "learning_rate": 5.0525918362644515e-06, "loss": 0.3529, "step": 33126 }, { "epoch": 1.5202147675645907, "grad_norm": 0.4358261823654175, "learning_rate": 5.0523466618579085e-06, "loss": 0.2886, "step": 33127 }, { "epoch": 1.5202606580698452, "grad_norm": 0.4866802990436554, "learning_rate": 5.052101487325488e-06, "loss": 0.3727, "step": 33128 }, { "epoch": 1.5203065485750997, "grad_norm": 0.44990724325180054, "learning_rate": 5.051856312667781e-06, "loss": 0.2941, "step": 33129 }, { "epoch": 1.5203524390803542, "grad_norm": 0.4570745825767517, "learning_rate": 5.051611137885375e-06, "loss": 0.359, "step": 33130 }, { "epoch": 1.5203983295856087, "grad_norm": 0.47600966691970825, "learning_rate": 5.0513659629788605e-06, "loss": 0.3545, "step": 33131 }, { "epoch": 1.5204442200908632, "grad_norm": 0.4671674072742462, "learning_rate": 5.051120787948828e-06, "loss": 0.3582, "step": 33132 }, { "epoch": 1.5204901105961177, "grad_norm": 0.42479634284973145, "learning_rate": 5.050875612795865e-06, "loss": 0.3294, "step": 33133 }, { "epoch": 1.5205360011013722, "grad_norm": 0.46646901965141296, "learning_rate": 5.050630437520562e-06, "loss": 0.3258, "step": 33134 }, { "epoch": 1.5205818916066267, "grad_norm": 0.4532846510410309, "learning_rate": 5.05038526212351e-06, "loss": 0.3527, "step": 33135 }, { "epoch": 1.5206277821118812, "grad_norm": 0.45455634593963623, "learning_rate": 5.050140086605297e-06, "loss": 0.3427, "step": 33136 }, { "epoch": 1.5206736726171355, "grad_norm": 0.48840072751045227, "learning_rate": 5.049894910966513e-06, "loss": 0.3993, "step": 33137 }, { "epoch": 1.52071956312239, "grad_norm": 0.45427241921424866, "learning_rate": 5.049649735207748e-06, "loss": 0.3153, "step": 33138 }, { "epoch": 1.5207654536276445, "grad_norm": 0.473233699798584, "learning_rate": 5.0494045593295894e-06, "loss": 0.3629, "step": 33139 }, { "epoch": 1.5208113441328988, "grad_norm": 0.4572136104106903, "learning_rate": 5.04915938333263e-06, "loss": 0.3384, "step": 33140 }, { "epoch": 1.5208572346381533, "grad_norm": 0.4904002249240875, "learning_rate": 5.048914207217457e-06, "loss": 0.4717, "step": 33141 }, { "epoch": 1.5209031251434078, "grad_norm": 0.45605236291885376, "learning_rate": 5.048669030984661e-06, "loss": 0.3395, "step": 33142 }, { "epoch": 1.5209490156486623, "grad_norm": 0.481944739818573, "learning_rate": 5.048423854634832e-06, "loss": 0.3915, "step": 33143 }, { "epoch": 1.5209949061539167, "grad_norm": 0.4432668685913086, "learning_rate": 5.048178678168557e-06, "loss": 0.2883, "step": 33144 }, { "epoch": 1.5210407966591712, "grad_norm": 0.4559985101222992, "learning_rate": 5.047933501586429e-06, "loss": 0.3476, "step": 33145 }, { "epoch": 1.5210866871644257, "grad_norm": 0.4293455183506012, "learning_rate": 5.047688324889035e-06, "loss": 0.2774, "step": 33146 }, { "epoch": 1.5211325776696802, "grad_norm": 0.5065532922744751, "learning_rate": 5.047443148076966e-06, "loss": 0.3841, "step": 33147 }, { "epoch": 1.5211784681749347, "grad_norm": 0.45357683300971985, "learning_rate": 5.047197971150813e-06, "loss": 0.3223, "step": 33148 }, { "epoch": 1.521224358680189, "grad_norm": 0.4754711091518402, "learning_rate": 5.046952794111162e-06, "loss": 0.3765, "step": 33149 }, { "epoch": 1.5212702491854435, "grad_norm": 0.46898791193962097, "learning_rate": 5.046707616958604e-06, "loss": 0.3527, "step": 33150 }, { "epoch": 1.521316139690698, "grad_norm": 0.45028096437454224, "learning_rate": 5.046462439693729e-06, "loss": 0.34, "step": 33151 }, { "epoch": 1.5213620301959525, "grad_norm": 0.44368526339530945, "learning_rate": 5.046217262317128e-06, "loss": 0.3106, "step": 33152 }, { "epoch": 1.5214079207012068, "grad_norm": 0.43974897265434265, "learning_rate": 5.045972084829388e-06, "loss": 0.2911, "step": 33153 }, { "epoch": 1.5214538112064613, "grad_norm": 0.4517296850681305, "learning_rate": 5.045726907231101e-06, "loss": 0.328, "step": 33154 }, { "epoch": 1.5214997017117158, "grad_norm": 0.46778419613838196, "learning_rate": 5.0454817295228534e-06, "loss": 0.3587, "step": 33155 }, { "epoch": 1.5215455922169703, "grad_norm": 0.4485432207584381, "learning_rate": 5.045236551705236e-06, "loss": 0.3204, "step": 33156 }, { "epoch": 1.5215914827222248, "grad_norm": 0.4924410581588745, "learning_rate": 5.044991373778842e-06, "loss": 0.3958, "step": 33157 }, { "epoch": 1.5216373732274793, "grad_norm": 0.4524105191230774, "learning_rate": 5.044746195744254e-06, "loss": 0.3442, "step": 33158 }, { "epoch": 1.5216832637327338, "grad_norm": 0.470284104347229, "learning_rate": 5.044501017602069e-06, "loss": 0.3771, "step": 33159 }, { "epoch": 1.5217291542379883, "grad_norm": 0.4767764210700989, "learning_rate": 5.044255839352872e-06, "loss": 0.3552, "step": 33160 }, { "epoch": 1.5217750447432428, "grad_norm": 0.4740636646747589, "learning_rate": 5.044010660997253e-06, "loss": 0.3455, "step": 33161 }, { "epoch": 1.521820935248497, "grad_norm": 0.46428853273391724, "learning_rate": 5.0437654825358015e-06, "loss": 0.3653, "step": 33162 }, { "epoch": 1.5218668257537515, "grad_norm": 0.43491166830062866, "learning_rate": 5.04352030396911e-06, "loss": 0.2986, "step": 33163 }, { "epoch": 1.521912716259006, "grad_norm": 0.4821775257587433, "learning_rate": 5.043275125297765e-06, "loss": 0.4169, "step": 33164 }, { "epoch": 1.5219586067642603, "grad_norm": 0.45576924085617065, "learning_rate": 5.0430299465223566e-06, "loss": 0.329, "step": 33165 }, { "epoch": 1.5220044972695148, "grad_norm": 0.49046722054481506, "learning_rate": 5.042784767643476e-06, "loss": 0.4104, "step": 33166 }, { "epoch": 1.5220503877747693, "grad_norm": 0.4616661071777344, "learning_rate": 5.04253958866171e-06, "loss": 0.3569, "step": 33167 }, { "epoch": 1.5220962782800238, "grad_norm": 0.46663323044776917, "learning_rate": 5.04229440957765e-06, "loss": 0.3227, "step": 33168 }, { "epoch": 1.5221421687852783, "grad_norm": 0.4276491701602936, "learning_rate": 5.042049230391885e-06, "loss": 0.2919, "step": 33169 }, { "epoch": 1.5221880592905328, "grad_norm": 0.48580917716026306, "learning_rate": 5.041804051105006e-06, "loss": 0.4082, "step": 33170 }, { "epoch": 1.5222339497957873, "grad_norm": 0.46233025193214417, "learning_rate": 5.041558871717603e-06, "loss": 0.311, "step": 33171 }, { "epoch": 1.5222798403010418, "grad_norm": 0.4323754906654358, "learning_rate": 5.04131369223026e-06, "loss": 0.3058, "step": 33172 }, { "epoch": 1.5223257308062963, "grad_norm": 0.4929503798484802, "learning_rate": 5.041068512643572e-06, "loss": 0.3542, "step": 33173 }, { "epoch": 1.5223716213115508, "grad_norm": 0.4551067650318146, "learning_rate": 5.040823332958128e-06, "loss": 0.3408, "step": 33174 }, { "epoch": 1.522417511816805, "grad_norm": 0.46326974034309387, "learning_rate": 5.040578153174518e-06, "loss": 0.3554, "step": 33175 }, { "epoch": 1.5224634023220596, "grad_norm": 0.46471384167671204, "learning_rate": 5.040332973293328e-06, "loss": 0.3366, "step": 33176 }, { "epoch": 1.522509292827314, "grad_norm": 0.5243381261825562, "learning_rate": 5.040087793315152e-06, "loss": 0.4453, "step": 33177 }, { "epoch": 1.5225551833325683, "grad_norm": 0.4876064658164978, "learning_rate": 5.039842613240574e-06, "loss": 0.3668, "step": 33178 }, { "epoch": 1.5226010738378228, "grad_norm": 0.4409368634223938, "learning_rate": 5.039597433070189e-06, "loss": 0.3536, "step": 33179 }, { "epoch": 1.5226469643430773, "grad_norm": 0.43520769476890564, "learning_rate": 5.039352252804586e-06, "loss": 0.3231, "step": 33180 }, { "epoch": 1.5226928548483318, "grad_norm": 0.4593453109264374, "learning_rate": 5.039107072444353e-06, "loss": 0.3438, "step": 33181 }, { "epoch": 1.5227387453535863, "grad_norm": 0.448498398065567, "learning_rate": 5.038861891990079e-06, "loss": 0.3034, "step": 33182 }, { "epoch": 1.5227846358588408, "grad_norm": 0.44905877113342285, "learning_rate": 5.038616711442354e-06, "loss": 0.3345, "step": 33183 }, { "epoch": 1.5228305263640953, "grad_norm": 0.4815274775028229, "learning_rate": 5.038371530801769e-06, "loss": 0.361, "step": 33184 }, { "epoch": 1.5228764168693498, "grad_norm": 0.4651052951812744, "learning_rate": 5.038126350068912e-06, "loss": 0.3377, "step": 33185 }, { "epoch": 1.5229223073746043, "grad_norm": 0.48631033301353455, "learning_rate": 5.037881169244374e-06, "loss": 0.3258, "step": 33186 }, { "epoch": 1.5229681978798588, "grad_norm": 0.48365604877471924, "learning_rate": 5.037635988328743e-06, "loss": 0.43, "step": 33187 }, { "epoch": 1.523014088385113, "grad_norm": 0.4597786068916321, "learning_rate": 5.03739080732261e-06, "loss": 0.3288, "step": 33188 }, { "epoch": 1.5230599788903676, "grad_norm": 0.447753369808197, "learning_rate": 5.0371456262265616e-06, "loss": 0.3199, "step": 33189 }, { "epoch": 1.523105869395622, "grad_norm": 0.4303513467311859, "learning_rate": 5.036900445041191e-06, "loss": 0.317, "step": 33190 }, { "epoch": 1.5231517599008764, "grad_norm": 0.4908033609390259, "learning_rate": 5.036655263767087e-06, "loss": 0.3649, "step": 33191 }, { "epoch": 1.5231976504061309, "grad_norm": 0.42502832412719727, "learning_rate": 5.03641008240484e-06, "loss": 0.3276, "step": 33192 }, { "epoch": 1.5232435409113854, "grad_norm": 0.4938699007034302, "learning_rate": 5.036164900955036e-06, "loss": 0.3807, "step": 33193 }, { "epoch": 1.5232894314166399, "grad_norm": 0.5009687542915344, "learning_rate": 5.035919719418268e-06, "loss": 0.4001, "step": 33194 }, { "epoch": 1.5233353219218944, "grad_norm": 0.46142950654029846, "learning_rate": 5.0356745377951236e-06, "loss": 0.3323, "step": 33195 }, { "epoch": 1.5233812124271489, "grad_norm": 0.4864007234573364, "learning_rate": 5.0354293560861935e-06, "loss": 0.3347, "step": 33196 }, { "epoch": 1.5234271029324034, "grad_norm": 0.4798075258731842, "learning_rate": 5.035184174292068e-06, "loss": 0.3681, "step": 33197 }, { "epoch": 1.5234729934376579, "grad_norm": 0.5086239576339722, "learning_rate": 5.034938992413334e-06, "loss": 0.3986, "step": 33198 }, { "epoch": 1.5235188839429123, "grad_norm": 0.45581093430519104, "learning_rate": 5.034693810450583e-06, "loss": 0.3113, "step": 33199 }, { "epoch": 1.5235647744481666, "grad_norm": 0.43493783473968506, "learning_rate": 5.034448628404405e-06, "loss": 0.3288, "step": 33200 }, { "epoch": 1.5236106649534211, "grad_norm": 0.475620836019516, "learning_rate": 5.034203446275388e-06, "loss": 0.349, "step": 33201 }, { "epoch": 1.5236565554586756, "grad_norm": 0.4633347690105438, "learning_rate": 5.033958264064124e-06, "loss": 0.3602, "step": 33202 }, { "epoch": 1.52370244596393, "grad_norm": 0.469974547624588, "learning_rate": 5.033713081771201e-06, "loss": 0.3947, "step": 33203 }, { "epoch": 1.5237483364691844, "grad_norm": 0.4608430862426758, "learning_rate": 5.0334678993972075e-06, "loss": 0.3348, "step": 33204 }, { "epoch": 1.523794226974439, "grad_norm": 0.5178831815719604, "learning_rate": 5.033222716942735e-06, "loss": 0.4424, "step": 33205 }, { "epoch": 1.5238401174796934, "grad_norm": 0.5134355425834656, "learning_rate": 5.032977534408372e-06, "loss": 0.41, "step": 33206 }, { "epoch": 1.5238860079849479, "grad_norm": 0.47879335284233093, "learning_rate": 5.0327323517947094e-06, "loss": 0.3531, "step": 33207 }, { "epoch": 1.5239318984902024, "grad_norm": 0.4919058084487915, "learning_rate": 5.032487169102334e-06, "loss": 0.3916, "step": 33208 }, { "epoch": 1.5239777889954569, "grad_norm": 0.44016921520233154, "learning_rate": 5.032241986331838e-06, "loss": 0.2985, "step": 33209 }, { "epoch": 1.5240236795007114, "grad_norm": 0.48360544443130493, "learning_rate": 5.031996803483811e-06, "loss": 0.3909, "step": 33210 }, { "epoch": 1.5240695700059659, "grad_norm": 0.5125842094421387, "learning_rate": 5.031751620558841e-06, "loss": 0.4284, "step": 33211 }, { "epoch": 1.5241154605112204, "grad_norm": 0.4602748453617096, "learning_rate": 5.0315064375575175e-06, "loss": 0.3717, "step": 33212 }, { "epoch": 1.5241613510164747, "grad_norm": 0.5097305774688721, "learning_rate": 5.031261254480433e-06, "loss": 0.4198, "step": 33213 }, { "epoch": 1.5242072415217292, "grad_norm": 0.4983460307121277, "learning_rate": 5.031016071328174e-06, "loss": 0.3966, "step": 33214 }, { "epoch": 1.5242531320269836, "grad_norm": 0.5090299844741821, "learning_rate": 5.030770888101331e-06, "loss": 0.444, "step": 33215 }, { "epoch": 1.524299022532238, "grad_norm": 0.44167542457580566, "learning_rate": 5.030525704800493e-06, "loss": 0.358, "step": 33216 }, { "epoch": 1.5243449130374924, "grad_norm": 0.45539042353630066, "learning_rate": 5.0302805214262514e-06, "loss": 0.3045, "step": 33217 }, { "epoch": 1.524390803542747, "grad_norm": 0.4757000505924225, "learning_rate": 5.030035337979195e-06, "loss": 0.3231, "step": 33218 }, { "epoch": 1.5244366940480014, "grad_norm": 0.46485990285873413, "learning_rate": 5.029790154459914e-06, "loss": 0.3754, "step": 33219 }, { "epoch": 1.524482584553256, "grad_norm": 0.4744590222835541, "learning_rate": 5.029544970868995e-06, "loss": 0.3242, "step": 33220 }, { "epoch": 1.5245284750585104, "grad_norm": 0.45173555612564087, "learning_rate": 5.0292997872070306e-06, "loss": 0.3301, "step": 33221 }, { "epoch": 1.524574365563765, "grad_norm": 0.44999220967292786, "learning_rate": 5.029054603474609e-06, "loss": 0.315, "step": 33222 }, { "epoch": 1.5246202560690194, "grad_norm": 0.47932276129722595, "learning_rate": 5.02880941967232e-06, "loss": 0.4138, "step": 33223 }, { "epoch": 1.524666146574274, "grad_norm": 0.4254818260669708, "learning_rate": 5.028564235800755e-06, "loss": 0.2855, "step": 33224 }, { "epoch": 1.5247120370795284, "grad_norm": 0.477897584438324, "learning_rate": 5.0283190518605e-06, "loss": 0.3753, "step": 33225 }, { "epoch": 1.5247579275847827, "grad_norm": 0.4940219521522522, "learning_rate": 5.028073867852148e-06, "loss": 0.4059, "step": 33226 }, { "epoch": 1.5248038180900372, "grad_norm": 0.4944014847278595, "learning_rate": 5.027828683776286e-06, "loss": 0.3923, "step": 33227 }, { "epoch": 1.5248497085952917, "grad_norm": 0.5033700466156006, "learning_rate": 5.027583499633506e-06, "loss": 0.4049, "step": 33228 }, { "epoch": 1.524895599100546, "grad_norm": 0.5869184136390686, "learning_rate": 5.027338315424397e-06, "loss": 0.3402, "step": 33229 }, { "epoch": 1.5249414896058004, "grad_norm": 0.514778196811676, "learning_rate": 5.0270931311495465e-06, "loss": 0.3018, "step": 33230 }, { "epoch": 1.524987380111055, "grad_norm": 0.48651888966560364, "learning_rate": 5.026847946809547e-06, "loss": 0.3844, "step": 33231 }, { "epoch": 1.5250332706163094, "grad_norm": 0.4721430838108063, "learning_rate": 5.026602762404984e-06, "loss": 0.3633, "step": 33232 }, { "epoch": 1.525079161121564, "grad_norm": 0.4642482399940491, "learning_rate": 5.026357577936451e-06, "loss": 0.3281, "step": 33233 }, { "epoch": 1.5251250516268184, "grad_norm": 0.40732863545417786, "learning_rate": 5.0261123934045374e-06, "loss": 0.2431, "step": 33234 }, { "epoch": 1.525170942132073, "grad_norm": 0.452980101108551, "learning_rate": 5.025867208809831e-06, "loss": 0.3078, "step": 33235 }, { "epoch": 1.5252168326373274, "grad_norm": 0.4706696569919586, "learning_rate": 5.025622024152923e-06, "loss": 0.3403, "step": 33236 }, { "epoch": 1.525262723142582, "grad_norm": 0.623918890953064, "learning_rate": 5.025376839434401e-06, "loss": 0.3699, "step": 33237 }, { "epoch": 1.5253086136478362, "grad_norm": 0.5010150671005249, "learning_rate": 5.025131654654856e-06, "loss": 0.4035, "step": 33238 }, { "epoch": 1.5253545041530907, "grad_norm": 0.4390171468257904, "learning_rate": 5.024886469814878e-06, "loss": 0.3126, "step": 33239 }, { "epoch": 1.5254003946583452, "grad_norm": 0.46011510491371155, "learning_rate": 5.024641284915055e-06, "loss": 0.358, "step": 33240 }, { "epoch": 1.5254462851635997, "grad_norm": 0.5175339579582214, "learning_rate": 5.024396099955977e-06, "loss": 0.3876, "step": 33241 }, { "epoch": 1.525492175668854, "grad_norm": 0.45749351382255554, "learning_rate": 5.024150914938236e-06, "loss": 0.315, "step": 33242 }, { "epoch": 1.5255380661741085, "grad_norm": 0.47081509232521057, "learning_rate": 5.023905729862419e-06, "loss": 0.372, "step": 33243 }, { "epoch": 1.525583956679363, "grad_norm": 0.4796488285064697, "learning_rate": 5.023660544729114e-06, "loss": 0.368, "step": 33244 }, { "epoch": 1.5256298471846175, "grad_norm": 0.4658336639404297, "learning_rate": 5.023415359538915e-06, "loss": 0.3685, "step": 33245 }, { "epoch": 1.525675737689872, "grad_norm": 0.467491090297699, "learning_rate": 5.02317017429241e-06, "loss": 0.3484, "step": 33246 }, { "epoch": 1.5257216281951265, "grad_norm": 0.5008457899093628, "learning_rate": 5.022924988990187e-06, "loss": 0.3518, "step": 33247 }, { "epoch": 1.525767518700381, "grad_norm": 0.4612867534160614, "learning_rate": 5.022679803632836e-06, "loss": 0.356, "step": 33248 }, { "epoch": 1.5258134092056355, "grad_norm": 0.47690388560295105, "learning_rate": 5.022434618220948e-06, "loss": 0.3715, "step": 33249 }, { "epoch": 1.52585929971089, "grad_norm": 0.4424048364162445, "learning_rate": 5.022189432755111e-06, "loss": 0.2964, "step": 33250 }, { "epoch": 1.5259051902161442, "grad_norm": 0.4892492890357971, "learning_rate": 5.021944247235918e-06, "loss": 0.3392, "step": 33251 }, { "epoch": 1.5259510807213987, "grad_norm": 0.49987250566482544, "learning_rate": 5.021699061663954e-06, "loss": 0.4293, "step": 33252 }, { "epoch": 1.5259969712266532, "grad_norm": 0.46126264333724976, "learning_rate": 5.0214538760398115e-06, "loss": 0.3126, "step": 33253 }, { "epoch": 1.5260428617319075, "grad_norm": 0.4869365394115448, "learning_rate": 5.021208690364078e-06, "loss": 0.4293, "step": 33254 }, { "epoch": 1.526088752237162, "grad_norm": 0.4923984408378601, "learning_rate": 5.020963504637344e-06, "loss": 0.3664, "step": 33255 }, { "epoch": 1.5261346427424165, "grad_norm": 0.4820711016654968, "learning_rate": 5.020718318860201e-06, "loss": 0.3787, "step": 33256 }, { "epoch": 1.526180533247671, "grad_norm": 0.49870505928993225, "learning_rate": 5.020473133033235e-06, "loss": 0.3915, "step": 33257 }, { "epoch": 1.5262264237529255, "grad_norm": 0.4556707739830017, "learning_rate": 5.0202279471570396e-06, "loss": 0.3668, "step": 33258 }, { "epoch": 1.52627231425818, "grad_norm": 0.47212743759155273, "learning_rate": 5.019982761232201e-06, "loss": 0.399, "step": 33259 }, { "epoch": 1.5263182047634345, "grad_norm": 0.41015321016311646, "learning_rate": 5.0197375752593115e-06, "loss": 0.2524, "step": 33260 }, { "epoch": 1.526364095268689, "grad_norm": 0.5067545175552368, "learning_rate": 5.019492389238958e-06, "loss": 0.3974, "step": 33261 }, { "epoch": 1.5264099857739435, "grad_norm": 0.435400128364563, "learning_rate": 5.019247203171733e-06, "loss": 0.2743, "step": 33262 }, { "epoch": 1.526455876279198, "grad_norm": 0.44996505975723267, "learning_rate": 5.019002017058223e-06, "loss": 0.3255, "step": 33263 }, { "epoch": 1.5265017667844523, "grad_norm": 0.4678812325000763, "learning_rate": 5.01875683089902e-06, "loss": 0.3779, "step": 33264 }, { "epoch": 1.5265476572897068, "grad_norm": 0.44305774569511414, "learning_rate": 5.0185116446947125e-06, "loss": 0.3178, "step": 33265 }, { "epoch": 1.5265935477949613, "grad_norm": 0.4542619585990906, "learning_rate": 5.01826645844589e-06, "loss": 0.3207, "step": 33266 }, { "epoch": 1.5266394383002155, "grad_norm": 0.44277340173721313, "learning_rate": 5.018021272153143e-06, "loss": 0.2882, "step": 33267 }, { "epoch": 1.52668532880547, "grad_norm": 0.4699394106864929, "learning_rate": 5.017776085817061e-06, "loss": 0.3545, "step": 33268 }, { "epoch": 1.5267312193107245, "grad_norm": 0.39964911341667175, "learning_rate": 5.017530899438231e-06, "loss": 0.2771, "step": 33269 }, { "epoch": 1.526777109815979, "grad_norm": 0.4964044392108917, "learning_rate": 5.017285713017247e-06, "loss": 0.3539, "step": 33270 }, { "epoch": 1.5268230003212335, "grad_norm": 0.4560113847255707, "learning_rate": 5.017040526554695e-06, "loss": 0.3872, "step": 33271 }, { "epoch": 1.526868890826488, "grad_norm": 0.5335861444473267, "learning_rate": 5.016795340051166e-06, "loss": 0.4502, "step": 33272 }, { "epoch": 1.5269147813317425, "grad_norm": 0.4584243595600128, "learning_rate": 5.016550153507251e-06, "loss": 0.3302, "step": 33273 }, { "epoch": 1.526960671836997, "grad_norm": 0.49385321140289307, "learning_rate": 5.016304966923536e-06, "loss": 0.4404, "step": 33274 }, { "epoch": 1.5270065623422515, "grad_norm": 0.516526460647583, "learning_rate": 5.016059780300614e-06, "loss": 0.3891, "step": 33275 }, { "epoch": 1.527052452847506, "grad_norm": 0.4815191328525543, "learning_rate": 5.015814593639073e-06, "loss": 0.3725, "step": 33276 }, { "epoch": 1.5270983433527603, "grad_norm": 0.46832793951034546, "learning_rate": 5.015569406939502e-06, "loss": 0.3308, "step": 33277 }, { "epoch": 1.5271442338580148, "grad_norm": 0.5530183911323547, "learning_rate": 5.015324220202493e-06, "loss": 0.3361, "step": 33278 }, { "epoch": 1.5271901243632693, "grad_norm": 0.4593767821788788, "learning_rate": 5.0150790334286335e-06, "loss": 0.3135, "step": 33279 }, { "epoch": 1.5272360148685236, "grad_norm": 0.413675457239151, "learning_rate": 5.0148338466185145e-06, "loss": 0.2705, "step": 33280 }, { "epoch": 1.527281905373778, "grad_norm": 0.44036993384361267, "learning_rate": 5.014588659772723e-06, "loss": 0.3465, "step": 33281 }, { "epoch": 1.5273277958790326, "grad_norm": 0.47713518142700195, "learning_rate": 5.014343472891852e-06, "loss": 0.3678, "step": 33282 }, { "epoch": 1.527373686384287, "grad_norm": 0.44120803475379944, "learning_rate": 5.014098285976487e-06, "loss": 0.3206, "step": 33283 }, { "epoch": 1.5274195768895416, "grad_norm": 0.4527011215686798, "learning_rate": 5.013853099027223e-06, "loss": 0.3285, "step": 33284 }, { "epoch": 1.527465467394796, "grad_norm": 0.536106526851654, "learning_rate": 5.013607912044646e-06, "loss": 0.4905, "step": 33285 }, { "epoch": 1.5275113579000505, "grad_norm": 0.44855910539627075, "learning_rate": 5.013362725029345e-06, "loss": 0.34, "step": 33286 }, { "epoch": 1.527557248405305, "grad_norm": 0.47430697083473206, "learning_rate": 5.013117537981912e-06, "loss": 0.4075, "step": 33287 }, { "epoch": 1.5276031389105595, "grad_norm": 0.4752584397792816, "learning_rate": 5.012872350902934e-06, "loss": 0.3565, "step": 33288 }, { "epoch": 1.5276490294158138, "grad_norm": 0.47045838832855225, "learning_rate": 5.0126271637930046e-06, "loss": 0.342, "step": 33289 }, { "epoch": 1.5276949199210683, "grad_norm": 0.5077087879180908, "learning_rate": 5.0123819766527095e-06, "loss": 0.4233, "step": 33290 }, { "epoch": 1.5277408104263228, "grad_norm": 0.4747716188430786, "learning_rate": 5.012136789482639e-06, "loss": 0.2936, "step": 33291 }, { "epoch": 1.527786700931577, "grad_norm": 0.5030711889266968, "learning_rate": 5.011891602283383e-06, "loss": 0.3794, "step": 33292 }, { "epoch": 1.5278325914368316, "grad_norm": 0.45925700664520264, "learning_rate": 5.011646415055533e-06, "loss": 0.33, "step": 33293 }, { "epoch": 1.527878481942086, "grad_norm": 0.5080087780952454, "learning_rate": 5.011401227799676e-06, "loss": 0.3775, "step": 33294 }, { "epoch": 1.5279243724473406, "grad_norm": 0.49982157349586487, "learning_rate": 5.011156040516404e-06, "loss": 0.3761, "step": 33295 }, { "epoch": 1.527970262952595, "grad_norm": 0.46139052510261536, "learning_rate": 5.010910853206304e-06, "loss": 0.3649, "step": 33296 }, { "epoch": 1.5280161534578496, "grad_norm": 0.45476680994033813, "learning_rate": 5.010665665869967e-06, "loss": 0.318, "step": 33297 }, { "epoch": 1.528062043963104, "grad_norm": 0.43298497796058655, "learning_rate": 5.010420478507981e-06, "loss": 0.2903, "step": 33298 }, { "epoch": 1.5281079344683586, "grad_norm": 0.4857610762119293, "learning_rate": 5.01017529112094e-06, "loss": 0.3567, "step": 33299 }, { "epoch": 1.528153824973613, "grad_norm": 0.5234953165054321, "learning_rate": 5.00993010370943e-06, "loss": 0.458, "step": 33300 }, { "epoch": 1.5281997154788676, "grad_norm": 0.4120674133300781, "learning_rate": 5.00968491627404e-06, "loss": 0.2565, "step": 33301 }, { "epoch": 1.5282456059841218, "grad_norm": 0.45423442125320435, "learning_rate": 5.0094397288153615e-06, "loss": 0.3514, "step": 33302 }, { "epoch": 1.5282914964893763, "grad_norm": 0.46962639689445496, "learning_rate": 5.009194541333982e-06, "loss": 0.3324, "step": 33303 }, { "epoch": 1.5283373869946308, "grad_norm": 0.48889923095703125, "learning_rate": 5.008949353830495e-06, "loss": 0.3592, "step": 33304 }, { "epoch": 1.5283832774998851, "grad_norm": 0.437005877494812, "learning_rate": 5.008704166305486e-06, "loss": 0.3053, "step": 33305 }, { "epoch": 1.5284291680051396, "grad_norm": 0.5269743204116821, "learning_rate": 5.008458978759548e-06, "loss": 0.4889, "step": 33306 }, { "epoch": 1.5284750585103941, "grad_norm": 0.4587824046611786, "learning_rate": 5.008213791193267e-06, "loss": 0.3495, "step": 33307 }, { "epoch": 1.5285209490156486, "grad_norm": 0.49283888936042786, "learning_rate": 5.0079686036072336e-06, "loss": 0.3453, "step": 33308 }, { "epoch": 1.528566839520903, "grad_norm": 0.4359007477760315, "learning_rate": 5.007723416002039e-06, "loss": 0.2895, "step": 33309 }, { "epoch": 1.5286127300261576, "grad_norm": 0.467785120010376, "learning_rate": 5.007478228378273e-06, "loss": 0.353, "step": 33310 }, { "epoch": 1.528658620531412, "grad_norm": 0.48227131366729736, "learning_rate": 5.0072330407365245e-06, "loss": 0.3926, "step": 33311 }, { "epoch": 1.5287045110366666, "grad_norm": 0.47962939739227295, "learning_rate": 5.006987853077382e-06, "loss": 0.3826, "step": 33312 }, { "epoch": 1.528750401541921, "grad_norm": 0.47314366698265076, "learning_rate": 5.006742665401436e-06, "loss": 0.3542, "step": 33313 }, { "epoch": 1.5287962920471756, "grad_norm": 0.5658924579620361, "learning_rate": 5.006497477709275e-06, "loss": 0.3853, "step": 33314 }, { "epoch": 1.5288421825524299, "grad_norm": 0.46082544326782227, "learning_rate": 5.006252290001491e-06, "loss": 0.3292, "step": 33315 }, { "epoch": 1.5288880730576844, "grad_norm": 0.4744125306606293, "learning_rate": 5.006007102278672e-06, "loss": 0.3721, "step": 33316 }, { "epoch": 1.5289339635629389, "grad_norm": 0.4767659604549408, "learning_rate": 5.005761914541408e-06, "loss": 0.3277, "step": 33317 }, { "epoch": 1.5289798540681931, "grad_norm": 0.5028297305107117, "learning_rate": 5.005516726790288e-06, "loss": 0.3879, "step": 33318 }, { "epoch": 1.5290257445734476, "grad_norm": 0.4611974060535431, "learning_rate": 5.005271539025902e-06, "loss": 0.3367, "step": 33319 }, { "epoch": 1.5290716350787021, "grad_norm": 0.49858367443084717, "learning_rate": 5.0050263512488396e-06, "loss": 0.4284, "step": 33320 }, { "epoch": 1.5291175255839566, "grad_norm": 0.4822252094745636, "learning_rate": 5.004781163459691e-06, "loss": 0.3776, "step": 33321 }, { "epoch": 1.5291634160892111, "grad_norm": 0.49153226613998413, "learning_rate": 5.004535975659045e-06, "loss": 0.3953, "step": 33322 }, { "epoch": 1.5292093065944656, "grad_norm": 0.4621138870716095, "learning_rate": 5.00429078784749e-06, "loss": 0.3597, "step": 33323 }, { "epoch": 1.5292551970997201, "grad_norm": 0.48682576417922974, "learning_rate": 5.004045600025619e-06, "loss": 0.394, "step": 33324 }, { "epoch": 1.5293010876049746, "grad_norm": 0.4705032408237457, "learning_rate": 5.0038004121940185e-06, "loss": 0.3457, "step": 33325 }, { "epoch": 1.5293469781102291, "grad_norm": 0.47932180762290955, "learning_rate": 5.003555224353278e-06, "loss": 0.3776, "step": 33326 }, { "epoch": 1.5293928686154834, "grad_norm": 0.47892990708351135, "learning_rate": 5.003310036503991e-06, "loss": 0.3615, "step": 33327 }, { "epoch": 1.529438759120738, "grad_norm": 0.4796202778816223, "learning_rate": 5.0030648486467435e-06, "loss": 0.3597, "step": 33328 }, { "epoch": 1.5294846496259924, "grad_norm": 0.48022696375846863, "learning_rate": 5.002819660782125e-06, "loss": 0.3787, "step": 33329 }, { "epoch": 1.529530540131247, "grad_norm": 0.4858448803424835, "learning_rate": 5.002574472910727e-06, "loss": 0.3676, "step": 33330 }, { "epoch": 1.5295764306365012, "grad_norm": 0.5513169765472412, "learning_rate": 5.002329285033137e-06, "loss": 0.4413, "step": 33331 }, { "epoch": 1.5296223211417557, "grad_norm": 0.44000500440597534, "learning_rate": 5.002084097149949e-06, "loss": 0.2916, "step": 33332 }, { "epoch": 1.5296682116470102, "grad_norm": 0.49267736077308655, "learning_rate": 5.001838909261747e-06, "loss": 0.408, "step": 33333 }, { "epoch": 1.5297141021522647, "grad_norm": 0.4637821614742279, "learning_rate": 5.001593721369123e-06, "loss": 0.3576, "step": 33334 }, { "epoch": 1.5297599926575192, "grad_norm": 0.43816161155700684, "learning_rate": 5.001348533472666e-06, "loss": 0.3423, "step": 33335 }, { "epoch": 1.5298058831627737, "grad_norm": 0.46674075722694397, "learning_rate": 5.001103345572967e-06, "loss": 0.3229, "step": 33336 }, { "epoch": 1.5298517736680282, "grad_norm": 0.47178855538368225, "learning_rate": 5.000858157670614e-06, "loss": 0.3874, "step": 33337 }, { "epoch": 1.5298976641732827, "grad_norm": 0.47776514291763306, "learning_rate": 5.000612969766199e-06, "loss": 0.3899, "step": 33338 }, { "epoch": 1.5299435546785372, "grad_norm": 0.4624825119972229, "learning_rate": 5.0003677818603105e-06, "loss": 0.3463, "step": 33339 }, { "epoch": 1.5299894451837914, "grad_norm": 0.41650810837745667, "learning_rate": 5.0001225939535345e-06, "loss": 0.2644, "step": 33340 }, { "epoch": 1.530035335689046, "grad_norm": 0.4418177306652069, "learning_rate": 4.999877406046466e-06, "loss": 0.3218, "step": 33341 }, { "epoch": 1.5300812261943004, "grad_norm": 0.5067244172096252, "learning_rate": 4.999632218139693e-06, "loss": 0.4056, "step": 33342 }, { "epoch": 1.5301271166995547, "grad_norm": 0.4835446774959564, "learning_rate": 4.999387030233803e-06, "loss": 0.3032, "step": 33343 }, { "epoch": 1.5301730072048092, "grad_norm": 0.4892168641090393, "learning_rate": 4.999141842329387e-06, "loss": 0.4081, "step": 33344 }, { "epoch": 1.5302188977100637, "grad_norm": 0.4792460501194, "learning_rate": 4.9988966544270355e-06, "loss": 0.3712, "step": 33345 }, { "epoch": 1.5302647882153182, "grad_norm": 0.5082064270973206, "learning_rate": 4.998651466527335e-06, "loss": 0.4559, "step": 33346 }, { "epoch": 1.5303106787205727, "grad_norm": 0.47889474034309387, "learning_rate": 4.998406278630879e-06, "loss": 0.3725, "step": 33347 }, { "epoch": 1.5303565692258272, "grad_norm": 0.48877692222595215, "learning_rate": 4.998161090738255e-06, "loss": 0.4291, "step": 33348 }, { "epoch": 1.5304024597310817, "grad_norm": 0.49434706568717957, "learning_rate": 4.997915902850054e-06, "loss": 0.34, "step": 33349 }, { "epoch": 1.5304483502363362, "grad_norm": 0.5326569080352783, "learning_rate": 4.9976707149668634e-06, "loss": 0.4728, "step": 33350 }, { "epoch": 1.5304942407415907, "grad_norm": 0.46771886944770813, "learning_rate": 4.997425527089275e-06, "loss": 0.3932, "step": 33351 }, { "epoch": 1.5305401312468452, "grad_norm": 0.47147440910339355, "learning_rate": 4.997180339217876e-06, "loss": 0.3661, "step": 33352 }, { "epoch": 1.5305860217520995, "grad_norm": 0.44111692905426025, "learning_rate": 4.996935151353257e-06, "loss": 0.2915, "step": 33353 }, { "epoch": 1.530631912257354, "grad_norm": 0.45354127883911133, "learning_rate": 4.996689963496011e-06, "loss": 0.2927, "step": 33354 }, { "epoch": 1.5306778027626085, "grad_norm": 0.4173949360847473, "learning_rate": 4.996444775646722e-06, "loss": 0.3002, "step": 33355 }, { "epoch": 1.5307236932678627, "grad_norm": 0.47706976532936096, "learning_rate": 4.996199587805983e-06, "loss": 0.4036, "step": 33356 }, { "epoch": 1.5307695837731172, "grad_norm": 0.4859236776828766, "learning_rate": 4.995954399974384e-06, "loss": 0.4167, "step": 33357 }, { "epoch": 1.5308154742783717, "grad_norm": 0.4598395824432373, "learning_rate": 4.995709212152509e-06, "loss": 0.3502, "step": 33358 }, { "epoch": 1.5308613647836262, "grad_norm": 0.5090404748916626, "learning_rate": 4.995464024340956e-06, "loss": 0.4159, "step": 33359 }, { "epoch": 1.5309072552888807, "grad_norm": 0.43861424922943115, "learning_rate": 4.995218836540311e-06, "loss": 0.3206, "step": 33360 }, { "epoch": 1.5309531457941352, "grad_norm": 0.4799749553203583, "learning_rate": 4.994973648751161e-06, "loss": 0.4103, "step": 33361 }, { "epoch": 1.5309990362993897, "grad_norm": 0.44542351365089417, "learning_rate": 4.994728460974099e-06, "loss": 0.3232, "step": 33362 }, { "epoch": 1.5310449268046442, "grad_norm": 0.47915101051330566, "learning_rate": 4.9944832732097135e-06, "loss": 0.4032, "step": 33363 }, { "epoch": 1.5310908173098987, "grad_norm": 0.4975765347480774, "learning_rate": 4.994238085458592e-06, "loss": 0.3773, "step": 33364 }, { "epoch": 1.5311367078151532, "grad_norm": 0.4953908920288086, "learning_rate": 4.993992897721329e-06, "loss": 0.3833, "step": 33365 }, { "epoch": 1.5311825983204075, "grad_norm": 0.4649415612220764, "learning_rate": 4.993747709998511e-06, "loss": 0.3534, "step": 33366 }, { "epoch": 1.531228488825662, "grad_norm": 0.4955553710460663, "learning_rate": 4.993502522290725e-06, "loss": 0.4145, "step": 33367 }, { "epoch": 1.5312743793309165, "grad_norm": 0.47564664483070374, "learning_rate": 4.993257334598565e-06, "loss": 0.3509, "step": 33368 }, { "epoch": 1.5313202698361708, "grad_norm": 0.4572297930717468, "learning_rate": 4.99301214692262e-06, "loss": 0.3175, "step": 33369 }, { "epoch": 1.5313661603414253, "grad_norm": 0.4961814284324646, "learning_rate": 4.992766959263477e-06, "loss": 0.3761, "step": 33370 }, { "epoch": 1.5314120508466798, "grad_norm": 0.4717697203159332, "learning_rate": 4.992521771621728e-06, "loss": 0.3824, "step": 33371 }, { "epoch": 1.5314579413519342, "grad_norm": 0.45403072237968445, "learning_rate": 4.992276583997962e-06, "loss": 0.3045, "step": 33372 }, { "epoch": 1.5315038318571887, "grad_norm": 0.49734988808631897, "learning_rate": 4.992031396392766e-06, "loss": 0.429, "step": 33373 }, { "epoch": 1.5315497223624432, "grad_norm": 0.49346253275871277, "learning_rate": 4.991786208806735e-06, "loss": 0.3808, "step": 33374 }, { "epoch": 1.5315956128676977, "grad_norm": 0.44582995772361755, "learning_rate": 4.991541021240455e-06, "loss": 0.3086, "step": 33375 }, { "epoch": 1.5316415033729522, "grad_norm": 0.44004857540130615, "learning_rate": 4.991295833694516e-06, "loss": 0.3515, "step": 33376 }, { "epoch": 1.5316873938782067, "grad_norm": 0.5284396409988403, "learning_rate": 4.991050646169506e-06, "loss": 0.4389, "step": 33377 }, { "epoch": 1.531733284383461, "grad_norm": 0.41716140508651733, "learning_rate": 4.9908054586660184e-06, "loss": 0.2475, "step": 33378 }, { "epoch": 1.5317791748887155, "grad_norm": 0.4962914288043976, "learning_rate": 4.990560271184641e-06, "loss": 0.3886, "step": 33379 }, { "epoch": 1.53182506539397, "grad_norm": 0.46365785598754883, "learning_rate": 4.990315083725961e-06, "loss": 0.3376, "step": 33380 }, { "epoch": 1.5318709558992243, "grad_norm": 0.5083673000335693, "learning_rate": 4.990069896290572e-06, "loss": 0.4284, "step": 33381 }, { "epoch": 1.5319168464044788, "grad_norm": 0.44102224707603455, "learning_rate": 4.989824708879061e-06, "loss": 0.295, "step": 33382 }, { "epoch": 1.5319627369097333, "grad_norm": 0.562372088432312, "learning_rate": 4.989579521492019e-06, "loss": 0.4321, "step": 33383 }, { "epoch": 1.5320086274149878, "grad_norm": 0.4857560694217682, "learning_rate": 4.989334334130034e-06, "loss": 0.3881, "step": 33384 }, { "epoch": 1.5320545179202423, "grad_norm": 0.43600326776504517, "learning_rate": 4.989089146793698e-06, "loss": 0.2853, "step": 33385 }, { "epoch": 1.5321004084254968, "grad_norm": 0.5014874339103699, "learning_rate": 4.988843959483597e-06, "loss": 0.4352, "step": 33386 }, { "epoch": 1.5321462989307513, "grad_norm": 0.4651113450527191, "learning_rate": 4.988598772200325e-06, "loss": 0.3544, "step": 33387 }, { "epoch": 1.5321921894360058, "grad_norm": 0.43831777572631836, "learning_rate": 4.988353584944469e-06, "loss": 0.3054, "step": 33388 }, { "epoch": 1.5322380799412603, "grad_norm": 0.4669461250305176, "learning_rate": 4.988108397716617e-06, "loss": 0.3663, "step": 33389 }, { "epoch": 1.5322839704465148, "grad_norm": 0.4665217399597168, "learning_rate": 4.987863210517362e-06, "loss": 0.334, "step": 33390 }, { "epoch": 1.532329860951769, "grad_norm": 0.46735626459121704, "learning_rate": 4.987618023347293e-06, "loss": 0.3623, "step": 33391 }, { "epoch": 1.5323757514570235, "grad_norm": 0.4254129230976105, "learning_rate": 4.987372836206997e-06, "loss": 0.2881, "step": 33392 }, { "epoch": 1.532421641962278, "grad_norm": 0.49478113651275635, "learning_rate": 4.9871276490970665e-06, "loss": 0.3741, "step": 33393 }, { "epoch": 1.5324675324675323, "grad_norm": 0.5142353773117065, "learning_rate": 4.986882462018091e-06, "loss": 0.3573, "step": 33394 }, { "epoch": 1.5325134229727868, "grad_norm": 0.4721316397190094, "learning_rate": 4.986637274970655e-06, "loss": 0.383, "step": 33395 }, { "epoch": 1.5325593134780413, "grad_norm": 0.47167855501174927, "learning_rate": 4.986392087955356e-06, "loss": 0.3489, "step": 33396 }, { "epoch": 1.5326052039832958, "grad_norm": 0.42862963676452637, "learning_rate": 4.986146900972778e-06, "loss": 0.3075, "step": 33397 }, { "epoch": 1.5326510944885503, "grad_norm": 0.45063650608062744, "learning_rate": 4.985901714023513e-06, "loss": 0.3736, "step": 33398 }, { "epoch": 1.5326969849938048, "grad_norm": 0.5179423689842224, "learning_rate": 4.98565652710815e-06, "loss": 0.4584, "step": 33399 }, { "epoch": 1.5327428754990593, "grad_norm": 0.499192476272583, "learning_rate": 4.985411340227279e-06, "loss": 0.3778, "step": 33400 }, { "epoch": 1.5327887660043138, "grad_norm": 0.4588735103607178, "learning_rate": 4.985166153381486e-06, "loss": 0.3006, "step": 33401 }, { "epoch": 1.5328346565095683, "grad_norm": 0.47090548276901245, "learning_rate": 4.984920966571367e-06, "loss": 0.3517, "step": 33402 }, { "epoch": 1.5328805470148228, "grad_norm": 0.4428320527076721, "learning_rate": 4.984675779797508e-06, "loss": 0.2874, "step": 33403 }, { "epoch": 1.532926437520077, "grad_norm": 0.47406089305877686, "learning_rate": 4.984430593060498e-06, "loss": 0.3309, "step": 33404 }, { "epoch": 1.5329723280253316, "grad_norm": 0.47259098291397095, "learning_rate": 4.984185406360929e-06, "loss": 0.3811, "step": 33405 }, { "epoch": 1.533018218530586, "grad_norm": 0.4673413932323456, "learning_rate": 4.983940219699388e-06, "loss": 0.3007, "step": 33406 }, { "epoch": 1.5330641090358403, "grad_norm": 0.46608853340148926, "learning_rate": 4.983695033076466e-06, "loss": 0.3262, "step": 33407 }, { "epoch": 1.5331099995410948, "grad_norm": 0.4304957091808319, "learning_rate": 4.98344984649275e-06, "loss": 0.3045, "step": 33408 }, { "epoch": 1.5331558900463493, "grad_norm": 0.48709484934806824, "learning_rate": 4.983204659948836e-06, "loss": 0.3583, "step": 33409 }, { "epoch": 1.5332017805516038, "grad_norm": 0.4840950667858124, "learning_rate": 4.9829594734453075e-06, "loss": 0.3896, "step": 33410 }, { "epoch": 1.5332476710568583, "grad_norm": 0.5038446187973022, "learning_rate": 4.982714286982754e-06, "loss": 0.3546, "step": 33411 }, { "epoch": 1.5332935615621128, "grad_norm": 0.48782816529273987, "learning_rate": 4.9824691005617695e-06, "loss": 0.3755, "step": 33412 }, { "epoch": 1.5333394520673673, "grad_norm": 0.4720396101474762, "learning_rate": 4.982223914182942e-06, "loss": 0.3112, "step": 33413 }, { "epoch": 1.5333853425726218, "grad_norm": 0.49455755949020386, "learning_rate": 4.981978727846859e-06, "loss": 0.397, "step": 33414 }, { "epoch": 1.5334312330778763, "grad_norm": 0.44888776540756226, "learning_rate": 4.981733541554112e-06, "loss": 0.3339, "step": 33415 }, { "epoch": 1.5334771235831306, "grad_norm": 0.49241727590560913, "learning_rate": 4.98148835530529e-06, "loss": 0.3465, "step": 33416 }, { "epoch": 1.533523014088385, "grad_norm": 0.49762701988220215, "learning_rate": 4.981243169100981e-06, "loss": 0.3642, "step": 33417 }, { "epoch": 1.5335689045936396, "grad_norm": 0.5293100476264954, "learning_rate": 4.980997982941779e-06, "loss": 0.3965, "step": 33418 }, { "epoch": 1.533614795098894, "grad_norm": 0.5477155447006226, "learning_rate": 4.98075279682827e-06, "loss": 0.4733, "step": 33419 }, { "epoch": 1.5336606856041484, "grad_norm": 0.47349607944488525, "learning_rate": 4.9805076107610425e-06, "loss": 0.3271, "step": 33420 }, { "epoch": 1.5337065761094029, "grad_norm": 0.514231264591217, "learning_rate": 4.98026242474069e-06, "loss": 0.3945, "step": 33421 }, { "epoch": 1.5337524666146574, "grad_norm": 0.5172644257545471, "learning_rate": 4.9800172387678005e-06, "loss": 0.4287, "step": 33422 }, { "epoch": 1.5337983571199119, "grad_norm": 0.4736723303794861, "learning_rate": 4.979772052842961e-06, "loss": 0.3518, "step": 33423 }, { "epoch": 1.5338442476251664, "grad_norm": 0.4812438189983368, "learning_rate": 4.9795268669667655e-06, "loss": 0.3316, "step": 33424 }, { "epoch": 1.5338901381304209, "grad_norm": 0.4548267424106598, "learning_rate": 4.979281681139801e-06, "loss": 0.3185, "step": 33425 }, { "epoch": 1.5339360286356754, "grad_norm": 0.49130624532699585, "learning_rate": 4.979036495362656e-06, "loss": 0.3595, "step": 33426 }, { "epoch": 1.5339819191409299, "grad_norm": 0.45011934638023376, "learning_rate": 4.978791309635924e-06, "loss": 0.3027, "step": 33427 }, { "epoch": 1.5340278096461843, "grad_norm": 0.46906059980392456, "learning_rate": 4.978546123960192e-06, "loss": 0.3664, "step": 33428 }, { "epoch": 1.5340737001514386, "grad_norm": 0.4785364866256714, "learning_rate": 4.978300938336047e-06, "loss": 0.3607, "step": 33429 }, { "epoch": 1.5341195906566931, "grad_norm": 0.5014561414718628, "learning_rate": 4.978055752764083e-06, "loss": 0.3961, "step": 33430 }, { "epoch": 1.5341654811619476, "grad_norm": 0.4950072467327118, "learning_rate": 4.97781056724489e-06, "loss": 0.3964, "step": 33431 }, { "epoch": 1.534211371667202, "grad_norm": 0.48121893405914307, "learning_rate": 4.977565381779053e-06, "loss": 0.392, "step": 33432 }, { "epoch": 1.5342572621724564, "grad_norm": 0.46876922249794006, "learning_rate": 4.977320196367165e-06, "loss": 0.3399, "step": 33433 }, { "epoch": 1.534303152677711, "grad_norm": 0.4675857722759247, "learning_rate": 4.977075011009816e-06, "loss": 0.361, "step": 33434 }, { "epoch": 1.5343490431829654, "grad_norm": 0.4779052138328552, "learning_rate": 4.976829825707592e-06, "loss": 0.3653, "step": 33435 }, { "epoch": 1.5343949336882199, "grad_norm": 0.492640882730484, "learning_rate": 4.976584640461086e-06, "loss": 0.413, "step": 33436 }, { "epoch": 1.5344408241934744, "grad_norm": 0.5001112222671509, "learning_rate": 4.976339455270888e-06, "loss": 0.4531, "step": 33437 }, { "epoch": 1.5344867146987289, "grad_norm": 0.44833678007125854, "learning_rate": 4.976094270137582e-06, "loss": 0.2971, "step": 33438 }, { "epoch": 1.5345326052039834, "grad_norm": 0.48467087745666504, "learning_rate": 4.975849085061766e-06, "loss": 0.4089, "step": 33439 }, { "epoch": 1.5345784957092379, "grad_norm": 0.4982539117336273, "learning_rate": 4.975603900044024e-06, "loss": 0.4146, "step": 33440 }, { "epoch": 1.5346243862144924, "grad_norm": 0.49855053424835205, "learning_rate": 4.975358715084947e-06, "loss": 0.4076, "step": 33441 }, { "epoch": 1.5346702767197467, "grad_norm": 0.5264787077903748, "learning_rate": 4.975113530185124e-06, "loss": 0.4335, "step": 33442 }, { "epoch": 1.5347161672250011, "grad_norm": 0.48854929208755493, "learning_rate": 4.974868345345146e-06, "loss": 0.366, "step": 33443 }, { "epoch": 1.5347620577302556, "grad_norm": 0.47693586349487305, "learning_rate": 4.974623160565601e-06, "loss": 0.3906, "step": 33444 }, { "epoch": 1.53480794823551, "grad_norm": 0.4990052282810211, "learning_rate": 4.9743779758470774e-06, "loss": 0.3768, "step": 33445 }, { "epoch": 1.5348538387407644, "grad_norm": 0.4892053008079529, "learning_rate": 4.9741327911901695e-06, "loss": 0.3533, "step": 33446 }, { "epoch": 1.534899729246019, "grad_norm": 0.4960637390613556, "learning_rate": 4.973887606595464e-06, "loss": 0.3822, "step": 33447 }, { "epoch": 1.5349456197512734, "grad_norm": 0.46830883622169495, "learning_rate": 4.9736424220635496e-06, "loss": 0.3663, "step": 33448 }, { "epoch": 1.534991510256528, "grad_norm": 0.4579513669013977, "learning_rate": 4.9733972375950175e-06, "loss": 0.3515, "step": 33449 }, { "epoch": 1.5350374007617824, "grad_norm": 0.5010212063789368, "learning_rate": 4.973152053190457e-06, "loss": 0.3808, "step": 33450 }, { "epoch": 1.535083291267037, "grad_norm": 0.438052773475647, "learning_rate": 4.972906868850454e-06, "loss": 0.2938, "step": 33451 }, { "epoch": 1.5351291817722914, "grad_norm": 0.4766504764556885, "learning_rate": 4.972661684575605e-06, "loss": 0.3363, "step": 33452 }, { "epoch": 1.535175072277546, "grad_norm": 0.4499192535877228, "learning_rate": 4.972416500366496e-06, "loss": 0.3017, "step": 33453 }, { "epoch": 1.5352209627828004, "grad_norm": 0.4784931242465973, "learning_rate": 4.972171316223714e-06, "loss": 0.3567, "step": 33454 }, { "epoch": 1.5352668532880547, "grad_norm": 0.5144932270050049, "learning_rate": 4.971926132147853e-06, "loss": 0.4282, "step": 33455 }, { "epoch": 1.5353127437933092, "grad_norm": 0.4516366422176361, "learning_rate": 4.971680948139501e-06, "loss": 0.3523, "step": 33456 }, { "epoch": 1.5353586342985637, "grad_norm": 0.4766368865966797, "learning_rate": 4.971435764199247e-06, "loss": 0.3099, "step": 33457 }, { "epoch": 1.535404524803818, "grad_norm": 0.4583054482936859, "learning_rate": 4.9711905803276815e-06, "loss": 0.3359, "step": 33458 }, { "epoch": 1.5354504153090724, "grad_norm": 0.4670715034008026, "learning_rate": 4.970945396525393e-06, "loss": 0.3649, "step": 33459 }, { "epoch": 1.535496305814327, "grad_norm": 0.4758431613445282, "learning_rate": 4.97070021279297e-06, "loss": 0.3384, "step": 33460 }, { "epoch": 1.5355421963195814, "grad_norm": 0.4251309037208557, "learning_rate": 4.970455029131006e-06, "loss": 0.2563, "step": 33461 }, { "epoch": 1.535588086824836, "grad_norm": 0.4490491449832916, "learning_rate": 4.970209845540089e-06, "loss": 0.2967, "step": 33462 }, { "epoch": 1.5356339773300904, "grad_norm": 0.5228750705718994, "learning_rate": 4.9699646620208054e-06, "loss": 0.404, "step": 33463 }, { "epoch": 1.535679867835345, "grad_norm": 0.45339563488960266, "learning_rate": 4.969719478573749e-06, "loss": 0.3601, "step": 33464 }, { "epoch": 1.5357257583405994, "grad_norm": 0.4756806790828705, "learning_rate": 4.969474295199508e-06, "loss": 0.3664, "step": 33465 }, { "epoch": 1.535771648845854, "grad_norm": 0.500033438205719, "learning_rate": 4.969229111898669e-06, "loss": 0.3748, "step": 33466 }, { "epoch": 1.5358175393511082, "grad_norm": 0.49815234541893005, "learning_rate": 4.968983928671827e-06, "loss": 0.3906, "step": 33467 }, { "epoch": 1.5358634298563627, "grad_norm": 0.4770985245704651, "learning_rate": 4.968738745519568e-06, "loss": 0.313, "step": 33468 }, { "epoch": 1.5359093203616172, "grad_norm": 0.4826403856277466, "learning_rate": 4.9684935624424825e-06, "loss": 0.3428, "step": 33469 }, { "epoch": 1.5359552108668715, "grad_norm": 0.4563618302345276, "learning_rate": 4.9682483794411606e-06, "loss": 0.3729, "step": 33470 }, { "epoch": 1.536001101372126, "grad_norm": 0.475603848695755, "learning_rate": 4.968003196516191e-06, "loss": 0.3406, "step": 33471 }, { "epoch": 1.5360469918773805, "grad_norm": 0.4416847229003906, "learning_rate": 4.967758013668164e-06, "loss": 0.2676, "step": 33472 }, { "epoch": 1.536092882382635, "grad_norm": 0.4959782361984253, "learning_rate": 4.9675128308976665e-06, "loss": 0.4389, "step": 33473 }, { "epoch": 1.5361387728878895, "grad_norm": 0.4590708911418915, "learning_rate": 4.967267648205293e-06, "loss": 0.3322, "step": 33474 }, { "epoch": 1.536184663393144, "grad_norm": 0.48880016803741455, "learning_rate": 4.967022465591631e-06, "loss": 0.4122, "step": 33475 }, { "epoch": 1.5362305538983985, "grad_norm": 0.4766637682914734, "learning_rate": 4.9667772830572665e-06, "loss": 0.333, "step": 33476 }, { "epoch": 1.536276444403653, "grad_norm": 0.4620327651500702, "learning_rate": 4.966532100602794e-06, "loss": 0.2889, "step": 33477 }, { "epoch": 1.5363223349089075, "grad_norm": 0.4541313052177429, "learning_rate": 4.966286918228802e-06, "loss": 0.3651, "step": 33478 }, { "epoch": 1.536368225414162, "grad_norm": 0.4923425614833832, "learning_rate": 4.966041735935877e-06, "loss": 0.3885, "step": 33479 }, { "epoch": 1.5364141159194162, "grad_norm": 0.45403826236724854, "learning_rate": 4.9657965537246134e-06, "loss": 0.3027, "step": 33480 }, { "epoch": 1.5364600064246707, "grad_norm": 0.4706946611404419, "learning_rate": 4.965551371595597e-06, "loss": 0.3707, "step": 33481 }, { "epoch": 1.5365058969299252, "grad_norm": 0.48963186144828796, "learning_rate": 4.965306189549418e-06, "loss": 0.4482, "step": 33482 }, { "epoch": 1.5365517874351795, "grad_norm": 0.4674972891807556, "learning_rate": 4.9650610075866675e-06, "loss": 0.3471, "step": 33483 }, { "epoch": 1.536597677940434, "grad_norm": 0.46600818634033203, "learning_rate": 4.9648158257079345e-06, "loss": 0.3787, "step": 33484 }, { "epoch": 1.5366435684456885, "grad_norm": 0.475747287273407, "learning_rate": 4.964570643913807e-06, "loss": 0.4019, "step": 33485 }, { "epoch": 1.536689458950943, "grad_norm": 0.4632922410964966, "learning_rate": 4.964325462204878e-06, "loss": 0.3427, "step": 33486 }, { "epoch": 1.5367353494561975, "grad_norm": 0.4941183626651764, "learning_rate": 4.964080280581735e-06, "loss": 0.3473, "step": 33487 }, { "epoch": 1.536781239961452, "grad_norm": 0.48881468176841736, "learning_rate": 4.963835099044965e-06, "loss": 0.3417, "step": 33488 }, { "epoch": 1.5368271304667065, "grad_norm": 0.47942933440208435, "learning_rate": 4.963589917595162e-06, "loss": 0.3721, "step": 33489 }, { "epoch": 1.536873020971961, "grad_norm": 0.4614333510398865, "learning_rate": 4.963344736232914e-06, "loss": 0.3326, "step": 33490 }, { "epoch": 1.5369189114772155, "grad_norm": 0.4957965910434723, "learning_rate": 4.963099554958809e-06, "loss": 0.3323, "step": 33491 }, { "epoch": 1.53696480198247, "grad_norm": 0.47933393716812134, "learning_rate": 4.96285437377344e-06, "loss": 0.3866, "step": 33492 }, { "epoch": 1.5370106924877243, "grad_norm": 0.45818474888801575, "learning_rate": 4.9626091926773935e-06, "loss": 0.3597, "step": 33493 }, { "epoch": 1.5370565829929788, "grad_norm": 0.49736732244491577, "learning_rate": 4.962364011671258e-06, "loss": 0.4185, "step": 33494 }, { "epoch": 1.5371024734982333, "grad_norm": 0.4605158567428589, "learning_rate": 4.962118830755627e-06, "loss": 0.3551, "step": 33495 }, { "epoch": 1.5371483640034875, "grad_norm": 0.46528831124305725, "learning_rate": 4.9618736499310894e-06, "loss": 0.3461, "step": 33496 }, { "epoch": 1.537194254508742, "grad_norm": 0.4810307025909424, "learning_rate": 4.961628469198232e-06, "loss": 0.3419, "step": 33497 }, { "epoch": 1.5372401450139965, "grad_norm": 0.4557763636112213, "learning_rate": 4.961383288557647e-06, "loss": 0.3544, "step": 33498 }, { "epoch": 1.537286035519251, "grad_norm": 0.4526757299900055, "learning_rate": 4.961138108009923e-06, "loss": 0.3499, "step": 33499 }, { "epoch": 1.5373319260245055, "grad_norm": 0.4954931139945984, "learning_rate": 4.960892927555648e-06, "loss": 0.4136, "step": 33500 }, { "epoch": 1.53737781652976, "grad_norm": 0.5315516591072083, "learning_rate": 4.960647747195415e-06, "loss": 0.4032, "step": 33501 }, { "epoch": 1.5374237070350145, "grad_norm": 0.48933500051498413, "learning_rate": 4.960402566929812e-06, "loss": 0.4347, "step": 33502 }, { "epoch": 1.537469597540269, "grad_norm": 0.5072292685508728, "learning_rate": 4.960157386759426e-06, "loss": 0.3955, "step": 33503 }, { "epoch": 1.5375154880455235, "grad_norm": 0.5325661897659302, "learning_rate": 4.95991220668485e-06, "loss": 0.4201, "step": 33504 }, { "epoch": 1.5375613785507778, "grad_norm": 0.44953417778015137, "learning_rate": 4.959667026706673e-06, "loss": 0.3376, "step": 33505 }, { "epoch": 1.5376072690560323, "grad_norm": 0.5028049945831299, "learning_rate": 4.9594218468254854e-06, "loss": 0.3721, "step": 33506 }, { "epoch": 1.5376531595612868, "grad_norm": 0.4753458499908447, "learning_rate": 4.959176667041873e-06, "loss": 0.3525, "step": 33507 }, { "epoch": 1.5376990500665413, "grad_norm": 0.4624435007572174, "learning_rate": 4.958931487356429e-06, "loss": 0.3043, "step": 33508 }, { "epoch": 1.5377449405717956, "grad_norm": 0.5080615878105164, "learning_rate": 4.958686307769742e-06, "loss": 0.3729, "step": 33509 }, { "epoch": 1.53779083107705, "grad_norm": 0.46574658155441284, "learning_rate": 4.9584411282824e-06, "loss": 0.3344, "step": 33510 }, { "epoch": 1.5378367215823046, "grad_norm": 0.4834032654762268, "learning_rate": 4.958195948894995e-06, "loss": 0.3527, "step": 33511 }, { "epoch": 1.537882612087559, "grad_norm": 0.4529985189437866, "learning_rate": 4.957950769608116e-06, "loss": 0.323, "step": 33512 }, { "epoch": 1.5379285025928136, "grad_norm": 0.44337084889411926, "learning_rate": 4.957705590422351e-06, "loss": 0.2973, "step": 33513 }, { "epoch": 1.537974393098068, "grad_norm": 0.4465126097202301, "learning_rate": 4.9574604113382914e-06, "loss": 0.3197, "step": 33514 }, { "epoch": 1.5380202836033225, "grad_norm": 0.4845251441001892, "learning_rate": 4.957215232356527e-06, "loss": 0.3644, "step": 33515 }, { "epoch": 1.538066174108577, "grad_norm": 0.4352024495601654, "learning_rate": 4.956970053477643e-06, "loss": 0.3145, "step": 33516 }, { "epoch": 1.5381120646138315, "grad_norm": 0.42236316204071045, "learning_rate": 4.956724874702236e-06, "loss": 0.2947, "step": 33517 }, { "epoch": 1.5381579551190858, "grad_norm": 0.506375253200531, "learning_rate": 4.956479696030891e-06, "loss": 0.4013, "step": 33518 }, { "epoch": 1.5382038456243403, "grad_norm": 0.4780620038509369, "learning_rate": 4.9562345174641984e-06, "loss": 0.4045, "step": 33519 }, { "epoch": 1.5382497361295948, "grad_norm": 0.46596357226371765, "learning_rate": 4.955989339002749e-06, "loss": 0.3391, "step": 33520 }, { "epoch": 1.538295626634849, "grad_norm": 0.5346869826316833, "learning_rate": 4.9557441606471306e-06, "loss": 0.4813, "step": 33521 }, { "epoch": 1.5383415171401036, "grad_norm": 0.4888061583042145, "learning_rate": 4.955498982397933e-06, "loss": 0.3987, "step": 33522 }, { "epoch": 1.538387407645358, "grad_norm": 0.4588027000427246, "learning_rate": 4.955253804255747e-06, "loss": 0.3819, "step": 33523 }, { "epoch": 1.5384332981506126, "grad_norm": 0.45934948325157166, "learning_rate": 4.955008626221162e-06, "loss": 0.3275, "step": 33524 }, { "epoch": 1.538479188655867, "grad_norm": 0.4335435926914215, "learning_rate": 4.954763448294764e-06, "loss": 0.2672, "step": 33525 }, { "epoch": 1.5385250791611216, "grad_norm": 0.46984604001045227, "learning_rate": 4.954518270477148e-06, "loss": 0.262, "step": 33526 }, { "epoch": 1.538570969666376, "grad_norm": 0.4909984767436981, "learning_rate": 4.954273092768902e-06, "loss": 0.397, "step": 33527 }, { "epoch": 1.5386168601716306, "grad_norm": 0.47724518179893494, "learning_rate": 4.954027915170614e-06, "loss": 0.36, "step": 33528 }, { "epoch": 1.538662750676885, "grad_norm": 0.45673078298568726, "learning_rate": 4.953782737682874e-06, "loss": 0.3481, "step": 33529 }, { "epoch": 1.5387086411821396, "grad_norm": 0.42836862802505493, "learning_rate": 4.953537560306273e-06, "loss": 0.319, "step": 33530 }, { "epoch": 1.5387545316873938, "grad_norm": 0.46772244572639465, "learning_rate": 4.953292383041397e-06, "loss": 0.3153, "step": 33531 }, { "epoch": 1.5388004221926483, "grad_norm": 0.5043237209320068, "learning_rate": 4.95304720588884e-06, "loss": 0.3643, "step": 33532 }, { "epoch": 1.5388463126979028, "grad_norm": 0.48656636476516724, "learning_rate": 4.952802028849189e-06, "loss": 0.3963, "step": 33533 }, { "epoch": 1.5388922032031571, "grad_norm": 0.4500691890716553, "learning_rate": 4.952556851923035e-06, "loss": 0.2868, "step": 33534 }, { "epoch": 1.5389380937084116, "grad_norm": 0.4585524797439575, "learning_rate": 4.9523116751109665e-06, "loss": 0.3379, "step": 33535 }, { "epoch": 1.5389839842136661, "grad_norm": 0.4980064034461975, "learning_rate": 4.952066498413573e-06, "loss": 0.4013, "step": 33536 }, { "epoch": 1.5390298747189206, "grad_norm": 0.507173478603363, "learning_rate": 4.951821321831445e-06, "loss": 0.3856, "step": 33537 }, { "epoch": 1.539075765224175, "grad_norm": 0.5247698426246643, "learning_rate": 4.95157614536517e-06, "loss": 0.3778, "step": 33538 }, { "epoch": 1.5391216557294296, "grad_norm": 0.4858540892601013, "learning_rate": 4.95133096901534e-06, "loss": 0.3764, "step": 33539 }, { "epoch": 1.539167546234684, "grad_norm": 0.4381698668003082, "learning_rate": 4.951085792782545e-06, "loss": 0.2703, "step": 33540 }, { "epoch": 1.5392134367399386, "grad_norm": 0.45550626516342163, "learning_rate": 4.950840616667371e-06, "loss": 0.329, "step": 33541 }, { "epoch": 1.539259327245193, "grad_norm": 0.46883624792099, "learning_rate": 4.950595440670412e-06, "loss": 0.4082, "step": 33542 }, { "epoch": 1.5393052177504476, "grad_norm": 0.4338158667087555, "learning_rate": 4.9503502647922546e-06, "loss": 0.2846, "step": 33543 }, { "epoch": 1.5393511082557019, "grad_norm": 0.5083686113357544, "learning_rate": 4.950105089033489e-06, "loss": 0.3285, "step": 33544 }, { "epoch": 1.5393969987609564, "grad_norm": 0.4538721740245819, "learning_rate": 4.949859913394705e-06, "loss": 0.3284, "step": 33545 }, { "epoch": 1.5394428892662109, "grad_norm": 0.4593503475189209, "learning_rate": 4.949614737876492e-06, "loss": 0.3321, "step": 33546 }, { "epoch": 1.5394887797714651, "grad_norm": 0.4546606242656708, "learning_rate": 4.949369562479438e-06, "loss": 0.3298, "step": 33547 }, { "epoch": 1.5395346702767196, "grad_norm": 0.46268191933631897, "learning_rate": 4.949124387204136e-06, "loss": 0.378, "step": 33548 }, { "epoch": 1.5395805607819741, "grad_norm": 0.5112650394439697, "learning_rate": 4.948879212051174e-06, "loss": 0.3885, "step": 33549 }, { "epoch": 1.5396264512872286, "grad_norm": 0.48819857835769653, "learning_rate": 4.94863403702114e-06, "loss": 0.4141, "step": 33550 }, { "epoch": 1.5396723417924831, "grad_norm": 0.4744936525821686, "learning_rate": 4.948388862114627e-06, "loss": 0.4149, "step": 33551 }, { "epoch": 1.5397182322977376, "grad_norm": 0.4827585518360138, "learning_rate": 4.948143687332222e-06, "loss": 0.3807, "step": 33552 }, { "epoch": 1.5397641228029921, "grad_norm": 0.4800211191177368, "learning_rate": 4.947898512674513e-06, "loss": 0.3655, "step": 33553 }, { "epoch": 1.5398100133082466, "grad_norm": 0.48756587505340576, "learning_rate": 4.947653338142093e-06, "loss": 0.3834, "step": 33554 }, { "epoch": 1.5398559038135011, "grad_norm": 0.48141101002693176, "learning_rate": 4.947408163735551e-06, "loss": 0.3545, "step": 33555 }, { "epoch": 1.5399017943187554, "grad_norm": 0.471162348985672, "learning_rate": 4.947162989455474e-06, "loss": 0.3492, "step": 33556 }, { "epoch": 1.53994768482401, "grad_norm": 0.47999197244644165, "learning_rate": 4.946917815302455e-06, "loss": 0.388, "step": 33557 }, { "epoch": 1.5399935753292644, "grad_norm": 0.5018584132194519, "learning_rate": 4.946672641277081e-06, "loss": 0.3897, "step": 33558 }, { "epoch": 1.5400394658345187, "grad_norm": 0.4959656596183777, "learning_rate": 4.946427467379942e-06, "loss": 0.3913, "step": 33559 }, { "epoch": 1.5400853563397732, "grad_norm": 0.5107811093330383, "learning_rate": 4.946182293611629e-06, "loss": 0.4037, "step": 33560 }, { "epoch": 1.5401312468450277, "grad_norm": 0.4421091377735138, "learning_rate": 4.945937119972731e-06, "loss": 0.2774, "step": 33561 }, { "epoch": 1.5401771373502822, "grad_norm": 0.4308513104915619, "learning_rate": 4.945691946463836e-06, "loss": 0.306, "step": 33562 }, { "epoch": 1.5402230278555367, "grad_norm": 0.4444197118282318, "learning_rate": 4.945446773085535e-06, "loss": 0.3001, "step": 33563 }, { "epoch": 1.5402689183607912, "grad_norm": 0.47759947180747986, "learning_rate": 4.945201599838419e-06, "loss": 0.4016, "step": 33564 }, { "epoch": 1.5403148088660457, "grad_norm": 0.4484749734401703, "learning_rate": 4.9449564267230724e-06, "loss": 0.3116, "step": 33565 }, { "epoch": 1.5403606993713002, "grad_norm": 0.46381714940071106, "learning_rate": 4.944711253740091e-06, "loss": 0.3193, "step": 33566 }, { "epoch": 1.5404065898765547, "grad_norm": 0.5293952226638794, "learning_rate": 4.944466080890062e-06, "loss": 0.4635, "step": 33567 }, { "epoch": 1.5404524803818092, "grad_norm": 0.44792816042900085, "learning_rate": 4.9442209081735715e-06, "loss": 0.3316, "step": 33568 }, { "epoch": 1.5404983708870634, "grad_norm": 0.5117705464363098, "learning_rate": 4.943975735591214e-06, "loss": 0.3919, "step": 33569 }, { "epoch": 1.540544261392318, "grad_norm": 0.4713311791419983, "learning_rate": 4.943730563143578e-06, "loss": 0.3861, "step": 33570 }, { "epoch": 1.5405901518975724, "grad_norm": 0.4796547293663025, "learning_rate": 4.943485390831252e-06, "loss": 0.3881, "step": 33571 }, { "epoch": 1.5406360424028267, "grad_norm": 0.4474563002586365, "learning_rate": 4.943240218654825e-06, "loss": 0.3081, "step": 33572 }, { "epoch": 1.5406819329080812, "grad_norm": 0.5067110061645508, "learning_rate": 4.9429950466148874e-06, "loss": 0.3579, "step": 33573 }, { "epoch": 1.5407278234133357, "grad_norm": 0.4425121247768402, "learning_rate": 4.94274987471203e-06, "loss": 0.3273, "step": 33574 }, { "epoch": 1.5407737139185902, "grad_norm": 0.436112642288208, "learning_rate": 4.942504702946838e-06, "loss": 0.2652, "step": 33575 }, { "epoch": 1.5408196044238447, "grad_norm": 0.47832804918289185, "learning_rate": 4.9422595313199065e-06, "loss": 0.3643, "step": 33576 }, { "epoch": 1.5408654949290992, "grad_norm": 0.49406495690345764, "learning_rate": 4.942014359831822e-06, "loss": 0.4523, "step": 33577 }, { "epoch": 1.5409113854343537, "grad_norm": 0.45660126209259033, "learning_rate": 4.941769188483174e-06, "loss": 0.3612, "step": 33578 }, { "epoch": 1.5409572759396082, "grad_norm": 0.43226879835128784, "learning_rate": 4.941524017274554e-06, "loss": 0.3537, "step": 33579 }, { "epoch": 1.5410031664448627, "grad_norm": 0.4497494697570801, "learning_rate": 4.94127884620655e-06, "loss": 0.3312, "step": 33580 }, { "epoch": 1.5410490569501172, "grad_norm": 0.4486096501350403, "learning_rate": 4.94103367527975e-06, "loss": 0.3361, "step": 33581 }, { "epoch": 1.5410949474553715, "grad_norm": 0.4650793671607971, "learning_rate": 4.940788504494748e-06, "loss": 0.3462, "step": 33582 }, { "epoch": 1.541140837960626, "grad_norm": 0.5677633881568909, "learning_rate": 4.94054333385213e-06, "loss": 0.4902, "step": 33583 }, { "epoch": 1.5411867284658805, "grad_norm": 0.4618867337703705, "learning_rate": 4.940298163352486e-06, "loss": 0.3343, "step": 33584 }, { "epoch": 1.5412326189711347, "grad_norm": 0.5067033767700195, "learning_rate": 4.940052992996407e-06, "loss": 0.3897, "step": 33585 }, { "epoch": 1.5412785094763892, "grad_norm": 0.5166358351707458, "learning_rate": 4.939807822784482e-06, "loss": 0.44, "step": 33586 }, { "epoch": 1.5413243999816437, "grad_norm": 0.48250359296798706, "learning_rate": 4.939562652717298e-06, "loss": 0.3924, "step": 33587 }, { "epoch": 1.5413702904868982, "grad_norm": 0.45088499784469604, "learning_rate": 4.939317482795449e-06, "loss": 0.305, "step": 33588 }, { "epoch": 1.5414161809921527, "grad_norm": 0.46606260538101196, "learning_rate": 4.939072313019522e-06, "loss": 0.3457, "step": 33589 }, { "epoch": 1.5414620714974072, "grad_norm": 0.484151691198349, "learning_rate": 4.938827143390104e-06, "loss": 0.3814, "step": 33590 }, { "epoch": 1.5415079620026617, "grad_norm": 0.4708734154701233, "learning_rate": 4.938581973907791e-06, "loss": 0.3373, "step": 33591 }, { "epoch": 1.5415538525079162, "grad_norm": 0.45363467931747437, "learning_rate": 4.938336804573168e-06, "loss": 0.3345, "step": 33592 }, { "epoch": 1.5415997430131707, "grad_norm": 0.5060080289840698, "learning_rate": 4.938091635386824e-06, "loss": 0.3991, "step": 33593 }, { "epoch": 1.541645633518425, "grad_norm": 0.46949246525764465, "learning_rate": 4.937846466349352e-06, "loss": 0.362, "step": 33594 }, { "epoch": 1.5416915240236795, "grad_norm": 0.4366362988948822, "learning_rate": 4.937601297461339e-06, "loss": 0.297, "step": 33595 }, { "epoch": 1.541737414528934, "grad_norm": 0.49495115876197815, "learning_rate": 4.937356128723373e-06, "loss": 0.3648, "step": 33596 }, { "epoch": 1.5417833050341885, "grad_norm": 0.47930920124053955, "learning_rate": 4.937110960136049e-06, "loss": 0.3915, "step": 33597 }, { "epoch": 1.5418291955394428, "grad_norm": 0.44499385356903076, "learning_rate": 4.936865791699953e-06, "loss": 0.3198, "step": 33598 }, { "epoch": 1.5418750860446973, "grad_norm": 0.45275044441223145, "learning_rate": 4.936620623415672e-06, "loss": 0.3223, "step": 33599 }, { "epoch": 1.5419209765499517, "grad_norm": 0.4935304522514343, "learning_rate": 4.936375455283802e-06, "loss": 0.3444, "step": 33600 }, { "epoch": 1.5419668670552062, "grad_norm": 0.4994998574256897, "learning_rate": 4.936130287304928e-06, "loss": 0.4172, "step": 33601 }, { "epoch": 1.5420127575604607, "grad_norm": 0.46916863322257996, "learning_rate": 4.935885119479638e-06, "loss": 0.3931, "step": 33602 }, { "epoch": 1.5420586480657152, "grad_norm": 0.4675274193286896, "learning_rate": 4.9356399518085256e-06, "loss": 0.3485, "step": 33603 }, { "epoch": 1.5421045385709697, "grad_norm": 0.5057913661003113, "learning_rate": 4.93539478429218e-06, "loss": 0.3885, "step": 33604 }, { "epoch": 1.5421504290762242, "grad_norm": 0.4735415279865265, "learning_rate": 4.935149616931189e-06, "loss": 0.37, "step": 33605 }, { "epoch": 1.5421963195814787, "grad_norm": 0.4587606191635132, "learning_rate": 4.934904449726141e-06, "loss": 0.3049, "step": 33606 }, { "epoch": 1.542242210086733, "grad_norm": 0.5668028593063354, "learning_rate": 4.93465928267763e-06, "loss": 0.465, "step": 33607 }, { "epoch": 1.5422881005919875, "grad_norm": 0.4814692437648773, "learning_rate": 4.934414115786242e-06, "loss": 0.3679, "step": 33608 }, { "epoch": 1.542333991097242, "grad_norm": 0.5004754662513733, "learning_rate": 4.934168949052564e-06, "loss": 0.3914, "step": 33609 }, { "epoch": 1.5423798816024963, "grad_norm": 0.4443148374557495, "learning_rate": 4.933923782477193e-06, "loss": 0.3163, "step": 33610 }, { "epoch": 1.5424257721077508, "grad_norm": 0.5136976838111877, "learning_rate": 4.933678616060714e-06, "loss": 0.3177, "step": 33611 }, { "epoch": 1.5424716626130053, "grad_norm": 0.4516414403915405, "learning_rate": 4.933433449803715e-06, "loss": 0.3265, "step": 33612 }, { "epoch": 1.5425175531182598, "grad_norm": 0.48030439019203186, "learning_rate": 4.9331882837067884e-06, "loss": 0.3425, "step": 33613 }, { "epoch": 1.5425634436235143, "grad_norm": 0.47294512391090393, "learning_rate": 4.932943117770524e-06, "loss": 0.3638, "step": 33614 }, { "epoch": 1.5426093341287688, "grad_norm": 0.4611269533634186, "learning_rate": 4.932697951995509e-06, "loss": 0.3431, "step": 33615 }, { "epoch": 1.5426552246340233, "grad_norm": 0.4959776997566223, "learning_rate": 4.932452786382335e-06, "loss": 0.3477, "step": 33616 }, { "epoch": 1.5427011151392778, "grad_norm": 0.48662444949150085, "learning_rate": 4.932207620931591e-06, "loss": 0.3364, "step": 33617 }, { "epoch": 1.5427470056445323, "grad_norm": 0.5055026412010193, "learning_rate": 4.931962455643863e-06, "loss": 0.4492, "step": 33618 }, { "epoch": 1.5427928961497868, "grad_norm": 0.49302318692207336, "learning_rate": 4.931717290519747e-06, "loss": 0.3854, "step": 33619 }, { "epoch": 1.542838786655041, "grad_norm": 0.4855456054210663, "learning_rate": 4.931472125559829e-06, "loss": 0.341, "step": 33620 }, { "epoch": 1.5428846771602955, "grad_norm": 0.5016556978225708, "learning_rate": 4.931226960764698e-06, "loss": 0.3932, "step": 33621 }, { "epoch": 1.54293056766555, "grad_norm": 0.523742139339447, "learning_rate": 4.930981796134946e-06, "loss": 0.46, "step": 33622 }, { "epoch": 1.5429764581708043, "grad_norm": 0.5382898449897766, "learning_rate": 4.930736631671161e-06, "loss": 0.4623, "step": 33623 }, { "epoch": 1.5430223486760588, "grad_norm": 0.5127125382423401, "learning_rate": 4.930491467373929e-06, "loss": 0.4361, "step": 33624 }, { "epoch": 1.5430682391813133, "grad_norm": 0.4651939868927002, "learning_rate": 4.930246303243846e-06, "loss": 0.3789, "step": 33625 }, { "epoch": 1.5431141296865678, "grad_norm": 0.47741934657096863, "learning_rate": 4.930001139281499e-06, "loss": 0.352, "step": 33626 }, { "epoch": 1.5431600201918223, "grad_norm": 0.47967615723609924, "learning_rate": 4.929755975487476e-06, "loss": 0.3321, "step": 33627 }, { "epoch": 1.5432059106970768, "grad_norm": 0.5015032887458801, "learning_rate": 4.929510811862369e-06, "loss": 0.4331, "step": 33628 }, { "epoch": 1.5432518012023313, "grad_norm": 0.4766474962234497, "learning_rate": 4.929265648406766e-06, "loss": 0.3785, "step": 33629 }, { "epoch": 1.5432976917075858, "grad_norm": 0.498293936252594, "learning_rate": 4.929020485121254e-06, "loss": 0.4095, "step": 33630 }, { "epoch": 1.5433435822128403, "grad_norm": 0.546868622303009, "learning_rate": 4.928775322006428e-06, "loss": 0.3927, "step": 33631 }, { "epoch": 1.5433894727180948, "grad_norm": 0.5303363800048828, "learning_rate": 4.928530159062876e-06, "loss": 0.4343, "step": 33632 }, { "epoch": 1.543435363223349, "grad_norm": 0.47333669662475586, "learning_rate": 4.928284996291183e-06, "loss": 0.3722, "step": 33633 }, { "epoch": 1.5434812537286036, "grad_norm": 0.4681225121021271, "learning_rate": 4.928039833691945e-06, "loss": 0.3517, "step": 33634 }, { "epoch": 1.543527144233858, "grad_norm": 0.49124595522880554, "learning_rate": 4.927794671265748e-06, "loss": 0.3695, "step": 33635 }, { "epoch": 1.5435730347391123, "grad_norm": 0.4366133511066437, "learning_rate": 4.927549509013181e-06, "loss": 0.2735, "step": 33636 }, { "epoch": 1.5436189252443668, "grad_norm": 0.48148953914642334, "learning_rate": 4.927304346934834e-06, "loss": 0.3843, "step": 33637 }, { "epoch": 1.5436648157496213, "grad_norm": 0.48762959241867065, "learning_rate": 4.927059185031299e-06, "loss": 0.4392, "step": 33638 }, { "epoch": 1.5437107062548758, "grad_norm": 0.5201305747032166, "learning_rate": 4.926814023303163e-06, "loss": 0.4282, "step": 33639 }, { "epoch": 1.5437565967601303, "grad_norm": 0.4859296977519989, "learning_rate": 4.926568861751014e-06, "loss": 0.3947, "step": 33640 }, { "epoch": 1.5438024872653848, "grad_norm": 0.47841450572013855, "learning_rate": 4.926323700375446e-06, "loss": 0.3566, "step": 33641 }, { "epoch": 1.5438483777706393, "grad_norm": 0.44055384397506714, "learning_rate": 4.926078539177046e-06, "loss": 0.3354, "step": 33642 }, { "epoch": 1.5438942682758938, "grad_norm": 0.48486483097076416, "learning_rate": 4.925833378156403e-06, "loss": 0.3561, "step": 33643 }, { "epoch": 1.5439401587811483, "grad_norm": 0.49194344878196716, "learning_rate": 4.925588217314109e-06, "loss": 0.3833, "step": 33644 }, { "epoch": 1.5439860492864026, "grad_norm": 0.48740342259407043, "learning_rate": 4.925343056650751e-06, "loss": 0.4077, "step": 33645 }, { "epoch": 1.544031939791657, "grad_norm": 0.4789520800113678, "learning_rate": 4.925097896166917e-06, "loss": 0.3763, "step": 33646 }, { "epoch": 1.5440778302969116, "grad_norm": 0.44591280817985535, "learning_rate": 4.924852735863202e-06, "loss": 0.31, "step": 33647 }, { "epoch": 1.5441237208021659, "grad_norm": 0.47929877042770386, "learning_rate": 4.924607575740192e-06, "loss": 0.3907, "step": 33648 }, { "epoch": 1.5441696113074204, "grad_norm": 0.43600186705589294, "learning_rate": 4.9243624157984755e-06, "loss": 0.3173, "step": 33649 }, { "epoch": 1.5442155018126749, "grad_norm": 0.4479968547821045, "learning_rate": 4.924117256038645e-06, "loss": 0.3051, "step": 33650 }, { "epoch": 1.5442613923179294, "grad_norm": 0.47753340005874634, "learning_rate": 4.923872096461288e-06, "loss": 0.3659, "step": 33651 }, { "epoch": 1.5443072828231839, "grad_norm": 0.47188812494277954, "learning_rate": 4.923626937066994e-06, "loss": 0.368, "step": 33652 }, { "epoch": 1.5443531733284384, "grad_norm": 0.5013169050216675, "learning_rate": 4.923381777856353e-06, "loss": 0.3894, "step": 33653 }, { "epoch": 1.5443990638336929, "grad_norm": 0.44498521089553833, "learning_rate": 4.923136618829958e-06, "loss": 0.3007, "step": 33654 }, { "epoch": 1.5444449543389474, "grad_norm": 0.4475548565387726, "learning_rate": 4.92289145998839e-06, "loss": 0.3414, "step": 33655 }, { "epoch": 1.5444908448442018, "grad_norm": 0.4787669777870178, "learning_rate": 4.922646301332247e-06, "loss": 0.3788, "step": 33656 }, { "epoch": 1.5445367353494563, "grad_norm": 0.4321731626987457, "learning_rate": 4.922401142862115e-06, "loss": 0.3173, "step": 33657 }, { "epoch": 1.5445826258547106, "grad_norm": 0.5022450685501099, "learning_rate": 4.9221559845785824e-06, "loss": 0.3322, "step": 33658 }, { "epoch": 1.5446285163599651, "grad_norm": 0.4767553210258484, "learning_rate": 4.921910826482242e-06, "loss": 0.3268, "step": 33659 }, { "epoch": 1.5446744068652196, "grad_norm": 0.4745975434780121, "learning_rate": 4.92166566857368e-06, "loss": 0.4289, "step": 33660 }, { "epoch": 1.544720297370474, "grad_norm": 0.5147960782051086, "learning_rate": 4.9214205108534866e-06, "loss": 0.4689, "step": 33661 }, { "epoch": 1.5447661878757284, "grad_norm": 0.47956761717796326, "learning_rate": 4.921175353322253e-06, "loss": 0.4306, "step": 33662 }, { "epoch": 1.544812078380983, "grad_norm": 0.45985832810401917, "learning_rate": 4.920930195980569e-06, "loss": 0.3608, "step": 33663 }, { "epoch": 1.5448579688862374, "grad_norm": 0.4663658142089844, "learning_rate": 4.920685038829021e-06, "loss": 0.374, "step": 33664 }, { "epoch": 1.5449038593914919, "grad_norm": 0.487166166305542, "learning_rate": 4.920439881868202e-06, "loss": 0.3584, "step": 33665 }, { "epoch": 1.5449497498967464, "grad_norm": 0.48380008339881897, "learning_rate": 4.9201947250987e-06, "loss": 0.3852, "step": 33666 }, { "epoch": 1.5449956404020009, "grad_norm": 0.44474971294403076, "learning_rate": 4.919949568521102e-06, "loss": 0.3135, "step": 33667 }, { "epoch": 1.5450415309072554, "grad_norm": 0.511354386806488, "learning_rate": 4.919704412136002e-06, "loss": 0.3823, "step": 33668 }, { "epoch": 1.5450874214125099, "grad_norm": 0.4838235676288605, "learning_rate": 4.919459255943988e-06, "loss": 0.3903, "step": 33669 }, { "epoch": 1.5451333119177644, "grad_norm": 0.4718272089958191, "learning_rate": 4.919214099945649e-06, "loss": 0.3437, "step": 33670 }, { "epoch": 1.5451792024230186, "grad_norm": 0.909521758556366, "learning_rate": 4.918968944141573e-06, "loss": 0.3736, "step": 33671 }, { "epoch": 1.5452250929282731, "grad_norm": 0.5044870972633362, "learning_rate": 4.918723788532353e-06, "loss": 0.3897, "step": 33672 }, { "epoch": 1.5452709834335276, "grad_norm": 0.4717123508453369, "learning_rate": 4.9184786331185765e-06, "loss": 0.3565, "step": 33673 }, { "epoch": 1.545316873938782, "grad_norm": 0.44002804160118103, "learning_rate": 4.9182334779008305e-06, "loss": 0.2778, "step": 33674 }, { "epoch": 1.5453627644440364, "grad_norm": 0.504742443561554, "learning_rate": 4.91798832287971e-06, "loss": 0.3756, "step": 33675 }, { "epoch": 1.545408654949291, "grad_norm": 0.42752397060394287, "learning_rate": 4.917743168055802e-06, "loss": 0.3017, "step": 33676 }, { "epoch": 1.5454545454545454, "grad_norm": 0.44653525948524475, "learning_rate": 4.917498013429693e-06, "loss": 0.348, "step": 33677 }, { "epoch": 1.5455004359598, "grad_norm": 0.5015146136283875, "learning_rate": 4.9172528590019775e-06, "loss": 0.4103, "step": 33678 }, { "epoch": 1.5455463264650544, "grad_norm": 0.43168386816978455, "learning_rate": 4.9170077047732425e-06, "loss": 0.2808, "step": 33679 }, { "epoch": 1.545592216970309, "grad_norm": 0.47256985306739807, "learning_rate": 4.916762550744076e-06, "loss": 0.3495, "step": 33680 }, { "epoch": 1.5456381074755634, "grad_norm": 0.48931726813316345, "learning_rate": 4.916517396915071e-06, "loss": 0.4532, "step": 33681 }, { "epoch": 1.545683997980818, "grad_norm": 0.46851399540901184, "learning_rate": 4.9162722432868156e-06, "loss": 0.3702, "step": 33682 }, { "epoch": 1.5457298884860722, "grad_norm": 0.4916781187057495, "learning_rate": 4.916027089859897e-06, "loss": 0.3554, "step": 33683 }, { "epoch": 1.5457757789913267, "grad_norm": 0.47459301352500916, "learning_rate": 4.915781936634909e-06, "loss": 0.3749, "step": 33684 }, { "epoch": 1.5458216694965812, "grad_norm": 0.45352235436439514, "learning_rate": 4.915536783612438e-06, "loss": 0.2996, "step": 33685 }, { "epoch": 1.5458675600018357, "grad_norm": 0.4767163097858429, "learning_rate": 4.915291630793074e-06, "loss": 0.3613, "step": 33686 }, { "epoch": 1.54591345050709, "grad_norm": 0.5026130080223083, "learning_rate": 4.9150464781774074e-06, "loss": 0.4085, "step": 33687 }, { "epoch": 1.5459593410123444, "grad_norm": 0.5052232146263123, "learning_rate": 4.914801325766027e-06, "loss": 0.4035, "step": 33688 }, { "epoch": 1.546005231517599, "grad_norm": 0.4421968460083008, "learning_rate": 4.914556173559522e-06, "loss": 0.3008, "step": 33689 }, { "epoch": 1.5460511220228534, "grad_norm": 0.4664362072944641, "learning_rate": 4.9143110215584825e-06, "loss": 0.3868, "step": 33690 }, { "epoch": 1.546097012528108, "grad_norm": 0.5112592577934265, "learning_rate": 4.914065869763499e-06, "loss": 0.3991, "step": 33691 }, { "epoch": 1.5461429030333624, "grad_norm": 0.4529246687889099, "learning_rate": 4.913820718175159e-06, "loss": 0.2979, "step": 33692 }, { "epoch": 1.546188793538617, "grad_norm": 0.49521708488464355, "learning_rate": 4.913575566794054e-06, "loss": 0.3917, "step": 33693 }, { "epoch": 1.5462346840438714, "grad_norm": 0.47084107995033264, "learning_rate": 4.913330415620773e-06, "loss": 0.3448, "step": 33694 }, { "epoch": 1.546280574549126, "grad_norm": 0.47486555576324463, "learning_rate": 4.913085264655901e-06, "loss": 0.3585, "step": 33695 }, { "epoch": 1.5463264650543802, "grad_norm": 0.47531548142433167, "learning_rate": 4.912840113900035e-06, "loss": 0.3732, "step": 33696 }, { "epoch": 1.5463723555596347, "grad_norm": 0.467709481716156, "learning_rate": 4.9125949633537614e-06, "loss": 0.334, "step": 33697 }, { "epoch": 1.5464182460648892, "grad_norm": 0.4966362416744232, "learning_rate": 4.912349813017667e-06, "loss": 0.3814, "step": 33698 }, { "epoch": 1.5464641365701435, "grad_norm": 0.5105735659599304, "learning_rate": 4.912104662892346e-06, "loss": 0.4094, "step": 33699 }, { "epoch": 1.546510027075398, "grad_norm": 0.4202808141708374, "learning_rate": 4.911859512978385e-06, "loss": 0.2941, "step": 33700 }, { "epoch": 1.5465559175806525, "grad_norm": 0.47547367215156555, "learning_rate": 4.911614363276374e-06, "loss": 0.3877, "step": 33701 }, { "epoch": 1.546601808085907, "grad_norm": 0.4933498501777649, "learning_rate": 4.911369213786901e-06, "loss": 0.3992, "step": 33702 }, { "epoch": 1.5466476985911615, "grad_norm": 0.46759510040283203, "learning_rate": 4.911124064510559e-06, "loss": 0.3371, "step": 33703 }, { "epoch": 1.546693589096416, "grad_norm": 0.47216543555259705, "learning_rate": 4.910878915447935e-06, "loss": 0.3447, "step": 33704 }, { "epoch": 1.5467394796016705, "grad_norm": 0.5177017450332642, "learning_rate": 4.910633766599617e-06, "loss": 0.3955, "step": 33705 }, { "epoch": 1.546785370106925, "grad_norm": 0.48004966974258423, "learning_rate": 4.910388617966199e-06, "loss": 0.3952, "step": 33706 }, { "epoch": 1.5468312606121795, "grad_norm": 0.4812425971031189, "learning_rate": 4.910143469548269e-06, "loss": 0.3341, "step": 33707 }, { "epoch": 1.546877151117434, "grad_norm": 0.5259968042373657, "learning_rate": 4.909898321346413e-06, "loss": 0.3306, "step": 33708 }, { "epoch": 1.5469230416226882, "grad_norm": 0.4751369059085846, "learning_rate": 4.909653173361225e-06, "loss": 0.3628, "step": 33709 }, { "epoch": 1.5469689321279427, "grad_norm": 0.501651406288147, "learning_rate": 4.909408025593292e-06, "loss": 0.4191, "step": 33710 }, { "epoch": 1.5470148226331972, "grad_norm": 0.5016651153564453, "learning_rate": 4.909162878043202e-06, "loss": 0.423, "step": 33711 }, { "epoch": 1.5470607131384515, "grad_norm": 0.48306190967559814, "learning_rate": 4.9089177307115495e-06, "loss": 0.3661, "step": 33712 }, { "epoch": 1.547106603643706, "grad_norm": 0.48112261295318604, "learning_rate": 4.90867258359892e-06, "loss": 0.3718, "step": 33713 }, { "epoch": 1.5471524941489605, "grad_norm": 0.4686889350414276, "learning_rate": 4.908427436705904e-06, "loss": 0.3199, "step": 33714 }, { "epoch": 1.547198384654215, "grad_norm": 0.4698586165904999, "learning_rate": 4.908182290033092e-06, "loss": 0.3481, "step": 33715 }, { "epoch": 1.5472442751594695, "grad_norm": 0.47841984033584595, "learning_rate": 4.907937143581072e-06, "loss": 0.3543, "step": 33716 }, { "epoch": 1.547290165664724, "grad_norm": 0.4799274802207947, "learning_rate": 4.907691997350432e-06, "loss": 0.3854, "step": 33717 }, { "epoch": 1.5473360561699785, "grad_norm": 0.44813692569732666, "learning_rate": 4.907446851341766e-06, "loss": 0.3274, "step": 33718 }, { "epoch": 1.547381946675233, "grad_norm": 0.49787187576293945, "learning_rate": 4.907201705555662e-06, "loss": 0.3446, "step": 33719 }, { "epoch": 1.5474278371804875, "grad_norm": 0.5546039342880249, "learning_rate": 4.906956559992707e-06, "loss": 0.4069, "step": 33720 }, { "epoch": 1.547473727685742, "grad_norm": 0.49720388650894165, "learning_rate": 4.906711414653493e-06, "loss": 0.3823, "step": 33721 }, { "epoch": 1.5475196181909963, "grad_norm": 0.4946902096271515, "learning_rate": 4.906466269538609e-06, "loss": 0.435, "step": 33722 }, { "epoch": 1.5475655086962508, "grad_norm": 0.4508228302001953, "learning_rate": 4.906221124648642e-06, "loss": 0.3277, "step": 33723 }, { "epoch": 1.5476113992015053, "grad_norm": 0.4800426959991455, "learning_rate": 4.905975979984186e-06, "loss": 0.3583, "step": 33724 }, { "epoch": 1.5476572897067595, "grad_norm": 0.48433807492256165, "learning_rate": 4.905730835545827e-06, "loss": 0.4073, "step": 33725 }, { "epoch": 1.547703180212014, "grad_norm": 0.4893975853919983, "learning_rate": 4.905485691334153e-06, "loss": 0.4089, "step": 33726 }, { "epoch": 1.5477490707172685, "grad_norm": 0.48508909344673157, "learning_rate": 4.90524054734976e-06, "loss": 0.4032, "step": 33727 }, { "epoch": 1.547794961222523, "grad_norm": 0.44432926177978516, "learning_rate": 4.904995403593232e-06, "loss": 0.3373, "step": 33728 }, { "epoch": 1.5478408517277775, "grad_norm": 0.45035335421562195, "learning_rate": 4.90475026006516e-06, "loss": 0.3163, "step": 33729 }, { "epoch": 1.547886742233032, "grad_norm": 0.5301679968833923, "learning_rate": 4.904505116766134e-06, "loss": 0.3468, "step": 33730 }, { "epoch": 1.5479326327382865, "grad_norm": 0.43505701422691345, "learning_rate": 4.904259973696744e-06, "loss": 0.2741, "step": 33731 }, { "epoch": 1.547978523243541, "grad_norm": 0.4907168745994568, "learning_rate": 4.904014830857575e-06, "loss": 0.3747, "step": 33732 }, { "epoch": 1.5480244137487955, "grad_norm": 0.500189483165741, "learning_rate": 4.903769688249224e-06, "loss": 0.4033, "step": 33733 }, { "epoch": 1.5480703042540498, "grad_norm": 0.4848182499408722, "learning_rate": 4.903524545872275e-06, "loss": 0.3313, "step": 33734 }, { "epoch": 1.5481161947593043, "grad_norm": 0.45743417739868164, "learning_rate": 4.903279403727319e-06, "loss": 0.3007, "step": 33735 }, { "epoch": 1.5481620852645588, "grad_norm": 0.47727182507514954, "learning_rate": 4.903034261814945e-06, "loss": 0.3791, "step": 33736 }, { "epoch": 1.548207975769813, "grad_norm": 0.45982953906059265, "learning_rate": 4.902789120135744e-06, "loss": 0.3835, "step": 33737 }, { "epoch": 1.5482538662750676, "grad_norm": 0.5072396397590637, "learning_rate": 4.902543978690305e-06, "loss": 0.485, "step": 33738 }, { "epoch": 1.548299756780322, "grad_norm": 0.5088459253311157, "learning_rate": 4.902298837479214e-06, "loss": 0.394, "step": 33739 }, { "epoch": 1.5483456472855766, "grad_norm": 0.49588534235954285, "learning_rate": 4.902053696503065e-06, "loss": 0.4047, "step": 33740 }, { "epoch": 1.548391537790831, "grad_norm": 0.48816725611686707, "learning_rate": 4.901808555762448e-06, "loss": 0.3825, "step": 33741 }, { "epoch": 1.5484374282960855, "grad_norm": 0.5040444135665894, "learning_rate": 4.9015634152579475e-06, "loss": 0.3718, "step": 33742 }, { "epoch": 1.54848331880134, "grad_norm": 0.536422073841095, "learning_rate": 4.901318274990159e-06, "loss": 0.3599, "step": 33743 }, { "epoch": 1.5485292093065945, "grad_norm": 0.4810895323753357, "learning_rate": 4.901073134959667e-06, "loss": 0.3683, "step": 33744 }, { "epoch": 1.548575099811849, "grad_norm": 0.45391982793807983, "learning_rate": 4.900827995167063e-06, "loss": 0.3394, "step": 33745 }, { "epoch": 1.5486209903171035, "grad_norm": 0.5132395029067993, "learning_rate": 4.900582855612936e-06, "loss": 0.4163, "step": 33746 }, { "epoch": 1.5486668808223578, "grad_norm": 0.48213669657707214, "learning_rate": 4.9003377162978764e-06, "loss": 0.3303, "step": 33747 }, { "epoch": 1.5487127713276123, "grad_norm": 0.4752504825592041, "learning_rate": 4.900092577222472e-06, "loss": 0.3435, "step": 33748 }, { "epoch": 1.5487586618328668, "grad_norm": 0.5047745108604431, "learning_rate": 4.899847438387315e-06, "loss": 0.3299, "step": 33749 }, { "epoch": 1.548804552338121, "grad_norm": 0.4779907763004303, "learning_rate": 4.899602299792993e-06, "loss": 0.3895, "step": 33750 }, { "epoch": 1.5488504428433756, "grad_norm": 0.4793146848678589, "learning_rate": 4.899357161440095e-06, "loss": 0.3781, "step": 33751 }, { "epoch": 1.54889633334863, "grad_norm": 0.4452204704284668, "learning_rate": 4.8991120233292116e-06, "loss": 0.3125, "step": 33752 }, { "epoch": 1.5489422238538846, "grad_norm": 0.46993136405944824, "learning_rate": 4.898866885460933e-06, "loss": 0.3661, "step": 33753 }, { "epoch": 1.548988114359139, "grad_norm": 0.47236761450767517, "learning_rate": 4.898621747835845e-06, "loss": 0.3645, "step": 33754 }, { "epoch": 1.5490340048643936, "grad_norm": 0.4964770972728729, "learning_rate": 4.898376610454542e-06, "loss": 0.4016, "step": 33755 }, { "epoch": 1.549079895369648, "grad_norm": 0.4356682598590851, "learning_rate": 4.89813147331761e-06, "loss": 0.263, "step": 33756 }, { "epoch": 1.5491257858749026, "grad_norm": 0.4814671277999878, "learning_rate": 4.8978863364256405e-06, "loss": 0.4095, "step": 33757 }, { "epoch": 1.549171676380157, "grad_norm": 0.4583836495876312, "learning_rate": 4.897641199779222e-06, "loss": 0.3413, "step": 33758 }, { "epoch": 1.5492175668854116, "grad_norm": 0.4807807207107544, "learning_rate": 4.8973960633789445e-06, "loss": 0.4078, "step": 33759 }, { "epoch": 1.5492634573906658, "grad_norm": 0.44903725385665894, "learning_rate": 4.897150927225395e-06, "loss": 0.2791, "step": 33760 }, { "epoch": 1.5493093478959203, "grad_norm": 0.45138269662857056, "learning_rate": 4.896905791319167e-06, "loss": 0.3398, "step": 33761 }, { "epoch": 1.5493552384011748, "grad_norm": 0.4773988425731659, "learning_rate": 4.8966606556608485e-06, "loss": 0.394, "step": 33762 }, { "epoch": 1.5494011289064291, "grad_norm": 0.5426749587059021, "learning_rate": 4.896415520251027e-06, "loss": 0.4537, "step": 33763 }, { "epoch": 1.5494470194116836, "grad_norm": 0.5032060146331787, "learning_rate": 4.896170385090295e-06, "loss": 0.3815, "step": 33764 }, { "epoch": 1.549492909916938, "grad_norm": 0.45466119050979614, "learning_rate": 4.89592525017924e-06, "loss": 0.3511, "step": 33765 }, { "epoch": 1.5495388004221926, "grad_norm": 0.46169033646583557, "learning_rate": 4.895680115518452e-06, "loss": 0.3609, "step": 33766 }, { "epoch": 1.549584690927447, "grad_norm": 0.45012903213500977, "learning_rate": 4.895434981108519e-06, "loss": 0.3172, "step": 33767 }, { "epoch": 1.5496305814327016, "grad_norm": 0.47289618849754333, "learning_rate": 4.895189846950034e-06, "loss": 0.3804, "step": 33768 }, { "epoch": 1.549676471937956, "grad_norm": 0.4494435489177704, "learning_rate": 4.894944713043583e-06, "loss": 0.3955, "step": 33769 }, { "epoch": 1.5497223624432106, "grad_norm": 0.48021695017814636, "learning_rate": 4.894699579389755e-06, "loss": 0.384, "step": 33770 }, { "epoch": 1.549768252948465, "grad_norm": 0.4484626054763794, "learning_rate": 4.894454445989144e-06, "loss": 0.307, "step": 33771 }, { "epoch": 1.5498141434537194, "grad_norm": 0.5259761214256287, "learning_rate": 4.8942093128423366e-06, "loss": 0.4155, "step": 33772 }, { "epoch": 1.5498600339589739, "grad_norm": 0.4955519139766693, "learning_rate": 4.893964179949921e-06, "loss": 0.3946, "step": 33773 }, { "epoch": 1.5499059244642284, "grad_norm": 0.39875203371047974, "learning_rate": 4.8937190473124895e-06, "loss": 0.266, "step": 33774 }, { "epoch": 1.5499518149694829, "grad_norm": 0.45695504546165466, "learning_rate": 4.89347391493063e-06, "loss": 0.3552, "step": 33775 }, { "epoch": 1.5499977054747371, "grad_norm": 0.44218578934669495, "learning_rate": 4.89322878280493e-06, "loss": 0.2991, "step": 33776 }, { "epoch": 1.5500435959799916, "grad_norm": 0.41821911931037903, "learning_rate": 4.892983650935983e-06, "loss": 0.2794, "step": 33777 }, { "epoch": 1.5500894864852461, "grad_norm": 0.5973734855651855, "learning_rate": 4.8927385193243775e-06, "loss": 0.4021, "step": 33778 }, { "epoch": 1.5501353769905006, "grad_norm": 0.4495666027069092, "learning_rate": 4.892493387970699e-06, "loss": 0.3455, "step": 33779 }, { "epoch": 1.5501812674957551, "grad_norm": 0.4741359353065491, "learning_rate": 4.8922482568755425e-06, "loss": 0.3859, "step": 33780 }, { "epoch": 1.5502271580010096, "grad_norm": 0.4612380266189575, "learning_rate": 4.892003126039494e-06, "loss": 0.3678, "step": 33781 }, { "epoch": 1.5502730485062641, "grad_norm": 0.5034440159797668, "learning_rate": 4.891757995463144e-06, "loss": 0.4165, "step": 33782 }, { "epoch": 1.5503189390115186, "grad_norm": 0.43555501103401184, "learning_rate": 4.891512865147081e-06, "loss": 0.3019, "step": 33783 }, { "epoch": 1.5503648295167731, "grad_norm": 0.46017640829086304, "learning_rate": 4.891267735091898e-06, "loss": 0.3691, "step": 33784 }, { "epoch": 1.5504107200220274, "grad_norm": 0.48416104912757874, "learning_rate": 4.8910226052981785e-06, "loss": 0.4106, "step": 33785 }, { "epoch": 1.550456610527282, "grad_norm": 0.4599337577819824, "learning_rate": 4.890777475766518e-06, "loss": 0.324, "step": 33786 }, { "epoch": 1.5505025010325364, "grad_norm": 0.4213723838329315, "learning_rate": 4.890532346497502e-06, "loss": 0.2552, "step": 33787 }, { "epoch": 1.5505483915377907, "grad_norm": 0.4804609715938568, "learning_rate": 4.89028721749172e-06, "loss": 0.3838, "step": 33788 }, { "epoch": 1.5505942820430452, "grad_norm": 0.45456886291503906, "learning_rate": 4.890042088749765e-06, "loss": 0.3154, "step": 33789 }, { "epoch": 1.5506401725482997, "grad_norm": 0.4597671926021576, "learning_rate": 4.889796960272223e-06, "loss": 0.3319, "step": 33790 }, { "epoch": 1.5506860630535542, "grad_norm": 0.4779617488384247, "learning_rate": 4.889551832059682e-06, "loss": 0.3744, "step": 33791 }, { "epoch": 1.5507319535588087, "grad_norm": 0.4518517255783081, "learning_rate": 4.889306704112737e-06, "loss": 0.3044, "step": 33792 }, { "epoch": 1.5507778440640632, "grad_norm": 0.5017098188400269, "learning_rate": 4.889061576431975e-06, "loss": 0.3672, "step": 33793 }, { "epoch": 1.5508237345693177, "grad_norm": 0.42467671632766724, "learning_rate": 4.888816449017983e-06, "loss": 0.3012, "step": 33794 }, { "epoch": 1.5508696250745722, "grad_norm": 0.48134827613830566, "learning_rate": 4.8885713218713535e-06, "loss": 0.3808, "step": 33795 }, { "epoch": 1.5509155155798267, "grad_norm": 0.42205482721328735, "learning_rate": 4.8883261949926745e-06, "loss": 0.3089, "step": 33796 }, { "epoch": 1.5509614060850812, "grad_norm": 0.4670848548412323, "learning_rate": 4.888081068382534e-06, "loss": 0.357, "step": 33797 }, { "epoch": 1.5510072965903354, "grad_norm": 0.432318776845932, "learning_rate": 4.887835942041525e-06, "loss": 0.3048, "step": 33798 }, { "epoch": 1.55105318709559, "grad_norm": 0.48607152700424194, "learning_rate": 4.887590815970235e-06, "loss": 0.387, "step": 33799 }, { "epoch": 1.5510990776008444, "grad_norm": 0.45279988646507263, "learning_rate": 4.887345690169254e-06, "loss": 0.3218, "step": 33800 }, { "epoch": 1.5511449681060987, "grad_norm": 0.47188666462898254, "learning_rate": 4.88710056463917e-06, "loss": 0.3581, "step": 33801 }, { "epoch": 1.5511908586113532, "grad_norm": 0.5018243193626404, "learning_rate": 4.886855439380576e-06, "loss": 0.4253, "step": 33802 }, { "epoch": 1.5512367491166077, "grad_norm": 0.48465701937675476, "learning_rate": 4.8866103143940565e-06, "loss": 0.3564, "step": 33803 }, { "epoch": 1.5512826396218622, "grad_norm": 0.45508936047554016, "learning_rate": 4.886365189680203e-06, "loss": 0.3554, "step": 33804 }, { "epoch": 1.5513285301271167, "grad_norm": 0.43549874424934387, "learning_rate": 4.886120065239606e-06, "loss": 0.3096, "step": 33805 }, { "epoch": 1.5513744206323712, "grad_norm": 0.44993752241134644, "learning_rate": 4.8858749410728556e-06, "loss": 0.3267, "step": 33806 }, { "epoch": 1.5514203111376257, "grad_norm": 0.44739818572998047, "learning_rate": 4.885629817180539e-06, "loss": 0.3356, "step": 33807 }, { "epoch": 1.5514662016428802, "grad_norm": 0.5050371289253235, "learning_rate": 4.8853846935632465e-06, "loss": 0.402, "step": 33808 }, { "epoch": 1.5515120921481347, "grad_norm": 0.4535248279571533, "learning_rate": 4.885139570221568e-06, "loss": 0.3169, "step": 33809 }, { "epoch": 1.5515579826533892, "grad_norm": 0.44346967339515686, "learning_rate": 4.884894447156091e-06, "loss": 0.3021, "step": 33810 }, { "epoch": 1.5516038731586435, "grad_norm": 0.49705013632774353, "learning_rate": 4.884649324367409e-06, "loss": 0.3744, "step": 33811 }, { "epoch": 1.551649763663898, "grad_norm": 0.5165277719497681, "learning_rate": 4.8844042018561086e-06, "loss": 0.4018, "step": 33812 }, { "epoch": 1.5516956541691524, "grad_norm": 0.4656561315059662, "learning_rate": 4.884159079622778e-06, "loss": 0.3681, "step": 33813 }, { "epoch": 1.5517415446744067, "grad_norm": 0.4630248546600342, "learning_rate": 4.883913957668008e-06, "loss": 0.3793, "step": 33814 }, { "epoch": 1.5517874351796612, "grad_norm": 0.47022202610969543, "learning_rate": 4.883668835992391e-06, "loss": 0.3635, "step": 33815 }, { "epoch": 1.5518333256849157, "grad_norm": 0.4275858402252197, "learning_rate": 4.883423714596511e-06, "loss": 0.2863, "step": 33816 }, { "epoch": 1.5518792161901702, "grad_norm": 0.4666968286037445, "learning_rate": 4.883178593480962e-06, "loss": 0.3365, "step": 33817 }, { "epoch": 1.5519251066954247, "grad_norm": 0.4844528138637543, "learning_rate": 4.882933472646332e-06, "loss": 0.3906, "step": 33818 }, { "epoch": 1.5519709972006792, "grad_norm": 0.4875427782535553, "learning_rate": 4.882688352093206e-06, "loss": 0.382, "step": 33819 }, { "epoch": 1.5520168877059337, "grad_norm": 0.4495276212692261, "learning_rate": 4.882443231822181e-06, "loss": 0.3041, "step": 33820 }, { "epoch": 1.5520627782111882, "grad_norm": 0.48284369707107544, "learning_rate": 4.882198111833843e-06, "loss": 0.3244, "step": 33821 }, { "epoch": 1.5521086687164427, "grad_norm": 0.4668222963809967, "learning_rate": 4.88195299212878e-06, "loss": 0.3972, "step": 33822 }, { "epoch": 1.552154559221697, "grad_norm": 0.4829729199409485, "learning_rate": 4.881707872707584e-06, "loss": 0.3964, "step": 33823 }, { "epoch": 1.5522004497269515, "grad_norm": 0.46594879031181335, "learning_rate": 4.881462753570843e-06, "loss": 0.3513, "step": 33824 }, { "epoch": 1.552246340232206, "grad_norm": 0.46571871638298035, "learning_rate": 4.881217634719145e-06, "loss": 0.3806, "step": 33825 }, { "epoch": 1.5522922307374603, "grad_norm": 0.4769730269908905, "learning_rate": 4.880972516153083e-06, "loss": 0.3401, "step": 33826 }, { "epoch": 1.5523381212427148, "grad_norm": 0.44877541065216064, "learning_rate": 4.880727397873244e-06, "loss": 0.3344, "step": 33827 }, { "epoch": 1.5523840117479693, "grad_norm": 0.4549187123775482, "learning_rate": 4.880482279880218e-06, "loss": 0.3295, "step": 33828 }, { "epoch": 1.5524299022532237, "grad_norm": 0.5007297396659851, "learning_rate": 4.880237162174595e-06, "loss": 0.413, "step": 33829 }, { "epoch": 1.5524757927584782, "grad_norm": 0.545470654964447, "learning_rate": 4.879992044756964e-06, "loss": 0.4126, "step": 33830 }, { "epoch": 1.5525216832637327, "grad_norm": 0.4608706831932068, "learning_rate": 4.879746927627911e-06, "loss": 0.3789, "step": 33831 }, { "epoch": 1.5525675737689872, "grad_norm": 0.5089528560638428, "learning_rate": 4.879501810788032e-06, "loss": 0.3845, "step": 33832 }, { "epoch": 1.5526134642742417, "grad_norm": 0.5185964107513428, "learning_rate": 4.879256694237914e-06, "loss": 0.3725, "step": 33833 }, { "epoch": 1.5526593547794962, "grad_norm": 0.46582528948783875, "learning_rate": 4.8790115779781445e-06, "loss": 0.3253, "step": 33834 }, { "epoch": 1.5527052452847507, "grad_norm": 0.43996596336364746, "learning_rate": 4.878766462009311e-06, "loss": 0.312, "step": 33835 }, { "epoch": 1.552751135790005, "grad_norm": 0.44419464468955994, "learning_rate": 4.878521346332009e-06, "loss": 0.305, "step": 33836 }, { "epoch": 1.5527970262952595, "grad_norm": 0.448348730802536, "learning_rate": 4.878276230946824e-06, "loss": 0.3398, "step": 33837 }, { "epoch": 1.552842916800514, "grad_norm": 0.47825533151626587, "learning_rate": 4.878031115854346e-06, "loss": 0.4338, "step": 33838 }, { "epoch": 1.5528888073057683, "grad_norm": 0.5453977584838867, "learning_rate": 4.877786001055165e-06, "loss": 0.4315, "step": 33839 }, { "epoch": 1.5529346978110228, "grad_norm": 0.47256314754486084, "learning_rate": 4.877540886549871e-06, "loss": 0.3726, "step": 33840 }, { "epoch": 1.5529805883162773, "grad_norm": 0.4944014251232147, "learning_rate": 4.87729577233905e-06, "loss": 0.3931, "step": 33841 }, { "epoch": 1.5530264788215318, "grad_norm": 0.44876396656036377, "learning_rate": 4.877050658423296e-06, "loss": 0.3089, "step": 33842 }, { "epoch": 1.5530723693267863, "grad_norm": 0.47473201155662537, "learning_rate": 4.876805544803196e-06, "loss": 0.3499, "step": 33843 }, { "epoch": 1.5531182598320408, "grad_norm": 0.46753138303756714, "learning_rate": 4.8765604314793395e-06, "loss": 0.371, "step": 33844 }, { "epoch": 1.5531641503372953, "grad_norm": 0.46229952573776245, "learning_rate": 4.8763153184523165e-06, "loss": 0.3837, "step": 33845 }, { "epoch": 1.5532100408425498, "grad_norm": 0.4687906801700592, "learning_rate": 4.8760702057227165e-06, "loss": 0.3846, "step": 33846 }, { "epoch": 1.5532559313478043, "grad_norm": 0.49077412486076355, "learning_rate": 4.875825093291126e-06, "loss": 0.4523, "step": 33847 }, { "epoch": 1.5533018218530588, "grad_norm": 0.4721636474132538, "learning_rate": 4.87557998115814e-06, "loss": 0.3509, "step": 33848 }, { "epoch": 1.553347712358313, "grad_norm": 0.4530836343765259, "learning_rate": 4.875334869324344e-06, "loss": 0.3197, "step": 33849 }, { "epoch": 1.5533936028635675, "grad_norm": 0.4735397696495056, "learning_rate": 4.875089757790328e-06, "loss": 0.3672, "step": 33850 }, { "epoch": 1.553439493368822, "grad_norm": 0.45790648460388184, "learning_rate": 4.874844646556682e-06, "loss": 0.3592, "step": 33851 }, { "epoch": 1.5534853838740763, "grad_norm": 0.46510395407676697, "learning_rate": 4.874599535623995e-06, "loss": 0.3506, "step": 33852 }, { "epoch": 1.5535312743793308, "grad_norm": 0.4455546438694, "learning_rate": 4.874354424992855e-06, "loss": 0.2868, "step": 33853 }, { "epoch": 1.5535771648845853, "grad_norm": 0.4468923509120941, "learning_rate": 4.8741093146638555e-06, "loss": 0.304, "step": 33854 }, { "epoch": 1.5536230553898398, "grad_norm": 0.4555032551288605, "learning_rate": 4.873864204637583e-06, "loss": 0.3476, "step": 33855 }, { "epoch": 1.5536689458950943, "grad_norm": 0.509987473487854, "learning_rate": 4.873619094914626e-06, "loss": 0.4246, "step": 33856 }, { "epoch": 1.5537148364003488, "grad_norm": 0.5118496417999268, "learning_rate": 4.873373985495576e-06, "loss": 0.416, "step": 33857 }, { "epoch": 1.5537607269056033, "grad_norm": 0.5022135972976685, "learning_rate": 4.873128876381022e-06, "loss": 0.3652, "step": 33858 }, { "epoch": 1.5538066174108578, "grad_norm": 0.5145373344421387, "learning_rate": 4.872883767571551e-06, "loss": 0.3888, "step": 33859 }, { "epoch": 1.5538525079161123, "grad_norm": 0.4669910967350006, "learning_rate": 4.872638659067756e-06, "loss": 0.3428, "step": 33860 }, { "epoch": 1.5538983984213666, "grad_norm": 0.49525442719459534, "learning_rate": 4.872393550870226e-06, "loss": 0.3538, "step": 33861 }, { "epoch": 1.553944288926621, "grad_norm": 0.49172645807266235, "learning_rate": 4.872148442979546e-06, "loss": 0.3442, "step": 33862 }, { "epoch": 1.5539901794318756, "grad_norm": 0.45497870445251465, "learning_rate": 4.8719033353963104e-06, "loss": 0.3487, "step": 33863 }, { "epoch": 1.55403606993713, "grad_norm": 0.4599754214286804, "learning_rate": 4.871658228121108e-06, "loss": 0.3309, "step": 33864 }, { "epoch": 1.5540819604423843, "grad_norm": 0.4922204315662384, "learning_rate": 4.871413121154527e-06, "loss": 0.3742, "step": 33865 }, { "epoch": 1.5541278509476388, "grad_norm": 0.4618149995803833, "learning_rate": 4.871168014497154e-06, "loss": 0.3987, "step": 33866 }, { "epoch": 1.5541737414528933, "grad_norm": 0.47546014189720154, "learning_rate": 4.870922908149584e-06, "loss": 0.3534, "step": 33867 }, { "epoch": 1.5542196319581478, "grad_norm": 0.5193238854408264, "learning_rate": 4.870677802112404e-06, "loss": 0.368, "step": 33868 }, { "epoch": 1.5542655224634023, "grad_norm": 0.47295698523521423, "learning_rate": 4.8704326963862e-06, "loss": 0.3828, "step": 33869 }, { "epoch": 1.5543114129686568, "grad_norm": 0.4816264510154724, "learning_rate": 4.870187590971567e-06, "loss": 0.338, "step": 33870 }, { "epoch": 1.5543573034739113, "grad_norm": 0.49063390493392944, "learning_rate": 4.869942485869091e-06, "loss": 0.4119, "step": 33871 }, { "epoch": 1.5544031939791658, "grad_norm": 0.4739210307598114, "learning_rate": 4.869697381079363e-06, "loss": 0.3441, "step": 33872 }, { "epoch": 1.5544490844844203, "grad_norm": 0.43170270323753357, "learning_rate": 4.869452276602971e-06, "loss": 0.2702, "step": 33873 }, { "epoch": 1.5544949749896746, "grad_norm": 0.5101765990257263, "learning_rate": 4.869207172440506e-06, "loss": 0.4116, "step": 33874 }, { "epoch": 1.554540865494929, "grad_norm": 0.46140211820602417, "learning_rate": 4.868962068592554e-06, "loss": 0.3369, "step": 33875 }, { "epoch": 1.5545867560001836, "grad_norm": 0.47751447558403015, "learning_rate": 4.868716965059709e-06, "loss": 0.3614, "step": 33876 }, { "epoch": 1.5546326465054379, "grad_norm": 0.556000292301178, "learning_rate": 4.868471861842559e-06, "loss": 0.3779, "step": 33877 }, { "epoch": 1.5546785370106924, "grad_norm": 0.4481445550918579, "learning_rate": 4.868226758941691e-06, "loss": 0.36, "step": 33878 }, { "epoch": 1.5547244275159469, "grad_norm": 0.4806668162345886, "learning_rate": 4.867981656357696e-06, "loss": 0.3993, "step": 33879 }, { "epoch": 1.5547703180212014, "grad_norm": 0.4933127164840698, "learning_rate": 4.867736554091165e-06, "loss": 0.437, "step": 33880 }, { "epoch": 1.5548162085264559, "grad_norm": 0.514406681060791, "learning_rate": 4.867491452142684e-06, "loss": 0.4964, "step": 33881 }, { "epoch": 1.5548620990317104, "grad_norm": 0.4856951832771301, "learning_rate": 4.867246350512846e-06, "loss": 0.3641, "step": 33882 }, { "epoch": 1.5549079895369649, "grad_norm": 0.4681156873703003, "learning_rate": 4.867001249202238e-06, "loss": 0.3538, "step": 33883 }, { "epoch": 1.5549538800422193, "grad_norm": 0.49690771102905273, "learning_rate": 4.8667561482114485e-06, "loss": 0.3906, "step": 33884 }, { "epoch": 1.5549997705474738, "grad_norm": 0.4789225459098816, "learning_rate": 4.86651104754107e-06, "loss": 0.3344, "step": 33885 }, { "epoch": 1.5550456610527283, "grad_norm": 0.44636818766593933, "learning_rate": 4.86626594719169e-06, "loss": 0.3151, "step": 33886 }, { "epoch": 1.5550915515579826, "grad_norm": 0.47394487261772156, "learning_rate": 4.866020847163898e-06, "loss": 0.336, "step": 33887 }, { "epoch": 1.5551374420632371, "grad_norm": 0.4233240485191345, "learning_rate": 4.865775747458284e-06, "loss": 0.3012, "step": 33888 }, { "epoch": 1.5551833325684916, "grad_norm": 0.4341496229171753, "learning_rate": 4.865530648075437e-06, "loss": 0.3237, "step": 33889 }, { "epoch": 1.555229223073746, "grad_norm": 0.46069103479385376, "learning_rate": 4.865285549015944e-06, "loss": 0.3352, "step": 33890 }, { "epoch": 1.5552751135790004, "grad_norm": 0.46911177039146423, "learning_rate": 4.8650404502804e-06, "loss": 0.365, "step": 33891 }, { "epoch": 1.5553210040842549, "grad_norm": 0.4479244649410248, "learning_rate": 4.8647953518693906e-06, "loss": 0.3249, "step": 33892 }, { "epoch": 1.5553668945895094, "grad_norm": 0.4458649158477783, "learning_rate": 4.8645502537835045e-06, "loss": 0.2635, "step": 33893 }, { "epoch": 1.5554127850947639, "grad_norm": 0.4572649300098419, "learning_rate": 4.864305156023333e-06, "loss": 0.3201, "step": 33894 }, { "epoch": 1.5554586756000184, "grad_norm": 0.552188515663147, "learning_rate": 4.864060058589465e-06, "loss": 0.4649, "step": 33895 }, { "epoch": 1.5555045661052729, "grad_norm": 0.4331375062465668, "learning_rate": 4.863814961482487e-06, "loss": 0.2927, "step": 33896 }, { "epoch": 1.5555504566105274, "grad_norm": 0.507425844669342, "learning_rate": 4.863569864702994e-06, "loss": 0.4455, "step": 33897 }, { "epoch": 1.5555963471157819, "grad_norm": 0.4967997968196869, "learning_rate": 4.863324768251573e-06, "loss": 0.4153, "step": 33898 }, { "epoch": 1.5556422376210364, "grad_norm": 0.4184669256210327, "learning_rate": 4.8630796721288126e-06, "loss": 0.2793, "step": 33899 }, { "epoch": 1.5556881281262906, "grad_norm": 0.48595064878463745, "learning_rate": 4.8628345763353e-06, "loss": 0.3584, "step": 33900 }, { "epoch": 1.5557340186315451, "grad_norm": 0.4529782235622406, "learning_rate": 4.862589480871628e-06, "loss": 0.3472, "step": 33901 }, { "epoch": 1.5557799091367996, "grad_norm": 0.6260877251625061, "learning_rate": 4.862344385738386e-06, "loss": 0.3535, "step": 33902 }, { "epoch": 1.555825799642054, "grad_norm": 0.5443751215934753, "learning_rate": 4.8620992909361605e-06, "loss": 0.3639, "step": 33903 }, { "epoch": 1.5558716901473084, "grad_norm": 0.4476192891597748, "learning_rate": 4.861854196465545e-06, "loss": 0.3083, "step": 33904 }, { "epoch": 1.555917580652563, "grad_norm": 0.5101179480552673, "learning_rate": 4.861609102327125e-06, "loss": 0.3973, "step": 33905 }, { "epoch": 1.5559634711578174, "grad_norm": 0.456595242023468, "learning_rate": 4.86136400852149e-06, "loss": 0.3082, "step": 33906 }, { "epoch": 1.556009361663072, "grad_norm": 0.47844791412353516, "learning_rate": 4.861118915049233e-06, "loss": 0.3799, "step": 33907 }, { "epoch": 1.5560552521683264, "grad_norm": 0.5073503255844116, "learning_rate": 4.860873821910942e-06, "loss": 0.4056, "step": 33908 }, { "epoch": 1.556101142673581, "grad_norm": 0.47735118865966797, "learning_rate": 4.860628729107203e-06, "loss": 0.3456, "step": 33909 }, { "epoch": 1.5561470331788354, "grad_norm": 0.47785258293151855, "learning_rate": 4.860383636638609e-06, "loss": 0.3569, "step": 33910 }, { "epoch": 1.55619292368409, "grad_norm": 0.4961502254009247, "learning_rate": 4.860138544505749e-06, "loss": 0.3668, "step": 33911 }, { "epoch": 1.5562388141893442, "grad_norm": 0.5004842877388, "learning_rate": 4.85989345270921e-06, "loss": 0.4564, "step": 33912 }, { "epoch": 1.5562847046945987, "grad_norm": 0.458984911441803, "learning_rate": 4.859648361249585e-06, "loss": 0.3555, "step": 33913 }, { "epoch": 1.5563305951998532, "grad_norm": 0.49291127920150757, "learning_rate": 4.8594032701274605e-06, "loss": 0.4534, "step": 33914 }, { "epoch": 1.5563764857051074, "grad_norm": 0.4456261098384857, "learning_rate": 4.859158179343425e-06, "loss": 0.3136, "step": 33915 }, { "epoch": 1.556422376210362, "grad_norm": 0.49355629086494446, "learning_rate": 4.858913088898072e-06, "loss": 0.3645, "step": 33916 }, { "epoch": 1.5564682667156164, "grad_norm": 0.45331108570098877, "learning_rate": 4.8586679987919884e-06, "loss": 0.358, "step": 33917 }, { "epoch": 1.556514157220871, "grad_norm": 0.48834094405174255, "learning_rate": 4.858422909025762e-06, "loss": 0.3444, "step": 33918 }, { "epoch": 1.5565600477261254, "grad_norm": 0.49738866090774536, "learning_rate": 4.858177819599985e-06, "loss": 0.385, "step": 33919 }, { "epoch": 1.55660593823138, "grad_norm": 0.46537724137306213, "learning_rate": 4.857932730515246e-06, "loss": 0.3607, "step": 33920 }, { "epoch": 1.5566518287366344, "grad_norm": 0.4307282269001007, "learning_rate": 4.857687641772133e-06, "loss": 0.2924, "step": 33921 }, { "epoch": 1.556697719241889, "grad_norm": 0.487947553396225, "learning_rate": 4.8574425533712376e-06, "loss": 0.3758, "step": 33922 }, { "epoch": 1.5567436097471434, "grad_norm": 0.4825795292854309, "learning_rate": 4.857197465313147e-06, "loss": 0.4028, "step": 33923 }, { "epoch": 1.556789500252398, "grad_norm": 0.47242680191993713, "learning_rate": 4.856952377598451e-06, "loss": 0.3635, "step": 33924 }, { "epoch": 1.5568353907576522, "grad_norm": 0.4348750114440918, "learning_rate": 4.8567072902277405e-06, "loss": 0.3598, "step": 33925 }, { "epoch": 1.5568812812629067, "grad_norm": 0.44803258776664734, "learning_rate": 4.856462203201603e-06, "loss": 0.3264, "step": 33926 }, { "epoch": 1.5569271717681612, "grad_norm": 0.41285648941993713, "learning_rate": 4.856217116520627e-06, "loss": 0.2523, "step": 33927 }, { "epoch": 1.5569730622734155, "grad_norm": 0.4857197701931, "learning_rate": 4.8559720301854055e-06, "loss": 0.3668, "step": 33928 }, { "epoch": 1.55701895277867, "grad_norm": 0.44854727387428284, "learning_rate": 4.855726944196526e-06, "loss": 0.347, "step": 33929 }, { "epoch": 1.5570648432839245, "grad_norm": 0.4953623414039612, "learning_rate": 4.855481858554578e-06, "loss": 0.3463, "step": 33930 }, { "epoch": 1.557110733789179, "grad_norm": 0.46095573902130127, "learning_rate": 4.855236773260149e-06, "loss": 0.3247, "step": 33931 }, { "epoch": 1.5571566242944335, "grad_norm": 0.45630112290382385, "learning_rate": 4.854991688313831e-06, "loss": 0.3528, "step": 33932 }, { "epoch": 1.557202514799688, "grad_norm": 0.48347222805023193, "learning_rate": 4.854746603716212e-06, "loss": 0.3713, "step": 33933 }, { "epoch": 1.5572484053049425, "grad_norm": 0.4526872932910919, "learning_rate": 4.85450151946788e-06, "loss": 0.2854, "step": 33934 }, { "epoch": 1.557294295810197, "grad_norm": 0.44880005717277527, "learning_rate": 4.854256435569428e-06, "loss": 0.3217, "step": 33935 }, { "epoch": 1.5573401863154515, "grad_norm": 0.4592137336730957, "learning_rate": 4.854011352021443e-06, "loss": 0.3149, "step": 33936 }, { "epoch": 1.557386076820706, "grad_norm": 0.4712860584259033, "learning_rate": 4.853766268824513e-06, "loss": 0.3554, "step": 33937 }, { "epoch": 1.5574319673259602, "grad_norm": 0.47146177291870117, "learning_rate": 4.8535211859792305e-06, "loss": 0.355, "step": 33938 }, { "epoch": 1.5574778578312147, "grad_norm": 0.44241660833358765, "learning_rate": 4.853276103486183e-06, "loss": 0.347, "step": 33939 }, { "epoch": 1.5575237483364692, "grad_norm": 0.4568447172641754, "learning_rate": 4.8530310213459585e-06, "loss": 0.3133, "step": 33940 }, { "epoch": 1.5575696388417235, "grad_norm": 0.42443621158599854, "learning_rate": 4.8527859395591504e-06, "loss": 0.2782, "step": 33941 }, { "epoch": 1.557615529346978, "grad_norm": 0.515719473361969, "learning_rate": 4.852540858126345e-06, "loss": 0.4186, "step": 33942 }, { "epoch": 1.5576614198522325, "grad_norm": 0.48515060544013977, "learning_rate": 4.852295777048131e-06, "loss": 0.3629, "step": 33943 }, { "epoch": 1.557707310357487, "grad_norm": 0.4753115773200989, "learning_rate": 4.852050696325101e-06, "loss": 0.3363, "step": 33944 }, { "epoch": 1.5577532008627415, "grad_norm": 0.49088215827941895, "learning_rate": 4.851805615957842e-06, "loss": 0.3581, "step": 33945 }, { "epoch": 1.557799091367996, "grad_norm": 0.46973127126693726, "learning_rate": 4.851560535946943e-06, "loss": 0.3565, "step": 33946 }, { "epoch": 1.5578449818732505, "grad_norm": 0.4709365963935852, "learning_rate": 4.851315456292995e-06, "loss": 0.3577, "step": 33947 }, { "epoch": 1.557890872378505, "grad_norm": 0.4910786747932434, "learning_rate": 4.851070376996586e-06, "loss": 0.3675, "step": 33948 }, { "epoch": 1.5579367628837595, "grad_norm": 0.47230836749076843, "learning_rate": 4.850825298058304e-06, "loss": 0.3772, "step": 33949 }, { "epoch": 1.5579826533890138, "grad_norm": 0.4961956739425659, "learning_rate": 4.850580219478742e-06, "loss": 0.4197, "step": 33950 }, { "epoch": 1.5580285438942683, "grad_norm": 0.5157335996627808, "learning_rate": 4.850335141258488e-06, "loss": 0.4177, "step": 33951 }, { "epoch": 1.5580744343995228, "grad_norm": 0.4636906385421753, "learning_rate": 4.850090063398129e-06, "loss": 0.3721, "step": 33952 }, { "epoch": 1.5581203249047773, "grad_norm": 0.4237014949321747, "learning_rate": 4.849844985898257e-06, "loss": 0.2916, "step": 33953 }, { "epoch": 1.5581662154100315, "grad_norm": 0.45102742314338684, "learning_rate": 4.84959990875946e-06, "loss": 0.3904, "step": 33954 }, { "epoch": 1.558212105915286, "grad_norm": 0.4883011281490326, "learning_rate": 4.849354831982327e-06, "loss": 0.3798, "step": 33955 }, { "epoch": 1.5582579964205405, "grad_norm": 0.4765281081199646, "learning_rate": 4.849109755567449e-06, "loss": 0.3675, "step": 33956 }, { "epoch": 1.558303886925795, "grad_norm": 0.47588610649108887, "learning_rate": 4.848864679515415e-06, "loss": 0.3783, "step": 33957 }, { "epoch": 1.5583497774310495, "grad_norm": 0.475922167301178, "learning_rate": 4.848619603826813e-06, "loss": 0.3743, "step": 33958 }, { "epoch": 1.558395667936304, "grad_norm": 0.4717026948928833, "learning_rate": 4.848374528502234e-06, "loss": 0.3588, "step": 33959 }, { "epoch": 1.5584415584415585, "grad_norm": 0.4554486572742462, "learning_rate": 4.848129453542266e-06, "loss": 0.3006, "step": 33960 }, { "epoch": 1.558487448946813, "grad_norm": 0.474334180355072, "learning_rate": 4.847884378947497e-06, "loss": 0.3537, "step": 33961 }, { "epoch": 1.5585333394520675, "grad_norm": 0.5182164907455444, "learning_rate": 4.84763930471852e-06, "loss": 0.429, "step": 33962 }, { "epoch": 1.5585792299573218, "grad_norm": 0.5130590796470642, "learning_rate": 4.847394230855922e-06, "loss": 0.426, "step": 33963 }, { "epoch": 1.5586251204625763, "grad_norm": 0.4523591995239258, "learning_rate": 4.847149157360293e-06, "loss": 0.3373, "step": 33964 }, { "epoch": 1.5586710109678308, "grad_norm": 0.49015969038009644, "learning_rate": 4.846904084232221e-06, "loss": 0.3784, "step": 33965 }, { "epoch": 1.558716901473085, "grad_norm": 0.4569879174232483, "learning_rate": 4.846659011472297e-06, "loss": 0.3484, "step": 33966 }, { "epoch": 1.5587627919783396, "grad_norm": 0.5073875784873962, "learning_rate": 4.846413939081111e-06, "loss": 0.4182, "step": 33967 }, { "epoch": 1.558808682483594, "grad_norm": 0.4717338979244232, "learning_rate": 4.846168867059249e-06, "loss": 0.3355, "step": 33968 }, { "epoch": 1.5588545729888486, "grad_norm": 0.4369087815284729, "learning_rate": 4.845923795407303e-06, "loss": 0.3232, "step": 33969 }, { "epoch": 1.558900463494103, "grad_norm": 0.4897747039794922, "learning_rate": 4.845678724125863e-06, "loss": 0.366, "step": 33970 }, { "epoch": 1.5589463539993575, "grad_norm": 0.43615251779556274, "learning_rate": 4.845433653215513e-06, "loss": 0.2948, "step": 33971 }, { "epoch": 1.558992244504612, "grad_norm": 0.4515037536621094, "learning_rate": 4.84518858267685e-06, "loss": 0.3914, "step": 33972 }, { "epoch": 1.5590381350098665, "grad_norm": 0.48962491750717163, "learning_rate": 4.844943512510458e-06, "loss": 0.4044, "step": 33973 }, { "epoch": 1.559084025515121, "grad_norm": 0.4202832579612732, "learning_rate": 4.844698442716928e-06, "loss": 0.3008, "step": 33974 }, { "epoch": 1.5591299160203755, "grad_norm": 0.4623993933200836, "learning_rate": 4.8444533732968504e-06, "loss": 0.3411, "step": 33975 }, { "epoch": 1.5591758065256298, "grad_norm": 0.48730501532554626, "learning_rate": 4.844208304250813e-06, "loss": 0.362, "step": 33976 }, { "epoch": 1.5592216970308843, "grad_norm": 0.4625155031681061, "learning_rate": 4.843963235579403e-06, "loss": 0.3407, "step": 33977 }, { "epoch": 1.5592675875361388, "grad_norm": 0.4783336818218231, "learning_rate": 4.843718167283215e-06, "loss": 0.3721, "step": 33978 }, { "epoch": 1.559313478041393, "grad_norm": 0.4403071403503418, "learning_rate": 4.843473099362835e-06, "loss": 0.2825, "step": 33979 }, { "epoch": 1.5593593685466476, "grad_norm": 0.45905882120132446, "learning_rate": 4.8432280318188515e-06, "loss": 0.3395, "step": 33980 }, { "epoch": 1.559405259051902, "grad_norm": 0.5128629207611084, "learning_rate": 4.842982964651858e-06, "loss": 0.4177, "step": 33981 }, { "epoch": 1.5594511495571566, "grad_norm": 0.49125218391418457, "learning_rate": 4.842737897862439e-06, "loss": 0.3845, "step": 33982 }, { "epoch": 1.559497040062411, "grad_norm": 0.5071790218353271, "learning_rate": 4.842492831451184e-06, "loss": 0.4003, "step": 33983 }, { "epoch": 1.5595429305676656, "grad_norm": 0.48845842480659485, "learning_rate": 4.842247765418686e-06, "loss": 0.3466, "step": 33984 }, { "epoch": 1.55958882107292, "grad_norm": 0.4492848813533783, "learning_rate": 4.842002699765532e-06, "loss": 0.3156, "step": 33985 }, { "epoch": 1.5596347115781746, "grad_norm": 0.4443824887275696, "learning_rate": 4.841757634492312e-06, "loss": 0.3005, "step": 33986 }, { "epoch": 1.559680602083429, "grad_norm": 0.4452267587184906, "learning_rate": 4.841512569599615e-06, "loss": 0.3316, "step": 33987 }, { "epoch": 1.5597264925886836, "grad_norm": 0.49753159284591675, "learning_rate": 4.84126750508803e-06, "loss": 0.4079, "step": 33988 }, { "epoch": 1.5597723830939378, "grad_norm": 0.46810078620910645, "learning_rate": 4.841022440958145e-06, "loss": 0.3619, "step": 33989 }, { "epoch": 1.5598182735991923, "grad_norm": 0.4785688519477844, "learning_rate": 4.840777377210554e-06, "loss": 0.3596, "step": 33990 }, { "epoch": 1.5598641641044468, "grad_norm": 0.5039102435112, "learning_rate": 4.8405323138458415e-06, "loss": 0.3872, "step": 33991 }, { "epoch": 1.5599100546097011, "grad_norm": 0.469425767660141, "learning_rate": 4.840287250864597e-06, "loss": 0.3478, "step": 33992 }, { "epoch": 1.5599559451149556, "grad_norm": 0.4289983808994293, "learning_rate": 4.840042188267413e-06, "loss": 0.287, "step": 33993 }, { "epoch": 1.56000183562021, "grad_norm": 0.4520646929740906, "learning_rate": 4.839797126054877e-06, "loss": 0.3216, "step": 33994 }, { "epoch": 1.5600477261254646, "grad_norm": 0.4336785674095154, "learning_rate": 4.839552064227579e-06, "loss": 0.3121, "step": 33995 }, { "epoch": 1.560093616630719, "grad_norm": 0.4593063294887543, "learning_rate": 4.839307002786106e-06, "loss": 0.3403, "step": 33996 }, { "epoch": 1.5601395071359736, "grad_norm": 0.5014958381652832, "learning_rate": 4.83906194173105e-06, "loss": 0.3729, "step": 33997 }, { "epoch": 1.560185397641228, "grad_norm": 0.48236924409866333, "learning_rate": 4.838816881063e-06, "loss": 0.4369, "step": 33998 }, { "epoch": 1.5602312881464826, "grad_norm": 0.44015175104141235, "learning_rate": 4.838571820782542e-06, "loss": 0.3018, "step": 33999 }, { "epoch": 1.560277178651737, "grad_norm": 0.4763874411582947, "learning_rate": 4.838326760890269e-06, "loss": 0.3874, "step": 34000 }, { "epoch": 1.5603230691569914, "grad_norm": 0.47120100259780884, "learning_rate": 4.83808170138677e-06, "loss": 0.3173, "step": 34001 }, { "epoch": 1.5603689596622459, "grad_norm": 0.4392794668674469, "learning_rate": 4.837836642272632e-06, "loss": 0.2955, "step": 34002 }, { "epoch": 1.5604148501675004, "grad_norm": 0.4662547707557678, "learning_rate": 4.837591583548447e-06, "loss": 0.3381, "step": 34003 }, { "epoch": 1.5604607406727546, "grad_norm": 0.48285943269729614, "learning_rate": 4.837346525214804e-06, "loss": 0.3631, "step": 34004 }, { "epoch": 1.5605066311780091, "grad_norm": 0.46197715401649475, "learning_rate": 4.837101467272287e-06, "loss": 0.3778, "step": 34005 }, { "epoch": 1.5605525216832636, "grad_norm": 0.4374086558818817, "learning_rate": 4.836856409721493e-06, "loss": 0.3063, "step": 34006 }, { "epoch": 1.5605984121885181, "grad_norm": 0.4273760914802551, "learning_rate": 4.836611352563008e-06, "loss": 0.2891, "step": 34007 }, { "epoch": 1.5606443026937726, "grad_norm": 0.45668232440948486, "learning_rate": 4.836366295797419e-06, "loss": 0.3816, "step": 34008 }, { "epoch": 1.5606901931990271, "grad_norm": 0.4518336057662964, "learning_rate": 4.83612123942532e-06, "loss": 0.3523, "step": 34009 }, { "epoch": 1.5607360837042816, "grad_norm": 0.48512694239616394, "learning_rate": 4.835876183447296e-06, "loss": 0.395, "step": 34010 }, { "epoch": 1.5607819742095361, "grad_norm": 0.43445539474487305, "learning_rate": 4.835631127863937e-06, "loss": 0.3218, "step": 34011 }, { "epoch": 1.5608278647147906, "grad_norm": 0.4571727216243744, "learning_rate": 4.835386072675836e-06, "loss": 0.3799, "step": 34012 }, { "epoch": 1.5608737552200451, "grad_norm": 0.4810318350791931, "learning_rate": 4.835141017883579e-06, "loss": 0.3923, "step": 34013 }, { "epoch": 1.5609196457252994, "grad_norm": 0.4938943386077881, "learning_rate": 4.834895963487754e-06, "loss": 0.3989, "step": 34014 }, { "epoch": 1.560965536230554, "grad_norm": 0.4688873291015625, "learning_rate": 4.8346509094889525e-06, "loss": 0.3548, "step": 34015 }, { "epoch": 1.5610114267358084, "grad_norm": 0.498010516166687, "learning_rate": 4.834405855887765e-06, "loss": 0.3907, "step": 34016 }, { "epoch": 1.5610573172410627, "grad_norm": 0.44251519441604614, "learning_rate": 4.834160802684777e-06, "loss": 0.3361, "step": 34017 }, { "epoch": 1.5611032077463172, "grad_norm": 0.5103425979614258, "learning_rate": 4.833915749880582e-06, "loss": 0.4208, "step": 34018 }, { "epoch": 1.5611490982515717, "grad_norm": 0.45707976818084717, "learning_rate": 4.833670697475767e-06, "loss": 0.3433, "step": 34019 }, { "epoch": 1.5611949887568262, "grad_norm": 0.4793717861175537, "learning_rate": 4.833425645470919e-06, "loss": 0.3376, "step": 34020 }, { "epoch": 1.5612408792620807, "grad_norm": 0.4782590866088867, "learning_rate": 4.8331805938666316e-06, "loss": 0.3822, "step": 34021 }, { "epoch": 1.5612867697673352, "grad_norm": 0.42645901441574097, "learning_rate": 4.832935542663493e-06, "loss": 0.2685, "step": 34022 }, { "epoch": 1.5613326602725897, "grad_norm": 0.5145801305770874, "learning_rate": 4.83269049186209e-06, "loss": 0.3422, "step": 34023 }, { "epoch": 1.5613785507778442, "grad_norm": 0.47988617420196533, "learning_rate": 4.832445441463014e-06, "loss": 0.3738, "step": 34024 }, { "epoch": 1.5614244412830987, "grad_norm": 0.4541892111301422, "learning_rate": 4.832200391466855e-06, "loss": 0.3607, "step": 34025 }, { "epoch": 1.5614703317883531, "grad_norm": 0.48689472675323486, "learning_rate": 4.831955341874199e-06, "loss": 0.375, "step": 34026 }, { "epoch": 1.5615162222936074, "grad_norm": 0.5040265321731567, "learning_rate": 4.831710292685639e-06, "loss": 0.422, "step": 34027 }, { "epoch": 1.561562112798862, "grad_norm": 0.4834090769290924, "learning_rate": 4.831465243901763e-06, "loss": 0.3658, "step": 34028 }, { "epoch": 1.5616080033041164, "grad_norm": 0.4912855923175812, "learning_rate": 4.83122019552316e-06, "loss": 0.3417, "step": 34029 }, { "epoch": 1.5616538938093707, "grad_norm": 0.45810794830322266, "learning_rate": 4.830975147550417e-06, "loss": 0.3398, "step": 34030 }, { "epoch": 1.5616997843146252, "grad_norm": 0.47721803188323975, "learning_rate": 4.830730099984128e-06, "loss": 0.3506, "step": 34031 }, { "epoch": 1.5617456748198797, "grad_norm": 0.48147934675216675, "learning_rate": 4.830485052824879e-06, "loss": 0.4135, "step": 34032 }, { "epoch": 1.5617915653251342, "grad_norm": 0.45746269822120667, "learning_rate": 4.830240006073258e-06, "loss": 0.3532, "step": 34033 }, { "epoch": 1.5618374558303887, "grad_norm": 0.47519248723983765, "learning_rate": 4.829994959729858e-06, "loss": 0.3568, "step": 34034 }, { "epoch": 1.5618833463356432, "grad_norm": 0.4243800640106201, "learning_rate": 4.829749913795267e-06, "loss": 0.2965, "step": 34035 }, { "epoch": 1.5619292368408977, "grad_norm": 0.4840381443500519, "learning_rate": 4.829504868270071e-06, "loss": 0.3805, "step": 34036 }, { "epoch": 1.5619751273461522, "grad_norm": 0.4488910436630249, "learning_rate": 4.829259823154864e-06, "loss": 0.3313, "step": 34037 }, { "epoch": 1.5620210178514067, "grad_norm": 0.46201086044311523, "learning_rate": 4.829014778450234e-06, "loss": 0.3824, "step": 34038 }, { "epoch": 1.562066908356661, "grad_norm": 0.4562823474407196, "learning_rate": 4.828769734156768e-06, "loss": 0.3247, "step": 34039 }, { "epoch": 1.5621127988619155, "grad_norm": 0.4547005295753479, "learning_rate": 4.828524690275058e-06, "loss": 0.3168, "step": 34040 }, { "epoch": 1.56215868936717, "grad_norm": 0.47842517495155334, "learning_rate": 4.828279646805691e-06, "loss": 0.3479, "step": 34041 }, { "epoch": 1.5622045798724244, "grad_norm": 0.4355309009552002, "learning_rate": 4.828034603749256e-06, "loss": 0.2759, "step": 34042 }, { "epoch": 1.5622504703776787, "grad_norm": 0.49969831109046936, "learning_rate": 4.827789561106346e-06, "loss": 0.4011, "step": 34043 }, { "epoch": 1.5622963608829332, "grad_norm": 0.4737112820148468, "learning_rate": 4.827544518877547e-06, "loss": 0.3617, "step": 34044 }, { "epoch": 1.5623422513881877, "grad_norm": 0.45174118876457214, "learning_rate": 4.827299477063449e-06, "loss": 0.3112, "step": 34045 }, { "epoch": 1.5623881418934422, "grad_norm": 0.4617692828178406, "learning_rate": 4.827054435664641e-06, "loss": 0.3662, "step": 34046 }, { "epoch": 1.5624340323986967, "grad_norm": 0.4640926122665405, "learning_rate": 4.8268093946817125e-06, "loss": 0.3432, "step": 34047 }, { "epoch": 1.5624799229039512, "grad_norm": 0.45087364315986633, "learning_rate": 4.826564354115251e-06, "loss": 0.3364, "step": 34048 }, { "epoch": 1.5625258134092057, "grad_norm": 0.4648876190185547, "learning_rate": 4.8263193139658496e-06, "loss": 0.3828, "step": 34049 }, { "epoch": 1.5625717039144602, "grad_norm": 0.4506319463253021, "learning_rate": 4.826074274234096e-06, "loss": 0.3398, "step": 34050 }, { "epoch": 1.5626175944197147, "grad_norm": 0.4686102271080017, "learning_rate": 4.825829234920578e-06, "loss": 0.3428, "step": 34051 }, { "epoch": 1.562663484924969, "grad_norm": 0.5071650147438049, "learning_rate": 4.825584196025886e-06, "loss": 0.3691, "step": 34052 }, { "epoch": 1.5627093754302235, "grad_norm": 0.4780280888080597, "learning_rate": 4.8253391575506085e-06, "loss": 0.3536, "step": 34053 }, { "epoch": 1.562755265935478, "grad_norm": 0.41944676637649536, "learning_rate": 4.825094119495334e-06, "loss": 0.2997, "step": 34054 }, { "epoch": 1.5628011564407323, "grad_norm": 0.46455785632133484, "learning_rate": 4.8248490818606545e-06, "loss": 0.3131, "step": 34055 }, { "epoch": 1.5628470469459868, "grad_norm": 0.4990754723548889, "learning_rate": 4.82460404464716e-06, "loss": 0.3579, "step": 34056 }, { "epoch": 1.5628929374512412, "grad_norm": 0.4536486864089966, "learning_rate": 4.824359007855433e-06, "loss": 0.3371, "step": 34057 }, { "epoch": 1.5629388279564957, "grad_norm": 0.4606002867221832, "learning_rate": 4.824113971486069e-06, "loss": 0.3218, "step": 34058 }, { "epoch": 1.5629847184617502, "grad_norm": 0.503911554813385, "learning_rate": 4.823868935539656e-06, "loss": 0.4206, "step": 34059 }, { "epoch": 1.5630306089670047, "grad_norm": 0.4997771084308624, "learning_rate": 4.823623900016783e-06, "loss": 0.412, "step": 34060 }, { "epoch": 1.5630764994722592, "grad_norm": 0.48437240719795227, "learning_rate": 4.823378864918037e-06, "loss": 0.4098, "step": 34061 }, { "epoch": 1.5631223899775137, "grad_norm": 0.5180996060371399, "learning_rate": 4.8231338302440104e-06, "loss": 0.4011, "step": 34062 }, { "epoch": 1.5631682804827682, "grad_norm": 0.47583699226379395, "learning_rate": 4.822888795995291e-06, "loss": 0.3855, "step": 34063 }, { "epoch": 1.5632141709880227, "grad_norm": 0.4277336001396179, "learning_rate": 4.822643762172466e-06, "loss": 0.2978, "step": 34064 }, { "epoch": 1.563260061493277, "grad_norm": 0.48672977089881897, "learning_rate": 4.82239872877613e-06, "loss": 0.3907, "step": 34065 }, { "epoch": 1.5633059519985315, "grad_norm": 0.4795621633529663, "learning_rate": 4.822153695806868e-06, "loss": 0.3968, "step": 34066 }, { "epoch": 1.563351842503786, "grad_norm": 0.3996714949607849, "learning_rate": 4.821908663265269e-06, "loss": 0.2446, "step": 34067 }, { "epoch": 1.5633977330090403, "grad_norm": 0.4766741991043091, "learning_rate": 4.821663631151925e-06, "loss": 0.3754, "step": 34068 }, { "epoch": 1.5634436235142948, "grad_norm": 0.4672122597694397, "learning_rate": 4.821418599467424e-06, "loss": 0.3716, "step": 34069 }, { "epoch": 1.5634895140195493, "grad_norm": 0.4888591468334198, "learning_rate": 4.8211735682123515e-06, "loss": 0.3927, "step": 34070 }, { "epoch": 1.5635354045248038, "grad_norm": 0.5323549509048462, "learning_rate": 4.820928537387304e-06, "loss": 0.4709, "step": 34071 }, { "epoch": 1.5635812950300583, "grad_norm": 0.4848652184009552, "learning_rate": 4.8206835069928644e-06, "loss": 0.386, "step": 34072 }, { "epoch": 1.5636271855353128, "grad_norm": 0.43395841121673584, "learning_rate": 4.820438477029625e-06, "loss": 0.2939, "step": 34073 }, { "epoch": 1.5636730760405673, "grad_norm": 0.42950427532196045, "learning_rate": 4.820193447498175e-06, "loss": 0.2506, "step": 34074 }, { "epoch": 1.5637189665458218, "grad_norm": 0.4448913335800171, "learning_rate": 4.8199484183991044e-06, "loss": 0.3698, "step": 34075 }, { "epoch": 1.5637648570510763, "grad_norm": 0.4335491359233856, "learning_rate": 4.819703389732997e-06, "loss": 0.3255, "step": 34076 }, { "epoch": 1.5638107475563308, "grad_norm": 0.4486835300922394, "learning_rate": 4.819458361500449e-06, "loss": 0.3066, "step": 34077 }, { "epoch": 1.563856638061585, "grad_norm": 0.4256046712398529, "learning_rate": 4.819213333702048e-06, "loss": 0.295, "step": 34078 }, { "epoch": 1.5639025285668395, "grad_norm": 0.46987104415893555, "learning_rate": 4.818968306338378e-06, "loss": 0.3363, "step": 34079 }, { "epoch": 1.563948419072094, "grad_norm": 0.48400285840034485, "learning_rate": 4.818723279410035e-06, "loss": 0.3743, "step": 34080 }, { "epoch": 1.5639943095773483, "grad_norm": 0.4564042091369629, "learning_rate": 4.818478252917604e-06, "loss": 0.297, "step": 34081 }, { "epoch": 1.5640402000826028, "grad_norm": 0.46303045749664307, "learning_rate": 4.818233226861676e-06, "loss": 0.3235, "step": 34082 }, { "epoch": 1.5640860905878573, "grad_norm": 0.4713778495788574, "learning_rate": 4.81798820124284e-06, "loss": 0.3575, "step": 34083 }, { "epoch": 1.5641319810931118, "grad_norm": 0.5021894574165344, "learning_rate": 4.817743176061685e-06, "loss": 0.3805, "step": 34084 }, { "epoch": 1.5641778715983663, "grad_norm": 0.4478503465652466, "learning_rate": 4.817498151318798e-06, "loss": 0.2923, "step": 34085 }, { "epoch": 1.5642237621036208, "grad_norm": 0.46586576104164124, "learning_rate": 4.817253127014773e-06, "loss": 0.3668, "step": 34086 }, { "epoch": 1.5642696526088753, "grad_norm": 0.43985575437545776, "learning_rate": 4.817008103150196e-06, "loss": 0.3208, "step": 34087 }, { "epoch": 1.5643155431141298, "grad_norm": 0.5239508152008057, "learning_rate": 4.8167630797256554e-06, "loss": 0.4144, "step": 34088 }, { "epoch": 1.5643614336193843, "grad_norm": 0.5419965982437134, "learning_rate": 4.816518056741743e-06, "loss": 0.4223, "step": 34089 }, { "epoch": 1.5644073241246386, "grad_norm": 0.44531160593032837, "learning_rate": 4.816273034199047e-06, "loss": 0.331, "step": 34090 }, { "epoch": 1.564453214629893, "grad_norm": 0.45661619305610657, "learning_rate": 4.816028012098154e-06, "loss": 0.3363, "step": 34091 }, { "epoch": 1.5644991051351476, "grad_norm": 0.4739121198654175, "learning_rate": 4.815782990439658e-06, "loss": 0.3863, "step": 34092 }, { "epoch": 1.5645449956404018, "grad_norm": 0.390281081199646, "learning_rate": 4.815537969224145e-06, "loss": 0.2511, "step": 34093 }, { "epoch": 1.5645908861456563, "grad_norm": 0.4408140480518341, "learning_rate": 4.815292948452207e-06, "loss": 0.3199, "step": 34094 }, { "epoch": 1.5646367766509108, "grad_norm": 0.4515714645385742, "learning_rate": 4.815047928124428e-06, "loss": 0.3418, "step": 34095 }, { "epoch": 1.5646826671561653, "grad_norm": 0.43468818068504333, "learning_rate": 4.814802908241402e-06, "loss": 0.3364, "step": 34096 }, { "epoch": 1.5647285576614198, "grad_norm": 0.527330219745636, "learning_rate": 4.814557888803717e-06, "loss": 0.4352, "step": 34097 }, { "epoch": 1.5647744481666743, "grad_norm": 0.46747714281082153, "learning_rate": 4.814312869811959e-06, "loss": 0.333, "step": 34098 }, { "epoch": 1.5648203386719288, "grad_norm": 0.46116548776626587, "learning_rate": 4.814067851266723e-06, "loss": 0.3168, "step": 34099 }, { "epoch": 1.5648662291771833, "grad_norm": 0.43344518542289734, "learning_rate": 4.813822833168596e-06, "loss": 0.3143, "step": 34100 }, { "epoch": 1.5649121196824378, "grad_norm": 0.4438704550266266, "learning_rate": 4.813577815518162e-06, "loss": 0.3295, "step": 34101 }, { "epoch": 1.5649580101876923, "grad_norm": 0.43321946263313293, "learning_rate": 4.813332798316017e-06, "loss": 0.2744, "step": 34102 }, { "epoch": 1.5650039006929466, "grad_norm": 0.4740264117717743, "learning_rate": 4.8130877815627485e-06, "loss": 0.3326, "step": 34103 }, { "epoch": 1.565049791198201, "grad_norm": 0.4633159041404724, "learning_rate": 4.812842765258943e-06, "loss": 0.3567, "step": 34104 }, { "epoch": 1.5650956817034556, "grad_norm": 0.4954968988895416, "learning_rate": 4.812597749405193e-06, "loss": 0.3875, "step": 34105 }, { "epoch": 1.5651415722087099, "grad_norm": 0.4982178807258606, "learning_rate": 4.812352734002087e-06, "loss": 0.4469, "step": 34106 }, { "epoch": 1.5651874627139644, "grad_norm": 0.4895936846733093, "learning_rate": 4.81210771905021e-06, "loss": 0.3594, "step": 34107 }, { "epoch": 1.5652333532192189, "grad_norm": 0.4916960299015045, "learning_rate": 4.811862704550158e-06, "loss": 0.3417, "step": 34108 }, { "epoch": 1.5652792437244734, "grad_norm": 0.4567507803440094, "learning_rate": 4.811617690502516e-06, "loss": 0.3092, "step": 34109 }, { "epoch": 1.5653251342297279, "grad_norm": 0.44885534048080444, "learning_rate": 4.8113726769078726e-06, "loss": 0.336, "step": 34110 }, { "epoch": 1.5653710247349824, "grad_norm": 0.4257388412952423, "learning_rate": 4.81112766376682e-06, "loss": 0.2859, "step": 34111 }, { "epoch": 1.5654169152402369, "grad_norm": 0.4598163068294525, "learning_rate": 4.810882651079946e-06, "loss": 0.3342, "step": 34112 }, { "epoch": 1.5654628057454913, "grad_norm": 0.43642479181289673, "learning_rate": 4.810637638847837e-06, "loss": 0.3102, "step": 34113 }, { "epoch": 1.5655086962507458, "grad_norm": 0.5114754438400269, "learning_rate": 4.810392627071087e-06, "loss": 0.4316, "step": 34114 }, { "epoch": 1.5655545867560003, "grad_norm": 0.4800611138343811, "learning_rate": 4.8101476157502835e-06, "loss": 0.3557, "step": 34115 }, { "epoch": 1.5656004772612546, "grad_norm": 0.4600962996482849, "learning_rate": 4.809902604886013e-06, "loss": 0.3676, "step": 34116 }, { "epoch": 1.5656463677665091, "grad_norm": 0.456121563911438, "learning_rate": 4.809657594478868e-06, "loss": 0.3175, "step": 34117 }, { "epoch": 1.5656922582717636, "grad_norm": 0.5247764587402344, "learning_rate": 4.809412584529437e-06, "loss": 0.4591, "step": 34118 }, { "epoch": 1.565738148777018, "grad_norm": 0.44318264722824097, "learning_rate": 4.809167575038306e-06, "loss": 0.3368, "step": 34119 }, { "epoch": 1.5657840392822724, "grad_norm": 0.4649055302143097, "learning_rate": 4.808922566006069e-06, "loss": 0.3509, "step": 34120 }, { "epoch": 1.5658299297875269, "grad_norm": 0.42826753854751587, "learning_rate": 4.8086775574333124e-06, "loss": 0.2989, "step": 34121 }, { "epoch": 1.5658758202927814, "grad_norm": 0.4467582702636719, "learning_rate": 4.8084325493206255e-06, "loss": 0.3214, "step": 34122 }, { "epoch": 1.5659217107980359, "grad_norm": 0.4620489776134491, "learning_rate": 4.808187541668599e-06, "loss": 0.3586, "step": 34123 }, { "epoch": 1.5659676013032904, "grad_norm": 0.4536444842815399, "learning_rate": 4.807942534477821e-06, "loss": 0.3361, "step": 34124 }, { "epoch": 1.5660134918085449, "grad_norm": 0.4625416398048401, "learning_rate": 4.807697527748879e-06, "loss": 0.3724, "step": 34125 }, { "epoch": 1.5660593823137994, "grad_norm": 0.4803650677204132, "learning_rate": 4.807452521482365e-06, "loss": 0.3573, "step": 34126 }, { "epoch": 1.5661052728190539, "grad_norm": 0.5104102492332458, "learning_rate": 4.807207515678867e-06, "loss": 0.3246, "step": 34127 }, { "epoch": 1.5661511633243081, "grad_norm": 0.4634581208229065, "learning_rate": 4.806962510338974e-06, "loss": 0.3635, "step": 34128 }, { "epoch": 1.5661970538295626, "grad_norm": 0.4899982810020447, "learning_rate": 4.806717505463273e-06, "loss": 0.437, "step": 34129 }, { "epoch": 1.5662429443348171, "grad_norm": 0.485330194234848, "learning_rate": 4.806472501052357e-06, "loss": 0.3696, "step": 34130 }, { "epoch": 1.5662888348400716, "grad_norm": 0.4710325002670288, "learning_rate": 4.806227497106814e-06, "loss": 0.4068, "step": 34131 }, { "epoch": 1.566334725345326, "grad_norm": 0.49292463064193726, "learning_rate": 4.805982493627232e-06, "loss": 0.3812, "step": 34132 }, { "epoch": 1.5663806158505804, "grad_norm": 0.4320945739746094, "learning_rate": 4.805737490614202e-06, "loss": 0.2572, "step": 34133 }, { "epoch": 1.566426506355835, "grad_norm": 0.46861350536346436, "learning_rate": 4.805492488068312e-06, "loss": 0.396, "step": 34134 }, { "epoch": 1.5664723968610894, "grad_norm": 0.5624412298202515, "learning_rate": 4.805247485990149e-06, "loss": 0.3063, "step": 34135 }, { "epoch": 1.566518287366344, "grad_norm": 0.44513922929763794, "learning_rate": 4.805002484380306e-06, "loss": 0.3538, "step": 34136 }, { "epoch": 1.5665641778715984, "grad_norm": 0.4849376082420349, "learning_rate": 4.804757483239371e-06, "loss": 0.3716, "step": 34137 }, { "epoch": 1.566610068376853, "grad_norm": 0.508667528629303, "learning_rate": 4.804512482567931e-06, "loss": 0.3884, "step": 34138 }, { "epoch": 1.5666559588821074, "grad_norm": 0.4708024263381958, "learning_rate": 4.804267482366578e-06, "loss": 0.3486, "step": 34139 }, { "epoch": 1.566701849387362, "grad_norm": 0.4759385883808136, "learning_rate": 4.804022482635901e-06, "loss": 0.3555, "step": 34140 }, { "epoch": 1.5667477398926162, "grad_norm": 0.5195858478546143, "learning_rate": 4.803777483376484e-06, "loss": 0.4318, "step": 34141 }, { "epoch": 1.5667936303978707, "grad_norm": 0.4676797688007355, "learning_rate": 4.8035324845889246e-06, "loss": 0.3377, "step": 34142 }, { "epoch": 1.5668395209031252, "grad_norm": 0.4206415116786957, "learning_rate": 4.803287486273806e-06, "loss": 0.2664, "step": 34143 }, { "epoch": 1.5668854114083794, "grad_norm": 0.477561891078949, "learning_rate": 4.803042488431718e-06, "loss": 0.3829, "step": 34144 }, { "epoch": 1.566931301913634, "grad_norm": 0.4732050597667694, "learning_rate": 4.802797491063252e-06, "loss": 0.3805, "step": 34145 }, { "epoch": 1.5669771924188884, "grad_norm": 0.4746980369091034, "learning_rate": 4.802552494168995e-06, "loss": 0.3592, "step": 34146 }, { "epoch": 1.567023082924143, "grad_norm": 0.4743090569972992, "learning_rate": 4.802307497749537e-06, "loss": 0.3497, "step": 34147 }, { "epoch": 1.5670689734293974, "grad_norm": 0.4263301491737366, "learning_rate": 4.802062501805468e-06, "loss": 0.2692, "step": 34148 }, { "epoch": 1.567114863934652, "grad_norm": 0.5128852128982544, "learning_rate": 4.801817506337376e-06, "loss": 0.406, "step": 34149 }, { "epoch": 1.5671607544399064, "grad_norm": 0.43033042550086975, "learning_rate": 4.8015725113458485e-06, "loss": 0.2896, "step": 34150 }, { "epoch": 1.567206644945161, "grad_norm": 0.4806380271911621, "learning_rate": 4.801327516831478e-06, "loss": 0.3602, "step": 34151 }, { "epoch": 1.5672525354504154, "grad_norm": 0.43870940804481506, "learning_rate": 4.801082522794853e-06, "loss": 0.3172, "step": 34152 }, { "epoch": 1.56729842595567, "grad_norm": 0.474739670753479, "learning_rate": 4.800837529236559e-06, "loss": 0.3485, "step": 34153 }, { "epoch": 1.5673443164609242, "grad_norm": 0.488978773355484, "learning_rate": 4.80059253615719e-06, "loss": 0.3419, "step": 34154 }, { "epoch": 1.5673902069661787, "grad_norm": 0.4817551076412201, "learning_rate": 4.800347543557334e-06, "loss": 0.3756, "step": 34155 }, { "epoch": 1.5674360974714332, "grad_norm": 0.4170075058937073, "learning_rate": 4.800102551437576e-06, "loss": 0.2632, "step": 34156 }, { "epoch": 1.5674819879766875, "grad_norm": 0.45500314235687256, "learning_rate": 4.79985755979851e-06, "loss": 0.3362, "step": 34157 }, { "epoch": 1.567527878481942, "grad_norm": 0.47636979818344116, "learning_rate": 4.799612568640724e-06, "loss": 0.3839, "step": 34158 }, { "epoch": 1.5675737689871965, "grad_norm": 0.46664899587631226, "learning_rate": 4.799367577964807e-06, "loss": 0.3696, "step": 34159 }, { "epoch": 1.567619659492451, "grad_norm": 0.546671450138092, "learning_rate": 4.799122587771346e-06, "loss": 0.3555, "step": 34160 }, { "epoch": 1.5676655499977055, "grad_norm": 0.488545686006546, "learning_rate": 4.798877598060932e-06, "loss": 0.3409, "step": 34161 }, { "epoch": 1.56771144050296, "grad_norm": 0.44966620206832886, "learning_rate": 4.798632608834155e-06, "loss": 0.3389, "step": 34162 }, { "epoch": 1.5677573310082145, "grad_norm": 0.47057265043258667, "learning_rate": 4.798387620091601e-06, "loss": 0.3792, "step": 34163 }, { "epoch": 1.567803221513469, "grad_norm": 0.47142040729522705, "learning_rate": 4.7981426318338624e-06, "loss": 0.3777, "step": 34164 }, { "epoch": 1.5678491120187235, "grad_norm": 0.46498560905456543, "learning_rate": 4.797897644061527e-06, "loss": 0.3643, "step": 34165 }, { "epoch": 1.567895002523978, "grad_norm": 0.4516003131866455, "learning_rate": 4.797652656775184e-06, "loss": 0.3476, "step": 34166 }, { "epoch": 1.5679408930292322, "grad_norm": 0.44859495759010315, "learning_rate": 4.7974076699754234e-06, "loss": 0.3452, "step": 34167 }, { "epoch": 1.5679867835344867, "grad_norm": 0.47780841588974, "learning_rate": 4.797162683662834e-06, "loss": 0.388, "step": 34168 }, { "epoch": 1.5680326740397412, "grad_norm": 0.41988447308540344, "learning_rate": 4.796917697838002e-06, "loss": 0.3009, "step": 34169 }, { "epoch": 1.5680785645449955, "grad_norm": 0.4456281065940857, "learning_rate": 4.79667271250152e-06, "loss": 0.2994, "step": 34170 }, { "epoch": 1.56812445505025, "grad_norm": 0.4825037121772766, "learning_rate": 4.796427727653976e-06, "loss": 0.3701, "step": 34171 }, { "epoch": 1.5681703455555045, "grad_norm": 0.46703827381134033, "learning_rate": 4.796182743295957e-06, "loss": 0.3723, "step": 34172 }, { "epoch": 1.568216236060759, "grad_norm": 0.4766594171524048, "learning_rate": 4.795937759428056e-06, "loss": 0.3976, "step": 34173 }, { "epoch": 1.5682621265660135, "grad_norm": 0.4836577773094177, "learning_rate": 4.7956927760508615e-06, "loss": 0.4145, "step": 34174 }, { "epoch": 1.568308017071268, "grad_norm": 0.4990769326686859, "learning_rate": 4.7954477931649596e-06, "loss": 0.3345, "step": 34175 }, { "epoch": 1.5683539075765225, "grad_norm": 0.441350519657135, "learning_rate": 4.795202810770942e-06, "loss": 0.2888, "step": 34176 }, { "epoch": 1.568399798081777, "grad_norm": 0.47990211844444275, "learning_rate": 4.794957828869397e-06, "loss": 0.3361, "step": 34177 }, { "epoch": 1.5684456885870315, "grad_norm": 0.46796905994415283, "learning_rate": 4.794712847460912e-06, "loss": 0.3484, "step": 34178 }, { "epoch": 1.5684915790922858, "grad_norm": 0.4664016664028168, "learning_rate": 4.79446786654608e-06, "loss": 0.3224, "step": 34179 }, { "epoch": 1.5685374695975403, "grad_norm": 0.43120306730270386, "learning_rate": 4.794222886125487e-06, "loss": 0.2798, "step": 34180 }, { "epoch": 1.5685833601027948, "grad_norm": 0.45119747519493103, "learning_rate": 4.793977906199723e-06, "loss": 0.3149, "step": 34181 }, { "epoch": 1.568629250608049, "grad_norm": 0.4717291295528412, "learning_rate": 4.7937329267693776e-06, "loss": 0.3465, "step": 34182 }, { "epoch": 1.5686751411133035, "grad_norm": 0.5033001899719238, "learning_rate": 4.79348794783504e-06, "loss": 0.4083, "step": 34183 }, { "epoch": 1.568721031618558, "grad_norm": 0.5068426132202148, "learning_rate": 4.793242969397296e-06, "loss": 0.433, "step": 34184 }, { "epoch": 1.5687669221238125, "grad_norm": 0.4615301191806793, "learning_rate": 4.79299799145674e-06, "loss": 0.3624, "step": 34185 }, { "epoch": 1.568812812629067, "grad_norm": 0.5054537057876587, "learning_rate": 4.7927530140139575e-06, "loss": 0.4236, "step": 34186 }, { "epoch": 1.5688587031343215, "grad_norm": 0.45371052622795105, "learning_rate": 4.7925080370695385e-06, "loss": 0.3225, "step": 34187 }, { "epoch": 1.568904593639576, "grad_norm": 0.47033756971359253, "learning_rate": 4.792263060624073e-06, "loss": 0.3205, "step": 34188 }, { "epoch": 1.5689504841448305, "grad_norm": 0.4672520160675049, "learning_rate": 4.79201808467815e-06, "loss": 0.3325, "step": 34189 }, { "epoch": 1.568996374650085, "grad_norm": 0.46877363324165344, "learning_rate": 4.791773109232354e-06, "loss": 0.3716, "step": 34190 }, { "epoch": 1.5690422651553395, "grad_norm": 0.4696226418018341, "learning_rate": 4.791528134287283e-06, "loss": 0.347, "step": 34191 }, { "epoch": 1.5690881556605938, "grad_norm": 0.4276796281337738, "learning_rate": 4.791283159843518e-06, "loss": 0.2654, "step": 34192 }, { "epoch": 1.5691340461658483, "grad_norm": 0.47269725799560547, "learning_rate": 4.791038185901653e-06, "loss": 0.3592, "step": 34193 }, { "epoch": 1.5691799366711028, "grad_norm": 0.4391658902168274, "learning_rate": 4.790793212462272e-06, "loss": 0.302, "step": 34194 }, { "epoch": 1.569225827176357, "grad_norm": 0.46739891171455383, "learning_rate": 4.79054823952597e-06, "loss": 0.3401, "step": 34195 }, { "epoch": 1.5692717176816116, "grad_norm": 0.4465655982494354, "learning_rate": 4.790303267093333e-06, "loss": 0.3205, "step": 34196 }, { "epoch": 1.569317608186866, "grad_norm": 0.5543652772903442, "learning_rate": 4.79005829516495e-06, "loss": 0.499, "step": 34197 }, { "epoch": 1.5693634986921206, "grad_norm": 0.44123291969299316, "learning_rate": 4.789813323741411e-06, "loss": 0.3184, "step": 34198 }, { "epoch": 1.569409389197375, "grad_norm": 0.9951797127723694, "learning_rate": 4.789568352823304e-06, "loss": 0.4241, "step": 34199 }, { "epoch": 1.5694552797026295, "grad_norm": 0.518595814704895, "learning_rate": 4.789323382411218e-06, "loss": 0.3935, "step": 34200 }, { "epoch": 1.569501170207884, "grad_norm": 0.5077909827232361, "learning_rate": 4.7890784125057435e-06, "loss": 0.3957, "step": 34201 }, { "epoch": 1.5695470607131385, "grad_norm": 0.44167929887771606, "learning_rate": 4.78883344310747e-06, "loss": 0.2878, "step": 34202 }, { "epoch": 1.569592951218393, "grad_norm": 0.4390799105167389, "learning_rate": 4.788588474216984e-06, "loss": 0.298, "step": 34203 }, { "epoch": 1.5696388417236475, "grad_norm": 0.44792142510414124, "learning_rate": 4.788343505834877e-06, "loss": 0.3597, "step": 34204 }, { "epoch": 1.5696847322289018, "grad_norm": 0.4795165956020355, "learning_rate": 4.788098537961737e-06, "loss": 0.3339, "step": 34205 }, { "epoch": 1.5697306227341563, "grad_norm": 0.4769688844680786, "learning_rate": 4.787853570598151e-06, "loss": 0.3513, "step": 34206 }, { "epoch": 1.5697765132394108, "grad_norm": 0.4868701100349426, "learning_rate": 4.787608603744712e-06, "loss": 0.3512, "step": 34207 }, { "epoch": 1.569822403744665, "grad_norm": 0.4186769127845764, "learning_rate": 4.787363637402008e-06, "loss": 0.2903, "step": 34208 }, { "epoch": 1.5698682942499196, "grad_norm": 0.4714920222759247, "learning_rate": 4.7871186715706266e-06, "loss": 0.3919, "step": 34209 }, { "epoch": 1.569914184755174, "grad_norm": 0.475122332572937, "learning_rate": 4.786873706251158e-06, "loss": 0.3476, "step": 34210 }, { "epoch": 1.5699600752604286, "grad_norm": 0.46369802951812744, "learning_rate": 4.78662874144419e-06, "loss": 0.3482, "step": 34211 }, { "epoch": 1.570005965765683, "grad_norm": 0.46473565697669983, "learning_rate": 4.786383777150312e-06, "loss": 0.3476, "step": 34212 }, { "epoch": 1.5700518562709376, "grad_norm": 0.4648815393447876, "learning_rate": 4.786138813370116e-06, "loss": 0.3365, "step": 34213 }, { "epoch": 1.570097746776192, "grad_norm": 0.48359236121177673, "learning_rate": 4.785893850104187e-06, "loss": 0.3704, "step": 34214 }, { "epoch": 1.5701436372814466, "grad_norm": 0.45097431540489197, "learning_rate": 4.785648887353115e-06, "loss": 0.3388, "step": 34215 }, { "epoch": 1.570189527786701, "grad_norm": 0.49320828914642334, "learning_rate": 4.785403925117491e-06, "loss": 0.3925, "step": 34216 }, { "epoch": 1.5702354182919553, "grad_norm": 0.4789159595966339, "learning_rate": 4.785158963397903e-06, "loss": 0.398, "step": 34217 }, { "epoch": 1.5702813087972098, "grad_norm": 0.4815405309200287, "learning_rate": 4.784914002194939e-06, "loss": 0.351, "step": 34218 }, { "epoch": 1.5703271993024643, "grad_norm": 0.46454918384552, "learning_rate": 4.7846690415091896e-06, "loss": 0.3572, "step": 34219 }, { "epoch": 1.5703730898077188, "grad_norm": 0.48363715410232544, "learning_rate": 4.784424081341243e-06, "loss": 0.3929, "step": 34220 }, { "epoch": 1.5704189803129731, "grad_norm": 0.45669469237327576, "learning_rate": 4.784179121691687e-06, "loss": 0.3271, "step": 34221 }, { "epoch": 1.5704648708182276, "grad_norm": 0.4062530994415283, "learning_rate": 4.783934162561114e-06, "loss": 0.2591, "step": 34222 }, { "epoch": 1.570510761323482, "grad_norm": 0.5039300918579102, "learning_rate": 4.783689203950111e-06, "loss": 0.3835, "step": 34223 }, { "epoch": 1.5705566518287366, "grad_norm": 0.49327167868614197, "learning_rate": 4.783444245859268e-06, "loss": 0.3464, "step": 34224 }, { "epoch": 1.570602542333991, "grad_norm": 0.4999888241291046, "learning_rate": 4.783199288289171e-06, "loss": 0.3878, "step": 34225 }, { "epoch": 1.5706484328392456, "grad_norm": 0.43679288029670715, "learning_rate": 4.782954331240413e-06, "loss": 0.3191, "step": 34226 }, { "epoch": 1.5706943233445, "grad_norm": 0.4738425016403198, "learning_rate": 4.7827093747135815e-06, "loss": 0.3629, "step": 34227 }, { "epoch": 1.5707402138497546, "grad_norm": 0.46536460518836975, "learning_rate": 4.782464418709263e-06, "loss": 0.3416, "step": 34228 }, { "epoch": 1.570786104355009, "grad_norm": 0.5771710276603699, "learning_rate": 4.782219463228051e-06, "loss": 0.3923, "step": 34229 }, { "epoch": 1.5708319948602634, "grad_norm": 0.4636612832546234, "learning_rate": 4.781974508270532e-06, "loss": 0.3378, "step": 34230 }, { "epoch": 1.5708778853655179, "grad_norm": 0.43743571639060974, "learning_rate": 4.781729553837295e-06, "loss": 0.3274, "step": 34231 }, { "epoch": 1.5709237758707724, "grad_norm": 0.4824206531047821, "learning_rate": 4.7814845999289305e-06, "loss": 0.3621, "step": 34232 }, { "epoch": 1.5709696663760266, "grad_norm": 0.45043420791625977, "learning_rate": 4.781239646546026e-06, "loss": 0.3136, "step": 34233 }, { "epoch": 1.5710155568812811, "grad_norm": 0.46529099345207214, "learning_rate": 4.78099469368917e-06, "loss": 0.3439, "step": 34234 }, { "epoch": 1.5710614473865356, "grad_norm": 0.4561481773853302, "learning_rate": 4.780749741358955e-06, "loss": 0.332, "step": 34235 }, { "epoch": 1.5711073378917901, "grad_norm": 0.49924275279045105, "learning_rate": 4.7805047895559676e-06, "loss": 0.4402, "step": 34236 }, { "epoch": 1.5711532283970446, "grad_norm": 0.48606640100479126, "learning_rate": 4.780259838280794e-06, "loss": 0.368, "step": 34237 }, { "epoch": 1.5711991189022991, "grad_norm": 0.44855543971061707, "learning_rate": 4.780014887534028e-06, "loss": 0.3563, "step": 34238 }, { "epoch": 1.5712450094075536, "grad_norm": 0.4757257401943207, "learning_rate": 4.779769937316258e-06, "loss": 0.3406, "step": 34239 }, { "epoch": 1.5712908999128081, "grad_norm": 0.45499545335769653, "learning_rate": 4.779524987628069e-06, "loss": 0.3636, "step": 34240 }, { "epoch": 1.5713367904180626, "grad_norm": 0.4529830813407898, "learning_rate": 4.779280038470055e-06, "loss": 0.3125, "step": 34241 }, { "epoch": 1.5713826809233171, "grad_norm": 0.450122207403183, "learning_rate": 4.779035089842803e-06, "loss": 0.3131, "step": 34242 }, { "epoch": 1.5714285714285714, "grad_norm": 0.44742995500564575, "learning_rate": 4.778790141746899e-06, "loss": 0.2988, "step": 34243 }, { "epoch": 1.571474461933826, "grad_norm": 0.4290243983268738, "learning_rate": 4.778545194182937e-06, "loss": 0.2892, "step": 34244 }, { "epoch": 1.5715203524390804, "grad_norm": 0.44179683923721313, "learning_rate": 4.778300247151505e-06, "loss": 0.3292, "step": 34245 }, { "epoch": 1.5715662429443347, "grad_norm": 0.4820263087749481, "learning_rate": 4.778055300653189e-06, "loss": 0.3378, "step": 34246 }, { "epoch": 1.5716121334495892, "grad_norm": 0.45052969455718994, "learning_rate": 4.777810354688581e-06, "loss": 0.3183, "step": 34247 }, { "epoch": 1.5716580239548437, "grad_norm": 0.4890190064907074, "learning_rate": 4.77756540925827e-06, "loss": 0.3422, "step": 34248 }, { "epoch": 1.5717039144600982, "grad_norm": 0.4480682909488678, "learning_rate": 4.77732046436284e-06, "loss": 0.3448, "step": 34249 }, { "epoch": 1.5717498049653527, "grad_norm": 0.4340474605560303, "learning_rate": 4.777075520002887e-06, "loss": 0.3099, "step": 34250 }, { "epoch": 1.5717956954706072, "grad_norm": 0.5461264252662659, "learning_rate": 4.776830576178998e-06, "loss": 0.4269, "step": 34251 }, { "epoch": 1.5718415859758617, "grad_norm": 0.48331037163734436, "learning_rate": 4.7765856328917585e-06, "loss": 0.3697, "step": 34252 }, { "epoch": 1.5718874764811162, "grad_norm": 0.44253432750701904, "learning_rate": 4.776340690141762e-06, "loss": 0.3097, "step": 34253 }, { "epoch": 1.5719333669863707, "grad_norm": 0.4688449800014496, "learning_rate": 4.776095747929596e-06, "loss": 0.3254, "step": 34254 }, { "epoch": 1.5719792574916251, "grad_norm": 0.4992218017578125, "learning_rate": 4.775850806255845e-06, "loss": 0.37, "step": 34255 }, { "epoch": 1.5720251479968794, "grad_norm": 0.48047375679016113, "learning_rate": 4.775605865121106e-06, "loss": 0.3926, "step": 34256 }, { "epoch": 1.572071038502134, "grad_norm": 0.4353560209274292, "learning_rate": 4.7753609245259644e-06, "loss": 0.3177, "step": 34257 }, { "epoch": 1.5721169290073884, "grad_norm": 0.4713706374168396, "learning_rate": 4.7751159844710085e-06, "loss": 0.3953, "step": 34258 }, { "epoch": 1.5721628195126427, "grad_norm": 0.48382440209388733, "learning_rate": 4.774871044956825e-06, "loss": 0.3195, "step": 34259 }, { "epoch": 1.5722087100178972, "grad_norm": 0.4641120135784149, "learning_rate": 4.774626105984008e-06, "loss": 0.3647, "step": 34260 }, { "epoch": 1.5722546005231517, "grad_norm": 0.4703112840652466, "learning_rate": 4.774381167553143e-06, "loss": 0.363, "step": 34261 }, { "epoch": 1.5723004910284062, "grad_norm": 0.4731237292289734, "learning_rate": 4.774136229664821e-06, "loss": 0.387, "step": 34262 }, { "epoch": 1.5723463815336607, "grad_norm": 0.493058979511261, "learning_rate": 4.77389129231963e-06, "loss": 0.4028, "step": 34263 }, { "epoch": 1.5723922720389152, "grad_norm": 0.4944364130496979, "learning_rate": 4.773646355518158e-06, "loss": 0.3621, "step": 34264 }, { "epoch": 1.5724381625441697, "grad_norm": 0.4646797180175781, "learning_rate": 4.773401419260995e-06, "loss": 0.3789, "step": 34265 }, { "epoch": 1.5724840530494242, "grad_norm": 0.4787401854991913, "learning_rate": 4.77315648354873e-06, "loss": 0.4034, "step": 34266 }, { "epoch": 1.5725299435546787, "grad_norm": 0.45806455612182617, "learning_rate": 4.772911548381954e-06, "loss": 0.2936, "step": 34267 }, { "epoch": 1.572575834059933, "grad_norm": 0.4789261221885681, "learning_rate": 4.772666613761252e-06, "loss": 0.3535, "step": 34268 }, { "epoch": 1.5726217245651875, "grad_norm": 0.46552324295043945, "learning_rate": 4.772421679687216e-06, "loss": 0.3398, "step": 34269 }, { "epoch": 1.572667615070442, "grad_norm": 0.4678603708744049, "learning_rate": 4.772176746160435e-06, "loss": 0.329, "step": 34270 }, { "epoch": 1.5727135055756962, "grad_norm": 0.4397526681423187, "learning_rate": 4.771931813181495e-06, "loss": 0.3033, "step": 34271 }, { "epoch": 1.5727593960809507, "grad_norm": 0.4484117031097412, "learning_rate": 4.771686880750987e-06, "loss": 0.3258, "step": 34272 }, { "epoch": 1.5728052865862052, "grad_norm": 0.4853927791118622, "learning_rate": 4.771441948869502e-06, "loss": 0.3384, "step": 34273 }, { "epoch": 1.5728511770914597, "grad_norm": 0.45818454027175903, "learning_rate": 4.771197017537625e-06, "loss": 0.3108, "step": 34274 }, { "epoch": 1.5728970675967142, "grad_norm": 0.4507461488246918, "learning_rate": 4.770952086755949e-06, "loss": 0.2873, "step": 34275 }, { "epoch": 1.5729429581019687, "grad_norm": 0.44860732555389404, "learning_rate": 4.7707071565250605e-06, "loss": 0.3003, "step": 34276 }, { "epoch": 1.5729888486072232, "grad_norm": 0.5549141764640808, "learning_rate": 4.770462226845547e-06, "loss": 0.3647, "step": 34277 }, { "epoch": 1.5730347391124777, "grad_norm": 0.4198443591594696, "learning_rate": 4.770217297718e-06, "loss": 0.2805, "step": 34278 }, { "epoch": 1.5730806296177322, "grad_norm": 0.46926406025886536, "learning_rate": 4.769972369143011e-06, "loss": 0.3821, "step": 34279 }, { "epoch": 1.5731265201229867, "grad_norm": 0.4389936029911041, "learning_rate": 4.7697274411211615e-06, "loss": 0.341, "step": 34280 }, { "epoch": 1.573172410628241, "grad_norm": 0.46562328934669495, "learning_rate": 4.769482513653047e-06, "loss": 0.3503, "step": 34281 }, { "epoch": 1.5732183011334955, "grad_norm": 0.4734017848968506, "learning_rate": 4.7692375867392545e-06, "loss": 0.3886, "step": 34282 }, { "epoch": 1.57326419163875, "grad_norm": 0.5125962495803833, "learning_rate": 4.7689926603803725e-06, "loss": 0.3927, "step": 34283 }, { "epoch": 1.5733100821440043, "grad_norm": 0.4991873502731323, "learning_rate": 4.76874773457699e-06, "loss": 0.4365, "step": 34284 }, { "epoch": 1.5733559726492587, "grad_norm": 0.4640202522277832, "learning_rate": 4.768502809329697e-06, "loss": 0.3498, "step": 34285 }, { "epoch": 1.5734018631545132, "grad_norm": 0.43064460158348083, "learning_rate": 4.7682578846390794e-06, "loss": 0.3084, "step": 34286 }, { "epoch": 1.5734477536597677, "grad_norm": 0.5220872759819031, "learning_rate": 4.76801296050573e-06, "loss": 0.4229, "step": 34287 }, { "epoch": 1.5734936441650222, "grad_norm": 0.5002781748771667, "learning_rate": 4.767768036930236e-06, "loss": 0.4249, "step": 34288 }, { "epoch": 1.5735395346702767, "grad_norm": 0.4616413116455078, "learning_rate": 4.767523113913188e-06, "loss": 0.3422, "step": 34289 }, { "epoch": 1.5735854251755312, "grad_norm": 0.46458911895751953, "learning_rate": 4.767278191455172e-06, "loss": 0.3258, "step": 34290 }, { "epoch": 1.5736313156807857, "grad_norm": 0.4459450840950012, "learning_rate": 4.767033269556779e-06, "loss": 0.3385, "step": 34291 }, { "epoch": 1.5736772061860402, "grad_norm": 0.4159854054450989, "learning_rate": 4.7667883482185975e-06, "loss": 0.2662, "step": 34292 }, { "epoch": 1.5737230966912947, "grad_norm": 0.4617675542831421, "learning_rate": 4.7665434274412145e-06, "loss": 0.327, "step": 34293 }, { "epoch": 1.573768987196549, "grad_norm": 0.49081090092658997, "learning_rate": 4.766298507225223e-06, "loss": 0.3825, "step": 34294 }, { "epoch": 1.5738148777018035, "grad_norm": 0.45218536257743835, "learning_rate": 4.7660535875712105e-06, "loss": 0.3218, "step": 34295 }, { "epoch": 1.573860768207058, "grad_norm": 0.4742080271244049, "learning_rate": 4.7658086684797636e-06, "loss": 0.3635, "step": 34296 }, { "epoch": 1.5739066587123123, "grad_norm": 0.4839315712451935, "learning_rate": 4.765563749951474e-06, "loss": 0.3748, "step": 34297 }, { "epoch": 1.5739525492175668, "grad_norm": 0.5110636353492737, "learning_rate": 4.765318831986929e-06, "loss": 0.3995, "step": 34298 }, { "epoch": 1.5739984397228213, "grad_norm": 0.4119517207145691, "learning_rate": 4.765073914586717e-06, "loss": 0.2342, "step": 34299 }, { "epoch": 1.5740443302280758, "grad_norm": 0.4538610279560089, "learning_rate": 4.76482899775143e-06, "loss": 0.3613, "step": 34300 }, { "epoch": 1.5740902207333303, "grad_norm": 0.4636071026325226, "learning_rate": 4.764584081481655e-06, "loss": 0.3573, "step": 34301 }, { "epoch": 1.5741361112385848, "grad_norm": 0.47652003169059753, "learning_rate": 4.764339165777979e-06, "loss": 0.342, "step": 34302 }, { "epoch": 1.5741820017438393, "grad_norm": 0.4804535210132599, "learning_rate": 4.7640942506409955e-06, "loss": 0.3357, "step": 34303 }, { "epoch": 1.5742278922490938, "grad_norm": 0.47071439027786255, "learning_rate": 4.76384933607129e-06, "loss": 0.3578, "step": 34304 }, { "epoch": 1.5742737827543483, "grad_norm": 0.46876317262649536, "learning_rate": 4.763604422069451e-06, "loss": 0.3319, "step": 34305 }, { "epoch": 1.5743196732596025, "grad_norm": 0.48963093757629395, "learning_rate": 4.763359508636071e-06, "loss": 0.3898, "step": 34306 }, { "epoch": 1.574365563764857, "grad_norm": 0.5622459053993225, "learning_rate": 4.763114595771736e-06, "loss": 0.4548, "step": 34307 }, { "epoch": 1.5744114542701115, "grad_norm": 0.491474986076355, "learning_rate": 4.762869683477033e-06, "loss": 0.354, "step": 34308 }, { "epoch": 1.574457344775366, "grad_norm": 0.42723217606544495, "learning_rate": 4.762624771752556e-06, "loss": 0.2997, "step": 34309 }, { "epoch": 1.5745032352806203, "grad_norm": 0.475632905960083, "learning_rate": 4.762379860598892e-06, "loss": 0.3394, "step": 34310 }, { "epoch": 1.5745491257858748, "grad_norm": 0.47174015641212463, "learning_rate": 4.762134950016628e-06, "loss": 0.355, "step": 34311 }, { "epoch": 1.5745950162911293, "grad_norm": 0.44658371806144714, "learning_rate": 4.761890040006355e-06, "loss": 0.3171, "step": 34312 }, { "epoch": 1.5746409067963838, "grad_norm": 0.48598629236221313, "learning_rate": 4.761645130568663e-06, "loss": 0.3298, "step": 34313 }, { "epoch": 1.5746867973016383, "grad_norm": 0.49269095063209534, "learning_rate": 4.761400221704135e-06, "loss": 0.3751, "step": 34314 }, { "epoch": 1.5747326878068928, "grad_norm": 0.5349605083465576, "learning_rate": 4.761155313413367e-06, "loss": 0.4361, "step": 34315 }, { "epoch": 1.5747785783121473, "grad_norm": 0.498081237077713, "learning_rate": 4.7609104056969455e-06, "loss": 0.429, "step": 34316 }, { "epoch": 1.5748244688174018, "grad_norm": 0.4983614683151245, "learning_rate": 4.760665498555457e-06, "loss": 0.3954, "step": 34317 }, { "epoch": 1.5748703593226563, "grad_norm": 0.4624408483505249, "learning_rate": 4.760420591989495e-06, "loss": 0.3643, "step": 34318 }, { "epoch": 1.5749162498279106, "grad_norm": 0.488615483045578, "learning_rate": 4.760175685999644e-06, "loss": 0.3905, "step": 34319 }, { "epoch": 1.574962140333165, "grad_norm": 0.43220335245132446, "learning_rate": 4.759930780586494e-06, "loss": 0.2832, "step": 34320 }, { "epoch": 1.5750080308384196, "grad_norm": 0.478853702545166, "learning_rate": 4.759685875750637e-06, "loss": 0.3576, "step": 34321 }, { "epoch": 1.5750539213436738, "grad_norm": 0.4924907386302948, "learning_rate": 4.759440971492659e-06, "loss": 0.4253, "step": 34322 }, { "epoch": 1.5750998118489283, "grad_norm": 0.47425708174705505, "learning_rate": 4.759196067813151e-06, "loss": 0.3665, "step": 34323 }, { "epoch": 1.5751457023541828, "grad_norm": 0.4653985798358917, "learning_rate": 4.758951164712697e-06, "loss": 0.3873, "step": 34324 }, { "epoch": 1.5751915928594373, "grad_norm": 0.4825315475463867, "learning_rate": 4.758706262191892e-06, "loss": 0.3667, "step": 34325 }, { "epoch": 1.5752374833646918, "grad_norm": 0.4883703887462616, "learning_rate": 4.758461360251321e-06, "loss": 0.3823, "step": 34326 }, { "epoch": 1.5752833738699463, "grad_norm": 0.4457126557826996, "learning_rate": 4.758216458891574e-06, "loss": 0.3409, "step": 34327 }, { "epoch": 1.5753292643752008, "grad_norm": 0.44325193762779236, "learning_rate": 4.757971558113241e-06, "loss": 0.2965, "step": 34328 }, { "epoch": 1.5753751548804553, "grad_norm": 0.44058743119239807, "learning_rate": 4.7577266579169104e-06, "loss": 0.3097, "step": 34329 }, { "epoch": 1.5754210453857098, "grad_norm": 0.4704897105693817, "learning_rate": 4.757481758303168e-06, "loss": 0.401, "step": 34330 }, { "epoch": 1.5754669358909643, "grad_norm": 0.4642285406589508, "learning_rate": 4.757236859272609e-06, "loss": 0.3256, "step": 34331 }, { "epoch": 1.5755128263962186, "grad_norm": 0.4991973340511322, "learning_rate": 4.756991960825817e-06, "loss": 0.2969, "step": 34332 }, { "epoch": 1.575558716901473, "grad_norm": 0.45864588022232056, "learning_rate": 4.756747062963382e-06, "loss": 0.3523, "step": 34333 }, { "epoch": 1.5756046074067276, "grad_norm": 0.47496944665908813, "learning_rate": 4.756502165685895e-06, "loss": 0.3945, "step": 34334 }, { "epoch": 1.5756504979119819, "grad_norm": 0.4338351786136627, "learning_rate": 4.756257268993943e-06, "loss": 0.2993, "step": 34335 }, { "epoch": 1.5756963884172364, "grad_norm": 0.5312501192092896, "learning_rate": 4.7560123728881135e-06, "loss": 0.3927, "step": 34336 }, { "epoch": 1.5757422789224909, "grad_norm": 0.5302611589431763, "learning_rate": 4.755767477369e-06, "loss": 0.4067, "step": 34337 }, { "epoch": 1.5757881694277454, "grad_norm": 0.485738068819046, "learning_rate": 4.755522582437187e-06, "loss": 0.3868, "step": 34338 }, { "epoch": 1.5758340599329999, "grad_norm": 0.477957546710968, "learning_rate": 4.7552776880932655e-06, "loss": 0.3648, "step": 34339 }, { "epoch": 1.5758799504382544, "grad_norm": 0.49350807070732117, "learning_rate": 4.755032794337824e-06, "loss": 0.3746, "step": 34340 }, { "epoch": 1.5759258409435088, "grad_norm": 0.44400399923324585, "learning_rate": 4.754787901171453e-06, "loss": 0.3179, "step": 34341 }, { "epoch": 1.5759717314487633, "grad_norm": 0.4642792344093323, "learning_rate": 4.754543008594736e-06, "loss": 0.3393, "step": 34342 }, { "epoch": 1.5760176219540178, "grad_norm": 0.4435717463493347, "learning_rate": 4.754298116608268e-06, "loss": 0.3187, "step": 34343 }, { "epoch": 1.5760635124592723, "grad_norm": 0.44980141520500183, "learning_rate": 4.754053225212636e-06, "loss": 0.2971, "step": 34344 }, { "epoch": 1.5761094029645266, "grad_norm": 0.5081884264945984, "learning_rate": 4.753808334408427e-06, "loss": 0.3972, "step": 34345 }, { "epoch": 1.5761552934697811, "grad_norm": 0.5386309623718262, "learning_rate": 4.753563444196232e-06, "loss": 0.4065, "step": 34346 }, { "epoch": 1.5762011839750356, "grad_norm": 0.48380935192108154, "learning_rate": 4.753318554576639e-06, "loss": 0.4014, "step": 34347 }, { "epoch": 1.57624707448029, "grad_norm": 0.4552316963672638, "learning_rate": 4.753073665550236e-06, "loss": 0.3113, "step": 34348 }, { "epoch": 1.5762929649855444, "grad_norm": 0.4361666142940521, "learning_rate": 4.752828777117614e-06, "loss": 0.3196, "step": 34349 }, { "epoch": 1.5763388554907989, "grad_norm": 0.4735325574874878, "learning_rate": 4.752583889279361e-06, "loss": 0.3143, "step": 34350 }, { "epoch": 1.5763847459960534, "grad_norm": 0.4618490934371948, "learning_rate": 4.752339002036063e-06, "loss": 0.3311, "step": 34351 }, { "epoch": 1.5764306365013079, "grad_norm": 0.4503174126148224, "learning_rate": 4.752094115388315e-06, "loss": 0.3268, "step": 34352 }, { "epoch": 1.5764765270065624, "grad_norm": 0.447312593460083, "learning_rate": 4.751849229336701e-06, "loss": 0.326, "step": 34353 }, { "epoch": 1.5765224175118169, "grad_norm": 0.4811963438987732, "learning_rate": 4.751604343881811e-06, "loss": 0.3692, "step": 34354 }, { "epoch": 1.5765683080170714, "grad_norm": 0.4374050199985504, "learning_rate": 4.751359459024235e-06, "loss": 0.3211, "step": 34355 }, { "epoch": 1.5766141985223259, "grad_norm": 0.44680067896842957, "learning_rate": 4.7511145747645614e-06, "loss": 0.3461, "step": 34356 }, { "epoch": 1.5766600890275801, "grad_norm": 0.4474058449268341, "learning_rate": 4.750869691103378e-06, "loss": 0.3348, "step": 34357 }, { "epoch": 1.5767059795328346, "grad_norm": 0.5072599649429321, "learning_rate": 4.750624808041273e-06, "loss": 0.4137, "step": 34358 }, { "epoch": 1.5767518700380891, "grad_norm": 0.5107139945030212, "learning_rate": 4.750379925578838e-06, "loss": 0.4494, "step": 34359 }, { "epoch": 1.5767977605433434, "grad_norm": 0.49037083983421326, "learning_rate": 4.750135043716661e-06, "loss": 0.3716, "step": 34360 }, { "epoch": 1.576843651048598, "grad_norm": 0.460433691740036, "learning_rate": 4.74989016245533e-06, "loss": 0.3412, "step": 34361 }, { "epoch": 1.5768895415538524, "grad_norm": 0.48525089025497437, "learning_rate": 4.749645281795434e-06, "loss": 0.4065, "step": 34362 }, { "epoch": 1.576935432059107, "grad_norm": 0.4107433259487152, "learning_rate": 4.749400401737562e-06, "loss": 0.2415, "step": 34363 }, { "epoch": 1.5769813225643614, "grad_norm": 0.4531669020652771, "learning_rate": 4.749155522282301e-06, "loss": 0.351, "step": 34364 }, { "epoch": 1.577027213069616, "grad_norm": 0.44392192363739014, "learning_rate": 4.748910643430245e-06, "loss": 0.3266, "step": 34365 }, { "epoch": 1.5770731035748704, "grad_norm": 0.4626510441303253, "learning_rate": 4.748665765181979e-06, "loss": 0.3606, "step": 34366 }, { "epoch": 1.577118994080125, "grad_norm": 0.4385238289833069, "learning_rate": 4.7484208875380914e-06, "loss": 0.3202, "step": 34367 }, { "epoch": 1.5771648845853794, "grad_norm": 0.4789722263813019, "learning_rate": 4.748176010499173e-06, "loss": 0.3724, "step": 34368 }, { "epoch": 1.577210775090634, "grad_norm": 0.45046213269233704, "learning_rate": 4.7479311340658115e-06, "loss": 0.3102, "step": 34369 }, { "epoch": 1.5772566655958882, "grad_norm": 0.45858317613601685, "learning_rate": 4.747686258238597e-06, "loss": 0.3367, "step": 34370 }, { "epoch": 1.5773025561011427, "grad_norm": 0.4763321876525879, "learning_rate": 4.747441383018116e-06, "loss": 0.3392, "step": 34371 }, { "epoch": 1.5773484466063972, "grad_norm": 0.4977017343044281, "learning_rate": 4.74719650840496e-06, "loss": 0.3626, "step": 34372 }, { "epoch": 1.5773943371116514, "grad_norm": 0.4347671866416931, "learning_rate": 4.746951634399714e-06, "loss": 0.3195, "step": 34373 }, { "epoch": 1.577440227616906, "grad_norm": 0.4792231619358063, "learning_rate": 4.746706761002972e-06, "loss": 0.3731, "step": 34374 }, { "epoch": 1.5774861181221604, "grad_norm": 0.43972858786582947, "learning_rate": 4.74646188821532e-06, "loss": 0.32, "step": 34375 }, { "epoch": 1.577532008627415, "grad_norm": 0.45526033639907837, "learning_rate": 4.746217016037346e-06, "loss": 0.3119, "step": 34376 }, { "epoch": 1.5775778991326694, "grad_norm": 0.42201414704322815, "learning_rate": 4.745972144469641e-06, "loss": 0.3001, "step": 34377 }, { "epoch": 1.577623789637924, "grad_norm": 0.43599236011505127, "learning_rate": 4.745727273512793e-06, "loss": 0.2981, "step": 34378 }, { "epoch": 1.5776696801431784, "grad_norm": 0.4688419997692108, "learning_rate": 4.745482403167389e-06, "loss": 0.3361, "step": 34379 }, { "epoch": 1.577715570648433, "grad_norm": 0.469624400138855, "learning_rate": 4.745237533434021e-06, "loss": 0.3369, "step": 34380 }, { "epoch": 1.5777614611536874, "grad_norm": 0.5070335268974304, "learning_rate": 4.744992664313277e-06, "loss": 0.443, "step": 34381 }, { "epoch": 1.577807351658942, "grad_norm": 0.4825596809387207, "learning_rate": 4.744747795805744e-06, "loss": 0.3459, "step": 34382 }, { "epoch": 1.5778532421641962, "grad_norm": 0.47976213693618774, "learning_rate": 4.7445029279120124e-06, "loss": 0.3265, "step": 34383 }, { "epoch": 1.5778991326694507, "grad_norm": 0.4541701376438141, "learning_rate": 4.744258060632671e-06, "loss": 0.3665, "step": 34384 }, { "epoch": 1.5779450231747052, "grad_norm": 0.4815753400325775, "learning_rate": 4.744013193968306e-06, "loss": 0.3918, "step": 34385 }, { "epoch": 1.5779909136799595, "grad_norm": 0.5039643049240112, "learning_rate": 4.74376832791951e-06, "loss": 0.3702, "step": 34386 }, { "epoch": 1.578036804185214, "grad_norm": 0.46420979499816895, "learning_rate": 4.743523462486871e-06, "loss": 0.3417, "step": 34387 }, { "epoch": 1.5780826946904685, "grad_norm": 0.4726588726043701, "learning_rate": 4.743278597670978e-06, "loss": 0.3472, "step": 34388 }, { "epoch": 1.578128585195723, "grad_norm": 0.44611823558807373, "learning_rate": 4.743033733472416e-06, "loss": 0.3012, "step": 34389 }, { "epoch": 1.5781744757009775, "grad_norm": 0.48438596725463867, "learning_rate": 4.74278886989178e-06, "loss": 0.3762, "step": 34390 }, { "epoch": 1.578220366206232, "grad_norm": 0.42503178119659424, "learning_rate": 4.742544006929654e-06, "loss": 0.3053, "step": 34391 }, { "epoch": 1.5782662567114865, "grad_norm": 0.46989893913269043, "learning_rate": 4.742299144586628e-06, "loss": 0.3484, "step": 34392 }, { "epoch": 1.578312147216741, "grad_norm": 0.45106247067451477, "learning_rate": 4.742054282863292e-06, "loss": 0.2922, "step": 34393 }, { "epoch": 1.5783580377219955, "grad_norm": 0.519317626953125, "learning_rate": 4.7418094217602335e-06, "loss": 0.4764, "step": 34394 }, { "epoch": 1.5784039282272497, "grad_norm": 0.4333176016807556, "learning_rate": 4.74156456127804e-06, "loss": 0.311, "step": 34395 }, { "epoch": 1.5784498187325042, "grad_norm": 0.4908159077167511, "learning_rate": 4.741319701417305e-06, "loss": 0.3912, "step": 34396 }, { "epoch": 1.5784957092377587, "grad_norm": 0.47922369837760925, "learning_rate": 4.741074842178614e-06, "loss": 0.4072, "step": 34397 }, { "epoch": 1.5785415997430132, "grad_norm": 0.44962525367736816, "learning_rate": 4.740829983562556e-06, "loss": 0.3539, "step": 34398 }, { "epoch": 1.5785874902482675, "grad_norm": 0.45752912759780884, "learning_rate": 4.7405851255697196e-06, "loss": 0.3128, "step": 34399 }, { "epoch": 1.578633380753522, "grad_norm": 0.4441259205341339, "learning_rate": 4.740340268200695e-06, "loss": 0.2774, "step": 34400 }, { "epoch": 1.5786792712587765, "grad_norm": 0.4637984037399292, "learning_rate": 4.740095411456068e-06, "loss": 0.3197, "step": 34401 }, { "epoch": 1.578725161764031, "grad_norm": 0.490584135055542, "learning_rate": 4.739850555336432e-06, "loss": 0.3632, "step": 34402 }, { "epoch": 1.5787710522692855, "grad_norm": 0.48650720715522766, "learning_rate": 4.7396056998423725e-06, "loss": 0.4131, "step": 34403 }, { "epoch": 1.57881694277454, "grad_norm": 0.47464945912361145, "learning_rate": 4.739360844974478e-06, "loss": 0.3429, "step": 34404 }, { "epoch": 1.5788628332797945, "grad_norm": 0.48285701870918274, "learning_rate": 4.73911599073334e-06, "loss": 0.3741, "step": 34405 }, { "epoch": 1.578908723785049, "grad_norm": 0.4599708318710327, "learning_rate": 4.7388711371195454e-06, "loss": 0.3155, "step": 34406 }, { "epoch": 1.5789546142903035, "grad_norm": 0.4671148955821991, "learning_rate": 4.738626284133682e-06, "loss": 0.3523, "step": 34407 }, { "epoch": 1.5790005047955578, "grad_norm": 0.5202903151512146, "learning_rate": 4.738381431776341e-06, "loss": 0.4559, "step": 34408 }, { "epoch": 1.5790463953008123, "grad_norm": 0.5136663913726807, "learning_rate": 4.73813658004811e-06, "loss": 0.4289, "step": 34409 }, { "epoch": 1.5790922858060668, "grad_norm": 0.48985564708709717, "learning_rate": 4.737891728949578e-06, "loss": 0.3463, "step": 34410 }, { "epoch": 1.579138176311321, "grad_norm": 0.49128296971321106, "learning_rate": 4.7376468784813334e-06, "loss": 0.3787, "step": 34411 }, { "epoch": 1.5791840668165755, "grad_norm": 0.44338926672935486, "learning_rate": 4.737402028643966e-06, "loss": 0.3158, "step": 34412 }, { "epoch": 1.57922995732183, "grad_norm": 0.47099071741104126, "learning_rate": 4.737157179438062e-06, "loss": 0.3998, "step": 34413 }, { "epoch": 1.5792758478270845, "grad_norm": 0.461872398853302, "learning_rate": 4.736912330864214e-06, "loss": 0.3558, "step": 34414 }, { "epoch": 1.579321738332339, "grad_norm": 0.482786625623703, "learning_rate": 4.7366674829230096e-06, "loss": 0.4006, "step": 34415 }, { "epoch": 1.5793676288375935, "grad_norm": 0.4322293698787689, "learning_rate": 4.7364226356150325e-06, "loss": 0.3336, "step": 34416 }, { "epoch": 1.579413519342848, "grad_norm": 0.4366132616996765, "learning_rate": 4.736177788940879e-06, "loss": 0.2899, "step": 34417 }, { "epoch": 1.5794594098481025, "grad_norm": 0.48266324400901794, "learning_rate": 4.735932942901136e-06, "loss": 0.3877, "step": 34418 }, { "epoch": 1.579505300353357, "grad_norm": 0.44812893867492676, "learning_rate": 4.735688097496387e-06, "loss": 0.2957, "step": 34419 }, { "epoch": 1.5795511908586115, "grad_norm": 0.4843303859233856, "learning_rate": 4.735443252727228e-06, "loss": 0.3638, "step": 34420 }, { "epoch": 1.5795970813638658, "grad_norm": 0.4521855413913727, "learning_rate": 4.735198408594244e-06, "loss": 0.2963, "step": 34421 }, { "epoch": 1.5796429718691203, "grad_norm": 0.6072704195976257, "learning_rate": 4.734953565098024e-06, "loss": 0.4828, "step": 34422 }, { "epoch": 1.5796888623743748, "grad_norm": 0.4128320813179016, "learning_rate": 4.7347087222391545e-06, "loss": 0.2606, "step": 34423 }, { "epoch": 1.579734752879629, "grad_norm": 0.49240946769714355, "learning_rate": 4.734463880018229e-06, "loss": 0.4032, "step": 34424 }, { "epoch": 1.5797806433848836, "grad_norm": 0.4533618688583374, "learning_rate": 4.734219038435835e-06, "loss": 0.3007, "step": 34425 }, { "epoch": 1.579826533890138, "grad_norm": 0.4841307997703552, "learning_rate": 4.733974197492558e-06, "loss": 0.355, "step": 34426 }, { "epoch": 1.5798724243953925, "grad_norm": 0.4938371181488037, "learning_rate": 4.7337293571889914e-06, "loss": 0.3939, "step": 34427 }, { "epoch": 1.579918314900647, "grad_norm": 0.46043580770492554, "learning_rate": 4.733484517525721e-06, "loss": 0.333, "step": 34428 }, { "epoch": 1.5799642054059015, "grad_norm": 0.4985210597515106, "learning_rate": 4.733239678503334e-06, "loss": 0.4072, "step": 34429 }, { "epoch": 1.580010095911156, "grad_norm": 0.45864444971084595, "learning_rate": 4.732994840122424e-06, "loss": 0.3443, "step": 34430 }, { "epoch": 1.5800559864164105, "grad_norm": 0.47806692123413086, "learning_rate": 4.732750002383577e-06, "loss": 0.3439, "step": 34431 }, { "epoch": 1.580101876921665, "grad_norm": 0.44951456785202026, "learning_rate": 4.7325051652873805e-06, "loss": 0.3151, "step": 34432 }, { "epoch": 1.5801477674269195, "grad_norm": 0.4935435354709625, "learning_rate": 4.732260328834426e-06, "loss": 0.37, "step": 34433 }, { "epoch": 1.5801936579321738, "grad_norm": 0.45162340998649597, "learning_rate": 4.732015493025301e-06, "loss": 0.3355, "step": 34434 }, { "epoch": 1.5802395484374283, "grad_norm": 0.4715065658092499, "learning_rate": 4.731770657860591e-06, "loss": 0.3739, "step": 34435 }, { "epoch": 1.5802854389426828, "grad_norm": 0.49243614077568054, "learning_rate": 4.731525823340893e-06, "loss": 0.3403, "step": 34436 }, { "epoch": 1.580331329447937, "grad_norm": 0.4590493440628052, "learning_rate": 4.731280989466788e-06, "loss": 0.3072, "step": 34437 }, { "epoch": 1.5803772199531916, "grad_norm": 0.49236413836479187, "learning_rate": 4.731036156238865e-06, "loss": 0.3745, "step": 34438 }, { "epoch": 1.580423110458446, "grad_norm": 0.46603336930274963, "learning_rate": 4.730791323657719e-06, "loss": 0.3957, "step": 34439 }, { "epoch": 1.5804690009637006, "grad_norm": 0.5645825862884521, "learning_rate": 4.730546491723934e-06, "loss": 0.4418, "step": 34440 }, { "epoch": 1.580514891468955, "grad_norm": 0.42145490646362305, "learning_rate": 4.730301660438099e-06, "loss": 0.3007, "step": 34441 }, { "epoch": 1.5805607819742096, "grad_norm": 0.47927382588386536, "learning_rate": 4.730056829800803e-06, "loss": 0.3513, "step": 34442 }, { "epoch": 1.580606672479464, "grad_norm": 0.4265795052051544, "learning_rate": 4.729811999812637e-06, "loss": 0.2761, "step": 34443 }, { "epoch": 1.5806525629847186, "grad_norm": 0.4777577519416809, "learning_rate": 4.729567170474185e-06, "loss": 0.3789, "step": 34444 }, { "epoch": 1.580698453489973, "grad_norm": 0.45921361446380615, "learning_rate": 4.72932234178604e-06, "loss": 0.3484, "step": 34445 }, { "epoch": 1.5807443439952273, "grad_norm": 0.47235363721847534, "learning_rate": 4.72907751374879e-06, "loss": 0.3943, "step": 34446 }, { "epoch": 1.5807902345004818, "grad_norm": 0.43517857789993286, "learning_rate": 4.728832686363023e-06, "loss": 0.3028, "step": 34447 }, { "epoch": 1.5808361250057363, "grad_norm": 0.5322061777114868, "learning_rate": 4.728587859629327e-06, "loss": 0.3722, "step": 34448 }, { "epoch": 1.5808820155109906, "grad_norm": 0.5091196894645691, "learning_rate": 4.728343033548293e-06, "loss": 0.2865, "step": 34449 }, { "epoch": 1.580927906016245, "grad_norm": 0.45518651604652405, "learning_rate": 4.728098208120505e-06, "loss": 0.3417, "step": 34450 }, { "epoch": 1.5809737965214996, "grad_norm": 0.4545513987541199, "learning_rate": 4.727853383346558e-06, "loss": 0.3546, "step": 34451 }, { "epoch": 1.581019687026754, "grad_norm": 0.473846971988678, "learning_rate": 4.727608559227037e-06, "loss": 0.3546, "step": 34452 }, { "epoch": 1.5810655775320086, "grad_norm": 0.49696484208106995, "learning_rate": 4.727363735762532e-06, "loss": 0.4247, "step": 34453 }, { "epoch": 1.581111468037263, "grad_norm": 0.45082351565361023, "learning_rate": 4.7271189129536295e-06, "loss": 0.3117, "step": 34454 }, { "epoch": 1.5811573585425176, "grad_norm": 0.4791335165500641, "learning_rate": 4.7268740908009216e-06, "loss": 0.3674, "step": 34455 }, { "epoch": 1.581203249047772, "grad_norm": 0.43058332800865173, "learning_rate": 4.726629269304995e-06, "loss": 0.2767, "step": 34456 }, { "epoch": 1.5812491395530266, "grad_norm": 0.4964630901813507, "learning_rate": 4.726384448466437e-06, "loss": 0.4038, "step": 34457 }, { "epoch": 1.581295030058281, "grad_norm": 0.44527408480644226, "learning_rate": 4.726139628285841e-06, "loss": 0.3645, "step": 34458 }, { "epoch": 1.5813409205635354, "grad_norm": 0.4617421627044678, "learning_rate": 4.725894808763791e-06, "loss": 0.3664, "step": 34459 }, { "epoch": 1.5813868110687899, "grad_norm": 0.5003541111946106, "learning_rate": 4.725649989900876e-06, "loss": 0.3968, "step": 34460 }, { "epoch": 1.5814327015740444, "grad_norm": 0.486860066652298, "learning_rate": 4.725405171697689e-06, "loss": 0.3814, "step": 34461 }, { "epoch": 1.5814785920792986, "grad_norm": 0.483610600233078, "learning_rate": 4.725160354154815e-06, "loss": 0.4072, "step": 34462 }, { "epoch": 1.5815244825845531, "grad_norm": 0.45767226815223694, "learning_rate": 4.724915537272843e-06, "loss": 0.3331, "step": 34463 }, { "epoch": 1.5815703730898076, "grad_norm": 0.4369322657585144, "learning_rate": 4.7246707210523626e-06, "loss": 0.322, "step": 34464 }, { "epoch": 1.5816162635950621, "grad_norm": 0.4593113958835602, "learning_rate": 4.724425905493963e-06, "loss": 0.299, "step": 34465 }, { "epoch": 1.5816621541003166, "grad_norm": 0.46761929988861084, "learning_rate": 4.724181090598229e-06, "loss": 0.3435, "step": 34466 }, { "epoch": 1.5817080446055711, "grad_norm": 0.44399577379226685, "learning_rate": 4.723936276365756e-06, "loss": 0.314, "step": 34467 }, { "epoch": 1.5817539351108256, "grad_norm": 0.479021817445755, "learning_rate": 4.723691462797128e-06, "loss": 0.37, "step": 34468 }, { "epoch": 1.5817998256160801, "grad_norm": 0.4903155565261841, "learning_rate": 4.723446649892934e-06, "loss": 0.4675, "step": 34469 }, { "epoch": 1.5818457161213346, "grad_norm": 0.45112359523773193, "learning_rate": 4.723201837653765e-06, "loss": 0.2817, "step": 34470 }, { "epoch": 1.5818916066265891, "grad_norm": 0.4757433533668518, "learning_rate": 4.722957026080208e-06, "loss": 0.3584, "step": 34471 }, { "epoch": 1.5819374971318434, "grad_norm": 0.46068274974823, "learning_rate": 4.72271221517285e-06, "loss": 0.3391, "step": 34472 }, { "epoch": 1.581983387637098, "grad_norm": 0.4295575022697449, "learning_rate": 4.7224674049322834e-06, "loss": 0.3029, "step": 34473 }, { "epoch": 1.5820292781423524, "grad_norm": 0.4634479582309723, "learning_rate": 4.722222595359095e-06, "loss": 0.3445, "step": 34474 }, { "epoch": 1.5820751686476067, "grad_norm": 0.46829524636268616, "learning_rate": 4.721977786453873e-06, "loss": 0.3813, "step": 34475 }, { "epoch": 1.5821210591528612, "grad_norm": 0.4492100179195404, "learning_rate": 4.721732978217207e-06, "loss": 0.3321, "step": 34476 }, { "epoch": 1.5821669496581157, "grad_norm": 0.4966422915458679, "learning_rate": 4.721488170649687e-06, "loss": 0.3577, "step": 34477 }, { "epoch": 1.5822128401633702, "grad_norm": 0.43524259328842163, "learning_rate": 4.721243363751896e-06, "loss": 0.2972, "step": 34478 }, { "epoch": 1.5822587306686247, "grad_norm": 0.4802364408969879, "learning_rate": 4.720998557524429e-06, "loss": 0.3793, "step": 34479 }, { "epoch": 1.5823046211738792, "grad_norm": 0.5156641602516174, "learning_rate": 4.720753751967875e-06, "loss": 0.4036, "step": 34480 }, { "epoch": 1.5823505116791337, "grad_norm": 0.483347624540329, "learning_rate": 4.720508947082816e-06, "loss": 0.3862, "step": 34481 }, { "epoch": 1.5823964021843882, "grad_norm": 0.4823780953884125, "learning_rate": 4.720264142869846e-06, "loss": 0.3742, "step": 34482 }, { "epoch": 1.5824422926896426, "grad_norm": 0.47350820899009705, "learning_rate": 4.720019339329554e-06, "loss": 0.3767, "step": 34483 }, { "epoch": 1.582488183194897, "grad_norm": 0.5104520916938782, "learning_rate": 4.719774536462525e-06, "loss": 0.4216, "step": 34484 }, { "epoch": 1.5825340737001514, "grad_norm": 0.4646252393722534, "learning_rate": 4.719529734269351e-06, "loss": 0.29, "step": 34485 }, { "epoch": 1.582579964205406, "grad_norm": 0.46553856134414673, "learning_rate": 4.71928493275062e-06, "loss": 0.3952, "step": 34486 }, { "epoch": 1.5826258547106604, "grad_norm": 0.4512983560562134, "learning_rate": 4.71904013190692e-06, "loss": 0.2926, "step": 34487 }, { "epoch": 1.5826717452159147, "grad_norm": 0.5158006548881531, "learning_rate": 4.718795331738838e-06, "loss": 0.3937, "step": 34488 }, { "epoch": 1.5827176357211692, "grad_norm": 0.4445599317550659, "learning_rate": 4.718550532246966e-06, "loss": 0.3221, "step": 34489 }, { "epoch": 1.5827635262264237, "grad_norm": 0.444424033164978, "learning_rate": 4.718305733431891e-06, "loss": 0.2861, "step": 34490 }, { "epoch": 1.5828094167316782, "grad_norm": 0.46928074955940247, "learning_rate": 4.718060935294202e-06, "loss": 0.3281, "step": 34491 }, { "epoch": 1.5828553072369327, "grad_norm": 0.4669865369796753, "learning_rate": 4.717816137834487e-06, "loss": 0.3264, "step": 34492 }, { "epoch": 1.5829011977421872, "grad_norm": 0.473935067653656, "learning_rate": 4.717571341053336e-06, "loss": 0.3441, "step": 34493 }, { "epoch": 1.5829470882474417, "grad_norm": 0.4499046504497528, "learning_rate": 4.717326544951334e-06, "loss": 0.3332, "step": 34494 }, { "epoch": 1.5829929787526962, "grad_norm": 0.4681978225708008, "learning_rate": 4.717081749529075e-06, "loss": 0.3397, "step": 34495 }, { "epoch": 1.5830388692579507, "grad_norm": 0.45647066831588745, "learning_rate": 4.716836954787145e-06, "loss": 0.302, "step": 34496 }, { "epoch": 1.583084759763205, "grad_norm": 0.43028759956359863, "learning_rate": 4.716592160726132e-06, "loss": 0.3116, "step": 34497 }, { "epoch": 1.5831306502684594, "grad_norm": 0.49799495935440063, "learning_rate": 4.716347367346626e-06, "loss": 0.4451, "step": 34498 }, { "epoch": 1.583176540773714, "grad_norm": 0.4543262720108032, "learning_rate": 4.716102574649215e-06, "loss": 0.3147, "step": 34499 }, { "epoch": 1.5832224312789682, "grad_norm": 0.4617965519428253, "learning_rate": 4.715857782634486e-06, "loss": 0.3449, "step": 34500 }, { "epoch": 1.5832683217842227, "grad_norm": 0.4661722183227539, "learning_rate": 4.7156129913030316e-06, "loss": 0.3697, "step": 34501 }, { "epoch": 1.5833142122894772, "grad_norm": 0.4829505681991577, "learning_rate": 4.715368200655439e-06, "loss": 0.3797, "step": 34502 }, { "epoch": 1.5833601027947317, "grad_norm": 0.4805639982223511, "learning_rate": 4.715123410692292e-06, "loss": 0.29, "step": 34503 }, { "epoch": 1.5834059932999862, "grad_norm": 0.43287238478660583, "learning_rate": 4.714878621414187e-06, "loss": 0.3068, "step": 34504 }, { "epoch": 1.5834518838052407, "grad_norm": 0.504483163356781, "learning_rate": 4.714633832821708e-06, "loss": 0.4195, "step": 34505 }, { "epoch": 1.5834977743104952, "grad_norm": 0.45353904366493225, "learning_rate": 4.714389044915443e-06, "loss": 0.3217, "step": 34506 }, { "epoch": 1.5835436648157497, "grad_norm": 0.5309372544288635, "learning_rate": 4.714144257695984e-06, "loss": 0.4362, "step": 34507 }, { "epoch": 1.5835895553210042, "grad_norm": 0.49125128984451294, "learning_rate": 4.713899471163918e-06, "loss": 0.4112, "step": 34508 }, { "epoch": 1.5836354458262587, "grad_norm": 0.9372045397758484, "learning_rate": 4.713654685319831e-06, "loss": 0.3898, "step": 34509 }, { "epoch": 1.583681336331513, "grad_norm": 0.4888708293437958, "learning_rate": 4.713409900164317e-06, "loss": 0.3527, "step": 34510 }, { "epoch": 1.5837272268367675, "grad_norm": 0.4270753264427185, "learning_rate": 4.713165115697961e-06, "loss": 0.3033, "step": 34511 }, { "epoch": 1.583773117342022, "grad_norm": 0.47595661878585815, "learning_rate": 4.712920331921351e-06, "loss": 0.3412, "step": 34512 }, { "epoch": 1.5838190078472763, "grad_norm": 0.4398542046546936, "learning_rate": 4.712675548835079e-06, "loss": 0.3221, "step": 34513 }, { "epoch": 1.5838648983525307, "grad_norm": 0.4746125042438507, "learning_rate": 4.71243076643973e-06, "loss": 0.3501, "step": 34514 }, { "epoch": 1.5839107888577852, "grad_norm": 0.4795666038990021, "learning_rate": 4.712185984735894e-06, "loss": 0.3811, "step": 34515 }, { "epoch": 1.5839566793630397, "grad_norm": 0.4941750168800354, "learning_rate": 4.711941203724162e-06, "loss": 0.3534, "step": 34516 }, { "epoch": 1.5840025698682942, "grad_norm": 0.4700463116168976, "learning_rate": 4.7116964234051185e-06, "loss": 0.3921, "step": 34517 }, { "epoch": 1.5840484603735487, "grad_norm": 0.4921993017196655, "learning_rate": 4.711451643779356e-06, "loss": 0.3998, "step": 34518 }, { "epoch": 1.5840943508788032, "grad_norm": 0.4448760151863098, "learning_rate": 4.711206864847459e-06, "loss": 0.3125, "step": 34519 }, { "epoch": 1.5841402413840577, "grad_norm": 0.4679434895515442, "learning_rate": 4.710962086610021e-06, "loss": 0.3365, "step": 34520 }, { "epoch": 1.5841861318893122, "grad_norm": 0.46245276927948, "learning_rate": 4.710717309067626e-06, "loss": 0.3467, "step": 34521 }, { "epoch": 1.5842320223945667, "grad_norm": 0.5030012130737305, "learning_rate": 4.710472532220863e-06, "loss": 0.3513, "step": 34522 }, { "epoch": 1.584277912899821, "grad_norm": 0.49735182523727417, "learning_rate": 4.710227756070325e-06, "loss": 0.3702, "step": 34523 }, { "epoch": 1.5843238034050755, "grad_norm": 0.450185090303421, "learning_rate": 4.709982980616599e-06, "loss": 0.3151, "step": 34524 }, { "epoch": 1.58436969391033, "grad_norm": 0.5023850798606873, "learning_rate": 4.709738205860268e-06, "loss": 0.3852, "step": 34525 }, { "epoch": 1.5844155844155843, "grad_norm": 0.4783794581890106, "learning_rate": 4.709493431801928e-06, "loss": 0.4325, "step": 34526 }, { "epoch": 1.5844614749208388, "grad_norm": 0.4516247808933258, "learning_rate": 4.709248658442165e-06, "loss": 0.3669, "step": 34527 }, { "epoch": 1.5845073654260933, "grad_norm": 0.4789596199989319, "learning_rate": 4.7090038857815655e-06, "loss": 0.351, "step": 34528 }, { "epoch": 1.5845532559313478, "grad_norm": 0.4462050199508667, "learning_rate": 4.70875911382072e-06, "loss": 0.3061, "step": 34529 }, { "epoch": 1.5845991464366023, "grad_norm": 0.45963054895401, "learning_rate": 4.708514342560218e-06, "loss": 0.3138, "step": 34530 }, { "epoch": 1.5846450369418568, "grad_norm": 0.49401775002479553, "learning_rate": 4.708269572000645e-06, "loss": 0.3855, "step": 34531 }, { "epoch": 1.5846909274471113, "grad_norm": 0.4915311336517334, "learning_rate": 4.708024802142593e-06, "loss": 0.3649, "step": 34532 }, { "epoch": 1.5847368179523658, "grad_norm": 0.5864157676696777, "learning_rate": 4.70778003298665e-06, "loss": 0.324, "step": 34533 }, { "epoch": 1.5847827084576203, "grad_norm": 0.4741992652416229, "learning_rate": 4.707535264533402e-06, "loss": 0.4122, "step": 34534 }, { "epoch": 1.5848285989628745, "grad_norm": 0.4905097484588623, "learning_rate": 4.707290496783441e-06, "loss": 0.3845, "step": 34535 }, { "epoch": 1.584874489468129, "grad_norm": 0.4716007709503174, "learning_rate": 4.707045729737354e-06, "loss": 0.3296, "step": 34536 }, { "epoch": 1.5849203799733835, "grad_norm": 0.47660380601882935, "learning_rate": 4.706800963395728e-06, "loss": 0.3862, "step": 34537 }, { "epoch": 1.5849662704786378, "grad_norm": 0.4517204165458679, "learning_rate": 4.706556197759154e-06, "loss": 0.3163, "step": 34538 }, { "epoch": 1.5850121609838923, "grad_norm": 0.43394896388053894, "learning_rate": 4.70631143282822e-06, "loss": 0.3079, "step": 34539 }, { "epoch": 1.5850580514891468, "grad_norm": 0.4525814950466156, "learning_rate": 4.706066668603514e-06, "loss": 0.3005, "step": 34540 }, { "epoch": 1.5851039419944013, "grad_norm": 0.47550779581069946, "learning_rate": 4.705821905085625e-06, "loss": 0.4208, "step": 34541 }, { "epoch": 1.5851498324996558, "grad_norm": 0.4417058527469635, "learning_rate": 4.705577142275142e-06, "loss": 0.2707, "step": 34542 }, { "epoch": 1.5851957230049103, "grad_norm": 0.47244930267333984, "learning_rate": 4.7053323801726505e-06, "loss": 0.3307, "step": 34543 }, { "epoch": 1.5852416135101648, "grad_norm": 0.4288998544216156, "learning_rate": 4.705087618778745e-06, "loss": 0.2981, "step": 34544 }, { "epoch": 1.5852875040154193, "grad_norm": 0.5020086169242859, "learning_rate": 4.704842858094009e-06, "loss": 0.3998, "step": 34545 }, { "epoch": 1.5853333945206738, "grad_norm": 0.4833795726299286, "learning_rate": 4.704598098119033e-06, "loss": 0.3512, "step": 34546 }, { "epoch": 1.5853792850259283, "grad_norm": 0.4819575548171997, "learning_rate": 4.704353338854406e-06, "loss": 0.3524, "step": 34547 }, { "epoch": 1.5854251755311826, "grad_norm": 0.47497355937957764, "learning_rate": 4.704108580300716e-06, "loss": 0.3286, "step": 34548 }, { "epoch": 1.585471066036437, "grad_norm": 0.49087902903556824, "learning_rate": 4.703863822458548e-06, "loss": 0.419, "step": 34549 }, { "epoch": 1.5855169565416916, "grad_norm": 0.48719003796577454, "learning_rate": 4.703619065328499e-06, "loss": 0.3792, "step": 34550 }, { "epoch": 1.5855628470469458, "grad_norm": 0.48690444231033325, "learning_rate": 4.703374308911151e-06, "loss": 0.3838, "step": 34551 }, { "epoch": 1.5856087375522003, "grad_norm": 0.5013701915740967, "learning_rate": 4.703129553207094e-06, "loss": 0.4197, "step": 34552 }, { "epoch": 1.5856546280574548, "grad_norm": 0.46105894446372986, "learning_rate": 4.702884798216915e-06, "loss": 0.3296, "step": 34553 }, { "epoch": 1.5857005185627093, "grad_norm": 0.4834175407886505, "learning_rate": 4.702640043941206e-06, "loss": 0.388, "step": 34554 }, { "epoch": 1.5857464090679638, "grad_norm": 0.467048317193985, "learning_rate": 4.702395290380554e-06, "loss": 0.3724, "step": 34555 }, { "epoch": 1.5857922995732183, "grad_norm": 0.5070849657058716, "learning_rate": 4.702150537535547e-06, "loss": 0.4464, "step": 34556 }, { "epoch": 1.5858381900784728, "grad_norm": 0.4568125903606415, "learning_rate": 4.701905785406774e-06, "loss": 0.3509, "step": 34557 }, { "epoch": 1.5858840805837273, "grad_norm": 0.46787765622138977, "learning_rate": 4.701661033994824e-06, "loss": 0.3284, "step": 34558 }, { "epoch": 1.5859299710889818, "grad_norm": 0.4700010418891907, "learning_rate": 4.701416283300283e-06, "loss": 0.3524, "step": 34559 }, { "epoch": 1.5859758615942363, "grad_norm": 0.46363264322280884, "learning_rate": 4.7011715333237436e-06, "loss": 0.3195, "step": 34560 }, { "epoch": 1.5860217520994906, "grad_norm": 0.5072803497314453, "learning_rate": 4.7009267840657915e-06, "loss": 0.4066, "step": 34561 }, { "epoch": 1.586067642604745, "grad_norm": 0.4677402973175049, "learning_rate": 4.700682035527016e-06, "loss": 0.3934, "step": 34562 }, { "epoch": 1.5861135331099996, "grad_norm": 0.48642536997795105, "learning_rate": 4.700437287708007e-06, "loss": 0.3319, "step": 34563 }, { "epoch": 1.5861594236152539, "grad_norm": 0.4711402654647827, "learning_rate": 4.700192540609351e-06, "loss": 0.3159, "step": 34564 }, { "epoch": 1.5862053141205084, "grad_norm": 0.4461587369441986, "learning_rate": 4.699947794231636e-06, "loss": 0.3624, "step": 34565 }, { "epoch": 1.5862512046257629, "grad_norm": 0.46647313237190247, "learning_rate": 4.699703048575454e-06, "loss": 0.3327, "step": 34566 }, { "epoch": 1.5862970951310174, "grad_norm": 0.509233295917511, "learning_rate": 4.699458303641391e-06, "loss": 0.43, "step": 34567 }, { "epoch": 1.5863429856362719, "grad_norm": 0.48189833760261536, "learning_rate": 4.699213559430034e-06, "loss": 0.3556, "step": 34568 }, { "epoch": 1.5863888761415263, "grad_norm": 0.4503464698791504, "learning_rate": 4.6989688159419755e-06, "loss": 0.3184, "step": 34569 }, { "epoch": 1.5864347666467808, "grad_norm": 0.5226192474365234, "learning_rate": 4.6987240731778015e-06, "loss": 0.3992, "step": 34570 }, { "epoch": 1.5864806571520353, "grad_norm": 0.4482278823852539, "learning_rate": 4.698479331138099e-06, "loss": 0.3264, "step": 34571 }, { "epoch": 1.5865265476572898, "grad_norm": 0.5040515065193176, "learning_rate": 4.698234589823463e-06, "loss": 0.3854, "step": 34572 }, { "epoch": 1.5865724381625441, "grad_norm": 0.5213207602500916, "learning_rate": 4.697989849234475e-06, "loss": 0.4311, "step": 34573 }, { "epoch": 1.5866183286677986, "grad_norm": 0.4529222548007965, "learning_rate": 4.697745109371724e-06, "loss": 0.3559, "step": 34574 }, { "epoch": 1.5866642191730531, "grad_norm": 0.49292704463005066, "learning_rate": 4.697500370235804e-06, "loss": 0.4109, "step": 34575 }, { "epoch": 1.5867101096783076, "grad_norm": 0.4504312574863434, "learning_rate": 4.697255631827299e-06, "loss": 0.3267, "step": 34576 }, { "epoch": 1.5867560001835619, "grad_norm": 0.4388074278831482, "learning_rate": 4.697010894146798e-06, "loss": 0.2933, "step": 34577 }, { "epoch": 1.5868018906888164, "grad_norm": 0.46784502267837524, "learning_rate": 4.696766157194891e-06, "loss": 0.3368, "step": 34578 }, { "epoch": 1.5868477811940709, "grad_norm": 0.4724850654602051, "learning_rate": 4.696521420972165e-06, "loss": 0.3367, "step": 34579 }, { "epoch": 1.5868936716993254, "grad_norm": 0.496127724647522, "learning_rate": 4.696276685479208e-06, "loss": 0.4084, "step": 34580 }, { "epoch": 1.5869395622045799, "grad_norm": 0.4329172968864441, "learning_rate": 4.696031950716612e-06, "loss": 0.2939, "step": 34581 }, { "epoch": 1.5869854527098344, "grad_norm": 0.4585079252719879, "learning_rate": 4.6957872166849636e-06, "loss": 0.3265, "step": 34582 }, { "epoch": 1.5870313432150889, "grad_norm": 0.4721071124076843, "learning_rate": 4.695542483384849e-06, "loss": 0.3711, "step": 34583 }, { "epoch": 1.5870772337203434, "grad_norm": 0.47259363532066345, "learning_rate": 4.695297750816859e-06, "loss": 0.3584, "step": 34584 }, { "epoch": 1.5871231242255979, "grad_norm": 0.4456488788127899, "learning_rate": 4.695053018981584e-06, "loss": 0.3567, "step": 34585 }, { "epoch": 1.5871690147308521, "grad_norm": 0.47606053948402405, "learning_rate": 4.694808287879609e-06, "loss": 0.3779, "step": 34586 }, { "epoch": 1.5872149052361066, "grad_norm": 0.4712086617946625, "learning_rate": 4.694563557511521e-06, "loss": 0.3306, "step": 34587 }, { "epoch": 1.5872607957413611, "grad_norm": 0.4578312337398529, "learning_rate": 4.694318827877914e-06, "loss": 0.3642, "step": 34588 }, { "epoch": 1.5873066862466154, "grad_norm": 0.46725407242774963, "learning_rate": 4.694074098979374e-06, "loss": 0.343, "step": 34589 }, { "epoch": 1.58735257675187, "grad_norm": 0.47350719571113586, "learning_rate": 4.6938293708164875e-06, "loss": 0.3736, "step": 34590 }, { "epoch": 1.5873984672571244, "grad_norm": 0.5089979767799377, "learning_rate": 4.693584643389846e-06, "loss": 0.4287, "step": 34591 }, { "epoch": 1.587444357762379, "grad_norm": 0.47449058294296265, "learning_rate": 4.693339916700037e-06, "loss": 0.3931, "step": 34592 }, { "epoch": 1.5874902482676334, "grad_norm": 0.4670298397541046, "learning_rate": 4.693095190747646e-06, "loss": 0.3434, "step": 34593 }, { "epoch": 1.587536138772888, "grad_norm": 0.42573282122612, "learning_rate": 4.692850465533268e-06, "loss": 0.2989, "step": 34594 }, { "epoch": 1.5875820292781424, "grad_norm": 0.419572651386261, "learning_rate": 4.692605741057487e-06, "loss": 0.3066, "step": 34595 }, { "epoch": 1.587627919783397, "grad_norm": 0.5445533394813538, "learning_rate": 4.692361017320889e-06, "loss": 0.3746, "step": 34596 }, { "epoch": 1.5876738102886514, "grad_norm": 0.49602025747299194, "learning_rate": 4.692116294324068e-06, "loss": 0.3649, "step": 34597 }, { "epoch": 1.587719700793906, "grad_norm": 0.5027179718017578, "learning_rate": 4.691871572067611e-06, "loss": 0.4138, "step": 34598 }, { "epoch": 1.5877655912991602, "grad_norm": 0.4785214066505432, "learning_rate": 4.691626850552104e-06, "loss": 0.3838, "step": 34599 }, { "epoch": 1.5878114818044147, "grad_norm": 0.4304395914077759, "learning_rate": 4.691382129778138e-06, "loss": 0.2985, "step": 34600 }, { "epoch": 1.5878573723096692, "grad_norm": 0.4875289499759674, "learning_rate": 4.691137409746301e-06, "loss": 0.3981, "step": 34601 }, { "epoch": 1.5879032628149234, "grad_norm": 0.4775334298610687, "learning_rate": 4.690892690457178e-06, "loss": 0.3871, "step": 34602 }, { "epoch": 1.587949153320178, "grad_norm": 0.4600880742073059, "learning_rate": 4.690647971911364e-06, "loss": 0.3516, "step": 34603 }, { "epoch": 1.5879950438254324, "grad_norm": 0.465518593788147, "learning_rate": 4.690403254109443e-06, "loss": 0.3578, "step": 34604 }, { "epoch": 1.588040934330687, "grad_norm": 0.4696153998374939, "learning_rate": 4.690158537052004e-06, "loss": 0.3879, "step": 34605 }, { "epoch": 1.5880868248359414, "grad_norm": 0.46333110332489014, "learning_rate": 4.689913820739636e-06, "loss": 0.3513, "step": 34606 }, { "epoch": 1.588132715341196, "grad_norm": 0.48700082302093506, "learning_rate": 4.689669105172929e-06, "loss": 0.3861, "step": 34607 }, { "epoch": 1.5881786058464504, "grad_norm": 0.47353264689445496, "learning_rate": 4.689424390352466e-06, "loss": 0.3946, "step": 34608 }, { "epoch": 1.588224496351705, "grad_norm": 0.430731862783432, "learning_rate": 4.689179676278843e-06, "loss": 0.3014, "step": 34609 }, { "epoch": 1.5882703868569594, "grad_norm": 0.4611452519893646, "learning_rate": 4.688934962952644e-06, "loss": 0.4035, "step": 34610 }, { "epoch": 1.588316277362214, "grad_norm": 0.5709488391876221, "learning_rate": 4.688690250374457e-06, "loss": 0.3988, "step": 34611 }, { "epoch": 1.5883621678674682, "grad_norm": 0.4743790626525879, "learning_rate": 4.688445538544873e-06, "loss": 0.4052, "step": 34612 }, { "epoch": 1.5884080583727227, "grad_norm": 0.46625393629074097, "learning_rate": 4.688200827464479e-06, "loss": 0.3481, "step": 34613 }, { "epoch": 1.5884539488779772, "grad_norm": 0.471148818731308, "learning_rate": 4.6879561171338615e-06, "loss": 0.3401, "step": 34614 }, { "epoch": 1.5884998393832315, "grad_norm": 0.4626951813697815, "learning_rate": 4.6877114075536135e-06, "loss": 0.3391, "step": 34615 }, { "epoch": 1.588545729888486, "grad_norm": 0.443096399307251, "learning_rate": 4.687466698724321e-06, "loss": 0.3111, "step": 34616 }, { "epoch": 1.5885916203937405, "grad_norm": 0.4645618796348572, "learning_rate": 4.687221990646573e-06, "loss": 0.3431, "step": 34617 }, { "epoch": 1.588637510898995, "grad_norm": 0.44557464122772217, "learning_rate": 4.686977283320954e-06, "loss": 0.3145, "step": 34618 }, { "epoch": 1.5886834014042495, "grad_norm": 0.48213982582092285, "learning_rate": 4.68673257674806e-06, "loss": 0.3815, "step": 34619 }, { "epoch": 1.588729291909504, "grad_norm": 0.46545517444610596, "learning_rate": 4.686487870928473e-06, "loss": 0.3293, "step": 34620 }, { "epoch": 1.5887751824147585, "grad_norm": 0.4708687663078308, "learning_rate": 4.6862431658627834e-06, "loss": 0.3559, "step": 34621 }, { "epoch": 1.588821072920013, "grad_norm": 0.4602172076702118, "learning_rate": 4.685998461551581e-06, "loss": 0.3382, "step": 34622 }, { "epoch": 1.5888669634252675, "grad_norm": 0.4869634509086609, "learning_rate": 4.685753757995453e-06, "loss": 0.3693, "step": 34623 }, { "epoch": 1.5889128539305217, "grad_norm": 0.46521803736686707, "learning_rate": 4.685509055194986e-06, "loss": 0.3357, "step": 34624 }, { "epoch": 1.5889587444357762, "grad_norm": 0.4544336497783661, "learning_rate": 4.685264353150773e-06, "loss": 0.3236, "step": 34625 }, { "epoch": 1.5890046349410307, "grad_norm": 0.49167975783348083, "learning_rate": 4.6850196518634e-06, "loss": 0.4445, "step": 34626 }, { "epoch": 1.589050525446285, "grad_norm": 0.5972442030906677, "learning_rate": 4.684774951333453e-06, "loss": 0.3391, "step": 34627 }, { "epoch": 1.5890964159515395, "grad_norm": 0.5371578931808472, "learning_rate": 4.684530251561526e-06, "loss": 0.4293, "step": 34628 }, { "epoch": 1.589142306456794, "grad_norm": 0.442306250333786, "learning_rate": 4.6842855525482026e-06, "loss": 0.3387, "step": 34629 }, { "epoch": 1.5891881969620485, "grad_norm": 0.4762756824493408, "learning_rate": 4.684040854294071e-06, "loss": 0.38, "step": 34630 }, { "epoch": 1.589234087467303, "grad_norm": 0.47143983840942383, "learning_rate": 4.683796156799723e-06, "loss": 0.3546, "step": 34631 }, { "epoch": 1.5892799779725575, "grad_norm": 0.5182498693466187, "learning_rate": 4.683551460065746e-06, "loss": 0.4613, "step": 34632 }, { "epoch": 1.589325868477812, "grad_norm": 0.4570844769477844, "learning_rate": 4.683306764092727e-06, "loss": 0.3273, "step": 34633 }, { "epoch": 1.5893717589830665, "grad_norm": 0.4789077639579773, "learning_rate": 4.683062068881256e-06, "loss": 0.3696, "step": 34634 }, { "epoch": 1.589417649488321, "grad_norm": 0.4777088165283203, "learning_rate": 4.682817374431921e-06, "loss": 0.3649, "step": 34635 }, { "epoch": 1.5894635399935755, "grad_norm": 0.4895303547382355, "learning_rate": 4.6825726807453075e-06, "loss": 0.3757, "step": 34636 }, { "epoch": 1.5895094304988298, "grad_norm": 0.477053701877594, "learning_rate": 4.682327987822009e-06, "loss": 0.3617, "step": 34637 }, { "epoch": 1.5895553210040843, "grad_norm": 0.46935850381851196, "learning_rate": 4.682083295662612e-06, "loss": 0.3415, "step": 34638 }, { "epoch": 1.5896012115093388, "grad_norm": 0.4935864806175232, "learning_rate": 4.681838604267702e-06, "loss": 0.3716, "step": 34639 }, { "epoch": 1.589647102014593, "grad_norm": 0.440094530582428, "learning_rate": 4.681593913637872e-06, "loss": 0.2922, "step": 34640 }, { "epoch": 1.5896929925198475, "grad_norm": 0.4910375773906708, "learning_rate": 4.681349223773708e-06, "loss": 0.4138, "step": 34641 }, { "epoch": 1.589738883025102, "grad_norm": 0.4861021041870117, "learning_rate": 4.6811045346757964e-06, "loss": 0.3913, "step": 34642 }, { "epoch": 1.5897847735303565, "grad_norm": 0.4904944598674774, "learning_rate": 4.6808598463447295e-06, "loss": 0.4097, "step": 34643 }, { "epoch": 1.589830664035611, "grad_norm": 0.4447546899318695, "learning_rate": 4.680615158781094e-06, "loss": 0.3336, "step": 34644 }, { "epoch": 1.5898765545408655, "grad_norm": 0.5124903321266174, "learning_rate": 4.680370471985476e-06, "loss": 0.4252, "step": 34645 }, { "epoch": 1.58992244504612, "grad_norm": 0.4588734209537506, "learning_rate": 4.680125785958468e-06, "loss": 0.3611, "step": 34646 }, { "epoch": 1.5899683355513745, "grad_norm": 0.49763569235801697, "learning_rate": 4.6798811007006575e-06, "loss": 0.3902, "step": 34647 }, { "epoch": 1.590014226056629, "grad_norm": 0.5236087441444397, "learning_rate": 4.67963641621263e-06, "loss": 0.4889, "step": 34648 }, { "epoch": 1.5900601165618835, "grad_norm": 0.8617398142814636, "learning_rate": 4.679391732494977e-06, "loss": 0.3431, "step": 34649 }, { "epoch": 1.5901060070671378, "grad_norm": 0.44143134355545044, "learning_rate": 4.679147049548286e-06, "loss": 0.2891, "step": 34650 }, { "epoch": 1.5901518975723923, "grad_norm": 0.4836884140968323, "learning_rate": 4.678902367373145e-06, "loss": 0.3478, "step": 34651 }, { "epoch": 1.5901977880776468, "grad_norm": 0.470175176858902, "learning_rate": 4.67865768597014e-06, "loss": 0.3429, "step": 34652 }, { "epoch": 1.590243678582901, "grad_norm": 0.47863808274269104, "learning_rate": 4.678413005339864e-06, "loss": 0.3558, "step": 34653 }, { "epoch": 1.5902895690881556, "grad_norm": 0.5038286447525024, "learning_rate": 4.678168325482903e-06, "loss": 0.3877, "step": 34654 }, { "epoch": 1.59033545959341, "grad_norm": 0.4705699384212494, "learning_rate": 4.6779236463998454e-06, "loss": 0.3735, "step": 34655 }, { "epoch": 1.5903813500986645, "grad_norm": 0.5444666743278503, "learning_rate": 4.6776789680912806e-06, "loss": 0.5396, "step": 34656 }, { "epoch": 1.590427240603919, "grad_norm": 0.4743742048740387, "learning_rate": 4.677434290557796e-06, "loss": 0.3331, "step": 34657 }, { "epoch": 1.5904731311091735, "grad_norm": 0.4315963685512543, "learning_rate": 4.677189613799978e-06, "loss": 0.2878, "step": 34658 }, { "epoch": 1.590519021614428, "grad_norm": 0.5056748986244202, "learning_rate": 4.676944937818419e-06, "loss": 0.4405, "step": 34659 }, { "epoch": 1.5905649121196825, "grad_norm": 0.47059184312820435, "learning_rate": 4.676700262613707e-06, "loss": 0.3645, "step": 34660 }, { "epoch": 1.590610802624937, "grad_norm": 0.4842603802680969, "learning_rate": 4.676455588186425e-06, "loss": 0.3663, "step": 34661 }, { "epoch": 1.5906566931301913, "grad_norm": 0.5041903853416443, "learning_rate": 4.676210914537168e-06, "loss": 0.4425, "step": 34662 }, { "epoch": 1.5907025836354458, "grad_norm": 0.4720912277698517, "learning_rate": 4.6759662416665205e-06, "loss": 0.3609, "step": 34663 }, { "epoch": 1.5907484741407003, "grad_norm": 0.46274659037590027, "learning_rate": 4.675721569575072e-06, "loss": 0.3577, "step": 34664 }, { "epoch": 1.5907943646459548, "grad_norm": 0.4845477044582367, "learning_rate": 4.675476898263411e-06, "loss": 0.3493, "step": 34665 }, { "epoch": 1.590840255151209, "grad_norm": 0.42338109016418457, "learning_rate": 4.675232227732127e-06, "loss": 0.2864, "step": 34666 }, { "epoch": 1.5908861456564636, "grad_norm": 0.4466056525707245, "learning_rate": 4.674987557981803e-06, "loss": 0.3235, "step": 34667 }, { "epoch": 1.590932036161718, "grad_norm": 0.47225505113601685, "learning_rate": 4.6747428890130345e-06, "loss": 0.3572, "step": 34668 }, { "epoch": 1.5909779266669726, "grad_norm": 0.5123305916786194, "learning_rate": 4.674498220826407e-06, "loss": 0.3971, "step": 34669 }, { "epoch": 1.591023817172227, "grad_norm": 0.4775237739086151, "learning_rate": 4.674253553422507e-06, "loss": 0.3934, "step": 34670 }, { "epoch": 1.5910697076774816, "grad_norm": 0.46348837018013, "learning_rate": 4.674008886801925e-06, "loss": 0.3426, "step": 34671 }, { "epoch": 1.591115598182736, "grad_norm": 0.5293883681297302, "learning_rate": 4.673764220965249e-06, "loss": 0.4223, "step": 34672 }, { "epoch": 1.5911614886879906, "grad_norm": 0.48984840512275696, "learning_rate": 4.673519555913066e-06, "loss": 0.3676, "step": 34673 }, { "epoch": 1.591207379193245, "grad_norm": 0.5124698877334595, "learning_rate": 4.673274891645967e-06, "loss": 0.4064, "step": 34674 }, { "epoch": 1.5912532696984993, "grad_norm": 0.5050203800201416, "learning_rate": 4.673030228164539e-06, "loss": 0.4332, "step": 34675 }, { "epoch": 1.5912991602037538, "grad_norm": 0.43710777163505554, "learning_rate": 4.672785565469368e-06, "loss": 0.31, "step": 34676 }, { "epoch": 1.5913450507090083, "grad_norm": 0.48700010776519775, "learning_rate": 4.672540903561046e-06, "loss": 0.3618, "step": 34677 }, { "epoch": 1.5913909412142626, "grad_norm": 0.4482556879520416, "learning_rate": 4.67229624244016e-06, "loss": 0.3464, "step": 34678 }, { "epoch": 1.591436831719517, "grad_norm": 0.45000559091567993, "learning_rate": 4.672051582107296e-06, "loss": 0.3439, "step": 34679 }, { "epoch": 1.5914827222247716, "grad_norm": 0.5079840421676636, "learning_rate": 4.671806922563047e-06, "loss": 0.3928, "step": 34680 }, { "epoch": 1.591528612730026, "grad_norm": 0.4834607243537903, "learning_rate": 4.671562263807997e-06, "loss": 0.382, "step": 34681 }, { "epoch": 1.5915745032352806, "grad_norm": 0.4709584414958954, "learning_rate": 4.671317605842739e-06, "loss": 0.3498, "step": 34682 }, { "epoch": 1.591620393740535, "grad_norm": 0.4898265302181244, "learning_rate": 4.671072948667854e-06, "loss": 0.3935, "step": 34683 }, { "epoch": 1.5916662842457896, "grad_norm": 0.4682188034057617, "learning_rate": 4.670828292283937e-06, "loss": 0.3933, "step": 34684 }, { "epoch": 1.591712174751044, "grad_norm": 0.4772651195526123, "learning_rate": 4.6705836366915755e-06, "loss": 0.3473, "step": 34685 }, { "epoch": 1.5917580652562986, "grad_norm": 0.4357205927371979, "learning_rate": 4.670338981891354e-06, "loss": 0.2978, "step": 34686 }, { "epoch": 1.591803955761553, "grad_norm": 0.4465365409851074, "learning_rate": 4.670094327883865e-06, "loss": 0.3359, "step": 34687 }, { "epoch": 1.5918498462668074, "grad_norm": 0.4873638153076172, "learning_rate": 4.669849674669695e-06, "loss": 0.4333, "step": 34688 }, { "epoch": 1.5918957367720619, "grad_norm": 0.4412349462509155, "learning_rate": 4.669605022249429e-06, "loss": 0.3036, "step": 34689 }, { "epoch": 1.5919416272773164, "grad_norm": 0.4592774212360382, "learning_rate": 4.6693603706236625e-06, "loss": 0.3121, "step": 34690 }, { "epoch": 1.5919875177825706, "grad_norm": 0.4286041259765625, "learning_rate": 4.6691157197929785e-06, "loss": 0.2859, "step": 34691 }, { "epoch": 1.5920334082878251, "grad_norm": 0.4584428668022156, "learning_rate": 4.668871069757966e-06, "loss": 0.3564, "step": 34692 }, { "epoch": 1.5920792987930796, "grad_norm": 0.46054837107658386, "learning_rate": 4.668626420519216e-06, "loss": 0.3284, "step": 34693 }, { "epoch": 1.5921251892983341, "grad_norm": 0.48860853910446167, "learning_rate": 4.668381772077314e-06, "loss": 0.3898, "step": 34694 }, { "epoch": 1.5921710798035886, "grad_norm": 0.4533975422382355, "learning_rate": 4.668137124432848e-06, "loss": 0.3614, "step": 34695 }, { "epoch": 1.5922169703088431, "grad_norm": 0.4359634816646576, "learning_rate": 4.667892477586409e-06, "loss": 0.2796, "step": 34696 }, { "epoch": 1.5922628608140976, "grad_norm": 0.44313374161720276, "learning_rate": 4.667647831538583e-06, "loss": 0.3098, "step": 34697 }, { "epoch": 1.5923087513193521, "grad_norm": 0.46732357144355774, "learning_rate": 4.667403186289959e-06, "loss": 0.3537, "step": 34698 }, { "epoch": 1.5923546418246066, "grad_norm": 0.4540899693965912, "learning_rate": 4.667158541841126e-06, "loss": 0.3112, "step": 34699 }, { "epoch": 1.5924005323298611, "grad_norm": 0.45713433623313904, "learning_rate": 4.6669138981926724e-06, "loss": 0.2922, "step": 34700 }, { "epoch": 1.5924464228351154, "grad_norm": 0.4429115951061249, "learning_rate": 4.6666692553451826e-06, "loss": 0.3187, "step": 34701 }, { "epoch": 1.59249231334037, "grad_norm": 0.4501625597476959, "learning_rate": 4.666424613299251e-06, "loss": 0.3345, "step": 34702 }, { "epoch": 1.5925382038456244, "grad_norm": 0.4606688618659973, "learning_rate": 4.666179972055462e-06, "loss": 0.2694, "step": 34703 }, { "epoch": 1.5925840943508787, "grad_norm": 0.5207648277282715, "learning_rate": 4.665935331614405e-06, "loss": 0.4544, "step": 34704 }, { "epoch": 1.5926299848561332, "grad_norm": 0.46853891015052795, "learning_rate": 4.665690691976668e-06, "loss": 0.3795, "step": 34705 }, { "epoch": 1.5926758753613877, "grad_norm": 0.4817691445350647, "learning_rate": 4.66544605314284e-06, "loss": 0.3978, "step": 34706 }, { "epoch": 1.5927217658666422, "grad_norm": 0.5696911215782166, "learning_rate": 4.665201415113507e-06, "loss": 0.5315, "step": 34707 }, { "epoch": 1.5927676563718967, "grad_norm": 0.4373834431171417, "learning_rate": 4.6649567778892605e-06, "loss": 0.3149, "step": 34708 }, { "epoch": 1.5928135468771512, "grad_norm": 0.4728403687477112, "learning_rate": 4.664712141470687e-06, "loss": 0.3658, "step": 34709 }, { "epoch": 1.5928594373824057, "grad_norm": 0.44083866477012634, "learning_rate": 4.664467505858373e-06, "loss": 0.3095, "step": 34710 }, { "epoch": 1.5929053278876601, "grad_norm": 0.47908201813697815, "learning_rate": 4.6642228710529106e-06, "loss": 0.339, "step": 34711 }, { "epoch": 1.5929512183929146, "grad_norm": 0.4389486014842987, "learning_rate": 4.663978237054887e-06, "loss": 0.3009, "step": 34712 }, { "epoch": 1.592997108898169, "grad_norm": 0.4782083332538605, "learning_rate": 4.6637336038648875e-06, "loss": 0.4075, "step": 34713 }, { "epoch": 1.5930429994034234, "grad_norm": 0.4747216999530792, "learning_rate": 4.6634889714835044e-06, "loss": 0.3509, "step": 34714 }, { "epoch": 1.593088889908678, "grad_norm": 0.5163630843162537, "learning_rate": 4.663244339911324e-06, "loss": 0.4079, "step": 34715 }, { "epoch": 1.5931347804139322, "grad_norm": 0.44474077224731445, "learning_rate": 4.662999709148935e-06, "loss": 0.3266, "step": 34716 }, { "epoch": 1.5931806709191867, "grad_norm": 0.44529733061790466, "learning_rate": 4.662755079196922e-06, "loss": 0.3286, "step": 34717 }, { "epoch": 1.5932265614244412, "grad_norm": 0.45347124338150024, "learning_rate": 4.662510450055881e-06, "loss": 0.3454, "step": 34718 }, { "epoch": 1.5932724519296957, "grad_norm": 0.4771330654621124, "learning_rate": 4.662265821726394e-06, "loss": 0.3701, "step": 34719 }, { "epoch": 1.5933183424349502, "grad_norm": 0.4837397038936615, "learning_rate": 4.6620211942090494e-06, "loss": 0.3854, "step": 34720 }, { "epoch": 1.5933642329402047, "grad_norm": 0.4684503972530365, "learning_rate": 4.66177656750444e-06, "loss": 0.3208, "step": 34721 }, { "epoch": 1.5934101234454592, "grad_norm": 0.44609692692756653, "learning_rate": 4.66153194161315e-06, "loss": 0.3063, "step": 34722 }, { "epoch": 1.5934560139507137, "grad_norm": 0.47343870997428894, "learning_rate": 4.661287316535768e-06, "loss": 0.3461, "step": 34723 }, { "epoch": 1.5935019044559682, "grad_norm": 0.4844398498535156, "learning_rate": 4.661042692272884e-06, "loss": 0.4094, "step": 34724 }, { "epoch": 1.5935477949612227, "grad_norm": 0.5398646593093872, "learning_rate": 4.660798068825086e-06, "loss": 0.4016, "step": 34725 }, { "epoch": 1.593593685466477, "grad_norm": 0.44693177938461304, "learning_rate": 4.66055344619296e-06, "loss": 0.315, "step": 34726 }, { "epoch": 1.5936395759717314, "grad_norm": 0.4499146640300751, "learning_rate": 4.660308824377097e-06, "loss": 0.3207, "step": 34727 }, { "epoch": 1.593685466476986, "grad_norm": 0.41190117597579956, "learning_rate": 4.6600642033780845e-06, "loss": 0.2952, "step": 34728 }, { "epoch": 1.5937313569822402, "grad_norm": 0.4760834872722626, "learning_rate": 4.6598195831965085e-06, "loss": 0.3782, "step": 34729 }, { "epoch": 1.5937772474874947, "grad_norm": 0.5275144577026367, "learning_rate": 4.659574963832961e-06, "loss": 0.4145, "step": 34730 }, { "epoch": 1.5938231379927492, "grad_norm": 0.41282689571380615, "learning_rate": 4.6593303452880275e-06, "loss": 0.2934, "step": 34731 }, { "epoch": 1.5938690284980037, "grad_norm": 0.48382943868637085, "learning_rate": 4.659085727562296e-06, "loss": 0.3895, "step": 34732 }, { "epoch": 1.5939149190032582, "grad_norm": 0.49519088864326477, "learning_rate": 4.658841110656357e-06, "loss": 0.3904, "step": 34733 }, { "epoch": 1.5939608095085127, "grad_norm": 0.537339985370636, "learning_rate": 4.658596494570798e-06, "loss": 0.3815, "step": 34734 }, { "epoch": 1.5940067000137672, "grad_norm": 0.42528122663497925, "learning_rate": 4.658351879306205e-06, "loss": 0.2916, "step": 34735 }, { "epoch": 1.5940525905190217, "grad_norm": 0.4270685017108917, "learning_rate": 4.6581072648631685e-06, "loss": 0.2967, "step": 34736 }, { "epoch": 1.5940984810242762, "grad_norm": 0.46985676884651184, "learning_rate": 4.657862651242277e-06, "loss": 0.3095, "step": 34737 }, { "epoch": 1.5941443715295307, "grad_norm": 0.5448096394538879, "learning_rate": 4.657618038444115e-06, "loss": 0.4252, "step": 34738 }, { "epoch": 1.594190262034785, "grad_norm": 0.48673850297927856, "learning_rate": 4.657373426469276e-06, "loss": 0.3731, "step": 34739 }, { "epoch": 1.5942361525400395, "grad_norm": 0.45394930243492126, "learning_rate": 4.657128815318346e-06, "loss": 0.313, "step": 34740 }, { "epoch": 1.594282043045294, "grad_norm": 0.5110794305801392, "learning_rate": 4.656884204991912e-06, "loss": 0.4121, "step": 34741 }, { "epoch": 1.5943279335505482, "grad_norm": 0.4714236557483673, "learning_rate": 4.656639595490564e-06, "loss": 0.3441, "step": 34742 }, { "epoch": 1.5943738240558027, "grad_norm": 0.4964030981063843, "learning_rate": 4.65639498681489e-06, "loss": 0.4028, "step": 34743 }, { "epoch": 1.5944197145610572, "grad_norm": 0.4547504484653473, "learning_rate": 4.656150378965475e-06, "loss": 0.3657, "step": 34744 }, { "epoch": 1.5944656050663117, "grad_norm": 0.4467400014400482, "learning_rate": 4.655905771942912e-06, "loss": 0.3198, "step": 34745 }, { "epoch": 1.5945114955715662, "grad_norm": 0.4882299304008484, "learning_rate": 4.655661165747787e-06, "loss": 0.3602, "step": 34746 }, { "epoch": 1.5945573860768207, "grad_norm": 0.44205793738365173, "learning_rate": 4.655416560380688e-06, "loss": 0.3375, "step": 34747 }, { "epoch": 1.5946032765820752, "grad_norm": 0.47099974751472473, "learning_rate": 4.655171955842202e-06, "loss": 0.3417, "step": 34748 }, { "epoch": 1.5946491670873297, "grad_norm": 0.4626399278640747, "learning_rate": 4.654927352132921e-06, "loss": 0.3004, "step": 34749 }, { "epoch": 1.5946950575925842, "grad_norm": 0.46260422468185425, "learning_rate": 4.65468274925343e-06, "loss": 0.3232, "step": 34750 }, { "epoch": 1.5947409480978385, "grad_norm": 0.555467963218689, "learning_rate": 4.654438147204317e-06, "loss": 0.3975, "step": 34751 }, { "epoch": 1.594786838603093, "grad_norm": 0.4430099129676819, "learning_rate": 4.654193545986172e-06, "loss": 0.3169, "step": 34752 }, { "epoch": 1.5948327291083475, "grad_norm": 0.5208683013916016, "learning_rate": 4.653948945599583e-06, "loss": 0.4242, "step": 34753 }, { "epoch": 1.594878619613602, "grad_norm": 0.4529822766780853, "learning_rate": 4.6537043460451345e-06, "loss": 0.3775, "step": 34754 }, { "epoch": 1.5949245101188563, "grad_norm": 0.43357154726982117, "learning_rate": 4.653459747323421e-06, "loss": 0.3186, "step": 34755 }, { "epoch": 1.5949704006241108, "grad_norm": 0.45859864354133606, "learning_rate": 4.653215149435027e-06, "loss": 0.3187, "step": 34756 }, { "epoch": 1.5950162911293653, "grad_norm": 0.5375958681106567, "learning_rate": 4.65297055238054e-06, "loss": 0.4376, "step": 34757 }, { "epoch": 1.5950621816346198, "grad_norm": 0.44979578256607056, "learning_rate": 4.65272595616055e-06, "loss": 0.3116, "step": 34758 }, { "epoch": 1.5951080721398743, "grad_norm": 0.4660671651363373, "learning_rate": 4.652481360775645e-06, "loss": 0.3921, "step": 34759 }, { "epoch": 1.5951539626451288, "grad_norm": 0.4531121850013733, "learning_rate": 4.65223676622641e-06, "loss": 0.3281, "step": 34760 }, { "epoch": 1.5951998531503833, "grad_norm": 0.5443975925445557, "learning_rate": 4.651992172513438e-06, "loss": 0.403, "step": 34761 }, { "epoch": 1.5952457436556378, "grad_norm": 0.4304085373878479, "learning_rate": 4.651747579637316e-06, "loss": 0.279, "step": 34762 }, { "epoch": 1.5952916341608923, "grad_norm": 0.4876463711261749, "learning_rate": 4.651502987598629e-06, "loss": 0.4351, "step": 34763 }, { "epoch": 1.5953375246661465, "grad_norm": 0.4766169488430023, "learning_rate": 4.651258396397968e-06, "loss": 0.3516, "step": 34764 }, { "epoch": 1.595383415171401, "grad_norm": 0.5024241209030151, "learning_rate": 4.651013806035921e-06, "loss": 0.3935, "step": 34765 }, { "epoch": 1.5954293056766555, "grad_norm": 0.4957285225391388, "learning_rate": 4.650769216513075e-06, "loss": 0.3942, "step": 34766 }, { "epoch": 1.5954751961819098, "grad_norm": 0.4947311580181122, "learning_rate": 4.650524627830019e-06, "loss": 0.4343, "step": 34767 }, { "epoch": 1.5955210866871643, "grad_norm": 0.47733622789382935, "learning_rate": 4.650280039987342e-06, "loss": 0.38, "step": 34768 }, { "epoch": 1.5955669771924188, "grad_norm": 0.494078665971756, "learning_rate": 4.650035452985629e-06, "loss": 0.3964, "step": 34769 }, { "epoch": 1.5956128676976733, "grad_norm": 0.3970520794391632, "learning_rate": 4.649790866825473e-06, "loss": 0.2624, "step": 34770 }, { "epoch": 1.5956587582029278, "grad_norm": 0.4800978899002075, "learning_rate": 4.649546281507458e-06, "loss": 0.3802, "step": 34771 }, { "epoch": 1.5957046487081823, "grad_norm": 0.5012110471725464, "learning_rate": 4.649301697032172e-06, "loss": 0.4484, "step": 34772 }, { "epoch": 1.5957505392134368, "grad_norm": 0.4730996787548065, "learning_rate": 4.649057113400208e-06, "loss": 0.3741, "step": 34773 }, { "epoch": 1.5957964297186913, "grad_norm": 0.44102975726127625, "learning_rate": 4.64881253061215e-06, "loss": 0.2918, "step": 34774 }, { "epoch": 1.5958423202239458, "grad_norm": 0.49235406517982483, "learning_rate": 4.648567948668584e-06, "loss": 0.4044, "step": 34775 }, { "epoch": 1.5958882107292003, "grad_norm": 0.44022056460380554, "learning_rate": 4.648323367570104e-06, "loss": 0.3514, "step": 34776 }, { "epoch": 1.5959341012344546, "grad_norm": 0.6699870824813843, "learning_rate": 4.648078787317295e-06, "loss": 0.3233, "step": 34777 }, { "epoch": 1.595979991739709, "grad_norm": 0.5118640661239624, "learning_rate": 4.647834207910745e-06, "loss": 0.4141, "step": 34778 }, { "epoch": 1.5960258822449636, "grad_norm": 0.4721646308898926, "learning_rate": 4.647589629351043e-06, "loss": 0.3568, "step": 34779 }, { "epoch": 1.5960717727502178, "grad_norm": 0.4855617880821228, "learning_rate": 4.647345051638777e-06, "loss": 0.3613, "step": 34780 }, { "epoch": 1.5961176632554723, "grad_norm": 0.5117548704147339, "learning_rate": 4.647100474774534e-06, "loss": 0.3943, "step": 34781 }, { "epoch": 1.5961635537607268, "grad_norm": 0.5065207481384277, "learning_rate": 4.646855898758903e-06, "loss": 0.4252, "step": 34782 }, { "epoch": 1.5962094442659813, "grad_norm": 0.4753648638725281, "learning_rate": 4.646611323592472e-06, "loss": 0.3159, "step": 34783 }, { "epoch": 1.5962553347712358, "grad_norm": 0.5063811540603638, "learning_rate": 4.64636674927583e-06, "loss": 0.3566, "step": 34784 }, { "epoch": 1.5963012252764903, "grad_norm": 0.4984270930290222, "learning_rate": 4.6461221758095635e-06, "loss": 0.4068, "step": 34785 }, { "epoch": 1.5963471157817448, "grad_norm": 0.5074690580368042, "learning_rate": 4.645877603194263e-06, "loss": 0.4476, "step": 34786 }, { "epoch": 1.5963930062869993, "grad_norm": 0.4590400457382202, "learning_rate": 4.645633031430514e-06, "loss": 0.3239, "step": 34787 }, { "epoch": 1.5964388967922538, "grad_norm": 0.45170119404792786, "learning_rate": 4.645388460518904e-06, "loss": 0.3172, "step": 34788 }, { "epoch": 1.5964847872975083, "grad_norm": 0.46132713556289673, "learning_rate": 4.645143890460026e-06, "loss": 0.3515, "step": 34789 }, { "epoch": 1.5965306778027626, "grad_norm": 0.6673550605773926, "learning_rate": 4.644899321254464e-06, "loss": 0.3615, "step": 34790 }, { "epoch": 1.596576568308017, "grad_norm": 0.4102596044540405, "learning_rate": 4.644654752902806e-06, "loss": 0.2762, "step": 34791 }, { "epoch": 1.5966224588132716, "grad_norm": 0.4621727168560028, "learning_rate": 4.6444101854056415e-06, "loss": 0.3069, "step": 34792 }, { "epoch": 1.5966683493185259, "grad_norm": 0.45002245903015137, "learning_rate": 4.6441656187635595e-06, "loss": 0.3143, "step": 34793 }, { "epoch": 1.5967142398237804, "grad_norm": 0.41082170605659485, "learning_rate": 4.643921052977144e-06, "loss": 0.2788, "step": 34794 }, { "epoch": 1.5967601303290349, "grad_norm": 0.44898471236228943, "learning_rate": 4.6436764880469895e-06, "loss": 0.2974, "step": 34795 }, { "epoch": 1.5968060208342894, "grad_norm": 0.8298036456108093, "learning_rate": 4.64343192397368e-06, "loss": 0.3275, "step": 34796 }, { "epoch": 1.5968519113395439, "grad_norm": 0.4956530034542084, "learning_rate": 4.643187360757801e-06, "loss": 0.3454, "step": 34797 }, { "epoch": 1.5968978018447983, "grad_norm": 0.4920119643211365, "learning_rate": 4.642942798399947e-06, "loss": 0.3731, "step": 34798 }, { "epoch": 1.5969436923500528, "grad_norm": 0.5594425201416016, "learning_rate": 4.642698236900702e-06, "loss": 0.3948, "step": 34799 }, { "epoch": 1.5969895828553073, "grad_norm": 0.5012873411178589, "learning_rate": 4.642453676260654e-06, "loss": 0.326, "step": 34800 }, { "epoch": 1.5970354733605618, "grad_norm": 0.4863239526748657, "learning_rate": 4.642209116480394e-06, "loss": 0.3539, "step": 34801 }, { "epoch": 1.5970813638658161, "grad_norm": 0.6023303270339966, "learning_rate": 4.641964557560507e-06, "loss": 0.366, "step": 34802 }, { "epoch": 1.5971272543710706, "grad_norm": 0.4957040846347809, "learning_rate": 4.641719999501581e-06, "loss": 0.3697, "step": 34803 }, { "epoch": 1.5971731448763251, "grad_norm": 0.4713757634162903, "learning_rate": 4.641475442304207e-06, "loss": 0.3196, "step": 34804 }, { "epoch": 1.5972190353815794, "grad_norm": 0.5363430976867676, "learning_rate": 4.641230885968972e-06, "loss": 0.3747, "step": 34805 }, { "epoch": 1.5972649258868339, "grad_norm": 0.4569535553455353, "learning_rate": 4.6409863304964616e-06, "loss": 0.3122, "step": 34806 }, { "epoch": 1.5973108163920884, "grad_norm": 0.43456047773361206, "learning_rate": 4.640741775887267e-06, "loss": 0.2966, "step": 34807 }, { "epoch": 1.5973567068973429, "grad_norm": 0.4803799092769623, "learning_rate": 4.640497222141976e-06, "loss": 0.345, "step": 34808 }, { "epoch": 1.5974025974025974, "grad_norm": 0.4783639907836914, "learning_rate": 4.640252669261172e-06, "loss": 0.3921, "step": 34809 }, { "epoch": 1.5974484879078519, "grad_norm": 0.5058416128158569, "learning_rate": 4.64000811724545e-06, "loss": 0.3076, "step": 34810 }, { "epoch": 1.5974943784131064, "grad_norm": 0.5651657581329346, "learning_rate": 4.639763566095395e-06, "loss": 0.4238, "step": 34811 }, { "epoch": 1.5975402689183609, "grad_norm": 0.5287007093429565, "learning_rate": 4.6395190158115925e-06, "loss": 0.439, "step": 34812 }, { "epoch": 1.5975861594236154, "grad_norm": 0.46182140707969666, "learning_rate": 4.639274466394635e-06, "loss": 0.3346, "step": 34813 }, { "epoch": 1.5976320499288699, "grad_norm": 0.48368632793426514, "learning_rate": 4.639029917845109e-06, "loss": 0.3619, "step": 34814 }, { "epoch": 1.5976779404341241, "grad_norm": 0.5247377753257751, "learning_rate": 4.638785370163602e-06, "loss": 0.3754, "step": 34815 }, { "epoch": 1.5977238309393786, "grad_norm": 0.47451063990592957, "learning_rate": 4.6385408233507e-06, "loss": 0.2921, "step": 34816 }, { "epoch": 1.5977697214446331, "grad_norm": 0.44768038392066956, "learning_rate": 4.638296277406996e-06, "loss": 0.3258, "step": 34817 }, { "epoch": 1.5978156119498874, "grad_norm": 0.4526967406272888, "learning_rate": 4.638051732333074e-06, "loss": 0.326, "step": 34818 }, { "epoch": 1.597861502455142, "grad_norm": 0.4210854768753052, "learning_rate": 4.637807188129522e-06, "loss": 0.3045, "step": 34819 }, { "epoch": 1.5979073929603964, "grad_norm": 0.4827062487602234, "learning_rate": 4.637562644796931e-06, "loss": 0.3444, "step": 34820 }, { "epoch": 1.597953283465651, "grad_norm": 0.46279793977737427, "learning_rate": 4.637318102335888e-06, "loss": 0.3574, "step": 34821 }, { "epoch": 1.5979991739709054, "grad_norm": 0.5102381706237793, "learning_rate": 4.637073560746979e-06, "loss": 0.3975, "step": 34822 }, { "epoch": 1.59804506447616, "grad_norm": 0.46953868865966797, "learning_rate": 4.636829020030794e-06, "loss": 0.3363, "step": 34823 }, { "epoch": 1.5980909549814144, "grad_norm": 0.49957776069641113, "learning_rate": 4.636584480187922e-06, "loss": 0.3883, "step": 34824 }, { "epoch": 1.598136845486669, "grad_norm": 0.47378212213516235, "learning_rate": 4.636339941218946e-06, "loss": 0.3538, "step": 34825 }, { "epoch": 1.5981827359919234, "grad_norm": 0.45736268162727356, "learning_rate": 4.636095403124461e-06, "loss": 0.3431, "step": 34826 }, { "epoch": 1.598228626497178, "grad_norm": 0.48532652854919434, "learning_rate": 4.635850865905051e-06, "loss": 0.3563, "step": 34827 }, { "epoch": 1.5982745170024322, "grad_norm": 0.48605310916900635, "learning_rate": 4.635606329561304e-06, "loss": 0.4214, "step": 34828 }, { "epoch": 1.5983204075076867, "grad_norm": 0.5004994869232178, "learning_rate": 4.6353617940938096e-06, "loss": 0.4341, "step": 34829 }, { "epoch": 1.5983662980129412, "grad_norm": 0.4945167899131775, "learning_rate": 4.635117259503155e-06, "loss": 0.4265, "step": 34830 }, { "epoch": 1.5984121885181954, "grad_norm": 0.5651324391365051, "learning_rate": 4.634872725789926e-06, "loss": 0.3942, "step": 34831 }, { "epoch": 1.59845807902345, "grad_norm": 0.4603986442089081, "learning_rate": 4.634628192954716e-06, "loss": 0.3313, "step": 34832 }, { "epoch": 1.5985039695287044, "grad_norm": 0.4635562598705292, "learning_rate": 4.634383660998109e-06, "loss": 0.358, "step": 34833 }, { "epoch": 1.598549860033959, "grad_norm": 0.4482724964618683, "learning_rate": 4.634139129920693e-06, "loss": 0.3288, "step": 34834 }, { "epoch": 1.5985957505392134, "grad_norm": 0.44460728764533997, "learning_rate": 4.633894599723058e-06, "loss": 0.3258, "step": 34835 }, { "epoch": 1.598641641044468, "grad_norm": 0.47591206431388855, "learning_rate": 4.63365007040579e-06, "loss": 0.3747, "step": 34836 }, { "epoch": 1.5986875315497224, "grad_norm": 0.47765445709228516, "learning_rate": 4.6334055419694775e-06, "loss": 0.3715, "step": 34837 }, { "epoch": 1.598733422054977, "grad_norm": 0.4598182439804077, "learning_rate": 4.63316101441471e-06, "loss": 0.3199, "step": 34838 }, { "epoch": 1.5987793125602314, "grad_norm": 0.44339531660079956, "learning_rate": 4.632916487742077e-06, "loss": 0.3296, "step": 34839 }, { "epoch": 1.5988252030654857, "grad_norm": 0.47321388125419617, "learning_rate": 4.632671961952159e-06, "loss": 0.365, "step": 34840 }, { "epoch": 1.5988710935707402, "grad_norm": 0.46899881958961487, "learning_rate": 4.632427437045551e-06, "loss": 0.3348, "step": 34841 }, { "epoch": 1.5989169840759947, "grad_norm": 0.49198564887046814, "learning_rate": 4.63218291302284e-06, "loss": 0.3822, "step": 34842 }, { "epoch": 1.5989628745812492, "grad_norm": 0.4984564185142517, "learning_rate": 4.631938389884612e-06, "loss": 0.4179, "step": 34843 }, { "epoch": 1.5990087650865035, "grad_norm": 0.4637838900089264, "learning_rate": 4.631693867631457e-06, "loss": 0.3799, "step": 34844 }, { "epoch": 1.599054655591758, "grad_norm": 0.4733388423919678, "learning_rate": 4.631449346263962e-06, "loss": 0.3649, "step": 34845 }, { "epoch": 1.5991005460970125, "grad_norm": 0.4993596076965332, "learning_rate": 4.631204825782715e-06, "loss": 0.3573, "step": 34846 }, { "epoch": 1.599146436602267, "grad_norm": 0.47080856561660767, "learning_rate": 4.630960306188301e-06, "loss": 0.3679, "step": 34847 }, { "epoch": 1.5991923271075215, "grad_norm": 0.45036759972572327, "learning_rate": 4.630715787481314e-06, "loss": 0.3096, "step": 34848 }, { "epoch": 1.599238217612776, "grad_norm": 0.42406123876571655, "learning_rate": 4.630471269662339e-06, "loss": 0.269, "step": 34849 }, { "epoch": 1.5992841081180305, "grad_norm": 0.5076342225074768, "learning_rate": 4.6302267527319625e-06, "loss": 0.4281, "step": 34850 }, { "epoch": 1.599329998623285, "grad_norm": 0.47667980194091797, "learning_rate": 4.629982236690776e-06, "loss": 0.3622, "step": 34851 }, { "epoch": 1.5993758891285395, "grad_norm": 0.4962298274040222, "learning_rate": 4.629737721539364e-06, "loss": 0.3633, "step": 34852 }, { "epoch": 1.5994217796337937, "grad_norm": 0.4731343984603882, "learning_rate": 4.629493207278315e-06, "loss": 0.3313, "step": 34853 }, { "epoch": 1.5994676701390482, "grad_norm": 0.5986219644546509, "learning_rate": 4.629248693908219e-06, "loss": 0.4005, "step": 34854 }, { "epoch": 1.5995135606443027, "grad_norm": 0.43405160307884216, "learning_rate": 4.629004181429663e-06, "loss": 0.3282, "step": 34855 }, { "epoch": 1.599559451149557, "grad_norm": 0.49386027455329895, "learning_rate": 4.628759669843235e-06, "loss": 0.369, "step": 34856 }, { "epoch": 1.5996053416548115, "grad_norm": 0.4660702645778656, "learning_rate": 4.628515159149523e-06, "loss": 0.3087, "step": 34857 }, { "epoch": 1.599651232160066, "grad_norm": 0.4737514853477478, "learning_rate": 4.628270649349115e-06, "loss": 0.353, "step": 34858 }, { "epoch": 1.5996971226653205, "grad_norm": 0.5699807405471802, "learning_rate": 4.628026140442596e-06, "loss": 0.3356, "step": 34859 }, { "epoch": 1.599743013170575, "grad_norm": 0.49088722467422485, "learning_rate": 4.627781632430559e-06, "loss": 0.3491, "step": 34860 }, { "epoch": 1.5997889036758295, "grad_norm": 0.45973482728004456, "learning_rate": 4.627537125313591e-06, "loss": 0.3163, "step": 34861 }, { "epoch": 1.599834794181084, "grad_norm": 0.43102431297302246, "learning_rate": 4.627292619092276e-06, "loss": 0.2719, "step": 34862 }, { "epoch": 1.5998806846863385, "grad_norm": 0.42631447315216064, "learning_rate": 4.627048113767206e-06, "loss": 0.2965, "step": 34863 }, { "epoch": 1.599926575191593, "grad_norm": 0.5110950469970703, "learning_rate": 4.626803609338968e-06, "loss": 0.3697, "step": 34864 }, { "epoch": 1.5999724656968475, "grad_norm": 0.45644357800483704, "learning_rate": 4.626559105808149e-06, "loss": 0.3551, "step": 34865 }, { "epoch": 1.6000183562021018, "grad_norm": 0.48917511105537415, "learning_rate": 4.626314603175337e-06, "loss": 0.3487, "step": 34866 }, { "epoch": 1.6000642467073563, "grad_norm": 0.46688464283943176, "learning_rate": 4.6260701014411215e-06, "loss": 0.3697, "step": 34867 }, { "epoch": 1.6001101372126108, "grad_norm": 0.4899645447731018, "learning_rate": 4.625825600606087e-06, "loss": 0.406, "step": 34868 }, { "epoch": 1.600156027717865, "grad_norm": 0.4923345744609833, "learning_rate": 4.625581100670827e-06, "loss": 0.3917, "step": 34869 }, { "epoch": 1.6002019182231195, "grad_norm": 0.44675078988075256, "learning_rate": 4.625336601635925e-06, "loss": 0.3067, "step": 34870 }, { "epoch": 1.600247808728374, "grad_norm": 0.46831583976745605, "learning_rate": 4.625092103501969e-06, "loss": 0.3761, "step": 34871 }, { "epoch": 1.6002936992336285, "grad_norm": 0.48872241377830505, "learning_rate": 4.62484760626955e-06, "loss": 0.3916, "step": 34872 }, { "epoch": 1.600339589738883, "grad_norm": 0.4432581961154938, "learning_rate": 4.624603109939254e-06, "loss": 0.3175, "step": 34873 }, { "epoch": 1.6003854802441375, "grad_norm": 0.4905319809913635, "learning_rate": 4.624358614511667e-06, "loss": 0.4025, "step": 34874 }, { "epoch": 1.600431370749392, "grad_norm": 0.56778883934021, "learning_rate": 4.624114119987381e-06, "loss": 0.3333, "step": 34875 }, { "epoch": 1.6004772612546465, "grad_norm": 0.5066177248954773, "learning_rate": 4.623869626366982e-06, "loss": 0.3829, "step": 34876 }, { "epoch": 1.600523151759901, "grad_norm": 0.4770393967628479, "learning_rate": 4.623625133651056e-06, "loss": 0.4121, "step": 34877 }, { "epoch": 1.6005690422651553, "grad_norm": 0.4968225955963135, "learning_rate": 4.623380641840195e-06, "loss": 0.4383, "step": 34878 }, { "epoch": 1.6006149327704098, "grad_norm": 0.4286067485809326, "learning_rate": 4.623136150934984e-06, "loss": 0.3046, "step": 34879 }, { "epoch": 1.6006608232756643, "grad_norm": 0.4685288667678833, "learning_rate": 4.622891660936012e-06, "loss": 0.3421, "step": 34880 }, { "epoch": 1.6007067137809188, "grad_norm": 0.4423278272151947, "learning_rate": 4.622647171843864e-06, "loss": 0.2903, "step": 34881 }, { "epoch": 1.600752604286173, "grad_norm": 0.4528770446777344, "learning_rate": 4.622402683659133e-06, "loss": 0.3523, "step": 34882 }, { "epoch": 1.6007984947914276, "grad_norm": 0.5023056864738464, "learning_rate": 4.622158196382405e-06, "loss": 0.3986, "step": 34883 }, { "epoch": 1.600844385296682, "grad_norm": 0.43100085854530334, "learning_rate": 4.621913710014264e-06, "loss": 0.2952, "step": 34884 }, { "epoch": 1.6008902758019365, "grad_norm": 0.4778672158718109, "learning_rate": 4.621669224555303e-06, "loss": 0.4142, "step": 34885 }, { "epoch": 1.600936166307191, "grad_norm": 0.4654640555381775, "learning_rate": 4.621424740006109e-06, "loss": 0.3503, "step": 34886 }, { "epoch": 1.6009820568124455, "grad_norm": 0.4785864055156708, "learning_rate": 4.621180256367268e-06, "loss": 0.4147, "step": 34887 }, { "epoch": 1.6010279473177, "grad_norm": 0.4578460156917572, "learning_rate": 4.6209357736393694e-06, "loss": 0.333, "step": 34888 }, { "epoch": 1.6010738378229545, "grad_norm": 0.5140366554260254, "learning_rate": 4.6206912918230004e-06, "loss": 0.3725, "step": 34889 }, { "epoch": 1.601119728328209, "grad_norm": 0.48435184359550476, "learning_rate": 4.620446810918748e-06, "loss": 0.3707, "step": 34890 }, { "epoch": 1.6011656188334633, "grad_norm": 0.4748537540435791, "learning_rate": 4.620202330927203e-06, "loss": 0.3644, "step": 34891 }, { "epoch": 1.6012115093387178, "grad_norm": 0.49680015444755554, "learning_rate": 4.619957851848951e-06, "loss": 0.3686, "step": 34892 }, { "epoch": 1.6012573998439723, "grad_norm": 0.4802763760089874, "learning_rate": 4.61971337368458e-06, "loss": 0.3566, "step": 34893 }, { "epoch": 1.6013032903492266, "grad_norm": 0.4818015992641449, "learning_rate": 4.619468896434679e-06, "loss": 0.3539, "step": 34894 }, { "epoch": 1.601349180854481, "grad_norm": 0.5197352766990662, "learning_rate": 4.619224420099835e-06, "loss": 0.3973, "step": 34895 }, { "epoch": 1.6013950713597356, "grad_norm": 0.5193723440170288, "learning_rate": 4.6189799446806344e-06, "loss": 0.4046, "step": 34896 }, { "epoch": 1.60144096186499, "grad_norm": 0.41530877351760864, "learning_rate": 4.618735470177669e-06, "loss": 0.2855, "step": 34897 }, { "epoch": 1.6014868523702446, "grad_norm": 0.505231499671936, "learning_rate": 4.6184909965915245e-06, "loss": 0.4059, "step": 34898 }, { "epoch": 1.601532742875499, "grad_norm": 0.46907904744148254, "learning_rate": 4.618246523922787e-06, "loss": 0.3279, "step": 34899 }, { "epoch": 1.6015786333807536, "grad_norm": 0.5181283354759216, "learning_rate": 4.6180020521720475e-06, "loss": 0.3189, "step": 34900 }, { "epoch": 1.601624523886008, "grad_norm": 0.42142054438591003, "learning_rate": 4.6177575813398925e-06, "loss": 0.2732, "step": 34901 }, { "epoch": 1.6016704143912626, "grad_norm": 0.4701882004737854, "learning_rate": 4.617513111426908e-06, "loss": 0.361, "step": 34902 }, { "epoch": 1.601716304896517, "grad_norm": 0.4183480441570282, "learning_rate": 4.617268642433685e-06, "loss": 0.2721, "step": 34903 }, { "epoch": 1.6017621954017713, "grad_norm": 0.4720074236392975, "learning_rate": 4.617024174360811e-06, "loss": 0.3306, "step": 34904 }, { "epoch": 1.6018080859070258, "grad_norm": 0.4673229157924652, "learning_rate": 4.616779707208872e-06, "loss": 0.3966, "step": 34905 }, { "epoch": 1.6018539764122803, "grad_norm": 0.5110482573509216, "learning_rate": 4.616535240978458e-06, "loss": 0.4048, "step": 34906 }, { "epoch": 1.6018998669175346, "grad_norm": 0.4976640045642853, "learning_rate": 4.616290775670155e-06, "loss": 0.4057, "step": 34907 }, { "epoch": 1.601945757422789, "grad_norm": 0.4749062657356262, "learning_rate": 4.616046311284551e-06, "loss": 0.3912, "step": 34908 }, { "epoch": 1.6019916479280436, "grad_norm": 0.4628889560699463, "learning_rate": 4.615801847822236e-06, "loss": 0.4016, "step": 34909 }, { "epoch": 1.602037538433298, "grad_norm": 0.5357798337936401, "learning_rate": 4.615557385283795e-06, "loss": 0.4365, "step": 34910 }, { "epoch": 1.6020834289385526, "grad_norm": 0.49128544330596924, "learning_rate": 4.6153129236698185e-06, "loss": 0.3982, "step": 34911 }, { "epoch": 1.602129319443807, "grad_norm": 0.4567388892173767, "learning_rate": 4.61506846298089e-06, "loss": 0.3549, "step": 34912 }, { "epoch": 1.6021752099490616, "grad_norm": 0.45051223039627075, "learning_rate": 4.614824003217603e-06, "loss": 0.3122, "step": 34913 }, { "epoch": 1.602221100454316, "grad_norm": 0.4980641007423401, "learning_rate": 4.614579544380542e-06, "loss": 0.3671, "step": 34914 }, { "epoch": 1.6022669909595706, "grad_norm": 0.47340893745422363, "learning_rate": 4.614335086470295e-06, "loss": 0.3652, "step": 34915 }, { "epoch": 1.602312881464825, "grad_norm": 0.4830954074859619, "learning_rate": 4.614090629487452e-06, "loss": 0.3798, "step": 34916 }, { "epoch": 1.6023587719700794, "grad_norm": 0.4438508450984955, "learning_rate": 4.613846173432599e-06, "loss": 0.361, "step": 34917 }, { "epoch": 1.6024046624753339, "grad_norm": 0.46878087520599365, "learning_rate": 4.613601718306321e-06, "loss": 0.4119, "step": 34918 }, { "epoch": 1.6024505529805884, "grad_norm": 0.4310548007488251, "learning_rate": 4.613357264109212e-06, "loss": 0.3367, "step": 34919 }, { "epoch": 1.6024964434858426, "grad_norm": 0.4591136574745178, "learning_rate": 4.613112810841856e-06, "loss": 0.2926, "step": 34920 }, { "epoch": 1.6025423339910971, "grad_norm": 0.4821411371231079, "learning_rate": 4.612868358504841e-06, "loss": 0.3985, "step": 34921 }, { "epoch": 1.6025882244963516, "grad_norm": 0.47148606181144714, "learning_rate": 4.6126239070987565e-06, "loss": 0.3793, "step": 34922 }, { "epoch": 1.6026341150016061, "grad_norm": 0.4340754747390747, "learning_rate": 4.612379456624189e-06, "loss": 0.328, "step": 34923 }, { "epoch": 1.6026800055068606, "grad_norm": 0.49296459555625916, "learning_rate": 4.612135007081724e-06, "loss": 0.3398, "step": 34924 }, { "epoch": 1.6027258960121151, "grad_norm": 0.4422965943813324, "learning_rate": 4.6118905584719554e-06, "loss": 0.3235, "step": 34925 }, { "epoch": 1.6027717865173696, "grad_norm": 0.45875436067581177, "learning_rate": 4.611646110795467e-06, "loss": 0.32, "step": 34926 }, { "epoch": 1.6028176770226241, "grad_norm": 0.483359158039093, "learning_rate": 4.611401664052845e-06, "loss": 0.4032, "step": 34927 }, { "epoch": 1.6028635675278786, "grad_norm": 0.47151580452919006, "learning_rate": 4.611157218244682e-06, "loss": 0.3194, "step": 34928 }, { "epoch": 1.602909458033133, "grad_norm": 0.4892735183238983, "learning_rate": 4.610912773371562e-06, "loss": 0.4292, "step": 34929 }, { "epoch": 1.6029553485383874, "grad_norm": 0.5474965572357178, "learning_rate": 4.610668329434073e-06, "loss": 0.412, "step": 34930 }, { "epoch": 1.603001239043642, "grad_norm": 0.445709764957428, "learning_rate": 4.6104238864328054e-06, "loss": 0.2953, "step": 34931 }, { "epoch": 1.6030471295488964, "grad_norm": 0.47554370760917664, "learning_rate": 4.610179444368345e-06, "loss": 0.3905, "step": 34932 }, { "epoch": 1.6030930200541507, "grad_norm": 0.4601365625858307, "learning_rate": 4.609935003241278e-06, "loss": 0.3563, "step": 34933 }, { "epoch": 1.6031389105594052, "grad_norm": 0.47242823243141174, "learning_rate": 4.6096905630521966e-06, "loss": 0.3517, "step": 34934 }, { "epoch": 1.6031848010646597, "grad_norm": 0.470455527305603, "learning_rate": 4.609446123801686e-06, "loss": 0.3156, "step": 34935 }, { "epoch": 1.6032306915699142, "grad_norm": 0.4755844473838806, "learning_rate": 4.609201685490333e-06, "loss": 0.3582, "step": 34936 }, { "epoch": 1.6032765820751687, "grad_norm": 0.4406697750091553, "learning_rate": 4.608957248118728e-06, "loss": 0.2844, "step": 34937 }, { "epoch": 1.6033224725804232, "grad_norm": 0.5107644200325012, "learning_rate": 4.608712811687457e-06, "loss": 0.3518, "step": 34938 }, { "epoch": 1.6033683630856777, "grad_norm": 0.41964754462242126, "learning_rate": 4.608468376197106e-06, "loss": 0.2654, "step": 34939 }, { "epoch": 1.6034142535909321, "grad_norm": 0.5450202822685242, "learning_rate": 4.608223941648268e-06, "loss": 0.3195, "step": 34940 }, { "epoch": 1.6034601440961866, "grad_norm": 0.5180357098579407, "learning_rate": 4.607979508041527e-06, "loss": 0.4253, "step": 34941 }, { "epoch": 1.603506034601441, "grad_norm": 0.44706830382347107, "learning_rate": 4.607735075377471e-06, "loss": 0.3458, "step": 34942 }, { "epoch": 1.6035519251066954, "grad_norm": 0.5048439502716064, "learning_rate": 4.607490643656688e-06, "loss": 0.3575, "step": 34943 }, { "epoch": 1.60359781561195, "grad_norm": 0.4818957448005676, "learning_rate": 4.607246212879768e-06, "loss": 0.3827, "step": 34944 }, { "epoch": 1.6036437061172042, "grad_norm": 0.480134516954422, "learning_rate": 4.607001783047296e-06, "loss": 0.4046, "step": 34945 }, { "epoch": 1.6036895966224587, "grad_norm": 0.47946253418922424, "learning_rate": 4.606757354159858e-06, "loss": 0.3342, "step": 34946 }, { "epoch": 1.6037354871277132, "grad_norm": 0.4680171310901642, "learning_rate": 4.606512926218048e-06, "loss": 0.3463, "step": 34947 }, { "epoch": 1.6037813776329677, "grad_norm": 0.4684879183769226, "learning_rate": 4.606268499222449e-06, "loss": 0.4038, "step": 34948 }, { "epoch": 1.6038272681382222, "grad_norm": 0.46269285678863525, "learning_rate": 4.606024073173649e-06, "loss": 0.3487, "step": 34949 }, { "epoch": 1.6038731586434767, "grad_norm": 0.4904498755931854, "learning_rate": 4.605779648072238e-06, "loss": 0.3969, "step": 34950 }, { "epoch": 1.6039190491487312, "grad_norm": 0.44177570939064026, "learning_rate": 4.6055352239188035e-06, "loss": 0.294, "step": 34951 }, { "epoch": 1.6039649396539857, "grad_norm": 0.4468318819999695, "learning_rate": 4.6052908007139305e-06, "loss": 0.3515, "step": 34952 }, { "epoch": 1.6040108301592402, "grad_norm": 0.427727609872818, "learning_rate": 4.6050463784582095e-06, "loss": 0.2729, "step": 34953 }, { "epoch": 1.6040567206644947, "grad_norm": 0.4571574330329895, "learning_rate": 4.604801957152228e-06, "loss": 0.3657, "step": 34954 }, { "epoch": 1.604102611169749, "grad_norm": 0.47174879908561707, "learning_rate": 4.604557536796569e-06, "loss": 0.3387, "step": 34955 }, { "epoch": 1.6041485016750034, "grad_norm": 0.44028279185295105, "learning_rate": 4.604313117391828e-06, "loss": 0.3213, "step": 34956 }, { "epoch": 1.604194392180258, "grad_norm": 0.4432438313961029, "learning_rate": 4.604068698938589e-06, "loss": 0.3194, "step": 34957 }, { "epoch": 1.6042402826855122, "grad_norm": 0.48729759454727173, "learning_rate": 4.6038242814374385e-06, "loss": 0.3959, "step": 34958 }, { "epoch": 1.6042861731907667, "grad_norm": 0.4271671175956726, "learning_rate": 4.603579864888967e-06, "loss": 0.2718, "step": 34959 }, { "epoch": 1.6043320636960212, "grad_norm": 0.45247501134872437, "learning_rate": 4.60333544929376e-06, "loss": 0.3168, "step": 34960 }, { "epoch": 1.6043779542012757, "grad_norm": 0.5119667649269104, "learning_rate": 4.603091034652405e-06, "loss": 0.3251, "step": 34961 }, { "epoch": 1.6044238447065302, "grad_norm": 0.4681013822555542, "learning_rate": 4.602846620965492e-06, "loss": 0.3724, "step": 34962 }, { "epoch": 1.6044697352117847, "grad_norm": 0.45092830061912537, "learning_rate": 4.6026022082336085e-06, "loss": 0.3511, "step": 34963 }, { "epoch": 1.6045156257170392, "grad_norm": 0.5134447813034058, "learning_rate": 4.602357796457339e-06, "loss": 0.4324, "step": 34964 }, { "epoch": 1.6045615162222937, "grad_norm": 0.4170004725456238, "learning_rate": 4.6021133856372755e-06, "loss": 0.2772, "step": 34965 }, { "epoch": 1.6046074067275482, "grad_norm": 0.4513907730579376, "learning_rate": 4.601868975774003e-06, "loss": 0.2992, "step": 34966 }, { "epoch": 1.6046532972328025, "grad_norm": 0.44555824995040894, "learning_rate": 4.6016245668681085e-06, "loss": 0.3266, "step": 34967 }, { "epoch": 1.604699187738057, "grad_norm": 0.4610910415649414, "learning_rate": 4.601380158920183e-06, "loss": 0.3007, "step": 34968 }, { "epoch": 1.6047450782433115, "grad_norm": 0.4259757101535797, "learning_rate": 4.601135751930812e-06, "loss": 0.2806, "step": 34969 }, { "epoch": 1.604790968748566, "grad_norm": 0.551280677318573, "learning_rate": 4.600891345900583e-06, "loss": 0.4716, "step": 34970 }, { "epoch": 1.6048368592538202, "grad_norm": 0.4932110905647278, "learning_rate": 4.600646940830086e-06, "loss": 0.4521, "step": 34971 }, { "epoch": 1.6048827497590747, "grad_norm": 0.4629458785057068, "learning_rate": 4.600402536719906e-06, "loss": 0.3553, "step": 34972 }, { "epoch": 1.6049286402643292, "grad_norm": 0.48184314370155334, "learning_rate": 4.60015813357063e-06, "loss": 0.3714, "step": 34973 }, { "epoch": 1.6049745307695837, "grad_norm": 0.5285406708717346, "learning_rate": 4.599913731382851e-06, "loss": 0.5326, "step": 34974 }, { "epoch": 1.6050204212748382, "grad_norm": 0.46760788559913635, "learning_rate": 4.599669330157152e-06, "loss": 0.3404, "step": 34975 }, { "epoch": 1.6050663117800927, "grad_norm": 0.45487216114997864, "learning_rate": 4.599424929894122e-06, "loss": 0.3195, "step": 34976 }, { "epoch": 1.6051122022853472, "grad_norm": 0.42622101306915283, "learning_rate": 4.599180530594346e-06, "loss": 0.2965, "step": 34977 }, { "epoch": 1.6051580927906017, "grad_norm": 0.48806440830230713, "learning_rate": 4.5989361322584165e-06, "loss": 0.4017, "step": 34978 }, { "epoch": 1.6052039832958562, "grad_norm": 0.4414578378200531, "learning_rate": 4.59869173488692e-06, "loss": 0.2864, "step": 34979 }, { "epoch": 1.6052498738011105, "grad_norm": 0.46192610263824463, "learning_rate": 4.598447338480442e-06, "loss": 0.3431, "step": 34980 }, { "epoch": 1.605295764306365, "grad_norm": 0.5118463635444641, "learning_rate": 4.598202943039571e-06, "loss": 0.4174, "step": 34981 }, { "epoch": 1.6053416548116195, "grad_norm": 0.48298126459121704, "learning_rate": 4.597958548564897e-06, "loss": 0.3425, "step": 34982 }, { "epoch": 1.6053875453168738, "grad_norm": 0.47807198762893677, "learning_rate": 4.597714155057003e-06, "loss": 0.326, "step": 34983 }, { "epoch": 1.6054334358221283, "grad_norm": 0.5108973979949951, "learning_rate": 4.597469762516481e-06, "loss": 0.4253, "step": 34984 }, { "epoch": 1.6054793263273828, "grad_norm": 0.4417910575866699, "learning_rate": 4.5972253709439176e-06, "loss": 0.3258, "step": 34985 }, { "epoch": 1.6055252168326373, "grad_norm": 0.4860847592353821, "learning_rate": 4.596980980339899e-06, "loss": 0.3759, "step": 34986 }, { "epoch": 1.6055711073378918, "grad_norm": 0.4457682967185974, "learning_rate": 4.596736590705015e-06, "loss": 0.3149, "step": 34987 }, { "epoch": 1.6056169978431463, "grad_norm": 0.4593941867351532, "learning_rate": 4.5964922020398515e-06, "loss": 0.3082, "step": 34988 }, { "epoch": 1.6056628883484008, "grad_norm": 0.40664488077163696, "learning_rate": 4.596247814344996e-06, "loss": 0.2702, "step": 34989 }, { "epoch": 1.6057087788536553, "grad_norm": 0.4798348844051361, "learning_rate": 4.596003427621038e-06, "loss": 0.3714, "step": 34990 }, { "epoch": 1.6057546693589098, "grad_norm": 0.48690807819366455, "learning_rate": 4.5957590418685655e-06, "loss": 0.3834, "step": 34991 }, { "epoch": 1.6058005598641643, "grad_norm": 0.4945811331272125, "learning_rate": 4.595514657088163e-06, "loss": 0.3859, "step": 34992 }, { "epoch": 1.6058464503694185, "grad_norm": 0.46243277192115784, "learning_rate": 4.595270273280422e-06, "loss": 0.3429, "step": 34993 }, { "epoch": 1.605892340874673, "grad_norm": 0.496953547000885, "learning_rate": 4.595025890445928e-06, "loss": 0.4086, "step": 34994 }, { "epoch": 1.6059382313799275, "grad_norm": 0.497714638710022, "learning_rate": 4.594781508585266e-06, "loss": 0.3612, "step": 34995 }, { "epoch": 1.6059841218851818, "grad_norm": 0.4800831973552704, "learning_rate": 4.59453712769903e-06, "loss": 0.3651, "step": 34996 }, { "epoch": 1.6060300123904363, "grad_norm": 0.4632662236690521, "learning_rate": 4.5942927477878044e-06, "loss": 0.3492, "step": 34997 }, { "epoch": 1.6060759028956908, "grad_norm": 0.47459548711776733, "learning_rate": 4.594048368852173e-06, "loss": 0.3465, "step": 34998 }, { "epoch": 1.6061217934009453, "grad_norm": 0.49554672837257385, "learning_rate": 4.59380399089273e-06, "loss": 0.3921, "step": 34999 }, { "epoch": 1.6061676839061998, "grad_norm": 0.4706018567085266, "learning_rate": 4.59355961391006e-06, "loss": 0.3624, "step": 35000 }, { "epoch": 1.6062135744114543, "grad_norm": 0.46854308247566223, "learning_rate": 4.59331523790475e-06, "loss": 0.3659, "step": 35001 }, { "epoch": 1.6062594649167088, "grad_norm": 0.46793970465660095, "learning_rate": 4.5930708628773894e-06, "loss": 0.3359, "step": 35002 }, { "epoch": 1.6063053554219633, "grad_norm": 0.4724282920360565, "learning_rate": 4.592826488828565e-06, "loss": 0.4074, "step": 35003 }, { "epoch": 1.6063512459272178, "grad_norm": 0.4942350685596466, "learning_rate": 4.592582115758862e-06, "loss": 0.3454, "step": 35004 }, { "epoch": 1.6063971364324723, "grad_norm": 0.44197481870651245, "learning_rate": 4.592337743668873e-06, "loss": 0.3143, "step": 35005 }, { "epoch": 1.6064430269377266, "grad_norm": 0.44394582509994507, "learning_rate": 4.592093372559183e-06, "loss": 0.3826, "step": 35006 }, { "epoch": 1.606488917442981, "grad_norm": 0.5206359028816223, "learning_rate": 4.591849002430379e-06, "loss": 0.39, "step": 35007 }, { "epoch": 1.6065348079482356, "grad_norm": 0.4473307132720947, "learning_rate": 4.591604633283049e-06, "loss": 0.3287, "step": 35008 }, { "epoch": 1.6065806984534898, "grad_norm": 0.39300963282585144, "learning_rate": 4.5913602651177824e-06, "loss": 0.254, "step": 35009 }, { "epoch": 1.6066265889587443, "grad_norm": 0.45773744583129883, "learning_rate": 4.5911158979351645e-06, "loss": 0.3756, "step": 35010 }, { "epoch": 1.6066724794639988, "grad_norm": 0.4700591266155243, "learning_rate": 4.5908715317357825e-06, "loss": 0.3788, "step": 35011 }, { "epoch": 1.6067183699692533, "grad_norm": 0.44999608397483826, "learning_rate": 4.590627166520227e-06, "loss": 0.2889, "step": 35012 }, { "epoch": 1.6067642604745078, "grad_norm": 0.4843432605266571, "learning_rate": 4.590382802289084e-06, "loss": 0.3788, "step": 35013 }, { "epoch": 1.6068101509797623, "grad_norm": 0.46214428544044495, "learning_rate": 4.59013843904294e-06, "loss": 0.3299, "step": 35014 }, { "epoch": 1.6068560414850168, "grad_norm": 0.4524962604045868, "learning_rate": 4.589894076782385e-06, "loss": 0.345, "step": 35015 }, { "epoch": 1.6069019319902713, "grad_norm": 0.47244617342948914, "learning_rate": 4.589649715508005e-06, "loss": 0.428, "step": 35016 }, { "epoch": 1.6069478224955258, "grad_norm": 0.4420822262763977, "learning_rate": 4.589405355220386e-06, "loss": 0.3157, "step": 35017 }, { "epoch": 1.60699371300078, "grad_norm": 0.42337849736213684, "learning_rate": 4.58916099592012e-06, "loss": 0.2683, "step": 35018 }, { "epoch": 1.6070396035060346, "grad_norm": 0.4918625056743622, "learning_rate": 4.588916637607792e-06, "loss": 0.3971, "step": 35019 }, { "epoch": 1.607085494011289, "grad_norm": 0.44537779688835144, "learning_rate": 4.5886722802839865e-06, "loss": 0.3095, "step": 35020 }, { "epoch": 1.6071313845165436, "grad_norm": 0.6261507272720337, "learning_rate": 4.5884279239492975e-06, "loss": 0.4151, "step": 35021 }, { "epoch": 1.6071772750217979, "grad_norm": 0.4522685110569, "learning_rate": 4.58818356860431e-06, "loss": 0.3084, "step": 35022 }, { "epoch": 1.6072231655270524, "grad_norm": 0.48193398118019104, "learning_rate": 4.587939214249609e-06, "loss": 0.3745, "step": 35023 }, { "epoch": 1.6072690560323069, "grad_norm": 0.45002859830856323, "learning_rate": 4.587694860885785e-06, "loss": 0.3298, "step": 35024 }, { "epoch": 1.6073149465375614, "grad_norm": 0.4775749146938324, "learning_rate": 4.587450508513425e-06, "loss": 0.3124, "step": 35025 }, { "epoch": 1.6073608370428158, "grad_norm": 0.4787912964820862, "learning_rate": 4.587206157133115e-06, "loss": 0.3941, "step": 35026 }, { "epoch": 1.6074067275480703, "grad_norm": 0.48591986298561096, "learning_rate": 4.586961806745445e-06, "loss": 0.3709, "step": 35027 }, { "epoch": 1.6074526180533248, "grad_norm": 0.49338817596435547, "learning_rate": 4.586717457351002e-06, "loss": 0.4213, "step": 35028 }, { "epoch": 1.6074985085585793, "grad_norm": 0.4702754616737366, "learning_rate": 4.586473108950371e-06, "loss": 0.3812, "step": 35029 }, { "epoch": 1.6075443990638338, "grad_norm": 0.4584999978542328, "learning_rate": 4.586228761544144e-06, "loss": 0.333, "step": 35030 }, { "epoch": 1.6075902895690881, "grad_norm": 0.45198914408683777, "learning_rate": 4.585984415132906e-06, "loss": 0.3395, "step": 35031 }, { "epoch": 1.6076361800743426, "grad_norm": 0.505451500415802, "learning_rate": 4.5857400697172425e-06, "loss": 0.3819, "step": 35032 }, { "epoch": 1.6076820705795971, "grad_norm": 0.49722203612327576, "learning_rate": 4.585495725297746e-06, "loss": 0.3612, "step": 35033 }, { "epoch": 1.6077279610848514, "grad_norm": 0.4900634288787842, "learning_rate": 4.585251381875001e-06, "loss": 0.3631, "step": 35034 }, { "epoch": 1.6077738515901059, "grad_norm": 0.43958601355552673, "learning_rate": 4.585007039449594e-06, "loss": 0.3253, "step": 35035 }, { "epoch": 1.6078197420953604, "grad_norm": 0.4619571566581726, "learning_rate": 4.584762698022116e-06, "loss": 0.3361, "step": 35036 }, { "epoch": 1.6078656326006149, "grad_norm": 0.47435927391052246, "learning_rate": 4.584518357593152e-06, "loss": 0.3125, "step": 35037 }, { "epoch": 1.6079115231058694, "grad_norm": 0.42160356044769287, "learning_rate": 4.584274018163288e-06, "loss": 0.2796, "step": 35038 }, { "epoch": 1.6079574136111239, "grad_norm": 0.4869803786277771, "learning_rate": 4.584029679733117e-06, "loss": 0.3574, "step": 35039 }, { "epoch": 1.6080033041163784, "grad_norm": 0.44030439853668213, "learning_rate": 4.583785342303224e-06, "loss": 0.2779, "step": 35040 }, { "epoch": 1.6080491946216329, "grad_norm": 0.48289743065834045, "learning_rate": 4.583541005874192e-06, "loss": 0.3653, "step": 35041 }, { "epoch": 1.6080950851268874, "grad_norm": 0.45983409881591797, "learning_rate": 4.583296670446615e-06, "loss": 0.2891, "step": 35042 }, { "epoch": 1.6081409756321419, "grad_norm": 0.45919692516326904, "learning_rate": 4.583052336021078e-06, "loss": 0.3127, "step": 35043 }, { "epoch": 1.6081868661373961, "grad_norm": 0.4721025824546814, "learning_rate": 4.582808002598168e-06, "loss": 0.3638, "step": 35044 }, { "epoch": 1.6082327566426506, "grad_norm": 0.46395421028137207, "learning_rate": 4.582563670178473e-06, "loss": 0.3389, "step": 35045 }, { "epoch": 1.6082786471479051, "grad_norm": 0.4705526828765869, "learning_rate": 4.582319338762581e-06, "loss": 0.3754, "step": 35046 }, { "epoch": 1.6083245376531594, "grad_norm": 0.48528969287872314, "learning_rate": 4.582075008351079e-06, "loss": 0.3405, "step": 35047 }, { "epoch": 1.608370428158414, "grad_norm": 0.5044940710067749, "learning_rate": 4.581830678944553e-06, "loss": 0.4444, "step": 35048 }, { "epoch": 1.6084163186636684, "grad_norm": 0.5420824289321899, "learning_rate": 4.5815863505435945e-06, "loss": 0.5006, "step": 35049 }, { "epoch": 1.608462209168923, "grad_norm": 0.45507562160491943, "learning_rate": 4.581342023148789e-06, "loss": 0.3111, "step": 35050 }, { "epoch": 1.6085080996741774, "grad_norm": 0.48054614663124084, "learning_rate": 4.581097696760722e-06, "loss": 0.388, "step": 35051 }, { "epoch": 1.608553990179432, "grad_norm": 0.47234001755714417, "learning_rate": 4.580853371379984e-06, "loss": 0.3648, "step": 35052 }, { "epoch": 1.6085998806846864, "grad_norm": 0.45196667313575745, "learning_rate": 4.580609047007162e-06, "loss": 0.307, "step": 35053 }, { "epoch": 1.608645771189941, "grad_norm": 0.5005353689193726, "learning_rate": 4.58036472364284e-06, "loss": 0.3713, "step": 35054 }, { "epoch": 1.6086916616951954, "grad_norm": 0.5007423162460327, "learning_rate": 4.58012040128761e-06, "loss": 0.3275, "step": 35055 }, { "epoch": 1.6087375522004497, "grad_norm": 0.462198942899704, "learning_rate": 4.579876079942059e-06, "loss": 0.3652, "step": 35056 }, { "epoch": 1.6087834427057042, "grad_norm": 0.9332163333892822, "learning_rate": 4.579631759606773e-06, "loss": 0.4063, "step": 35057 }, { "epoch": 1.6088293332109587, "grad_norm": 0.4544518291950226, "learning_rate": 4.57938744028234e-06, "loss": 0.3079, "step": 35058 }, { "epoch": 1.6088752237162132, "grad_norm": 0.45464959740638733, "learning_rate": 4.579143121969347e-06, "loss": 0.3192, "step": 35059 }, { "epoch": 1.6089211142214674, "grad_norm": 0.46009159088134766, "learning_rate": 4.578898804668381e-06, "loss": 0.3738, "step": 35060 }, { "epoch": 1.608967004726722, "grad_norm": 0.4810626804828644, "learning_rate": 4.5786544883800315e-06, "loss": 0.4268, "step": 35061 }, { "epoch": 1.6090128952319764, "grad_norm": 0.4706176817417145, "learning_rate": 4.578410173104887e-06, "loss": 0.3641, "step": 35062 }, { "epoch": 1.609058785737231, "grad_norm": 0.48172464966773987, "learning_rate": 4.578165858843529e-06, "loss": 0.3782, "step": 35063 }, { "epoch": 1.6091046762424854, "grad_norm": 0.7682211995124817, "learning_rate": 4.577921545596551e-06, "loss": 0.373, "step": 35064 }, { "epoch": 1.60915056674774, "grad_norm": 0.47874733805656433, "learning_rate": 4.577677233364539e-06, "loss": 0.3358, "step": 35065 }, { "epoch": 1.6091964572529944, "grad_norm": 0.4851420819759369, "learning_rate": 4.577432922148079e-06, "loss": 0.3432, "step": 35066 }, { "epoch": 1.609242347758249, "grad_norm": 0.457152396440506, "learning_rate": 4.577188611947761e-06, "loss": 0.3756, "step": 35067 }, { "epoch": 1.6092882382635034, "grad_norm": 0.5096479654312134, "learning_rate": 4.57694430276417e-06, "loss": 0.4299, "step": 35068 }, { "epoch": 1.6093341287687577, "grad_norm": 0.4651198983192444, "learning_rate": 4.576699994597892e-06, "loss": 0.3187, "step": 35069 }, { "epoch": 1.6093800192740122, "grad_norm": 0.5461528897285461, "learning_rate": 4.57645568744952e-06, "loss": 0.4453, "step": 35070 }, { "epoch": 1.6094259097792667, "grad_norm": 0.44050878286361694, "learning_rate": 4.576211381319639e-06, "loss": 0.3159, "step": 35071 }, { "epoch": 1.609471800284521, "grad_norm": 0.4955904483795166, "learning_rate": 4.575967076208833e-06, "loss": 0.3812, "step": 35072 }, { "epoch": 1.6095176907897755, "grad_norm": 0.4605967700481415, "learning_rate": 4.5757227721176945e-06, "loss": 0.3286, "step": 35073 }, { "epoch": 1.60956358129503, "grad_norm": 0.49206939339637756, "learning_rate": 4.575478469046808e-06, "loss": 0.4125, "step": 35074 }, { "epoch": 1.6096094718002845, "grad_norm": 0.4736131727695465, "learning_rate": 4.575234166996763e-06, "loss": 0.3891, "step": 35075 }, { "epoch": 1.609655362305539, "grad_norm": 0.5139959454536438, "learning_rate": 4.574989865968143e-06, "loss": 0.4505, "step": 35076 }, { "epoch": 1.6097012528107935, "grad_norm": 0.457712322473526, "learning_rate": 4.57474556596154e-06, "loss": 0.3508, "step": 35077 }, { "epoch": 1.609747143316048, "grad_norm": 0.46004945039749146, "learning_rate": 4.5745012669775405e-06, "loss": 0.3284, "step": 35078 }, { "epoch": 1.6097930338213025, "grad_norm": 0.8467329144477844, "learning_rate": 4.57425696901673e-06, "loss": 0.3715, "step": 35079 }, { "epoch": 1.609838924326557, "grad_norm": 0.4870775043964386, "learning_rate": 4.574012672079697e-06, "loss": 0.399, "step": 35080 }, { "epoch": 1.6098848148318115, "grad_norm": 0.4898388981819153, "learning_rate": 4.5737683761670304e-06, "loss": 0.3847, "step": 35081 }, { "epoch": 1.6099307053370657, "grad_norm": 0.47500932216644287, "learning_rate": 4.573524081279313e-06, "loss": 0.3699, "step": 35082 }, { "epoch": 1.6099765958423202, "grad_norm": 0.4638137221336365, "learning_rate": 4.5732797874171395e-06, "loss": 0.3206, "step": 35083 }, { "epoch": 1.6100224863475747, "grad_norm": 0.44559910893440247, "learning_rate": 4.573035494581094e-06, "loss": 0.3124, "step": 35084 }, { "epoch": 1.610068376852829, "grad_norm": 0.42249950766563416, "learning_rate": 4.572791202771759e-06, "loss": 0.279, "step": 35085 }, { "epoch": 1.6101142673580835, "grad_norm": 0.4786585569381714, "learning_rate": 4.5725469119897295e-06, "loss": 0.3613, "step": 35086 }, { "epoch": 1.610160157863338, "grad_norm": 0.4372207522392273, "learning_rate": 4.572302622235589e-06, "loss": 0.339, "step": 35087 }, { "epoch": 1.6102060483685925, "grad_norm": 0.49437057971954346, "learning_rate": 4.572058333509925e-06, "loss": 0.3851, "step": 35088 }, { "epoch": 1.610251938873847, "grad_norm": 0.8243091106414795, "learning_rate": 4.571814045813327e-06, "loss": 0.3276, "step": 35089 }, { "epoch": 1.6102978293791015, "grad_norm": 0.48955899477005005, "learning_rate": 4.5715697591463805e-06, "loss": 0.353, "step": 35090 }, { "epoch": 1.610343719884356, "grad_norm": 0.4993062913417816, "learning_rate": 4.571325473509672e-06, "loss": 0.3942, "step": 35091 }, { "epoch": 1.6103896103896105, "grad_norm": 0.49777162075042725, "learning_rate": 4.571081188903792e-06, "loss": 0.3697, "step": 35092 }, { "epoch": 1.610435500894865, "grad_norm": 0.4667954444885254, "learning_rate": 4.570836905329326e-06, "loss": 0.3616, "step": 35093 }, { "epoch": 1.6104813914001195, "grad_norm": 0.4534285366535187, "learning_rate": 4.570592622786862e-06, "loss": 0.33, "step": 35094 }, { "epoch": 1.6105272819053738, "grad_norm": 0.5129544138908386, "learning_rate": 4.570348341276987e-06, "loss": 0.4325, "step": 35095 }, { "epoch": 1.6105731724106283, "grad_norm": 0.48183056712150574, "learning_rate": 4.57010406080029e-06, "loss": 0.3672, "step": 35096 }, { "epoch": 1.6106190629158827, "grad_norm": 0.4934347867965698, "learning_rate": 4.5698597813573545e-06, "loss": 0.3836, "step": 35097 }, { "epoch": 1.610664953421137, "grad_norm": 0.7495447397232056, "learning_rate": 4.569615502948772e-06, "loss": 0.4272, "step": 35098 }, { "epoch": 1.6107108439263915, "grad_norm": 0.45012184977531433, "learning_rate": 4.5693712255751285e-06, "loss": 0.3135, "step": 35099 }, { "epoch": 1.610756734431646, "grad_norm": 0.4588436782360077, "learning_rate": 4.56912694923701e-06, "loss": 0.3192, "step": 35100 }, { "epoch": 1.6108026249369005, "grad_norm": 0.47536221146583557, "learning_rate": 4.568882673935007e-06, "loss": 0.3688, "step": 35101 }, { "epoch": 1.610848515442155, "grad_norm": 0.4482388198375702, "learning_rate": 4.5686383996697056e-06, "loss": 0.2877, "step": 35102 }, { "epoch": 1.6108944059474095, "grad_norm": 0.46007096767425537, "learning_rate": 4.568394126441689e-06, "loss": 0.3422, "step": 35103 }, { "epoch": 1.610940296452664, "grad_norm": 0.4419252872467041, "learning_rate": 4.568149854251552e-06, "loss": 0.3318, "step": 35104 }, { "epoch": 1.6109861869579185, "grad_norm": 0.5034579038619995, "learning_rate": 4.5679055830998766e-06, "loss": 0.3407, "step": 35105 }, { "epoch": 1.611032077463173, "grad_norm": 0.6039446592330933, "learning_rate": 4.567661312987252e-06, "loss": 0.3206, "step": 35106 }, { "epoch": 1.6110779679684273, "grad_norm": 0.4649903476238251, "learning_rate": 4.567417043914267e-06, "loss": 0.3338, "step": 35107 }, { "epoch": 1.6111238584736818, "grad_norm": 0.4459214210510254, "learning_rate": 4.5671727758815064e-06, "loss": 0.3114, "step": 35108 }, { "epoch": 1.6111697489789363, "grad_norm": 0.7183945178985596, "learning_rate": 4.566928508889559e-06, "loss": 0.4423, "step": 35109 }, { "epoch": 1.6112156394841908, "grad_norm": 0.47298669815063477, "learning_rate": 4.566684242939011e-06, "loss": 0.3921, "step": 35110 }, { "epoch": 1.611261529989445, "grad_norm": 0.43747541308403015, "learning_rate": 4.566439978030452e-06, "loss": 0.3031, "step": 35111 }, { "epoch": 1.6113074204946995, "grad_norm": 0.4505838453769684, "learning_rate": 4.566195714164467e-06, "loss": 0.322, "step": 35112 }, { "epoch": 1.611353310999954, "grad_norm": 0.5738093852996826, "learning_rate": 4.5659514513416424e-06, "loss": 0.472, "step": 35113 }, { "epoch": 1.6113992015052085, "grad_norm": 0.4550623893737793, "learning_rate": 4.56570718956257e-06, "loss": 0.3115, "step": 35114 }, { "epoch": 1.611445092010463, "grad_norm": 0.4841283857822418, "learning_rate": 4.565462928827835e-06, "loss": 0.3386, "step": 35115 }, { "epoch": 1.6114909825157175, "grad_norm": 0.46640875935554504, "learning_rate": 4.565218669138024e-06, "loss": 0.387, "step": 35116 }, { "epoch": 1.611536873020972, "grad_norm": 0.47442761063575745, "learning_rate": 4.564974410493725e-06, "loss": 0.3906, "step": 35117 }, { "epoch": 1.6115827635262265, "grad_norm": 0.47995996475219727, "learning_rate": 4.564730152895525e-06, "loss": 0.363, "step": 35118 }, { "epoch": 1.611628654031481, "grad_norm": 0.45361706614494324, "learning_rate": 4.56448589634401e-06, "loss": 0.2989, "step": 35119 }, { "epoch": 1.6116745445367353, "grad_norm": 0.5169030427932739, "learning_rate": 4.564241640839772e-06, "loss": 0.3516, "step": 35120 }, { "epoch": 1.6117204350419898, "grad_norm": 0.46917563676834106, "learning_rate": 4.5639973863833945e-06, "loss": 0.3247, "step": 35121 }, { "epoch": 1.6117663255472443, "grad_norm": 0.4739153981208801, "learning_rate": 4.563753132975464e-06, "loss": 0.4287, "step": 35122 }, { "epoch": 1.6118122160524986, "grad_norm": 0.47725197672843933, "learning_rate": 4.563508880616571e-06, "loss": 0.3578, "step": 35123 }, { "epoch": 1.611858106557753, "grad_norm": 0.4230272173881531, "learning_rate": 4.563264629307302e-06, "loss": 0.303, "step": 35124 }, { "epoch": 1.6119039970630076, "grad_norm": 0.476823627948761, "learning_rate": 4.563020379048241e-06, "loss": 0.3285, "step": 35125 }, { "epoch": 1.611949887568262, "grad_norm": 0.4682890772819519, "learning_rate": 4.5627761298399804e-06, "loss": 0.3332, "step": 35126 }, { "epoch": 1.6119957780735166, "grad_norm": 0.47459834814071655, "learning_rate": 4.562531881683105e-06, "loss": 0.4255, "step": 35127 }, { "epoch": 1.612041668578771, "grad_norm": 0.4748800992965698, "learning_rate": 4.562287634578201e-06, "loss": 0.3197, "step": 35128 }, { "epoch": 1.6120875590840256, "grad_norm": 0.4671265184879303, "learning_rate": 4.562043388525858e-06, "loss": 0.3562, "step": 35129 }, { "epoch": 1.61213344958928, "grad_norm": 0.4574284851551056, "learning_rate": 4.561799143526664e-06, "loss": 0.2941, "step": 35130 }, { "epoch": 1.6121793400945346, "grad_norm": 0.5040728449821472, "learning_rate": 4.561554899581202e-06, "loss": 0.4249, "step": 35131 }, { "epoch": 1.612225230599789, "grad_norm": 0.4620607793331146, "learning_rate": 4.561310656690065e-06, "loss": 0.3358, "step": 35132 }, { "epoch": 1.6122711211050433, "grad_norm": 0.4788861572742462, "learning_rate": 4.561066414853836e-06, "loss": 0.3303, "step": 35133 }, { "epoch": 1.6123170116102978, "grad_norm": 0.4754139482975006, "learning_rate": 4.560822174073102e-06, "loss": 0.3321, "step": 35134 }, { "epoch": 1.6123629021155523, "grad_norm": 0.46730247139930725, "learning_rate": 4.560577934348454e-06, "loss": 0.3793, "step": 35135 }, { "epoch": 1.6124087926208066, "grad_norm": 0.45752984285354614, "learning_rate": 4.560333695680478e-06, "loss": 0.3569, "step": 35136 }, { "epoch": 1.612454683126061, "grad_norm": 0.4766443371772766, "learning_rate": 4.560089458069758e-06, "loss": 0.4199, "step": 35137 }, { "epoch": 1.6125005736313156, "grad_norm": 0.4542084336280823, "learning_rate": 4.559845221516886e-06, "loss": 0.3573, "step": 35138 }, { "epoch": 1.61254646413657, "grad_norm": 0.537150502204895, "learning_rate": 4.5596009860224476e-06, "loss": 0.4694, "step": 35139 }, { "epoch": 1.6125923546418246, "grad_norm": 0.5264257788658142, "learning_rate": 4.5593567515870305e-06, "loss": 0.4516, "step": 35140 }, { "epoch": 1.612638245147079, "grad_norm": 0.4658612012863159, "learning_rate": 4.559112518211218e-06, "loss": 0.34, "step": 35141 }, { "epoch": 1.6126841356523336, "grad_norm": 0.5397464036941528, "learning_rate": 4.558868285895603e-06, "loss": 0.4702, "step": 35142 }, { "epoch": 1.612730026157588, "grad_norm": 0.482726126909256, "learning_rate": 4.558624054640771e-06, "loss": 0.3949, "step": 35143 }, { "epoch": 1.6127759166628426, "grad_norm": 0.4795769453048706, "learning_rate": 4.558379824447308e-06, "loss": 0.3266, "step": 35144 }, { "epoch": 1.6128218071680969, "grad_norm": 0.49074628949165344, "learning_rate": 4.558135595315803e-06, "loss": 0.3638, "step": 35145 }, { "epoch": 1.6128676976733514, "grad_norm": 0.4977889657020569, "learning_rate": 4.557891367246842e-06, "loss": 0.4562, "step": 35146 }, { "epoch": 1.6129135881786059, "grad_norm": 0.4551311731338501, "learning_rate": 4.55764714024101e-06, "loss": 0.2948, "step": 35147 }, { "epoch": 1.6129594786838604, "grad_norm": 0.4656989276409149, "learning_rate": 4.557402914298901e-06, "loss": 0.3634, "step": 35148 }, { "epoch": 1.6130053691891146, "grad_norm": 0.4575183391571045, "learning_rate": 4.557158689421097e-06, "loss": 0.3569, "step": 35149 }, { "epoch": 1.6130512596943691, "grad_norm": 0.4720980226993561, "learning_rate": 4.5569144656081855e-06, "loss": 0.345, "step": 35150 }, { "epoch": 1.6130971501996236, "grad_norm": 0.4591463506221771, "learning_rate": 4.556670242860757e-06, "loss": 0.3243, "step": 35151 }, { "epoch": 1.6131430407048781, "grad_norm": 0.48612621426582336, "learning_rate": 4.556426021179395e-06, "loss": 0.369, "step": 35152 }, { "epoch": 1.6131889312101326, "grad_norm": 0.4449271261692047, "learning_rate": 4.556181800564689e-06, "loss": 0.2802, "step": 35153 }, { "epoch": 1.6132348217153871, "grad_norm": 0.4914563298225403, "learning_rate": 4.555937581017226e-06, "loss": 0.4161, "step": 35154 }, { "epoch": 1.6132807122206416, "grad_norm": 0.45053595304489136, "learning_rate": 4.555693362537593e-06, "loss": 0.3382, "step": 35155 }, { "epoch": 1.6133266027258961, "grad_norm": 0.4463652968406677, "learning_rate": 4.555449145126375e-06, "loss": 0.345, "step": 35156 }, { "epoch": 1.6133724932311506, "grad_norm": 0.48425188660621643, "learning_rate": 4.555204928784164e-06, "loss": 0.3543, "step": 35157 }, { "epoch": 1.613418383736405, "grad_norm": 0.4901755154132843, "learning_rate": 4.554960713511544e-06, "loss": 0.3845, "step": 35158 }, { "epoch": 1.6134642742416594, "grad_norm": 0.5169627070426941, "learning_rate": 4.554716499309103e-06, "loss": 0.4247, "step": 35159 }, { "epoch": 1.6135101647469139, "grad_norm": 0.5189130902290344, "learning_rate": 4.554472286177429e-06, "loss": 0.4124, "step": 35160 }, { "epoch": 1.6135560552521682, "grad_norm": 0.49915915727615356, "learning_rate": 4.554228074117108e-06, "loss": 0.3925, "step": 35161 }, { "epoch": 1.6136019457574227, "grad_norm": 0.502739667892456, "learning_rate": 4.553983863128726e-06, "loss": 0.3996, "step": 35162 }, { "epoch": 1.6136478362626772, "grad_norm": 0.509726881980896, "learning_rate": 4.5537396532128745e-06, "loss": 0.4056, "step": 35163 }, { "epoch": 1.6136937267679317, "grad_norm": 0.4734102785587311, "learning_rate": 4.553495444370138e-06, "loss": 0.3524, "step": 35164 }, { "epoch": 1.6137396172731862, "grad_norm": 0.4455709159374237, "learning_rate": 4.553251236601103e-06, "loss": 0.3209, "step": 35165 }, { "epoch": 1.6137855077784407, "grad_norm": 0.48856645822525024, "learning_rate": 4.5530070299063586e-06, "loss": 0.3916, "step": 35166 }, { "epoch": 1.6138313982836952, "grad_norm": 0.48706477880477905, "learning_rate": 4.552762824286492e-06, "loss": 0.3467, "step": 35167 }, { "epoch": 1.6138772887889496, "grad_norm": 0.4798251688480377, "learning_rate": 4.552518619742086e-06, "loss": 0.3963, "step": 35168 }, { "epoch": 1.6139231792942041, "grad_norm": 0.47593826055526733, "learning_rate": 4.552274416273735e-06, "loss": 0.3691, "step": 35169 }, { "epoch": 1.6139690697994586, "grad_norm": 0.4565679132938385, "learning_rate": 4.552030213882022e-06, "loss": 0.3308, "step": 35170 }, { "epoch": 1.614014960304713, "grad_norm": 0.501885712146759, "learning_rate": 4.551786012567535e-06, "loss": 0.3932, "step": 35171 }, { "epoch": 1.6140608508099674, "grad_norm": 0.4721248745918274, "learning_rate": 4.55154181233086e-06, "loss": 0.3326, "step": 35172 }, { "epoch": 1.614106741315222, "grad_norm": 0.4689807593822479, "learning_rate": 4.551297613172588e-06, "loss": 0.3865, "step": 35173 }, { "epoch": 1.6141526318204762, "grad_norm": 0.4811904728412628, "learning_rate": 4.551053415093302e-06, "loss": 0.3748, "step": 35174 }, { "epoch": 1.6141985223257307, "grad_norm": 0.4928191900253296, "learning_rate": 4.55080921809359e-06, "loss": 0.4368, "step": 35175 }, { "epoch": 1.6142444128309852, "grad_norm": 0.4447808265686035, "learning_rate": 4.550565022174041e-06, "loss": 0.3218, "step": 35176 }, { "epoch": 1.6142903033362397, "grad_norm": 0.45746299624443054, "learning_rate": 4.550320827335243e-06, "loss": 0.3444, "step": 35177 }, { "epoch": 1.6143361938414942, "grad_norm": 0.46120956540107727, "learning_rate": 4.550076633577777e-06, "loss": 0.3299, "step": 35178 }, { "epoch": 1.6143820843467487, "grad_norm": 0.46369606256484985, "learning_rate": 4.549832440902238e-06, "loss": 0.3993, "step": 35179 }, { "epoch": 1.6144279748520032, "grad_norm": 0.5029719471931458, "learning_rate": 4.54958824930921e-06, "loss": 0.4189, "step": 35180 }, { "epoch": 1.6144738653572577, "grad_norm": 0.42532849311828613, "learning_rate": 4.549344058799279e-06, "loss": 0.2462, "step": 35181 }, { "epoch": 1.6145197558625122, "grad_norm": 0.5017949938774109, "learning_rate": 4.549099869373034e-06, "loss": 0.4349, "step": 35182 }, { "epoch": 1.6145656463677667, "grad_norm": 0.4536321461200714, "learning_rate": 4.548855681031062e-06, "loss": 0.3182, "step": 35183 }, { "epoch": 1.614611536873021, "grad_norm": 0.46532362699508667, "learning_rate": 4.548611493773947e-06, "loss": 0.3613, "step": 35184 }, { "epoch": 1.6146574273782754, "grad_norm": 0.4836522340774536, "learning_rate": 4.548367307602281e-06, "loss": 0.3756, "step": 35185 }, { "epoch": 1.61470331788353, "grad_norm": 0.49262407422065735, "learning_rate": 4.54812312251665e-06, "loss": 0.3743, "step": 35186 }, { "epoch": 1.6147492083887842, "grad_norm": 0.42764127254486084, "learning_rate": 4.547878938517638e-06, "loss": 0.2776, "step": 35187 }, { "epoch": 1.6147950988940387, "grad_norm": 0.4750308394432068, "learning_rate": 4.5476347556058355e-06, "loss": 0.3896, "step": 35188 }, { "epoch": 1.6148409893992932, "grad_norm": 0.5065581798553467, "learning_rate": 4.5473905737818295e-06, "loss": 0.3656, "step": 35189 }, { "epoch": 1.6148868799045477, "grad_norm": 0.4487263858318329, "learning_rate": 4.547146393046204e-06, "loss": 0.3511, "step": 35190 }, { "epoch": 1.6149327704098022, "grad_norm": 0.4229077696800232, "learning_rate": 4.5469022133995504e-06, "loss": 0.2773, "step": 35191 }, { "epoch": 1.6149786609150567, "grad_norm": 0.4307233989238739, "learning_rate": 4.546658034842455e-06, "loss": 0.2867, "step": 35192 }, { "epoch": 1.6150245514203112, "grad_norm": 0.5289615988731384, "learning_rate": 4.5464138573755015e-06, "loss": 0.4621, "step": 35193 }, { "epoch": 1.6150704419255657, "grad_norm": 0.42924270033836365, "learning_rate": 4.546169680999281e-06, "loss": 0.2887, "step": 35194 }, { "epoch": 1.6151163324308202, "grad_norm": 0.48323026299476624, "learning_rate": 4.54592550571438e-06, "loss": 0.3493, "step": 35195 }, { "epoch": 1.6151622229360745, "grad_norm": 0.43896231055259705, "learning_rate": 4.545681331521381e-06, "loss": 0.2999, "step": 35196 }, { "epoch": 1.615208113441329, "grad_norm": 0.4671870768070221, "learning_rate": 4.54543715842088e-06, "loss": 0.3563, "step": 35197 }, { "epoch": 1.6152540039465835, "grad_norm": 0.4926178753376007, "learning_rate": 4.545192986413457e-06, "loss": 0.3897, "step": 35198 }, { "epoch": 1.615299894451838, "grad_norm": 0.47545766830444336, "learning_rate": 4.544948815499699e-06, "loss": 0.3849, "step": 35199 }, { "epoch": 1.6153457849570922, "grad_norm": 0.5472874641418457, "learning_rate": 4.544704645680198e-06, "loss": 0.3144, "step": 35200 }, { "epoch": 1.6153916754623467, "grad_norm": 0.43695104122161865, "learning_rate": 4.544460476955539e-06, "loss": 0.3507, "step": 35201 }, { "epoch": 1.6154375659676012, "grad_norm": 0.4742199778556824, "learning_rate": 4.544216309326308e-06, "loss": 0.3605, "step": 35202 }, { "epoch": 1.6154834564728557, "grad_norm": 0.5102025270462036, "learning_rate": 4.543972142793093e-06, "loss": 0.4282, "step": 35203 }, { "epoch": 1.6155293469781102, "grad_norm": 0.40152162313461304, "learning_rate": 4.543727977356481e-06, "loss": 0.2609, "step": 35204 }, { "epoch": 1.6155752374833647, "grad_norm": 0.4831485152244568, "learning_rate": 4.54348381301706e-06, "loss": 0.3428, "step": 35205 }, { "epoch": 1.6156211279886192, "grad_norm": 0.4464220404624939, "learning_rate": 4.543239649775414e-06, "loss": 0.338, "step": 35206 }, { "epoch": 1.6156670184938737, "grad_norm": 0.4370708167552948, "learning_rate": 4.5429954876321345e-06, "loss": 0.3012, "step": 35207 }, { "epoch": 1.6157129089991282, "grad_norm": 0.4686935842037201, "learning_rate": 4.542751326587807e-06, "loss": 0.3219, "step": 35208 }, { "epoch": 1.6157587995043825, "grad_norm": 0.44972705841064453, "learning_rate": 4.542507166643017e-06, "loss": 0.3193, "step": 35209 }, { "epoch": 1.615804690009637, "grad_norm": 0.4709707200527191, "learning_rate": 4.542263007798353e-06, "loss": 0.3628, "step": 35210 }, { "epoch": 1.6158505805148915, "grad_norm": 0.49618273973464966, "learning_rate": 4.5420188500544035e-06, "loss": 0.4264, "step": 35211 }, { "epoch": 1.6158964710201458, "grad_norm": 0.4657321274280548, "learning_rate": 4.541774693411751e-06, "loss": 0.3133, "step": 35212 }, { "epoch": 1.6159423615254003, "grad_norm": 0.4910042881965637, "learning_rate": 4.541530537870988e-06, "loss": 0.3734, "step": 35213 }, { "epoch": 1.6159882520306548, "grad_norm": 0.4698801040649414, "learning_rate": 4.5412863834326995e-06, "loss": 0.3667, "step": 35214 }, { "epoch": 1.6160341425359093, "grad_norm": 0.47721824049949646, "learning_rate": 4.541042230097472e-06, "loss": 0.3565, "step": 35215 }, { "epoch": 1.6160800330411638, "grad_norm": 0.4822738766670227, "learning_rate": 4.5407980778658925e-06, "loss": 0.3703, "step": 35216 }, { "epoch": 1.6161259235464183, "grad_norm": 0.45121315121650696, "learning_rate": 4.54055392673855e-06, "loss": 0.3515, "step": 35217 }, { "epoch": 1.6161718140516728, "grad_norm": 0.5132384896278381, "learning_rate": 4.540309776716028e-06, "loss": 0.4246, "step": 35218 }, { "epoch": 1.6162177045569273, "grad_norm": 0.6126204133033752, "learning_rate": 4.540065627798919e-06, "loss": 0.4341, "step": 35219 }, { "epoch": 1.6162635950621818, "grad_norm": 0.5258728861808777, "learning_rate": 4.539821479987806e-06, "loss": 0.4129, "step": 35220 }, { "epoch": 1.6163094855674363, "grad_norm": 0.4921393394470215, "learning_rate": 4.5395773332832745e-06, "loss": 0.4004, "step": 35221 }, { "epoch": 1.6163553760726905, "grad_norm": 0.46148261427879333, "learning_rate": 4.539333187685917e-06, "loss": 0.3604, "step": 35222 }, { "epoch": 1.616401266577945, "grad_norm": 0.43393704295158386, "learning_rate": 4.539089043196318e-06, "loss": 0.3284, "step": 35223 }, { "epoch": 1.6164471570831995, "grad_norm": 0.4742489159107208, "learning_rate": 4.538844899815063e-06, "loss": 0.3742, "step": 35224 }, { "epoch": 1.6164930475884538, "grad_norm": 0.43717989325523376, "learning_rate": 4.538600757542742e-06, "loss": 0.2817, "step": 35225 }, { "epoch": 1.6165389380937083, "grad_norm": 0.4664948880672455, "learning_rate": 4.538356616379939e-06, "loss": 0.3289, "step": 35226 }, { "epoch": 1.6165848285989628, "grad_norm": 0.4446718096733093, "learning_rate": 4.538112476327242e-06, "loss": 0.3602, "step": 35227 }, { "epoch": 1.6166307191042173, "grad_norm": 0.48142009973526, "learning_rate": 4.53786833738524e-06, "loss": 0.3907, "step": 35228 }, { "epoch": 1.6166766096094718, "grad_norm": 0.5219006538391113, "learning_rate": 4.537624199554519e-06, "loss": 0.4319, "step": 35229 }, { "epoch": 1.6167225001147263, "grad_norm": 0.46773040294647217, "learning_rate": 4.537380062835665e-06, "loss": 0.3394, "step": 35230 }, { "epoch": 1.6167683906199808, "grad_norm": 0.484846830368042, "learning_rate": 4.537135927229267e-06, "loss": 0.3543, "step": 35231 }, { "epoch": 1.6168142811252353, "grad_norm": 0.4613887071609497, "learning_rate": 4.536891792735911e-06, "loss": 0.3505, "step": 35232 }, { "epoch": 1.6168601716304898, "grad_norm": 0.48618024587631226, "learning_rate": 4.536647659356182e-06, "loss": 0.3862, "step": 35233 }, { "epoch": 1.616906062135744, "grad_norm": 0.42986661195755005, "learning_rate": 4.536403527090671e-06, "loss": 0.2958, "step": 35234 }, { "epoch": 1.6169519526409986, "grad_norm": 0.4768902659416199, "learning_rate": 4.5361593959399635e-06, "loss": 0.3644, "step": 35235 }, { "epoch": 1.616997843146253, "grad_norm": 0.44302046298980713, "learning_rate": 4.5359152659046454e-06, "loss": 0.3526, "step": 35236 }, { "epoch": 1.6170437336515076, "grad_norm": 0.4578110873699188, "learning_rate": 4.535671136985305e-06, "loss": 0.3414, "step": 35237 }, { "epoch": 1.6170896241567618, "grad_norm": 0.44232332706451416, "learning_rate": 4.5354270091825295e-06, "loss": 0.3033, "step": 35238 }, { "epoch": 1.6171355146620163, "grad_norm": 0.4418845772743225, "learning_rate": 4.535182882496905e-06, "loss": 0.3276, "step": 35239 }, { "epoch": 1.6171814051672708, "grad_norm": 0.44349023699760437, "learning_rate": 4.534938756929017e-06, "loss": 0.3007, "step": 35240 }, { "epoch": 1.6172272956725253, "grad_norm": 0.4558194577693939, "learning_rate": 4.534694632479458e-06, "loss": 0.3445, "step": 35241 }, { "epoch": 1.6172731861777798, "grad_norm": 0.4796501696109772, "learning_rate": 4.53445050914881e-06, "loss": 0.3613, "step": 35242 }, { "epoch": 1.6173190766830343, "grad_norm": 0.5377784371376038, "learning_rate": 4.53420638693766e-06, "loss": 0.4803, "step": 35243 }, { "epoch": 1.6173649671882888, "grad_norm": 0.4884701371192932, "learning_rate": 4.533962265846598e-06, "loss": 0.3865, "step": 35244 }, { "epoch": 1.6174108576935433, "grad_norm": 0.45140549540519714, "learning_rate": 4.533718145876211e-06, "loss": 0.3294, "step": 35245 }, { "epoch": 1.6174567481987978, "grad_norm": 0.4348629117012024, "learning_rate": 4.533474027027083e-06, "loss": 0.3166, "step": 35246 }, { "epoch": 1.617502638704052, "grad_norm": 0.46847599744796753, "learning_rate": 4.533229909299803e-06, "loss": 0.3594, "step": 35247 }, { "epoch": 1.6175485292093066, "grad_norm": 1.1716820001602173, "learning_rate": 4.532985792694958e-06, "loss": 0.3672, "step": 35248 }, { "epoch": 1.617594419714561, "grad_norm": 0.45622408390045166, "learning_rate": 4.532741677213133e-06, "loss": 0.3154, "step": 35249 }, { "epoch": 1.6176403102198154, "grad_norm": 0.497847318649292, "learning_rate": 4.532497562854919e-06, "loss": 0.3698, "step": 35250 }, { "epoch": 1.6176862007250699, "grad_norm": 0.46620234847068787, "learning_rate": 4.532253449620901e-06, "loss": 0.3319, "step": 35251 }, { "epoch": 1.6177320912303244, "grad_norm": 0.4813525378704071, "learning_rate": 4.532009337511665e-06, "loss": 0.3751, "step": 35252 }, { "epoch": 1.6177779817355789, "grad_norm": 0.48124200105667114, "learning_rate": 4.531765226527799e-06, "loss": 0.3379, "step": 35253 }, { "epoch": 1.6178238722408333, "grad_norm": 0.501890242099762, "learning_rate": 4.53152111666989e-06, "loss": 0.3363, "step": 35254 }, { "epoch": 1.6178697627460878, "grad_norm": 0.45758265256881714, "learning_rate": 4.531277007938523e-06, "loss": 0.3432, "step": 35255 }, { "epoch": 1.6179156532513423, "grad_norm": 0.4688650071620941, "learning_rate": 4.531032900334289e-06, "loss": 0.3733, "step": 35256 }, { "epoch": 1.6179615437565968, "grad_norm": 0.5282744765281677, "learning_rate": 4.530788793857773e-06, "loss": 0.4149, "step": 35257 }, { "epoch": 1.6180074342618513, "grad_norm": 0.45652472972869873, "learning_rate": 4.53054468850956e-06, "loss": 0.3235, "step": 35258 }, { "epoch": 1.6180533247671058, "grad_norm": 0.4732910692691803, "learning_rate": 4.530300584290241e-06, "loss": 0.3854, "step": 35259 }, { "epoch": 1.6180992152723601, "grad_norm": 0.4966532588005066, "learning_rate": 4.5300564812004e-06, "loss": 0.3892, "step": 35260 }, { "epoch": 1.6181451057776146, "grad_norm": 0.489690363407135, "learning_rate": 4.529812379240624e-06, "loss": 0.3744, "step": 35261 }, { "epoch": 1.618190996282869, "grad_norm": 0.4421101212501526, "learning_rate": 4.5295682784115014e-06, "loss": 0.3061, "step": 35262 }, { "epoch": 1.6182368867881234, "grad_norm": 0.5143552422523499, "learning_rate": 4.52932417871362e-06, "loss": 0.4552, "step": 35263 }, { "epoch": 1.6182827772933779, "grad_norm": 0.4567424952983856, "learning_rate": 4.5290800801475634e-06, "loss": 0.3279, "step": 35264 }, { "epoch": 1.6183286677986324, "grad_norm": 0.45537069439888, "learning_rate": 4.528835982713922e-06, "loss": 0.3341, "step": 35265 }, { "epoch": 1.6183745583038869, "grad_norm": 0.49809014797210693, "learning_rate": 4.528591886413281e-06, "loss": 0.3896, "step": 35266 }, { "epoch": 1.6184204488091414, "grad_norm": 0.4608822464942932, "learning_rate": 4.528347791246227e-06, "loss": 0.3349, "step": 35267 }, { "epoch": 1.6184663393143959, "grad_norm": 0.48798486590385437, "learning_rate": 4.528103697213348e-06, "loss": 0.4052, "step": 35268 }, { "epoch": 1.6185122298196504, "grad_norm": 0.42442312836647034, "learning_rate": 4.527859604315232e-06, "loss": 0.3289, "step": 35269 }, { "epoch": 1.6185581203249049, "grad_norm": 0.4998888373374939, "learning_rate": 4.527615512552464e-06, "loss": 0.3886, "step": 35270 }, { "epoch": 1.6186040108301594, "grad_norm": 0.49102166295051575, "learning_rate": 4.52737142192563e-06, "loss": 0.3889, "step": 35271 }, { "epoch": 1.6186499013354139, "grad_norm": 0.5066962838172913, "learning_rate": 4.52712733243532e-06, "loss": 0.421, "step": 35272 }, { "epoch": 1.6186957918406681, "grad_norm": 0.4994352459907532, "learning_rate": 4.5268832440821194e-06, "loss": 0.397, "step": 35273 }, { "epoch": 1.6187416823459226, "grad_norm": 0.47642555832862854, "learning_rate": 4.526639156866614e-06, "loss": 0.388, "step": 35274 }, { "epoch": 1.6187875728511771, "grad_norm": 0.48843249678611755, "learning_rate": 4.526395070789394e-06, "loss": 0.3768, "step": 35275 }, { "epoch": 1.6188334633564314, "grad_norm": 0.47652867436408997, "learning_rate": 4.526150985851044e-06, "loss": 0.3909, "step": 35276 }, { "epoch": 1.618879353861686, "grad_norm": 0.4179336130619049, "learning_rate": 4.525906902052149e-06, "loss": 0.2755, "step": 35277 }, { "epoch": 1.6189252443669404, "grad_norm": 0.4650818705558777, "learning_rate": 4.525662819393301e-06, "loss": 0.3983, "step": 35278 }, { "epoch": 1.618971134872195, "grad_norm": 0.472674161195755, "learning_rate": 4.525418737875084e-06, "loss": 0.3765, "step": 35279 }, { "epoch": 1.6190170253774494, "grad_norm": 0.46041741967201233, "learning_rate": 4.5251746574980835e-06, "loss": 0.3875, "step": 35280 }, { "epoch": 1.619062915882704, "grad_norm": 0.47982272505760193, "learning_rate": 4.52493057826289e-06, "loss": 0.3719, "step": 35281 }, { "epoch": 1.6191088063879584, "grad_norm": 0.48932552337646484, "learning_rate": 4.524686500170087e-06, "loss": 0.4149, "step": 35282 }, { "epoch": 1.619154696893213, "grad_norm": 0.4422452747821808, "learning_rate": 4.524442423220262e-06, "loss": 0.3321, "step": 35283 }, { "epoch": 1.6192005873984674, "grad_norm": 0.48200148344039917, "learning_rate": 4.524198347414005e-06, "loss": 0.379, "step": 35284 }, { "epoch": 1.6192464779037217, "grad_norm": 0.48795610666275024, "learning_rate": 4.523954272751902e-06, "loss": 0.3566, "step": 35285 }, { "epoch": 1.6192923684089762, "grad_norm": 0.44464004039764404, "learning_rate": 4.523710199234535e-06, "loss": 0.2775, "step": 35286 }, { "epoch": 1.6193382589142307, "grad_norm": 0.47872304916381836, "learning_rate": 4.5234661268624965e-06, "loss": 0.3424, "step": 35287 }, { "epoch": 1.6193841494194852, "grad_norm": 0.4593062698841095, "learning_rate": 4.523222055636373e-06, "loss": 0.346, "step": 35288 }, { "epoch": 1.6194300399247394, "grad_norm": 0.5061553716659546, "learning_rate": 4.522977985556748e-06, "loss": 0.3816, "step": 35289 }, { "epoch": 1.619475930429994, "grad_norm": 0.46625834703445435, "learning_rate": 4.522733916624211e-06, "loss": 0.3283, "step": 35290 }, { "epoch": 1.6195218209352484, "grad_norm": 0.5105793476104736, "learning_rate": 4.5224898488393494e-06, "loss": 0.4052, "step": 35291 }, { "epoch": 1.619567711440503, "grad_norm": 0.5003365874290466, "learning_rate": 4.522245782202746e-06, "loss": 0.3392, "step": 35292 }, { "epoch": 1.6196136019457574, "grad_norm": 0.4674070477485657, "learning_rate": 4.522001716714993e-06, "loss": 0.3786, "step": 35293 }, { "epoch": 1.619659492451012, "grad_norm": 0.469468891620636, "learning_rate": 4.521757652376675e-06, "loss": 0.3316, "step": 35294 }, { "epoch": 1.6197053829562664, "grad_norm": 0.4948015511035919, "learning_rate": 4.521513589188378e-06, "loss": 0.3848, "step": 35295 }, { "epoch": 1.619751273461521, "grad_norm": 0.4885922074317932, "learning_rate": 4.521269527150691e-06, "loss": 0.3615, "step": 35296 }, { "epoch": 1.6197971639667754, "grad_norm": 0.43567728996276855, "learning_rate": 4.521025466264199e-06, "loss": 0.327, "step": 35297 }, { "epoch": 1.6198430544720297, "grad_norm": 0.4396163821220398, "learning_rate": 4.5207814065294884e-06, "loss": 0.3082, "step": 35298 }, { "epoch": 1.6198889449772842, "grad_norm": 0.4649001359939575, "learning_rate": 4.520537347947149e-06, "loss": 0.3429, "step": 35299 }, { "epoch": 1.6199348354825387, "grad_norm": 0.499847412109375, "learning_rate": 4.5202932905177656e-06, "loss": 0.4404, "step": 35300 }, { "epoch": 1.619980725987793, "grad_norm": 0.4450450539588928, "learning_rate": 4.520049234241926e-06, "loss": 0.3148, "step": 35301 }, { "epoch": 1.6200266164930475, "grad_norm": 0.5095664262771606, "learning_rate": 4.519805179120216e-06, "loss": 0.4186, "step": 35302 }, { "epoch": 1.620072506998302, "grad_norm": 0.45161136984825134, "learning_rate": 4.5195611251532226e-06, "loss": 0.3467, "step": 35303 }, { "epoch": 1.6201183975035565, "grad_norm": 0.5368122458457947, "learning_rate": 4.519317072341534e-06, "loss": 0.4899, "step": 35304 }, { "epoch": 1.620164288008811, "grad_norm": 0.4838189482688904, "learning_rate": 4.519073020685734e-06, "loss": 0.4231, "step": 35305 }, { "epoch": 1.6202101785140655, "grad_norm": 0.48127108812332153, "learning_rate": 4.518828970186414e-06, "loss": 0.3601, "step": 35306 }, { "epoch": 1.62025606901932, "grad_norm": 0.4628502130508423, "learning_rate": 4.518584920844159e-06, "loss": 0.3345, "step": 35307 }, { "epoch": 1.6203019595245745, "grad_norm": 0.4806426763534546, "learning_rate": 4.518340872659552e-06, "loss": 0.3208, "step": 35308 }, { "epoch": 1.620347850029829, "grad_norm": 0.46664339303970337, "learning_rate": 4.518096825633185e-06, "loss": 0.2977, "step": 35309 }, { "epoch": 1.6203937405350834, "grad_norm": 0.46739232540130615, "learning_rate": 4.5178527797656436e-06, "loss": 0.3429, "step": 35310 }, { "epoch": 1.6204396310403377, "grad_norm": 0.47474464774131775, "learning_rate": 4.5176087350575125e-06, "loss": 0.3324, "step": 35311 }, { "epoch": 1.6204855215455922, "grad_norm": 0.520048975944519, "learning_rate": 4.517364691509382e-06, "loss": 0.2796, "step": 35312 }, { "epoch": 1.6205314120508467, "grad_norm": 0.4948219358921051, "learning_rate": 4.5171206491218365e-06, "loss": 0.3703, "step": 35313 }, { "epoch": 1.620577302556101, "grad_norm": 0.43446993827819824, "learning_rate": 4.5168766078954615e-06, "loss": 0.3111, "step": 35314 }, { "epoch": 1.6206231930613555, "grad_norm": 0.4698401391506195, "learning_rate": 4.516632567830848e-06, "loss": 0.4173, "step": 35315 }, { "epoch": 1.62066908356661, "grad_norm": 0.4574570953845978, "learning_rate": 4.516388528928581e-06, "loss": 0.3504, "step": 35316 }, { "epoch": 1.6207149740718645, "grad_norm": 0.46265166997909546, "learning_rate": 4.516144491189245e-06, "loss": 0.3615, "step": 35317 }, { "epoch": 1.620760864577119, "grad_norm": 0.43952587246894836, "learning_rate": 4.51590045461343e-06, "loss": 0.3084, "step": 35318 }, { "epoch": 1.6208067550823735, "grad_norm": 0.48864656686782837, "learning_rate": 4.515656419201723e-06, "loss": 0.3839, "step": 35319 }, { "epoch": 1.620852645587628, "grad_norm": 0.45447131991386414, "learning_rate": 4.515412384954706e-06, "loss": 0.3373, "step": 35320 }, { "epoch": 1.6208985360928825, "grad_norm": 0.47784101963043213, "learning_rate": 4.515168351872971e-06, "loss": 0.4052, "step": 35321 }, { "epoch": 1.620944426598137, "grad_norm": 0.45506900548934937, "learning_rate": 4.5149243199571044e-06, "loss": 0.3264, "step": 35322 }, { "epoch": 1.6209903171033913, "grad_norm": 0.5313686728477478, "learning_rate": 4.51468028920769e-06, "loss": 0.37, "step": 35323 }, { "epoch": 1.6210362076086458, "grad_norm": 0.47584855556488037, "learning_rate": 4.514436259625317e-06, "loss": 0.3625, "step": 35324 }, { "epoch": 1.6210820981139002, "grad_norm": 0.4910423159599304, "learning_rate": 4.514192231210573e-06, "loss": 0.3418, "step": 35325 }, { "epoch": 1.6211279886191547, "grad_norm": 0.4454311728477478, "learning_rate": 4.51394820396404e-06, "loss": 0.3492, "step": 35326 }, { "epoch": 1.621173879124409, "grad_norm": 0.4458179473876953, "learning_rate": 4.51370417788631e-06, "loss": 0.3004, "step": 35327 }, { "epoch": 1.6212197696296635, "grad_norm": 0.4452100098133087, "learning_rate": 4.5134601529779685e-06, "loss": 0.2968, "step": 35328 }, { "epoch": 1.621265660134918, "grad_norm": 0.5456843972206116, "learning_rate": 4.5132161292396e-06, "loss": 0.4768, "step": 35329 }, { "epoch": 1.6213115506401725, "grad_norm": 0.47966283559799194, "learning_rate": 4.5129721066717955e-06, "loss": 0.3927, "step": 35330 }, { "epoch": 1.621357441145427, "grad_norm": 0.5243122577667236, "learning_rate": 4.5127280852751385e-06, "loss": 0.4663, "step": 35331 }, { "epoch": 1.6214033316506815, "grad_norm": 0.5283436179161072, "learning_rate": 4.512484065050216e-06, "loss": 0.4614, "step": 35332 }, { "epoch": 1.621449222155936, "grad_norm": 0.5018191337585449, "learning_rate": 4.512240045997616e-06, "loss": 0.4029, "step": 35333 }, { "epoch": 1.6214951126611905, "grad_norm": 0.44825559854507446, "learning_rate": 4.511996028117925e-06, "loss": 0.3212, "step": 35334 }, { "epoch": 1.621541003166445, "grad_norm": 0.4860967695713043, "learning_rate": 4.511752011411728e-06, "loss": 0.4162, "step": 35335 }, { "epoch": 1.6215868936716993, "grad_norm": 0.4744742810726166, "learning_rate": 4.511507995879615e-06, "loss": 0.4195, "step": 35336 }, { "epoch": 1.6216327841769538, "grad_norm": 0.4749017655849457, "learning_rate": 4.511263981522171e-06, "loss": 0.3712, "step": 35337 }, { "epoch": 1.6216786746822083, "grad_norm": 0.45471271872520447, "learning_rate": 4.511019968339982e-06, "loss": 0.2993, "step": 35338 }, { "epoch": 1.6217245651874626, "grad_norm": 0.42778950929641724, "learning_rate": 4.510775956333636e-06, "loss": 0.3015, "step": 35339 }, { "epoch": 1.621770455692717, "grad_norm": 0.46196243166923523, "learning_rate": 4.51053194550372e-06, "loss": 0.3482, "step": 35340 }, { "epoch": 1.6218163461979715, "grad_norm": 0.46847328543663025, "learning_rate": 4.51028793585082e-06, "loss": 0.3945, "step": 35341 }, { "epoch": 1.621862236703226, "grad_norm": 0.4768366515636444, "learning_rate": 4.510043927375522e-06, "loss": 0.3702, "step": 35342 }, { "epoch": 1.6219081272084805, "grad_norm": 0.49599939584732056, "learning_rate": 4.509799920078413e-06, "loss": 0.3961, "step": 35343 }, { "epoch": 1.621954017713735, "grad_norm": 0.4418201446533203, "learning_rate": 4.509555913960083e-06, "loss": 0.3051, "step": 35344 }, { "epoch": 1.6219999082189895, "grad_norm": 0.49317970871925354, "learning_rate": 4.509311909021115e-06, "loss": 0.4164, "step": 35345 }, { "epoch": 1.622045798724244, "grad_norm": 0.48890888690948486, "learning_rate": 4.509067905262097e-06, "loss": 0.4, "step": 35346 }, { "epoch": 1.6220916892294985, "grad_norm": 0.4389429986476898, "learning_rate": 4.508823902683617e-06, "loss": 0.2775, "step": 35347 }, { "epoch": 1.622137579734753, "grad_norm": 0.4804004728794098, "learning_rate": 4.5085799012862575e-06, "loss": 0.3154, "step": 35348 }, { "epoch": 1.6221834702400073, "grad_norm": 0.44557854533195496, "learning_rate": 4.50833590107061e-06, "loss": 0.3035, "step": 35349 }, { "epoch": 1.6222293607452618, "grad_norm": 0.4692220091819763, "learning_rate": 4.508091902037261e-06, "loss": 0.3504, "step": 35350 }, { "epoch": 1.6222752512505163, "grad_norm": 0.4567122757434845, "learning_rate": 4.507847904186793e-06, "loss": 0.3245, "step": 35351 }, { "epoch": 1.6223211417557706, "grad_norm": 0.49859338998794556, "learning_rate": 4.507603907519797e-06, "loss": 0.4147, "step": 35352 }, { "epoch": 1.622367032261025, "grad_norm": 0.5595880746841431, "learning_rate": 4.507359912036858e-06, "loss": 0.4829, "step": 35353 }, { "epoch": 1.6224129227662796, "grad_norm": 0.4808178246021271, "learning_rate": 4.507115917738562e-06, "loss": 0.4108, "step": 35354 }, { "epoch": 1.622458813271534, "grad_norm": 0.478484570980072, "learning_rate": 4.5068719246254985e-06, "loss": 0.3565, "step": 35355 }, { "epoch": 1.6225047037767886, "grad_norm": 0.5046977400779724, "learning_rate": 4.506627932698252e-06, "loss": 0.4142, "step": 35356 }, { "epoch": 1.622550594282043, "grad_norm": 0.46329638361930847, "learning_rate": 4.506383941957407e-06, "loss": 0.3184, "step": 35357 }, { "epoch": 1.6225964847872976, "grad_norm": 0.46192467212677, "learning_rate": 4.506139952403556e-06, "loss": 0.3659, "step": 35358 }, { "epoch": 1.622642375292552, "grad_norm": 0.4539344906806946, "learning_rate": 4.505895964037282e-06, "loss": 0.3492, "step": 35359 }, { "epoch": 1.6226882657978066, "grad_norm": 0.4501603841781616, "learning_rate": 4.50565197685917e-06, "loss": 0.3356, "step": 35360 }, { "epoch": 1.622734156303061, "grad_norm": 0.4797610938549042, "learning_rate": 4.505407990869812e-06, "loss": 0.3798, "step": 35361 }, { "epoch": 1.6227800468083153, "grad_norm": 0.48196348547935486, "learning_rate": 4.50516400606979e-06, "loss": 0.3728, "step": 35362 }, { "epoch": 1.6228259373135698, "grad_norm": 0.5280942320823669, "learning_rate": 4.504920022459691e-06, "loss": 0.4388, "step": 35363 }, { "epoch": 1.6228718278188243, "grad_norm": 0.45943018794059753, "learning_rate": 4.504676040040106e-06, "loss": 0.3152, "step": 35364 }, { "epoch": 1.6229177183240786, "grad_norm": 0.44655418395996094, "learning_rate": 4.504432058811618e-06, "loss": 0.3405, "step": 35365 }, { "epoch": 1.622963608829333, "grad_norm": 0.5128242373466492, "learning_rate": 4.504188078774813e-06, "loss": 0.4068, "step": 35366 }, { "epoch": 1.6230094993345876, "grad_norm": 0.4285009503364563, "learning_rate": 4.503944099930281e-06, "loss": 0.2535, "step": 35367 }, { "epoch": 1.623055389839842, "grad_norm": 0.5098015666007996, "learning_rate": 4.503700122278606e-06, "loss": 0.3909, "step": 35368 }, { "epoch": 1.6231012803450966, "grad_norm": 0.4700107276439667, "learning_rate": 4.503456145820377e-06, "loss": 0.3745, "step": 35369 }, { "epoch": 1.623147170850351, "grad_norm": 0.42465415596961975, "learning_rate": 4.503212170556176e-06, "loss": 0.3074, "step": 35370 }, { "epoch": 1.6231930613556056, "grad_norm": 0.5148603916168213, "learning_rate": 4.502968196486596e-06, "loss": 0.4515, "step": 35371 }, { "epoch": 1.62323895186086, "grad_norm": 0.4716781973838806, "learning_rate": 4.502724223612219e-06, "loss": 0.3312, "step": 35372 }, { "epoch": 1.6232848423661146, "grad_norm": 0.49580928683280945, "learning_rate": 4.502480251933634e-06, "loss": 0.3642, "step": 35373 }, { "epoch": 1.6233307328713689, "grad_norm": 0.4970363974571228, "learning_rate": 4.502236281451427e-06, "loss": 0.3948, "step": 35374 }, { "epoch": 1.6233766233766234, "grad_norm": 0.45346924662590027, "learning_rate": 4.501992312166185e-06, "loss": 0.2856, "step": 35375 }, { "epoch": 1.6234225138818779, "grad_norm": 0.479053795337677, "learning_rate": 4.501748344078493e-06, "loss": 0.4066, "step": 35376 }, { "epoch": 1.6234684043871324, "grad_norm": 0.5144193172454834, "learning_rate": 4.50150437718894e-06, "loss": 0.4022, "step": 35377 }, { "epoch": 1.6235142948923866, "grad_norm": 0.46112629771232605, "learning_rate": 4.501260411498112e-06, "loss": 0.3209, "step": 35378 }, { "epoch": 1.6235601853976411, "grad_norm": 0.45200371742248535, "learning_rate": 4.501016447006593e-06, "loss": 0.361, "step": 35379 }, { "epoch": 1.6236060759028956, "grad_norm": 0.4522021412849426, "learning_rate": 4.500772483714974e-06, "loss": 0.2926, "step": 35380 }, { "epoch": 1.6236519664081501, "grad_norm": 0.49623537063598633, "learning_rate": 4.50052852162384e-06, "loss": 0.3708, "step": 35381 }, { "epoch": 1.6236978569134046, "grad_norm": 0.4409749507904053, "learning_rate": 4.500284560733776e-06, "loss": 0.3059, "step": 35382 }, { "epoch": 1.6237437474186591, "grad_norm": 0.48686155676841736, "learning_rate": 4.500040601045371e-06, "loss": 0.3627, "step": 35383 }, { "epoch": 1.6237896379239136, "grad_norm": 0.5141722559928894, "learning_rate": 4.49979664255921e-06, "loss": 0.406, "step": 35384 }, { "epoch": 1.6238355284291681, "grad_norm": 0.4541027545928955, "learning_rate": 4.499552685275879e-06, "loss": 0.3222, "step": 35385 }, { "epoch": 1.6238814189344226, "grad_norm": 0.4378875195980072, "learning_rate": 4.499308729195968e-06, "loss": 0.2946, "step": 35386 }, { "epoch": 1.623927309439677, "grad_norm": 0.4457743167877197, "learning_rate": 4.499064774320061e-06, "loss": 0.3542, "step": 35387 }, { "epoch": 1.6239731999449314, "grad_norm": 0.4205888509750366, "learning_rate": 4.498820820648743e-06, "loss": 0.2757, "step": 35388 }, { "epoch": 1.6240190904501859, "grad_norm": 0.47514307498931885, "learning_rate": 4.498576868182605e-06, "loss": 0.3713, "step": 35389 }, { "epoch": 1.6240649809554402, "grad_norm": 0.46261709928512573, "learning_rate": 4.498332916922232e-06, "loss": 0.3235, "step": 35390 }, { "epoch": 1.6241108714606947, "grad_norm": 0.4553653597831726, "learning_rate": 4.498088966868206e-06, "loss": 0.3252, "step": 35391 }, { "epoch": 1.6241567619659492, "grad_norm": 0.47684141993522644, "learning_rate": 4.497845018021122e-06, "loss": 0.3229, "step": 35392 }, { "epoch": 1.6242026524712037, "grad_norm": 0.4928971230983734, "learning_rate": 4.497601070381561e-06, "loss": 0.3746, "step": 35393 }, { "epoch": 1.6242485429764582, "grad_norm": 0.4919544756412506, "learning_rate": 4.49735712395011e-06, "loss": 0.4021, "step": 35394 }, { "epoch": 1.6242944334817127, "grad_norm": 0.5035430192947388, "learning_rate": 4.497113178727357e-06, "loss": 0.4064, "step": 35395 }, { "epoch": 1.6243403239869671, "grad_norm": 0.5140207409858704, "learning_rate": 4.496869234713889e-06, "loss": 0.434, "step": 35396 }, { "epoch": 1.6243862144922216, "grad_norm": 0.48337191343307495, "learning_rate": 4.496625291910288e-06, "loss": 0.3935, "step": 35397 }, { "epoch": 1.6244321049974761, "grad_norm": 0.49859920144081116, "learning_rate": 4.49638135031715e-06, "loss": 0.3835, "step": 35398 }, { "epoch": 1.6244779955027306, "grad_norm": 0.48298388719558716, "learning_rate": 4.496137409935053e-06, "loss": 0.3939, "step": 35399 }, { "epoch": 1.624523886007985, "grad_norm": 0.4352428615093231, "learning_rate": 4.495893470764584e-06, "loss": 0.2523, "step": 35400 }, { "epoch": 1.6245697765132394, "grad_norm": 0.47244688868522644, "learning_rate": 4.495649532806336e-06, "loss": 0.3159, "step": 35401 }, { "epoch": 1.624615667018494, "grad_norm": 0.4874855577945709, "learning_rate": 4.49540559606089e-06, "loss": 0.3457, "step": 35402 }, { "epoch": 1.6246615575237482, "grad_norm": 0.4939901530742645, "learning_rate": 4.495161660528836e-06, "loss": 0.4127, "step": 35403 }, { "epoch": 1.6247074480290027, "grad_norm": 0.44046100974082947, "learning_rate": 4.494917726210756e-06, "loss": 0.283, "step": 35404 }, { "epoch": 1.6247533385342572, "grad_norm": 0.5058900713920593, "learning_rate": 4.4946737931072425e-06, "loss": 0.4453, "step": 35405 }, { "epoch": 1.6247992290395117, "grad_norm": 0.5065587162971497, "learning_rate": 4.494429861218878e-06, "loss": 0.4043, "step": 35406 }, { "epoch": 1.6248451195447662, "grad_norm": 0.45777061581611633, "learning_rate": 4.494185930546247e-06, "loss": 0.3342, "step": 35407 }, { "epoch": 1.6248910100500207, "grad_norm": 0.4357583224773407, "learning_rate": 4.493942001089943e-06, "loss": 0.3225, "step": 35408 }, { "epoch": 1.6249369005552752, "grad_norm": 0.47999265789985657, "learning_rate": 4.493698072850548e-06, "loss": 0.3396, "step": 35409 }, { "epoch": 1.6249827910605297, "grad_norm": 0.49701592326164246, "learning_rate": 4.4934541458286475e-06, "loss": 0.4065, "step": 35410 }, { "epoch": 1.6250286815657842, "grad_norm": 0.45117515325546265, "learning_rate": 4.493210220024832e-06, "loss": 0.3137, "step": 35411 }, { "epoch": 1.6250745720710384, "grad_norm": 0.45930275321006775, "learning_rate": 4.492966295439686e-06, "loss": 0.3068, "step": 35412 }, { "epoch": 1.625120462576293, "grad_norm": 0.44015687704086304, "learning_rate": 4.492722372073793e-06, "loss": 0.3124, "step": 35413 }, { "epoch": 1.6251663530815474, "grad_norm": 0.40956366062164307, "learning_rate": 4.492478449927746e-06, "loss": 0.2789, "step": 35414 }, { "epoch": 1.625212243586802, "grad_norm": 0.4509166181087494, "learning_rate": 4.492234529002127e-06, "loss": 0.3298, "step": 35415 }, { "epoch": 1.6252581340920562, "grad_norm": 0.5024508833885193, "learning_rate": 4.491990609297523e-06, "loss": 0.4077, "step": 35416 }, { "epoch": 1.6253040245973107, "grad_norm": 0.48165079951286316, "learning_rate": 4.491746690814523e-06, "loss": 0.4089, "step": 35417 }, { "epoch": 1.6253499151025652, "grad_norm": 0.4518864154815674, "learning_rate": 4.491502773553711e-06, "loss": 0.3262, "step": 35418 }, { "epoch": 1.6253958056078197, "grad_norm": 0.4665011763572693, "learning_rate": 4.491258857515673e-06, "loss": 0.326, "step": 35419 }, { "epoch": 1.6254416961130742, "grad_norm": 0.44789183139801025, "learning_rate": 4.4910149427009994e-06, "loss": 0.3254, "step": 35420 }, { "epoch": 1.6254875866183287, "grad_norm": 0.4629696011543274, "learning_rate": 4.490771029110274e-06, "loss": 0.3278, "step": 35421 }, { "epoch": 1.6255334771235832, "grad_norm": 0.44808197021484375, "learning_rate": 4.49052711674408e-06, "loss": 0.3458, "step": 35422 }, { "epoch": 1.6255793676288377, "grad_norm": 0.4735610783100128, "learning_rate": 4.49028320560301e-06, "loss": 0.3797, "step": 35423 }, { "epoch": 1.6256252581340922, "grad_norm": 0.4541337788105011, "learning_rate": 4.490039295687649e-06, "loss": 0.3137, "step": 35424 }, { "epoch": 1.6256711486393465, "grad_norm": 0.4597019851207733, "learning_rate": 4.489795386998581e-06, "loss": 0.3299, "step": 35425 }, { "epoch": 1.625717039144601, "grad_norm": 0.4339398741722107, "learning_rate": 4.489551479536395e-06, "loss": 0.2896, "step": 35426 }, { "epoch": 1.6257629296498555, "grad_norm": 0.46574637293815613, "learning_rate": 4.4893075733016765e-06, "loss": 0.3291, "step": 35427 }, { "epoch": 1.6258088201551097, "grad_norm": 0.4440774619579315, "learning_rate": 4.48906366829501e-06, "loss": 0.3258, "step": 35428 }, { "epoch": 1.6258547106603642, "grad_norm": 0.45186764001846313, "learning_rate": 4.488819764516986e-06, "loss": 0.3171, "step": 35429 }, { "epoch": 1.6259006011656187, "grad_norm": 0.46767154335975647, "learning_rate": 4.488575861968189e-06, "loss": 0.3365, "step": 35430 }, { "epoch": 1.6259464916708732, "grad_norm": 0.4650920629501343, "learning_rate": 4.488331960649206e-06, "loss": 0.3768, "step": 35431 }, { "epoch": 1.6259923821761277, "grad_norm": 0.4853613078594208, "learning_rate": 4.488088060560623e-06, "loss": 0.3833, "step": 35432 }, { "epoch": 1.6260382726813822, "grad_norm": 0.4423483610153198, "learning_rate": 4.487844161703027e-06, "loss": 0.3139, "step": 35433 }, { "epoch": 1.6260841631866367, "grad_norm": 0.48707109689712524, "learning_rate": 4.487600264077004e-06, "loss": 0.3913, "step": 35434 }, { "epoch": 1.6261300536918912, "grad_norm": 0.44890594482421875, "learning_rate": 4.487356367683139e-06, "loss": 0.2955, "step": 35435 }, { "epoch": 1.6261759441971457, "grad_norm": 0.46130481362342834, "learning_rate": 4.4871124725220215e-06, "loss": 0.3662, "step": 35436 }, { "epoch": 1.6262218347024002, "grad_norm": 0.449642151594162, "learning_rate": 4.486868578594237e-06, "loss": 0.3097, "step": 35437 }, { "epoch": 1.6262677252076545, "grad_norm": 0.437250554561615, "learning_rate": 4.486624685900371e-06, "loss": 0.2977, "step": 35438 }, { "epoch": 1.626313615712909, "grad_norm": 0.4712527096271515, "learning_rate": 4.486380794441011e-06, "loss": 0.3208, "step": 35439 }, { "epoch": 1.6263595062181635, "grad_norm": 0.48606249690055847, "learning_rate": 4.486136904216743e-06, "loss": 0.361, "step": 35440 }, { "epoch": 1.6264053967234178, "grad_norm": 0.4611618220806122, "learning_rate": 4.485893015228151e-06, "loss": 0.3163, "step": 35441 }, { "epoch": 1.6264512872286723, "grad_norm": 0.46257397532463074, "learning_rate": 4.485649127475828e-06, "loss": 0.3119, "step": 35442 }, { "epoch": 1.6264971777339268, "grad_norm": 0.5052624940872192, "learning_rate": 4.485405240960356e-06, "loss": 0.4335, "step": 35443 }, { "epoch": 1.6265430682391813, "grad_norm": 0.4491192102432251, "learning_rate": 4.485161355682319e-06, "loss": 0.3355, "step": 35444 }, { "epoch": 1.6265889587444358, "grad_norm": 0.6520278453826904, "learning_rate": 4.48491747164231e-06, "loss": 0.444, "step": 35445 }, { "epoch": 1.6266348492496903, "grad_norm": 0.4951401650905609, "learning_rate": 4.48467358884091e-06, "loss": 0.3589, "step": 35446 }, { "epoch": 1.6266807397549448, "grad_norm": 0.45782414078712463, "learning_rate": 4.484429707278707e-06, "loss": 0.3391, "step": 35447 }, { "epoch": 1.6267266302601993, "grad_norm": 0.46797433495521545, "learning_rate": 4.484185826956289e-06, "loss": 0.3183, "step": 35448 }, { "epoch": 1.6267725207654538, "grad_norm": 0.503954291343689, "learning_rate": 4.4839419478742415e-06, "loss": 0.3823, "step": 35449 }, { "epoch": 1.6268184112707083, "grad_norm": 0.46666282415390015, "learning_rate": 4.483698070033148e-06, "loss": 0.329, "step": 35450 }, { "epoch": 1.6268643017759625, "grad_norm": 0.4709049165248871, "learning_rate": 4.4834541934336e-06, "loss": 0.3067, "step": 35451 }, { "epoch": 1.626910192281217, "grad_norm": 0.4874848425388336, "learning_rate": 4.483210318076182e-06, "loss": 0.4211, "step": 35452 }, { "epoch": 1.6269560827864715, "grad_norm": 0.4769006073474884, "learning_rate": 4.4829664439614786e-06, "loss": 0.3616, "step": 35453 }, { "epoch": 1.6270019732917258, "grad_norm": 0.47058728337287903, "learning_rate": 4.4827225710900795e-06, "loss": 0.3959, "step": 35454 }, { "epoch": 1.6270478637969803, "grad_norm": 0.44224813580513, "learning_rate": 4.482478699462569e-06, "loss": 0.3241, "step": 35455 }, { "epoch": 1.6270937543022348, "grad_norm": 0.4598153829574585, "learning_rate": 4.482234829079531e-06, "loss": 0.3504, "step": 35456 }, { "epoch": 1.6271396448074893, "grad_norm": 0.5308739542961121, "learning_rate": 4.481990959941558e-06, "loss": 0.4136, "step": 35457 }, { "epoch": 1.6271855353127438, "grad_norm": 0.4537332355976105, "learning_rate": 4.481747092049233e-06, "loss": 0.2794, "step": 35458 }, { "epoch": 1.6272314258179983, "grad_norm": 0.649353563785553, "learning_rate": 4.4815032254031415e-06, "loss": 0.4316, "step": 35459 }, { "epoch": 1.6272773163232528, "grad_norm": 0.508204460144043, "learning_rate": 4.481259360003872e-06, "loss": 0.3581, "step": 35460 }, { "epoch": 1.6273232068285073, "grad_norm": 0.4447965621948242, "learning_rate": 4.48101549585201e-06, "loss": 0.3163, "step": 35461 }, { "epoch": 1.6273690973337618, "grad_norm": 0.4591626524925232, "learning_rate": 4.480771632948141e-06, "loss": 0.3193, "step": 35462 }, { "epoch": 1.627414987839016, "grad_norm": 0.49186432361602783, "learning_rate": 4.480527771292853e-06, "loss": 0.3673, "step": 35463 }, { "epoch": 1.6274608783442706, "grad_norm": 0.4688226282596588, "learning_rate": 4.480283910886733e-06, "loss": 0.3399, "step": 35464 }, { "epoch": 1.627506768849525, "grad_norm": 0.4897063970565796, "learning_rate": 4.4800400517303635e-06, "loss": 0.3743, "step": 35465 }, { "epoch": 1.6275526593547793, "grad_norm": 0.4215324819087982, "learning_rate": 4.479796193824335e-06, "loss": 0.259, "step": 35466 }, { "epoch": 1.6275985498600338, "grad_norm": 0.48412153124809265, "learning_rate": 4.479552337169233e-06, "loss": 0.3711, "step": 35467 }, { "epoch": 1.6276444403652883, "grad_norm": 0.5110636949539185, "learning_rate": 4.4793084817656435e-06, "loss": 0.4398, "step": 35468 }, { "epoch": 1.6276903308705428, "grad_norm": 0.4544069766998291, "learning_rate": 4.479064627614151e-06, "loss": 0.3661, "step": 35469 }, { "epoch": 1.6277362213757973, "grad_norm": 0.49742844700813293, "learning_rate": 4.478820774715346e-06, "loss": 0.4153, "step": 35470 }, { "epoch": 1.6277821118810518, "grad_norm": 0.47196000814437866, "learning_rate": 4.478576923069812e-06, "loss": 0.3621, "step": 35471 }, { "epoch": 1.6278280023863063, "grad_norm": 0.45216888189315796, "learning_rate": 4.4783330726781325e-06, "loss": 0.2966, "step": 35472 }, { "epoch": 1.6278738928915608, "grad_norm": 0.48925426602363586, "learning_rate": 4.478089223540902e-06, "loss": 0.4005, "step": 35473 }, { "epoch": 1.6279197833968153, "grad_norm": 0.43423992395401, "learning_rate": 4.4778453756587e-06, "loss": 0.3161, "step": 35474 }, { "epoch": 1.6279656739020698, "grad_norm": 0.4603155553340912, "learning_rate": 4.4776015290321145e-06, "loss": 0.3181, "step": 35475 }, { "epoch": 1.628011564407324, "grad_norm": 0.4592399597167969, "learning_rate": 4.477357683661734e-06, "loss": 0.3862, "step": 35476 }, { "epoch": 1.6280574549125786, "grad_norm": 0.49150657653808594, "learning_rate": 4.477113839548143e-06, "loss": 0.3746, "step": 35477 }, { "epoch": 1.628103345417833, "grad_norm": 0.4394973814487457, "learning_rate": 4.476869996691926e-06, "loss": 0.3272, "step": 35478 }, { "epoch": 1.6281492359230874, "grad_norm": 0.48618602752685547, "learning_rate": 4.476626155093673e-06, "loss": 0.3625, "step": 35479 }, { "epoch": 1.6281951264283419, "grad_norm": 0.46169352531433105, "learning_rate": 4.47638231475397e-06, "loss": 0.3445, "step": 35480 }, { "epoch": 1.6282410169335964, "grad_norm": 0.4708074927330017, "learning_rate": 4.4761384756734015e-06, "loss": 0.4214, "step": 35481 }, { "epoch": 1.6282869074388509, "grad_norm": 0.4659002423286438, "learning_rate": 4.4758946378525545e-06, "loss": 0.3545, "step": 35482 }, { "epoch": 1.6283327979441053, "grad_norm": 0.4801311790943146, "learning_rate": 4.475650801292016e-06, "loss": 0.3643, "step": 35483 }, { "epoch": 1.6283786884493598, "grad_norm": 0.458286315202713, "learning_rate": 4.47540696599237e-06, "loss": 0.3169, "step": 35484 }, { "epoch": 1.6284245789546143, "grad_norm": 0.49893537163734436, "learning_rate": 4.475163131954207e-06, "loss": 0.4242, "step": 35485 }, { "epoch": 1.6284704694598688, "grad_norm": 0.4759639799594879, "learning_rate": 4.4749192991781115e-06, "loss": 0.3748, "step": 35486 }, { "epoch": 1.6285163599651233, "grad_norm": 0.46488863229751587, "learning_rate": 4.4746754676646665e-06, "loss": 0.3196, "step": 35487 }, { "epoch": 1.6285622504703778, "grad_norm": 0.467911034822464, "learning_rate": 4.474431637414463e-06, "loss": 0.3213, "step": 35488 }, { "epoch": 1.6286081409756321, "grad_norm": 0.4602161645889282, "learning_rate": 4.474187808428086e-06, "loss": 0.338, "step": 35489 }, { "epoch": 1.6286540314808866, "grad_norm": 0.47880834341049194, "learning_rate": 4.47394398070612e-06, "loss": 0.3308, "step": 35490 }, { "epoch": 1.628699921986141, "grad_norm": 0.5082428455352783, "learning_rate": 4.4737001542491534e-06, "loss": 0.3946, "step": 35491 }, { "epoch": 1.6287458124913954, "grad_norm": 0.4887165427207947, "learning_rate": 4.473456329057773e-06, "loss": 0.3858, "step": 35492 }, { "epoch": 1.6287917029966499, "grad_norm": 0.4803290367126465, "learning_rate": 4.473212505132561e-06, "loss": 0.362, "step": 35493 }, { "epoch": 1.6288375935019044, "grad_norm": 0.4641997516155243, "learning_rate": 4.472968682474109e-06, "loss": 0.3409, "step": 35494 }, { "epoch": 1.6288834840071589, "grad_norm": 0.45477670431137085, "learning_rate": 4.472724861083001e-06, "loss": 0.3248, "step": 35495 }, { "epoch": 1.6289293745124134, "grad_norm": 0.5027965903282166, "learning_rate": 4.472481040959822e-06, "loss": 0.3931, "step": 35496 }, { "epoch": 1.6289752650176679, "grad_norm": 0.49711543321609497, "learning_rate": 4.47223722210516e-06, "loss": 0.4319, "step": 35497 }, { "epoch": 1.6290211555229224, "grad_norm": 0.46250882744789124, "learning_rate": 4.471993404519602e-06, "loss": 0.3301, "step": 35498 }, { "epoch": 1.6290670460281769, "grad_norm": 0.48355385661125183, "learning_rate": 4.471749588203733e-06, "loss": 0.3912, "step": 35499 }, { "epoch": 1.6291129365334314, "grad_norm": 0.4857760965824127, "learning_rate": 4.471505773158137e-06, "loss": 0.395, "step": 35500 }, { "epoch": 1.6291588270386856, "grad_norm": 0.5373702049255371, "learning_rate": 4.471261959383406e-06, "loss": 0.41, "step": 35501 }, { "epoch": 1.6292047175439401, "grad_norm": 0.4733721911907196, "learning_rate": 4.471018146880122e-06, "loss": 0.3898, "step": 35502 }, { "epoch": 1.6292506080491946, "grad_norm": 0.4943593144416809, "learning_rate": 4.4707743356488715e-06, "loss": 0.4354, "step": 35503 }, { "epoch": 1.6292964985544491, "grad_norm": 0.48634272813796997, "learning_rate": 4.470530525690242e-06, "loss": 0.4063, "step": 35504 }, { "epoch": 1.6293423890597034, "grad_norm": 0.5147380828857422, "learning_rate": 4.4702867170048215e-06, "loss": 0.378, "step": 35505 }, { "epoch": 1.629388279564958, "grad_norm": 0.487272173166275, "learning_rate": 4.470042909593191e-06, "loss": 0.3879, "step": 35506 }, { "epoch": 1.6294341700702124, "grad_norm": 0.4719718098640442, "learning_rate": 4.4697991034559415e-06, "loss": 0.4019, "step": 35507 }, { "epoch": 1.629480060575467, "grad_norm": 0.4661698043346405, "learning_rate": 4.469555298593661e-06, "loss": 0.3731, "step": 35508 }, { "epoch": 1.6295259510807214, "grad_norm": 0.4525972902774811, "learning_rate": 4.4693114950069275e-06, "loss": 0.2952, "step": 35509 }, { "epoch": 1.629571841585976, "grad_norm": 0.4766845405101776, "learning_rate": 4.469067692696334e-06, "loss": 0.3507, "step": 35510 }, { "epoch": 1.6296177320912304, "grad_norm": 0.5005804896354675, "learning_rate": 4.468823891662467e-06, "loss": 0.3847, "step": 35511 }, { "epoch": 1.629663622596485, "grad_norm": 0.49273112416267395, "learning_rate": 4.468580091905908e-06, "loss": 0.3962, "step": 35512 }, { "epoch": 1.6297095131017394, "grad_norm": 0.4786682724952698, "learning_rate": 4.468336293427249e-06, "loss": 0.3729, "step": 35513 }, { "epoch": 1.6297554036069937, "grad_norm": 0.4951097369194031, "learning_rate": 4.468092496227072e-06, "loss": 0.3763, "step": 35514 }, { "epoch": 1.6298012941122482, "grad_norm": 0.48214292526245117, "learning_rate": 4.467848700305963e-06, "loss": 0.3517, "step": 35515 }, { "epoch": 1.6298471846175027, "grad_norm": 0.4995957612991333, "learning_rate": 4.467604905664512e-06, "loss": 0.3974, "step": 35516 }, { "epoch": 1.629893075122757, "grad_norm": 0.4466254711151123, "learning_rate": 4.467361112303304e-06, "loss": 0.3325, "step": 35517 }, { "epoch": 1.6299389656280114, "grad_norm": 0.45246148109436035, "learning_rate": 4.467117320222923e-06, "loss": 0.3376, "step": 35518 }, { "epoch": 1.629984856133266, "grad_norm": 0.4883652329444885, "learning_rate": 4.466873529423957e-06, "loss": 0.4325, "step": 35519 }, { "epoch": 1.6300307466385204, "grad_norm": 0.45610594749450684, "learning_rate": 4.466629739906993e-06, "loss": 0.3206, "step": 35520 }, { "epoch": 1.630076637143775, "grad_norm": 0.5404648184776306, "learning_rate": 4.4663859516726124e-06, "loss": 0.394, "step": 35521 }, { "epoch": 1.6301225276490294, "grad_norm": 0.46589526534080505, "learning_rate": 4.466142164721409e-06, "loss": 0.3738, "step": 35522 }, { "epoch": 1.630168418154284, "grad_norm": 0.47595810890197754, "learning_rate": 4.4658983790539645e-06, "loss": 0.3888, "step": 35523 }, { "epoch": 1.6302143086595384, "grad_norm": 0.4296239912509918, "learning_rate": 4.465654594670864e-06, "loss": 0.2968, "step": 35524 }, { "epoch": 1.630260199164793, "grad_norm": 0.45196154713630676, "learning_rate": 4.465410811572698e-06, "loss": 0.3306, "step": 35525 }, { "epoch": 1.6303060896700474, "grad_norm": 0.4577309191226959, "learning_rate": 4.46516702976005e-06, "loss": 0.3368, "step": 35526 }, { "epoch": 1.6303519801753017, "grad_norm": 0.47257304191589355, "learning_rate": 4.4649232492335034e-06, "loss": 0.3618, "step": 35527 }, { "epoch": 1.6303978706805562, "grad_norm": 0.4611373543739319, "learning_rate": 4.46467946999365e-06, "loss": 0.3647, "step": 35528 }, { "epoch": 1.6304437611858107, "grad_norm": 0.4811505973339081, "learning_rate": 4.464435692041075e-06, "loss": 0.3351, "step": 35529 }, { "epoch": 1.630489651691065, "grad_norm": 0.4493474066257477, "learning_rate": 4.4641919153763596e-06, "loss": 0.3341, "step": 35530 }, { "epoch": 1.6305355421963195, "grad_norm": 0.5177345275878906, "learning_rate": 4.463948140000096e-06, "loss": 0.4794, "step": 35531 }, { "epoch": 1.630581432701574, "grad_norm": 0.4485916197299957, "learning_rate": 4.4637043659128675e-06, "loss": 0.2953, "step": 35532 }, { "epoch": 1.6306273232068285, "grad_norm": 0.4676695764064789, "learning_rate": 4.463460593115261e-06, "loss": 0.3367, "step": 35533 }, { "epoch": 1.630673213712083, "grad_norm": 0.49476614594459534, "learning_rate": 4.463216821607862e-06, "loss": 0.3665, "step": 35534 }, { "epoch": 1.6307191042173375, "grad_norm": 0.46426495909690857, "learning_rate": 4.462973051391257e-06, "loss": 0.3586, "step": 35535 }, { "epoch": 1.630764994722592, "grad_norm": 0.46032726764678955, "learning_rate": 4.462729282466033e-06, "loss": 0.285, "step": 35536 }, { "epoch": 1.6308108852278465, "grad_norm": 0.45155343413352966, "learning_rate": 4.462485514832773e-06, "loss": 0.3387, "step": 35537 }, { "epoch": 1.630856775733101, "grad_norm": 0.4509875178337097, "learning_rate": 4.462241748492069e-06, "loss": 0.3292, "step": 35538 }, { "epoch": 1.6309026662383554, "grad_norm": 0.44780299067497253, "learning_rate": 4.461997983444502e-06, "loss": 0.326, "step": 35539 }, { "epoch": 1.6309485567436097, "grad_norm": 0.44531455636024475, "learning_rate": 4.461754219690661e-06, "loss": 0.3416, "step": 35540 }, { "epoch": 1.6309944472488642, "grad_norm": 0.49277830123901367, "learning_rate": 4.46151045723113e-06, "loss": 0.4095, "step": 35541 }, { "epoch": 1.6310403377541187, "grad_norm": 0.48743683099746704, "learning_rate": 4.461266696066498e-06, "loss": 0.4087, "step": 35542 }, { "epoch": 1.631086228259373, "grad_norm": 0.4748031497001648, "learning_rate": 4.461022936197347e-06, "loss": 0.3238, "step": 35543 }, { "epoch": 1.6311321187646275, "grad_norm": 0.48686230182647705, "learning_rate": 4.460779177624268e-06, "loss": 0.3568, "step": 35544 }, { "epoch": 1.631178009269882, "grad_norm": 0.482170432806015, "learning_rate": 4.460535420347845e-06, "loss": 0.3542, "step": 35545 }, { "epoch": 1.6312238997751365, "grad_norm": 0.5064695477485657, "learning_rate": 4.460291664368663e-06, "loss": 0.4667, "step": 35546 }, { "epoch": 1.631269790280391, "grad_norm": 0.4757748246192932, "learning_rate": 4.46004790968731e-06, "loss": 0.3633, "step": 35547 }, { "epoch": 1.6313156807856455, "grad_norm": 0.4961652457714081, "learning_rate": 4.4598041563043714e-06, "loss": 0.4134, "step": 35548 }, { "epoch": 1.6313615712909, "grad_norm": 0.47700512409210205, "learning_rate": 4.459560404220431e-06, "loss": 0.3392, "step": 35549 }, { "epoch": 1.6314074617961545, "grad_norm": 0.44466179609298706, "learning_rate": 4.45931665343608e-06, "loss": 0.2977, "step": 35550 }, { "epoch": 1.631453352301409, "grad_norm": 0.5151621699333191, "learning_rate": 4.459072903951901e-06, "loss": 0.3584, "step": 35551 }, { "epoch": 1.6314992428066633, "grad_norm": 0.4676557779312134, "learning_rate": 4.458829155768481e-06, "loss": 0.333, "step": 35552 }, { "epoch": 1.6315451333119178, "grad_norm": 0.4812327027320862, "learning_rate": 4.458585408886406e-06, "loss": 0.4154, "step": 35553 }, { "epoch": 1.6315910238171722, "grad_norm": 0.508109450340271, "learning_rate": 4.458341663306263e-06, "loss": 0.4355, "step": 35554 }, { "epoch": 1.6316369143224265, "grad_norm": 0.4853738844394684, "learning_rate": 4.458097919028636e-06, "loss": 0.369, "step": 35555 }, { "epoch": 1.631682804827681, "grad_norm": 0.46401435136795044, "learning_rate": 4.4578541760541135e-06, "loss": 0.3513, "step": 35556 }, { "epoch": 1.6317286953329355, "grad_norm": 0.4852491617202759, "learning_rate": 4.4576104343832806e-06, "loss": 0.4132, "step": 35557 }, { "epoch": 1.63177458583819, "grad_norm": 0.5148482918739319, "learning_rate": 4.457366694016722e-06, "loss": 0.4633, "step": 35558 }, { "epoch": 1.6318204763434445, "grad_norm": 0.4928186535835266, "learning_rate": 4.457122954955026e-06, "loss": 0.3693, "step": 35559 }, { "epoch": 1.631866366848699, "grad_norm": 0.49567824602127075, "learning_rate": 4.456879217198778e-06, "loss": 0.3873, "step": 35560 }, { "epoch": 1.6319122573539535, "grad_norm": 0.5124966502189636, "learning_rate": 4.456635480748564e-06, "loss": 0.3893, "step": 35561 }, { "epoch": 1.631958147859208, "grad_norm": 0.5041531324386597, "learning_rate": 4.456391745604971e-06, "loss": 0.3597, "step": 35562 }, { "epoch": 1.6320040383644625, "grad_norm": 0.5168278217315674, "learning_rate": 4.456148011768584e-06, "loss": 0.4082, "step": 35563 }, { "epoch": 1.632049928869717, "grad_norm": 0.44661450386047363, "learning_rate": 4.455904279239987e-06, "loss": 0.3471, "step": 35564 }, { "epoch": 1.6320958193749713, "grad_norm": 0.48150816559791565, "learning_rate": 4.4556605480197714e-06, "loss": 0.3888, "step": 35565 }, { "epoch": 1.6321417098802258, "grad_norm": 0.4782267212867737, "learning_rate": 4.45541681810852e-06, "loss": 0.391, "step": 35566 }, { "epoch": 1.6321876003854803, "grad_norm": 0.48155367374420166, "learning_rate": 4.45517308950682e-06, "loss": 0.3625, "step": 35567 }, { "epoch": 1.6322334908907346, "grad_norm": 0.4995434284210205, "learning_rate": 4.454929362215255e-06, "loss": 0.385, "step": 35568 }, { "epoch": 1.632279381395989, "grad_norm": 0.4260987937450409, "learning_rate": 4.454685636234414e-06, "loss": 0.2639, "step": 35569 }, { "epoch": 1.6323252719012435, "grad_norm": 0.4724951982498169, "learning_rate": 4.454441911564881e-06, "loss": 0.3524, "step": 35570 }, { "epoch": 1.632371162406498, "grad_norm": 0.5226155519485474, "learning_rate": 4.454198188207242e-06, "loss": 0.4696, "step": 35571 }, { "epoch": 1.6324170529117525, "grad_norm": 0.4589334726333618, "learning_rate": 4.4539544661620855e-06, "loss": 0.3065, "step": 35572 }, { "epoch": 1.632462943417007, "grad_norm": 0.46255263686180115, "learning_rate": 4.453710745429997e-06, "loss": 0.3716, "step": 35573 }, { "epoch": 1.6325088339222615, "grad_norm": 0.4969880282878876, "learning_rate": 4.4534670260115606e-06, "loss": 0.4059, "step": 35574 }, { "epoch": 1.632554724427516, "grad_norm": 0.439707487821579, "learning_rate": 4.4532233079073645e-06, "loss": 0.3086, "step": 35575 }, { "epoch": 1.6326006149327705, "grad_norm": 0.4809582829475403, "learning_rate": 4.452979591117993e-06, "loss": 0.3789, "step": 35576 }, { "epoch": 1.632646505438025, "grad_norm": 0.44056758284568787, "learning_rate": 4.4527358756440326e-06, "loss": 0.3115, "step": 35577 }, { "epoch": 1.6326923959432793, "grad_norm": 0.5248442888259888, "learning_rate": 4.452492161486071e-06, "loss": 0.3129, "step": 35578 }, { "epoch": 1.6327382864485338, "grad_norm": 0.4645281136035919, "learning_rate": 4.452248448644693e-06, "loss": 0.3136, "step": 35579 }, { "epoch": 1.6327841769537883, "grad_norm": 0.46180009841918945, "learning_rate": 4.452004737120481e-06, "loss": 0.3209, "step": 35580 }, { "epoch": 1.6328300674590426, "grad_norm": 0.47050777077674866, "learning_rate": 4.451761026914028e-06, "loss": 0.3689, "step": 35581 }, { "epoch": 1.632875957964297, "grad_norm": 0.47126999497413635, "learning_rate": 4.451517318025917e-06, "loss": 0.3514, "step": 35582 }, { "epoch": 1.6329218484695516, "grad_norm": 0.45324668288230896, "learning_rate": 4.451273610456732e-06, "loss": 0.3456, "step": 35583 }, { "epoch": 1.632967738974806, "grad_norm": 0.5103207230567932, "learning_rate": 4.451029904207062e-06, "loss": 0.348, "step": 35584 }, { "epoch": 1.6330136294800606, "grad_norm": 0.45469173789024353, "learning_rate": 4.450786199277492e-06, "loss": 0.3173, "step": 35585 }, { "epoch": 1.633059519985315, "grad_norm": 0.46323975920677185, "learning_rate": 4.450542495668605e-06, "loss": 0.3609, "step": 35586 }, { "epoch": 1.6331054104905696, "grad_norm": 0.45869237184524536, "learning_rate": 4.4502987933809935e-06, "loss": 0.3383, "step": 35587 }, { "epoch": 1.633151300995824, "grad_norm": 0.47390031814575195, "learning_rate": 4.4500550924152385e-06, "loss": 0.3581, "step": 35588 }, { "epoch": 1.6331971915010786, "grad_norm": 0.4739677906036377, "learning_rate": 4.449811392771927e-06, "loss": 0.3352, "step": 35589 }, { "epoch": 1.6332430820063328, "grad_norm": 0.49291273951530457, "learning_rate": 4.449567694451646e-06, "loss": 0.3978, "step": 35590 }, { "epoch": 1.6332889725115873, "grad_norm": 0.4669926166534424, "learning_rate": 4.449323997454982e-06, "loss": 0.3408, "step": 35591 }, { "epoch": 1.6333348630168418, "grad_norm": 0.4422406852245331, "learning_rate": 4.449080301782517e-06, "loss": 0.2799, "step": 35592 }, { "epoch": 1.6333807535220963, "grad_norm": 0.45932871103286743, "learning_rate": 4.448836607434842e-06, "loss": 0.3624, "step": 35593 }, { "epoch": 1.6334266440273506, "grad_norm": 0.46716800332069397, "learning_rate": 4.448592914412542e-06, "loss": 0.3712, "step": 35594 }, { "epoch": 1.633472534532605, "grad_norm": 0.48791664838790894, "learning_rate": 4.4483492227162e-06, "loss": 0.365, "step": 35595 }, { "epoch": 1.6335184250378596, "grad_norm": 0.4919716417789459, "learning_rate": 4.448105532346406e-06, "loss": 0.4296, "step": 35596 }, { "epoch": 1.633564315543114, "grad_norm": 0.4869931936264038, "learning_rate": 4.447861843303743e-06, "loss": 0.3734, "step": 35597 }, { "epoch": 1.6336102060483686, "grad_norm": 0.5010695457458496, "learning_rate": 4.447618155588798e-06, "loss": 0.4287, "step": 35598 }, { "epoch": 1.633656096553623, "grad_norm": 0.42879530787467957, "learning_rate": 4.447374469202157e-06, "loss": 0.2804, "step": 35599 }, { "epoch": 1.6337019870588776, "grad_norm": 0.5159753561019897, "learning_rate": 4.4471307841444064e-06, "loss": 0.3996, "step": 35600 }, { "epoch": 1.633747877564132, "grad_norm": 0.5035858154296875, "learning_rate": 4.446887100416132e-06, "loss": 0.4228, "step": 35601 }, { "epoch": 1.6337937680693866, "grad_norm": 0.4661635458469391, "learning_rate": 4.446643418017917e-06, "loss": 0.3696, "step": 35602 }, { "epoch": 1.6338396585746409, "grad_norm": 0.45550912618637085, "learning_rate": 4.446399736950352e-06, "loss": 0.3488, "step": 35603 }, { "epoch": 1.6338855490798954, "grad_norm": 0.4925895929336548, "learning_rate": 4.446156057214021e-06, "loss": 0.3892, "step": 35604 }, { "epoch": 1.6339314395851499, "grad_norm": 0.4821862578392029, "learning_rate": 4.445912378809509e-06, "loss": 0.4019, "step": 35605 }, { "epoch": 1.6339773300904041, "grad_norm": 0.5001386404037476, "learning_rate": 4.445668701737404e-06, "loss": 0.3891, "step": 35606 }, { "epoch": 1.6340232205956586, "grad_norm": 0.47938376665115356, "learning_rate": 4.445425025998291e-06, "loss": 0.3918, "step": 35607 }, { "epoch": 1.6340691111009131, "grad_norm": 0.5095259547233582, "learning_rate": 4.445181351592754e-06, "loss": 0.3743, "step": 35608 }, { "epoch": 1.6341150016061676, "grad_norm": 0.5015885829925537, "learning_rate": 4.444937678521382e-06, "loss": 0.3679, "step": 35609 }, { "epoch": 1.6341608921114221, "grad_norm": 0.48548418283462524, "learning_rate": 4.4446940067847596e-06, "loss": 0.4005, "step": 35610 }, { "epoch": 1.6342067826166766, "grad_norm": 0.4505102038383484, "learning_rate": 4.444450336383472e-06, "loss": 0.3495, "step": 35611 }, { "epoch": 1.6342526731219311, "grad_norm": 0.5070098638534546, "learning_rate": 4.444206667318107e-06, "loss": 0.4246, "step": 35612 }, { "epoch": 1.6342985636271856, "grad_norm": 0.480770081281662, "learning_rate": 4.443962999589251e-06, "loss": 0.3877, "step": 35613 }, { "epoch": 1.6343444541324401, "grad_norm": 0.45760998129844666, "learning_rate": 4.443719333197484e-06, "loss": 0.3288, "step": 35614 }, { "epoch": 1.6343903446376946, "grad_norm": 0.49743637442588806, "learning_rate": 4.4434756681434e-06, "loss": 0.4411, "step": 35615 }, { "epoch": 1.634436235142949, "grad_norm": 0.48857566714286804, "learning_rate": 4.443232004427581e-06, "loss": 0.4169, "step": 35616 }, { "epoch": 1.6344821256482034, "grad_norm": 0.4636232256889343, "learning_rate": 4.442988342050613e-06, "loss": 0.3434, "step": 35617 }, { "epoch": 1.6345280161534579, "grad_norm": 0.41896358132362366, "learning_rate": 4.442744681013082e-06, "loss": 0.2875, "step": 35618 }, { "epoch": 1.6345739066587122, "grad_norm": 0.48809313774108887, "learning_rate": 4.442501021315575e-06, "loss": 0.3755, "step": 35619 }, { "epoch": 1.6346197971639667, "grad_norm": 0.4602876305580139, "learning_rate": 4.442257362958674e-06, "loss": 0.3694, "step": 35620 }, { "epoch": 1.6346656876692212, "grad_norm": 0.472431480884552, "learning_rate": 4.442013705942972e-06, "loss": 0.3619, "step": 35621 }, { "epoch": 1.6347115781744757, "grad_norm": 0.4966350197792053, "learning_rate": 4.441770050269051e-06, "loss": 0.409, "step": 35622 }, { "epoch": 1.6347574686797302, "grad_norm": 0.4724692702293396, "learning_rate": 4.441526395937493e-06, "loss": 0.4084, "step": 35623 }, { "epoch": 1.6348033591849847, "grad_norm": 0.4232155978679657, "learning_rate": 4.44128274294889e-06, "loss": 0.3098, "step": 35624 }, { "epoch": 1.6348492496902391, "grad_norm": 0.5240747928619385, "learning_rate": 4.4410390913038264e-06, "loss": 0.4283, "step": 35625 }, { "epoch": 1.6348951401954936, "grad_norm": 0.444128155708313, "learning_rate": 4.4407954410028865e-06, "loss": 0.287, "step": 35626 }, { "epoch": 1.6349410307007481, "grad_norm": 0.48619914054870605, "learning_rate": 4.440551792046657e-06, "loss": 0.3735, "step": 35627 }, { "epoch": 1.6349869212060026, "grad_norm": 0.5236396789550781, "learning_rate": 4.440308144435725e-06, "loss": 0.446, "step": 35628 }, { "epoch": 1.635032811711257, "grad_norm": 0.4722157120704651, "learning_rate": 4.440064498170673e-06, "loss": 0.3797, "step": 35629 }, { "epoch": 1.6350787022165114, "grad_norm": 0.43018248677253723, "learning_rate": 4.43982085325209e-06, "loss": 0.2873, "step": 35630 }, { "epoch": 1.635124592721766, "grad_norm": 0.5092353224754333, "learning_rate": 4.439577209680563e-06, "loss": 0.4322, "step": 35631 }, { "epoch": 1.6351704832270202, "grad_norm": 0.48995721340179443, "learning_rate": 4.439333567456675e-06, "loss": 0.4257, "step": 35632 }, { "epoch": 1.6352163737322747, "grad_norm": 0.46206921339035034, "learning_rate": 4.439089926581011e-06, "loss": 0.3162, "step": 35633 }, { "epoch": 1.6352622642375292, "grad_norm": 0.5321056246757507, "learning_rate": 4.438846287054161e-06, "loss": 0.4445, "step": 35634 }, { "epoch": 1.6353081547427837, "grad_norm": 0.4654044806957245, "learning_rate": 4.4386026488767075e-06, "loss": 0.3604, "step": 35635 }, { "epoch": 1.6353540452480382, "grad_norm": 0.4397132396697998, "learning_rate": 4.438359012049236e-06, "loss": 0.326, "step": 35636 }, { "epoch": 1.6353999357532927, "grad_norm": 0.45030948519706726, "learning_rate": 4.4381153765723366e-06, "loss": 0.364, "step": 35637 }, { "epoch": 1.6354458262585472, "grad_norm": 0.48009103536605835, "learning_rate": 4.437871742446591e-06, "loss": 0.3527, "step": 35638 }, { "epoch": 1.6354917167638017, "grad_norm": 0.4942985475063324, "learning_rate": 4.4376281096725855e-06, "loss": 0.3784, "step": 35639 }, { "epoch": 1.6355376072690562, "grad_norm": 0.4986535906791687, "learning_rate": 4.437384478250909e-06, "loss": 0.4117, "step": 35640 }, { "epoch": 1.6355834977743104, "grad_norm": 0.43956705927848816, "learning_rate": 4.4371408481821445e-06, "loss": 0.2991, "step": 35641 }, { "epoch": 1.635629388279565, "grad_norm": 0.472791850566864, "learning_rate": 4.436897219466877e-06, "loss": 0.3327, "step": 35642 }, { "epoch": 1.6356752787848194, "grad_norm": 0.47992491722106934, "learning_rate": 4.436653592105697e-06, "loss": 0.4125, "step": 35643 }, { "epoch": 1.6357211692900737, "grad_norm": 0.47765377163887024, "learning_rate": 4.436409966099185e-06, "loss": 0.3617, "step": 35644 }, { "epoch": 1.6357670597953282, "grad_norm": 0.5129196047782898, "learning_rate": 4.4361663414479286e-06, "loss": 0.4269, "step": 35645 }, { "epoch": 1.6358129503005827, "grad_norm": 0.5107359886169434, "learning_rate": 4.435922718152516e-06, "loss": 0.4252, "step": 35646 }, { "epoch": 1.6358588408058372, "grad_norm": 0.4762175381183624, "learning_rate": 4.435679096213531e-06, "loss": 0.3418, "step": 35647 }, { "epoch": 1.6359047313110917, "grad_norm": 0.4651995897293091, "learning_rate": 4.435435475631559e-06, "loss": 0.3623, "step": 35648 }, { "epoch": 1.6359506218163462, "grad_norm": 0.4553166329860687, "learning_rate": 4.435191856407187e-06, "loss": 0.3648, "step": 35649 }, { "epoch": 1.6359965123216007, "grad_norm": 0.43616724014282227, "learning_rate": 4.434948238541001e-06, "loss": 0.3024, "step": 35650 }, { "epoch": 1.6360424028268552, "grad_norm": 0.48796477913856506, "learning_rate": 4.434704622033583e-06, "loss": 0.4176, "step": 35651 }, { "epoch": 1.6360882933321097, "grad_norm": 0.48956993222236633, "learning_rate": 4.4344610068855245e-06, "loss": 0.3833, "step": 35652 }, { "epoch": 1.6361341838373642, "grad_norm": 0.47219109535217285, "learning_rate": 4.434217393097409e-06, "loss": 0.3959, "step": 35653 }, { "epoch": 1.6361800743426185, "grad_norm": 0.47809115052223206, "learning_rate": 4.433973780669821e-06, "loss": 0.3956, "step": 35654 }, { "epoch": 1.636225964847873, "grad_norm": 0.46596962213516235, "learning_rate": 4.433730169603349e-06, "loss": 0.3642, "step": 35655 }, { "epoch": 1.6362718553531275, "grad_norm": 0.4518429636955261, "learning_rate": 4.433486559898577e-06, "loss": 0.3208, "step": 35656 }, { "epoch": 1.6363177458583817, "grad_norm": 0.5009412169456482, "learning_rate": 4.433242951556088e-06, "loss": 0.4411, "step": 35657 }, { "epoch": 1.6363636363636362, "grad_norm": 0.43304285407066345, "learning_rate": 4.4329993445764744e-06, "loss": 0.2858, "step": 35658 }, { "epoch": 1.6364095268688907, "grad_norm": 0.46118709444999695, "learning_rate": 4.432755738960317e-06, "loss": 0.3386, "step": 35659 }, { "epoch": 1.6364554173741452, "grad_norm": 0.49600446224212646, "learning_rate": 4.432512134708203e-06, "loss": 0.3669, "step": 35660 }, { "epoch": 1.6365013078793997, "grad_norm": 0.4674980640411377, "learning_rate": 4.432268531820718e-06, "loss": 0.3494, "step": 35661 }, { "epoch": 1.6365471983846542, "grad_norm": 0.5115658640861511, "learning_rate": 4.43202493029845e-06, "loss": 0.4434, "step": 35662 }, { "epoch": 1.6365930888899087, "grad_norm": 0.4620290994644165, "learning_rate": 4.431781330141981e-06, "loss": 0.3318, "step": 35663 }, { "epoch": 1.6366389793951632, "grad_norm": 0.46119529008865356, "learning_rate": 4.431537731351897e-06, "loss": 0.3202, "step": 35664 }, { "epoch": 1.6366848699004177, "grad_norm": 0.48112353682518005, "learning_rate": 4.4312941339287894e-06, "loss": 0.3252, "step": 35665 }, { "epoch": 1.6367307604056722, "grad_norm": 0.49085715413093567, "learning_rate": 4.431050537873238e-06, "loss": 0.4159, "step": 35666 }, { "epoch": 1.6367766509109265, "grad_norm": 0.4530125558376312, "learning_rate": 4.430806943185828e-06, "loss": 0.3263, "step": 35667 }, { "epoch": 1.636822541416181, "grad_norm": 0.5141487717628479, "learning_rate": 4.43056334986715e-06, "loss": 0.3582, "step": 35668 }, { "epoch": 1.6368684319214355, "grad_norm": 0.4410403072834015, "learning_rate": 4.430319757917788e-06, "loss": 0.2958, "step": 35669 }, { "epoch": 1.6369143224266898, "grad_norm": 0.4575383961200714, "learning_rate": 4.430076167338326e-06, "loss": 0.3047, "step": 35670 }, { "epoch": 1.6369602129319443, "grad_norm": 0.5113713145256042, "learning_rate": 4.429832578129352e-06, "loss": 0.4092, "step": 35671 }, { "epoch": 1.6370061034371988, "grad_norm": 0.445078581571579, "learning_rate": 4.42958899029145e-06, "loss": 0.2902, "step": 35672 }, { "epoch": 1.6370519939424533, "grad_norm": 0.4611130356788635, "learning_rate": 4.429345403825205e-06, "loss": 0.2989, "step": 35673 }, { "epoch": 1.6370978844477078, "grad_norm": 0.4543219208717346, "learning_rate": 4.4291018187312055e-06, "loss": 0.3287, "step": 35674 }, { "epoch": 1.6371437749529623, "grad_norm": 0.46497899293899536, "learning_rate": 4.428858235010037e-06, "loss": 0.2495, "step": 35675 }, { "epoch": 1.6371896654582168, "grad_norm": 0.46645691990852356, "learning_rate": 4.428614652662283e-06, "loss": 0.33, "step": 35676 }, { "epoch": 1.6372355559634713, "grad_norm": 0.5070371031761169, "learning_rate": 4.428371071688531e-06, "loss": 0.3813, "step": 35677 }, { "epoch": 1.6372814464687258, "grad_norm": 0.4763984978199005, "learning_rate": 4.4281274920893656e-06, "loss": 0.3436, "step": 35678 }, { "epoch": 1.63732733697398, "grad_norm": 0.4638059437274933, "learning_rate": 4.427883913865372e-06, "loss": 0.3455, "step": 35679 }, { "epoch": 1.6373732274792345, "grad_norm": 0.45774126052856445, "learning_rate": 4.427640337017139e-06, "loss": 0.3073, "step": 35680 }, { "epoch": 1.637419117984489, "grad_norm": 0.4965127408504486, "learning_rate": 4.42739676154525e-06, "loss": 0.3966, "step": 35681 }, { "epoch": 1.6374650084897435, "grad_norm": 0.46903929114341736, "learning_rate": 4.427153187450291e-06, "loss": 0.3807, "step": 35682 }, { "epoch": 1.6375108989949978, "grad_norm": 0.5189263224601746, "learning_rate": 4.426909614732847e-06, "loss": 0.4119, "step": 35683 }, { "epoch": 1.6375567895002523, "grad_norm": 0.5395647287368774, "learning_rate": 4.426666043393506e-06, "loss": 0.4608, "step": 35684 }, { "epoch": 1.6376026800055068, "grad_norm": 0.4999631643295288, "learning_rate": 4.42642247343285e-06, "loss": 0.3856, "step": 35685 }, { "epoch": 1.6376485705107613, "grad_norm": 0.5340934991836548, "learning_rate": 4.426178904851468e-06, "loss": 0.5455, "step": 35686 }, { "epoch": 1.6376944610160158, "grad_norm": 0.4663400650024414, "learning_rate": 4.425935337649947e-06, "loss": 0.345, "step": 35687 }, { "epoch": 1.6377403515212703, "grad_norm": 0.49454426765441895, "learning_rate": 4.425691771828866e-06, "loss": 0.3882, "step": 35688 }, { "epoch": 1.6377862420265248, "grad_norm": 0.40748757123947144, "learning_rate": 4.425448207388819e-06, "loss": 0.2545, "step": 35689 }, { "epoch": 1.6378321325317793, "grad_norm": 0.4889836013317108, "learning_rate": 4.425204644330385e-06, "loss": 0.3993, "step": 35690 }, { "epoch": 1.6378780230370338, "grad_norm": 0.46511900424957275, "learning_rate": 4.4249610826541534e-06, "loss": 0.3789, "step": 35691 }, { "epoch": 1.637923913542288, "grad_norm": 0.46183717250823975, "learning_rate": 4.424717522360709e-06, "loss": 0.3374, "step": 35692 }, { "epoch": 1.6379698040475426, "grad_norm": 0.44178566336631775, "learning_rate": 4.424473963450638e-06, "loss": 0.3271, "step": 35693 }, { "epoch": 1.638015694552797, "grad_norm": 0.48861458897590637, "learning_rate": 4.424230405924523e-06, "loss": 0.4527, "step": 35694 }, { "epoch": 1.6380615850580513, "grad_norm": 0.45539265871047974, "learning_rate": 4.4239868497829544e-06, "loss": 0.3409, "step": 35695 }, { "epoch": 1.6381074755633058, "grad_norm": 0.45385536551475525, "learning_rate": 4.423743295026516e-06, "loss": 0.3377, "step": 35696 }, { "epoch": 1.6381533660685603, "grad_norm": 0.47125518321990967, "learning_rate": 4.423499741655792e-06, "loss": 0.3548, "step": 35697 }, { "epoch": 1.6381992565738148, "grad_norm": 0.40974971652030945, "learning_rate": 4.423256189671369e-06, "loss": 0.252, "step": 35698 }, { "epoch": 1.6382451470790693, "grad_norm": 0.4433223307132721, "learning_rate": 4.423012639073834e-06, "loss": 0.3281, "step": 35699 }, { "epoch": 1.6382910375843238, "grad_norm": 0.46210217475891113, "learning_rate": 4.4227690898637706e-06, "loss": 0.3633, "step": 35700 }, { "epoch": 1.6383369280895783, "grad_norm": 0.4420068860054016, "learning_rate": 4.422525542041765e-06, "loss": 0.3456, "step": 35701 }, { "epoch": 1.6383828185948328, "grad_norm": 0.4367007315158844, "learning_rate": 4.4222819956084034e-06, "loss": 0.3227, "step": 35702 }, { "epoch": 1.6384287091000873, "grad_norm": 0.4843018054962158, "learning_rate": 4.422038450564272e-06, "loss": 0.3768, "step": 35703 }, { "epoch": 1.6384745996053418, "grad_norm": 0.47956719994544983, "learning_rate": 4.421794906909955e-06, "loss": 0.3962, "step": 35704 }, { "epoch": 1.638520490110596, "grad_norm": 0.45831501483917236, "learning_rate": 4.421551364646039e-06, "loss": 0.2951, "step": 35705 }, { "epoch": 1.6385663806158506, "grad_norm": 0.49406731128692627, "learning_rate": 4.42130782377311e-06, "loss": 0.4475, "step": 35706 }, { "epoch": 1.638612271121105, "grad_norm": 0.44962841272354126, "learning_rate": 4.4210642842917515e-06, "loss": 0.3341, "step": 35707 }, { "epoch": 1.6386581616263594, "grad_norm": 0.4390642046928406, "learning_rate": 4.4208207462025524e-06, "loss": 0.3234, "step": 35708 }, { "epoch": 1.6387040521316139, "grad_norm": 0.48307743668556213, "learning_rate": 4.420577209506097e-06, "loss": 0.4099, "step": 35709 }, { "epoch": 1.6387499426368684, "grad_norm": 0.48022863268852234, "learning_rate": 4.4203336742029675e-06, "loss": 0.3806, "step": 35710 }, { "epoch": 1.6387958331421228, "grad_norm": 0.46951502561569214, "learning_rate": 4.420090140293756e-06, "loss": 0.3416, "step": 35711 }, { "epoch": 1.6388417236473773, "grad_norm": 0.4614250659942627, "learning_rate": 4.419846607779043e-06, "loss": 0.3378, "step": 35712 }, { "epoch": 1.6388876141526318, "grad_norm": 0.4302309453487396, "learning_rate": 4.419603076659416e-06, "loss": 0.3015, "step": 35713 }, { "epoch": 1.6389335046578863, "grad_norm": 0.4283803403377533, "learning_rate": 4.419359546935461e-06, "loss": 0.2891, "step": 35714 }, { "epoch": 1.6389793951631408, "grad_norm": 0.4737962484359741, "learning_rate": 4.419116018607762e-06, "loss": 0.4054, "step": 35715 }, { "epoch": 1.6390252856683953, "grad_norm": 0.4839833974838257, "learning_rate": 4.418872491676905e-06, "loss": 0.392, "step": 35716 }, { "epoch": 1.6390711761736498, "grad_norm": 0.47726672887802124, "learning_rate": 4.418628966143478e-06, "loss": 0.3963, "step": 35717 }, { "epoch": 1.6391170666789041, "grad_norm": 0.45434364676475525, "learning_rate": 4.418385442008065e-06, "loss": 0.321, "step": 35718 }, { "epoch": 1.6391629571841586, "grad_norm": 0.5004035830497742, "learning_rate": 4.418141919271249e-06, "loss": 0.4608, "step": 35719 }, { "epoch": 1.639208847689413, "grad_norm": 0.47498834133148193, "learning_rate": 4.4178983979336206e-06, "loss": 0.3719, "step": 35720 }, { "epoch": 1.6392547381946674, "grad_norm": 0.5097767114639282, "learning_rate": 4.417654877995762e-06, "loss": 0.4361, "step": 35721 }, { "epoch": 1.6393006286999219, "grad_norm": 0.4756438732147217, "learning_rate": 4.417411359458259e-06, "loss": 0.3419, "step": 35722 }, { "epoch": 1.6393465192051764, "grad_norm": 0.5038793683052063, "learning_rate": 4.417167842321699e-06, "loss": 0.3645, "step": 35723 }, { "epoch": 1.6393924097104309, "grad_norm": 0.48670700192451477, "learning_rate": 4.4169243265866656e-06, "loss": 0.3894, "step": 35724 }, { "epoch": 1.6394383002156854, "grad_norm": 0.4713616669178009, "learning_rate": 4.416680812253745e-06, "loss": 0.3841, "step": 35725 }, { "epoch": 1.6394841907209399, "grad_norm": 0.496724009513855, "learning_rate": 4.416437299323523e-06, "loss": 0.405, "step": 35726 }, { "epoch": 1.6395300812261944, "grad_norm": 0.48274707794189453, "learning_rate": 4.416193787796587e-06, "loss": 0.4238, "step": 35727 }, { "epoch": 1.6395759717314489, "grad_norm": 0.472925066947937, "learning_rate": 4.415950277673519e-06, "loss": 0.332, "step": 35728 }, { "epoch": 1.6396218622367034, "grad_norm": 0.5082917809486389, "learning_rate": 4.415706768954904e-06, "loss": 0.4158, "step": 35729 }, { "epoch": 1.6396677527419576, "grad_norm": 0.46859049797058105, "learning_rate": 4.4154632616413335e-06, "loss": 0.3702, "step": 35730 }, { "epoch": 1.6397136432472121, "grad_norm": 0.45591527223587036, "learning_rate": 4.415219755733389e-06, "loss": 0.3626, "step": 35731 }, { "epoch": 1.6397595337524666, "grad_norm": 0.4450148642063141, "learning_rate": 4.414976251231654e-06, "loss": 0.2943, "step": 35732 }, { "epoch": 1.639805424257721, "grad_norm": 0.4886983335018158, "learning_rate": 4.414732748136719e-06, "loss": 0.356, "step": 35733 }, { "epoch": 1.6398513147629754, "grad_norm": 0.4273974299430847, "learning_rate": 4.414489246449166e-06, "loss": 0.2776, "step": 35734 }, { "epoch": 1.63989720526823, "grad_norm": 0.45374929904937744, "learning_rate": 4.414245746169581e-06, "loss": 0.3289, "step": 35735 }, { "epoch": 1.6399430957734844, "grad_norm": 0.43119311332702637, "learning_rate": 4.414002247298551e-06, "loss": 0.2809, "step": 35736 }, { "epoch": 1.639988986278739, "grad_norm": 0.47092658281326294, "learning_rate": 4.41375874983666e-06, "loss": 0.3749, "step": 35737 }, { "epoch": 1.6400348767839934, "grad_norm": 0.47063392400741577, "learning_rate": 4.4135152537844935e-06, "loss": 0.3305, "step": 35738 }, { "epoch": 1.640080767289248, "grad_norm": 0.4680700898170471, "learning_rate": 4.413271759142639e-06, "loss": 0.3773, "step": 35739 }, { "epoch": 1.6401266577945024, "grad_norm": 0.5166497230529785, "learning_rate": 4.413028265911681e-06, "loss": 0.4445, "step": 35740 }, { "epoch": 1.640172548299757, "grad_norm": 0.4919517934322357, "learning_rate": 4.412784774092203e-06, "loss": 0.3635, "step": 35741 }, { "epoch": 1.6402184388050114, "grad_norm": 0.46016979217529297, "learning_rate": 4.412541283684794e-06, "loss": 0.3072, "step": 35742 }, { "epoch": 1.6402643293102657, "grad_norm": 0.46814101934432983, "learning_rate": 4.412297794690038e-06, "loss": 0.3468, "step": 35743 }, { "epoch": 1.6403102198155202, "grad_norm": 0.5016466975212097, "learning_rate": 4.4120543071085185e-06, "loss": 0.3945, "step": 35744 }, { "epoch": 1.6403561103207747, "grad_norm": 0.46005645394325256, "learning_rate": 4.411810820940825e-06, "loss": 0.3127, "step": 35745 }, { "epoch": 1.640402000826029, "grad_norm": 0.45622026920318604, "learning_rate": 4.411567336187539e-06, "loss": 0.3247, "step": 35746 }, { "epoch": 1.6404478913312834, "grad_norm": 0.5015740394592285, "learning_rate": 4.411323852849249e-06, "loss": 0.374, "step": 35747 }, { "epoch": 1.640493781836538, "grad_norm": 0.4581682085990906, "learning_rate": 4.411080370926539e-06, "loss": 0.32, "step": 35748 }, { "epoch": 1.6405396723417924, "grad_norm": 0.4913271963596344, "learning_rate": 4.410836890419995e-06, "loss": 0.4027, "step": 35749 }, { "epoch": 1.640585562847047, "grad_norm": 0.4913686513900757, "learning_rate": 4.410593411330201e-06, "loss": 0.3897, "step": 35750 }, { "epoch": 1.6406314533523014, "grad_norm": 0.4772675037384033, "learning_rate": 4.4103499336577445e-06, "loss": 0.4017, "step": 35751 }, { "epoch": 1.640677343857556, "grad_norm": 0.45257773995399475, "learning_rate": 4.410106457403212e-06, "loss": 0.3262, "step": 35752 }, { "epoch": 1.6407232343628104, "grad_norm": 0.46693772077560425, "learning_rate": 4.409862982567185e-06, "loss": 0.3501, "step": 35753 }, { "epoch": 1.640769124868065, "grad_norm": 0.4553653299808502, "learning_rate": 4.409619509150253e-06, "loss": 0.3772, "step": 35754 }, { "epoch": 1.6408150153733194, "grad_norm": 0.48222243785858154, "learning_rate": 4.4093760371529994e-06, "loss": 0.3874, "step": 35755 }, { "epoch": 1.6408609058785737, "grad_norm": 0.5167502164840698, "learning_rate": 4.409132566576008e-06, "loss": 0.4269, "step": 35756 }, { "epoch": 1.6409067963838282, "grad_norm": 0.48042425513267517, "learning_rate": 4.40888909741987e-06, "loss": 0.3749, "step": 35757 }, { "epoch": 1.6409526868890827, "grad_norm": 0.49616631865501404, "learning_rate": 4.408645629685166e-06, "loss": 0.3191, "step": 35758 }, { "epoch": 1.640998577394337, "grad_norm": 0.47820743918418884, "learning_rate": 4.40840216337248e-06, "loss": 0.3984, "step": 35759 }, { "epoch": 1.6410444678995915, "grad_norm": 0.4152669310569763, "learning_rate": 4.408158698482402e-06, "loss": 0.294, "step": 35760 }, { "epoch": 1.641090358404846, "grad_norm": 0.41199105978012085, "learning_rate": 4.407915235015516e-06, "loss": 0.2794, "step": 35761 }, { "epoch": 1.6411362489101005, "grad_norm": 0.4818490743637085, "learning_rate": 4.407671772972406e-06, "loss": 0.4046, "step": 35762 }, { "epoch": 1.641182139415355, "grad_norm": 0.47653257846832275, "learning_rate": 4.407428312353659e-06, "loss": 0.3434, "step": 35763 }, { "epoch": 1.6412280299206095, "grad_norm": 0.4745029807090759, "learning_rate": 4.4071848531598605e-06, "loss": 0.3408, "step": 35764 }, { "epoch": 1.641273920425864, "grad_norm": 0.4521040916442871, "learning_rate": 4.406941395391595e-06, "loss": 0.3199, "step": 35765 }, { "epoch": 1.6413198109311185, "grad_norm": 0.5148151516914368, "learning_rate": 4.4066979390494455e-06, "loss": 0.4018, "step": 35766 }, { "epoch": 1.641365701436373, "grad_norm": 0.5304829478263855, "learning_rate": 4.406454484134004e-06, "loss": 0.4389, "step": 35767 }, { "epoch": 1.6414115919416272, "grad_norm": 0.453359991312027, "learning_rate": 4.406211030645851e-06, "loss": 0.363, "step": 35768 }, { "epoch": 1.6414574824468817, "grad_norm": 0.48904773592948914, "learning_rate": 4.405967578585571e-06, "loss": 0.402, "step": 35769 }, { "epoch": 1.6415033729521362, "grad_norm": 0.49641793966293335, "learning_rate": 4.405724127953754e-06, "loss": 0.3901, "step": 35770 }, { "epoch": 1.6415492634573907, "grad_norm": 0.4273126721382141, "learning_rate": 4.405480678750983e-06, "loss": 0.3186, "step": 35771 }, { "epoch": 1.641595153962645, "grad_norm": 0.46698257327079773, "learning_rate": 4.405237230977841e-06, "loss": 0.3447, "step": 35772 }, { "epoch": 1.6416410444678995, "grad_norm": 0.4783484637737274, "learning_rate": 4.404993784634918e-06, "loss": 0.3653, "step": 35773 }, { "epoch": 1.641686934973154, "grad_norm": 0.4641822874546051, "learning_rate": 4.404750339722796e-06, "loss": 0.3076, "step": 35774 }, { "epoch": 1.6417328254784085, "grad_norm": 0.4685385823249817, "learning_rate": 4.404506896242062e-06, "loss": 0.2974, "step": 35775 }, { "epoch": 1.641778715983663, "grad_norm": 0.46848735213279724, "learning_rate": 4.404263454193301e-06, "loss": 0.3766, "step": 35776 }, { "epoch": 1.6418246064889175, "grad_norm": 0.4908662438392639, "learning_rate": 4.404020013577099e-06, "loss": 0.3875, "step": 35777 }, { "epoch": 1.641870496994172, "grad_norm": 0.4622827172279358, "learning_rate": 4.403776574394039e-06, "loss": 0.3268, "step": 35778 }, { "epoch": 1.6419163874994265, "grad_norm": 0.5139374732971191, "learning_rate": 4.40353313664471e-06, "loss": 0.3581, "step": 35779 }, { "epoch": 1.641962278004681, "grad_norm": 0.4291189908981323, "learning_rate": 4.403289700329695e-06, "loss": 0.2732, "step": 35780 }, { "epoch": 1.6420081685099353, "grad_norm": 0.4418382942676544, "learning_rate": 4.4030462654495784e-06, "loss": 0.3084, "step": 35781 }, { "epoch": 1.6420540590151897, "grad_norm": 0.4305156171321869, "learning_rate": 4.402802832004949e-06, "loss": 0.2827, "step": 35782 }, { "epoch": 1.6420999495204442, "grad_norm": 0.4875820279121399, "learning_rate": 4.40255939999639e-06, "loss": 0.3633, "step": 35783 }, { "epoch": 1.6421458400256985, "grad_norm": 0.45124122500419617, "learning_rate": 4.402315969424486e-06, "loss": 0.3739, "step": 35784 }, { "epoch": 1.642191730530953, "grad_norm": 0.4058588445186615, "learning_rate": 4.402072540289825e-06, "loss": 0.2511, "step": 35785 }, { "epoch": 1.6422376210362075, "grad_norm": 0.47803589701652527, "learning_rate": 4.4018291125929905e-06, "loss": 0.3323, "step": 35786 }, { "epoch": 1.642283511541462, "grad_norm": 0.4509625732898712, "learning_rate": 4.4015856863345665e-06, "loss": 0.3208, "step": 35787 }, { "epoch": 1.6423294020467165, "grad_norm": 0.4389108419418335, "learning_rate": 4.401342261515142e-06, "loss": 0.3152, "step": 35788 }, { "epoch": 1.642375292551971, "grad_norm": 0.4610974192619324, "learning_rate": 4.4010988381353e-06, "loss": 0.3349, "step": 35789 }, { "epoch": 1.6424211830572255, "grad_norm": 0.4365984797477722, "learning_rate": 4.400855416195625e-06, "loss": 0.3258, "step": 35790 }, { "epoch": 1.64246707356248, "grad_norm": 0.4406372010707855, "learning_rate": 4.400611995696706e-06, "loss": 0.3096, "step": 35791 }, { "epoch": 1.6425129640677345, "grad_norm": 0.46938857436180115, "learning_rate": 4.400368576639125e-06, "loss": 0.3348, "step": 35792 }, { "epoch": 1.642558854572989, "grad_norm": 0.4995099902153015, "learning_rate": 4.400125159023466e-06, "loss": 0.4216, "step": 35793 }, { "epoch": 1.6426047450782433, "grad_norm": 0.4595467150211334, "learning_rate": 4.3998817428503195e-06, "loss": 0.3556, "step": 35794 }, { "epoch": 1.6426506355834978, "grad_norm": 0.4711065888404846, "learning_rate": 4.399638328120268e-06, "loss": 0.3722, "step": 35795 }, { "epoch": 1.6426965260887523, "grad_norm": 0.44299525022506714, "learning_rate": 4.399394914833897e-06, "loss": 0.3163, "step": 35796 }, { "epoch": 1.6427424165940065, "grad_norm": 0.500495970249176, "learning_rate": 4.399151502991789e-06, "loss": 0.3963, "step": 35797 }, { "epoch": 1.642788307099261, "grad_norm": 0.5117055773735046, "learning_rate": 4.398908092594535e-06, "loss": 0.4474, "step": 35798 }, { "epoch": 1.6428341976045155, "grad_norm": 0.46225759387016296, "learning_rate": 4.398664683642716e-06, "loss": 0.3199, "step": 35799 }, { "epoch": 1.64288008810977, "grad_norm": 0.4997924864292145, "learning_rate": 4.3984212761369176e-06, "loss": 0.407, "step": 35800 }, { "epoch": 1.6429259786150245, "grad_norm": 0.49980151653289795, "learning_rate": 4.398177870077728e-06, "loss": 0.3741, "step": 35801 }, { "epoch": 1.642971869120279, "grad_norm": 0.4796699583530426, "learning_rate": 4.397934465465731e-06, "loss": 0.4081, "step": 35802 }, { "epoch": 1.6430177596255335, "grad_norm": 0.4289434850215912, "learning_rate": 4.397691062301508e-06, "loss": 0.3122, "step": 35803 }, { "epoch": 1.643063650130788, "grad_norm": 0.5133978128433228, "learning_rate": 4.397447660585651e-06, "loss": 0.3105, "step": 35804 }, { "epoch": 1.6431095406360425, "grad_norm": 0.4320638179779053, "learning_rate": 4.3972042603187425e-06, "loss": 0.2922, "step": 35805 }, { "epoch": 1.643155431141297, "grad_norm": 0.4650939404964447, "learning_rate": 4.396960861501366e-06, "loss": 0.3638, "step": 35806 }, { "epoch": 1.6432013216465513, "grad_norm": 0.4647216498851776, "learning_rate": 4.3967174641341095e-06, "loss": 0.3077, "step": 35807 }, { "epoch": 1.6432472121518058, "grad_norm": 0.43632805347442627, "learning_rate": 4.3964740682175576e-06, "loss": 0.3227, "step": 35808 }, { "epoch": 1.6432931026570603, "grad_norm": 0.4735108017921448, "learning_rate": 4.396230673752292e-06, "loss": 0.3608, "step": 35809 }, { "epoch": 1.6433389931623146, "grad_norm": 0.4759521484375, "learning_rate": 4.395987280738904e-06, "loss": 0.3487, "step": 35810 }, { "epoch": 1.643384883667569, "grad_norm": 0.5054999589920044, "learning_rate": 4.395743889177975e-06, "loss": 0.4497, "step": 35811 }, { "epoch": 1.6434307741728236, "grad_norm": 0.5128865242004395, "learning_rate": 4.39550049907009e-06, "loss": 0.4271, "step": 35812 }, { "epoch": 1.643476664678078, "grad_norm": 0.5182333588600159, "learning_rate": 4.395257110415838e-06, "loss": 0.3868, "step": 35813 }, { "epoch": 1.6435225551833326, "grad_norm": 0.47527754306793213, "learning_rate": 4.3950137232158005e-06, "loss": 0.3721, "step": 35814 }, { "epoch": 1.643568445688587, "grad_norm": 0.46385812759399414, "learning_rate": 4.394770337470563e-06, "loss": 0.326, "step": 35815 }, { "epoch": 1.6436143361938416, "grad_norm": 0.48085999488830566, "learning_rate": 4.3945269531807135e-06, "loss": 0.3634, "step": 35816 }, { "epoch": 1.643660226699096, "grad_norm": 0.4314105808734894, "learning_rate": 4.394283570346835e-06, "loss": 0.3147, "step": 35817 }, { "epoch": 1.6437061172043506, "grad_norm": 0.45863136649131775, "learning_rate": 4.394040188969513e-06, "loss": 0.3744, "step": 35818 }, { "epoch": 1.6437520077096048, "grad_norm": 0.43358537554740906, "learning_rate": 4.3937968090493345e-06, "loss": 0.333, "step": 35819 }, { "epoch": 1.6437978982148593, "grad_norm": 0.464619517326355, "learning_rate": 4.3935534305868825e-06, "loss": 0.3169, "step": 35820 }, { "epoch": 1.6438437887201138, "grad_norm": 0.4987611770629883, "learning_rate": 4.393310053582741e-06, "loss": 0.3952, "step": 35821 }, { "epoch": 1.643889679225368, "grad_norm": 0.5382171869277954, "learning_rate": 4.393066678037501e-06, "loss": 0.5477, "step": 35822 }, { "epoch": 1.6439355697306226, "grad_norm": 0.4668377637863159, "learning_rate": 4.392823303951743e-06, "loss": 0.3447, "step": 35823 }, { "epoch": 1.643981460235877, "grad_norm": 0.43894046545028687, "learning_rate": 4.392579931326051e-06, "loss": 0.3162, "step": 35824 }, { "epoch": 1.6440273507411316, "grad_norm": 0.45414823293685913, "learning_rate": 4.392336560161015e-06, "loss": 0.3249, "step": 35825 }, { "epoch": 1.644073241246386, "grad_norm": 0.4436976909637451, "learning_rate": 4.392093190457217e-06, "loss": 0.333, "step": 35826 }, { "epoch": 1.6441191317516406, "grad_norm": 0.49503812193870544, "learning_rate": 4.391849822215244e-06, "loss": 0.3477, "step": 35827 }, { "epoch": 1.644165022256895, "grad_norm": 0.41574251651763916, "learning_rate": 4.391606455435679e-06, "loss": 0.2649, "step": 35828 }, { "epoch": 1.6442109127621496, "grad_norm": 0.463836133480072, "learning_rate": 4.391363090119109e-06, "loss": 0.3153, "step": 35829 }, { "epoch": 1.644256803267404, "grad_norm": 0.48424381017684937, "learning_rate": 4.391119726266119e-06, "loss": 0.3733, "step": 35830 }, { "epoch": 1.6443026937726586, "grad_norm": 0.470198392868042, "learning_rate": 4.390876363877292e-06, "loss": 0.3901, "step": 35831 }, { "epoch": 1.6443485842779129, "grad_norm": 0.42296624183654785, "learning_rate": 4.390633002953216e-06, "loss": 0.2977, "step": 35832 }, { "epoch": 1.6443944747831674, "grad_norm": 0.4460691809654236, "learning_rate": 4.390389643494477e-06, "loss": 0.3148, "step": 35833 }, { "epoch": 1.6444403652884219, "grad_norm": 0.44360625743865967, "learning_rate": 4.390146285501657e-06, "loss": 0.3729, "step": 35834 }, { "epoch": 1.6444862557936761, "grad_norm": 0.4957050681114197, "learning_rate": 4.389902928975343e-06, "loss": 0.3751, "step": 35835 }, { "epoch": 1.6445321462989306, "grad_norm": 0.45793092250823975, "learning_rate": 4.38965957391612e-06, "loss": 0.3063, "step": 35836 }, { "epoch": 1.6445780368041851, "grad_norm": 0.44571229815483093, "learning_rate": 4.389416220324572e-06, "loss": 0.3092, "step": 35837 }, { "epoch": 1.6446239273094396, "grad_norm": 0.513238251209259, "learning_rate": 4.389172868201286e-06, "loss": 0.4177, "step": 35838 }, { "epoch": 1.6446698178146941, "grad_norm": 0.45076093077659607, "learning_rate": 4.388929517546847e-06, "loss": 0.3169, "step": 35839 }, { "epoch": 1.6447157083199486, "grad_norm": 0.4670800566673279, "learning_rate": 4.38868616836184e-06, "loss": 0.4033, "step": 35840 }, { "epoch": 1.6447615988252031, "grad_norm": 0.4193473160266876, "learning_rate": 4.388442820646849e-06, "loss": 0.2853, "step": 35841 }, { "epoch": 1.6448074893304576, "grad_norm": 0.5827734470367432, "learning_rate": 4.38819947440246e-06, "loss": 0.2997, "step": 35842 }, { "epoch": 1.6448533798357121, "grad_norm": 0.467144250869751, "learning_rate": 4.387956129629257e-06, "loss": 0.3226, "step": 35843 }, { "epoch": 1.6448992703409666, "grad_norm": 0.46242573857307434, "learning_rate": 4.387712786327829e-06, "loss": 0.3611, "step": 35844 }, { "epoch": 1.6449451608462209, "grad_norm": 0.4546317756175995, "learning_rate": 4.387469444498758e-06, "loss": 0.3245, "step": 35845 }, { "epoch": 1.6449910513514754, "grad_norm": 0.46148136258125305, "learning_rate": 4.387226104142628e-06, "loss": 0.3511, "step": 35846 }, { "epoch": 1.6450369418567299, "grad_norm": 0.4385524094104767, "learning_rate": 4.386982765260027e-06, "loss": 0.3423, "step": 35847 }, { "epoch": 1.6450828323619842, "grad_norm": 0.4744969606399536, "learning_rate": 4.386739427851538e-06, "loss": 0.3797, "step": 35848 }, { "epoch": 1.6451287228672387, "grad_norm": 0.49610549211502075, "learning_rate": 4.386496091917748e-06, "loss": 0.3459, "step": 35849 }, { "epoch": 1.6451746133724932, "grad_norm": 0.4768170118331909, "learning_rate": 4.386252757459242e-06, "loss": 0.3728, "step": 35850 }, { "epoch": 1.6452205038777477, "grad_norm": 0.45942503213882446, "learning_rate": 4.386009424476603e-06, "loss": 0.3367, "step": 35851 }, { "epoch": 1.6452663943830022, "grad_norm": 0.4893627166748047, "learning_rate": 4.3857660929704175e-06, "loss": 0.3338, "step": 35852 }, { "epoch": 1.6453122848882566, "grad_norm": 0.4380716383457184, "learning_rate": 4.385522762941271e-06, "loss": 0.2978, "step": 35853 }, { "epoch": 1.6453581753935111, "grad_norm": 0.43358251452445984, "learning_rate": 4.385279434389749e-06, "loss": 0.2782, "step": 35854 }, { "epoch": 1.6454040658987656, "grad_norm": 0.4585760831832886, "learning_rate": 4.385036107316435e-06, "loss": 0.3574, "step": 35855 }, { "epoch": 1.6454499564040201, "grad_norm": 0.45548945665359497, "learning_rate": 4.384792781721916e-06, "loss": 0.3078, "step": 35856 }, { "epoch": 1.6454958469092744, "grad_norm": 0.436739444732666, "learning_rate": 4.3845494576067766e-06, "loss": 0.2932, "step": 35857 }, { "epoch": 1.645541737414529, "grad_norm": 0.44653865694999695, "learning_rate": 4.384306134971599e-06, "loss": 0.3448, "step": 35858 }, { "epoch": 1.6455876279197834, "grad_norm": 0.47348302602767944, "learning_rate": 4.384062813816972e-06, "loss": 0.3685, "step": 35859 }, { "epoch": 1.645633518425038, "grad_norm": 0.4350849390029907, "learning_rate": 4.3838194941434815e-06, "loss": 0.3003, "step": 35860 }, { "epoch": 1.6456794089302922, "grad_norm": 0.5129213929176331, "learning_rate": 4.38357617595171e-06, "loss": 0.3823, "step": 35861 }, { "epoch": 1.6457252994355467, "grad_norm": 0.47791916131973267, "learning_rate": 4.383332859242241e-06, "loss": 0.3466, "step": 35862 }, { "epoch": 1.6457711899408012, "grad_norm": 0.48637765645980835, "learning_rate": 4.383089544015664e-06, "loss": 0.4034, "step": 35863 }, { "epoch": 1.6458170804460557, "grad_norm": 0.512289822101593, "learning_rate": 4.382846230272562e-06, "loss": 0.3976, "step": 35864 }, { "epoch": 1.6458629709513102, "grad_norm": 0.44551882147789, "learning_rate": 4.382602918013517e-06, "loss": 0.302, "step": 35865 }, { "epoch": 1.6459088614565647, "grad_norm": 0.5206678509712219, "learning_rate": 4.38235960723912e-06, "loss": 0.4688, "step": 35866 }, { "epoch": 1.6459547519618192, "grad_norm": 0.4506447911262512, "learning_rate": 4.382116297949954e-06, "loss": 0.3204, "step": 35867 }, { "epoch": 1.6460006424670737, "grad_norm": 0.4454910457134247, "learning_rate": 4.3818729901466e-06, "loss": 0.3236, "step": 35868 }, { "epoch": 1.6460465329723282, "grad_norm": 0.4584294259548187, "learning_rate": 4.381629683829649e-06, "loss": 0.3385, "step": 35869 }, { "epoch": 1.6460924234775824, "grad_norm": 0.4795841872692108, "learning_rate": 4.381386378999683e-06, "loss": 0.4148, "step": 35870 }, { "epoch": 1.646138313982837, "grad_norm": 0.4646975100040436, "learning_rate": 4.381143075657287e-06, "loss": 0.3615, "step": 35871 }, { "epoch": 1.6461842044880914, "grad_norm": 0.4283987581729889, "learning_rate": 4.380899773803048e-06, "loss": 0.3121, "step": 35872 }, { "epoch": 1.6462300949933457, "grad_norm": 0.4476897120475769, "learning_rate": 4.380656473437549e-06, "loss": 0.3546, "step": 35873 }, { "epoch": 1.6462759854986002, "grad_norm": 0.4907616674900055, "learning_rate": 4.380413174561373e-06, "loss": 0.3613, "step": 35874 }, { "epoch": 1.6463218760038547, "grad_norm": 0.4547068774700165, "learning_rate": 4.38016987717511e-06, "loss": 0.3313, "step": 35875 }, { "epoch": 1.6463677665091092, "grad_norm": 0.444036066532135, "learning_rate": 4.379926581279344e-06, "loss": 0.2901, "step": 35876 }, { "epoch": 1.6464136570143637, "grad_norm": 0.5038992166519165, "learning_rate": 4.379683286874657e-06, "loss": 0.4248, "step": 35877 }, { "epoch": 1.6464595475196182, "grad_norm": 0.47008755803108215, "learning_rate": 4.379439993961637e-06, "loss": 0.3634, "step": 35878 }, { "epoch": 1.6465054380248727, "grad_norm": 0.4997967481613159, "learning_rate": 4.379196702540867e-06, "loss": 0.4123, "step": 35879 }, { "epoch": 1.6465513285301272, "grad_norm": 0.42728912830352783, "learning_rate": 4.378953412612933e-06, "loss": 0.2981, "step": 35880 }, { "epoch": 1.6465972190353817, "grad_norm": 0.4909107983112335, "learning_rate": 4.378710124178421e-06, "loss": 0.3482, "step": 35881 }, { "epoch": 1.6466431095406362, "grad_norm": 0.4335554242134094, "learning_rate": 4.378466837237915e-06, "loss": 0.3034, "step": 35882 }, { "epoch": 1.6466890000458905, "grad_norm": 0.5149171948432922, "learning_rate": 4.378223551791999e-06, "loss": 0.409, "step": 35883 }, { "epoch": 1.646734890551145, "grad_norm": 0.46609222888946533, "learning_rate": 4.37798026784126e-06, "loss": 0.3466, "step": 35884 }, { "epoch": 1.6467807810563995, "grad_norm": 0.47231432795524597, "learning_rate": 4.377736985386283e-06, "loss": 0.3509, "step": 35885 }, { "epoch": 1.6468266715616537, "grad_norm": 0.47837042808532715, "learning_rate": 4.377493704427649e-06, "loss": 0.3681, "step": 35886 }, { "epoch": 1.6468725620669082, "grad_norm": 0.4613659977912903, "learning_rate": 4.377250424965949e-06, "loss": 0.3349, "step": 35887 }, { "epoch": 1.6469184525721627, "grad_norm": 0.4442126154899597, "learning_rate": 4.377007147001765e-06, "loss": 0.3255, "step": 35888 }, { "epoch": 1.6469643430774172, "grad_norm": 0.5007584691047668, "learning_rate": 4.37676387053568e-06, "loss": 0.4189, "step": 35889 }, { "epoch": 1.6470102335826717, "grad_norm": 0.507055938243866, "learning_rate": 4.376520595568284e-06, "loss": 0.3774, "step": 35890 }, { "epoch": 1.6470561240879262, "grad_norm": 0.4995647072792053, "learning_rate": 4.3762773221001596e-06, "loss": 0.3867, "step": 35891 }, { "epoch": 1.6471020145931807, "grad_norm": 0.46633684635162354, "learning_rate": 4.3760340501318896e-06, "loss": 0.3419, "step": 35892 }, { "epoch": 1.6471479050984352, "grad_norm": 0.45598554611206055, "learning_rate": 4.3757907796640605e-06, "loss": 0.3724, "step": 35893 }, { "epoch": 1.6471937956036897, "grad_norm": 0.5060083866119385, "learning_rate": 4.3755475106972585e-06, "loss": 0.4215, "step": 35894 }, { "epoch": 1.6472396861089442, "grad_norm": 0.490540474653244, "learning_rate": 4.375304243232068e-06, "loss": 0.3369, "step": 35895 }, { "epoch": 1.6472855766141985, "grad_norm": 0.4229942560195923, "learning_rate": 4.375060977269071e-06, "loss": 0.2657, "step": 35896 }, { "epoch": 1.647331467119453, "grad_norm": 0.5268293619155884, "learning_rate": 4.374817712808858e-06, "loss": 0.4335, "step": 35897 }, { "epoch": 1.6473773576247075, "grad_norm": 0.5111556053161621, "learning_rate": 4.3745744498520105e-06, "loss": 0.4356, "step": 35898 }, { "epoch": 1.6474232481299618, "grad_norm": 0.5039180517196655, "learning_rate": 4.3743311883991124e-06, "loss": 0.4458, "step": 35899 }, { "epoch": 1.6474691386352163, "grad_norm": 0.4656873643398285, "learning_rate": 4.374087928450752e-06, "loss": 0.3786, "step": 35900 }, { "epoch": 1.6475150291404708, "grad_norm": 0.47338080406188965, "learning_rate": 4.373844670007513e-06, "loss": 0.3552, "step": 35901 }, { "epoch": 1.6475609196457253, "grad_norm": 0.4701687693595886, "learning_rate": 4.3736014130699775e-06, "loss": 0.3615, "step": 35902 }, { "epoch": 1.6476068101509798, "grad_norm": 0.4791070818901062, "learning_rate": 4.373358157638735e-06, "loss": 0.3625, "step": 35903 }, { "epoch": 1.6476527006562343, "grad_norm": 0.4878644645214081, "learning_rate": 4.373114903714368e-06, "loss": 0.3469, "step": 35904 }, { "epoch": 1.6476985911614888, "grad_norm": 0.49195224046707153, "learning_rate": 4.372871651297461e-06, "loss": 0.3448, "step": 35905 }, { "epoch": 1.6477444816667433, "grad_norm": 0.5160082578659058, "learning_rate": 4.372628400388601e-06, "loss": 0.406, "step": 35906 }, { "epoch": 1.6477903721719978, "grad_norm": 0.4769684076309204, "learning_rate": 4.372385150988372e-06, "loss": 0.3449, "step": 35907 }, { "epoch": 1.647836262677252, "grad_norm": 0.4544503092765808, "learning_rate": 4.372141903097356e-06, "loss": 0.3145, "step": 35908 }, { "epoch": 1.6478821531825065, "grad_norm": 0.4470287263393402, "learning_rate": 4.3718986567161424e-06, "loss": 0.3144, "step": 35909 }, { "epoch": 1.647928043687761, "grad_norm": 0.46876898407936096, "learning_rate": 4.371655411845315e-06, "loss": 0.3279, "step": 35910 }, { "epoch": 1.6479739341930153, "grad_norm": 0.4714542627334595, "learning_rate": 4.371412168485457e-06, "loss": 0.3475, "step": 35911 }, { "epoch": 1.6480198246982698, "grad_norm": 0.5600622296333313, "learning_rate": 4.371168926637155e-06, "loss": 0.4228, "step": 35912 }, { "epoch": 1.6480657152035243, "grad_norm": 0.4761614203453064, "learning_rate": 4.370925686300993e-06, "loss": 0.3253, "step": 35913 }, { "epoch": 1.6481116057087788, "grad_norm": 0.4816685616970062, "learning_rate": 4.370682447477555e-06, "loss": 0.3794, "step": 35914 }, { "epoch": 1.6481574962140333, "grad_norm": 0.44472235441207886, "learning_rate": 4.370439210167429e-06, "loss": 0.3187, "step": 35915 }, { "epoch": 1.6482033867192878, "grad_norm": 0.41309282183647156, "learning_rate": 4.370195974371198e-06, "loss": 0.2922, "step": 35916 }, { "epoch": 1.6482492772245423, "grad_norm": 0.4674730598926544, "learning_rate": 4.369952740089444e-06, "loss": 0.326, "step": 35917 }, { "epoch": 1.6482951677297968, "grad_norm": 0.46466493606567383, "learning_rate": 4.369709507322757e-06, "loss": 0.3589, "step": 35918 }, { "epoch": 1.6483410582350513, "grad_norm": 0.48003995418548584, "learning_rate": 4.36946627607172e-06, "loss": 0.3584, "step": 35919 }, { "epoch": 1.6483869487403058, "grad_norm": 0.471615195274353, "learning_rate": 4.369223046336917e-06, "loss": 0.3971, "step": 35920 }, { "epoch": 1.64843283924556, "grad_norm": 0.4379487931728363, "learning_rate": 4.368979818118935e-06, "loss": 0.3028, "step": 35921 }, { "epoch": 1.6484787297508146, "grad_norm": 0.5069451332092285, "learning_rate": 4.368736591418356e-06, "loss": 0.3168, "step": 35922 }, { "epoch": 1.648524620256069, "grad_norm": 0.4645574986934662, "learning_rate": 4.3684933662357645e-06, "loss": 0.3733, "step": 35923 }, { "epoch": 1.6485705107613233, "grad_norm": 0.4728159010410309, "learning_rate": 4.36825014257175e-06, "loss": 0.3195, "step": 35924 }, { "epoch": 1.6486164012665778, "grad_norm": 0.505206823348999, "learning_rate": 4.368006920426894e-06, "loss": 0.424, "step": 35925 }, { "epoch": 1.6486622917718323, "grad_norm": 0.47774896025657654, "learning_rate": 4.3677636998017815e-06, "loss": 0.3855, "step": 35926 }, { "epoch": 1.6487081822770868, "grad_norm": 0.48680517077445984, "learning_rate": 4.367520480696997e-06, "loss": 0.3614, "step": 35927 }, { "epoch": 1.6487540727823413, "grad_norm": 0.4690234363079071, "learning_rate": 4.367277263113127e-06, "loss": 0.3414, "step": 35928 }, { "epoch": 1.6487999632875958, "grad_norm": 0.4303583800792694, "learning_rate": 4.367034047050755e-06, "loss": 0.3021, "step": 35929 }, { "epoch": 1.6488458537928503, "grad_norm": 0.4407225549221039, "learning_rate": 4.366790832510466e-06, "loss": 0.3068, "step": 35930 }, { "epoch": 1.6488917442981048, "grad_norm": 0.4914962351322174, "learning_rate": 4.366547619492846e-06, "loss": 0.3737, "step": 35931 }, { "epoch": 1.6489376348033593, "grad_norm": 0.4584190845489502, "learning_rate": 4.3663044079984795e-06, "loss": 0.3229, "step": 35932 }, { "epoch": 1.6489835253086138, "grad_norm": 0.48588070273399353, "learning_rate": 4.366061198027949e-06, "loss": 0.3676, "step": 35933 }, { "epoch": 1.649029415813868, "grad_norm": 0.43435367941856384, "learning_rate": 4.3658179895818436e-06, "loss": 0.2826, "step": 35934 }, { "epoch": 1.6490753063191226, "grad_norm": 0.49441584944725037, "learning_rate": 4.365574782660745e-06, "loss": 0.3399, "step": 35935 }, { "epoch": 1.649121196824377, "grad_norm": 0.4394732415676117, "learning_rate": 4.365331577265238e-06, "loss": 0.3213, "step": 35936 }, { "epoch": 1.6491670873296314, "grad_norm": 0.45605823397636414, "learning_rate": 4.365088373395909e-06, "loss": 0.3194, "step": 35937 }, { "epoch": 1.6492129778348859, "grad_norm": 0.49027353525161743, "learning_rate": 4.3648451710533425e-06, "loss": 0.3707, "step": 35938 }, { "epoch": 1.6492588683401403, "grad_norm": 0.4711936414241791, "learning_rate": 4.36460197023812e-06, "loss": 0.3239, "step": 35939 }, { "epoch": 1.6493047588453948, "grad_norm": 0.49379420280456543, "learning_rate": 4.364358770950832e-06, "loss": 0.3686, "step": 35940 }, { "epoch": 1.6493506493506493, "grad_norm": 0.483435720205307, "learning_rate": 4.364115573192061e-06, "loss": 0.4135, "step": 35941 }, { "epoch": 1.6493965398559038, "grad_norm": 0.4706214368343353, "learning_rate": 4.36387237696239e-06, "loss": 0.3312, "step": 35942 }, { "epoch": 1.6494424303611583, "grad_norm": 0.4465349018573761, "learning_rate": 4.363629182262405e-06, "loss": 0.2737, "step": 35943 }, { "epoch": 1.6494883208664128, "grad_norm": 0.44056040048599243, "learning_rate": 4.363385989092693e-06, "loss": 0.2673, "step": 35944 }, { "epoch": 1.6495342113716673, "grad_norm": 0.4617215692996979, "learning_rate": 4.363142797453833e-06, "loss": 0.3521, "step": 35945 }, { "epoch": 1.6495801018769216, "grad_norm": 0.47313228249549866, "learning_rate": 4.362899607346416e-06, "loss": 0.3588, "step": 35946 }, { "epoch": 1.649625992382176, "grad_norm": 0.47361859679222107, "learning_rate": 4.362656418771025e-06, "loss": 0.3339, "step": 35947 }, { "epoch": 1.6496718828874306, "grad_norm": 0.4966105818748474, "learning_rate": 4.362413231728243e-06, "loss": 0.4392, "step": 35948 }, { "epoch": 1.649717773392685, "grad_norm": 0.46930089592933655, "learning_rate": 4.362170046218657e-06, "loss": 0.3753, "step": 35949 }, { "epoch": 1.6497636638979394, "grad_norm": 0.48905280232429504, "learning_rate": 4.36192686224285e-06, "loss": 0.4378, "step": 35950 }, { "epoch": 1.6498095544031939, "grad_norm": 0.43733733892440796, "learning_rate": 4.3616836798014065e-06, "loss": 0.324, "step": 35951 }, { "epoch": 1.6498554449084484, "grad_norm": 0.4886120855808258, "learning_rate": 4.361440498894914e-06, "loss": 0.3397, "step": 35952 }, { "epoch": 1.6499013354137029, "grad_norm": 0.4351097047328949, "learning_rate": 4.361197319523955e-06, "loss": 0.3163, "step": 35953 }, { "epoch": 1.6499472259189574, "grad_norm": 0.4852837026119232, "learning_rate": 4.3609541416891145e-06, "loss": 0.4078, "step": 35954 }, { "epoch": 1.6499931164242119, "grad_norm": 0.46513986587524414, "learning_rate": 4.360710965390978e-06, "loss": 0.3245, "step": 35955 }, { "epoch": 1.6500390069294664, "grad_norm": 0.507186770439148, "learning_rate": 4.36046779063013e-06, "loss": 0.432, "step": 35956 }, { "epoch": 1.6500848974347209, "grad_norm": 0.5067645311355591, "learning_rate": 4.360224617407155e-06, "loss": 0.3308, "step": 35957 }, { "epoch": 1.6501307879399754, "grad_norm": 0.4566802382469177, "learning_rate": 4.359981445722638e-06, "loss": 0.3515, "step": 35958 }, { "epoch": 1.6501766784452296, "grad_norm": 0.46597445011138916, "learning_rate": 4.359738275577163e-06, "loss": 0.3704, "step": 35959 }, { "epoch": 1.6502225689504841, "grad_norm": 0.5354997515678406, "learning_rate": 4.359495106971317e-06, "loss": 0.3049, "step": 35960 }, { "epoch": 1.6502684594557386, "grad_norm": 0.5137149691581726, "learning_rate": 4.35925193990568e-06, "loss": 0.4244, "step": 35961 }, { "epoch": 1.650314349960993, "grad_norm": 0.45069023966789246, "learning_rate": 4.359008774380842e-06, "loss": 0.328, "step": 35962 }, { "epoch": 1.6503602404662474, "grad_norm": 0.4509267508983612, "learning_rate": 4.358765610397386e-06, "loss": 0.3241, "step": 35963 }, { "epoch": 1.650406130971502, "grad_norm": 0.4760028123855591, "learning_rate": 4.358522447955895e-06, "loss": 0.3872, "step": 35964 }, { "epoch": 1.6504520214767564, "grad_norm": 0.4613112509250641, "learning_rate": 4.3582792870569555e-06, "loss": 0.3752, "step": 35965 }, { "epoch": 1.650497911982011, "grad_norm": 0.4791675806045532, "learning_rate": 4.358036127701153e-06, "loss": 0.3846, "step": 35966 }, { "epoch": 1.6505438024872654, "grad_norm": 0.5006075501441956, "learning_rate": 4.357792969889068e-06, "loss": 0.3942, "step": 35967 }, { "epoch": 1.65058969299252, "grad_norm": 0.48373717069625854, "learning_rate": 4.357549813621291e-06, "loss": 0.3792, "step": 35968 }, { "epoch": 1.6506355834977744, "grad_norm": 0.4603436589241028, "learning_rate": 4.357306658898404e-06, "loss": 0.3444, "step": 35969 }, { "epoch": 1.650681474003029, "grad_norm": 0.5049374103546143, "learning_rate": 4.35706350572099e-06, "loss": 0.407, "step": 35970 }, { "epoch": 1.6507273645082834, "grad_norm": 0.4457077383995056, "learning_rate": 4.356820354089636e-06, "loss": 0.3016, "step": 35971 }, { "epoch": 1.6507732550135377, "grad_norm": 0.47533726692199707, "learning_rate": 4.356577204004927e-06, "loss": 0.3865, "step": 35972 }, { "epoch": 1.6508191455187922, "grad_norm": 0.48755377531051636, "learning_rate": 4.356334055467444e-06, "loss": 0.4039, "step": 35973 }, { "epoch": 1.6508650360240467, "grad_norm": 0.458965539932251, "learning_rate": 4.356090908477777e-06, "loss": 0.3505, "step": 35974 }, { "epoch": 1.650910926529301, "grad_norm": 0.41512060165405273, "learning_rate": 4.355847763036508e-06, "loss": 0.2691, "step": 35975 }, { "epoch": 1.6509568170345554, "grad_norm": 0.4832877218723297, "learning_rate": 4.35560461914422e-06, "loss": 0.3993, "step": 35976 }, { "epoch": 1.65100270753981, "grad_norm": 0.4928421974182129, "learning_rate": 4.355361476801502e-06, "loss": 0.4171, "step": 35977 }, { "epoch": 1.6510485980450644, "grad_norm": 0.44953614473342896, "learning_rate": 4.355118336008936e-06, "loss": 0.311, "step": 35978 }, { "epoch": 1.651094488550319, "grad_norm": 0.5108081102371216, "learning_rate": 4.354875196767104e-06, "loss": 0.4951, "step": 35979 }, { "epoch": 1.6511403790555734, "grad_norm": 0.44888004660606384, "learning_rate": 4.3546320590765965e-06, "loss": 0.3418, "step": 35980 }, { "epoch": 1.651186269560828, "grad_norm": 0.48100563883781433, "learning_rate": 4.3543889229379946e-06, "loss": 0.383, "step": 35981 }, { "epoch": 1.6512321600660824, "grad_norm": 0.4599229693412781, "learning_rate": 4.354145788351882e-06, "loss": 0.3037, "step": 35982 }, { "epoch": 1.651278050571337, "grad_norm": 0.4645184576511383, "learning_rate": 4.353902655318846e-06, "loss": 0.3327, "step": 35983 }, { "epoch": 1.6513239410765914, "grad_norm": 0.4297705292701721, "learning_rate": 4.353659523839471e-06, "loss": 0.2854, "step": 35984 }, { "epoch": 1.6513698315818457, "grad_norm": 0.4643173813819885, "learning_rate": 4.35341639391434e-06, "loss": 0.3433, "step": 35985 }, { "epoch": 1.6514157220871002, "grad_norm": 0.4644688069820404, "learning_rate": 4.353173265544039e-06, "loss": 0.3648, "step": 35986 }, { "epoch": 1.6514616125923547, "grad_norm": 0.4398758113384247, "learning_rate": 4.352930138729152e-06, "loss": 0.3092, "step": 35987 }, { "epoch": 1.651507503097609, "grad_norm": 0.4798223078250885, "learning_rate": 4.352687013470262e-06, "loss": 0.3498, "step": 35988 }, { "epoch": 1.6515533936028635, "grad_norm": 0.45280513167381287, "learning_rate": 4.352443889767958e-06, "loss": 0.3379, "step": 35989 }, { "epoch": 1.651599284108118, "grad_norm": 0.47199442982673645, "learning_rate": 4.352200767622821e-06, "loss": 0.3739, "step": 35990 }, { "epoch": 1.6516451746133725, "grad_norm": 0.4413686990737915, "learning_rate": 4.3519576470354375e-06, "loss": 0.3361, "step": 35991 }, { "epoch": 1.651691065118627, "grad_norm": 0.480246365070343, "learning_rate": 4.3517145280063895e-06, "loss": 0.35, "step": 35992 }, { "epoch": 1.6517369556238815, "grad_norm": 0.4901658296585083, "learning_rate": 4.351471410536265e-06, "loss": 0.447, "step": 35993 }, { "epoch": 1.651782846129136, "grad_norm": 0.483841210603714, "learning_rate": 4.351228294625648e-06, "loss": 0.3705, "step": 35994 }, { "epoch": 1.6518287366343904, "grad_norm": 0.4929962158203125, "learning_rate": 4.350985180275119e-06, "loss": 0.4044, "step": 35995 }, { "epoch": 1.651874627139645, "grad_norm": 0.4916427433490753, "learning_rate": 4.350742067485268e-06, "loss": 0.3808, "step": 35996 }, { "epoch": 1.6519205176448992, "grad_norm": 0.4855644106864929, "learning_rate": 4.350498956256678e-06, "loss": 0.3451, "step": 35997 }, { "epoch": 1.6519664081501537, "grad_norm": 0.4811583459377289, "learning_rate": 4.350255846589931e-06, "loss": 0.401, "step": 35998 }, { "epoch": 1.6520122986554082, "grad_norm": 0.4791715145111084, "learning_rate": 4.350012738485616e-06, "loss": 0.3607, "step": 35999 }, { "epoch": 1.6520581891606625, "grad_norm": 0.4449109733104706, "learning_rate": 4.349769631944314e-06, "loss": 0.3439, "step": 36000 }, { "epoch": 1.652104079665917, "grad_norm": 0.4651741683483124, "learning_rate": 4.349526526966609e-06, "loss": 0.3127, "step": 36001 }, { "epoch": 1.6521499701711715, "grad_norm": 0.5020276308059692, "learning_rate": 4.349283423553091e-06, "loss": 0.3915, "step": 36002 }, { "epoch": 1.652195860676426, "grad_norm": 0.4172147810459137, "learning_rate": 4.3490403217043395e-06, "loss": 0.2881, "step": 36003 }, { "epoch": 1.6522417511816805, "grad_norm": 0.4468369781970978, "learning_rate": 4.348797221420939e-06, "loss": 0.3293, "step": 36004 }, { "epoch": 1.652287641686935, "grad_norm": 0.4693075120449066, "learning_rate": 4.3485541227034765e-06, "loss": 0.3699, "step": 36005 }, { "epoch": 1.6523335321921895, "grad_norm": 0.4877202808856964, "learning_rate": 4.3483110255525365e-06, "loss": 0.3863, "step": 36006 }, { "epoch": 1.652379422697444, "grad_norm": 0.46974435448646545, "learning_rate": 4.348067929968702e-06, "loss": 0.3358, "step": 36007 }, { "epoch": 1.6524253132026985, "grad_norm": 0.46888473629951477, "learning_rate": 4.34782483595256e-06, "loss": 0.3764, "step": 36008 }, { "epoch": 1.652471203707953, "grad_norm": 0.49411866068840027, "learning_rate": 4.347581743504693e-06, "loss": 0.412, "step": 36009 }, { "epoch": 1.6525170942132072, "grad_norm": 0.45911452174186707, "learning_rate": 4.347338652625683e-06, "loss": 0.3089, "step": 36010 }, { "epoch": 1.6525629847184617, "grad_norm": 0.5009966492652893, "learning_rate": 4.347095563316121e-06, "loss": 0.4446, "step": 36011 }, { "epoch": 1.6526088752237162, "grad_norm": 0.45252281427383423, "learning_rate": 4.346852475576587e-06, "loss": 0.3268, "step": 36012 }, { "epoch": 1.6526547657289705, "grad_norm": 0.4503854811191559, "learning_rate": 4.346609389407665e-06, "loss": 0.3382, "step": 36013 }, { "epoch": 1.652700656234225, "grad_norm": 0.4583148658275604, "learning_rate": 4.346366304809943e-06, "loss": 0.3738, "step": 36014 }, { "epoch": 1.6527465467394795, "grad_norm": 0.5285967588424683, "learning_rate": 4.346123221784005e-06, "loss": 0.3902, "step": 36015 }, { "epoch": 1.652792437244734, "grad_norm": 0.5204439759254456, "learning_rate": 4.345880140330431e-06, "loss": 0.4404, "step": 36016 }, { "epoch": 1.6528383277499885, "grad_norm": 0.4820834994316101, "learning_rate": 4.345637060449811e-06, "loss": 0.3961, "step": 36017 }, { "epoch": 1.652884218255243, "grad_norm": 0.5019481182098389, "learning_rate": 4.345393982142728e-06, "loss": 0.4407, "step": 36018 }, { "epoch": 1.6529301087604975, "grad_norm": 0.4796825349330902, "learning_rate": 4.345150905409764e-06, "loss": 0.3526, "step": 36019 }, { "epoch": 1.652975999265752, "grad_norm": 0.4567500948905945, "learning_rate": 4.3449078302515065e-06, "loss": 0.3025, "step": 36020 }, { "epoch": 1.6530218897710065, "grad_norm": 0.44055721163749695, "learning_rate": 4.344664756668539e-06, "loss": 0.3273, "step": 36021 }, { "epoch": 1.653067780276261, "grad_norm": 0.4587423503398895, "learning_rate": 4.344421684661444e-06, "loss": 0.3692, "step": 36022 }, { "epoch": 1.6531136707815153, "grad_norm": 0.4305415749549866, "learning_rate": 4.34417861423081e-06, "loss": 0.3093, "step": 36023 }, { "epoch": 1.6531595612867698, "grad_norm": 0.4753001034259796, "learning_rate": 4.34393554537722e-06, "loss": 0.3536, "step": 36024 }, { "epoch": 1.6532054517920243, "grad_norm": 0.4818207323551178, "learning_rate": 4.343692478101258e-06, "loss": 0.415, "step": 36025 }, { "epoch": 1.6532513422972785, "grad_norm": 0.4840905964374542, "learning_rate": 4.343449412403506e-06, "loss": 0.3752, "step": 36026 }, { "epoch": 1.653297232802533, "grad_norm": 0.4998270571231842, "learning_rate": 4.343206348284553e-06, "loss": 0.4485, "step": 36027 }, { "epoch": 1.6533431233077875, "grad_norm": 0.4937484860420227, "learning_rate": 4.342963285744981e-06, "loss": 0.3527, "step": 36028 }, { "epoch": 1.653389013813042, "grad_norm": 0.48509126901626587, "learning_rate": 4.342720224785374e-06, "loss": 0.3522, "step": 36029 }, { "epoch": 1.6534349043182965, "grad_norm": 0.5056251287460327, "learning_rate": 4.342477165406319e-06, "loss": 0.4346, "step": 36030 }, { "epoch": 1.653480794823551, "grad_norm": 0.4848411977291107, "learning_rate": 4.342234107608398e-06, "loss": 0.3561, "step": 36031 }, { "epoch": 1.6535266853288055, "grad_norm": 0.47766000032424927, "learning_rate": 4.341991051392195e-06, "loss": 0.3973, "step": 36032 }, { "epoch": 1.65357257583406, "grad_norm": 0.44374167919158936, "learning_rate": 4.341747996758297e-06, "loss": 0.3393, "step": 36033 }, { "epoch": 1.6536184663393145, "grad_norm": 0.4642241895198822, "learning_rate": 4.341504943707288e-06, "loss": 0.3559, "step": 36034 }, { "epoch": 1.6536643568445688, "grad_norm": 0.47755149006843567, "learning_rate": 4.341261892239751e-06, "loss": 0.3804, "step": 36035 }, { "epoch": 1.6537102473498233, "grad_norm": 0.48947200179100037, "learning_rate": 4.341018842356272e-06, "loss": 0.36, "step": 36036 }, { "epoch": 1.6537561378550778, "grad_norm": 0.4508672058582306, "learning_rate": 4.340775794057435e-06, "loss": 0.3083, "step": 36037 }, { "epoch": 1.6538020283603323, "grad_norm": 0.4680686593055725, "learning_rate": 4.340532747343822e-06, "loss": 0.3894, "step": 36038 }, { "epoch": 1.6538479188655866, "grad_norm": 0.45429444313049316, "learning_rate": 4.340289702216021e-06, "loss": 0.3402, "step": 36039 }, { "epoch": 1.653893809370841, "grad_norm": 0.46961474418640137, "learning_rate": 4.340046658674616e-06, "loss": 0.3906, "step": 36040 }, { "epoch": 1.6539396998760956, "grad_norm": 0.49488213658332825, "learning_rate": 4.339803616720189e-06, "loss": 0.4053, "step": 36041 }, { "epoch": 1.65398559038135, "grad_norm": 0.4506267309188843, "learning_rate": 4.339560576353327e-06, "loss": 0.3358, "step": 36042 }, { "epoch": 1.6540314808866046, "grad_norm": 0.49704161286354065, "learning_rate": 4.339317537574613e-06, "loss": 0.4338, "step": 36043 }, { "epoch": 1.654077371391859, "grad_norm": 0.4659401476383209, "learning_rate": 4.33907450038463e-06, "loss": 0.3571, "step": 36044 }, { "epoch": 1.6541232618971136, "grad_norm": 0.4463460445404053, "learning_rate": 4.338831464783967e-06, "loss": 0.3099, "step": 36045 }, { "epoch": 1.654169152402368, "grad_norm": 0.49807626008987427, "learning_rate": 4.338588430773206e-06, "loss": 0.4087, "step": 36046 }, { "epoch": 1.6542150429076226, "grad_norm": 0.4588109254837036, "learning_rate": 4.3383453983529275e-06, "loss": 0.3338, "step": 36047 }, { "epoch": 1.6542609334128768, "grad_norm": 0.49700185656547546, "learning_rate": 4.338102367523723e-06, "loss": 0.377, "step": 36048 }, { "epoch": 1.6543068239181313, "grad_norm": 0.4630831182003021, "learning_rate": 4.337859338286172e-06, "loss": 0.3253, "step": 36049 }, { "epoch": 1.6543527144233858, "grad_norm": 0.4813178777694702, "learning_rate": 4.33761631064086e-06, "loss": 0.3906, "step": 36050 }, { "epoch": 1.65439860492864, "grad_norm": 0.4512578248977661, "learning_rate": 4.337373284588372e-06, "loss": 0.3298, "step": 36051 }, { "epoch": 1.6544444954338946, "grad_norm": 0.47649335861206055, "learning_rate": 4.337130260129293e-06, "loss": 0.3187, "step": 36052 }, { "epoch": 1.654490385939149, "grad_norm": 0.4801841676235199, "learning_rate": 4.336887237264205e-06, "loss": 0.3263, "step": 36053 }, { "epoch": 1.6545362764444036, "grad_norm": 0.4341984689235687, "learning_rate": 4.336644215993695e-06, "loss": 0.274, "step": 36054 }, { "epoch": 1.654582166949658, "grad_norm": 0.5016672015190125, "learning_rate": 4.3364011963183465e-06, "loss": 0.4242, "step": 36055 }, { "epoch": 1.6546280574549126, "grad_norm": 0.45307016372680664, "learning_rate": 4.336158178238744e-06, "loss": 0.3075, "step": 36056 }, { "epoch": 1.654673947960167, "grad_norm": 0.40461796522140503, "learning_rate": 4.335915161755469e-06, "loss": 0.2756, "step": 36057 }, { "epoch": 1.6547198384654216, "grad_norm": 0.47295016050338745, "learning_rate": 4.335672146869111e-06, "loss": 0.3766, "step": 36058 }, { "epoch": 1.654765728970676, "grad_norm": 0.7599577903747559, "learning_rate": 4.335429133580252e-06, "loss": 0.4031, "step": 36059 }, { "epoch": 1.6548116194759306, "grad_norm": 0.5119338631629944, "learning_rate": 4.335186121889473e-06, "loss": 0.4259, "step": 36060 }, { "epoch": 1.6548575099811849, "grad_norm": 0.4601072072982788, "learning_rate": 4.334943111797364e-06, "loss": 0.3298, "step": 36061 }, { "epoch": 1.6549034004864394, "grad_norm": 0.4406202435493469, "learning_rate": 4.334700103304508e-06, "loss": 0.2869, "step": 36062 }, { "epoch": 1.6549492909916939, "grad_norm": 0.46977996826171875, "learning_rate": 4.334457096411486e-06, "loss": 0.3142, "step": 36063 }, { "epoch": 1.6549951814969481, "grad_norm": 0.46411871910095215, "learning_rate": 4.334214091118886e-06, "loss": 0.3385, "step": 36064 }, { "epoch": 1.6550410720022026, "grad_norm": 0.6432812213897705, "learning_rate": 4.333971087427291e-06, "loss": 0.477, "step": 36065 }, { "epoch": 1.6550869625074571, "grad_norm": 0.43838033080101013, "learning_rate": 4.333728085337283e-06, "loss": 0.3091, "step": 36066 }, { "epoch": 1.6551328530127116, "grad_norm": 0.4661412537097931, "learning_rate": 4.333485084849451e-06, "loss": 0.3612, "step": 36067 }, { "epoch": 1.6551787435179661, "grad_norm": 0.4695759117603302, "learning_rate": 4.333242085964378e-06, "loss": 0.3653, "step": 36068 }, { "epoch": 1.6552246340232206, "grad_norm": 0.4891836941242218, "learning_rate": 4.332999088682644e-06, "loss": 0.3873, "step": 36069 }, { "epoch": 1.6552705245284751, "grad_norm": 0.4200678765773773, "learning_rate": 4.332756093004839e-06, "loss": 0.268, "step": 36070 }, { "epoch": 1.6553164150337296, "grad_norm": 0.47114959359169006, "learning_rate": 4.332513098931546e-06, "loss": 0.3422, "step": 36071 }, { "epoch": 1.6553623055389841, "grad_norm": 0.456096887588501, "learning_rate": 4.332270106463346e-06, "loss": 0.3206, "step": 36072 }, { "epoch": 1.6554081960442386, "grad_norm": 0.44225654006004333, "learning_rate": 4.332027115600827e-06, "loss": 0.3111, "step": 36073 }, { "epoch": 1.6554540865494929, "grad_norm": 0.4714425206184387, "learning_rate": 4.331784126344572e-06, "loss": 0.3094, "step": 36074 }, { "epoch": 1.6554999770547474, "grad_norm": 0.4704779088497162, "learning_rate": 4.331541138695163e-06, "loss": 0.3913, "step": 36075 }, { "epoch": 1.6555458675600019, "grad_norm": 0.524785578250885, "learning_rate": 4.331298152653189e-06, "loss": 0.4411, "step": 36076 }, { "epoch": 1.6555917580652562, "grad_norm": 0.4599447250366211, "learning_rate": 4.3310551682192315e-06, "loss": 0.3369, "step": 36077 }, { "epoch": 1.6556376485705107, "grad_norm": 0.46362996101379395, "learning_rate": 4.330812185393874e-06, "loss": 0.3549, "step": 36078 }, { "epoch": 1.6556835390757652, "grad_norm": 0.4824897050857544, "learning_rate": 4.330569204177704e-06, "loss": 0.39, "step": 36079 }, { "epoch": 1.6557294295810197, "grad_norm": 0.5028586983680725, "learning_rate": 4.3303262245713035e-06, "loss": 0.462, "step": 36080 }, { "epoch": 1.6557753200862741, "grad_norm": 0.42033007740974426, "learning_rate": 4.330083246575255e-06, "loss": 0.2855, "step": 36081 }, { "epoch": 1.6558212105915286, "grad_norm": 0.44922661781311035, "learning_rate": 4.329840270190146e-06, "loss": 0.3485, "step": 36082 }, { "epoch": 1.6558671010967831, "grad_norm": 0.47912341356277466, "learning_rate": 4.329597295416561e-06, "loss": 0.3547, "step": 36083 }, { "epoch": 1.6559129916020376, "grad_norm": 0.45944979786872864, "learning_rate": 4.32935432225508e-06, "loss": 0.3054, "step": 36084 }, { "epoch": 1.6559588821072921, "grad_norm": 0.4869399964809418, "learning_rate": 4.329111350706293e-06, "loss": 0.3863, "step": 36085 }, { "epoch": 1.6560047726125464, "grad_norm": 0.4809008538722992, "learning_rate": 4.328868380770781e-06, "loss": 0.3497, "step": 36086 }, { "epoch": 1.656050663117801, "grad_norm": 0.45028677582740784, "learning_rate": 4.328625412449126e-06, "loss": 0.3225, "step": 36087 }, { "epoch": 1.6560965536230554, "grad_norm": 0.45062077045440674, "learning_rate": 4.328382445741918e-06, "loss": 0.3199, "step": 36088 }, { "epoch": 1.6561424441283097, "grad_norm": 0.45899972319602966, "learning_rate": 4.328139480649738e-06, "loss": 0.3479, "step": 36089 }, { "epoch": 1.6561883346335642, "grad_norm": 0.4497840106487274, "learning_rate": 4.327896517173172e-06, "loss": 0.3516, "step": 36090 }, { "epoch": 1.6562342251388187, "grad_norm": 0.4907016456127167, "learning_rate": 4.3276535553127985e-06, "loss": 0.4042, "step": 36091 }, { "epoch": 1.6562801156440732, "grad_norm": 0.47974255681037903, "learning_rate": 4.327410595069209e-06, "loss": 0.4061, "step": 36092 }, { "epoch": 1.6563260061493277, "grad_norm": 0.4152029752731323, "learning_rate": 4.3271676364429844e-06, "loss": 0.271, "step": 36093 }, { "epoch": 1.6563718966545822, "grad_norm": 0.5030942559242249, "learning_rate": 4.326924679434708e-06, "loss": 0.358, "step": 36094 }, { "epoch": 1.6564177871598367, "grad_norm": 0.46497970819473267, "learning_rate": 4.326681724044966e-06, "loss": 0.3338, "step": 36095 }, { "epoch": 1.6564636776650912, "grad_norm": 0.5144133567810059, "learning_rate": 4.326438770274343e-06, "loss": 0.4023, "step": 36096 }, { "epoch": 1.6565095681703457, "grad_norm": 0.49707159399986267, "learning_rate": 4.3261958181234195e-06, "loss": 0.4081, "step": 36097 }, { "epoch": 1.6565554586756002, "grad_norm": 0.48274490237236023, "learning_rate": 4.325952867592785e-06, "loss": 0.3898, "step": 36098 }, { "epoch": 1.6566013491808544, "grad_norm": 0.4326018989086151, "learning_rate": 4.32570991868302e-06, "loss": 0.2839, "step": 36099 }, { "epoch": 1.656647239686109, "grad_norm": 0.4729047119617462, "learning_rate": 4.32546697139471e-06, "loss": 0.3725, "step": 36100 }, { "epoch": 1.6566931301913634, "grad_norm": 0.4617598056793213, "learning_rate": 4.32522402572844e-06, "loss": 0.3688, "step": 36101 }, { "epoch": 1.6567390206966177, "grad_norm": 0.5105594992637634, "learning_rate": 4.324981081684793e-06, "loss": 0.3889, "step": 36102 }, { "epoch": 1.6567849112018722, "grad_norm": 0.4739111065864563, "learning_rate": 4.32473813926435e-06, "loss": 0.3469, "step": 36103 }, { "epoch": 1.6568308017071267, "grad_norm": 0.48888498544692993, "learning_rate": 4.324495198467702e-06, "loss": 0.3535, "step": 36104 }, { "epoch": 1.6568766922123812, "grad_norm": 0.4776265621185303, "learning_rate": 4.324252259295431e-06, "loss": 0.3261, "step": 36105 }, { "epoch": 1.6569225827176357, "grad_norm": 0.4853723347187042, "learning_rate": 4.324009321748117e-06, "loss": 0.3499, "step": 36106 }, { "epoch": 1.6569684732228902, "grad_norm": 0.4828360974788666, "learning_rate": 4.323766385826349e-06, "loss": 0.3837, "step": 36107 }, { "epoch": 1.6570143637281447, "grad_norm": 0.44398459792137146, "learning_rate": 4.32352345153071e-06, "loss": 0.3195, "step": 36108 }, { "epoch": 1.6570602542333992, "grad_norm": 0.44199803471565247, "learning_rate": 4.323280518861781e-06, "loss": 0.3167, "step": 36109 }, { "epoch": 1.6571061447386537, "grad_norm": 0.4572855830192566, "learning_rate": 4.323037587820151e-06, "loss": 0.3762, "step": 36110 }, { "epoch": 1.6571520352439082, "grad_norm": 0.48942917585372925, "learning_rate": 4.3227946584064016e-06, "loss": 0.3846, "step": 36111 }, { "epoch": 1.6571979257491625, "grad_norm": 0.4529446065425873, "learning_rate": 4.3225517306211165e-06, "loss": 0.3138, "step": 36112 }, { "epoch": 1.657243816254417, "grad_norm": 0.4470978081226349, "learning_rate": 4.322308804464882e-06, "loss": 0.3151, "step": 36113 }, { "epoch": 1.6572897067596715, "grad_norm": 0.5017333030700684, "learning_rate": 4.3220658799382805e-06, "loss": 0.4201, "step": 36114 }, { "epoch": 1.6573355972649257, "grad_norm": 0.4790981411933899, "learning_rate": 4.321822957041896e-06, "loss": 0.3667, "step": 36115 }, { "epoch": 1.6573814877701802, "grad_norm": 0.491029292345047, "learning_rate": 4.321580035776314e-06, "loss": 0.3782, "step": 36116 }, { "epoch": 1.6574273782754347, "grad_norm": 0.5177568793296814, "learning_rate": 4.321337116142118e-06, "loss": 0.4502, "step": 36117 }, { "epoch": 1.6574732687806892, "grad_norm": 0.6120824813842773, "learning_rate": 4.321094198139889e-06, "loss": 0.3858, "step": 36118 }, { "epoch": 1.6575191592859437, "grad_norm": 0.46056845784187317, "learning_rate": 4.320851281770217e-06, "loss": 0.3209, "step": 36119 }, { "epoch": 1.6575650497911982, "grad_norm": 0.45297276973724365, "learning_rate": 4.320608367033684e-06, "loss": 0.3242, "step": 36120 }, { "epoch": 1.6576109402964527, "grad_norm": 0.4537056088447571, "learning_rate": 4.320365453930872e-06, "loss": 0.3665, "step": 36121 }, { "epoch": 1.6576568308017072, "grad_norm": 0.4922426640987396, "learning_rate": 4.3201225424623675e-06, "loss": 0.3313, "step": 36122 }, { "epoch": 1.6577027213069617, "grad_norm": 0.4928635358810425, "learning_rate": 4.319879632628753e-06, "loss": 0.3933, "step": 36123 }, { "epoch": 1.657748611812216, "grad_norm": 0.45508816838264465, "learning_rate": 4.3196367244306145e-06, "loss": 0.3112, "step": 36124 }, { "epoch": 1.6577945023174705, "grad_norm": 0.4624754786491394, "learning_rate": 4.319393817868533e-06, "loss": 0.3378, "step": 36125 }, { "epoch": 1.657840392822725, "grad_norm": 0.4666912853717804, "learning_rate": 4.319150912943096e-06, "loss": 0.3615, "step": 36126 }, { "epoch": 1.6578862833279795, "grad_norm": 0.4899422824382782, "learning_rate": 4.318908009654885e-06, "loss": 0.3428, "step": 36127 }, { "epoch": 1.6579321738332338, "grad_norm": 0.49616503715515137, "learning_rate": 4.318665108004486e-06, "loss": 0.4475, "step": 36128 }, { "epoch": 1.6579780643384883, "grad_norm": 0.44999024271965027, "learning_rate": 4.3184222079924835e-06, "loss": 0.3114, "step": 36129 }, { "epoch": 1.6580239548437428, "grad_norm": 0.45895087718963623, "learning_rate": 4.318179309619459e-06, "loss": 0.3611, "step": 36130 }, { "epoch": 1.6580698453489973, "grad_norm": 0.4932117760181427, "learning_rate": 4.317936412885997e-06, "loss": 0.3907, "step": 36131 }, { "epoch": 1.6581157358542518, "grad_norm": 0.4635425806045532, "learning_rate": 4.317693517792685e-06, "loss": 0.3398, "step": 36132 }, { "epoch": 1.6581616263595063, "grad_norm": 0.46430590748786926, "learning_rate": 4.317450624340103e-06, "loss": 0.4197, "step": 36133 }, { "epoch": 1.6582075168647608, "grad_norm": 0.42157426476478577, "learning_rate": 4.317207732528838e-06, "loss": 0.329, "step": 36134 }, { "epoch": 1.6582534073700153, "grad_norm": 0.44508829712867737, "learning_rate": 4.316964842359473e-06, "loss": 0.3118, "step": 36135 }, { "epoch": 1.6582992978752698, "grad_norm": 0.4645518660545349, "learning_rate": 4.316721953832592e-06, "loss": 0.3519, "step": 36136 }, { "epoch": 1.658345188380524, "grad_norm": 0.4577440619468689, "learning_rate": 4.316479066948778e-06, "loss": 0.3271, "step": 36137 }, { "epoch": 1.6583910788857785, "grad_norm": 0.4876921474933624, "learning_rate": 4.316236181708618e-06, "loss": 0.3933, "step": 36138 }, { "epoch": 1.658436969391033, "grad_norm": 0.4513891339302063, "learning_rate": 4.3159932981126925e-06, "loss": 0.3452, "step": 36139 }, { "epoch": 1.6584828598962873, "grad_norm": 0.46797114610671997, "learning_rate": 4.315750416161586e-06, "loss": 0.3789, "step": 36140 }, { "epoch": 1.6585287504015418, "grad_norm": 0.541668176651001, "learning_rate": 4.3155075358558865e-06, "loss": 0.5096, "step": 36141 }, { "epoch": 1.6585746409067963, "grad_norm": 0.48768866062164307, "learning_rate": 4.3152646571961745e-06, "loss": 0.3839, "step": 36142 }, { "epoch": 1.6586205314120508, "grad_norm": 0.482728511095047, "learning_rate": 4.3150217801830345e-06, "loss": 0.3538, "step": 36143 }, { "epoch": 1.6586664219173053, "grad_norm": 0.4559321701526642, "learning_rate": 4.314778904817052e-06, "loss": 0.3357, "step": 36144 }, { "epoch": 1.6587123124225598, "grad_norm": 0.48633721470832825, "learning_rate": 4.314536031098809e-06, "loss": 0.3972, "step": 36145 }, { "epoch": 1.6587582029278143, "grad_norm": 0.4220765233039856, "learning_rate": 4.314293159028889e-06, "loss": 0.288, "step": 36146 }, { "epoch": 1.6588040934330688, "grad_norm": 0.49434608221054077, "learning_rate": 4.314050288607879e-06, "loss": 0.3514, "step": 36147 }, { "epoch": 1.6588499839383233, "grad_norm": 0.4648244380950928, "learning_rate": 4.313807419836363e-06, "loss": 0.3464, "step": 36148 }, { "epoch": 1.6588958744435778, "grad_norm": 0.45056143403053284, "learning_rate": 4.313564552714921e-06, "loss": 0.3098, "step": 36149 }, { "epoch": 1.658941764948832, "grad_norm": 0.5130603313446045, "learning_rate": 4.313321687244142e-06, "loss": 0.426, "step": 36150 }, { "epoch": 1.6589876554540866, "grad_norm": 0.48223114013671875, "learning_rate": 4.313078823424607e-06, "loss": 0.3467, "step": 36151 }, { "epoch": 1.659033545959341, "grad_norm": 0.4116940498352051, "learning_rate": 4.312835961256898e-06, "loss": 0.2823, "step": 36152 }, { "epoch": 1.6590794364645953, "grad_norm": 0.4911724030971527, "learning_rate": 4.312593100741604e-06, "loss": 0.4004, "step": 36153 }, { "epoch": 1.6591253269698498, "grad_norm": 0.4769153594970703, "learning_rate": 4.312350241879307e-06, "loss": 0.3514, "step": 36154 }, { "epoch": 1.6591712174751043, "grad_norm": 0.4908784329891205, "learning_rate": 4.31210738467059e-06, "loss": 0.3687, "step": 36155 }, { "epoch": 1.6592171079803588, "grad_norm": 0.4965973198413849, "learning_rate": 4.311864529116038e-06, "loss": 0.3667, "step": 36156 }, { "epoch": 1.6592629984856133, "grad_norm": 0.4554583430290222, "learning_rate": 4.311621675216235e-06, "loss": 0.3192, "step": 36157 }, { "epoch": 1.6593088889908678, "grad_norm": 0.44832873344421387, "learning_rate": 4.311378822971765e-06, "loss": 0.3497, "step": 36158 }, { "epoch": 1.6593547794961223, "grad_norm": 0.5041817426681519, "learning_rate": 4.311135972383209e-06, "loss": 0.3883, "step": 36159 }, { "epoch": 1.6594006700013768, "grad_norm": 0.44811132550239563, "learning_rate": 4.310893123451156e-06, "loss": 0.3295, "step": 36160 }, { "epoch": 1.6594465605066313, "grad_norm": 0.47344714403152466, "learning_rate": 4.310650276176187e-06, "loss": 0.3631, "step": 36161 }, { "epoch": 1.6594924510118858, "grad_norm": 0.46713128685951233, "learning_rate": 4.3104074305588845e-06, "loss": 0.3661, "step": 36162 }, { "epoch": 1.65953834151714, "grad_norm": 0.4695574641227722, "learning_rate": 4.3101645865998364e-06, "loss": 0.3791, "step": 36163 }, { "epoch": 1.6595842320223946, "grad_norm": 0.441419780254364, "learning_rate": 4.309921744299625e-06, "loss": 0.2945, "step": 36164 }, { "epoch": 1.659630122527649, "grad_norm": 0.49451547861099243, "learning_rate": 4.309678903658833e-06, "loss": 0.4572, "step": 36165 }, { "epoch": 1.6596760130329034, "grad_norm": 0.5003684163093567, "learning_rate": 4.309436064678047e-06, "loss": 0.3841, "step": 36166 }, { "epoch": 1.6597219035381579, "grad_norm": 0.49283352494239807, "learning_rate": 4.309193227357849e-06, "loss": 0.3539, "step": 36167 }, { "epoch": 1.6597677940434123, "grad_norm": 0.45494237542152405, "learning_rate": 4.30895039169882e-06, "loss": 0.3574, "step": 36168 }, { "epoch": 1.6598136845486668, "grad_norm": 0.4487442672252655, "learning_rate": 4.30870755770155e-06, "loss": 0.3281, "step": 36169 }, { "epoch": 1.6598595750539213, "grad_norm": 0.4158509075641632, "learning_rate": 4.308464725366621e-06, "loss": 0.2899, "step": 36170 }, { "epoch": 1.6599054655591758, "grad_norm": 0.5776636004447937, "learning_rate": 4.308221894694614e-06, "loss": 0.3803, "step": 36171 }, { "epoch": 1.6599513560644303, "grad_norm": 0.47545215487480164, "learning_rate": 4.307979065686117e-06, "loss": 0.3867, "step": 36172 }, { "epoch": 1.6599972465696848, "grad_norm": 0.4764041006565094, "learning_rate": 4.307736238341711e-06, "loss": 0.3773, "step": 36173 }, { "epoch": 1.6600431370749393, "grad_norm": 0.4419650137424469, "learning_rate": 4.307493412661979e-06, "loss": 0.3482, "step": 36174 }, { "epoch": 1.6600890275801936, "grad_norm": 0.5252270698547363, "learning_rate": 4.3072505886475086e-06, "loss": 0.3428, "step": 36175 }, { "epoch": 1.660134918085448, "grad_norm": 0.4509217441082001, "learning_rate": 4.307007766298882e-06, "loss": 0.3138, "step": 36176 }, { "epoch": 1.6601808085907026, "grad_norm": 0.4895867109298706, "learning_rate": 4.3067649456166825e-06, "loss": 0.4179, "step": 36177 }, { "epoch": 1.6602266990959569, "grad_norm": 0.48821520805358887, "learning_rate": 4.306522126601495e-06, "loss": 0.3845, "step": 36178 }, { "epoch": 1.6602725896012114, "grad_norm": 0.5249105095863342, "learning_rate": 4.306279309253903e-06, "loss": 0.4358, "step": 36179 }, { "epoch": 1.6603184801064659, "grad_norm": 0.4857627749443054, "learning_rate": 4.306036493574489e-06, "loss": 0.4024, "step": 36180 }, { "epoch": 1.6603643706117204, "grad_norm": 0.4672257602214813, "learning_rate": 4.305793679563841e-06, "loss": 0.3368, "step": 36181 }, { "epoch": 1.6604102611169749, "grad_norm": 0.46526116132736206, "learning_rate": 4.305550867222539e-06, "loss": 0.3339, "step": 36182 }, { "epoch": 1.6604561516222294, "grad_norm": 0.4322390854358673, "learning_rate": 4.305308056551165e-06, "loss": 0.2941, "step": 36183 }, { "epoch": 1.6605020421274839, "grad_norm": 0.4829897880554199, "learning_rate": 4.305065247550309e-06, "loss": 0.3522, "step": 36184 }, { "epoch": 1.6605479326327384, "grad_norm": 0.47197550535202026, "learning_rate": 4.304822440220552e-06, "loss": 0.354, "step": 36185 }, { "epoch": 1.6605938231379929, "grad_norm": 0.4809637665748596, "learning_rate": 4.304579634562477e-06, "loss": 0.4257, "step": 36186 }, { "epoch": 1.6606397136432474, "grad_norm": 0.46118879318237305, "learning_rate": 4.304336830576667e-06, "loss": 0.3149, "step": 36187 }, { "epoch": 1.6606856041485016, "grad_norm": 0.4638432264328003, "learning_rate": 4.304094028263709e-06, "loss": 0.3533, "step": 36188 }, { "epoch": 1.6607314946537561, "grad_norm": 0.4894821345806122, "learning_rate": 4.303851227624186e-06, "loss": 0.413, "step": 36189 }, { "epoch": 1.6607773851590106, "grad_norm": 0.5373408794403076, "learning_rate": 4.303608428658678e-06, "loss": 0.4382, "step": 36190 }, { "epoch": 1.660823275664265, "grad_norm": 0.4608858823776245, "learning_rate": 4.303365631367774e-06, "loss": 0.3224, "step": 36191 }, { "epoch": 1.6608691661695194, "grad_norm": 0.42793843150138855, "learning_rate": 4.303122835752056e-06, "loss": 0.2998, "step": 36192 }, { "epoch": 1.660915056674774, "grad_norm": 0.45485982298851013, "learning_rate": 4.302880041812106e-06, "loss": 0.3379, "step": 36193 }, { "epoch": 1.6609609471800284, "grad_norm": 0.45809122920036316, "learning_rate": 4.3026372495485115e-06, "loss": 0.3262, "step": 36194 }, { "epoch": 1.661006837685283, "grad_norm": 0.46151870489120483, "learning_rate": 4.302394458961854e-06, "loss": 0.3288, "step": 36195 }, { "epoch": 1.6610527281905374, "grad_norm": 0.5126960277557373, "learning_rate": 4.302151670052715e-06, "loss": 0.4593, "step": 36196 }, { "epoch": 1.661098618695792, "grad_norm": 0.46597525477409363, "learning_rate": 4.301908882821684e-06, "loss": 0.374, "step": 36197 }, { "epoch": 1.6611445092010464, "grad_norm": 0.49306201934814453, "learning_rate": 4.301666097269341e-06, "loss": 0.3846, "step": 36198 }, { "epoch": 1.661190399706301, "grad_norm": 0.47155600786209106, "learning_rate": 4.30142331339627e-06, "loss": 0.3528, "step": 36199 }, { "epoch": 1.6612362902115554, "grad_norm": 0.4647429883480072, "learning_rate": 4.301180531203057e-06, "loss": 0.3726, "step": 36200 }, { "epoch": 1.6612821807168097, "grad_norm": 0.4472130835056305, "learning_rate": 4.300937750690284e-06, "loss": 0.34, "step": 36201 }, { "epoch": 1.6613280712220642, "grad_norm": 0.4980207085609436, "learning_rate": 4.300694971858532e-06, "loss": 0.3967, "step": 36202 }, { "epoch": 1.6613739617273187, "grad_norm": 0.5025799870491028, "learning_rate": 4.300452194708392e-06, "loss": 0.4018, "step": 36203 }, { "epoch": 1.661419852232573, "grad_norm": 0.4571799635887146, "learning_rate": 4.3002094192404425e-06, "loss": 0.3503, "step": 36204 }, { "epoch": 1.6614657427378274, "grad_norm": 0.4764159619808197, "learning_rate": 4.299966645455266e-06, "loss": 0.3489, "step": 36205 }, { "epoch": 1.661511633243082, "grad_norm": 0.5104197263717651, "learning_rate": 4.299723873353451e-06, "loss": 0.3898, "step": 36206 }, { "epoch": 1.6615575237483364, "grad_norm": 0.45109322667121887, "learning_rate": 4.29948110293558e-06, "loss": 0.3726, "step": 36207 }, { "epoch": 1.661603414253591, "grad_norm": 0.4569368064403534, "learning_rate": 4.299238334202234e-06, "loss": 0.3392, "step": 36208 }, { "epoch": 1.6616493047588454, "grad_norm": 0.44139808416366577, "learning_rate": 4.2989955671539994e-06, "loss": 0.3122, "step": 36209 }, { "epoch": 1.6616951952641, "grad_norm": 0.45661434531211853, "learning_rate": 4.2987528017914596e-06, "loss": 0.3131, "step": 36210 }, { "epoch": 1.6617410857693544, "grad_norm": 0.47844335436820984, "learning_rate": 4.298510038115195e-06, "loss": 0.3591, "step": 36211 }, { "epoch": 1.661786976274609, "grad_norm": 0.4727175831794739, "learning_rate": 4.298267276125796e-06, "loss": 0.3984, "step": 36212 }, { "epoch": 1.6618328667798632, "grad_norm": 0.5041042566299438, "learning_rate": 4.298024515823842e-06, "loss": 0.3878, "step": 36213 }, { "epoch": 1.6618787572851177, "grad_norm": 0.4534107744693756, "learning_rate": 4.297781757209917e-06, "loss": 0.3335, "step": 36214 }, { "epoch": 1.6619246477903722, "grad_norm": 0.47263941168785095, "learning_rate": 4.2975390002846056e-06, "loss": 0.3751, "step": 36215 }, { "epoch": 1.6619705382956267, "grad_norm": 0.46041783690452576, "learning_rate": 4.297296245048491e-06, "loss": 0.3647, "step": 36216 }, { "epoch": 1.662016428800881, "grad_norm": 0.5919760465621948, "learning_rate": 4.297053491502156e-06, "loss": 0.3277, "step": 36217 }, { "epoch": 1.6620623193061355, "grad_norm": 0.4689157009124756, "learning_rate": 4.296810739646186e-06, "loss": 0.3828, "step": 36218 }, { "epoch": 1.66210820981139, "grad_norm": 0.49491897225379944, "learning_rate": 4.296567989481166e-06, "loss": 0.4025, "step": 36219 }, { "epoch": 1.6621541003166445, "grad_norm": 0.4740844964981079, "learning_rate": 4.296325241007677e-06, "loss": 0.3683, "step": 36220 }, { "epoch": 1.662199990821899, "grad_norm": 0.5126505494117737, "learning_rate": 4.296082494226303e-06, "loss": 0.3932, "step": 36221 }, { "epoch": 1.6622458813271535, "grad_norm": 0.45973503589630127, "learning_rate": 4.29583974913763e-06, "loss": 0.333, "step": 36222 }, { "epoch": 1.662291771832408, "grad_norm": 0.46231767535209656, "learning_rate": 4.295597005742239e-06, "loss": 0.3485, "step": 36223 }, { "epoch": 1.6623376623376624, "grad_norm": 0.4517453908920288, "learning_rate": 4.295354264040713e-06, "loss": 0.3204, "step": 36224 }, { "epoch": 1.662383552842917, "grad_norm": 0.4346649646759033, "learning_rate": 4.295111524033641e-06, "loss": 0.2728, "step": 36225 }, { "epoch": 1.6624294433481712, "grad_norm": 0.48042160272598267, "learning_rate": 4.294868785721603e-06, "loss": 0.3267, "step": 36226 }, { "epoch": 1.6624753338534257, "grad_norm": 0.5077573657035828, "learning_rate": 4.29462604910518e-06, "loss": 0.3318, "step": 36227 }, { "epoch": 1.6625212243586802, "grad_norm": 0.43770632147789, "learning_rate": 4.294383314184961e-06, "loss": 0.304, "step": 36228 }, { "epoch": 1.6625671148639345, "grad_norm": 0.4767512381076813, "learning_rate": 4.294140580961528e-06, "loss": 0.3989, "step": 36229 }, { "epoch": 1.662613005369189, "grad_norm": 0.46568331122398376, "learning_rate": 4.293897849435462e-06, "loss": 0.3664, "step": 36230 }, { "epoch": 1.6626588958744435, "grad_norm": 0.4980507493019104, "learning_rate": 4.293655119607351e-06, "loss": 0.3803, "step": 36231 }, { "epoch": 1.662704786379698, "grad_norm": 0.4911334812641144, "learning_rate": 4.293412391477776e-06, "loss": 0.3696, "step": 36232 }, { "epoch": 1.6627506768849525, "grad_norm": 0.4816778898239136, "learning_rate": 4.293169665047319e-06, "loss": 0.3095, "step": 36233 }, { "epoch": 1.662796567390207, "grad_norm": 0.45551568269729614, "learning_rate": 4.292926940316568e-06, "loss": 0.3765, "step": 36234 }, { "epoch": 1.6628424578954615, "grad_norm": 0.4306444823741913, "learning_rate": 4.292684217286105e-06, "loss": 0.2949, "step": 36235 }, { "epoch": 1.662888348400716, "grad_norm": 0.4980430006980896, "learning_rate": 4.292441495956511e-06, "loss": 0.4266, "step": 36236 }, { "epoch": 1.6629342389059705, "grad_norm": 0.4745534360408783, "learning_rate": 4.292198776328374e-06, "loss": 0.3737, "step": 36237 }, { "epoch": 1.662980129411225, "grad_norm": 0.5083572864532471, "learning_rate": 4.291956058402276e-06, "loss": 0.3724, "step": 36238 }, { "epoch": 1.6630260199164792, "grad_norm": 0.46010252833366394, "learning_rate": 4.291713342178797e-06, "loss": 0.3415, "step": 36239 }, { "epoch": 1.6630719104217337, "grad_norm": 0.4652763307094574, "learning_rate": 4.291470627658526e-06, "loss": 0.3795, "step": 36240 }, { "epoch": 1.6631178009269882, "grad_norm": 0.48567676544189453, "learning_rate": 4.291227914842045e-06, "loss": 0.3609, "step": 36241 }, { "epoch": 1.6631636914322425, "grad_norm": 0.4545653760433197, "learning_rate": 4.290985203729936e-06, "loss": 0.3113, "step": 36242 }, { "epoch": 1.663209581937497, "grad_norm": 0.453704297542572, "learning_rate": 4.290742494322785e-06, "loss": 0.3219, "step": 36243 }, { "epoch": 1.6632554724427515, "grad_norm": 0.4520237445831299, "learning_rate": 4.290499786621174e-06, "loss": 0.348, "step": 36244 }, { "epoch": 1.663301362948006, "grad_norm": 0.484774112701416, "learning_rate": 4.290257080625686e-06, "loss": 0.3431, "step": 36245 }, { "epoch": 1.6633472534532605, "grad_norm": 0.47661012411117554, "learning_rate": 4.2900143763369066e-06, "loss": 0.3929, "step": 36246 }, { "epoch": 1.663393143958515, "grad_norm": 0.4306141436100006, "learning_rate": 4.2897716737554204e-06, "loss": 0.3201, "step": 36247 }, { "epoch": 1.6634390344637695, "grad_norm": 0.48564422130584717, "learning_rate": 4.289528972881806e-06, "loss": 0.3521, "step": 36248 }, { "epoch": 1.663484924969024, "grad_norm": 0.48704737424850464, "learning_rate": 4.289286273716652e-06, "loss": 0.3803, "step": 36249 }, { "epoch": 1.6635308154742785, "grad_norm": 0.5362771153450012, "learning_rate": 4.28904357626054e-06, "loss": 0.4631, "step": 36250 }, { "epoch": 1.663576705979533, "grad_norm": 0.47447440028190613, "learning_rate": 4.288800880514053e-06, "loss": 0.411, "step": 36251 }, { "epoch": 1.6636225964847873, "grad_norm": 0.4667125642299652, "learning_rate": 4.288558186477776e-06, "loss": 0.4023, "step": 36252 }, { "epoch": 1.6636684869900418, "grad_norm": 0.4606325328350067, "learning_rate": 4.288315494152293e-06, "loss": 0.3004, "step": 36253 }, { "epoch": 1.6637143774952963, "grad_norm": 0.45616137981414795, "learning_rate": 4.2880728035381855e-06, "loss": 0.3438, "step": 36254 }, { "epoch": 1.6637602680005505, "grad_norm": 0.47695672512054443, "learning_rate": 4.287830114636037e-06, "loss": 0.4234, "step": 36255 }, { "epoch": 1.663806158505805, "grad_norm": 0.4499322175979614, "learning_rate": 4.287587427446434e-06, "loss": 0.3426, "step": 36256 }, { "epoch": 1.6638520490110595, "grad_norm": 0.5467880368232727, "learning_rate": 4.287344741969959e-06, "loss": 0.494, "step": 36257 }, { "epoch": 1.663897939516314, "grad_norm": 0.47830554842948914, "learning_rate": 4.287102058207193e-06, "loss": 0.3596, "step": 36258 }, { "epoch": 1.6639438300215685, "grad_norm": 0.49418142437934875, "learning_rate": 4.286859376158723e-06, "loss": 0.3798, "step": 36259 }, { "epoch": 1.663989720526823, "grad_norm": 0.4680376350879669, "learning_rate": 4.286616695825132e-06, "loss": 0.3375, "step": 36260 }, { "epoch": 1.6640356110320775, "grad_norm": 0.4352279603481293, "learning_rate": 4.286374017207e-06, "loss": 0.344, "step": 36261 }, { "epoch": 1.664081501537332, "grad_norm": 0.45345935225486755, "learning_rate": 4.286131340304914e-06, "loss": 0.3184, "step": 36262 }, { "epoch": 1.6641273920425865, "grad_norm": 0.4989250600337982, "learning_rate": 4.2858886651194585e-06, "loss": 0.3871, "step": 36263 }, { "epoch": 1.6641732825478408, "grad_norm": 0.46464794874191284, "learning_rate": 4.285645991651214e-06, "loss": 0.303, "step": 36264 }, { "epoch": 1.6642191730530953, "grad_norm": 0.48235201835632324, "learning_rate": 4.285403319900766e-06, "loss": 0.3663, "step": 36265 }, { "epoch": 1.6642650635583498, "grad_norm": 0.45667579770088196, "learning_rate": 4.285160649868698e-06, "loss": 0.3623, "step": 36266 }, { "epoch": 1.664310954063604, "grad_norm": 0.4917313754558563, "learning_rate": 4.284917981555591e-06, "loss": 0.4021, "step": 36267 }, { "epoch": 1.6643568445688586, "grad_norm": 0.47283604741096497, "learning_rate": 4.284675314962032e-06, "loss": 0.3846, "step": 36268 }, { "epoch": 1.664402735074113, "grad_norm": 0.4974950850009918, "learning_rate": 4.284432650088604e-06, "loss": 0.394, "step": 36269 }, { "epoch": 1.6644486255793676, "grad_norm": 0.46044453978538513, "learning_rate": 4.284189986935887e-06, "loss": 0.3414, "step": 36270 }, { "epoch": 1.664494516084622, "grad_norm": 0.4786221981048584, "learning_rate": 4.283947325504469e-06, "loss": 0.3834, "step": 36271 }, { "epoch": 1.6645404065898766, "grad_norm": 0.4130963683128357, "learning_rate": 4.283704665794932e-06, "loss": 0.2798, "step": 36272 }, { "epoch": 1.664586297095131, "grad_norm": 0.46508416533470154, "learning_rate": 4.283462007807859e-06, "loss": 0.3562, "step": 36273 }, { "epoch": 1.6646321876003856, "grad_norm": 0.46477949619293213, "learning_rate": 4.283219351543834e-06, "loss": 0.3611, "step": 36274 }, { "epoch": 1.66467807810564, "grad_norm": 0.5179890990257263, "learning_rate": 4.28297669700344e-06, "loss": 0.4346, "step": 36275 }, { "epoch": 1.6647239686108946, "grad_norm": 0.4880692660808563, "learning_rate": 4.282734044187259e-06, "loss": 0.3738, "step": 36276 }, { "epoch": 1.6647698591161488, "grad_norm": 0.4676433503627777, "learning_rate": 4.282491393095878e-06, "loss": 0.3155, "step": 36277 }, { "epoch": 1.6648157496214033, "grad_norm": 0.5174286961555481, "learning_rate": 4.282248743729879e-06, "loss": 0.5062, "step": 36278 }, { "epoch": 1.6648616401266578, "grad_norm": 0.47100773453712463, "learning_rate": 4.282006096089844e-06, "loss": 0.3913, "step": 36279 }, { "epoch": 1.664907530631912, "grad_norm": 0.40979066491127014, "learning_rate": 4.281763450176359e-06, "loss": 0.276, "step": 36280 }, { "epoch": 1.6649534211371666, "grad_norm": 0.5318982601165771, "learning_rate": 4.281520805990006e-06, "loss": 0.4064, "step": 36281 }, { "epoch": 1.664999311642421, "grad_norm": 0.49839577078819275, "learning_rate": 4.281278163531366e-06, "loss": 0.405, "step": 36282 }, { "epoch": 1.6650452021476756, "grad_norm": 0.482532262802124, "learning_rate": 4.281035522801028e-06, "loss": 0.3993, "step": 36283 }, { "epoch": 1.66509109265293, "grad_norm": 0.49582916498184204, "learning_rate": 4.280792883799572e-06, "loss": 0.4073, "step": 36284 }, { "epoch": 1.6651369831581846, "grad_norm": 0.4148208200931549, "learning_rate": 4.280550246527584e-06, "loss": 0.2603, "step": 36285 }, { "epoch": 1.665182873663439, "grad_norm": 0.4479190707206726, "learning_rate": 4.280307610985643e-06, "loss": 0.3198, "step": 36286 }, { "epoch": 1.6652287641686936, "grad_norm": 0.6520950198173523, "learning_rate": 4.2800649771743365e-06, "loss": 0.3625, "step": 36287 }, { "epoch": 1.665274654673948, "grad_norm": 0.43715718388557434, "learning_rate": 4.279822345094246e-06, "loss": 0.3138, "step": 36288 }, { "epoch": 1.6653205451792026, "grad_norm": 0.43279600143432617, "learning_rate": 4.2795797147459535e-06, "loss": 0.3124, "step": 36289 }, { "epoch": 1.6653664356844569, "grad_norm": 0.464556485414505, "learning_rate": 4.279337086130047e-06, "loss": 0.3525, "step": 36290 }, { "epoch": 1.6654123261897114, "grad_norm": 0.444024920463562, "learning_rate": 4.279094459247108e-06, "loss": 0.3354, "step": 36291 }, { "epoch": 1.6654582166949659, "grad_norm": 0.4661685526371002, "learning_rate": 4.278851834097717e-06, "loss": 0.3395, "step": 36292 }, { "epoch": 1.6655041072002201, "grad_norm": 0.48338985443115234, "learning_rate": 4.2786092106824605e-06, "loss": 0.3819, "step": 36293 }, { "epoch": 1.6655499977054746, "grad_norm": 0.45614129304885864, "learning_rate": 4.278366589001922e-06, "loss": 0.3138, "step": 36294 }, { "epoch": 1.6655958882107291, "grad_norm": 0.46891483664512634, "learning_rate": 4.278123969056683e-06, "loss": 0.3951, "step": 36295 }, { "epoch": 1.6656417787159836, "grad_norm": 0.46651163697242737, "learning_rate": 4.277881350847329e-06, "loss": 0.3283, "step": 36296 }, { "epoch": 1.6656876692212381, "grad_norm": 0.45361328125, "learning_rate": 4.277638734374442e-06, "loss": 0.3488, "step": 36297 }, { "epoch": 1.6657335597264926, "grad_norm": 0.47035273909568787, "learning_rate": 4.277396119638605e-06, "loss": 0.3747, "step": 36298 }, { "epoch": 1.6657794502317471, "grad_norm": 0.47960934042930603, "learning_rate": 4.2771535066404035e-06, "loss": 0.391, "step": 36299 }, { "epoch": 1.6658253407370016, "grad_norm": 0.5951382517814636, "learning_rate": 4.276910895380419e-06, "loss": 0.3287, "step": 36300 }, { "epoch": 1.6658712312422561, "grad_norm": 0.45369866490364075, "learning_rate": 4.276668285859235e-06, "loss": 0.3608, "step": 36301 }, { "epoch": 1.6659171217475104, "grad_norm": 0.44291356205940247, "learning_rate": 4.276425678077438e-06, "loss": 0.3372, "step": 36302 }, { "epoch": 1.6659630122527649, "grad_norm": 0.5147892236709595, "learning_rate": 4.276183072035607e-06, "loss": 0.4503, "step": 36303 }, { "epoch": 1.6660089027580194, "grad_norm": 0.476492702960968, "learning_rate": 4.275940467734325e-06, "loss": 0.311, "step": 36304 }, { "epoch": 1.6660547932632739, "grad_norm": 0.5348714590072632, "learning_rate": 4.275697865174182e-06, "loss": 0.4501, "step": 36305 }, { "epoch": 1.6661006837685282, "grad_norm": 0.4763276278972626, "learning_rate": 4.275455264355755e-06, "loss": 0.3797, "step": 36306 }, { "epoch": 1.6661465742737827, "grad_norm": 0.4355514347553253, "learning_rate": 4.275212665279629e-06, "loss": 0.3148, "step": 36307 }, { "epoch": 1.6661924647790372, "grad_norm": 0.523844301700592, "learning_rate": 4.274970067946389e-06, "loss": 0.3998, "step": 36308 }, { "epoch": 1.6662383552842917, "grad_norm": 0.46772611141204834, "learning_rate": 4.274727472356617e-06, "loss": 0.3653, "step": 36309 }, { "epoch": 1.6662842457895461, "grad_norm": 0.4746570885181427, "learning_rate": 4.274484878510895e-06, "loss": 0.3743, "step": 36310 }, { "epoch": 1.6663301362948006, "grad_norm": 0.46243345737457275, "learning_rate": 4.274242286409809e-06, "loss": 0.3071, "step": 36311 }, { "epoch": 1.6663760268000551, "grad_norm": 0.4791312515735626, "learning_rate": 4.273999696053941e-06, "loss": 0.3587, "step": 36312 }, { "epoch": 1.6664219173053096, "grad_norm": 0.41274598240852356, "learning_rate": 4.273757107443875e-06, "loss": 0.29, "step": 36313 }, { "epoch": 1.6664678078105641, "grad_norm": 0.4645887017250061, "learning_rate": 4.273514520580194e-06, "loss": 0.3557, "step": 36314 }, { "epoch": 1.6665136983158184, "grad_norm": 0.48121514916419983, "learning_rate": 4.273271935463482e-06, "loss": 0.3544, "step": 36315 }, { "epoch": 1.666559588821073, "grad_norm": 0.4817105829715729, "learning_rate": 4.27302935209432e-06, "loss": 0.4152, "step": 36316 }, { "epoch": 1.6666054793263274, "grad_norm": 0.49046632647514343, "learning_rate": 4.272786770473294e-06, "loss": 0.4726, "step": 36317 }, { "epoch": 1.6666513698315817, "grad_norm": 0.5043453574180603, "learning_rate": 4.2725441906009866e-06, "loss": 0.3956, "step": 36318 }, { "epoch": 1.6666972603368362, "grad_norm": 0.46656617522239685, "learning_rate": 4.272301612477981e-06, "loss": 0.3222, "step": 36319 }, { "epoch": 1.6667431508420907, "grad_norm": 0.45955711603164673, "learning_rate": 4.272059036104857e-06, "loss": 0.3464, "step": 36320 }, { "epoch": 1.6667890413473452, "grad_norm": 0.48475947976112366, "learning_rate": 4.271816461482205e-06, "loss": 0.373, "step": 36321 }, { "epoch": 1.6668349318525997, "grad_norm": 0.4875313341617584, "learning_rate": 4.271573888610603e-06, "loss": 0.3308, "step": 36322 }, { "epoch": 1.6668808223578542, "grad_norm": 0.42728909850120544, "learning_rate": 4.2713313174906354e-06, "loss": 0.3115, "step": 36323 }, { "epoch": 1.6669267128631087, "grad_norm": 0.4502031207084656, "learning_rate": 4.271088748122888e-06, "loss": 0.3179, "step": 36324 }, { "epoch": 1.6669726033683632, "grad_norm": 0.42734870314598083, "learning_rate": 4.270846180507941e-06, "loss": 0.2855, "step": 36325 }, { "epoch": 1.6670184938736177, "grad_norm": 0.44200703501701355, "learning_rate": 4.270603614646378e-06, "loss": 0.3442, "step": 36326 }, { "epoch": 1.6670643843788722, "grad_norm": 0.4745289981365204, "learning_rate": 4.270361050538784e-06, "loss": 0.3119, "step": 36327 }, { "epoch": 1.6671102748841264, "grad_norm": 0.47574561834335327, "learning_rate": 4.270118488185742e-06, "loss": 0.325, "step": 36328 }, { "epoch": 1.667156165389381, "grad_norm": 0.45527952909469604, "learning_rate": 4.269875927587833e-06, "loss": 0.3307, "step": 36329 }, { "epoch": 1.6672020558946354, "grad_norm": 0.4937826693058014, "learning_rate": 4.269633368745643e-06, "loss": 0.3332, "step": 36330 }, { "epoch": 1.6672479463998897, "grad_norm": 0.4832055866718292, "learning_rate": 4.269390811659755e-06, "loss": 0.4036, "step": 36331 }, { "epoch": 1.6672938369051442, "grad_norm": 0.4986433982849121, "learning_rate": 4.269148256330749e-06, "loss": 0.3779, "step": 36332 }, { "epoch": 1.6673397274103987, "grad_norm": 0.49590370059013367, "learning_rate": 4.268905702759213e-06, "loss": 0.4297, "step": 36333 }, { "epoch": 1.6673856179156532, "grad_norm": 0.45368897914886475, "learning_rate": 4.268663150945727e-06, "loss": 0.2966, "step": 36334 }, { "epoch": 1.6674315084209077, "grad_norm": 0.4347810447216034, "learning_rate": 4.268420600890876e-06, "loss": 0.3172, "step": 36335 }, { "epoch": 1.6674773989261622, "grad_norm": 0.4685761332511902, "learning_rate": 4.268178052595244e-06, "loss": 0.3468, "step": 36336 }, { "epoch": 1.6675232894314167, "grad_norm": 0.44975653290748596, "learning_rate": 4.267935506059411e-06, "loss": 0.3183, "step": 36337 }, { "epoch": 1.6675691799366712, "grad_norm": 0.470132052898407, "learning_rate": 4.267692961283961e-06, "loss": 0.3384, "step": 36338 }, { "epoch": 1.6676150704419257, "grad_norm": 0.523624062538147, "learning_rate": 4.267450418269481e-06, "loss": 0.4281, "step": 36339 }, { "epoch": 1.6676609609471802, "grad_norm": 0.4622613787651062, "learning_rate": 4.267207877016549e-06, "loss": 0.3283, "step": 36340 }, { "epoch": 1.6677068514524345, "grad_norm": 0.48685312271118164, "learning_rate": 4.2669653375257516e-06, "loss": 0.3775, "step": 36341 }, { "epoch": 1.667752741957689, "grad_norm": 0.49793750047683716, "learning_rate": 4.266722799797672e-06, "loss": 0.3989, "step": 36342 }, { "epoch": 1.6677986324629435, "grad_norm": 0.4666235148906708, "learning_rate": 4.266480263832892e-06, "loss": 0.3266, "step": 36343 }, { "epoch": 1.6678445229681977, "grad_norm": 0.4603443741798401, "learning_rate": 4.266237729631995e-06, "loss": 0.3695, "step": 36344 }, { "epoch": 1.6678904134734522, "grad_norm": 0.4639434516429901, "learning_rate": 4.265995197195566e-06, "loss": 0.3934, "step": 36345 }, { "epoch": 1.6679363039787067, "grad_norm": 0.47402071952819824, "learning_rate": 4.265752666524186e-06, "loss": 0.372, "step": 36346 }, { "epoch": 1.6679821944839612, "grad_norm": 0.4229109287261963, "learning_rate": 4.265510137618438e-06, "loss": 0.2922, "step": 36347 }, { "epoch": 1.6680280849892157, "grad_norm": 0.4681171178817749, "learning_rate": 4.265267610478908e-06, "loss": 0.3341, "step": 36348 }, { "epoch": 1.6680739754944702, "grad_norm": 0.43394458293914795, "learning_rate": 4.265025085106178e-06, "loss": 0.3142, "step": 36349 }, { "epoch": 1.6681198659997247, "grad_norm": 0.5006320476531982, "learning_rate": 4.26478256150083e-06, "loss": 0.3873, "step": 36350 }, { "epoch": 1.6681657565049792, "grad_norm": 0.47395649552345276, "learning_rate": 4.2645400396634475e-06, "loss": 0.4092, "step": 36351 }, { "epoch": 1.6682116470102337, "grad_norm": 0.5267159342765808, "learning_rate": 4.2642975195946146e-06, "loss": 0.2888, "step": 36352 }, { "epoch": 1.668257537515488, "grad_norm": 0.5016491413116455, "learning_rate": 4.264055001294914e-06, "loss": 0.4657, "step": 36353 }, { "epoch": 1.6683034280207425, "grad_norm": 0.44743773341178894, "learning_rate": 4.263812484764926e-06, "loss": 0.3689, "step": 36354 }, { "epoch": 1.668349318525997, "grad_norm": 0.4475707411766052, "learning_rate": 4.26356997000524e-06, "loss": 0.2916, "step": 36355 }, { "epoch": 1.6683952090312513, "grad_norm": 0.48303112387657166, "learning_rate": 4.263327457016435e-06, "loss": 0.4169, "step": 36356 }, { "epoch": 1.6684410995365058, "grad_norm": 0.5823234915733337, "learning_rate": 4.263084945799095e-06, "loss": 0.4071, "step": 36357 }, { "epoch": 1.6684869900417603, "grad_norm": 0.511863648891449, "learning_rate": 4.262842436353803e-06, "loss": 0.4691, "step": 36358 }, { "epoch": 1.6685328805470148, "grad_norm": 0.46549227833747864, "learning_rate": 4.2625999286811435e-06, "loss": 0.3123, "step": 36359 }, { "epoch": 1.6685787710522693, "grad_norm": 0.4582907259464264, "learning_rate": 4.262357422781697e-06, "loss": 0.3396, "step": 36360 }, { "epoch": 1.6686246615575238, "grad_norm": 0.45790350437164307, "learning_rate": 4.2621149186560486e-06, "loss": 0.3424, "step": 36361 }, { "epoch": 1.6686705520627783, "grad_norm": 0.4680982530117035, "learning_rate": 4.261872416304781e-06, "loss": 0.343, "step": 36362 }, { "epoch": 1.6687164425680328, "grad_norm": 0.4952363967895508, "learning_rate": 4.261629915728475e-06, "loss": 0.4213, "step": 36363 }, { "epoch": 1.6687623330732873, "grad_norm": 0.43381890654563904, "learning_rate": 4.261387416927718e-06, "loss": 0.3196, "step": 36364 }, { "epoch": 1.6688082235785417, "grad_norm": 0.4501165747642517, "learning_rate": 4.261144919903092e-06, "loss": 0.3825, "step": 36365 }, { "epoch": 1.668854114083796, "grad_norm": 0.4674747586250305, "learning_rate": 4.260902424655178e-06, "loss": 0.3454, "step": 36366 }, { "epoch": 1.6689000045890505, "grad_norm": 0.44054991006851196, "learning_rate": 4.260659931184561e-06, "loss": 0.3089, "step": 36367 }, { "epoch": 1.668945895094305, "grad_norm": 0.48704859614372253, "learning_rate": 4.260417439491824e-06, "loss": 0.3911, "step": 36368 }, { "epoch": 1.6689917855995593, "grad_norm": 0.457518994808197, "learning_rate": 4.260174949577548e-06, "loss": 0.3368, "step": 36369 }, { "epoch": 1.6690376761048138, "grad_norm": 0.5266293287277222, "learning_rate": 4.2599324614423185e-06, "loss": 0.3607, "step": 36370 }, { "epoch": 1.6690835666100683, "grad_norm": 0.49156007170677185, "learning_rate": 4.259689975086718e-06, "loss": 0.4164, "step": 36371 }, { "epoch": 1.6691294571153228, "grad_norm": 0.46097517013549805, "learning_rate": 4.25944749051133e-06, "loss": 0.3095, "step": 36372 }, { "epoch": 1.6691753476205773, "grad_norm": 0.4610563814640045, "learning_rate": 4.259205007716737e-06, "loss": 0.327, "step": 36373 }, { "epoch": 1.6692212381258318, "grad_norm": 0.441128671169281, "learning_rate": 4.258962526703522e-06, "loss": 0.3061, "step": 36374 }, { "epoch": 1.6692671286310863, "grad_norm": 0.5286633968353271, "learning_rate": 4.2587200474722666e-06, "loss": 0.3806, "step": 36375 }, { "epoch": 1.6693130191363408, "grad_norm": 0.4847819209098816, "learning_rate": 4.258477570023557e-06, "loss": 0.3887, "step": 36376 }, { "epoch": 1.6693589096415953, "grad_norm": 0.4531690776348114, "learning_rate": 4.258235094357975e-06, "loss": 0.3292, "step": 36377 }, { "epoch": 1.6694048001468498, "grad_norm": 0.4556334912776947, "learning_rate": 4.2579926204761025e-06, "loss": 0.3241, "step": 36378 }, { "epoch": 1.669450690652104, "grad_norm": 0.4550252854824066, "learning_rate": 4.257750148378525e-06, "loss": 0.3072, "step": 36379 }, { "epoch": 1.6694965811573586, "grad_norm": 0.4946868419647217, "learning_rate": 4.257507678065824e-06, "loss": 0.3578, "step": 36380 }, { "epoch": 1.669542471662613, "grad_norm": 0.47052067518234253, "learning_rate": 4.25726520953858e-06, "loss": 0.3768, "step": 36381 }, { "epoch": 1.6695883621678673, "grad_norm": 0.5159062147140503, "learning_rate": 4.257022742797382e-06, "loss": 0.4535, "step": 36382 }, { "epoch": 1.6696342526731218, "grad_norm": 0.49265843629837036, "learning_rate": 4.2567802778428105e-06, "loss": 0.4313, "step": 36383 }, { "epoch": 1.6696801431783763, "grad_norm": 0.5001285076141357, "learning_rate": 4.256537814675446e-06, "loss": 0.3804, "step": 36384 }, { "epoch": 1.6697260336836308, "grad_norm": 0.4813125729560852, "learning_rate": 4.256295353295872e-06, "loss": 0.3178, "step": 36385 }, { "epoch": 1.6697719241888853, "grad_norm": 0.46900615096092224, "learning_rate": 4.2560528937046746e-06, "loss": 0.3558, "step": 36386 }, { "epoch": 1.6698178146941398, "grad_norm": 0.48367422819137573, "learning_rate": 4.2558104359024355e-06, "loss": 0.3386, "step": 36387 }, { "epoch": 1.6698637051993943, "grad_norm": 0.4695724844932556, "learning_rate": 4.2555679798897365e-06, "loss": 0.3169, "step": 36388 }, { "epoch": 1.6699095957046488, "grad_norm": 0.4959152042865753, "learning_rate": 4.255325525667162e-06, "loss": 0.3695, "step": 36389 }, { "epoch": 1.6699554862099033, "grad_norm": 0.5036804676055908, "learning_rate": 4.255083073235296e-06, "loss": 0.3928, "step": 36390 }, { "epoch": 1.6700013767151576, "grad_norm": 0.476836621761322, "learning_rate": 4.254840622594717e-06, "loss": 0.3718, "step": 36391 }, { "epoch": 1.670047267220412, "grad_norm": 0.4662763774394989, "learning_rate": 4.254598173746013e-06, "loss": 0.3415, "step": 36392 }, { "epoch": 1.6700931577256666, "grad_norm": 0.4697283208370209, "learning_rate": 4.254355726689765e-06, "loss": 0.3134, "step": 36393 }, { "epoch": 1.670139048230921, "grad_norm": 0.4429141879081726, "learning_rate": 4.2541132814265555e-06, "loss": 0.3212, "step": 36394 }, { "epoch": 1.6701849387361754, "grad_norm": 0.4392108619213104, "learning_rate": 4.253870837956969e-06, "loss": 0.2844, "step": 36395 }, { "epoch": 1.6702308292414298, "grad_norm": 0.4867120683193207, "learning_rate": 4.253628396281588e-06, "loss": 0.3862, "step": 36396 }, { "epoch": 1.6702767197466843, "grad_norm": 0.49286913871765137, "learning_rate": 4.253385956400993e-06, "loss": 0.3555, "step": 36397 }, { "epoch": 1.6703226102519388, "grad_norm": 0.44488343596458435, "learning_rate": 4.253143518315771e-06, "loss": 0.2991, "step": 36398 }, { "epoch": 1.6703685007571933, "grad_norm": 0.49190205335617065, "learning_rate": 4.2529010820265035e-06, "loss": 0.3736, "step": 36399 }, { "epoch": 1.6704143912624478, "grad_norm": 0.4606250524520874, "learning_rate": 4.252658647533772e-06, "loss": 0.384, "step": 36400 }, { "epoch": 1.6704602817677023, "grad_norm": 0.4886568486690521, "learning_rate": 4.252416214838162e-06, "loss": 0.393, "step": 36401 }, { "epoch": 1.6705061722729568, "grad_norm": 0.5127386450767517, "learning_rate": 4.252173783940254e-06, "loss": 0.4342, "step": 36402 }, { "epoch": 1.6705520627782113, "grad_norm": 0.5012575387954712, "learning_rate": 4.25193135484063e-06, "loss": 0.3985, "step": 36403 }, { "epoch": 1.6705979532834656, "grad_norm": 0.4899231493473053, "learning_rate": 4.251688927539879e-06, "loss": 0.3888, "step": 36404 }, { "epoch": 1.67064384378872, "grad_norm": 0.48674607276916504, "learning_rate": 4.251446502038579e-06, "loss": 0.3425, "step": 36405 }, { "epoch": 1.6706897342939746, "grad_norm": 0.5360870361328125, "learning_rate": 4.251204078337311e-06, "loss": 0.4129, "step": 36406 }, { "epoch": 1.6707356247992289, "grad_norm": 0.49139028787612915, "learning_rate": 4.250961656436663e-06, "loss": 0.3708, "step": 36407 }, { "epoch": 1.6707815153044834, "grad_norm": 0.43626487255096436, "learning_rate": 4.250719236337216e-06, "loss": 0.3377, "step": 36408 }, { "epoch": 1.6708274058097379, "grad_norm": 0.4327443540096283, "learning_rate": 4.250476818039552e-06, "loss": 0.2853, "step": 36409 }, { "epoch": 1.6708732963149924, "grad_norm": 0.49165529012680054, "learning_rate": 4.250234401544255e-06, "loss": 0.4109, "step": 36410 }, { "epoch": 1.6709191868202469, "grad_norm": 0.49875110387802124, "learning_rate": 4.249991986851909e-06, "loss": 0.4288, "step": 36411 }, { "epoch": 1.6709650773255014, "grad_norm": 0.4527752697467804, "learning_rate": 4.249749573963092e-06, "loss": 0.3532, "step": 36412 }, { "epoch": 1.6710109678307559, "grad_norm": 0.49607908725738525, "learning_rate": 4.2495071628783936e-06, "loss": 0.4127, "step": 36413 }, { "epoch": 1.6710568583360104, "grad_norm": 0.4266645014286041, "learning_rate": 4.249264753598394e-06, "loss": 0.2665, "step": 36414 }, { "epoch": 1.6711027488412649, "grad_norm": 0.45072200894355774, "learning_rate": 4.249022346123674e-06, "loss": 0.35, "step": 36415 }, { "epoch": 1.6711486393465194, "grad_norm": 0.5091655254364014, "learning_rate": 4.248779940454818e-06, "loss": 0.3738, "step": 36416 }, { "epoch": 1.6711945298517736, "grad_norm": 0.48325029015541077, "learning_rate": 4.2485375365924105e-06, "loss": 0.3964, "step": 36417 }, { "epoch": 1.6712404203570281, "grad_norm": 0.45113036036491394, "learning_rate": 4.248295134537032e-06, "loss": 0.3519, "step": 36418 }, { "epoch": 1.6712863108622826, "grad_norm": 0.4783253073692322, "learning_rate": 4.248052734289267e-06, "loss": 0.3684, "step": 36419 }, { "epoch": 1.671332201367537, "grad_norm": 0.4224016070365906, "learning_rate": 4.247810335849698e-06, "loss": 0.3012, "step": 36420 }, { "epoch": 1.6713780918727914, "grad_norm": 0.43874800205230713, "learning_rate": 4.247567939218908e-06, "loss": 0.3345, "step": 36421 }, { "epoch": 1.671423982378046, "grad_norm": 0.49213725328445435, "learning_rate": 4.247325544397478e-06, "loss": 0.3383, "step": 36422 }, { "epoch": 1.6714698728833004, "grad_norm": 0.47545698285102844, "learning_rate": 4.247083151385994e-06, "loss": 0.3738, "step": 36423 }, { "epoch": 1.671515763388555, "grad_norm": 0.5239338874816895, "learning_rate": 4.246840760185038e-06, "loss": 0.4821, "step": 36424 }, { "epoch": 1.6715616538938094, "grad_norm": 0.5066376328468323, "learning_rate": 4.246598370795189e-06, "loss": 0.3769, "step": 36425 }, { "epoch": 1.671607544399064, "grad_norm": 0.5015862584114075, "learning_rate": 4.246355983217038e-06, "loss": 0.4395, "step": 36426 }, { "epoch": 1.6716534349043184, "grad_norm": 0.4790043234825134, "learning_rate": 4.24611359745116e-06, "loss": 0.3731, "step": 36427 }, { "epoch": 1.671699325409573, "grad_norm": 0.5384829640388489, "learning_rate": 4.24587121349814e-06, "loss": 0.3323, "step": 36428 }, { "epoch": 1.6717452159148274, "grad_norm": 0.49120527505874634, "learning_rate": 4.245628831358564e-06, "loss": 0.3923, "step": 36429 }, { "epoch": 1.6717911064200817, "grad_norm": 0.45001354813575745, "learning_rate": 4.2453864510330115e-06, "loss": 0.3103, "step": 36430 }, { "epoch": 1.6718369969253362, "grad_norm": 0.4836060702800751, "learning_rate": 4.245144072522066e-06, "loss": 0.3925, "step": 36431 }, { "epoch": 1.6718828874305907, "grad_norm": 0.446071594953537, "learning_rate": 4.244901695826312e-06, "loss": 0.3004, "step": 36432 }, { "epoch": 1.671928777935845, "grad_norm": 0.45469948649406433, "learning_rate": 4.244659320946331e-06, "loss": 0.302, "step": 36433 }, { "epoch": 1.6719746684410994, "grad_norm": 0.5117505788803101, "learning_rate": 4.244416947882703e-06, "loss": 0.3725, "step": 36434 }, { "epoch": 1.672020558946354, "grad_norm": 0.468723863363266, "learning_rate": 4.244174576636017e-06, "loss": 0.3618, "step": 36435 }, { "epoch": 1.6720664494516084, "grad_norm": 0.4763523042201996, "learning_rate": 4.243932207206852e-06, "loss": 0.3774, "step": 36436 }, { "epoch": 1.672112339956863, "grad_norm": 0.4621734619140625, "learning_rate": 4.24368983959579e-06, "loss": 0.3366, "step": 36437 }, { "epoch": 1.6721582304621174, "grad_norm": 0.46940046548843384, "learning_rate": 4.243447473803418e-06, "loss": 0.3645, "step": 36438 }, { "epoch": 1.672204120967372, "grad_norm": 0.5135679244995117, "learning_rate": 4.243205109830314e-06, "loss": 0.4165, "step": 36439 }, { "epoch": 1.6722500114726264, "grad_norm": 0.42299628257751465, "learning_rate": 4.242962747677062e-06, "loss": 0.3182, "step": 36440 }, { "epoch": 1.672295901977881, "grad_norm": 0.47843003273010254, "learning_rate": 4.242720387344247e-06, "loss": 0.3624, "step": 36441 }, { "epoch": 1.6723417924831352, "grad_norm": 0.4899647831916809, "learning_rate": 4.242478028832451e-06, "loss": 0.4137, "step": 36442 }, { "epoch": 1.6723876829883897, "grad_norm": 0.45200687646865845, "learning_rate": 4.242235672142255e-06, "loss": 0.3074, "step": 36443 }, { "epoch": 1.6724335734936442, "grad_norm": 0.44975486397743225, "learning_rate": 4.241993317274244e-06, "loss": 0.3713, "step": 36444 }, { "epoch": 1.6724794639988985, "grad_norm": 0.47753891348838806, "learning_rate": 4.241750964229001e-06, "loss": 0.3791, "step": 36445 }, { "epoch": 1.672525354504153, "grad_norm": 0.49888068437576294, "learning_rate": 4.241508613007104e-06, "loss": 0.3942, "step": 36446 }, { "epoch": 1.6725712450094075, "grad_norm": 0.4564834237098694, "learning_rate": 4.241266263609142e-06, "loss": 0.3792, "step": 36447 }, { "epoch": 1.672617135514662, "grad_norm": 0.4786907732486725, "learning_rate": 4.241023916035696e-06, "loss": 0.3578, "step": 36448 }, { "epoch": 1.6726630260199165, "grad_norm": 0.5032262802124023, "learning_rate": 4.2407815702873474e-06, "loss": 0.3851, "step": 36449 }, { "epoch": 1.672708916525171, "grad_norm": 0.4884079694747925, "learning_rate": 4.240539226364677e-06, "loss": 0.366, "step": 36450 }, { "epoch": 1.6727548070304255, "grad_norm": 0.4401054084300995, "learning_rate": 4.240296884268272e-06, "loss": 0.3711, "step": 36451 }, { "epoch": 1.67280069753568, "grad_norm": 0.4238280653953552, "learning_rate": 4.240054543998714e-06, "loss": 0.2835, "step": 36452 }, { "epoch": 1.6728465880409344, "grad_norm": 0.4716249406337738, "learning_rate": 4.239812205556583e-06, "loss": 0.408, "step": 36453 }, { "epoch": 1.672892478546189, "grad_norm": 0.5285786986351013, "learning_rate": 4.239569868942466e-06, "loss": 0.3072, "step": 36454 }, { "epoch": 1.6729383690514432, "grad_norm": 0.45675376057624817, "learning_rate": 4.239327534156943e-06, "loss": 0.3523, "step": 36455 }, { "epoch": 1.6729842595566977, "grad_norm": 0.4795709550380707, "learning_rate": 4.239085201200594e-06, "loss": 0.3572, "step": 36456 }, { "epoch": 1.6730301500619522, "grad_norm": 0.5046348571777344, "learning_rate": 4.2388428700740085e-06, "loss": 0.413, "step": 36457 }, { "epoch": 1.6730760405672065, "grad_norm": 0.43525412678718567, "learning_rate": 4.238600540777765e-06, "loss": 0.3134, "step": 36458 }, { "epoch": 1.673121931072461, "grad_norm": 0.4625495374202728, "learning_rate": 4.238358213312446e-06, "loss": 0.3447, "step": 36459 }, { "epoch": 1.6731678215777155, "grad_norm": 0.49690213799476624, "learning_rate": 4.238115887678636e-06, "loss": 0.3657, "step": 36460 }, { "epoch": 1.67321371208297, "grad_norm": 0.46920424699783325, "learning_rate": 4.237873563876917e-06, "loss": 0.3871, "step": 36461 }, { "epoch": 1.6732596025882245, "grad_norm": 0.5055617690086365, "learning_rate": 4.237631241907869e-06, "loss": 0.3971, "step": 36462 }, { "epoch": 1.673305493093479, "grad_norm": 0.46127983927726746, "learning_rate": 4.23738892177208e-06, "loss": 0.3493, "step": 36463 }, { "epoch": 1.6733513835987335, "grad_norm": 0.4607318937778473, "learning_rate": 4.237146603470129e-06, "loss": 0.3142, "step": 36464 }, { "epoch": 1.673397274103988, "grad_norm": 0.4585719108581543, "learning_rate": 4.2369042870025996e-06, "loss": 0.3642, "step": 36465 }, { "epoch": 1.6734431646092425, "grad_norm": 0.4570196866989136, "learning_rate": 4.236661972370076e-06, "loss": 0.3219, "step": 36466 }, { "epoch": 1.673489055114497, "grad_norm": 0.4818902909755707, "learning_rate": 4.236419659573138e-06, "loss": 0.4103, "step": 36467 }, { "epoch": 1.6735349456197512, "grad_norm": 0.46282175183296204, "learning_rate": 4.236177348612368e-06, "loss": 0.3713, "step": 36468 }, { "epoch": 1.6735808361250057, "grad_norm": 0.48059719800949097, "learning_rate": 4.235935039488353e-06, "loss": 0.3556, "step": 36469 }, { "epoch": 1.6736267266302602, "grad_norm": 0.41766485571861267, "learning_rate": 4.235692732201674e-06, "loss": 0.243, "step": 36470 }, { "epoch": 1.6736726171355145, "grad_norm": 0.4788079559803009, "learning_rate": 4.23545042675291e-06, "loss": 0.3702, "step": 36471 }, { "epoch": 1.673718507640769, "grad_norm": 0.4729735255241394, "learning_rate": 4.235208123142648e-06, "loss": 0.3611, "step": 36472 }, { "epoch": 1.6737643981460235, "grad_norm": 0.5080539584159851, "learning_rate": 4.2349658213714685e-06, "loss": 0.4052, "step": 36473 }, { "epoch": 1.673810288651278, "grad_norm": 0.47496187686920166, "learning_rate": 4.234723521439954e-06, "loss": 0.3665, "step": 36474 }, { "epoch": 1.6738561791565325, "grad_norm": 0.48652276396751404, "learning_rate": 4.23448122334869e-06, "loss": 0.3991, "step": 36475 }, { "epoch": 1.673902069661787, "grad_norm": 0.461581826210022, "learning_rate": 4.234238927098256e-06, "loss": 0.3487, "step": 36476 }, { "epoch": 1.6739479601670415, "grad_norm": 0.4595324695110321, "learning_rate": 4.233996632689234e-06, "loss": 0.3121, "step": 36477 }, { "epoch": 1.673993850672296, "grad_norm": 0.4366900324821472, "learning_rate": 4.233754340122211e-06, "loss": 0.3316, "step": 36478 }, { "epoch": 1.6740397411775505, "grad_norm": 0.5074285864830017, "learning_rate": 4.233512049397766e-06, "loss": 0.4095, "step": 36479 }, { "epoch": 1.6740856316828048, "grad_norm": 0.5864399075508118, "learning_rate": 4.233269760516483e-06, "loss": 0.3881, "step": 36480 }, { "epoch": 1.6741315221880593, "grad_norm": 0.5089992880821228, "learning_rate": 4.233027473478944e-06, "loss": 0.3959, "step": 36481 }, { "epoch": 1.6741774126933138, "grad_norm": 0.45408928394317627, "learning_rate": 4.232785188285732e-06, "loss": 0.3136, "step": 36482 }, { "epoch": 1.6742233031985683, "grad_norm": 0.460478812456131, "learning_rate": 4.232542904937429e-06, "loss": 0.3516, "step": 36483 }, { "epoch": 1.6742691937038225, "grad_norm": 0.49170345067977905, "learning_rate": 4.232300623434616e-06, "loss": 0.4301, "step": 36484 }, { "epoch": 1.674315084209077, "grad_norm": 0.4590580463409424, "learning_rate": 4.232058343777881e-06, "loss": 0.3217, "step": 36485 }, { "epoch": 1.6743609747143315, "grad_norm": 0.46955394744873047, "learning_rate": 4.231816065967803e-06, "loss": 0.3372, "step": 36486 }, { "epoch": 1.674406865219586, "grad_norm": 0.47277143597602844, "learning_rate": 4.231573790004964e-06, "loss": 0.3314, "step": 36487 }, { "epoch": 1.6744527557248405, "grad_norm": 0.4322127103805542, "learning_rate": 4.2313315158899476e-06, "loss": 0.2644, "step": 36488 }, { "epoch": 1.674498646230095, "grad_norm": 0.44472944736480713, "learning_rate": 4.231089243623337e-06, "loss": 0.3432, "step": 36489 }, { "epoch": 1.6745445367353495, "grad_norm": 0.46111059188842773, "learning_rate": 4.2308469732057115e-06, "loss": 0.3654, "step": 36490 }, { "epoch": 1.674590427240604, "grad_norm": 0.4555404484272003, "learning_rate": 4.230604704637659e-06, "loss": 0.3479, "step": 36491 }, { "epoch": 1.6746363177458585, "grad_norm": 0.42896705865859985, "learning_rate": 4.23036243791976e-06, "loss": 0.3281, "step": 36492 }, { "epoch": 1.6746822082511128, "grad_norm": 0.4827493131160736, "learning_rate": 4.230120173052593e-06, "loss": 0.3391, "step": 36493 }, { "epoch": 1.6747280987563673, "grad_norm": 0.5437570810317993, "learning_rate": 4.2298779100367465e-06, "loss": 0.4725, "step": 36494 }, { "epoch": 1.6747739892616218, "grad_norm": 0.4679453670978546, "learning_rate": 4.2296356488728e-06, "loss": 0.3912, "step": 36495 }, { "epoch": 1.674819879766876, "grad_norm": 0.49863117933273315, "learning_rate": 4.229393389561337e-06, "loss": 0.4458, "step": 36496 }, { "epoch": 1.6748657702721306, "grad_norm": 0.47238606214523315, "learning_rate": 4.22915113210294e-06, "loss": 0.3721, "step": 36497 }, { "epoch": 1.674911660777385, "grad_norm": 0.47224247455596924, "learning_rate": 4.228908876498191e-06, "loss": 0.3289, "step": 36498 }, { "epoch": 1.6749575512826396, "grad_norm": 0.47474759817123413, "learning_rate": 4.228666622747671e-06, "loss": 0.2893, "step": 36499 }, { "epoch": 1.675003441787894, "grad_norm": 0.43323126435279846, "learning_rate": 4.228424370851966e-06, "loss": 0.3064, "step": 36500 }, { "epoch": 1.6750493322931486, "grad_norm": 0.4749133586883545, "learning_rate": 4.228182120811657e-06, "loss": 0.3569, "step": 36501 }, { "epoch": 1.675095222798403, "grad_norm": 0.5038188099861145, "learning_rate": 4.227939872627326e-06, "loss": 0.3965, "step": 36502 }, { "epoch": 1.6751411133036576, "grad_norm": 0.4963737428188324, "learning_rate": 4.2276976262995564e-06, "loss": 0.3473, "step": 36503 }, { "epoch": 1.675187003808912, "grad_norm": 0.4400983154773712, "learning_rate": 4.22745538182893e-06, "loss": 0.3113, "step": 36504 }, { "epoch": 1.6752328943141666, "grad_norm": 0.47862768173217773, "learning_rate": 4.2272131392160276e-06, "loss": 0.3621, "step": 36505 }, { "epoch": 1.6752787848194208, "grad_norm": 0.48373058438301086, "learning_rate": 4.226970898461436e-06, "loss": 0.4425, "step": 36506 }, { "epoch": 1.6753246753246753, "grad_norm": 0.48168671131134033, "learning_rate": 4.226728659565736e-06, "loss": 0.3945, "step": 36507 }, { "epoch": 1.6753705658299298, "grad_norm": 0.46919965744018555, "learning_rate": 4.226486422529508e-06, "loss": 0.3444, "step": 36508 }, { "epoch": 1.675416456335184, "grad_norm": 0.4852716326713562, "learning_rate": 4.226244187353337e-06, "loss": 0.4048, "step": 36509 }, { "epoch": 1.6754623468404386, "grad_norm": 0.45921608805656433, "learning_rate": 4.226001954037804e-06, "loss": 0.3193, "step": 36510 }, { "epoch": 1.675508237345693, "grad_norm": 0.45602890849113464, "learning_rate": 4.225759722583491e-06, "loss": 0.3498, "step": 36511 }, { "epoch": 1.6755541278509476, "grad_norm": 0.4616810381412506, "learning_rate": 4.225517492990983e-06, "loss": 0.3588, "step": 36512 }, { "epoch": 1.675600018356202, "grad_norm": 0.4362317621707916, "learning_rate": 4.225275265260861e-06, "loss": 0.2841, "step": 36513 }, { "epoch": 1.6756459088614566, "grad_norm": 0.4725174903869629, "learning_rate": 4.225033039393708e-06, "loss": 0.3518, "step": 36514 }, { "epoch": 1.675691799366711, "grad_norm": 0.437983900308609, "learning_rate": 4.224790815390104e-06, "loss": 0.2818, "step": 36515 }, { "epoch": 1.6757376898719656, "grad_norm": 0.48707646131515503, "learning_rate": 4.224548593250635e-06, "loss": 0.4123, "step": 36516 }, { "epoch": 1.67578358037722, "grad_norm": 0.4063223898410797, "learning_rate": 4.224306372975881e-06, "loss": 0.2543, "step": 36517 }, { "epoch": 1.6758294708824746, "grad_norm": 0.4906246066093445, "learning_rate": 4.2240641545664255e-06, "loss": 0.3504, "step": 36518 }, { "epoch": 1.6758753613877289, "grad_norm": 0.4598824381828308, "learning_rate": 4.223821938022851e-06, "loss": 0.3345, "step": 36519 }, { "epoch": 1.6759212518929834, "grad_norm": 0.5146784782409668, "learning_rate": 4.22357972334574e-06, "loss": 0.3948, "step": 36520 }, { "epoch": 1.6759671423982379, "grad_norm": 0.48112761974334717, "learning_rate": 4.2233375105356736e-06, "loss": 0.3725, "step": 36521 }, { "epoch": 1.6760130329034921, "grad_norm": 0.46727386116981506, "learning_rate": 4.223095299593236e-06, "loss": 0.3317, "step": 36522 }, { "epoch": 1.6760589234087466, "grad_norm": 0.47835224866867065, "learning_rate": 4.22285309051901e-06, "loss": 0.3741, "step": 36523 }, { "epoch": 1.6761048139140011, "grad_norm": 0.5095685720443726, "learning_rate": 4.222610883313576e-06, "loss": 0.4152, "step": 36524 }, { "epoch": 1.6761507044192556, "grad_norm": 0.44336411356925964, "learning_rate": 4.222368677977518e-06, "loss": 0.3456, "step": 36525 }, { "epoch": 1.6761965949245101, "grad_norm": 0.5016524791717529, "learning_rate": 4.222126474511418e-06, "loss": 0.4254, "step": 36526 }, { "epoch": 1.6762424854297646, "grad_norm": 0.464347779750824, "learning_rate": 4.221884272915857e-06, "loss": 0.3741, "step": 36527 }, { "epoch": 1.6762883759350191, "grad_norm": 0.45197755098342896, "learning_rate": 4.22164207319142e-06, "loss": 0.3, "step": 36528 }, { "epoch": 1.6763342664402736, "grad_norm": 0.4850073754787445, "learning_rate": 4.221399875338688e-06, "loss": 0.3815, "step": 36529 }, { "epoch": 1.676380156945528, "grad_norm": 0.45495352149009705, "learning_rate": 4.221157679358243e-06, "loss": 0.342, "step": 36530 }, { "epoch": 1.6764260474507824, "grad_norm": 0.46351560950279236, "learning_rate": 4.220915485250669e-06, "loss": 0.3318, "step": 36531 }, { "epoch": 1.6764719379560369, "grad_norm": 0.4632202386856079, "learning_rate": 4.220673293016547e-06, "loss": 0.3236, "step": 36532 }, { "epoch": 1.6765178284612914, "grad_norm": 0.44485148787498474, "learning_rate": 4.220431102656458e-06, "loss": 0.2849, "step": 36533 }, { "epoch": 1.6765637189665457, "grad_norm": 0.4886535704135895, "learning_rate": 4.220188914170988e-06, "loss": 0.41, "step": 36534 }, { "epoch": 1.6766096094718002, "grad_norm": 0.5439357757568359, "learning_rate": 4.219946727560718e-06, "loss": 0.3961, "step": 36535 }, { "epoch": 1.6766554999770547, "grad_norm": 0.4195284843444824, "learning_rate": 4.219704542826228e-06, "loss": 0.2581, "step": 36536 }, { "epoch": 1.6767013904823092, "grad_norm": 0.4579995274543762, "learning_rate": 4.219462359968105e-06, "loss": 0.3577, "step": 36537 }, { "epoch": 1.6767472809875636, "grad_norm": 0.4592399299144745, "learning_rate": 4.219220178986927e-06, "loss": 0.3338, "step": 36538 }, { "epoch": 1.6767931714928181, "grad_norm": 0.44765183329582214, "learning_rate": 4.2189779998832784e-06, "loss": 0.3213, "step": 36539 }, { "epoch": 1.6768390619980726, "grad_norm": 0.47102999687194824, "learning_rate": 4.218735822657741e-06, "loss": 0.3626, "step": 36540 }, { "epoch": 1.6768849525033271, "grad_norm": 0.4997973144054413, "learning_rate": 4.218493647310898e-06, "loss": 0.3614, "step": 36541 }, { "epoch": 1.6769308430085816, "grad_norm": 0.45439597964286804, "learning_rate": 4.218251473843329e-06, "loss": 0.3634, "step": 36542 }, { "epoch": 1.6769767335138361, "grad_norm": 0.4429270923137665, "learning_rate": 4.21800930225562e-06, "loss": 0.3147, "step": 36543 }, { "epoch": 1.6770226240190904, "grad_norm": 0.4388974606990814, "learning_rate": 4.2177671325483525e-06, "loss": 0.2994, "step": 36544 }, { "epoch": 1.677068514524345, "grad_norm": 0.4201744496822357, "learning_rate": 4.217524964722108e-06, "loss": 0.3031, "step": 36545 }, { "epoch": 1.6771144050295994, "grad_norm": 0.43443751335144043, "learning_rate": 4.217282798777468e-06, "loss": 0.323, "step": 36546 }, { "epoch": 1.6771602955348537, "grad_norm": 0.49964815378189087, "learning_rate": 4.217040634715016e-06, "loss": 0.3865, "step": 36547 }, { "epoch": 1.6772061860401082, "grad_norm": 0.48234280943870544, "learning_rate": 4.216798472535336e-06, "loss": 0.3611, "step": 36548 }, { "epoch": 1.6772520765453627, "grad_norm": 0.5201814770698547, "learning_rate": 4.216556312239005e-06, "loss": 0.3978, "step": 36549 }, { "epoch": 1.6772979670506172, "grad_norm": 0.44239118695259094, "learning_rate": 4.216314153826612e-06, "loss": 0.3046, "step": 36550 }, { "epoch": 1.6773438575558717, "grad_norm": 0.46834179759025574, "learning_rate": 4.216071997298735e-06, "loss": 0.4121, "step": 36551 }, { "epoch": 1.6773897480611262, "grad_norm": 0.422545850276947, "learning_rate": 4.215829842655957e-06, "loss": 0.2767, "step": 36552 }, { "epoch": 1.6774356385663807, "grad_norm": 0.4376762807369232, "learning_rate": 4.215587689898861e-06, "loss": 0.3155, "step": 36553 }, { "epoch": 1.6774815290716352, "grad_norm": 0.49101999402046204, "learning_rate": 4.215345539028029e-06, "loss": 0.4726, "step": 36554 }, { "epoch": 1.6775274195768897, "grad_norm": 0.4575490355491638, "learning_rate": 4.215103390044043e-06, "loss": 0.3315, "step": 36555 }, { "epoch": 1.6775733100821442, "grad_norm": 0.42061564326286316, "learning_rate": 4.214861242947486e-06, "loss": 0.2793, "step": 36556 }, { "epoch": 1.6776192005873984, "grad_norm": 0.5177093148231506, "learning_rate": 4.214619097738941e-06, "loss": 0.3703, "step": 36557 }, { "epoch": 1.677665091092653, "grad_norm": 0.4535965919494629, "learning_rate": 4.214376954418987e-06, "loss": 0.3034, "step": 36558 }, { "epoch": 1.6777109815979074, "grad_norm": 0.4873185157775879, "learning_rate": 4.214134812988211e-06, "loss": 0.4155, "step": 36559 }, { "epoch": 1.6777568721031617, "grad_norm": 0.45540252327919006, "learning_rate": 4.2138926734471915e-06, "loss": 0.3303, "step": 36560 }, { "epoch": 1.6778027626084162, "grad_norm": 0.44114428758621216, "learning_rate": 4.2136505357965115e-06, "loss": 0.3401, "step": 36561 }, { "epoch": 1.6778486531136707, "grad_norm": 0.527583658695221, "learning_rate": 4.213408400036755e-06, "loss": 0.4142, "step": 36562 }, { "epoch": 1.6778945436189252, "grad_norm": 0.4526752531528473, "learning_rate": 4.213166266168503e-06, "loss": 0.303, "step": 36563 }, { "epoch": 1.6779404341241797, "grad_norm": 0.44314995408058167, "learning_rate": 4.2129241341923345e-06, "loss": 0.3176, "step": 36564 }, { "epoch": 1.6779863246294342, "grad_norm": 0.441885769367218, "learning_rate": 4.2126820041088385e-06, "loss": 0.3276, "step": 36565 }, { "epoch": 1.6780322151346887, "grad_norm": 0.4680526554584503, "learning_rate": 4.212439875918593e-06, "loss": 0.3655, "step": 36566 }, { "epoch": 1.6780781056399432, "grad_norm": 0.4734300971031189, "learning_rate": 4.212197749622181e-06, "loss": 0.3358, "step": 36567 }, { "epoch": 1.6781239961451977, "grad_norm": 0.4728740155696869, "learning_rate": 4.2119556252201845e-06, "loss": 0.3522, "step": 36568 }, { "epoch": 1.678169886650452, "grad_norm": 0.45828431844711304, "learning_rate": 4.211713502713187e-06, "loss": 0.3324, "step": 36569 }, { "epoch": 1.6782157771557065, "grad_norm": 0.6216670274734497, "learning_rate": 4.211471382101768e-06, "loss": 0.4285, "step": 36570 }, { "epoch": 1.678261667660961, "grad_norm": 0.47429198026657104, "learning_rate": 4.2112292633865125e-06, "loss": 0.409, "step": 36571 }, { "epoch": 1.6783075581662155, "grad_norm": 0.5526126623153687, "learning_rate": 4.210987146568002e-06, "loss": 0.3891, "step": 36572 }, { "epoch": 1.6783534486714697, "grad_norm": 0.5467831492424011, "learning_rate": 4.210745031646817e-06, "loss": 0.4537, "step": 36573 }, { "epoch": 1.6783993391767242, "grad_norm": 0.48331567645072937, "learning_rate": 4.210502918623543e-06, "loss": 0.3883, "step": 36574 }, { "epoch": 1.6784452296819787, "grad_norm": 0.4123234748840332, "learning_rate": 4.21026080749876e-06, "loss": 0.2909, "step": 36575 }, { "epoch": 1.6784911201872332, "grad_norm": 0.4490203559398651, "learning_rate": 4.210018698273048e-06, "loss": 0.3628, "step": 36576 }, { "epoch": 1.6785370106924877, "grad_norm": 0.5207685232162476, "learning_rate": 4.209776590946994e-06, "loss": 0.4364, "step": 36577 }, { "epoch": 1.6785829011977422, "grad_norm": 0.4506654143333435, "learning_rate": 4.209534485521178e-06, "loss": 0.3241, "step": 36578 }, { "epoch": 1.6786287917029967, "grad_norm": 0.5041995644569397, "learning_rate": 4.209292381996183e-06, "loss": 0.421, "step": 36579 }, { "epoch": 1.6786746822082512, "grad_norm": 0.46286600828170776, "learning_rate": 4.209050280372587e-06, "loss": 0.337, "step": 36580 }, { "epoch": 1.6787205727135057, "grad_norm": 0.4742344617843628, "learning_rate": 4.2088081806509786e-06, "loss": 0.3774, "step": 36581 }, { "epoch": 1.67876646321876, "grad_norm": 0.44923317432403564, "learning_rate": 4.208566082831936e-06, "loss": 0.3547, "step": 36582 }, { "epoch": 1.6788123537240145, "grad_norm": 0.46849343180656433, "learning_rate": 4.208323986916041e-06, "loss": 0.3245, "step": 36583 }, { "epoch": 1.678858244229269, "grad_norm": 0.5013726353645325, "learning_rate": 4.208081892903878e-06, "loss": 0.3793, "step": 36584 }, { "epoch": 1.6789041347345233, "grad_norm": 0.46768638491630554, "learning_rate": 4.207839800796029e-06, "loss": 0.3715, "step": 36585 }, { "epoch": 1.6789500252397778, "grad_norm": 0.4959128499031067, "learning_rate": 4.207597710593072e-06, "loss": 0.3738, "step": 36586 }, { "epoch": 1.6789959157450323, "grad_norm": 0.4266280233860016, "learning_rate": 4.207355622295595e-06, "loss": 0.2903, "step": 36587 }, { "epoch": 1.6790418062502868, "grad_norm": 0.4653206765651703, "learning_rate": 4.207113535904177e-06, "loss": 0.3206, "step": 36588 }, { "epoch": 1.6790876967555413, "grad_norm": 0.40866971015930176, "learning_rate": 4.2068714514194e-06, "loss": 0.2627, "step": 36589 }, { "epoch": 1.6791335872607958, "grad_norm": 0.4527736008167267, "learning_rate": 4.206629368841849e-06, "loss": 0.3282, "step": 36590 }, { "epoch": 1.6791794777660503, "grad_norm": 0.474046915769577, "learning_rate": 4.206387288172102e-06, "loss": 0.3646, "step": 36591 }, { "epoch": 1.6792253682713048, "grad_norm": 0.469391405582428, "learning_rate": 4.206145209410743e-06, "loss": 0.3528, "step": 36592 }, { "epoch": 1.6792712587765593, "grad_norm": 0.4496285915374756, "learning_rate": 4.2059031325583546e-06, "loss": 0.3261, "step": 36593 }, { "epoch": 1.6793171492818137, "grad_norm": 0.4801749587059021, "learning_rate": 4.20566105761552e-06, "loss": 0.3818, "step": 36594 }, { "epoch": 1.679363039787068, "grad_norm": 0.46251434087753296, "learning_rate": 4.205418984582818e-06, "loss": 0.3504, "step": 36595 }, { "epoch": 1.6794089302923225, "grad_norm": 0.4858919084072113, "learning_rate": 4.205176913460833e-06, "loss": 0.4285, "step": 36596 }, { "epoch": 1.679454820797577, "grad_norm": 0.509089469909668, "learning_rate": 4.204934844250148e-06, "loss": 0.49, "step": 36597 }, { "epoch": 1.6795007113028313, "grad_norm": 0.48630544543266296, "learning_rate": 4.204692776951341e-06, "loss": 0.3693, "step": 36598 }, { "epoch": 1.6795466018080858, "grad_norm": 0.4375717043876648, "learning_rate": 4.204450711564999e-06, "loss": 0.2816, "step": 36599 }, { "epoch": 1.6795924923133403, "grad_norm": 0.45944130420684814, "learning_rate": 4.204208648091702e-06, "loss": 0.3363, "step": 36600 }, { "epoch": 1.6796383828185948, "grad_norm": 0.4906388819217682, "learning_rate": 4.20396658653203e-06, "loss": 0.3919, "step": 36601 }, { "epoch": 1.6796842733238493, "grad_norm": 0.496921569108963, "learning_rate": 4.20372452688657e-06, "loss": 0.3781, "step": 36602 }, { "epoch": 1.6797301638291038, "grad_norm": 0.4289204478263855, "learning_rate": 4.203482469155901e-06, "loss": 0.2901, "step": 36603 }, { "epoch": 1.6797760543343583, "grad_norm": 0.54576575756073, "learning_rate": 4.203240413340602e-06, "loss": 0.4721, "step": 36604 }, { "epoch": 1.6798219448396128, "grad_norm": 0.5237581729888916, "learning_rate": 4.202998359441262e-06, "loss": 0.457, "step": 36605 }, { "epoch": 1.6798678353448673, "grad_norm": 0.4975808560848236, "learning_rate": 4.202756307458458e-06, "loss": 0.3955, "step": 36606 }, { "epoch": 1.6799137258501218, "grad_norm": 0.47764453291893005, "learning_rate": 4.202514257392772e-06, "loss": 0.3387, "step": 36607 }, { "epoch": 1.679959616355376, "grad_norm": 0.4803623557090759, "learning_rate": 4.202272209244789e-06, "loss": 0.3959, "step": 36608 }, { "epoch": 1.6800055068606305, "grad_norm": 0.4637603461742401, "learning_rate": 4.20203016301509e-06, "loss": 0.3679, "step": 36609 }, { "epoch": 1.680051397365885, "grad_norm": 0.48946255445480347, "learning_rate": 4.201788118704255e-06, "loss": 0.4384, "step": 36610 }, { "epoch": 1.6800972878711393, "grad_norm": 0.4636344909667969, "learning_rate": 4.20154607631287e-06, "loss": 0.3614, "step": 36611 }, { "epoch": 1.6801431783763938, "grad_norm": 0.4655325710773468, "learning_rate": 4.201304035841514e-06, "loss": 0.3295, "step": 36612 }, { "epoch": 1.6801890688816483, "grad_norm": 0.43468889594078064, "learning_rate": 4.20106199729077e-06, "loss": 0.3045, "step": 36613 }, { "epoch": 1.6802349593869028, "grad_norm": 0.4637826979160309, "learning_rate": 4.200819960661217e-06, "loss": 0.3618, "step": 36614 }, { "epoch": 1.6802808498921573, "grad_norm": 0.47906258702278137, "learning_rate": 4.200577925953442e-06, "loss": 0.375, "step": 36615 }, { "epoch": 1.6803267403974118, "grad_norm": 0.43818292021751404, "learning_rate": 4.200335893168024e-06, "loss": 0.2931, "step": 36616 }, { "epoch": 1.6803726309026663, "grad_norm": 0.4494995176792145, "learning_rate": 4.200093862305546e-06, "loss": 0.3417, "step": 36617 }, { "epoch": 1.6804185214079208, "grad_norm": 0.43340763449668884, "learning_rate": 4.19985183336659e-06, "loss": 0.2826, "step": 36618 }, { "epoch": 1.6804644119131753, "grad_norm": 0.47955751419067383, "learning_rate": 4.199609806351739e-06, "loss": 0.42, "step": 36619 }, { "epoch": 1.6805103024184296, "grad_norm": 0.5102643966674805, "learning_rate": 4.1993677812615704e-06, "loss": 0.4368, "step": 36620 }, { "epoch": 1.680556192923684, "grad_norm": 0.4862624406814575, "learning_rate": 4.1991257580966725e-06, "loss": 0.3554, "step": 36621 }, { "epoch": 1.6806020834289386, "grad_norm": 0.4391551613807678, "learning_rate": 4.198883736857624e-06, "loss": 0.2856, "step": 36622 }, { "epoch": 1.6806479739341929, "grad_norm": 0.4572699964046478, "learning_rate": 4.198641717545007e-06, "loss": 0.3449, "step": 36623 }, { "epoch": 1.6806938644394473, "grad_norm": 0.45318472385406494, "learning_rate": 4.198399700159404e-06, "loss": 0.2873, "step": 36624 }, { "epoch": 1.6807397549447018, "grad_norm": 0.45232975482940674, "learning_rate": 4.198157684701397e-06, "loss": 0.2846, "step": 36625 }, { "epoch": 1.6807856454499563, "grad_norm": 0.49212661385536194, "learning_rate": 4.1979156711715655e-06, "loss": 0.3748, "step": 36626 }, { "epoch": 1.6808315359552108, "grad_norm": 0.4476878345012665, "learning_rate": 4.197673659570497e-06, "loss": 0.3439, "step": 36627 }, { "epoch": 1.6808774264604653, "grad_norm": 0.46549829840660095, "learning_rate": 4.197431649898769e-06, "loss": 0.3511, "step": 36628 }, { "epoch": 1.6809233169657198, "grad_norm": 0.4481625258922577, "learning_rate": 4.197189642156962e-06, "loss": 0.2946, "step": 36629 }, { "epoch": 1.6809692074709743, "grad_norm": 0.44091692566871643, "learning_rate": 4.196947636345663e-06, "loss": 0.2857, "step": 36630 }, { "epoch": 1.6810150979762288, "grad_norm": 0.4902280271053314, "learning_rate": 4.1967056324654515e-06, "loss": 0.4617, "step": 36631 }, { "epoch": 1.6810609884814833, "grad_norm": 0.41542696952819824, "learning_rate": 4.196463630516909e-06, "loss": 0.2866, "step": 36632 }, { "epoch": 1.6811068789867376, "grad_norm": 0.44120338559150696, "learning_rate": 4.196221630500617e-06, "loss": 0.3132, "step": 36633 }, { "epoch": 1.681152769491992, "grad_norm": 0.42308467626571655, "learning_rate": 4.19597963241716e-06, "loss": 0.29, "step": 36634 }, { "epoch": 1.6811986599972466, "grad_norm": 0.46878519654273987, "learning_rate": 4.195737636267116e-06, "loss": 0.3561, "step": 36635 }, { "epoch": 1.6812445505025009, "grad_norm": 0.4856399893760681, "learning_rate": 4.195495642051071e-06, "loss": 0.3465, "step": 36636 }, { "epoch": 1.6812904410077554, "grad_norm": 0.43609169125556946, "learning_rate": 4.195253649769605e-06, "loss": 0.2943, "step": 36637 }, { "epoch": 1.6813363315130099, "grad_norm": 0.4336220622062683, "learning_rate": 4.195011659423299e-06, "loss": 0.2822, "step": 36638 }, { "epoch": 1.6813822220182644, "grad_norm": 0.4066895842552185, "learning_rate": 4.1947696710127375e-06, "loss": 0.2661, "step": 36639 }, { "epoch": 1.6814281125235189, "grad_norm": 0.43248075246810913, "learning_rate": 4.1945276845385e-06, "loss": 0.297, "step": 36640 }, { "epoch": 1.6814740030287734, "grad_norm": 0.4402267634868622, "learning_rate": 4.194285700001167e-06, "loss": 0.305, "step": 36641 }, { "epoch": 1.6815198935340279, "grad_norm": 0.5321704745292664, "learning_rate": 4.194043717401325e-06, "loss": 0.4413, "step": 36642 }, { "epoch": 1.6815657840392824, "grad_norm": 0.43868669867515564, "learning_rate": 4.193801736739554e-06, "loss": 0.2725, "step": 36643 }, { "epoch": 1.6816116745445369, "grad_norm": 0.44483572244644165, "learning_rate": 4.193559758016435e-06, "loss": 0.3074, "step": 36644 }, { "epoch": 1.6816575650497914, "grad_norm": 0.46181023120880127, "learning_rate": 4.193317781232549e-06, "loss": 0.3242, "step": 36645 }, { "epoch": 1.6817034555550456, "grad_norm": 0.46997594833374023, "learning_rate": 4.19307580638848e-06, "loss": 0.3778, "step": 36646 }, { "epoch": 1.6817493460603001, "grad_norm": 0.531737208366394, "learning_rate": 4.19283383348481e-06, "loss": 0.3996, "step": 36647 }, { "epoch": 1.6817952365655546, "grad_norm": 0.4286327660083771, "learning_rate": 4.192591862522117e-06, "loss": 0.3108, "step": 36648 }, { "epoch": 1.681841127070809, "grad_norm": 0.46070733666419983, "learning_rate": 4.192349893500989e-06, "loss": 0.3984, "step": 36649 }, { "epoch": 1.6818870175760634, "grad_norm": 0.44115087389945984, "learning_rate": 4.192107926422003e-06, "loss": 0.2904, "step": 36650 }, { "epoch": 1.681932908081318, "grad_norm": 0.5475233793258667, "learning_rate": 4.191865961285742e-06, "loss": 0.37, "step": 36651 }, { "epoch": 1.6819787985865724, "grad_norm": 0.5091083645820618, "learning_rate": 4.191623998092789e-06, "loss": 0.4183, "step": 36652 }, { "epoch": 1.682024689091827, "grad_norm": 0.4824158847332001, "learning_rate": 4.191382036843725e-06, "loss": 0.3679, "step": 36653 }, { "epoch": 1.6820705795970814, "grad_norm": 0.473499596118927, "learning_rate": 4.191140077539131e-06, "loss": 0.3872, "step": 36654 }, { "epoch": 1.682116470102336, "grad_norm": 0.4657679796218872, "learning_rate": 4.190898120179592e-06, "loss": 0.3078, "step": 36655 }, { "epoch": 1.6821623606075904, "grad_norm": 0.449124813079834, "learning_rate": 4.190656164765687e-06, "loss": 0.3552, "step": 36656 }, { "epoch": 1.6822082511128449, "grad_norm": 0.44133663177490234, "learning_rate": 4.190414211297996e-06, "loss": 0.2953, "step": 36657 }, { "epoch": 1.6822541416180992, "grad_norm": 0.49887338280677795, "learning_rate": 4.190172259777106e-06, "loss": 0.3696, "step": 36658 }, { "epoch": 1.6823000321233537, "grad_norm": 0.46963536739349365, "learning_rate": 4.189930310203596e-06, "loss": 0.3009, "step": 36659 }, { "epoch": 1.6823459226286082, "grad_norm": 0.4850354492664337, "learning_rate": 4.1896883625780464e-06, "loss": 0.3766, "step": 36660 }, { "epoch": 1.6823918131338627, "grad_norm": 0.4611857533454895, "learning_rate": 4.189446416901042e-06, "loss": 0.3602, "step": 36661 }, { "epoch": 1.682437703639117, "grad_norm": 0.4727003276348114, "learning_rate": 4.189204473173163e-06, "loss": 0.3614, "step": 36662 }, { "epoch": 1.6824835941443714, "grad_norm": 0.43927904963493347, "learning_rate": 4.18896253139499e-06, "loss": 0.2916, "step": 36663 }, { "epoch": 1.682529484649626, "grad_norm": 0.47729185223579407, "learning_rate": 4.188720591567107e-06, "loss": 0.3871, "step": 36664 }, { "epoch": 1.6825753751548804, "grad_norm": 0.47336286306381226, "learning_rate": 4.1884786536900966e-06, "loss": 0.379, "step": 36665 }, { "epoch": 1.682621265660135, "grad_norm": 0.49842989444732666, "learning_rate": 4.188236717764536e-06, "loss": 0.3521, "step": 36666 }, { "epoch": 1.6826671561653894, "grad_norm": 0.4778773784637451, "learning_rate": 4.187994783791012e-06, "loss": 0.3877, "step": 36667 }, { "epoch": 1.682713046670644, "grad_norm": 0.48494231700897217, "learning_rate": 4.187752851770104e-06, "loss": 0.4289, "step": 36668 }, { "epoch": 1.6827589371758984, "grad_norm": 0.5036440491676331, "learning_rate": 4.187510921702392e-06, "loss": 0.4221, "step": 36669 }, { "epoch": 1.682804827681153, "grad_norm": 0.5084603428840637, "learning_rate": 4.187268993588462e-06, "loss": 0.4015, "step": 36670 }, { "epoch": 1.6828507181864072, "grad_norm": 0.49309879541397095, "learning_rate": 4.187027067428894e-06, "loss": 0.4272, "step": 36671 }, { "epoch": 1.6828966086916617, "grad_norm": 0.46676939725875854, "learning_rate": 4.186785143224267e-06, "loss": 0.356, "step": 36672 }, { "epoch": 1.6829424991969162, "grad_norm": 0.4170481860637665, "learning_rate": 4.186543220975166e-06, "loss": 0.2735, "step": 36673 }, { "epoch": 1.6829883897021705, "grad_norm": 0.47602564096450806, "learning_rate": 4.186301300682173e-06, "loss": 0.3743, "step": 36674 }, { "epoch": 1.683034280207425, "grad_norm": 0.46793580055236816, "learning_rate": 4.186059382345868e-06, "loss": 0.311, "step": 36675 }, { "epoch": 1.6830801707126795, "grad_norm": 0.49261555075645447, "learning_rate": 4.185817465966833e-06, "loss": 0.3774, "step": 36676 }, { "epoch": 1.683126061217934, "grad_norm": 0.5102073550224304, "learning_rate": 4.185575551545651e-06, "loss": 0.3542, "step": 36677 }, { "epoch": 1.6831719517231885, "grad_norm": 0.4897017478942871, "learning_rate": 4.185333639082903e-06, "loss": 0.3907, "step": 36678 }, { "epoch": 1.683217842228443, "grad_norm": 0.5146334171295166, "learning_rate": 4.185091728579167e-06, "loss": 0.4676, "step": 36679 }, { "epoch": 1.6832637327336974, "grad_norm": 0.4781786799430847, "learning_rate": 4.184849820035031e-06, "loss": 0.3661, "step": 36680 }, { "epoch": 1.683309623238952, "grad_norm": 0.48679864406585693, "learning_rate": 4.184607913451074e-06, "loss": 0.352, "step": 36681 }, { "epoch": 1.6833555137442064, "grad_norm": 0.4676544964313507, "learning_rate": 4.184366008827876e-06, "loss": 0.3276, "step": 36682 }, { "epoch": 1.683401404249461, "grad_norm": 0.4873086214065552, "learning_rate": 4.184124106166022e-06, "loss": 0.3772, "step": 36683 }, { "epoch": 1.6834472947547152, "grad_norm": 0.4920465648174286, "learning_rate": 4.183882205466092e-06, "loss": 0.416, "step": 36684 }, { "epoch": 1.6834931852599697, "grad_norm": 0.46206143498420715, "learning_rate": 4.1836403067286655e-06, "loss": 0.3562, "step": 36685 }, { "epoch": 1.6835390757652242, "grad_norm": 0.4901551604270935, "learning_rate": 4.183398409954328e-06, "loss": 0.4383, "step": 36686 }, { "epoch": 1.6835849662704785, "grad_norm": 0.4095803201198578, "learning_rate": 4.18315651514366e-06, "loss": 0.2852, "step": 36687 }, { "epoch": 1.683630856775733, "grad_norm": 0.4333679974079132, "learning_rate": 4.182914622297243e-06, "loss": 0.3068, "step": 36688 }, { "epoch": 1.6836767472809875, "grad_norm": 0.505587100982666, "learning_rate": 4.182672731415658e-06, "loss": 0.4101, "step": 36689 }, { "epoch": 1.683722637786242, "grad_norm": 0.5026230216026306, "learning_rate": 4.1824308424994875e-06, "loss": 0.4129, "step": 36690 }, { "epoch": 1.6837685282914965, "grad_norm": 0.47114697098731995, "learning_rate": 4.18218895554931e-06, "loss": 0.3705, "step": 36691 }, { "epoch": 1.683814418796751, "grad_norm": 0.4381970763206482, "learning_rate": 4.1819470705657126e-06, "loss": 0.3196, "step": 36692 }, { "epoch": 1.6838603093020055, "grad_norm": 0.46089982986450195, "learning_rate": 4.181705187549276e-06, "loss": 0.3551, "step": 36693 }, { "epoch": 1.68390619980726, "grad_norm": 0.4817427098751068, "learning_rate": 4.181463306500576e-06, "loss": 0.3567, "step": 36694 }, { "epoch": 1.6839520903125145, "grad_norm": 0.47132623195648193, "learning_rate": 4.181221427420201e-06, "loss": 0.3701, "step": 36695 }, { "epoch": 1.683997980817769, "grad_norm": 0.45564770698547363, "learning_rate": 4.18097955030873e-06, "loss": 0.4068, "step": 36696 }, { "epoch": 1.6840438713230232, "grad_norm": 0.5715989470481873, "learning_rate": 4.1807376751667435e-06, "loss": 0.3548, "step": 36697 }, { "epoch": 1.6840897618282777, "grad_norm": 0.5544700026512146, "learning_rate": 4.180495801994826e-06, "loss": 0.4487, "step": 36698 }, { "epoch": 1.6841356523335322, "grad_norm": 0.49110493063926697, "learning_rate": 4.180253930793557e-06, "loss": 0.3573, "step": 36699 }, { "epoch": 1.6841815428387865, "grad_norm": 0.47974082827568054, "learning_rate": 4.180012061563517e-06, "loss": 0.4064, "step": 36700 }, { "epoch": 1.684227433344041, "grad_norm": 0.48985666036605835, "learning_rate": 4.1797701943052904e-06, "loss": 0.4135, "step": 36701 }, { "epoch": 1.6842733238492955, "grad_norm": 0.43646353483200073, "learning_rate": 4.179528329019458e-06, "loss": 0.2989, "step": 36702 }, { "epoch": 1.68431921435455, "grad_norm": 0.4515102505683899, "learning_rate": 4.1792864657066e-06, "loss": 0.3387, "step": 36703 }, { "epoch": 1.6843651048598045, "grad_norm": 0.473971426486969, "learning_rate": 4.1790446043673e-06, "loss": 0.3517, "step": 36704 }, { "epoch": 1.684410995365059, "grad_norm": 0.46834880113601685, "learning_rate": 4.178802745002139e-06, "loss": 0.3488, "step": 36705 }, { "epoch": 1.6844568858703135, "grad_norm": 0.4526287913322449, "learning_rate": 4.178560887611696e-06, "loss": 0.3193, "step": 36706 }, { "epoch": 1.684502776375568, "grad_norm": 0.45599237084388733, "learning_rate": 4.178319032196556e-06, "loss": 0.328, "step": 36707 }, { "epoch": 1.6845486668808225, "grad_norm": 0.4968269169330597, "learning_rate": 4.178077178757301e-06, "loss": 0.3561, "step": 36708 }, { "epoch": 1.6845945573860768, "grad_norm": 0.496565043926239, "learning_rate": 4.177835327294509e-06, "loss": 0.3738, "step": 36709 }, { "epoch": 1.6846404478913313, "grad_norm": 0.5355930924415588, "learning_rate": 4.177593477808764e-06, "loss": 0.394, "step": 36710 }, { "epoch": 1.6846863383965858, "grad_norm": 0.41289010643959045, "learning_rate": 4.177351630300648e-06, "loss": 0.2449, "step": 36711 }, { "epoch": 1.68473222890184, "grad_norm": 0.4664144814014435, "learning_rate": 4.177109784770742e-06, "loss": 0.3619, "step": 36712 }, { "epoch": 1.6847781194070945, "grad_norm": 0.44762948155403137, "learning_rate": 4.176867941219624e-06, "loss": 0.3167, "step": 36713 }, { "epoch": 1.684824009912349, "grad_norm": 0.4577914774417877, "learning_rate": 4.176626099647881e-06, "loss": 0.3687, "step": 36714 }, { "epoch": 1.6848699004176035, "grad_norm": 0.5063039660453796, "learning_rate": 4.176384260056094e-06, "loss": 0.4125, "step": 36715 }, { "epoch": 1.684915790922858, "grad_norm": 0.5064852833747864, "learning_rate": 4.17614242244484e-06, "loss": 0.4282, "step": 36716 }, { "epoch": 1.6849616814281125, "grad_norm": 0.5046652555465698, "learning_rate": 4.1759005868147054e-06, "loss": 0.4354, "step": 36717 }, { "epoch": 1.685007571933367, "grad_norm": 0.526836633682251, "learning_rate": 4.1756587531662695e-06, "loss": 0.4302, "step": 36718 }, { "epoch": 1.6850534624386215, "grad_norm": 0.43159839510917664, "learning_rate": 4.175416921500113e-06, "loss": 0.2865, "step": 36719 }, { "epoch": 1.685099352943876, "grad_norm": 0.48072636127471924, "learning_rate": 4.175175091816821e-06, "loss": 0.3526, "step": 36720 }, { "epoch": 1.6851452434491305, "grad_norm": 0.47590675950050354, "learning_rate": 4.17493326411697e-06, "loss": 0.3454, "step": 36721 }, { "epoch": 1.6851911339543848, "grad_norm": 0.4662701189517975, "learning_rate": 4.1746914384011435e-06, "loss": 0.3507, "step": 36722 }, { "epoch": 1.6852370244596393, "grad_norm": 0.4876888692378998, "learning_rate": 4.174449614669927e-06, "loss": 0.3744, "step": 36723 }, { "epoch": 1.6852829149648938, "grad_norm": 0.47026485204696655, "learning_rate": 4.174207792923897e-06, "loss": 0.3421, "step": 36724 }, { "epoch": 1.685328805470148, "grad_norm": 0.5658932328224182, "learning_rate": 4.173965973163635e-06, "loss": 0.4592, "step": 36725 }, { "epoch": 1.6853746959754026, "grad_norm": 0.4143156409263611, "learning_rate": 4.173724155389727e-06, "loss": 0.2783, "step": 36726 }, { "epoch": 1.685420586480657, "grad_norm": 0.4525379240512848, "learning_rate": 4.17348233960275e-06, "loss": 0.3031, "step": 36727 }, { "epoch": 1.6854664769859116, "grad_norm": 0.489981472492218, "learning_rate": 4.173240525803286e-06, "loss": 0.3831, "step": 36728 }, { "epoch": 1.685512367491166, "grad_norm": 0.4131324291229248, "learning_rate": 4.172998713991919e-06, "loss": 0.3011, "step": 36729 }, { "epoch": 1.6855582579964206, "grad_norm": 0.4798654019832611, "learning_rate": 4.172756904169229e-06, "loss": 0.3561, "step": 36730 }, { "epoch": 1.685604148501675, "grad_norm": 0.44545382261276245, "learning_rate": 4.172515096335798e-06, "loss": 0.3654, "step": 36731 }, { "epoch": 1.6856500390069296, "grad_norm": 0.4579126238822937, "learning_rate": 4.172273290492207e-06, "loss": 0.3395, "step": 36732 }, { "epoch": 1.685695929512184, "grad_norm": 0.4803103804588318, "learning_rate": 4.172031486639037e-06, "loss": 0.3774, "step": 36733 }, { "epoch": 1.6857418200174386, "grad_norm": 0.43887925148010254, "learning_rate": 4.171789684776869e-06, "loss": 0.3183, "step": 36734 }, { "epoch": 1.6857877105226928, "grad_norm": 0.46445730328559875, "learning_rate": 4.1715478849062866e-06, "loss": 0.3233, "step": 36735 }, { "epoch": 1.6858336010279473, "grad_norm": 0.45388227701187134, "learning_rate": 4.171306087027871e-06, "loss": 0.3394, "step": 36736 }, { "epoch": 1.6858794915332018, "grad_norm": 0.5232885479927063, "learning_rate": 4.171064291142201e-06, "loss": 0.4215, "step": 36737 }, { "epoch": 1.685925382038456, "grad_norm": 0.5060933232307434, "learning_rate": 4.17082249724986e-06, "loss": 0.3982, "step": 36738 }, { "epoch": 1.6859712725437106, "grad_norm": 0.49918821454048157, "learning_rate": 4.17058070535143e-06, "loss": 0.4111, "step": 36739 }, { "epoch": 1.686017163048965, "grad_norm": 0.4382694363594055, "learning_rate": 4.170338915447491e-06, "loss": 0.3298, "step": 36740 }, { "epoch": 1.6860630535542196, "grad_norm": 0.457792192697525, "learning_rate": 4.170097127538626e-06, "loss": 0.3527, "step": 36741 }, { "epoch": 1.686108944059474, "grad_norm": 0.4837076663970947, "learning_rate": 4.1698553416254154e-06, "loss": 0.4135, "step": 36742 }, { "epoch": 1.6861548345647286, "grad_norm": 0.4626867473125458, "learning_rate": 4.16961355770844e-06, "loss": 0.3844, "step": 36743 }, { "epoch": 1.686200725069983, "grad_norm": 0.4796828329563141, "learning_rate": 4.16937177578828e-06, "loss": 0.3795, "step": 36744 }, { "epoch": 1.6862466155752376, "grad_norm": 0.48276805877685547, "learning_rate": 4.169129995865522e-06, "loss": 0.3525, "step": 36745 }, { "epoch": 1.686292506080492, "grad_norm": 0.4635700583457947, "learning_rate": 4.168888217940743e-06, "loss": 0.3367, "step": 36746 }, { "epoch": 1.6863383965857464, "grad_norm": 0.4601075053215027, "learning_rate": 4.168646442014525e-06, "loss": 0.3814, "step": 36747 }, { "epoch": 1.6863842870910009, "grad_norm": 0.519730269908905, "learning_rate": 4.16840466808745e-06, "loss": 0.3889, "step": 36748 }, { "epoch": 1.6864301775962554, "grad_norm": 0.4684511125087738, "learning_rate": 4.1681628961601005e-06, "loss": 0.3351, "step": 36749 }, { "epoch": 1.6864760681015099, "grad_norm": 0.44276687502861023, "learning_rate": 4.1679211262330545e-06, "loss": 0.3178, "step": 36750 }, { "epoch": 1.6865219586067641, "grad_norm": 0.4810556173324585, "learning_rate": 4.167679358306898e-06, "loss": 0.3741, "step": 36751 }, { "epoch": 1.6865678491120186, "grad_norm": 0.47345873713493347, "learning_rate": 4.1674375923822085e-06, "loss": 0.3404, "step": 36752 }, { "epoch": 1.6866137396172731, "grad_norm": 0.5055721998214722, "learning_rate": 4.167195828459569e-06, "loss": 0.3501, "step": 36753 }, { "epoch": 1.6866596301225276, "grad_norm": 0.46325966715812683, "learning_rate": 4.166954066539561e-06, "loss": 0.3773, "step": 36754 }, { "epoch": 1.6867055206277821, "grad_norm": 0.4470631182193756, "learning_rate": 4.166712306622766e-06, "loss": 0.3117, "step": 36755 }, { "epoch": 1.6867514111330366, "grad_norm": 0.4749334454536438, "learning_rate": 4.166470548709762e-06, "loss": 0.3295, "step": 36756 }, { "epoch": 1.6867973016382911, "grad_norm": 0.4975185692310333, "learning_rate": 4.166228792801137e-06, "loss": 0.3941, "step": 36757 }, { "epoch": 1.6868431921435456, "grad_norm": 0.524256706237793, "learning_rate": 4.165987038897468e-06, "loss": 0.4603, "step": 36758 }, { "epoch": 1.6868890826488, "grad_norm": 0.5346419811248779, "learning_rate": 4.1657452869993345e-06, "loss": 0.4157, "step": 36759 }, { "epoch": 1.6869349731540544, "grad_norm": 0.4835657477378845, "learning_rate": 4.165503537107323e-06, "loss": 0.4022, "step": 36760 }, { "epoch": 1.6869808636593089, "grad_norm": 0.45903512835502625, "learning_rate": 4.1652617892220115e-06, "loss": 0.3883, "step": 36761 }, { "epoch": 1.6870267541645634, "grad_norm": 0.5147690773010254, "learning_rate": 4.165020043343981e-06, "loss": 0.3504, "step": 36762 }, { "epoch": 1.6870726446698177, "grad_norm": 0.5026307702064514, "learning_rate": 4.164778299473815e-06, "loss": 0.437, "step": 36763 }, { "epoch": 1.6871185351750722, "grad_norm": 0.4412024915218353, "learning_rate": 4.164536557612093e-06, "loss": 0.2976, "step": 36764 }, { "epoch": 1.6871644256803267, "grad_norm": 0.4594278931617737, "learning_rate": 4.164294817759395e-06, "loss": 0.3712, "step": 36765 }, { "epoch": 1.6872103161855811, "grad_norm": 0.43304747343063354, "learning_rate": 4.164053079916306e-06, "loss": 0.3124, "step": 36766 }, { "epoch": 1.6872562066908356, "grad_norm": 0.5000492930412292, "learning_rate": 4.163811344083407e-06, "loss": 0.3843, "step": 36767 }, { "epoch": 1.6873020971960901, "grad_norm": 0.4477241337299347, "learning_rate": 4.163569610261275e-06, "loss": 0.3578, "step": 36768 }, { "epoch": 1.6873479877013446, "grad_norm": 0.46870797872543335, "learning_rate": 4.163327878450496e-06, "loss": 0.3504, "step": 36769 }, { "epoch": 1.6873938782065991, "grad_norm": 0.5244657397270203, "learning_rate": 4.16308614865165e-06, "loss": 0.4142, "step": 36770 }, { "epoch": 1.6874397687118536, "grad_norm": 0.48102545738220215, "learning_rate": 4.1628444208653144e-06, "loss": 0.3865, "step": 36771 }, { "epoch": 1.6874856592171081, "grad_norm": 0.43701088428497314, "learning_rate": 4.162602695092076e-06, "loss": 0.3125, "step": 36772 }, { "epoch": 1.6875315497223624, "grad_norm": 0.4430621862411499, "learning_rate": 4.162360971332514e-06, "loss": 0.2991, "step": 36773 }, { "epoch": 1.687577440227617, "grad_norm": 0.48940935730934143, "learning_rate": 4.162119249587208e-06, "loss": 0.3689, "step": 36774 }, { "epoch": 1.6876233307328714, "grad_norm": 0.4404969811439514, "learning_rate": 4.1618775298567425e-06, "loss": 0.306, "step": 36775 }, { "epoch": 1.6876692212381257, "grad_norm": 0.4469551146030426, "learning_rate": 4.161635812141698e-06, "loss": 0.3023, "step": 36776 }, { "epoch": 1.6877151117433802, "grad_norm": 0.4747537672519684, "learning_rate": 4.161394096442653e-06, "loss": 0.3999, "step": 36777 }, { "epoch": 1.6877610022486347, "grad_norm": 0.44679808616638184, "learning_rate": 4.161152382760189e-06, "loss": 0.2989, "step": 36778 }, { "epoch": 1.6878068927538892, "grad_norm": 0.5224229097366333, "learning_rate": 4.160910671094891e-06, "loss": 0.4162, "step": 36779 }, { "epoch": 1.6878527832591437, "grad_norm": 0.5652673244476318, "learning_rate": 4.160668961447339e-06, "loss": 0.3119, "step": 36780 }, { "epoch": 1.6878986737643982, "grad_norm": 0.4801797568798065, "learning_rate": 4.16042725381811e-06, "loss": 0.3464, "step": 36781 }, { "epoch": 1.6879445642696527, "grad_norm": 0.4744786024093628, "learning_rate": 4.160185548207791e-06, "loss": 0.3721, "step": 36782 }, { "epoch": 1.6879904547749072, "grad_norm": 0.46331149339675903, "learning_rate": 4.15994384461696e-06, "loss": 0.342, "step": 36783 }, { "epoch": 1.6880363452801617, "grad_norm": 0.4478573501110077, "learning_rate": 4.159702143046199e-06, "loss": 0.3336, "step": 36784 }, { "epoch": 1.6880822357854162, "grad_norm": 0.4576170742511749, "learning_rate": 4.159460443496089e-06, "loss": 0.3282, "step": 36785 }, { "epoch": 1.6881281262906704, "grad_norm": 0.4604507088661194, "learning_rate": 4.159218745967212e-06, "loss": 0.3607, "step": 36786 }, { "epoch": 1.688174016795925, "grad_norm": 0.48022356629371643, "learning_rate": 4.158977050460146e-06, "loss": 0.346, "step": 36787 }, { "epoch": 1.6882199073011794, "grad_norm": 0.4428163766860962, "learning_rate": 4.1587353569754775e-06, "loss": 0.3018, "step": 36788 }, { "epoch": 1.6882657978064337, "grad_norm": 0.47617003321647644, "learning_rate": 4.158493665513784e-06, "loss": 0.4376, "step": 36789 }, { "epoch": 1.6883116883116882, "grad_norm": 0.50802081823349, "learning_rate": 4.158251976075648e-06, "loss": 0.4479, "step": 36790 }, { "epoch": 1.6883575788169427, "grad_norm": 0.48347902297973633, "learning_rate": 4.15801028866165e-06, "loss": 0.3666, "step": 36791 }, { "epoch": 1.6884034693221972, "grad_norm": 0.4668214023113251, "learning_rate": 4.157768603272373e-06, "loss": 0.3715, "step": 36792 }, { "epoch": 1.6884493598274517, "grad_norm": 0.4578152298927307, "learning_rate": 4.157526919908393e-06, "loss": 0.3593, "step": 36793 }, { "epoch": 1.6884952503327062, "grad_norm": 0.4864543378353119, "learning_rate": 4.157285238570298e-06, "loss": 0.3672, "step": 36794 }, { "epoch": 1.6885411408379607, "grad_norm": 0.44036224484443665, "learning_rate": 4.1570435592586656e-06, "loss": 0.341, "step": 36795 }, { "epoch": 1.6885870313432152, "grad_norm": 0.4474756717681885, "learning_rate": 4.156801881974076e-06, "loss": 0.3262, "step": 36796 }, { "epoch": 1.6886329218484697, "grad_norm": 0.46045470237731934, "learning_rate": 4.156560206717113e-06, "loss": 0.3515, "step": 36797 }, { "epoch": 1.688678812353724, "grad_norm": 0.44514498114585876, "learning_rate": 4.156318533488357e-06, "loss": 0.3427, "step": 36798 }, { "epoch": 1.6887247028589785, "grad_norm": 0.46188172698020935, "learning_rate": 4.156076862288386e-06, "loss": 0.3424, "step": 36799 }, { "epoch": 1.688770593364233, "grad_norm": 0.43751174211502075, "learning_rate": 4.155835193117787e-06, "loss": 0.3283, "step": 36800 }, { "epoch": 1.6888164838694872, "grad_norm": 0.43324941396713257, "learning_rate": 4.155593525977136e-06, "loss": 0.2944, "step": 36801 }, { "epoch": 1.6888623743747417, "grad_norm": 0.44718584418296814, "learning_rate": 4.1553518608670165e-06, "loss": 0.3405, "step": 36802 }, { "epoch": 1.6889082648799962, "grad_norm": 0.4858654737472534, "learning_rate": 4.155110197788011e-06, "loss": 0.3638, "step": 36803 }, { "epoch": 1.6889541553852507, "grad_norm": 0.4785882830619812, "learning_rate": 4.154868536740697e-06, "loss": 0.4021, "step": 36804 }, { "epoch": 1.6890000458905052, "grad_norm": 0.45162996649742126, "learning_rate": 4.154626877725656e-06, "loss": 0.3409, "step": 36805 }, { "epoch": 1.6890459363957597, "grad_norm": 0.4666168987751007, "learning_rate": 4.154385220743474e-06, "loss": 0.3595, "step": 36806 }, { "epoch": 1.6890918269010142, "grad_norm": 0.46007511019706726, "learning_rate": 4.154143565794727e-06, "loss": 0.3107, "step": 36807 }, { "epoch": 1.6891377174062687, "grad_norm": 0.44796469807624817, "learning_rate": 4.153901912879998e-06, "loss": 0.3204, "step": 36808 }, { "epoch": 1.6891836079115232, "grad_norm": 0.432830274105072, "learning_rate": 4.153660261999866e-06, "loss": 0.2955, "step": 36809 }, { "epoch": 1.6892294984167777, "grad_norm": 0.48359212279319763, "learning_rate": 4.153418613154916e-06, "loss": 0.3659, "step": 36810 }, { "epoch": 1.689275388922032, "grad_norm": 0.47564437985420227, "learning_rate": 4.153176966345727e-06, "loss": 0.3724, "step": 36811 }, { "epoch": 1.6893212794272865, "grad_norm": 0.46529024839401245, "learning_rate": 4.152935321572879e-06, "loss": 0.3573, "step": 36812 }, { "epoch": 1.689367169932541, "grad_norm": 0.4471629858016968, "learning_rate": 4.152693678836955e-06, "loss": 0.3107, "step": 36813 }, { "epoch": 1.6894130604377953, "grad_norm": 0.49422475695610046, "learning_rate": 4.152452038138536e-06, "loss": 0.4159, "step": 36814 }, { "epoch": 1.6894589509430498, "grad_norm": 0.4844529628753662, "learning_rate": 4.1522103994782e-06, "loss": 0.4023, "step": 36815 }, { "epoch": 1.6895048414483043, "grad_norm": 0.46729210019111633, "learning_rate": 4.151968762856532e-06, "loss": 0.3818, "step": 36816 }, { "epoch": 1.6895507319535588, "grad_norm": 0.4574427306652069, "learning_rate": 4.151727128274112e-06, "loss": 0.3159, "step": 36817 }, { "epoch": 1.6895966224588133, "grad_norm": 0.48064419627189636, "learning_rate": 4.151485495731519e-06, "loss": 0.3334, "step": 36818 }, { "epoch": 1.6896425129640678, "grad_norm": 0.5360739827156067, "learning_rate": 4.151243865229337e-06, "loss": 0.4429, "step": 36819 }, { "epoch": 1.6896884034693223, "grad_norm": 0.4431733787059784, "learning_rate": 4.151002236768146e-06, "loss": 0.3413, "step": 36820 }, { "epoch": 1.6897342939745768, "grad_norm": 0.4831811487674713, "learning_rate": 4.150760610348525e-06, "loss": 0.3755, "step": 36821 }, { "epoch": 1.6897801844798312, "grad_norm": 0.4504460096359253, "learning_rate": 4.150518985971057e-06, "loss": 0.3207, "step": 36822 }, { "epoch": 1.6898260749850857, "grad_norm": 0.44837692379951477, "learning_rate": 4.150277363636323e-06, "loss": 0.3197, "step": 36823 }, { "epoch": 1.68987196549034, "grad_norm": 0.4410548806190491, "learning_rate": 4.150035743344904e-06, "loss": 0.3264, "step": 36824 }, { "epoch": 1.6899178559955945, "grad_norm": 0.45388856530189514, "learning_rate": 4.149794125097382e-06, "loss": 0.3628, "step": 36825 }, { "epoch": 1.689963746500849, "grad_norm": 0.46156802773475647, "learning_rate": 4.149552508894336e-06, "loss": 0.3548, "step": 36826 }, { "epoch": 1.6900096370061033, "grad_norm": 0.44656237959861755, "learning_rate": 4.149310894736345e-06, "loss": 0.3126, "step": 36827 }, { "epoch": 1.6900555275113578, "grad_norm": 0.4617288410663605, "learning_rate": 4.149069282623997e-06, "loss": 0.3664, "step": 36828 }, { "epoch": 1.6901014180166123, "grad_norm": 0.43301883339881897, "learning_rate": 4.148827672557867e-06, "loss": 0.36, "step": 36829 }, { "epoch": 1.6901473085218668, "grad_norm": 0.4307457506656647, "learning_rate": 4.148586064538537e-06, "loss": 0.3187, "step": 36830 }, { "epoch": 1.6901931990271213, "grad_norm": 0.4437965452671051, "learning_rate": 4.14834445856659e-06, "loss": 0.31, "step": 36831 }, { "epoch": 1.6902390895323758, "grad_norm": 0.49264201521873474, "learning_rate": 4.1481028546426055e-06, "loss": 0.4367, "step": 36832 }, { "epoch": 1.6902849800376303, "grad_norm": 0.4288793206214905, "learning_rate": 4.147861252767164e-06, "loss": 0.2752, "step": 36833 }, { "epoch": 1.6903308705428848, "grad_norm": 0.4393137991428375, "learning_rate": 4.147619652940848e-06, "loss": 0.2999, "step": 36834 }, { "epoch": 1.6903767610481393, "grad_norm": 0.507400393486023, "learning_rate": 4.147378055164239e-06, "loss": 0.3783, "step": 36835 }, { "epoch": 1.6904226515533936, "grad_norm": 0.4294240176677704, "learning_rate": 4.147136459437913e-06, "loss": 0.255, "step": 36836 }, { "epoch": 1.690468542058648, "grad_norm": 0.4532887935638428, "learning_rate": 4.146894865762458e-06, "loss": 0.3629, "step": 36837 }, { "epoch": 1.6905144325639025, "grad_norm": 0.44023844599723816, "learning_rate": 4.146653274138451e-06, "loss": 0.2768, "step": 36838 }, { "epoch": 1.690560323069157, "grad_norm": 0.5166060328483582, "learning_rate": 4.146411684566472e-06, "loss": 0.4024, "step": 36839 }, { "epoch": 1.6906062135744113, "grad_norm": 0.46761175990104675, "learning_rate": 4.146170097047106e-06, "loss": 0.3531, "step": 36840 }, { "epoch": 1.6906521040796658, "grad_norm": 0.4524780213832855, "learning_rate": 4.14592851158093e-06, "loss": 0.3162, "step": 36841 }, { "epoch": 1.6906979945849203, "grad_norm": 0.42384982109069824, "learning_rate": 4.1456869281685265e-06, "loss": 0.2664, "step": 36842 }, { "epoch": 1.6907438850901748, "grad_norm": 0.4624957740306854, "learning_rate": 4.145445346810475e-06, "loss": 0.3504, "step": 36843 }, { "epoch": 1.6907897755954293, "grad_norm": 0.44703394174575806, "learning_rate": 4.1452037675073595e-06, "loss": 0.3383, "step": 36844 }, { "epoch": 1.6908356661006838, "grad_norm": 0.4698556065559387, "learning_rate": 4.144962190259759e-06, "loss": 0.3737, "step": 36845 }, { "epoch": 1.6908815566059383, "grad_norm": 0.5036715269088745, "learning_rate": 4.144720615068254e-06, "loss": 0.4416, "step": 36846 }, { "epoch": 1.6909274471111928, "grad_norm": 0.5077207088470459, "learning_rate": 4.1444790419334265e-06, "loss": 0.4312, "step": 36847 }, { "epoch": 1.6909733376164473, "grad_norm": 0.433074951171875, "learning_rate": 4.144237470855857e-06, "loss": 0.3227, "step": 36848 }, { "epoch": 1.6910192281217016, "grad_norm": 0.4638296663761139, "learning_rate": 4.143995901836124e-06, "loss": 0.3823, "step": 36849 }, { "epoch": 1.691065118626956, "grad_norm": 0.44176164269447327, "learning_rate": 4.1437543348748145e-06, "loss": 0.3273, "step": 36850 }, { "epoch": 1.6911110091322106, "grad_norm": 0.4198774993419647, "learning_rate": 4.143512769972504e-06, "loss": 0.2833, "step": 36851 }, { "epoch": 1.6911568996374648, "grad_norm": 0.45586130023002625, "learning_rate": 4.143271207129773e-06, "loss": 0.3561, "step": 36852 }, { "epoch": 1.6912027901427193, "grad_norm": 0.45588189363479614, "learning_rate": 4.143029646347207e-06, "loss": 0.3477, "step": 36853 }, { "epoch": 1.6912486806479738, "grad_norm": 0.4814203083515167, "learning_rate": 4.142788087625383e-06, "loss": 0.3879, "step": 36854 }, { "epoch": 1.6912945711532283, "grad_norm": 0.5031703114509583, "learning_rate": 4.142546530964883e-06, "loss": 0.3952, "step": 36855 }, { "epoch": 1.6913404616584828, "grad_norm": 0.4574336111545563, "learning_rate": 4.142304976366289e-06, "loss": 0.3487, "step": 36856 }, { "epoch": 1.6913863521637373, "grad_norm": 0.47126877307891846, "learning_rate": 4.14206342383018e-06, "loss": 0.3388, "step": 36857 }, { "epoch": 1.6914322426689918, "grad_norm": 0.4507964551448822, "learning_rate": 4.141821873357136e-06, "loss": 0.3207, "step": 36858 }, { "epoch": 1.6914781331742463, "grad_norm": 0.4789820909500122, "learning_rate": 4.141580324947742e-06, "loss": 0.3792, "step": 36859 }, { "epoch": 1.6915240236795008, "grad_norm": 0.49611976742744446, "learning_rate": 4.141338778602576e-06, "loss": 0.3927, "step": 36860 }, { "epoch": 1.6915699141847553, "grad_norm": 0.5025447010993958, "learning_rate": 4.141097234322219e-06, "loss": 0.4073, "step": 36861 }, { "epoch": 1.6916158046900096, "grad_norm": 0.45788344740867615, "learning_rate": 4.140855692107253e-06, "loss": 0.3306, "step": 36862 }, { "epoch": 1.691661695195264, "grad_norm": 0.49800029397010803, "learning_rate": 4.140614151958257e-06, "loss": 0.4112, "step": 36863 }, { "epoch": 1.6917075857005186, "grad_norm": 0.4580008387565613, "learning_rate": 4.140372613875811e-06, "loss": 0.3347, "step": 36864 }, { "epoch": 1.6917534762057729, "grad_norm": 0.4278797209262848, "learning_rate": 4.1401310778604994e-06, "loss": 0.2699, "step": 36865 }, { "epoch": 1.6917993667110274, "grad_norm": 0.4615432918071747, "learning_rate": 4.139889543912902e-06, "loss": 0.3223, "step": 36866 }, { "epoch": 1.6918452572162819, "grad_norm": 0.4696485102176666, "learning_rate": 4.139648012033598e-06, "loss": 0.3395, "step": 36867 }, { "epoch": 1.6918911477215364, "grad_norm": 0.4438839256763458, "learning_rate": 4.139406482223169e-06, "loss": 0.3085, "step": 36868 }, { "epoch": 1.6919370382267909, "grad_norm": 0.489225298166275, "learning_rate": 4.139164954482196e-06, "loss": 0.4206, "step": 36869 }, { "epoch": 1.6919829287320454, "grad_norm": 0.44891610741615295, "learning_rate": 4.138923428811257e-06, "loss": 0.3026, "step": 36870 }, { "epoch": 1.6920288192372999, "grad_norm": 0.4640611410140991, "learning_rate": 4.138681905210938e-06, "loss": 0.3238, "step": 36871 }, { "epoch": 1.6920747097425544, "grad_norm": 0.467511922121048, "learning_rate": 4.138440383681819e-06, "loss": 0.3525, "step": 36872 }, { "epoch": 1.6921206002478089, "grad_norm": 0.4589094817638397, "learning_rate": 4.138198864224478e-06, "loss": 0.3531, "step": 36873 }, { "epoch": 1.6921664907530634, "grad_norm": 0.5083353519439697, "learning_rate": 4.137957346839493e-06, "loss": 0.4155, "step": 36874 }, { "epoch": 1.6922123812583176, "grad_norm": 0.4752688705921173, "learning_rate": 4.137715831527451e-06, "loss": 0.3467, "step": 36875 }, { "epoch": 1.6922582717635721, "grad_norm": 0.4769943058490753, "learning_rate": 4.1374743182889306e-06, "loss": 0.3147, "step": 36876 }, { "epoch": 1.6923041622688266, "grad_norm": 0.47080984711647034, "learning_rate": 4.137232807124512e-06, "loss": 0.3257, "step": 36877 }, { "epoch": 1.692350052774081, "grad_norm": 0.4451020658016205, "learning_rate": 4.1369912980347766e-06, "loss": 0.313, "step": 36878 }, { "epoch": 1.6923959432793354, "grad_norm": 0.515381395816803, "learning_rate": 4.136749791020305e-06, "loss": 0.4387, "step": 36879 }, { "epoch": 1.69244183378459, "grad_norm": 0.45998769998550415, "learning_rate": 4.136508286081675e-06, "loss": 0.3181, "step": 36880 }, { "epoch": 1.6924877242898444, "grad_norm": 0.47594398260116577, "learning_rate": 4.136266783219472e-06, "loss": 0.3783, "step": 36881 }, { "epoch": 1.692533614795099, "grad_norm": 0.4827595055103302, "learning_rate": 4.136025282434276e-06, "loss": 0.3814, "step": 36882 }, { "epoch": 1.6925795053003534, "grad_norm": 0.48962658643722534, "learning_rate": 4.135783783726665e-06, "loss": 0.358, "step": 36883 }, { "epoch": 1.692625395805608, "grad_norm": 0.4838423728942871, "learning_rate": 4.135542287097222e-06, "loss": 0.3848, "step": 36884 }, { "epoch": 1.6926712863108624, "grad_norm": 0.43589261174201965, "learning_rate": 4.135300792546527e-06, "loss": 0.2918, "step": 36885 }, { "epoch": 1.6927171768161169, "grad_norm": 0.5090653300285339, "learning_rate": 4.135059300075158e-06, "loss": 0.4372, "step": 36886 }, { "epoch": 1.6927630673213712, "grad_norm": 0.4632788896560669, "learning_rate": 4.134817809683702e-06, "loss": 0.3147, "step": 36887 }, { "epoch": 1.6928089578266257, "grad_norm": 0.4514651894569397, "learning_rate": 4.1345763213727345e-06, "loss": 0.3364, "step": 36888 }, { "epoch": 1.6928548483318802, "grad_norm": 0.48137402534484863, "learning_rate": 4.134334835142838e-06, "loss": 0.3794, "step": 36889 }, { "epoch": 1.6929007388371344, "grad_norm": 0.49424150586128235, "learning_rate": 4.1340933509945935e-06, "loss": 0.4263, "step": 36890 }, { "epoch": 1.692946629342389, "grad_norm": 0.4852655529975891, "learning_rate": 4.133851868928581e-06, "loss": 0.4279, "step": 36891 }, { "epoch": 1.6929925198476434, "grad_norm": 0.4533689022064209, "learning_rate": 4.133610388945379e-06, "loss": 0.3004, "step": 36892 }, { "epoch": 1.693038410352898, "grad_norm": 0.4615727365016937, "learning_rate": 4.1333689110455734e-06, "loss": 0.3356, "step": 36893 }, { "epoch": 1.6930843008581524, "grad_norm": 0.45964616537094116, "learning_rate": 4.133127435229743e-06, "loss": 0.3314, "step": 36894 }, { "epoch": 1.693130191363407, "grad_norm": 0.47771501541137695, "learning_rate": 4.1328859614984646e-06, "loss": 0.4177, "step": 36895 }, { "epoch": 1.6931760818686614, "grad_norm": 0.49474000930786133, "learning_rate": 4.1326444898523235e-06, "loss": 0.4055, "step": 36896 }, { "epoch": 1.693221972373916, "grad_norm": 0.44006356596946716, "learning_rate": 4.132403020291899e-06, "loss": 0.2787, "step": 36897 }, { "epoch": 1.6932678628791704, "grad_norm": 0.5134905576705933, "learning_rate": 4.13216155281777e-06, "loss": 0.4495, "step": 36898 }, { "epoch": 1.693313753384425, "grad_norm": 0.4804705083370209, "learning_rate": 4.13192008743052e-06, "loss": 0.3592, "step": 36899 }, { "epoch": 1.6933596438896792, "grad_norm": 0.4158957302570343, "learning_rate": 4.131678624130728e-06, "loss": 0.283, "step": 36900 }, { "epoch": 1.6934055343949337, "grad_norm": 0.4449012875556946, "learning_rate": 4.131437162918972e-06, "loss": 0.3637, "step": 36901 }, { "epoch": 1.6934514249001882, "grad_norm": 0.45994657278060913, "learning_rate": 4.131195703795839e-06, "loss": 0.3445, "step": 36902 }, { "epoch": 1.6934973154054425, "grad_norm": 0.4867658019065857, "learning_rate": 4.1309542467619055e-06, "loss": 0.3813, "step": 36903 }, { "epoch": 1.693543205910697, "grad_norm": 0.4464125633239746, "learning_rate": 4.1307127918177515e-06, "loss": 0.3406, "step": 36904 }, { "epoch": 1.6935890964159515, "grad_norm": 0.426437109708786, "learning_rate": 4.1304713389639604e-06, "loss": 0.2947, "step": 36905 }, { "epoch": 1.693634986921206, "grad_norm": 0.4777913987636566, "learning_rate": 4.130229888201111e-06, "loss": 0.3689, "step": 36906 }, { "epoch": 1.6936808774264605, "grad_norm": 0.4622422158718109, "learning_rate": 4.129988439529784e-06, "loss": 0.3253, "step": 36907 }, { "epoch": 1.693726767931715, "grad_norm": 0.4693111777305603, "learning_rate": 4.129746992950558e-06, "loss": 0.3708, "step": 36908 }, { "epoch": 1.6937726584369694, "grad_norm": 0.45766136050224304, "learning_rate": 4.1295055484640184e-06, "loss": 0.3261, "step": 36909 }, { "epoch": 1.693818548942224, "grad_norm": 0.47616150975227356, "learning_rate": 4.129264106070743e-06, "loss": 0.3451, "step": 36910 }, { "epoch": 1.6938644394474784, "grad_norm": 0.474323570728302, "learning_rate": 4.129022665771312e-06, "loss": 0.3377, "step": 36911 }, { "epoch": 1.693910329952733, "grad_norm": 0.4845379590988159, "learning_rate": 4.128781227566307e-06, "loss": 0.3958, "step": 36912 }, { "epoch": 1.6939562204579872, "grad_norm": 0.5337485074996948, "learning_rate": 4.128539791456309e-06, "loss": 0.4577, "step": 36913 }, { "epoch": 1.6940021109632417, "grad_norm": 0.4459736943244934, "learning_rate": 4.128298357441894e-06, "loss": 0.3171, "step": 36914 }, { "epoch": 1.6940480014684962, "grad_norm": 0.47228631377220154, "learning_rate": 4.12805692552365e-06, "loss": 0.3302, "step": 36915 }, { "epoch": 1.6940938919737505, "grad_norm": 0.4573717415332794, "learning_rate": 4.127815495702154e-06, "loss": 0.3497, "step": 36916 }, { "epoch": 1.694139782479005, "grad_norm": 0.48213067650794983, "learning_rate": 4.127574067977984e-06, "loss": 0.3515, "step": 36917 }, { "epoch": 1.6941856729842595, "grad_norm": 0.4555206894874573, "learning_rate": 4.127332642351725e-06, "loss": 0.3353, "step": 36918 }, { "epoch": 1.694231563489514, "grad_norm": 0.44800934195518494, "learning_rate": 4.1270912188239555e-06, "loss": 0.3062, "step": 36919 }, { "epoch": 1.6942774539947685, "grad_norm": 0.4445018470287323, "learning_rate": 4.1268497973952545e-06, "loss": 0.3145, "step": 36920 }, { "epoch": 1.694323344500023, "grad_norm": 0.49942195415496826, "learning_rate": 4.126608378066207e-06, "loss": 0.4066, "step": 36921 }, { "epoch": 1.6943692350052775, "grad_norm": 0.460245281457901, "learning_rate": 4.126366960837389e-06, "loss": 0.3483, "step": 36922 }, { "epoch": 1.694415125510532, "grad_norm": 0.4961283802986145, "learning_rate": 4.126125545709381e-06, "loss": 0.4086, "step": 36923 }, { "epoch": 1.6944610160157865, "grad_norm": 0.430593341588974, "learning_rate": 4.125884132682767e-06, "loss": 0.2751, "step": 36924 }, { "epoch": 1.6945069065210407, "grad_norm": 0.4765610992908478, "learning_rate": 4.125642721758127e-06, "loss": 0.3396, "step": 36925 }, { "epoch": 1.6945527970262952, "grad_norm": 0.4605938494205475, "learning_rate": 4.125401312936038e-06, "loss": 0.3167, "step": 36926 }, { "epoch": 1.6945986875315497, "grad_norm": 0.42799097299575806, "learning_rate": 4.125159906217084e-06, "loss": 0.2984, "step": 36927 }, { "epoch": 1.6946445780368042, "grad_norm": 0.4860045611858368, "learning_rate": 4.124918501601846e-06, "loss": 0.3459, "step": 36928 }, { "epoch": 1.6946904685420585, "grad_norm": 0.4884439706802368, "learning_rate": 4.124677099090898e-06, "loss": 0.4201, "step": 36929 }, { "epoch": 1.694736359047313, "grad_norm": 0.5269063711166382, "learning_rate": 4.124435698684829e-06, "loss": 0.4422, "step": 36930 }, { "epoch": 1.6947822495525675, "grad_norm": 0.42389658093452454, "learning_rate": 4.124194300384216e-06, "loss": 0.268, "step": 36931 }, { "epoch": 1.694828140057822, "grad_norm": 0.4852316379547119, "learning_rate": 4.123952904189638e-06, "loss": 0.3641, "step": 36932 }, { "epoch": 1.6948740305630765, "grad_norm": 0.48655593395233154, "learning_rate": 4.1237115101016775e-06, "loss": 0.3818, "step": 36933 }, { "epoch": 1.694919921068331, "grad_norm": 0.47353994846343994, "learning_rate": 4.123470118120915e-06, "loss": 0.3287, "step": 36934 }, { "epoch": 1.6949658115735855, "grad_norm": 0.4697560667991638, "learning_rate": 4.123228728247927e-06, "loss": 0.3717, "step": 36935 }, { "epoch": 1.69501170207884, "grad_norm": 0.42585283517837524, "learning_rate": 4.1229873404833e-06, "loss": 0.3098, "step": 36936 }, { "epoch": 1.6950575925840945, "grad_norm": 0.4320126175880432, "learning_rate": 4.122745954827611e-06, "loss": 0.2928, "step": 36937 }, { "epoch": 1.6951034830893488, "grad_norm": 0.4783463776111603, "learning_rate": 4.122504571281443e-06, "loss": 0.3861, "step": 36938 }, { "epoch": 1.6951493735946033, "grad_norm": 0.4963923990726471, "learning_rate": 4.122263189845372e-06, "loss": 0.3756, "step": 36939 }, { "epoch": 1.6951952640998578, "grad_norm": 0.4609127938747406, "learning_rate": 4.1220218105199815e-06, "loss": 0.3329, "step": 36940 }, { "epoch": 1.695241154605112, "grad_norm": 0.4785782992839813, "learning_rate": 4.121780433305852e-06, "loss": 0.35, "step": 36941 }, { "epoch": 1.6952870451103665, "grad_norm": 0.4581732749938965, "learning_rate": 4.121539058203563e-06, "loss": 0.3468, "step": 36942 }, { "epoch": 1.695332935615621, "grad_norm": 0.4667890667915344, "learning_rate": 4.121297685213695e-06, "loss": 0.3338, "step": 36943 }, { "epoch": 1.6953788261208755, "grad_norm": 0.44299349188804626, "learning_rate": 4.12105631433683e-06, "loss": 0.3241, "step": 36944 }, { "epoch": 1.69542471662613, "grad_norm": 0.4530942440032959, "learning_rate": 4.1208149455735445e-06, "loss": 0.3035, "step": 36945 }, { "epoch": 1.6954706071313845, "grad_norm": 0.4427909851074219, "learning_rate": 4.120573578924424e-06, "loss": 0.3247, "step": 36946 }, { "epoch": 1.695516497636639, "grad_norm": 0.4766167998313904, "learning_rate": 4.120332214390046e-06, "loss": 0.3769, "step": 36947 }, { "epoch": 1.6955623881418935, "grad_norm": 0.449705570936203, "learning_rate": 4.120090851970991e-06, "loss": 0.3215, "step": 36948 }, { "epoch": 1.695608278647148, "grad_norm": 0.4725184142589569, "learning_rate": 4.1198494916678406e-06, "loss": 0.3392, "step": 36949 }, { "epoch": 1.6956541691524025, "grad_norm": 0.4967580735683441, "learning_rate": 4.119608133481174e-06, "loss": 0.3738, "step": 36950 }, { "epoch": 1.6957000596576568, "grad_norm": 0.4509488642215729, "learning_rate": 4.11936677741157e-06, "loss": 0.3501, "step": 36951 }, { "epoch": 1.6957459501629113, "grad_norm": 0.4471164345741272, "learning_rate": 4.119125423459613e-06, "loss": 0.3482, "step": 36952 }, { "epoch": 1.6957918406681658, "grad_norm": 0.4719114303588867, "learning_rate": 4.118884071625882e-06, "loss": 0.3665, "step": 36953 }, { "epoch": 1.69583773117342, "grad_norm": 0.5000113248825073, "learning_rate": 4.118642721910955e-06, "loss": 0.348, "step": 36954 }, { "epoch": 1.6958836216786746, "grad_norm": 0.4456068277359009, "learning_rate": 4.118401374315416e-06, "loss": 0.3441, "step": 36955 }, { "epoch": 1.695929512183929, "grad_norm": 0.44855162501335144, "learning_rate": 4.1181600288398426e-06, "loss": 0.3387, "step": 36956 }, { "epoch": 1.6959754026891836, "grad_norm": 0.5031652450561523, "learning_rate": 4.1179186854848145e-06, "loss": 0.4061, "step": 36957 }, { "epoch": 1.696021293194438, "grad_norm": 0.4833148419857025, "learning_rate": 4.117677344250915e-06, "loss": 0.391, "step": 36958 }, { "epoch": 1.6960671836996926, "grad_norm": 0.48636889457702637, "learning_rate": 4.1174360051387235e-06, "loss": 0.3508, "step": 36959 }, { "epoch": 1.696113074204947, "grad_norm": 0.4749028980731964, "learning_rate": 4.1171946681488185e-06, "loss": 0.3888, "step": 36960 }, { "epoch": 1.6961589647102016, "grad_norm": 0.4741758406162262, "learning_rate": 4.116953333281783e-06, "loss": 0.3767, "step": 36961 }, { "epoch": 1.696204855215456, "grad_norm": 0.4679613709449768, "learning_rate": 4.1167120005381965e-06, "loss": 0.3384, "step": 36962 }, { "epoch": 1.6962507457207106, "grad_norm": 0.539152204990387, "learning_rate": 4.116470669918636e-06, "loss": 0.4761, "step": 36963 }, { "epoch": 1.6962966362259648, "grad_norm": 0.49847641587257385, "learning_rate": 4.116229341423688e-06, "loss": 0.3574, "step": 36964 }, { "epoch": 1.6963425267312193, "grad_norm": 0.5194189548492432, "learning_rate": 4.115988015053928e-06, "loss": 0.4081, "step": 36965 }, { "epoch": 1.6963884172364738, "grad_norm": 0.49501198530197144, "learning_rate": 4.115746690809936e-06, "loss": 0.3572, "step": 36966 }, { "epoch": 1.696434307741728, "grad_norm": 0.4745776355266571, "learning_rate": 4.115505368692297e-06, "loss": 0.4067, "step": 36967 }, { "epoch": 1.6964801982469826, "grad_norm": 0.4701031744480133, "learning_rate": 4.1152640487015875e-06, "loss": 0.3402, "step": 36968 }, { "epoch": 1.696526088752237, "grad_norm": 0.4699636995792389, "learning_rate": 4.115022730838389e-06, "loss": 0.3221, "step": 36969 }, { "epoch": 1.6965719792574916, "grad_norm": 0.5030108690261841, "learning_rate": 4.114781415103281e-06, "loss": 0.4353, "step": 36970 }, { "epoch": 1.696617869762746, "grad_norm": 0.4906654357910156, "learning_rate": 4.114540101496845e-06, "loss": 0.3561, "step": 36971 }, { "epoch": 1.6966637602680006, "grad_norm": 0.4536774158477783, "learning_rate": 4.114298790019661e-06, "loss": 0.3296, "step": 36972 }, { "epoch": 1.696709650773255, "grad_norm": 0.5202628970146179, "learning_rate": 4.114057480672305e-06, "loss": 0.3958, "step": 36973 }, { "epoch": 1.6967555412785096, "grad_norm": 0.4745253920555115, "learning_rate": 4.113816173455365e-06, "loss": 0.3696, "step": 36974 }, { "epoch": 1.696801431783764, "grad_norm": 0.4701108932495117, "learning_rate": 4.113574868369418e-06, "loss": 0.3482, "step": 36975 }, { "epoch": 1.6968473222890184, "grad_norm": 0.5014048218727112, "learning_rate": 4.113333565415041e-06, "loss": 0.4462, "step": 36976 }, { "epoch": 1.6968932127942729, "grad_norm": 0.4417453110218048, "learning_rate": 4.113092264592818e-06, "loss": 0.342, "step": 36977 }, { "epoch": 1.6969391032995274, "grad_norm": 0.43841734528541565, "learning_rate": 4.112850965903329e-06, "loss": 0.3077, "step": 36978 }, { "epoch": 1.6969849938047816, "grad_norm": 0.4786892533302307, "learning_rate": 4.112609669347152e-06, "loss": 0.3592, "step": 36979 }, { "epoch": 1.6970308843100361, "grad_norm": 0.43335777521133423, "learning_rate": 4.112368374924869e-06, "loss": 0.3061, "step": 36980 }, { "epoch": 1.6970767748152906, "grad_norm": 0.455598920583725, "learning_rate": 4.112127082637062e-06, "loss": 0.3128, "step": 36981 }, { "epoch": 1.6971226653205451, "grad_norm": 0.42871060967445374, "learning_rate": 4.111885792484306e-06, "loss": 0.2862, "step": 36982 }, { "epoch": 1.6971685558257996, "grad_norm": 0.43354126811027527, "learning_rate": 4.111644504467186e-06, "loss": 0.2968, "step": 36983 }, { "epoch": 1.6972144463310541, "grad_norm": 0.5511528253555298, "learning_rate": 4.111403218586282e-06, "loss": 0.3639, "step": 36984 }, { "epoch": 1.6972603368363086, "grad_norm": 0.47169139981269836, "learning_rate": 4.111161934842169e-06, "loss": 0.3449, "step": 36985 }, { "epoch": 1.6973062273415631, "grad_norm": 0.480385959148407, "learning_rate": 4.1109206532354355e-06, "loss": 0.4005, "step": 36986 }, { "epoch": 1.6973521178468176, "grad_norm": 0.4801252782344818, "learning_rate": 4.110679373766655e-06, "loss": 0.3955, "step": 36987 }, { "epoch": 1.697398008352072, "grad_norm": 0.4385213255882263, "learning_rate": 4.110438096436408e-06, "loss": 0.2744, "step": 36988 }, { "epoch": 1.6974438988573264, "grad_norm": 0.4913046061992645, "learning_rate": 4.110196821245279e-06, "loss": 0.3776, "step": 36989 }, { "epoch": 1.6974897893625809, "grad_norm": 0.4658375680446625, "learning_rate": 4.109955548193846e-06, "loss": 0.3108, "step": 36990 }, { "epoch": 1.6975356798678354, "grad_norm": 0.4557473957538605, "learning_rate": 4.109714277282688e-06, "loss": 0.3547, "step": 36991 }, { "epoch": 1.6975815703730897, "grad_norm": 0.4565676152706146, "learning_rate": 4.109473008512387e-06, "loss": 0.349, "step": 36992 }, { "epoch": 1.6976274608783442, "grad_norm": 0.48243609070777893, "learning_rate": 4.1092317418835225e-06, "loss": 0.4108, "step": 36993 }, { "epoch": 1.6976733513835986, "grad_norm": 0.48242318630218506, "learning_rate": 4.108990477396672e-06, "loss": 0.3857, "step": 36994 }, { "epoch": 1.6977192418888531, "grad_norm": 0.4577937126159668, "learning_rate": 4.108749215052421e-06, "loss": 0.3565, "step": 36995 }, { "epoch": 1.6977651323941076, "grad_norm": 0.473869264125824, "learning_rate": 4.1085079548513464e-06, "loss": 0.3959, "step": 36996 }, { "epoch": 1.6978110228993621, "grad_norm": 0.4573453962802887, "learning_rate": 4.108266696794028e-06, "loss": 0.3865, "step": 36997 }, { "epoch": 1.6978569134046166, "grad_norm": 0.4452921450138092, "learning_rate": 4.1080254408810475e-06, "loss": 0.3142, "step": 36998 }, { "epoch": 1.6979028039098711, "grad_norm": 0.5026951432228088, "learning_rate": 4.107784187112984e-06, "loss": 0.3865, "step": 36999 }, { "epoch": 1.6979486944151256, "grad_norm": 0.48680049180984497, "learning_rate": 4.1075429354904165e-06, "loss": 0.3786, "step": 37000 }, { "epoch": 1.6979945849203801, "grad_norm": 0.4942714273929596, "learning_rate": 4.107301686013928e-06, "loss": 0.391, "step": 37001 }, { "epoch": 1.6980404754256344, "grad_norm": 0.45103299617767334, "learning_rate": 4.1070604386840976e-06, "loss": 0.3156, "step": 37002 }, { "epoch": 1.698086365930889, "grad_norm": 0.42651480436325073, "learning_rate": 4.106819193501504e-06, "loss": 0.2787, "step": 37003 }, { "epoch": 1.6981322564361434, "grad_norm": 0.5122444033622742, "learning_rate": 4.10657795046673e-06, "loss": 0.3899, "step": 37004 }, { "epoch": 1.6981781469413977, "grad_norm": 0.473503053188324, "learning_rate": 4.106336709580353e-06, "loss": 0.389, "step": 37005 }, { "epoch": 1.6982240374466522, "grad_norm": 0.4633901119232178, "learning_rate": 4.106095470842955e-06, "loss": 0.334, "step": 37006 }, { "epoch": 1.6982699279519067, "grad_norm": 0.4847634732723236, "learning_rate": 4.1058542342551125e-06, "loss": 0.4163, "step": 37007 }, { "epoch": 1.6983158184571612, "grad_norm": 0.437478244304657, "learning_rate": 4.105612999817412e-06, "loss": 0.317, "step": 37008 }, { "epoch": 1.6983617089624157, "grad_norm": 0.4665999412536621, "learning_rate": 4.105371767530429e-06, "loss": 0.3677, "step": 37009 }, { "epoch": 1.6984075994676702, "grad_norm": 0.49108701944351196, "learning_rate": 4.105130537394742e-06, "loss": 0.4048, "step": 37010 }, { "epoch": 1.6984534899729247, "grad_norm": 0.48684123158454895, "learning_rate": 4.104889309410936e-06, "loss": 0.3626, "step": 37011 }, { "epoch": 1.6984993804781792, "grad_norm": 0.4597502648830414, "learning_rate": 4.104648083579588e-06, "loss": 0.3044, "step": 37012 }, { "epoch": 1.6985452709834337, "grad_norm": 0.4922095835208893, "learning_rate": 4.104406859901279e-06, "loss": 0.3678, "step": 37013 }, { "epoch": 1.698591161488688, "grad_norm": 0.45404890179634094, "learning_rate": 4.104165638376589e-06, "loss": 0.33, "step": 37014 }, { "epoch": 1.6986370519939424, "grad_norm": 0.4564456045627594, "learning_rate": 4.103924419006098e-06, "loss": 0.3013, "step": 37015 }, { "epoch": 1.698682942499197, "grad_norm": 0.5030897855758667, "learning_rate": 4.103683201790384e-06, "loss": 0.3745, "step": 37016 }, { "epoch": 1.6987288330044514, "grad_norm": 0.48292121291160583, "learning_rate": 4.1034419867300315e-06, "loss": 0.3684, "step": 37017 }, { "epoch": 1.6987747235097057, "grad_norm": 0.50263512134552, "learning_rate": 4.1032007738256175e-06, "loss": 0.3628, "step": 37018 }, { "epoch": 1.6988206140149602, "grad_norm": 0.4769405126571655, "learning_rate": 4.1029595630777215e-06, "loss": 0.3586, "step": 37019 }, { "epoch": 1.6988665045202147, "grad_norm": 0.44763803482055664, "learning_rate": 4.102718354486926e-06, "loss": 0.3308, "step": 37020 }, { "epoch": 1.6989123950254692, "grad_norm": 0.45846760272979736, "learning_rate": 4.102477148053809e-06, "loss": 0.3034, "step": 37021 }, { "epoch": 1.6989582855307237, "grad_norm": 0.49356159567832947, "learning_rate": 4.10223594377895e-06, "loss": 0.4278, "step": 37022 }, { "epoch": 1.6990041760359782, "grad_norm": 0.4713141620159149, "learning_rate": 4.101994741662932e-06, "loss": 0.3995, "step": 37023 }, { "epoch": 1.6990500665412327, "grad_norm": 0.4768821597099304, "learning_rate": 4.101753541706333e-06, "loss": 0.3236, "step": 37024 }, { "epoch": 1.6990959570464872, "grad_norm": 0.45070868730545044, "learning_rate": 4.101512343909733e-06, "loss": 0.3242, "step": 37025 }, { "epoch": 1.6991418475517417, "grad_norm": 0.49519988894462585, "learning_rate": 4.101271148273712e-06, "loss": 0.4296, "step": 37026 }, { "epoch": 1.699187738056996, "grad_norm": 0.46270689368247986, "learning_rate": 4.101029954798851e-06, "loss": 0.3618, "step": 37027 }, { "epoch": 1.6992336285622505, "grad_norm": 0.4458094835281372, "learning_rate": 4.1007887634857274e-06, "loss": 0.3274, "step": 37028 }, { "epoch": 1.699279519067505, "grad_norm": 0.44086140394210815, "learning_rate": 4.100547574334925e-06, "loss": 0.2969, "step": 37029 }, { "epoch": 1.6993254095727592, "grad_norm": 0.4878346025943756, "learning_rate": 4.1003063873470225e-06, "loss": 0.408, "step": 37030 }, { "epoch": 1.6993713000780137, "grad_norm": 0.48761337995529175, "learning_rate": 4.100065202522596e-06, "loss": 0.4047, "step": 37031 }, { "epoch": 1.6994171905832682, "grad_norm": 0.4447671175003052, "learning_rate": 4.0998240198622305e-06, "loss": 0.292, "step": 37032 }, { "epoch": 1.6994630810885227, "grad_norm": 0.4682255685329437, "learning_rate": 4.099582839366505e-06, "loss": 0.3737, "step": 37033 }, { "epoch": 1.6995089715937772, "grad_norm": 0.46399277448654175, "learning_rate": 4.099341661035997e-06, "loss": 0.3315, "step": 37034 }, { "epoch": 1.6995548620990317, "grad_norm": 0.48087140917778015, "learning_rate": 4.099100484871289e-06, "loss": 0.3575, "step": 37035 }, { "epoch": 1.6996007526042862, "grad_norm": 0.4813202917575836, "learning_rate": 4.0988593108729605e-06, "loss": 0.4075, "step": 37036 }, { "epoch": 1.6996466431095407, "grad_norm": 0.49177423119544983, "learning_rate": 4.098618139041591e-06, "loss": 0.403, "step": 37037 }, { "epoch": 1.6996925336147952, "grad_norm": 0.5182430744171143, "learning_rate": 4.0983769693777574e-06, "loss": 0.4402, "step": 37038 }, { "epoch": 1.6997384241200497, "grad_norm": 0.4809049665927887, "learning_rate": 4.098135801882045e-06, "loss": 0.3671, "step": 37039 }, { "epoch": 1.699784314625304, "grad_norm": 0.4783908724784851, "learning_rate": 4.097894636555032e-06, "loss": 0.3559, "step": 37040 }, { "epoch": 1.6998302051305585, "grad_norm": 0.46669304370880127, "learning_rate": 4.097653473397296e-06, "loss": 0.2959, "step": 37041 }, { "epoch": 1.699876095635813, "grad_norm": 0.46887364983558655, "learning_rate": 4.097412312409421e-06, "loss": 0.2994, "step": 37042 }, { "epoch": 1.6999219861410673, "grad_norm": 0.46676933765411377, "learning_rate": 4.097171153591983e-06, "loss": 0.3938, "step": 37043 }, { "epoch": 1.6999678766463218, "grad_norm": 0.46536609530448914, "learning_rate": 4.096929996945562e-06, "loss": 0.3525, "step": 37044 }, { "epoch": 1.7000137671515763, "grad_norm": 0.42363160848617554, "learning_rate": 4.09668884247074e-06, "loss": 0.2981, "step": 37045 }, { "epoch": 1.7000596576568308, "grad_norm": 0.4613165557384491, "learning_rate": 4.096447690168098e-06, "loss": 0.3306, "step": 37046 }, { "epoch": 1.7001055481620853, "grad_norm": 0.49438849091529846, "learning_rate": 4.0962065400382125e-06, "loss": 0.3742, "step": 37047 }, { "epoch": 1.7001514386673398, "grad_norm": 0.4550308883190155, "learning_rate": 4.095965392081667e-06, "loss": 0.3546, "step": 37048 }, { "epoch": 1.7001973291725943, "grad_norm": 0.46803539991378784, "learning_rate": 4.095724246299038e-06, "loss": 0.3539, "step": 37049 }, { "epoch": 1.7002432196778487, "grad_norm": 0.45837870240211487, "learning_rate": 4.095483102690905e-06, "loss": 0.346, "step": 37050 }, { "epoch": 1.7002891101831032, "grad_norm": 0.4857238829135895, "learning_rate": 4.095241961257852e-06, "loss": 0.4067, "step": 37051 }, { "epoch": 1.7003350006883577, "grad_norm": 0.4473886787891388, "learning_rate": 4.095000822000458e-06, "loss": 0.3103, "step": 37052 }, { "epoch": 1.700380891193612, "grad_norm": 0.4641798436641693, "learning_rate": 4.094759684919298e-06, "loss": 0.3361, "step": 37053 }, { "epoch": 1.7004267816988665, "grad_norm": 0.4464186429977417, "learning_rate": 4.094518550014957e-06, "loss": 0.3375, "step": 37054 }, { "epoch": 1.700472672204121, "grad_norm": 0.5191483497619629, "learning_rate": 4.094277417288014e-06, "loss": 0.3262, "step": 37055 }, { "epoch": 1.7005185627093753, "grad_norm": 0.5633576512336731, "learning_rate": 4.094036286739045e-06, "loss": 0.4973, "step": 37056 }, { "epoch": 1.7005644532146298, "grad_norm": 0.5799922943115234, "learning_rate": 4.093795158368636e-06, "loss": 0.3863, "step": 37057 }, { "epoch": 1.7006103437198843, "grad_norm": 0.4663558900356293, "learning_rate": 4.093554032177363e-06, "loss": 0.2877, "step": 37058 }, { "epoch": 1.7006562342251388, "grad_norm": 0.5218594074249268, "learning_rate": 4.0933129081658025e-06, "loss": 0.3695, "step": 37059 }, { "epoch": 1.7007021247303933, "grad_norm": 0.4873916208744049, "learning_rate": 4.093071786334542e-06, "loss": 0.3537, "step": 37060 }, { "epoch": 1.7007480152356478, "grad_norm": 0.44444769620895386, "learning_rate": 4.092830666684158e-06, "loss": 0.2896, "step": 37061 }, { "epoch": 1.7007939057409023, "grad_norm": 0.47003471851348877, "learning_rate": 4.092589549215229e-06, "loss": 0.3474, "step": 37062 }, { "epoch": 1.7008397962461568, "grad_norm": 0.4852973520755768, "learning_rate": 4.092348433928336e-06, "loss": 0.3661, "step": 37063 }, { "epoch": 1.7008856867514113, "grad_norm": 0.46563276648521423, "learning_rate": 4.092107320824059e-06, "loss": 0.3427, "step": 37064 }, { "epoch": 1.7009315772566656, "grad_norm": 0.4912428557872772, "learning_rate": 4.091866209902975e-06, "loss": 0.3955, "step": 37065 }, { "epoch": 1.70097746776192, "grad_norm": 0.4934949278831482, "learning_rate": 4.091625101165668e-06, "loss": 0.3848, "step": 37066 }, { "epoch": 1.7010233582671745, "grad_norm": 0.4814314544200897, "learning_rate": 4.091383994612716e-06, "loss": 0.3699, "step": 37067 }, { "epoch": 1.7010692487724288, "grad_norm": 0.4549257457256317, "learning_rate": 4.0911428902446985e-06, "loss": 0.2974, "step": 37068 }, { "epoch": 1.7011151392776833, "grad_norm": 0.45683541893959045, "learning_rate": 4.090901788062196e-06, "loss": 0.3483, "step": 37069 }, { "epoch": 1.7011610297829378, "grad_norm": 0.4526069164276123, "learning_rate": 4.0906606880657875e-06, "loss": 0.3255, "step": 37070 }, { "epoch": 1.7012069202881923, "grad_norm": 0.49994105100631714, "learning_rate": 4.090419590256054e-06, "loss": 0.3707, "step": 37071 }, { "epoch": 1.7012528107934468, "grad_norm": 0.38907769322395325, "learning_rate": 4.090178494633571e-06, "loss": 0.2404, "step": 37072 }, { "epoch": 1.7012987012987013, "grad_norm": 0.4601591229438782, "learning_rate": 4.089937401198924e-06, "loss": 0.4, "step": 37073 }, { "epoch": 1.7013445918039558, "grad_norm": 0.5101577043533325, "learning_rate": 4.089696309952692e-06, "loss": 0.3895, "step": 37074 }, { "epoch": 1.7013904823092103, "grad_norm": 0.4520625174045563, "learning_rate": 4.0894552208954495e-06, "loss": 0.3092, "step": 37075 }, { "epoch": 1.7014363728144648, "grad_norm": 0.476667195558548, "learning_rate": 4.089214134027782e-06, "loss": 0.3858, "step": 37076 }, { "epoch": 1.7014822633197193, "grad_norm": 0.47687771916389465, "learning_rate": 4.088973049350267e-06, "loss": 0.3344, "step": 37077 }, { "epoch": 1.7015281538249736, "grad_norm": 0.40557971596717834, "learning_rate": 4.088731966863483e-06, "loss": 0.2828, "step": 37078 }, { "epoch": 1.701574044330228, "grad_norm": 0.4913073182106018, "learning_rate": 4.088490886568012e-06, "loss": 0.3756, "step": 37079 }, { "epoch": 1.7016199348354826, "grad_norm": 0.47396451234817505, "learning_rate": 4.088249808464434e-06, "loss": 0.3503, "step": 37080 }, { "epoch": 1.7016658253407368, "grad_norm": 0.4691883325576782, "learning_rate": 4.088008732553324e-06, "loss": 0.3561, "step": 37081 }, { "epoch": 1.7017117158459913, "grad_norm": 0.4907822608947754, "learning_rate": 4.087767658835267e-06, "loss": 0.3526, "step": 37082 }, { "epoch": 1.7017576063512458, "grad_norm": 0.5208011865615845, "learning_rate": 4.087526587310842e-06, "loss": 0.4337, "step": 37083 }, { "epoch": 1.7018034968565003, "grad_norm": 0.4618575870990753, "learning_rate": 4.087285517980626e-06, "loss": 0.2975, "step": 37084 }, { "epoch": 1.7018493873617548, "grad_norm": 0.44503843784332275, "learning_rate": 4.087044450845201e-06, "loss": 0.285, "step": 37085 }, { "epoch": 1.7018952778670093, "grad_norm": 0.4864273965358734, "learning_rate": 4.0868033859051465e-06, "loss": 0.3988, "step": 37086 }, { "epoch": 1.7019411683722638, "grad_norm": 0.49956944584846497, "learning_rate": 4.086562323161039e-06, "loss": 0.4196, "step": 37087 }, { "epoch": 1.7019870588775183, "grad_norm": 0.44127318263053894, "learning_rate": 4.086321262613463e-06, "loss": 0.3239, "step": 37088 }, { "epoch": 1.7020329493827728, "grad_norm": 0.49322354793548584, "learning_rate": 4.0860802042629964e-06, "loss": 0.4267, "step": 37089 }, { "epoch": 1.7020788398880273, "grad_norm": 0.4673802852630615, "learning_rate": 4.085839148110218e-06, "loss": 0.3372, "step": 37090 }, { "epoch": 1.7021247303932816, "grad_norm": 0.4939850866794586, "learning_rate": 4.085598094155708e-06, "loss": 0.3476, "step": 37091 }, { "epoch": 1.702170620898536, "grad_norm": 0.49505728483200073, "learning_rate": 4.085357042400047e-06, "loss": 0.3968, "step": 37092 }, { "epoch": 1.7022165114037906, "grad_norm": 0.4444194436073303, "learning_rate": 4.08511599284381e-06, "loss": 0.3266, "step": 37093 }, { "epoch": 1.7022624019090449, "grad_norm": 0.47243350744247437, "learning_rate": 4.084874945487583e-06, "loss": 0.3892, "step": 37094 }, { "epoch": 1.7023082924142994, "grad_norm": 0.5057694315910339, "learning_rate": 4.084633900331943e-06, "loss": 0.3765, "step": 37095 }, { "epoch": 1.7023541829195539, "grad_norm": 0.4706217348575592, "learning_rate": 4.084392857377468e-06, "loss": 0.3491, "step": 37096 }, { "epoch": 1.7024000734248084, "grad_norm": 0.46859052777290344, "learning_rate": 4.084151816624742e-06, "loss": 0.3516, "step": 37097 }, { "epoch": 1.7024459639300629, "grad_norm": 0.7105806469917297, "learning_rate": 4.083910778074341e-06, "loss": 0.4175, "step": 37098 }, { "epoch": 1.7024918544353174, "grad_norm": 0.4650220572948456, "learning_rate": 4.083669741726844e-06, "loss": 0.3105, "step": 37099 }, { "epoch": 1.7025377449405719, "grad_norm": 0.5274021029472351, "learning_rate": 4.083428707582833e-06, "loss": 0.3938, "step": 37100 }, { "epoch": 1.7025836354458264, "grad_norm": 0.4464446008205414, "learning_rate": 4.083187675642887e-06, "loss": 0.2969, "step": 37101 }, { "epoch": 1.7026295259510809, "grad_norm": 0.48198747634887695, "learning_rate": 4.082946645907585e-06, "loss": 0.4273, "step": 37102 }, { "epoch": 1.7026754164563351, "grad_norm": 0.49331995844841003, "learning_rate": 4.0827056183775045e-06, "loss": 0.3963, "step": 37103 }, { "epoch": 1.7027213069615896, "grad_norm": 0.4680737853050232, "learning_rate": 4.082464593053231e-06, "loss": 0.3542, "step": 37104 }, { "epoch": 1.7027671974668441, "grad_norm": 0.4616132378578186, "learning_rate": 4.0822235699353394e-06, "loss": 0.339, "step": 37105 }, { "epoch": 1.7028130879720986, "grad_norm": 0.5102261304855347, "learning_rate": 4.08198254902441e-06, "loss": 0.407, "step": 37106 }, { "epoch": 1.702858978477353, "grad_norm": 0.5707585215568542, "learning_rate": 4.081741530321024e-06, "loss": 0.4773, "step": 37107 }, { "epoch": 1.7029048689826074, "grad_norm": 0.49624764919281006, "learning_rate": 4.081500513825759e-06, "loss": 0.4216, "step": 37108 }, { "epoch": 1.702950759487862, "grad_norm": 0.4800606369972229, "learning_rate": 4.081259499539193e-06, "loss": 0.3578, "step": 37109 }, { "epoch": 1.7029966499931164, "grad_norm": 0.5712722539901733, "learning_rate": 4.081018487461911e-06, "loss": 0.4809, "step": 37110 }, { "epoch": 1.703042540498371, "grad_norm": 0.4546302556991577, "learning_rate": 4.0807774775944895e-06, "loss": 0.3085, "step": 37111 }, { "epoch": 1.7030884310036254, "grad_norm": 0.43546995520591736, "learning_rate": 4.0805364699375064e-06, "loss": 0.3383, "step": 37112 }, { "epoch": 1.70313432150888, "grad_norm": 0.44864436984062195, "learning_rate": 4.0802954644915444e-06, "loss": 0.3192, "step": 37113 }, { "epoch": 1.7031802120141344, "grad_norm": 0.5189633369445801, "learning_rate": 4.080054461257181e-06, "loss": 0.4266, "step": 37114 }, { "epoch": 1.7032261025193889, "grad_norm": 0.5082626342773438, "learning_rate": 4.0798134602349945e-06, "loss": 0.3866, "step": 37115 }, { "epoch": 1.7032719930246432, "grad_norm": 0.4611167907714844, "learning_rate": 4.079572461425569e-06, "loss": 0.3434, "step": 37116 }, { "epoch": 1.7033178835298977, "grad_norm": 0.4593517482280731, "learning_rate": 4.0793314648294806e-06, "loss": 0.3618, "step": 37117 }, { "epoch": 1.7033637740351522, "grad_norm": 0.44533324241638184, "learning_rate": 4.079090470447308e-06, "loss": 0.2862, "step": 37118 }, { "epoch": 1.7034096645404064, "grad_norm": 0.4900319278240204, "learning_rate": 4.0788494782796335e-06, "loss": 0.3657, "step": 37119 }, { "epoch": 1.703455555045661, "grad_norm": 0.5279750823974609, "learning_rate": 4.078608488327036e-06, "loss": 0.3911, "step": 37120 }, { "epoch": 1.7035014455509154, "grad_norm": 0.5033696293830872, "learning_rate": 4.078367500590092e-06, "loss": 0.4531, "step": 37121 }, { "epoch": 1.70354733605617, "grad_norm": 0.47592678666114807, "learning_rate": 4.078126515069385e-06, "loss": 0.3454, "step": 37122 }, { "epoch": 1.7035932265614244, "grad_norm": 0.47349923849105835, "learning_rate": 4.077885531765493e-06, "loss": 0.3836, "step": 37123 }, { "epoch": 1.703639117066679, "grad_norm": 0.49263712763786316, "learning_rate": 4.077644550678993e-06, "loss": 0.4334, "step": 37124 }, { "epoch": 1.7036850075719334, "grad_norm": 0.4827812910079956, "learning_rate": 4.077403571810469e-06, "loss": 0.3741, "step": 37125 }, { "epoch": 1.703730898077188, "grad_norm": 0.45588603615760803, "learning_rate": 4.077162595160498e-06, "loss": 0.3399, "step": 37126 }, { "epoch": 1.7037767885824424, "grad_norm": 0.4579865634441376, "learning_rate": 4.076921620729658e-06, "loss": 0.3311, "step": 37127 }, { "epoch": 1.703822679087697, "grad_norm": 0.46900448203086853, "learning_rate": 4.076680648518532e-06, "loss": 0.3216, "step": 37128 }, { "epoch": 1.7038685695929512, "grad_norm": 0.4781333804130554, "learning_rate": 4.076439678527698e-06, "loss": 0.3111, "step": 37129 }, { "epoch": 1.7039144600982057, "grad_norm": 0.4797394275665283, "learning_rate": 4.076198710757733e-06, "loss": 0.4071, "step": 37130 }, { "epoch": 1.7039603506034602, "grad_norm": 0.4584585726261139, "learning_rate": 4.075957745209219e-06, "loss": 0.323, "step": 37131 }, { "epoch": 1.7040062411087145, "grad_norm": 0.46239206194877625, "learning_rate": 4.075716781882737e-06, "loss": 0.3222, "step": 37132 }, { "epoch": 1.704052131613969, "grad_norm": 0.5300318598747253, "learning_rate": 4.075475820778862e-06, "loss": 0.3756, "step": 37133 }, { "epoch": 1.7040980221192235, "grad_norm": 0.48300135135650635, "learning_rate": 4.075234861898177e-06, "loss": 0.3874, "step": 37134 }, { "epoch": 1.704143912624478, "grad_norm": 0.44051608443260193, "learning_rate": 4.074993905241261e-06, "loss": 0.34, "step": 37135 }, { "epoch": 1.7041898031297325, "grad_norm": 0.4496227204799652, "learning_rate": 4.074752950808692e-06, "loss": 0.3287, "step": 37136 }, { "epoch": 1.704235693634987, "grad_norm": 0.52524334192276, "learning_rate": 4.074511998601049e-06, "loss": 0.5143, "step": 37137 }, { "epoch": 1.7042815841402414, "grad_norm": 0.46143487095832825, "learning_rate": 4.074271048618914e-06, "loss": 0.3636, "step": 37138 }, { "epoch": 1.704327474645496, "grad_norm": 0.4894908666610718, "learning_rate": 4.0740301008628655e-06, "loss": 0.4186, "step": 37139 }, { "epoch": 1.7043733651507504, "grad_norm": 0.42181724309921265, "learning_rate": 4.073789155333481e-06, "loss": 0.2919, "step": 37140 }, { "epoch": 1.704419255656005, "grad_norm": 0.44759997725486755, "learning_rate": 4.073548212031342e-06, "loss": 0.3237, "step": 37141 }, { "epoch": 1.7044651461612592, "grad_norm": 0.42724454402923584, "learning_rate": 4.073307270957027e-06, "loss": 0.2999, "step": 37142 }, { "epoch": 1.7045110366665137, "grad_norm": 0.48375964164733887, "learning_rate": 4.073066332111115e-06, "loss": 0.4095, "step": 37143 }, { "epoch": 1.7045569271717682, "grad_norm": 0.48780426383018494, "learning_rate": 4.072825395494186e-06, "loss": 0.3998, "step": 37144 }, { "epoch": 1.7046028176770225, "grad_norm": 0.5033005475997925, "learning_rate": 4.072584461106821e-06, "loss": 0.4568, "step": 37145 }, { "epoch": 1.704648708182277, "grad_norm": 0.4485622048377991, "learning_rate": 4.0723435289495945e-06, "loss": 0.3403, "step": 37146 }, { "epoch": 1.7046945986875315, "grad_norm": 0.45271849632263184, "learning_rate": 4.072102599023091e-06, "loss": 0.3707, "step": 37147 }, { "epoch": 1.704740489192786, "grad_norm": 0.43735307455062866, "learning_rate": 4.0718616713278885e-06, "loss": 0.3403, "step": 37148 }, { "epoch": 1.7047863796980405, "grad_norm": 0.4794866740703583, "learning_rate": 4.071620745864564e-06, "loss": 0.3779, "step": 37149 }, { "epoch": 1.704832270203295, "grad_norm": 0.4712761640548706, "learning_rate": 4.0713798226337005e-06, "loss": 0.3812, "step": 37150 }, { "epoch": 1.7048781607085495, "grad_norm": 0.509405791759491, "learning_rate": 4.071138901635875e-06, "loss": 0.3885, "step": 37151 }, { "epoch": 1.704924051213804, "grad_norm": 0.4988270699977875, "learning_rate": 4.070897982871665e-06, "loss": 0.3889, "step": 37152 }, { "epoch": 1.7049699417190585, "grad_norm": 0.4369191825389862, "learning_rate": 4.070657066341655e-06, "loss": 0.3321, "step": 37153 }, { "epoch": 1.7050158322243127, "grad_norm": 0.49078813195228577, "learning_rate": 4.07041615204642e-06, "loss": 0.4246, "step": 37154 }, { "epoch": 1.7050617227295672, "grad_norm": 0.4961918890476227, "learning_rate": 4.070175239986541e-06, "loss": 0.3688, "step": 37155 }, { "epoch": 1.7051076132348217, "grad_norm": 0.4577355980873108, "learning_rate": 4.069934330162598e-06, "loss": 0.323, "step": 37156 }, { "epoch": 1.705153503740076, "grad_norm": 0.4761107563972473, "learning_rate": 4.069693422575169e-06, "loss": 0.3504, "step": 37157 }, { "epoch": 1.7051993942453305, "grad_norm": 0.46491101384162903, "learning_rate": 4.0694525172248315e-06, "loss": 0.3409, "step": 37158 }, { "epoch": 1.705245284750585, "grad_norm": 0.4526561498641968, "learning_rate": 4.06921161411217e-06, "loss": 0.3291, "step": 37159 }, { "epoch": 1.7052911752558395, "grad_norm": 0.4282483756542206, "learning_rate": 4.06897071323776e-06, "loss": 0.2818, "step": 37160 }, { "epoch": 1.705337065761094, "grad_norm": 0.44433850049972534, "learning_rate": 4.06872981460218e-06, "loss": 0.3024, "step": 37161 }, { "epoch": 1.7053829562663485, "grad_norm": 0.46502140164375305, "learning_rate": 4.068488918206013e-06, "loss": 0.375, "step": 37162 }, { "epoch": 1.705428846771603, "grad_norm": 0.5303376913070679, "learning_rate": 4.068248024049836e-06, "loss": 0.4066, "step": 37163 }, { "epoch": 1.7054747372768575, "grad_norm": 0.46291354298591614, "learning_rate": 4.068007132134226e-06, "loss": 0.39, "step": 37164 }, { "epoch": 1.705520627782112, "grad_norm": 0.4432717561721802, "learning_rate": 4.067766242459767e-06, "loss": 0.3483, "step": 37165 }, { "epoch": 1.7055665182873665, "grad_norm": 0.5276281833648682, "learning_rate": 4.067525355027037e-06, "loss": 0.4376, "step": 37166 }, { "epoch": 1.7056124087926208, "grad_norm": 0.43291765451431274, "learning_rate": 4.067284469836612e-06, "loss": 0.319, "step": 37167 }, { "epoch": 1.7056582992978753, "grad_norm": 0.491380512714386, "learning_rate": 4.067043586889073e-06, "loss": 0.3916, "step": 37168 }, { "epoch": 1.7057041898031298, "grad_norm": 0.4536169171333313, "learning_rate": 4.066802706185001e-06, "loss": 0.3187, "step": 37169 }, { "epoch": 1.705750080308384, "grad_norm": 0.460276335477829, "learning_rate": 4.0665618277249745e-06, "loss": 0.3325, "step": 37170 }, { "epoch": 1.7057959708136385, "grad_norm": 0.44502002000808716, "learning_rate": 4.066320951509571e-06, "loss": 0.3407, "step": 37171 }, { "epoch": 1.705841861318893, "grad_norm": 0.50029057264328, "learning_rate": 4.0660800775393715e-06, "loss": 0.4023, "step": 37172 }, { "epoch": 1.7058877518241475, "grad_norm": 0.47610247135162354, "learning_rate": 4.065839205814956e-06, "loss": 0.4003, "step": 37173 }, { "epoch": 1.705933642329402, "grad_norm": 0.4778377413749695, "learning_rate": 4.065598336336899e-06, "loss": 0.4003, "step": 37174 }, { "epoch": 1.7059795328346565, "grad_norm": 0.47889193892478943, "learning_rate": 4.065357469105785e-06, "loss": 0.3875, "step": 37175 }, { "epoch": 1.706025423339911, "grad_norm": 0.531707763671875, "learning_rate": 4.065116604122192e-06, "loss": 0.4382, "step": 37176 }, { "epoch": 1.7060713138451655, "grad_norm": 0.46462202072143555, "learning_rate": 4.0648757413866975e-06, "loss": 0.3327, "step": 37177 }, { "epoch": 1.70611720435042, "grad_norm": 0.4666639566421509, "learning_rate": 4.064634880899882e-06, "loss": 0.3516, "step": 37178 }, { "epoch": 1.7061630948556745, "grad_norm": 0.5211677551269531, "learning_rate": 4.064394022662325e-06, "loss": 0.4127, "step": 37179 }, { "epoch": 1.7062089853609288, "grad_norm": 0.4819161891937256, "learning_rate": 4.064153166674603e-06, "loss": 0.3506, "step": 37180 }, { "epoch": 1.7062548758661833, "grad_norm": 0.5249783396720886, "learning_rate": 4.0639123129372995e-06, "loss": 0.4482, "step": 37181 }, { "epoch": 1.7063007663714378, "grad_norm": 0.5111711025238037, "learning_rate": 4.063671461450991e-06, "loss": 0.4053, "step": 37182 }, { "epoch": 1.706346656876692, "grad_norm": 0.4787129759788513, "learning_rate": 4.063430612216257e-06, "loss": 0.3952, "step": 37183 }, { "epoch": 1.7063925473819466, "grad_norm": 0.47377315163612366, "learning_rate": 4.063189765233677e-06, "loss": 0.3734, "step": 37184 }, { "epoch": 1.706438437887201, "grad_norm": 0.4762354791164398, "learning_rate": 4.06294892050383e-06, "loss": 0.374, "step": 37185 }, { "epoch": 1.7064843283924556, "grad_norm": 0.46650922298431396, "learning_rate": 4.062708078027293e-06, "loss": 0.3815, "step": 37186 }, { "epoch": 1.70653021889771, "grad_norm": 0.4503220021724701, "learning_rate": 4.06246723780465e-06, "loss": 0.3286, "step": 37187 }, { "epoch": 1.7065761094029646, "grad_norm": 0.4705676734447479, "learning_rate": 4.062226399836478e-06, "loss": 0.3555, "step": 37188 }, { "epoch": 1.706621999908219, "grad_norm": 0.4887832701206207, "learning_rate": 4.061985564123352e-06, "loss": 0.3875, "step": 37189 }, { "epoch": 1.7066678904134736, "grad_norm": 0.4402146637439728, "learning_rate": 4.061744730665857e-06, "loss": 0.3039, "step": 37190 }, { "epoch": 1.706713780918728, "grad_norm": 0.4440721273422241, "learning_rate": 4.06150389946457e-06, "loss": 0.2953, "step": 37191 }, { "epoch": 1.7067596714239823, "grad_norm": 0.5012357234954834, "learning_rate": 4.0612630705200685e-06, "loss": 0.4609, "step": 37192 }, { "epoch": 1.7068055619292368, "grad_norm": 0.47520551085472107, "learning_rate": 4.061022243832934e-06, "loss": 0.329, "step": 37193 }, { "epoch": 1.7068514524344913, "grad_norm": 0.4874781668186188, "learning_rate": 4.060781419403746e-06, "loss": 0.3749, "step": 37194 }, { "epoch": 1.7068973429397458, "grad_norm": 0.502419650554657, "learning_rate": 4.060540597233078e-06, "loss": 0.4328, "step": 37195 }, { "epoch": 1.706943233445, "grad_norm": 0.48548516631126404, "learning_rate": 4.060299777321517e-06, "loss": 0.3993, "step": 37196 }, { "epoch": 1.7069891239502546, "grad_norm": 0.4321601092815399, "learning_rate": 4.060058959669638e-06, "loss": 0.3074, "step": 37197 }, { "epoch": 1.707035014455509, "grad_norm": 0.47215190529823303, "learning_rate": 4.05981814427802e-06, "loss": 0.4133, "step": 37198 }, { "epoch": 1.7070809049607636, "grad_norm": 0.47352322936058044, "learning_rate": 4.059577331147243e-06, "loss": 0.342, "step": 37199 }, { "epoch": 1.707126795466018, "grad_norm": 0.49360090494155884, "learning_rate": 4.059336520277886e-06, "loss": 0.4076, "step": 37200 }, { "epoch": 1.7071726859712726, "grad_norm": 0.4465285539627075, "learning_rate": 4.059095711670527e-06, "loss": 0.3298, "step": 37201 }, { "epoch": 1.707218576476527, "grad_norm": 0.4660680294036865, "learning_rate": 4.058854905325746e-06, "loss": 0.3594, "step": 37202 }, { "epoch": 1.7072644669817816, "grad_norm": 0.5783329010009766, "learning_rate": 4.058614101244122e-06, "loss": 0.4123, "step": 37203 }, { "epoch": 1.707310357487036, "grad_norm": 0.4617738127708435, "learning_rate": 4.058373299426235e-06, "loss": 0.3675, "step": 37204 }, { "epoch": 1.7073562479922904, "grad_norm": 0.47495830059051514, "learning_rate": 4.058132499872661e-06, "loss": 0.3382, "step": 37205 }, { "epoch": 1.7074021384975449, "grad_norm": 0.4865867495536804, "learning_rate": 4.057891702583983e-06, "loss": 0.4318, "step": 37206 }, { "epoch": 1.7074480290027994, "grad_norm": 0.47814252972602844, "learning_rate": 4.057650907560777e-06, "loss": 0.3801, "step": 37207 }, { "epoch": 1.7074939195080536, "grad_norm": 0.4484092593193054, "learning_rate": 4.057410114803622e-06, "loss": 0.3126, "step": 37208 }, { "epoch": 1.7075398100133081, "grad_norm": 0.45347321033477783, "learning_rate": 4.0571693243131015e-06, "loss": 0.3328, "step": 37209 }, { "epoch": 1.7075857005185626, "grad_norm": 0.46406063437461853, "learning_rate": 4.05692853608979e-06, "loss": 0.3584, "step": 37210 }, { "epoch": 1.7076315910238171, "grad_norm": 0.4286377727985382, "learning_rate": 4.056687750134265e-06, "loss": 0.318, "step": 37211 }, { "epoch": 1.7076774815290716, "grad_norm": 0.49172669649124146, "learning_rate": 4.056446966447111e-06, "loss": 0.3725, "step": 37212 }, { "epoch": 1.7077233720343261, "grad_norm": 0.4806927442550659, "learning_rate": 4.056206185028903e-06, "loss": 0.3857, "step": 37213 }, { "epoch": 1.7077692625395806, "grad_norm": 0.49589765071868896, "learning_rate": 4.055965405880223e-06, "loss": 0.387, "step": 37214 }, { "epoch": 1.707815153044835, "grad_norm": 0.49459123611450195, "learning_rate": 4.055724629001647e-06, "loss": 0.4132, "step": 37215 }, { "epoch": 1.7078610435500896, "grad_norm": 0.4940870404243469, "learning_rate": 4.055483854393756e-06, "loss": 0.4572, "step": 37216 }, { "epoch": 1.707906934055344, "grad_norm": 0.47986456751823425, "learning_rate": 4.0552430820571255e-06, "loss": 0.4022, "step": 37217 }, { "epoch": 1.7079528245605984, "grad_norm": 0.448303759098053, "learning_rate": 4.0550023119923395e-06, "loss": 0.3567, "step": 37218 }, { "epoch": 1.7079987150658529, "grad_norm": 0.4745594561100006, "learning_rate": 4.054761544199975e-06, "loss": 0.372, "step": 37219 }, { "epoch": 1.7080446055711074, "grad_norm": 0.5128031373023987, "learning_rate": 4.054520778680609e-06, "loss": 0.4857, "step": 37220 }, { "epoch": 1.7080904960763617, "grad_norm": 0.48289692401885986, "learning_rate": 4.054280015434824e-06, "loss": 0.3718, "step": 37221 }, { "epoch": 1.7081363865816162, "grad_norm": 0.4999849200248718, "learning_rate": 4.0540392544631965e-06, "loss": 0.4086, "step": 37222 }, { "epoch": 1.7081822770868706, "grad_norm": 0.47292983531951904, "learning_rate": 4.053798495766305e-06, "loss": 0.3098, "step": 37223 }, { "epoch": 1.7082281675921251, "grad_norm": 0.4618285000324249, "learning_rate": 4.05355773934473e-06, "loss": 0.3409, "step": 37224 }, { "epoch": 1.7082740580973796, "grad_norm": 0.531160295009613, "learning_rate": 4.053316985199051e-06, "loss": 0.3811, "step": 37225 }, { "epoch": 1.7083199486026341, "grad_norm": 0.4821196496486664, "learning_rate": 4.053076233329845e-06, "loss": 0.3813, "step": 37226 }, { "epoch": 1.7083658391078886, "grad_norm": 0.5254929661750793, "learning_rate": 4.0528354837376925e-06, "loss": 0.4099, "step": 37227 }, { "epoch": 1.7084117296131431, "grad_norm": 0.45511549711227417, "learning_rate": 4.052594736423171e-06, "loss": 0.3534, "step": 37228 }, { "epoch": 1.7084576201183976, "grad_norm": 0.42991873621940613, "learning_rate": 4.052353991386859e-06, "loss": 0.2792, "step": 37229 }, { "epoch": 1.708503510623652, "grad_norm": 0.46137696504592896, "learning_rate": 4.052113248629338e-06, "loss": 0.3106, "step": 37230 }, { "epoch": 1.7085494011289064, "grad_norm": 0.43926867842674255, "learning_rate": 4.051872508151187e-06, "loss": 0.3031, "step": 37231 }, { "epoch": 1.708595291634161, "grad_norm": 0.47060224413871765, "learning_rate": 4.05163176995298e-06, "loss": 0.3461, "step": 37232 }, { "epoch": 1.7086411821394154, "grad_norm": 0.46438634395599365, "learning_rate": 4.051391034035301e-06, "loss": 0.3577, "step": 37233 }, { "epoch": 1.7086870726446697, "grad_norm": 0.47936832904815674, "learning_rate": 4.051150300398729e-06, "loss": 0.3988, "step": 37234 }, { "epoch": 1.7087329631499242, "grad_norm": 0.43051546812057495, "learning_rate": 4.050909569043839e-06, "loss": 0.2831, "step": 37235 }, { "epoch": 1.7087788536551787, "grad_norm": 0.4612168073654175, "learning_rate": 4.0506688399712115e-06, "loss": 0.3627, "step": 37236 }, { "epoch": 1.7088247441604332, "grad_norm": 0.45353445410728455, "learning_rate": 4.050428113181427e-06, "loss": 0.3218, "step": 37237 }, { "epoch": 1.7088706346656877, "grad_norm": 0.5035279393196106, "learning_rate": 4.0501873886750634e-06, "loss": 0.3295, "step": 37238 }, { "epoch": 1.7089165251709422, "grad_norm": 0.41984808444976807, "learning_rate": 4.049946666452696e-06, "loss": 0.2999, "step": 37239 }, { "epoch": 1.7089624156761967, "grad_norm": 0.482452392578125, "learning_rate": 4.049705946514911e-06, "loss": 0.3922, "step": 37240 }, { "epoch": 1.7090083061814512, "grad_norm": 0.6534234881401062, "learning_rate": 4.0494652288622825e-06, "loss": 0.4853, "step": 37241 }, { "epoch": 1.7090541966867057, "grad_norm": 0.4699498116970062, "learning_rate": 4.049224513495389e-06, "loss": 0.3556, "step": 37242 }, { "epoch": 1.70910008719196, "grad_norm": 0.4807699918746948, "learning_rate": 4.048983800414812e-06, "loss": 0.3912, "step": 37243 }, { "epoch": 1.7091459776972144, "grad_norm": 0.4927505552768707, "learning_rate": 4.048743089621127e-06, "loss": 0.4045, "step": 37244 }, { "epoch": 1.709191868202469, "grad_norm": 0.4810812771320343, "learning_rate": 4.048502381114915e-06, "loss": 0.3669, "step": 37245 }, { "epoch": 1.7092377587077232, "grad_norm": 0.5033617615699768, "learning_rate": 4.048261674896755e-06, "loss": 0.3704, "step": 37246 }, { "epoch": 1.7092836492129777, "grad_norm": 0.4786507487297058, "learning_rate": 4.048020970967226e-06, "loss": 0.3909, "step": 37247 }, { "epoch": 1.7093295397182322, "grad_norm": 0.4453367292881012, "learning_rate": 4.047780269326905e-06, "loss": 0.3297, "step": 37248 }, { "epoch": 1.7093754302234867, "grad_norm": 0.4948914349079132, "learning_rate": 4.047539569976373e-06, "loss": 0.3985, "step": 37249 }, { "epoch": 1.7094213207287412, "grad_norm": 0.47703272104263306, "learning_rate": 4.047298872916207e-06, "loss": 0.3578, "step": 37250 }, { "epoch": 1.7094672112339957, "grad_norm": 0.48056522011756897, "learning_rate": 4.047058178146985e-06, "loss": 0.3912, "step": 37251 }, { "epoch": 1.7095131017392502, "grad_norm": 0.4651651978492737, "learning_rate": 4.04681748566929e-06, "loss": 0.3118, "step": 37252 }, { "epoch": 1.7095589922445047, "grad_norm": 0.432786226272583, "learning_rate": 4.0465767954836975e-06, "loss": 0.2708, "step": 37253 }, { "epoch": 1.7096048827497592, "grad_norm": 0.47514307498931885, "learning_rate": 4.046336107590785e-06, "loss": 0.3866, "step": 37254 }, { "epoch": 1.7096507732550137, "grad_norm": 0.487131804227829, "learning_rate": 4.046095421991136e-06, "loss": 0.3375, "step": 37255 }, { "epoch": 1.709696663760268, "grad_norm": 0.4613759517669678, "learning_rate": 4.045854738685325e-06, "loss": 0.3221, "step": 37256 }, { "epoch": 1.7097425542655225, "grad_norm": 0.5064923763275146, "learning_rate": 4.045614057673931e-06, "loss": 0.4493, "step": 37257 }, { "epoch": 1.709788444770777, "grad_norm": 0.44667404890060425, "learning_rate": 4.045373378957536e-06, "loss": 0.3596, "step": 37258 }, { "epoch": 1.7098343352760312, "grad_norm": 0.45310187339782715, "learning_rate": 4.045132702536716e-06, "loss": 0.356, "step": 37259 }, { "epoch": 1.7098802257812857, "grad_norm": 0.42656636238098145, "learning_rate": 4.044892028412049e-06, "loss": 0.3157, "step": 37260 }, { "epoch": 1.7099261162865402, "grad_norm": 0.45634812116622925, "learning_rate": 4.044651356584117e-06, "loss": 0.2996, "step": 37261 }, { "epoch": 1.7099720067917947, "grad_norm": 0.4932784140110016, "learning_rate": 4.044410687053498e-06, "loss": 0.3787, "step": 37262 }, { "epoch": 1.7100178972970492, "grad_norm": 0.43509674072265625, "learning_rate": 4.044170019820767e-06, "loss": 0.3027, "step": 37263 }, { "epoch": 1.7100637878023037, "grad_norm": 0.4595159590244293, "learning_rate": 4.043929354886508e-06, "loss": 0.3401, "step": 37264 }, { "epoch": 1.7101096783075582, "grad_norm": 0.4910777807235718, "learning_rate": 4.043688692251296e-06, "loss": 0.4285, "step": 37265 }, { "epoch": 1.7101555688128127, "grad_norm": 0.47283264994621277, "learning_rate": 4.043448031915712e-06, "loss": 0.3039, "step": 37266 }, { "epoch": 1.7102014593180672, "grad_norm": 0.5002070069313049, "learning_rate": 4.04320737388033e-06, "loss": 0.3848, "step": 37267 }, { "epoch": 1.7102473498233217, "grad_norm": 0.500329852104187, "learning_rate": 4.042966718145735e-06, "loss": 0.4079, "step": 37268 }, { "epoch": 1.710293240328576, "grad_norm": 0.4287467896938324, "learning_rate": 4.042726064712503e-06, "loss": 0.3133, "step": 37269 }, { "epoch": 1.7103391308338305, "grad_norm": 0.45750361680984497, "learning_rate": 4.042485413581212e-06, "loss": 0.3319, "step": 37270 }, { "epoch": 1.710385021339085, "grad_norm": 0.4609622657299042, "learning_rate": 4.0422447647524424e-06, "loss": 0.3658, "step": 37271 }, { "epoch": 1.7104309118443393, "grad_norm": 0.5328750014305115, "learning_rate": 4.042004118226772e-06, "loss": 0.415, "step": 37272 }, { "epoch": 1.7104768023495938, "grad_norm": 0.4722987115383148, "learning_rate": 4.041763474004777e-06, "loss": 0.3503, "step": 37273 }, { "epoch": 1.7105226928548483, "grad_norm": 0.4802601635456085, "learning_rate": 4.04152283208704e-06, "loss": 0.3597, "step": 37274 }, { "epoch": 1.7105685833601028, "grad_norm": 0.4661281406879425, "learning_rate": 4.04128219247414e-06, "loss": 0.3344, "step": 37275 }, { "epoch": 1.7106144738653573, "grad_norm": 0.48276233673095703, "learning_rate": 4.0410415551666495e-06, "loss": 0.3217, "step": 37276 }, { "epoch": 1.7106603643706118, "grad_norm": 0.47436198592185974, "learning_rate": 4.040800920165155e-06, "loss": 0.3533, "step": 37277 }, { "epoch": 1.7107062548758663, "grad_norm": 0.5083417296409607, "learning_rate": 4.040560287470231e-06, "loss": 0.4449, "step": 37278 }, { "epoch": 1.7107521453811207, "grad_norm": 0.502764105796814, "learning_rate": 4.040319657082455e-06, "loss": 0.3807, "step": 37279 }, { "epoch": 1.7107980358863752, "grad_norm": 0.4862821698188782, "learning_rate": 4.040079029002409e-06, "loss": 0.3982, "step": 37280 }, { "epoch": 1.7108439263916295, "grad_norm": 0.4743708372116089, "learning_rate": 4.0398384032306694e-06, "loss": 0.3786, "step": 37281 }, { "epoch": 1.710889816896884, "grad_norm": 0.4454227387905121, "learning_rate": 4.039597779767814e-06, "loss": 0.3083, "step": 37282 }, { "epoch": 1.7109357074021385, "grad_norm": 0.4496665298938751, "learning_rate": 4.039357158614425e-06, "loss": 0.3254, "step": 37283 }, { "epoch": 1.710981597907393, "grad_norm": 0.4640202224254608, "learning_rate": 4.039116539771078e-06, "loss": 0.3255, "step": 37284 }, { "epoch": 1.7110274884126473, "grad_norm": 0.456325888633728, "learning_rate": 4.038875923238352e-06, "loss": 0.3318, "step": 37285 }, { "epoch": 1.7110733789179018, "grad_norm": 0.46854954957962036, "learning_rate": 4.038635309016827e-06, "loss": 0.3089, "step": 37286 }, { "epoch": 1.7111192694231563, "grad_norm": 0.47335946559906006, "learning_rate": 4.03839469710708e-06, "loss": 0.3754, "step": 37287 }, { "epoch": 1.7111651599284108, "grad_norm": 0.5227986574172974, "learning_rate": 4.038154087509689e-06, "loss": 0.4124, "step": 37288 }, { "epoch": 1.7112110504336653, "grad_norm": 0.5032994151115417, "learning_rate": 4.037913480225236e-06, "loss": 0.3725, "step": 37289 }, { "epoch": 1.7112569409389198, "grad_norm": 0.5717638731002808, "learning_rate": 4.037672875254297e-06, "loss": 0.3159, "step": 37290 }, { "epoch": 1.7113028314441743, "grad_norm": 0.4264291226863861, "learning_rate": 4.03743227259745e-06, "loss": 0.2861, "step": 37291 }, { "epoch": 1.7113487219494288, "grad_norm": 0.44525814056396484, "learning_rate": 4.037191672255277e-06, "loss": 0.297, "step": 37292 }, { "epoch": 1.7113946124546833, "grad_norm": 0.5035507678985596, "learning_rate": 4.036951074228352e-06, "loss": 0.3295, "step": 37293 }, { "epoch": 1.7114405029599375, "grad_norm": 0.43730267882347107, "learning_rate": 4.036710478517254e-06, "loss": 0.2824, "step": 37294 }, { "epoch": 1.711486393465192, "grad_norm": 0.49828433990478516, "learning_rate": 4.036469885122567e-06, "loss": 0.3226, "step": 37295 }, { "epoch": 1.7115322839704465, "grad_norm": 0.48228147625923157, "learning_rate": 4.036229294044864e-06, "loss": 0.3638, "step": 37296 }, { "epoch": 1.7115781744757008, "grad_norm": 0.495972603559494, "learning_rate": 4.035988705284725e-06, "loss": 0.349, "step": 37297 }, { "epoch": 1.7116240649809553, "grad_norm": 0.4948122799396515, "learning_rate": 4.0357481188427305e-06, "loss": 0.3537, "step": 37298 }, { "epoch": 1.7116699554862098, "grad_norm": 0.44758141040802, "learning_rate": 4.035507534719457e-06, "loss": 0.3252, "step": 37299 }, { "epoch": 1.7117158459914643, "grad_norm": 0.4738883972167969, "learning_rate": 4.035266952915484e-06, "loss": 0.3553, "step": 37300 }, { "epoch": 1.7117617364967188, "grad_norm": 0.4827274978160858, "learning_rate": 4.035026373431387e-06, "loss": 0.3836, "step": 37301 }, { "epoch": 1.7118076270019733, "grad_norm": 0.5196304321289062, "learning_rate": 4.034785796267749e-06, "loss": 0.4134, "step": 37302 }, { "epoch": 1.7118535175072278, "grad_norm": 0.47551608085632324, "learning_rate": 4.034545221425147e-06, "loss": 0.3786, "step": 37303 }, { "epoch": 1.7118994080124823, "grad_norm": 0.48373571038246155, "learning_rate": 4.034304648904156e-06, "loss": 0.3692, "step": 37304 }, { "epoch": 1.7119452985177368, "grad_norm": 0.4583427608013153, "learning_rate": 4.03406407870536e-06, "loss": 0.3214, "step": 37305 }, { "epoch": 1.7119911890229913, "grad_norm": 0.4982357323169708, "learning_rate": 4.033823510829335e-06, "loss": 0.4239, "step": 37306 }, { "epoch": 1.7120370795282456, "grad_norm": 0.4551074802875519, "learning_rate": 4.03358294527666e-06, "loss": 0.3636, "step": 37307 }, { "epoch": 1.7120829700335, "grad_norm": 0.47619926929473877, "learning_rate": 4.033342382047911e-06, "loss": 0.4229, "step": 37308 }, { "epoch": 1.7121288605387546, "grad_norm": 0.4577862620353699, "learning_rate": 4.033101821143671e-06, "loss": 0.3404, "step": 37309 }, { "epoch": 1.7121747510440088, "grad_norm": 0.4380928575992584, "learning_rate": 4.032861262564513e-06, "loss": 0.3483, "step": 37310 }, { "epoch": 1.7122206415492633, "grad_norm": 0.4592551589012146, "learning_rate": 4.032620706311021e-06, "loss": 0.3878, "step": 37311 }, { "epoch": 1.7122665320545178, "grad_norm": 0.4703465700149536, "learning_rate": 4.03238015238377e-06, "loss": 0.352, "step": 37312 }, { "epoch": 1.7123124225597723, "grad_norm": 0.4664079248905182, "learning_rate": 4.032139600783339e-06, "loss": 0.3273, "step": 37313 }, { "epoch": 1.7123583130650268, "grad_norm": 0.48494628071784973, "learning_rate": 4.031899051510307e-06, "loss": 0.3484, "step": 37314 }, { "epoch": 1.7124042035702813, "grad_norm": 0.4541513919830322, "learning_rate": 4.031658504565253e-06, "loss": 0.3646, "step": 37315 }, { "epoch": 1.7124500940755358, "grad_norm": 0.4337867498397827, "learning_rate": 4.031417959948753e-06, "loss": 0.2987, "step": 37316 }, { "epoch": 1.7124959845807903, "grad_norm": 0.4954255521297455, "learning_rate": 4.031177417661388e-06, "loss": 0.4118, "step": 37317 }, { "epoch": 1.7125418750860448, "grad_norm": 0.5105871558189392, "learning_rate": 4.030936877703736e-06, "loss": 0.4075, "step": 37318 }, { "epoch": 1.712587765591299, "grad_norm": 0.49367764592170715, "learning_rate": 4.0306963400763745e-06, "loss": 0.3406, "step": 37319 }, { "epoch": 1.7126336560965536, "grad_norm": 0.5028600692749023, "learning_rate": 4.030455804779883e-06, "loss": 0.4086, "step": 37320 }, { "epoch": 1.712679546601808, "grad_norm": 0.48203909397125244, "learning_rate": 4.030215271814839e-06, "loss": 0.4042, "step": 37321 }, { "epoch": 1.7127254371070626, "grad_norm": 0.4726642072200775, "learning_rate": 4.02997474118182e-06, "loss": 0.33, "step": 37322 }, { "epoch": 1.7127713276123169, "grad_norm": 0.5309221744537354, "learning_rate": 4.029734212881408e-06, "loss": 0.4032, "step": 37323 }, { "epoch": 1.7128172181175714, "grad_norm": 0.4851519465446472, "learning_rate": 4.0294936869141776e-06, "loss": 0.3516, "step": 37324 }, { "epoch": 1.7128631086228259, "grad_norm": 0.44537079334259033, "learning_rate": 4.029253163280707e-06, "loss": 0.3075, "step": 37325 }, { "epoch": 1.7129089991280804, "grad_norm": 0.4342617392539978, "learning_rate": 4.0290126419815785e-06, "loss": 0.3292, "step": 37326 }, { "epoch": 1.7129548896333349, "grad_norm": 0.47228944301605225, "learning_rate": 4.028772123017368e-06, "loss": 0.3569, "step": 37327 }, { "epoch": 1.7130007801385894, "grad_norm": 0.5286669135093689, "learning_rate": 4.028531606388653e-06, "loss": 0.3839, "step": 37328 }, { "epoch": 1.7130466706438439, "grad_norm": 0.4851683974266052, "learning_rate": 4.028291092096013e-06, "loss": 0.3847, "step": 37329 }, { "epoch": 1.7130925611490984, "grad_norm": 0.4726741313934326, "learning_rate": 4.028050580140027e-06, "loss": 0.3779, "step": 37330 }, { "epoch": 1.7131384516543529, "grad_norm": 0.4778311252593994, "learning_rate": 4.027810070521273e-06, "loss": 0.3543, "step": 37331 }, { "epoch": 1.7131843421596071, "grad_norm": 0.49756529927253723, "learning_rate": 4.027569563240326e-06, "loss": 0.3535, "step": 37332 }, { "epoch": 1.7132302326648616, "grad_norm": 0.4910120964050293, "learning_rate": 4.02732905829777e-06, "loss": 0.4271, "step": 37333 }, { "epoch": 1.7132761231701161, "grad_norm": 0.47159597277641296, "learning_rate": 4.027088555694181e-06, "loss": 0.3272, "step": 37334 }, { "epoch": 1.7133220136753704, "grad_norm": 0.46531111001968384, "learning_rate": 4.026848055430135e-06, "loss": 0.3644, "step": 37335 }, { "epoch": 1.713367904180625, "grad_norm": 0.4460766613483429, "learning_rate": 4.026607557506214e-06, "loss": 0.3306, "step": 37336 }, { "epoch": 1.7134137946858794, "grad_norm": 0.49223431944847107, "learning_rate": 4.026367061922994e-06, "loss": 0.4107, "step": 37337 }, { "epoch": 1.713459685191134, "grad_norm": 0.4411812722682953, "learning_rate": 4.026126568681052e-06, "loss": 0.2922, "step": 37338 }, { "epoch": 1.7135055756963884, "grad_norm": 0.4565831422805786, "learning_rate": 4.02588607778097e-06, "loss": 0.3368, "step": 37339 }, { "epoch": 1.713551466201643, "grad_norm": 0.5011718273162842, "learning_rate": 4.025645589223326e-06, "loss": 0.379, "step": 37340 }, { "epoch": 1.7135973567068974, "grad_norm": 0.45208147168159485, "learning_rate": 4.025405103008694e-06, "loss": 0.3199, "step": 37341 }, { "epoch": 1.7136432472121519, "grad_norm": 0.47122251987457275, "learning_rate": 4.025164619137658e-06, "loss": 0.3283, "step": 37342 }, { "epoch": 1.7136891377174064, "grad_norm": 0.47793155908584595, "learning_rate": 4.024924137610792e-06, "loss": 0.3656, "step": 37343 }, { "epoch": 1.7137350282226609, "grad_norm": 0.4862075448036194, "learning_rate": 4.024683658428675e-06, "loss": 0.335, "step": 37344 }, { "epoch": 1.7137809187279152, "grad_norm": 0.4497852623462677, "learning_rate": 4.024443181591887e-06, "loss": 0.3136, "step": 37345 }, { "epoch": 1.7138268092331697, "grad_norm": 0.457185834646225, "learning_rate": 4.024202707101006e-06, "loss": 0.3414, "step": 37346 }, { "epoch": 1.7138726997384242, "grad_norm": 0.4834270477294922, "learning_rate": 4.023962234956607e-06, "loss": 0.3761, "step": 37347 }, { "epoch": 1.7139185902436784, "grad_norm": 0.45955580472946167, "learning_rate": 4.023721765159272e-06, "loss": 0.3563, "step": 37348 }, { "epoch": 1.713964480748933, "grad_norm": 0.4962072968482971, "learning_rate": 4.023481297709579e-06, "loss": 0.3704, "step": 37349 }, { "epoch": 1.7140103712541874, "grad_norm": 0.48209697008132935, "learning_rate": 4.023240832608104e-06, "loss": 0.3511, "step": 37350 }, { "epoch": 1.714056261759442, "grad_norm": 0.45875638723373413, "learning_rate": 4.023000369855427e-06, "loss": 0.3985, "step": 37351 }, { "epoch": 1.7141021522646964, "grad_norm": 0.4975643754005432, "learning_rate": 4.022759909452127e-06, "loss": 0.3827, "step": 37352 }, { "epoch": 1.714148042769951, "grad_norm": 0.4814925491809845, "learning_rate": 4.0225194513987776e-06, "loss": 0.4078, "step": 37353 }, { "epoch": 1.7141939332752054, "grad_norm": 0.43571585416793823, "learning_rate": 4.0222789956959635e-06, "loss": 0.3173, "step": 37354 }, { "epoch": 1.71423982378046, "grad_norm": 0.5095200538635254, "learning_rate": 4.02203854234426e-06, "loss": 0.4408, "step": 37355 }, { "epoch": 1.7142857142857144, "grad_norm": 0.43497005105018616, "learning_rate": 4.021798091344243e-06, "loss": 0.3166, "step": 37356 }, { "epoch": 1.714331604790969, "grad_norm": 0.45422419905662537, "learning_rate": 4.021557642696494e-06, "loss": 0.3567, "step": 37357 }, { "epoch": 1.7143774952962232, "grad_norm": 0.44679537415504456, "learning_rate": 4.021317196401591e-06, "loss": 0.295, "step": 37358 }, { "epoch": 1.7144233858014777, "grad_norm": 0.45649194717407227, "learning_rate": 4.021076752460108e-06, "loss": 0.3431, "step": 37359 }, { "epoch": 1.7144692763067322, "grad_norm": 0.4441635012626648, "learning_rate": 4.02083631087263e-06, "loss": 0.2946, "step": 37360 }, { "epoch": 1.7145151668119865, "grad_norm": 0.6025425791740417, "learning_rate": 4.020595871639731e-06, "loss": 0.4325, "step": 37361 }, { "epoch": 1.714561057317241, "grad_norm": 0.48729264736175537, "learning_rate": 4.020355434761989e-06, "loss": 0.394, "step": 37362 }, { "epoch": 1.7146069478224955, "grad_norm": 0.46638697385787964, "learning_rate": 4.020115000239983e-06, "loss": 0.3257, "step": 37363 }, { "epoch": 1.71465283832775, "grad_norm": 0.44915318489074707, "learning_rate": 4.019874568074292e-06, "loss": 0.3291, "step": 37364 }, { "epoch": 1.7146987288330044, "grad_norm": 0.4523506760597229, "learning_rate": 4.019634138265493e-06, "loss": 0.2986, "step": 37365 }, { "epoch": 1.714744619338259, "grad_norm": 0.4175998866558075, "learning_rate": 4.019393710814163e-06, "loss": 0.3367, "step": 37366 }, { "epoch": 1.7147905098435134, "grad_norm": 0.45889878273010254, "learning_rate": 4.019153285720884e-06, "loss": 0.3417, "step": 37367 }, { "epoch": 1.714836400348768, "grad_norm": 0.4952715337276459, "learning_rate": 4.018912862986231e-06, "loss": 0.4329, "step": 37368 }, { "epoch": 1.7148822908540224, "grad_norm": 0.4462283253669739, "learning_rate": 4.018672442610781e-06, "loss": 0.3043, "step": 37369 }, { "epoch": 1.7149281813592767, "grad_norm": 0.5120095014572144, "learning_rate": 4.018432024595116e-06, "loss": 0.4132, "step": 37370 }, { "epoch": 1.7149740718645312, "grad_norm": 0.4621037244796753, "learning_rate": 4.018191608939812e-06, "loss": 0.3354, "step": 37371 }, { "epoch": 1.7150199623697857, "grad_norm": 0.4522314965724945, "learning_rate": 4.017951195645446e-06, "loss": 0.3497, "step": 37372 }, { "epoch": 1.7150658528750402, "grad_norm": 0.4655211865901947, "learning_rate": 4.017710784712599e-06, "loss": 0.3649, "step": 37373 }, { "epoch": 1.7151117433802945, "grad_norm": 0.48257794976234436, "learning_rate": 4.0174703761418486e-06, "loss": 0.4107, "step": 37374 }, { "epoch": 1.715157633885549, "grad_norm": 0.45438098907470703, "learning_rate": 4.017229969933768e-06, "loss": 0.3178, "step": 37375 }, { "epoch": 1.7152035243908035, "grad_norm": 0.453106552362442, "learning_rate": 4.016989566088942e-06, "loss": 0.3797, "step": 37376 }, { "epoch": 1.715249414896058, "grad_norm": 0.4805621802806854, "learning_rate": 4.016749164607946e-06, "loss": 0.3785, "step": 37377 }, { "epoch": 1.7152953054013125, "grad_norm": 0.4582268297672272, "learning_rate": 4.016508765491356e-06, "loss": 0.3843, "step": 37378 }, { "epoch": 1.715341195906567, "grad_norm": 0.47891950607299805, "learning_rate": 4.016268368739754e-06, "loss": 0.4186, "step": 37379 }, { "epoch": 1.7153870864118215, "grad_norm": 0.4825790524482727, "learning_rate": 4.016027974353716e-06, "loss": 0.3558, "step": 37380 }, { "epoch": 1.715432976917076, "grad_norm": 0.4485175907611847, "learning_rate": 4.015787582333818e-06, "loss": 0.3359, "step": 37381 }, { "epoch": 1.7154788674223305, "grad_norm": 0.46191123127937317, "learning_rate": 4.015547192680642e-06, "loss": 0.344, "step": 37382 }, { "epoch": 1.7155247579275847, "grad_norm": 0.4657062888145447, "learning_rate": 4.015306805394765e-06, "loss": 0.3186, "step": 37383 }, { "epoch": 1.7155706484328392, "grad_norm": 0.4660350978374481, "learning_rate": 4.015066420476763e-06, "loss": 0.3336, "step": 37384 }, { "epoch": 1.7156165389380937, "grad_norm": 0.4915370047092438, "learning_rate": 4.014826037927216e-06, "loss": 0.3514, "step": 37385 }, { "epoch": 1.715662429443348, "grad_norm": 0.5144296884536743, "learning_rate": 4.0145856577467015e-06, "loss": 0.3126, "step": 37386 }, { "epoch": 1.7157083199486025, "grad_norm": 0.4534396529197693, "learning_rate": 4.014345279935796e-06, "loss": 0.3493, "step": 37387 }, { "epoch": 1.715754210453857, "grad_norm": 0.47214728593826294, "learning_rate": 4.014104904495082e-06, "loss": 0.3876, "step": 37388 }, { "epoch": 1.7158001009591115, "grad_norm": 0.5021201968193054, "learning_rate": 4.0138645314251335e-06, "loss": 0.3942, "step": 37389 }, { "epoch": 1.715845991464366, "grad_norm": 0.5134985446929932, "learning_rate": 4.013624160726527e-06, "loss": 0.452, "step": 37390 }, { "epoch": 1.7158918819696205, "grad_norm": 0.512238621711731, "learning_rate": 4.013383792399845e-06, "loss": 0.4439, "step": 37391 }, { "epoch": 1.715937772474875, "grad_norm": 0.4435451924800873, "learning_rate": 4.013143426445665e-06, "loss": 0.3203, "step": 37392 }, { "epoch": 1.7159836629801295, "grad_norm": 0.44603249430656433, "learning_rate": 4.012903062864561e-06, "loss": 0.3101, "step": 37393 }, { "epoch": 1.716029553485384, "grad_norm": 0.46990033984184265, "learning_rate": 4.012662701657115e-06, "loss": 0.3466, "step": 37394 }, { "epoch": 1.7160754439906385, "grad_norm": 0.4934183359146118, "learning_rate": 4.012422342823904e-06, "loss": 0.3931, "step": 37395 }, { "epoch": 1.7161213344958928, "grad_norm": 0.4806623160839081, "learning_rate": 4.012181986365506e-06, "loss": 0.3985, "step": 37396 }, { "epoch": 1.7161672250011473, "grad_norm": 0.48924383521080017, "learning_rate": 4.0119416322824956e-06, "loss": 0.3561, "step": 37397 }, { "epoch": 1.7162131155064018, "grad_norm": 0.48046043515205383, "learning_rate": 4.011701280575456e-06, "loss": 0.3845, "step": 37398 }, { "epoch": 1.716259006011656, "grad_norm": 0.489839106798172, "learning_rate": 4.011460931244963e-06, "loss": 0.3783, "step": 37399 }, { "epoch": 1.7163048965169105, "grad_norm": 0.490020751953125, "learning_rate": 4.0112205842915936e-06, "loss": 0.3422, "step": 37400 }, { "epoch": 1.716350787022165, "grad_norm": 0.4628543555736542, "learning_rate": 4.010980239715928e-06, "loss": 0.3482, "step": 37401 }, { "epoch": 1.7163966775274195, "grad_norm": 0.4331725239753723, "learning_rate": 4.0107398975185415e-06, "loss": 0.3366, "step": 37402 }, { "epoch": 1.716442568032674, "grad_norm": 0.44855016469955444, "learning_rate": 4.0104995577000125e-06, "loss": 0.32, "step": 37403 }, { "epoch": 1.7164884585379285, "grad_norm": 0.4676326811313629, "learning_rate": 4.010259220260921e-06, "loss": 0.3487, "step": 37404 }, { "epoch": 1.716534349043183, "grad_norm": 0.49320530891418457, "learning_rate": 4.010018885201844e-06, "loss": 0.4193, "step": 37405 }, { "epoch": 1.7165802395484375, "grad_norm": 0.4477243423461914, "learning_rate": 4.009778552523358e-06, "loss": 0.3132, "step": 37406 }, { "epoch": 1.716626130053692, "grad_norm": 0.5851663947105408, "learning_rate": 4.009538222226044e-06, "loss": 0.3872, "step": 37407 }, { "epoch": 1.7166720205589463, "grad_norm": 0.6327536106109619, "learning_rate": 4.0092978943104775e-06, "loss": 0.3158, "step": 37408 }, { "epoch": 1.7167179110642008, "grad_norm": 0.5061814785003662, "learning_rate": 4.009057568777234e-06, "loss": 0.4366, "step": 37409 }, { "epoch": 1.7167638015694553, "grad_norm": 0.47651946544647217, "learning_rate": 4.008817245626898e-06, "loss": 0.3467, "step": 37410 }, { "epoch": 1.7168096920747098, "grad_norm": 0.4591066539287567, "learning_rate": 4.008576924860042e-06, "loss": 0.2832, "step": 37411 }, { "epoch": 1.716855582579964, "grad_norm": 0.49064821004867554, "learning_rate": 4.008336606477244e-06, "loss": 0.3977, "step": 37412 }, { "epoch": 1.7169014730852186, "grad_norm": 0.4682297110557556, "learning_rate": 4.008096290479087e-06, "loss": 0.3447, "step": 37413 }, { "epoch": 1.716947363590473, "grad_norm": 0.43156325817108154, "learning_rate": 4.0078559768661435e-06, "loss": 0.2768, "step": 37414 }, { "epoch": 1.7169932540957276, "grad_norm": 0.5454644560813904, "learning_rate": 4.007615665638993e-06, "loss": 0.4423, "step": 37415 }, { "epoch": 1.717039144600982, "grad_norm": 0.4622335731983185, "learning_rate": 4.007375356798214e-06, "loss": 0.3481, "step": 37416 }, { "epoch": 1.7170850351062366, "grad_norm": 0.5088116526603699, "learning_rate": 4.007135050344385e-06, "loss": 0.3448, "step": 37417 }, { "epoch": 1.717130925611491, "grad_norm": 0.4551600217819214, "learning_rate": 4.00689474627808e-06, "loss": 0.3614, "step": 37418 }, { "epoch": 1.7171768161167456, "grad_norm": 0.44858092069625854, "learning_rate": 4.006654444599882e-06, "loss": 0.2912, "step": 37419 }, { "epoch": 1.717222706622, "grad_norm": 0.8074127435684204, "learning_rate": 4.006414145310367e-06, "loss": 0.4605, "step": 37420 }, { "epoch": 1.7172685971272543, "grad_norm": 0.45256632566452026, "learning_rate": 4.00617384841011e-06, "loss": 0.3092, "step": 37421 }, { "epoch": 1.7173144876325088, "grad_norm": 0.5214881300926208, "learning_rate": 4.005933553899693e-06, "loss": 0.4056, "step": 37422 }, { "epoch": 1.7173603781377633, "grad_norm": 0.5207011103630066, "learning_rate": 4.005693261779693e-06, "loss": 0.4571, "step": 37423 }, { "epoch": 1.7174062686430176, "grad_norm": 0.4614640176296234, "learning_rate": 4.005452972050684e-06, "loss": 0.3546, "step": 37424 }, { "epoch": 1.717452159148272, "grad_norm": 0.4548502564430237, "learning_rate": 4.005212684713249e-06, "loss": 0.312, "step": 37425 }, { "epoch": 1.7174980496535266, "grad_norm": 0.4582907259464264, "learning_rate": 4.0049723997679625e-06, "loss": 0.358, "step": 37426 }, { "epoch": 1.717543940158781, "grad_norm": 0.45684152841567993, "learning_rate": 4.004732117215403e-06, "loss": 0.3248, "step": 37427 }, { "epoch": 1.7175898306640356, "grad_norm": 0.461478054523468, "learning_rate": 4.00449183705615e-06, "loss": 0.3546, "step": 37428 }, { "epoch": 1.71763572116929, "grad_norm": 0.47843286395072937, "learning_rate": 4.0042515592907806e-06, "loss": 0.4121, "step": 37429 }, { "epoch": 1.7176816116745446, "grad_norm": 0.4785355031490326, "learning_rate": 4.004011283919871e-06, "loss": 0.3736, "step": 37430 }, { "epoch": 1.717727502179799, "grad_norm": 0.4617527723312378, "learning_rate": 4.003771010943997e-06, "loss": 0.3743, "step": 37431 }, { "epoch": 1.7177733926850536, "grad_norm": 0.4967412054538727, "learning_rate": 4.003530740363745e-06, "loss": 0.4163, "step": 37432 }, { "epoch": 1.717819283190308, "grad_norm": 0.5080995559692383, "learning_rate": 4.003290472179684e-06, "loss": 0.2781, "step": 37433 }, { "epoch": 1.7178651736955624, "grad_norm": 0.46165111660957336, "learning_rate": 4.003050206392393e-06, "loss": 0.3413, "step": 37434 }, { "epoch": 1.7179110642008169, "grad_norm": 0.5243955254554749, "learning_rate": 4.002809943002453e-06, "loss": 0.4108, "step": 37435 }, { "epoch": 1.7179569547060713, "grad_norm": 0.4620725214481354, "learning_rate": 4.002569682010442e-06, "loss": 0.3328, "step": 37436 }, { "epoch": 1.7180028452113256, "grad_norm": 0.4952905774116516, "learning_rate": 4.0023294234169345e-06, "loss": 0.4072, "step": 37437 }, { "epoch": 1.7180487357165801, "grad_norm": 0.4835174083709717, "learning_rate": 4.00208916722251e-06, "loss": 0.3606, "step": 37438 }, { "epoch": 1.7180946262218346, "grad_norm": 0.46597862243652344, "learning_rate": 4.001848913427747e-06, "loss": 0.3806, "step": 37439 }, { "epoch": 1.7181405167270891, "grad_norm": 0.43003422021865845, "learning_rate": 4.001608662033221e-06, "loss": 0.2815, "step": 37440 }, { "epoch": 1.7181864072323436, "grad_norm": 0.4521295726299286, "learning_rate": 4.0013684130395116e-06, "loss": 0.3371, "step": 37441 }, { "epoch": 1.7182322977375981, "grad_norm": 0.455587774515152, "learning_rate": 4.0011281664471966e-06, "loss": 0.3238, "step": 37442 }, { "epoch": 1.7182781882428526, "grad_norm": 0.4918869435787201, "learning_rate": 4.000887922256852e-06, "loss": 0.3693, "step": 37443 }, { "epoch": 1.718324078748107, "grad_norm": 0.4982035160064697, "learning_rate": 4.000647680469058e-06, "loss": 0.4172, "step": 37444 }, { "epoch": 1.7183699692533616, "grad_norm": 0.45832544565200806, "learning_rate": 4.000407441084391e-06, "loss": 0.2756, "step": 37445 }, { "epoch": 1.718415859758616, "grad_norm": 0.469639390707016, "learning_rate": 4.0001672041034256e-06, "loss": 0.361, "step": 37446 }, { "epoch": 1.7184617502638704, "grad_norm": 0.4746950566768646, "learning_rate": 3.999926969526745e-06, "loss": 0.375, "step": 37447 }, { "epoch": 1.7185076407691249, "grad_norm": 0.423507422208786, "learning_rate": 3.999686737354925e-06, "loss": 0.3029, "step": 37448 }, { "epoch": 1.7185535312743794, "grad_norm": 0.46763208508491516, "learning_rate": 3.999446507588541e-06, "loss": 0.3644, "step": 37449 }, { "epoch": 1.7185994217796337, "grad_norm": 0.4662920832633972, "learning_rate": 3.999206280228174e-06, "loss": 0.3689, "step": 37450 }, { "epoch": 1.7186453122848881, "grad_norm": 0.47742247581481934, "learning_rate": 3.9989660552744e-06, "loss": 0.3873, "step": 37451 }, { "epoch": 1.7186912027901426, "grad_norm": 0.47880977392196655, "learning_rate": 3.998725832727794e-06, "loss": 0.3335, "step": 37452 }, { "epoch": 1.7187370932953971, "grad_norm": 0.47136253118515015, "learning_rate": 3.9984856125889384e-06, "loss": 0.3596, "step": 37453 }, { "epoch": 1.7187829838006516, "grad_norm": 0.43218478560447693, "learning_rate": 3.9982453948584105e-06, "loss": 0.3041, "step": 37454 }, { "epoch": 1.7188288743059061, "grad_norm": 0.4914086163043976, "learning_rate": 3.998005179536783e-06, "loss": 0.432, "step": 37455 }, { "epoch": 1.7188747648111606, "grad_norm": 0.5185924768447876, "learning_rate": 3.997764966624639e-06, "loss": 0.4798, "step": 37456 }, { "epoch": 1.7189206553164151, "grad_norm": 0.4142240285873413, "learning_rate": 3.997524756122554e-06, "loss": 0.29, "step": 37457 }, { "epoch": 1.7189665458216696, "grad_norm": 0.45898813009262085, "learning_rate": 3.997284548031103e-06, "loss": 0.382, "step": 37458 }, { "epoch": 1.719012436326924, "grad_norm": 0.46060672402381897, "learning_rate": 3.997044342350868e-06, "loss": 0.3469, "step": 37459 }, { "epoch": 1.7190583268321784, "grad_norm": 0.48611119389533997, "learning_rate": 3.996804139082427e-06, "loss": 0.3503, "step": 37460 }, { "epoch": 1.719104217337433, "grad_norm": 0.4571055471897125, "learning_rate": 3.996563938226351e-06, "loss": 0.3383, "step": 37461 }, { "epoch": 1.7191501078426874, "grad_norm": 0.5042287111282349, "learning_rate": 3.996323739783224e-06, "loss": 0.4256, "step": 37462 }, { "epoch": 1.7191959983479417, "grad_norm": 0.45242840051651, "learning_rate": 3.996083543753623e-06, "loss": 0.3158, "step": 37463 }, { "epoch": 1.7192418888531962, "grad_norm": 0.5024271011352539, "learning_rate": 3.995843350138123e-06, "loss": 0.3983, "step": 37464 }, { "epoch": 1.7192877793584507, "grad_norm": 0.4561421275138855, "learning_rate": 3.995603158937303e-06, "loss": 0.346, "step": 37465 }, { "epoch": 1.7193336698637052, "grad_norm": 0.4468078315258026, "learning_rate": 3.99536297015174e-06, "loss": 0.3128, "step": 37466 }, { "epoch": 1.7193795603689597, "grad_norm": 0.5193073153495789, "learning_rate": 3.995122783782014e-06, "loss": 0.422, "step": 37467 }, { "epoch": 1.7194254508742142, "grad_norm": 0.44783931970596313, "learning_rate": 3.994882599828697e-06, "loss": 0.3198, "step": 37468 }, { "epoch": 1.7194713413794687, "grad_norm": 0.4439665675163269, "learning_rate": 3.994642418292372e-06, "loss": 0.3472, "step": 37469 }, { "epoch": 1.7195172318847232, "grad_norm": 0.46491897106170654, "learning_rate": 3.994402239173615e-06, "loss": 0.3198, "step": 37470 }, { "epoch": 1.7195631223899777, "grad_norm": 0.5054397583007812, "learning_rate": 3.994162062473002e-06, "loss": 0.4048, "step": 37471 }, { "epoch": 1.719609012895232, "grad_norm": 0.41765931248664856, "learning_rate": 3.993921888191114e-06, "loss": 0.2432, "step": 37472 }, { "epoch": 1.7196549034004864, "grad_norm": 0.4711093008518219, "learning_rate": 3.993681716328526e-06, "loss": 0.3601, "step": 37473 }, { "epoch": 1.719700793905741, "grad_norm": 0.45483720302581787, "learning_rate": 3.993441546885813e-06, "loss": 0.316, "step": 37474 }, { "epoch": 1.7197466844109952, "grad_norm": 0.44771209359169006, "learning_rate": 3.993201379863558e-06, "loss": 0.2952, "step": 37475 }, { "epoch": 1.7197925749162497, "grad_norm": 0.48636239767074585, "learning_rate": 3.9929612152623364e-06, "loss": 0.3318, "step": 37476 }, { "epoch": 1.7198384654215042, "grad_norm": 0.4718059003353119, "learning_rate": 3.992721053082723e-06, "loss": 0.3671, "step": 37477 }, { "epoch": 1.7198843559267587, "grad_norm": 0.46593016386032104, "learning_rate": 3.992480893325299e-06, "loss": 0.3454, "step": 37478 }, { "epoch": 1.7199302464320132, "grad_norm": 0.47570833563804626, "learning_rate": 3.99224073599064e-06, "loss": 0.4017, "step": 37479 }, { "epoch": 1.7199761369372677, "grad_norm": 0.4842776954174042, "learning_rate": 3.992000581079324e-06, "loss": 0.3554, "step": 37480 }, { "epoch": 1.7200220274425222, "grad_norm": 0.4806846082210541, "learning_rate": 3.991760428591928e-06, "loss": 0.4014, "step": 37481 }, { "epoch": 1.7200679179477767, "grad_norm": 0.4526643455028534, "learning_rate": 3.991520278529032e-06, "loss": 0.3194, "step": 37482 }, { "epoch": 1.7201138084530312, "grad_norm": 0.45557910203933716, "learning_rate": 3.991280130891208e-06, "loss": 0.3378, "step": 37483 }, { "epoch": 1.7201596989582857, "grad_norm": 0.5032175779342651, "learning_rate": 3.991039985679039e-06, "loss": 0.4044, "step": 37484 }, { "epoch": 1.72020558946354, "grad_norm": 0.46120551228523254, "learning_rate": 3.990799842893101e-06, "loss": 0.3491, "step": 37485 }, { "epoch": 1.7202514799687945, "grad_norm": 0.4485439658164978, "learning_rate": 3.9905597025339705e-06, "loss": 0.3157, "step": 37486 }, { "epoch": 1.720297370474049, "grad_norm": 0.4824945628643036, "learning_rate": 3.990319564602225e-06, "loss": 0.364, "step": 37487 }, { "epoch": 1.7203432609793032, "grad_norm": 0.4498058557510376, "learning_rate": 3.990079429098443e-06, "loss": 0.3433, "step": 37488 }, { "epoch": 1.7203891514845577, "grad_norm": 0.44333308935165405, "learning_rate": 3.989839296023198e-06, "loss": 0.2932, "step": 37489 }, { "epoch": 1.7204350419898122, "grad_norm": 0.45488208532333374, "learning_rate": 3.989599165377075e-06, "loss": 0.3268, "step": 37490 }, { "epoch": 1.7204809324950667, "grad_norm": 0.4659055471420288, "learning_rate": 3.989359037160645e-06, "loss": 0.346, "step": 37491 }, { "epoch": 1.7205268230003212, "grad_norm": 0.46532806754112244, "learning_rate": 3.989118911374488e-06, "loss": 0.3609, "step": 37492 }, { "epoch": 1.7205727135055757, "grad_norm": 0.4649726450443268, "learning_rate": 3.988878788019181e-06, "loss": 0.336, "step": 37493 }, { "epoch": 1.7206186040108302, "grad_norm": 0.47610193490982056, "learning_rate": 3.988638667095301e-06, "loss": 0.3645, "step": 37494 }, { "epoch": 1.7206644945160847, "grad_norm": 0.5477766990661621, "learning_rate": 3.988398548603427e-06, "loss": 0.4065, "step": 37495 }, { "epoch": 1.7207103850213392, "grad_norm": 0.4297437071800232, "learning_rate": 3.9881584325441325e-06, "loss": 0.2932, "step": 37496 }, { "epoch": 1.7207562755265935, "grad_norm": 0.4719291925430298, "learning_rate": 3.987918318918e-06, "loss": 0.3666, "step": 37497 }, { "epoch": 1.720802166031848, "grad_norm": 0.46739521622657776, "learning_rate": 3.987678207725605e-06, "loss": 0.3823, "step": 37498 }, { "epoch": 1.7208480565371025, "grad_norm": 0.44146203994750977, "learning_rate": 3.987438098967522e-06, "loss": 0.3443, "step": 37499 }, { "epoch": 1.720893947042357, "grad_norm": 0.5081411004066467, "learning_rate": 3.987197992644333e-06, "loss": 0.4601, "step": 37500 }, { "epoch": 1.7209398375476113, "grad_norm": 0.5735171437263489, "learning_rate": 3.986957888756612e-06, "loss": 0.3333, "step": 37501 }, { "epoch": 1.7209857280528658, "grad_norm": 0.4871867299079895, "learning_rate": 3.9867177873049375e-06, "loss": 0.3904, "step": 37502 }, { "epoch": 1.7210316185581203, "grad_norm": 0.40315142273902893, "learning_rate": 3.986477688289888e-06, "loss": 0.2663, "step": 37503 }, { "epoch": 1.7210775090633748, "grad_norm": 0.5050402879714966, "learning_rate": 3.986237591712039e-06, "loss": 0.4533, "step": 37504 }, { "epoch": 1.7211233995686293, "grad_norm": 0.49255913496017456, "learning_rate": 3.985997497571968e-06, "loss": 0.4059, "step": 37505 }, { "epoch": 1.7211692900738838, "grad_norm": 0.4671362042427063, "learning_rate": 3.985757405870254e-06, "loss": 0.3257, "step": 37506 }, { "epoch": 1.7212151805791382, "grad_norm": 0.49167948961257935, "learning_rate": 3.985517316607473e-06, "loss": 0.3897, "step": 37507 }, { "epoch": 1.7212610710843927, "grad_norm": 0.4676930606365204, "learning_rate": 3.985277229784203e-06, "loss": 0.354, "step": 37508 }, { "epoch": 1.7213069615896472, "grad_norm": 0.47411277890205383, "learning_rate": 3.9850371454010205e-06, "loss": 0.3917, "step": 37509 }, { "epoch": 1.7213528520949015, "grad_norm": 0.5109216570854187, "learning_rate": 3.984797063458504e-06, "loss": 0.4725, "step": 37510 }, { "epoch": 1.721398742600156, "grad_norm": 0.46160703897476196, "learning_rate": 3.984556983957228e-06, "loss": 0.3445, "step": 37511 }, { "epoch": 1.7214446331054105, "grad_norm": 0.49744412302970886, "learning_rate": 3.984316906897774e-06, "loss": 0.3575, "step": 37512 }, { "epoch": 1.7214905236106648, "grad_norm": 0.5047242045402527, "learning_rate": 3.984076832280718e-06, "loss": 0.4503, "step": 37513 }, { "epoch": 1.7215364141159193, "grad_norm": 0.49746882915496826, "learning_rate": 3.983836760106634e-06, "loss": 0.4197, "step": 37514 }, { "epoch": 1.7215823046211738, "grad_norm": 0.4703180193901062, "learning_rate": 3.9835966903761046e-06, "loss": 0.3384, "step": 37515 }, { "epoch": 1.7216281951264283, "grad_norm": 0.507028341293335, "learning_rate": 3.983356623089704e-06, "loss": 0.4222, "step": 37516 }, { "epoch": 1.7216740856316828, "grad_norm": 0.5000210404396057, "learning_rate": 3.983116558248007e-06, "loss": 0.4687, "step": 37517 }, { "epoch": 1.7217199761369373, "grad_norm": 0.45217305421829224, "learning_rate": 3.9828764958515955e-06, "loss": 0.3185, "step": 37518 }, { "epoch": 1.7217658666421918, "grad_norm": 0.4977262616157532, "learning_rate": 3.9826364359010465e-06, "loss": 0.3643, "step": 37519 }, { "epoch": 1.7218117571474463, "grad_norm": 0.4937879145145416, "learning_rate": 3.982396378396934e-06, "loss": 0.4067, "step": 37520 }, { "epoch": 1.7218576476527008, "grad_norm": 0.46037471294403076, "learning_rate": 3.982156323339838e-06, "loss": 0.3141, "step": 37521 }, { "epoch": 1.7219035381579553, "grad_norm": 0.47940704226493835, "learning_rate": 3.9819162707303346e-06, "loss": 0.4117, "step": 37522 }, { "epoch": 1.7219494286632095, "grad_norm": 0.46581918001174927, "learning_rate": 3.981676220569e-06, "loss": 0.3048, "step": 37523 }, { "epoch": 1.721995319168464, "grad_norm": 0.4672877788543701, "learning_rate": 3.981436172856415e-06, "loss": 0.3669, "step": 37524 }, { "epoch": 1.7220412096737185, "grad_norm": 0.48470765352249146, "learning_rate": 3.981196127593154e-06, "loss": 0.3808, "step": 37525 }, { "epoch": 1.7220871001789728, "grad_norm": 0.4611252546310425, "learning_rate": 3.980956084779792e-06, "loss": 0.3359, "step": 37526 }, { "epoch": 1.7221329906842273, "grad_norm": 0.4747268259525299, "learning_rate": 3.980716044416911e-06, "loss": 0.3514, "step": 37527 }, { "epoch": 1.7221788811894818, "grad_norm": 0.4880480468273163, "learning_rate": 3.980476006505087e-06, "loss": 0.3847, "step": 37528 }, { "epoch": 1.7222247716947363, "grad_norm": 0.46068698167800903, "learning_rate": 3.980235971044896e-06, "loss": 0.3191, "step": 37529 }, { "epoch": 1.7222706621999908, "grad_norm": 0.461694598197937, "learning_rate": 3.979995938036915e-06, "loss": 0.3521, "step": 37530 }, { "epoch": 1.7223165527052453, "grad_norm": 0.5194443464279175, "learning_rate": 3.979755907481723e-06, "loss": 0.4, "step": 37531 }, { "epoch": 1.7223624432104998, "grad_norm": 0.42952805757522583, "learning_rate": 3.979515879379895e-06, "loss": 0.3075, "step": 37532 }, { "epoch": 1.7224083337157543, "grad_norm": 0.44474998116493225, "learning_rate": 3.979275853732009e-06, "loss": 0.3105, "step": 37533 }, { "epoch": 1.7224542242210088, "grad_norm": 0.45281049609184265, "learning_rate": 3.979035830538643e-06, "loss": 0.3415, "step": 37534 }, { "epoch": 1.7225001147262633, "grad_norm": 0.48055335879325867, "learning_rate": 3.9787958098003736e-06, "loss": 0.4269, "step": 37535 }, { "epoch": 1.7225460052315176, "grad_norm": 0.4424835443496704, "learning_rate": 3.978555791517777e-06, "loss": 0.2859, "step": 37536 }, { "epoch": 1.722591895736772, "grad_norm": 0.5206233859062195, "learning_rate": 3.978315775691433e-06, "loss": 0.432, "step": 37537 }, { "epoch": 1.7226377862420266, "grad_norm": 0.480898380279541, "learning_rate": 3.978075762321917e-06, "loss": 0.3403, "step": 37538 }, { "epoch": 1.7226836767472808, "grad_norm": 0.4597509503364563, "learning_rate": 3.977835751409804e-06, "loss": 0.3443, "step": 37539 }, { "epoch": 1.7227295672525353, "grad_norm": 0.4521420896053314, "learning_rate": 3.977595742955676e-06, "loss": 0.349, "step": 37540 }, { "epoch": 1.7227754577577898, "grad_norm": 0.5329156517982483, "learning_rate": 3.9773557369601065e-06, "loss": 0.3258, "step": 37541 }, { "epoch": 1.7228213482630443, "grad_norm": 0.4797748327255249, "learning_rate": 3.977115733423673e-06, "loss": 0.377, "step": 37542 }, { "epoch": 1.7228672387682988, "grad_norm": 0.4856626093387604, "learning_rate": 3.9768757323469545e-06, "loss": 0.4252, "step": 37543 }, { "epoch": 1.7229131292735533, "grad_norm": 0.4284609258174896, "learning_rate": 3.976635733730527e-06, "loss": 0.2918, "step": 37544 }, { "epoch": 1.7229590197788078, "grad_norm": 0.45278462767601013, "learning_rate": 3.976395737574967e-06, "loss": 0.3045, "step": 37545 }, { "epoch": 1.7230049102840623, "grad_norm": 0.4712797999382019, "learning_rate": 3.976155743880853e-06, "loss": 0.3507, "step": 37546 }, { "epoch": 1.7230508007893168, "grad_norm": 0.44265469908714294, "learning_rate": 3.975915752648761e-06, "loss": 0.3118, "step": 37547 }, { "epoch": 1.723096691294571, "grad_norm": 0.5240386724472046, "learning_rate": 3.975675763879266e-06, "loss": 0.4425, "step": 37548 }, { "epoch": 1.7231425817998256, "grad_norm": 0.4849725663661957, "learning_rate": 3.97543577757295e-06, "loss": 0.3769, "step": 37549 }, { "epoch": 1.72318847230508, "grad_norm": 0.42730268836021423, "learning_rate": 3.975195793730387e-06, "loss": 0.2778, "step": 37550 }, { "epoch": 1.7232343628103346, "grad_norm": 0.4534895718097687, "learning_rate": 3.974955812352155e-06, "loss": 0.3372, "step": 37551 }, { "epoch": 1.7232802533155889, "grad_norm": 0.4546175003051758, "learning_rate": 3.974715833438831e-06, "loss": 0.3271, "step": 37552 }, { "epoch": 1.7233261438208434, "grad_norm": 0.44587600231170654, "learning_rate": 3.974475856990992e-06, "loss": 0.3213, "step": 37553 }, { "epoch": 1.7233720343260979, "grad_norm": 0.47271448373794556, "learning_rate": 3.974235883009213e-06, "loss": 0.3754, "step": 37554 }, { "epoch": 1.7234179248313524, "grad_norm": 0.456817090511322, "learning_rate": 3.973995911494074e-06, "loss": 0.332, "step": 37555 }, { "epoch": 1.7234638153366069, "grad_norm": 0.5080356001853943, "learning_rate": 3.973755942446152e-06, "loss": 0.4085, "step": 37556 }, { "epoch": 1.7235097058418614, "grad_norm": 0.46196117997169495, "learning_rate": 3.973515975866021e-06, "loss": 0.3553, "step": 37557 }, { "epoch": 1.7235555963471159, "grad_norm": 0.48965105414390564, "learning_rate": 3.9732760117542626e-06, "loss": 0.3845, "step": 37558 }, { "epoch": 1.7236014868523704, "grad_norm": 0.4894275367259979, "learning_rate": 3.973036050111451e-06, "loss": 0.3812, "step": 37559 }, { "epoch": 1.7236473773576249, "grad_norm": 0.475813090801239, "learning_rate": 3.972796090938163e-06, "loss": 0.3675, "step": 37560 }, { "epoch": 1.7236932678628791, "grad_norm": 0.513232409954071, "learning_rate": 3.972556134234975e-06, "loss": 0.3494, "step": 37561 }, { "epoch": 1.7237391583681336, "grad_norm": 0.47769877314567566, "learning_rate": 3.972316180002466e-06, "loss": 0.3701, "step": 37562 }, { "epoch": 1.7237850488733881, "grad_norm": 0.46627339720726013, "learning_rate": 3.972076228241213e-06, "loss": 0.3765, "step": 37563 }, { "epoch": 1.7238309393786424, "grad_norm": 0.4687279462814331, "learning_rate": 3.971836278951791e-06, "loss": 0.3838, "step": 37564 }, { "epoch": 1.723876829883897, "grad_norm": 0.4852904975414276, "learning_rate": 3.971596332134779e-06, "loss": 0.3742, "step": 37565 }, { "epoch": 1.7239227203891514, "grad_norm": 0.48786213994026184, "learning_rate": 3.9713563877907545e-06, "loss": 0.3496, "step": 37566 }, { "epoch": 1.723968610894406, "grad_norm": 0.4731210172176361, "learning_rate": 3.971116445920291e-06, "loss": 0.3234, "step": 37567 }, { "epoch": 1.7240145013996604, "grad_norm": 0.46663519740104675, "learning_rate": 3.970876506523969e-06, "loss": 0.3229, "step": 37568 }, { "epoch": 1.724060391904915, "grad_norm": 0.46064475178718567, "learning_rate": 3.970636569602364e-06, "loss": 0.3446, "step": 37569 }, { "epoch": 1.7241062824101694, "grad_norm": 0.44724735617637634, "learning_rate": 3.970396635156052e-06, "loss": 0.3074, "step": 37570 }, { "epoch": 1.7241521729154239, "grad_norm": 0.4779791831970215, "learning_rate": 3.970156703185613e-06, "loss": 0.4032, "step": 37571 }, { "epoch": 1.7241980634206784, "grad_norm": 0.4674805998802185, "learning_rate": 3.969916773691621e-06, "loss": 0.3605, "step": 37572 }, { "epoch": 1.7242439539259329, "grad_norm": 0.46409332752227783, "learning_rate": 3.969676846674654e-06, "loss": 0.3939, "step": 37573 }, { "epoch": 1.7242898444311872, "grad_norm": 0.4558377265930176, "learning_rate": 3.969436922135289e-06, "loss": 0.3379, "step": 37574 }, { "epoch": 1.7243357349364417, "grad_norm": 0.4678764045238495, "learning_rate": 3.9691970000741045e-06, "loss": 0.3789, "step": 37575 }, { "epoch": 1.7243816254416962, "grad_norm": 0.491243839263916, "learning_rate": 3.968957080491674e-06, "loss": 0.3562, "step": 37576 }, { "epoch": 1.7244275159469504, "grad_norm": 0.5193041563034058, "learning_rate": 3.968717163388576e-06, "loss": 0.3935, "step": 37577 }, { "epoch": 1.724473406452205, "grad_norm": 0.48554202914237976, "learning_rate": 3.96847724876539e-06, "loss": 0.4166, "step": 37578 }, { "epoch": 1.7245192969574594, "grad_norm": 0.46968919038772583, "learning_rate": 3.968237336622688e-06, "loss": 0.3632, "step": 37579 }, { "epoch": 1.724565187462714, "grad_norm": 0.48958495259284973, "learning_rate": 3.967997426961052e-06, "loss": 0.3468, "step": 37580 }, { "epoch": 1.7246110779679684, "grad_norm": 0.47594383358955383, "learning_rate": 3.967757519781056e-06, "loss": 0.3825, "step": 37581 }, { "epoch": 1.724656968473223, "grad_norm": 0.4329048991203308, "learning_rate": 3.967517615083275e-06, "loss": 0.2605, "step": 37582 }, { "epoch": 1.7247028589784774, "grad_norm": 0.48690247535705566, "learning_rate": 3.967277712868291e-06, "loss": 0.3682, "step": 37583 }, { "epoch": 1.724748749483732, "grad_norm": 0.43930667638778687, "learning_rate": 3.967037813136677e-06, "loss": 0.2801, "step": 37584 }, { "epoch": 1.7247946399889864, "grad_norm": 0.5230001211166382, "learning_rate": 3.966797915889011e-06, "loss": 0.423, "step": 37585 }, { "epoch": 1.7248405304942407, "grad_norm": 0.44060665369033813, "learning_rate": 3.96655802112587e-06, "loss": 0.2968, "step": 37586 }, { "epoch": 1.7248864209994952, "grad_norm": 0.5006231069564819, "learning_rate": 3.966318128847832e-06, "loss": 0.4017, "step": 37587 }, { "epoch": 1.7249323115047497, "grad_norm": 0.586255669593811, "learning_rate": 3.96607823905547e-06, "loss": 0.3447, "step": 37588 }, { "epoch": 1.7249782020100042, "grad_norm": 0.4721508324146271, "learning_rate": 3.965838351749367e-06, "loss": 0.3599, "step": 37589 }, { "epoch": 1.7250240925152585, "grad_norm": 0.46438029408454895, "learning_rate": 3.9655984669300955e-06, "loss": 0.3333, "step": 37590 }, { "epoch": 1.725069983020513, "grad_norm": 0.5636918544769287, "learning_rate": 3.9653585845982304e-06, "loss": 0.3103, "step": 37591 }, { "epoch": 1.7251158735257675, "grad_norm": 0.48524752259254456, "learning_rate": 3.965118704754354e-06, "loss": 0.3317, "step": 37592 }, { "epoch": 1.725161764031022, "grad_norm": 0.4652059078216553, "learning_rate": 3.964878827399039e-06, "loss": 0.347, "step": 37593 }, { "epoch": 1.7252076545362764, "grad_norm": 0.49668341875076294, "learning_rate": 3.964638952532866e-06, "loss": 0.3466, "step": 37594 }, { "epoch": 1.725253545041531, "grad_norm": 0.5006192922592163, "learning_rate": 3.964399080156407e-06, "loss": 0.3853, "step": 37595 }, { "epoch": 1.7252994355467854, "grad_norm": 0.4834129512310028, "learning_rate": 3.9641592102702435e-06, "loss": 0.4054, "step": 37596 }, { "epoch": 1.72534532605204, "grad_norm": 0.45366448163986206, "learning_rate": 3.963919342874949e-06, "loss": 0.3282, "step": 37597 }, { "epoch": 1.7253912165572944, "grad_norm": 0.5185109972953796, "learning_rate": 3.963679477971099e-06, "loss": 0.4652, "step": 37598 }, { "epoch": 1.7254371070625487, "grad_norm": 0.47021418809890747, "learning_rate": 3.963439615559277e-06, "loss": 0.3185, "step": 37599 }, { "epoch": 1.7254829975678032, "grad_norm": 0.4304482936859131, "learning_rate": 3.963199755640053e-06, "loss": 0.3021, "step": 37600 }, { "epoch": 1.7255288880730577, "grad_norm": 0.46824756264686584, "learning_rate": 3.962959898214008e-06, "loss": 0.3588, "step": 37601 }, { "epoch": 1.725574778578312, "grad_norm": 0.46092677116394043, "learning_rate": 3.9627200432817165e-06, "loss": 0.3285, "step": 37602 }, { "epoch": 1.7256206690835665, "grad_norm": 0.4817688465118408, "learning_rate": 3.962480190843757e-06, "loss": 0.2963, "step": 37603 }, { "epoch": 1.725666559588821, "grad_norm": 0.46024349331855774, "learning_rate": 3.962240340900701e-06, "loss": 0.323, "step": 37604 }, { "epoch": 1.7257124500940755, "grad_norm": 0.45891401171684265, "learning_rate": 3.962000493453133e-06, "loss": 0.3325, "step": 37605 }, { "epoch": 1.72575834059933, "grad_norm": 0.44002100825309753, "learning_rate": 3.961760648501627e-06, "loss": 0.3108, "step": 37606 }, { "epoch": 1.7258042311045845, "grad_norm": 0.4563017785549164, "learning_rate": 3.961520806046757e-06, "loss": 0.3751, "step": 37607 }, { "epoch": 1.725850121609839, "grad_norm": 0.45861729979515076, "learning_rate": 3.961280966089103e-06, "loss": 0.3197, "step": 37608 }, { "epoch": 1.7258960121150935, "grad_norm": 0.44539135694503784, "learning_rate": 3.9610411286292396e-06, "loss": 0.3256, "step": 37609 }, { "epoch": 1.725941902620348, "grad_norm": 0.4862193167209625, "learning_rate": 3.960801293667743e-06, "loss": 0.3344, "step": 37610 }, { "epoch": 1.7259877931256025, "grad_norm": 0.5091336965560913, "learning_rate": 3.960561461205195e-06, "loss": 0.3597, "step": 37611 }, { "epoch": 1.7260336836308567, "grad_norm": 0.4904113709926605, "learning_rate": 3.960321631242167e-06, "loss": 0.4448, "step": 37612 }, { "epoch": 1.7260795741361112, "grad_norm": 0.46362507343292236, "learning_rate": 3.960081803779235e-06, "loss": 0.3664, "step": 37613 }, { "epoch": 1.7261254646413657, "grad_norm": 0.46124547719955444, "learning_rate": 3.9598419788169804e-06, "loss": 0.3519, "step": 37614 }, { "epoch": 1.72617135514662, "grad_norm": 0.43686968088150024, "learning_rate": 3.959602156355977e-06, "loss": 0.2956, "step": 37615 }, { "epoch": 1.7262172456518745, "grad_norm": 0.44320154190063477, "learning_rate": 3.959362336396802e-06, "loss": 0.2927, "step": 37616 }, { "epoch": 1.726263136157129, "grad_norm": 0.4559209942817688, "learning_rate": 3.959122518940033e-06, "loss": 0.3176, "step": 37617 }, { "epoch": 1.7263090266623835, "grad_norm": 0.45440229773521423, "learning_rate": 3.958882703986245e-06, "loss": 0.3099, "step": 37618 }, { "epoch": 1.726354917167638, "grad_norm": 0.47660914063453674, "learning_rate": 3.958642891536015e-06, "loss": 0.355, "step": 37619 }, { "epoch": 1.7264008076728925, "grad_norm": 0.49120286107063293, "learning_rate": 3.958403081589921e-06, "loss": 0.4522, "step": 37620 }, { "epoch": 1.726446698178147, "grad_norm": 0.4136184751987457, "learning_rate": 3.958163274148539e-06, "loss": 0.2641, "step": 37621 }, { "epoch": 1.7264925886834015, "grad_norm": 0.426470011472702, "learning_rate": 3.9579234692124445e-06, "loss": 0.2966, "step": 37622 }, { "epoch": 1.726538479188656, "grad_norm": 0.48396599292755127, "learning_rate": 3.957683666782216e-06, "loss": 0.3776, "step": 37623 }, { "epoch": 1.7265843696939105, "grad_norm": 0.5111770033836365, "learning_rate": 3.95744386685843e-06, "loss": 0.3858, "step": 37624 }, { "epoch": 1.7266302601991648, "grad_norm": 0.49605560302734375, "learning_rate": 3.957204069441663e-06, "loss": 0.3924, "step": 37625 }, { "epoch": 1.7266761507044193, "grad_norm": 0.5005434155464172, "learning_rate": 3.9569642745324875e-06, "loss": 0.3049, "step": 37626 }, { "epoch": 1.7267220412096738, "grad_norm": 0.4701997637748718, "learning_rate": 3.956724482131487e-06, "loss": 0.3478, "step": 37627 }, { "epoch": 1.726767931714928, "grad_norm": 0.4853617250919342, "learning_rate": 3.9564846922392335e-06, "loss": 0.3837, "step": 37628 }, { "epoch": 1.7268138222201825, "grad_norm": 0.4502929151058197, "learning_rate": 3.956244904856304e-06, "loss": 0.2981, "step": 37629 }, { "epoch": 1.726859712725437, "grad_norm": 0.47922638058662415, "learning_rate": 3.9560051199832786e-06, "loss": 0.3876, "step": 37630 }, { "epoch": 1.7269056032306915, "grad_norm": 0.48648831248283386, "learning_rate": 3.9557653376207306e-06, "loss": 0.398, "step": 37631 }, { "epoch": 1.726951493735946, "grad_norm": 0.5228362083435059, "learning_rate": 3.9555255577692354e-06, "loss": 0.3621, "step": 37632 }, { "epoch": 1.7269973842412005, "grad_norm": 0.49243059754371643, "learning_rate": 3.955285780429375e-06, "loss": 0.379, "step": 37633 }, { "epoch": 1.727043274746455, "grad_norm": 0.4640369713306427, "learning_rate": 3.9550460056017205e-06, "loss": 0.371, "step": 37634 }, { "epoch": 1.7270891652517095, "grad_norm": 0.45532137155532837, "learning_rate": 3.95480623328685e-06, "loss": 0.3516, "step": 37635 }, { "epoch": 1.727135055756964, "grad_norm": 0.4885496199131012, "learning_rate": 3.954566463485342e-06, "loss": 0.4224, "step": 37636 }, { "epoch": 1.7271809462622183, "grad_norm": 0.4878074824810028, "learning_rate": 3.954326696197771e-06, "loss": 0.3815, "step": 37637 }, { "epoch": 1.7272268367674728, "grad_norm": 0.4718996584415436, "learning_rate": 3.9540869314247134e-06, "loss": 0.3504, "step": 37638 }, { "epoch": 1.7272727272727273, "grad_norm": 0.48672056198120117, "learning_rate": 3.953847169166748e-06, "loss": 0.3842, "step": 37639 }, { "epoch": 1.7273186177779818, "grad_norm": 0.4533197283744812, "learning_rate": 3.9536074094244506e-06, "loss": 0.3049, "step": 37640 }, { "epoch": 1.727364508283236, "grad_norm": 0.5071361064910889, "learning_rate": 3.953367652198395e-06, "loss": 0.4153, "step": 37641 }, { "epoch": 1.7274103987884906, "grad_norm": 0.4767429828643799, "learning_rate": 3.953127897489161e-06, "loss": 0.3705, "step": 37642 }, { "epoch": 1.727456289293745, "grad_norm": 0.5048102736473083, "learning_rate": 3.952888145297324e-06, "loss": 0.4114, "step": 37643 }, { "epoch": 1.7275021797989996, "grad_norm": 0.44948458671569824, "learning_rate": 3.95264839562346e-06, "loss": 0.3199, "step": 37644 }, { "epoch": 1.727548070304254, "grad_norm": 0.44682055711746216, "learning_rate": 3.952408648468147e-06, "loss": 0.3363, "step": 37645 }, { "epoch": 1.7275939608095086, "grad_norm": 0.4394311308860779, "learning_rate": 3.952168903831961e-06, "loss": 0.3313, "step": 37646 }, { "epoch": 1.727639851314763, "grad_norm": 0.4514541029930115, "learning_rate": 3.951929161715476e-06, "loss": 0.308, "step": 37647 }, { "epoch": 1.7276857418200176, "grad_norm": 0.47513025999069214, "learning_rate": 3.951689422119272e-06, "loss": 0.4092, "step": 37648 }, { "epoch": 1.727731632325272, "grad_norm": 0.4893389940261841, "learning_rate": 3.951449685043924e-06, "loss": 0.4037, "step": 37649 }, { "epoch": 1.7277775228305263, "grad_norm": 0.4138801395893097, "learning_rate": 3.951209950490008e-06, "loss": 0.2879, "step": 37650 }, { "epoch": 1.7278234133357808, "grad_norm": 0.47534239292144775, "learning_rate": 3.950970218458103e-06, "loss": 0.3527, "step": 37651 }, { "epoch": 1.7278693038410353, "grad_norm": 0.4440337121486664, "learning_rate": 3.950730488948782e-06, "loss": 0.3022, "step": 37652 }, { "epoch": 1.7279151943462896, "grad_norm": 0.4509468972682953, "learning_rate": 3.950490761962621e-06, "loss": 0.3574, "step": 37653 }, { "epoch": 1.727961084851544, "grad_norm": 0.44328397512435913, "learning_rate": 3.950251037500201e-06, "loss": 0.3021, "step": 37654 }, { "epoch": 1.7280069753567986, "grad_norm": 0.4531576931476593, "learning_rate": 3.950011315562097e-06, "loss": 0.3178, "step": 37655 }, { "epoch": 1.728052865862053, "grad_norm": 0.46454399824142456, "learning_rate": 3.949771596148881e-06, "loss": 0.344, "step": 37656 }, { "epoch": 1.7280987563673076, "grad_norm": 0.4286445379257202, "learning_rate": 3.949531879261136e-06, "loss": 0.2772, "step": 37657 }, { "epoch": 1.728144646872562, "grad_norm": 0.43322786688804626, "learning_rate": 3.949292164899434e-06, "loss": 0.2975, "step": 37658 }, { "epoch": 1.7281905373778166, "grad_norm": 0.4554278254508972, "learning_rate": 3.9490524530643536e-06, "loss": 0.3424, "step": 37659 }, { "epoch": 1.728236427883071, "grad_norm": 0.46346452832221985, "learning_rate": 3.948812743756469e-06, "loss": 0.3086, "step": 37660 }, { "epoch": 1.7282823183883256, "grad_norm": 0.46050119400024414, "learning_rate": 3.948573036976359e-06, "loss": 0.33, "step": 37661 }, { "epoch": 1.72832820889358, "grad_norm": 0.4652964770793915, "learning_rate": 3.9483333327246e-06, "loss": 0.3424, "step": 37662 }, { "epoch": 1.7283740993988344, "grad_norm": 0.5044764280319214, "learning_rate": 3.948093631001763e-06, "loss": 0.4018, "step": 37663 }, { "epoch": 1.7284199899040888, "grad_norm": 0.45207908749580383, "learning_rate": 3.947853931808433e-06, "loss": 0.3416, "step": 37664 }, { "epoch": 1.7284658804093433, "grad_norm": 0.47938260436058044, "learning_rate": 3.947614235145182e-06, "loss": 0.3747, "step": 37665 }, { "epoch": 1.7285117709145976, "grad_norm": 0.46454739570617676, "learning_rate": 3.947374541012584e-06, "loss": 0.3637, "step": 37666 }, { "epoch": 1.7285576614198521, "grad_norm": 0.46638867259025574, "learning_rate": 3.9471348494112205e-06, "loss": 0.3425, "step": 37667 }, { "epoch": 1.7286035519251066, "grad_norm": 0.5074900388717651, "learning_rate": 3.946895160341665e-06, "loss": 0.3478, "step": 37668 }, { "epoch": 1.7286494424303611, "grad_norm": 0.45006459951400757, "learning_rate": 3.946655473804492e-06, "loss": 0.3087, "step": 37669 }, { "epoch": 1.7286953329356156, "grad_norm": 0.43827497959136963, "learning_rate": 3.946415789800283e-06, "loss": 0.3041, "step": 37670 }, { "epoch": 1.7287412234408701, "grad_norm": 0.4709852635860443, "learning_rate": 3.946176108329611e-06, "loss": 0.3781, "step": 37671 }, { "epoch": 1.7287871139461246, "grad_norm": 0.4881029725074768, "learning_rate": 3.945936429393051e-06, "loss": 0.4109, "step": 37672 }, { "epoch": 1.728833004451379, "grad_norm": 0.4328114986419678, "learning_rate": 3.945696752991183e-06, "loss": 0.3104, "step": 37673 }, { "epoch": 1.7288788949566336, "grad_norm": 0.4756580591201782, "learning_rate": 3.945457079124582e-06, "loss": 0.3628, "step": 37674 }, { "epoch": 1.7289247854618879, "grad_norm": 0.4868132472038269, "learning_rate": 3.9452174077938216e-06, "loss": 0.3697, "step": 37675 }, { "epoch": 1.7289706759671424, "grad_norm": 0.5099665522575378, "learning_rate": 3.944977738999482e-06, "loss": 0.3589, "step": 37676 }, { "epoch": 1.7290165664723969, "grad_norm": 0.5095929503440857, "learning_rate": 3.9447380727421395e-06, "loss": 0.3257, "step": 37677 }, { "epoch": 1.7290624569776514, "grad_norm": 0.49904799461364746, "learning_rate": 3.944498409022366e-06, "loss": 0.3728, "step": 37678 }, { "epoch": 1.7291083474829056, "grad_norm": 0.5024864673614502, "learning_rate": 3.944258747840742e-06, "loss": 0.343, "step": 37679 }, { "epoch": 1.7291542379881601, "grad_norm": 0.5154918432235718, "learning_rate": 3.944019089197842e-06, "loss": 0.3713, "step": 37680 }, { "epoch": 1.7292001284934146, "grad_norm": 0.4523521065711975, "learning_rate": 3.943779433094244e-06, "loss": 0.3316, "step": 37681 }, { "epoch": 1.7292460189986691, "grad_norm": 0.48860740661621094, "learning_rate": 3.943539779530522e-06, "loss": 0.3721, "step": 37682 }, { "epoch": 1.7292919095039236, "grad_norm": 0.4496825933456421, "learning_rate": 3.943300128507255e-06, "loss": 0.3331, "step": 37683 }, { "epoch": 1.7293378000091781, "grad_norm": 0.50135737657547, "learning_rate": 3.943060480025015e-06, "loss": 0.3845, "step": 37684 }, { "epoch": 1.7293836905144326, "grad_norm": 0.4964587390422821, "learning_rate": 3.942820834084382e-06, "loss": 0.3659, "step": 37685 }, { "epoch": 1.7294295810196871, "grad_norm": 0.45602425932884216, "learning_rate": 3.942581190685932e-06, "loss": 0.338, "step": 37686 }, { "epoch": 1.7294754715249416, "grad_norm": 0.4918994605541229, "learning_rate": 3.94234154983024e-06, "loss": 0.3833, "step": 37687 }, { "epoch": 1.729521362030196, "grad_norm": 0.44704511761665344, "learning_rate": 3.942101911517883e-06, "loss": 0.3239, "step": 37688 }, { "epoch": 1.7295672525354504, "grad_norm": 0.4794505536556244, "learning_rate": 3.941862275749438e-06, "loss": 0.4064, "step": 37689 }, { "epoch": 1.729613143040705, "grad_norm": 0.44472235441207886, "learning_rate": 3.941622642525479e-06, "loss": 0.2972, "step": 37690 }, { "epoch": 1.7296590335459592, "grad_norm": 0.4283420443534851, "learning_rate": 3.941383011846583e-06, "loss": 0.3, "step": 37691 }, { "epoch": 1.7297049240512137, "grad_norm": 0.4611454904079437, "learning_rate": 3.941143383713327e-06, "loss": 0.3427, "step": 37692 }, { "epoch": 1.7297508145564682, "grad_norm": 0.4861791431903839, "learning_rate": 3.9409037581262885e-06, "loss": 0.3976, "step": 37693 }, { "epoch": 1.7297967050617227, "grad_norm": 0.4751395881175995, "learning_rate": 3.94066413508604e-06, "loss": 0.4043, "step": 37694 }, { "epoch": 1.7298425955669772, "grad_norm": 0.5078848600387573, "learning_rate": 3.940424514593162e-06, "loss": 0.4517, "step": 37695 }, { "epoch": 1.7298884860722317, "grad_norm": 0.5129518508911133, "learning_rate": 3.940184896648228e-06, "loss": 0.4431, "step": 37696 }, { "epoch": 1.7299343765774862, "grad_norm": 0.4845578968524933, "learning_rate": 3.939945281251812e-06, "loss": 0.3846, "step": 37697 }, { "epoch": 1.7299802670827407, "grad_norm": 0.5167884230613708, "learning_rate": 3.939705668404497e-06, "loss": 0.444, "step": 37698 }, { "epoch": 1.7300261575879952, "grad_norm": 0.44016215205192566, "learning_rate": 3.939466058106855e-06, "loss": 0.299, "step": 37699 }, { "epoch": 1.7300720480932497, "grad_norm": 0.5090606212615967, "learning_rate": 3.93922645035946e-06, "loss": 0.3952, "step": 37700 }, { "epoch": 1.730117938598504, "grad_norm": 0.5415244698524475, "learning_rate": 3.938986845162892e-06, "loss": 0.4439, "step": 37701 }, { "epoch": 1.7301638291037584, "grad_norm": 0.4660992920398712, "learning_rate": 3.938747242517726e-06, "loss": 0.3299, "step": 37702 }, { "epoch": 1.730209719609013, "grad_norm": 0.49812474846839905, "learning_rate": 3.938507642424536e-06, "loss": 0.3593, "step": 37703 }, { "epoch": 1.7302556101142672, "grad_norm": 0.49513715505599976, "learning_rate": 3.938268044883903e-06, "loss": 0.4197, "step": 37704 }, { "epoch": 1.7303015006195217, "grad_norm": 0.4584709703922272, "learning_rate": 3.938028449896399e-06, "loss": 0.2836, "step": 37705 }, { "epoch": 1.7303473911247762, "grad_norm": 0.4992996156215668, "learning_rate": 3.9377888574626e-06, "loss": 0.4415, "step": 37706 }, { "epoch": 1.7303932816300307, "grad_norm": 0.4919486939907074, "learning_rate": 3.937549267583085e-06, "loss": 0.4121, "step": 37707 }, { "epoch": 1.7304391721352852, "grad_norm": 0.45202216506004333, "learning_rate": 3.93730968025843e-06, "loss": 0.3438, "step": 37708 }, { "epoch": 1.7304850626405397, "grad_norm": 0.46126124262809753, "learning_rate": 3.937070095489207e-06, "loss": 0.3484, "step": 37709 }, { "epoch": 1.7305309531457942, "grad_norm": 0.5129637718200684, "learning_rate": 3.9368305132759966e-06, "loss": 0.4102, "step": 37710 }, { "epoch": 1.7305768436510487, "grad_norm": 0.4908868968486786, "learning_rate": 3.936590933619374e-06, "loss": 0.412, "step": 37711 }, { "epoch": 1.7306227341563032, "grad_norm": 0.48706531524658203, "learning_rate": 3.936351356519912e-06, "loss": 0.3735, "step": 37712 }, { "epoch": 1.7306686246615577, "grad_norm": 0.4465389549732208, "learning_rate": 3.936111781978191e-06, "loss": 0.3509, "step": 37713 }, { "epoch": 1.730714515166812, "grad_norm": 0.46393975615501404, "learning_rate": 3.935872209994785e-06, "loss": 0.3484, "step": 37714 }, { "epoch": 1.7307604056720665, "grad_norm": 0.4549098014831543, "learning_rate": 3.935632640570271e-06, "loss": 0.3192, "step": 37715 }, { "epoch": 1.730806296177321, "grad_norm": 0.45554542541503906, "learning_rate": 3.935393073705225e-06, "loss": 0.3158, "step": 37716 }, { "epoch": 1.7308521866825752, "grad_norm": 0.4349183738231659, "learning_rate": 3.935153509400222e-06, "loss": 0.2848, "step": 37717 }, { "epoch": 1.7308980771878297, "grad_norm": 0.46370723843574524, "learning_rate": 3.9349139476558365e-06, "loss": 0.3544, "step": 37718 }, { "epoch": 1.7309439676930842, "grad_norm": 0.45701155066490173, "learning_rate": 3.93467438847265e-06, "loss": 0.3235, "step": 37719 }, { "epoch": 1.7309898581983387, "grad_norm": 0.4520294964313507, "learning_rate": 3.934434831851235e-06, "loss": 0.3273, "step": 37720 }, { "epoch": 1.7310357487035932, "grad_norm": 0.4941861629486084, "learning_rate": 3.9341952777921665e-06, "loss": 0.3496, "step": 37721 }, { "epoch": 1.7310816392088477, "grad_norm": 0.5370991230010986, "learning_rate": 3.933955726296023e-06, "loss": 0.3949, "step": 37722 }, { "epoch": 1.7311275297141022, "grad_norm": 0.5831584930419922, "learning_rate": 3.9337161773633795e-06, "loss": 0.3201, "step": 37723 }, { "epoch": 1.7311734202193567, "grad_norm": 0.5078880190849304, "learning_rate": 3.933476630994812e-06, "loss": 0.4142, "step": 37724 }, { "epoch": 1.7312193107246112, "grad_norm": 0.5126383900642395, "learning_rate": 3.933237087190896e-06, "loss": 0.3703, "step": 37725 }, { "epoch": 1.7312652012298655, "grad_norm": 0.45886334776878357, "learning_rate": 3.932997545952209e-06, "loss": 0.3131, "step": 37726 }, { "epoch": 1.73131109173512, "grad_norm": 0.4797188639640808, "learning_rate": 3.9327580072793266e-06, "loss": 0.3468, "step": 37727 }, { "epoch": 1.7313569822403745, "grad_norm": 0.454182505607605, "learning_rate": 3.932518471172822e-06, "loss": 0.3686, "step": 37728 }, { "epoch": 1.731402872745629, "grad_norm": 0.4468729794025421, "learning_rate": 3.9322789376332755e-06, "loss": 0.3082, "step": 37729 }, { "epoch": 1.7314487632508833, "grad_norm": 0.4744400978088379, "learning_rate": 3.932039406661261e-06, "loss": 0.3309, "step": 37730 }, { "epoch": 1.7314946537561378, "grad_norm": 0.46409016847610474, "learning_rate": 3.931799878257354e-06, "loss": 0.3167, "step": 37731 }, { "epoch": 1.7315405442613923, "grad_norm": 0.44316425919532776, "learning_rate": 3.931560352422132e-06, "loss": 0.3327, "step": 37732 }, { "epoch": 1.7315864347666468, "grad_norm": 0.4968279302120209, "learning_rate": 3.93132082915617e-06, "loss": 0.3943, "step": 37733 }, { "epoch": 1.7316323252719013, "grad_norm": 0.5221280455589294, "learning_rate": 3.931081308460042e-06, "loss": 0.4385, "step": 37734 }, { "epoch": 1.7316782157771557, "grad_norm": 0.5274427533149719, "learning_rate": 3.930841790334329e-06, "loss": 0.4187, "step": 37735 }, { "epoch": 1.7317241062824102, "grad_norm": 0.5154575109481812, "learning_rate": 3.930602274779604e-06, "loss": 0.3648, "step": 37736 }, { "epoch": 1.7317699967876647, "grad_norm": 0.4670393764972687, "learning_rate": 3.930362761796441e-06, "loss": 0.3628, "step": 37737 }, { "epoch": 1.7318158872929192, "grad_norm": 0.46661749482154846, "learning_rate": 3.930123251385419e-06, "loss": 0.3605, "step": 37738 }, { "epoch": 1.7318617777981735, "grad_norm": 0.45243117213249207, "learning_rate": 3.929883743547114e-06, "loss": 0.3389, "step": 37739 }, { "epoch": 1.731907668303428, "grad_norm": 0.5083144903182983, "learning_rate": 3.929644238282098e-06, "loss": 0.4425, "step": 37740 }, { "epoch": 1.7319535588086825, "grad_norm": 0.48450958728790283, "learning_rate": 3.929404735590951e-06, "loss": 0.3812, "step": 37741 }, { "epoch": 1.7319994493139368, "grad_norm": 0.45165616273880005, "learning_rate": 3.929165235474249e-06, "loss": 0.3324, "step": 37742 }, { "epoch": 1.7320453398191913, "grad_norm": 0.47355028986930847, "learning_rate": 3.928925737932566e-06, "loss": 0.3723, "step": 37743 }, { "epoch": 1.7320912303244458, "grad_norm": 0.47502601146698, "learning_rate": 3.928686242966478e-06, "loss": 0.4058, "step": 37744 }, { "epoch": 1.7321371208297003, "grad_norm": 0.4867856204509735, "learning_rate": 3.9284467505765625e-06, "loss": 0.3834, "step": 37745 }, { "epoch": 1.7321830113349548, "grad_norm": 0.46754777431488037, "learning_rate": 3.928207260763394e-06, "loss": 0.366, "step": 37746 }, { "epoch": 1.7322289018402093, "grad_norm": 0.42955413460731506, "learning_rate": 3.927967773527549e-06, "loss": 0.2755, "step": 37747 }, { "epoch": 1.7322747923454638, "grad_norm": 0.45646870136260986, "learning_rate": 3.927728288869603e-06, "loss": 0.3253, "step": 37748 }, { "epoch": 1.7323206828507183, "grad_norm": 0.48685693740844727, "learning_rate": 3.9274888067901305e-06, "loss": 0.3828, "step": 37749 }, { "epoch": 1.7323665733559728, "grad_norm": 0.4678124189376831, "learning_rate": 3.927249327289711e-06, "loss": 0.3494, "step": 37750 }, { "epoch": 1.7324124638612273, "grad_norm": 0.5095535516738892, "learning_rate": 3.9270098503689165e-06, "loss": 0.4217, "step": 37751 }, { "epoch": 1.7324583543664815, "grad_norm": 0.4811328947544098, "learning_rate": 3.926770376028326e-06, "loss": 0.3633, "step": 37752 }, { "epoch": 1.732504244871736, "grad_norm": 0.4730220139026642, "learning_rate": 3.926530904268514e-06, "loss": 0.3438, "step": 37753 }, { "epoch": 1.7325501353769905, "grad_norm": 0.4898549020290375, "learning_rate": 3.926291435090057e-06, "loss": 0.4089, "step": 37754 }, { "epoch": 1.7325960258822448, "grad_norm": 0.4313989579677582, "learning_rate": 3.926051968493527e-06, "loss": 0.2905, "step": 37755 }, { "epoch": 1.7326419163874993, "grad_norm": 0.47588685154914856, "learning_rate": 3.9258125044795056e-06, "loss": 0.332, "step": 37756 }, { "epoch": 1.7326878068927538, "grad_norm": 0.4357886016368866, "learning_rate": 3.925573043048567e-06, "loss": 0.3184, "step": 37757 }, { "epoch": 1.7327336973980083, "grad_norm": 0.471975177526474, "learning_rate": 3.925333584201285e-06, "loss": 0.3612, "step": 37758 }, { "epoch": 1.7327795879032628, "grad_norm": 0.5200879573822021, "learning_rate": 3.925094127938236e-06, "loss": 0.3357, "step": 37759 }, { "epoch": 1.7328254784085173, "grad_norm": 0.5017543435096741, "learning_rate": 3.924854674259998e-06, "loss": 0.3778, "step": 37760 }, { "epoch": 1.7328713689137718, "grad_norm": 0.47081589698791504, "learning_rate": 3.9246152231671445e-06, "loss": 0.3762, "step": 37761 }, { "epoch": 1.7329172594190263, "grad_norm": 0.4721234142780304, "learning_rate": 3.92437577466025e-06, "loss": 0.345, "step": 37762 }, { "epoch": 1.7329631499242808, "grad_norm": 0.447214275598526, "learning_rate": 3.924136328739893e-06, "loss": 0.3457, "step": 37763 }, { "epoch": 1.733009040429535, "grad_norm": 0.4461555778980255, "learning_rate": 3.923896885406651e-06, "loss": 0.3062, "step": 37764 }, { "epoch": 1.7330549309347896, "grad_norm": 0.4548856019973755, "learning_rate": 3.923657444661095e-06, "loss": 0.2986, "step": 37765 }, { "epoch": 1.733100821440044, "grad_norm": 0.4415822923183441, "learning_rate": 3.923418006503805e-06, "loss": 0.3201, "step": 37766 }, { "epoch": 1.7331467119452986, "grad_norm": 0.4948274791240692, "learning_rate": 3.923178570935354e-06, "loss": 0.3761, "step": 37767 }, { "epoch": 1.7331926024505528, "grad_norm": 0.5065581798553467, "learning_rate": 3.922939137956317e-06, "loss": 0.4441, "step": 37768 }, { "epoch": 1.7332384929558073, "grad_norm": 0.47803249955177307, "learning_rate": 3.9226997075672745e-06, "loss": 0.3648, "step": 37769 }, { "epoch": 1.7332843834610618, "grad_norm": 0.4461725652217865, "learning_rate": 3.922460279768798e-06, "loss": 0.3262, "step": 37770 }, { "epoch": 1.7333302739663163, "grad_norm": 0.44367727637290955, "learning_rate": 3.9222208545614625e-06, "loss": 0.2871, "step": 37771 }, { "epoch": 1.7333761644715708, "grad_norm": 0.47054678201675415, "learning_rate": 3.921981431945848e-06, "loss": 0.3315, "step": 37772 }, { "epoch": 1.7334220549768253, "grad_norm": 0.4531335234642029, "learning_rate": 3.921742011922527e-06, "loss": 0.3242, "step": 37773 }, { "epoch": 1.7334679454820798, "grad_norm": 0.4755464494228363, "learning_rate": 3.9215025944920765e-06, "loss": 0.3813, "step": 37774 }, { "epoch": 1.7335138359873343, "grad_norm": 0.4542332887649536, "learning_rate": 3.921263179655072e-06, "loss": 0.3454, "step": 37775 }, { "epoch": 1.7335597264925888, "grad_norm": 0.517070472240448, "learning_rate": 3.92102376741209e-06, "loss": 0.4176, "step": 37776 }, { "epoch": 1.733605616997843, "grad_norm": 0.49232161045074463, "learning_rate": 3.9207843577637015e-06, "loss": 0.3553, "step": 37777 }, { "epoch": 1.7336515075030976, "grad_norm": 0.4544234871864319, "learning_rate": 3.92054495071049e-06, "loss": 0.3456, "step": 37778 }, { "epoch": 1.733697398008352, "grad_norm": 0.4365238845348358, "learning_rate": 3.920305546253026e-06, "loss": 0.2888, "step": 37779 }, { "epoch": 1.7337432885136064, "grad_norm": 0.4615754187107086, "learning_rate": 3.920066144391886e-06, "loss": 0.3342, "step": 37780 }, { "epoch": 1.7337891790188609, "grad_norm": 0.43486255407333374, "learning_rate": 3.919826745127647e-06, "loss": 0.314, "step": 37781 }, { "epoch": 1.7338350695241154, "grad_norm": 0.42503219842910767, "learning_rate": 3.919587348460883e-06, "loss": 0.2808, "step": 37782 }, { "epoch": 1.7338809600293699, "grad_norm": 0.4473850429058075, "learning_rate": 3.9193479543921695e-06, "loss": 0.3265, "step": 37783 }, { "epoch": 1.7339268505346244, "grad_norm": 0.509564995765686, "learning_rate": 3.919108562922084e-06, "loss": 0.4426, "step": 37784 }, { "epoch": 1.7339727410398789, "grad_norm": 0.4935932457447052, "learning_rate": 3.918869174051202e-06, "loss": 0.3907, "step": 37785 }, { "epoch": 1.7340186315451334, "grad_norm": 0.47518447041511536, "learning_rate": 3.918629787780098e-06, "loss": 0.3655, "step": 37786 }, { "epoch": 1.7340645220503879, "grad_norm": 0.46330776810646057, "learning_rate": 3.918390404109348e-06, "loss": 0.2894, "step": 37787 }, { "epoch": 1.7341104125556424, "grad_norm": 0.4758063554763794, "learning_rate": 3.918151023039528e-06, "loss": 0.3599, "step": 37788 }, { "epoch": 1.7341563030608969, "grad_norm": 0.4561474323272705, "learning_rate": 3.917911644571214e-06, "loss": 0.3115, "step": 37789 }, { "epoch": 1.7342021935661511, "grad_norm": 0.4917207360267639, "learning_rate": 3.917672268704979e-06, "loss": 0.3709, "step": 37790 }, { "epoch": 1.7342480840714056, "grad_norm": 0.43501728773117065, "learning_rate": 3.917432895441402e-06, "loss": 0.3161, "step": 37791 }, { "epoch": 1.7342939745766601, "grad_norm": 0.5072606205940247, "learning_rate": 3.917193524781058e-06, "loss": 0.4011, "step": 37792 }, { "epoch": 1.7343398650819144, "grad_norm": 0.4619543254375458, "learning_rate": 3.916954156724518e-06, "loss": 0.3611, "step": 37793 }, { "epoch": 1.734385755587169, "grad_norm": 0.4629977345466614, "learning_rate": 3.916714791272365e-06, "loss": 0.3181, "step": 37794 }, { "epoch": 1.7344316460924234, "grad_norm": 0.49492716789245605, "learning_rate": 3.916475428425171e-06, "loss": 0.4117, "step": 37795 }, { "epoch": 1.734477536597678, "grad_norm": 0.4954071640968323, "learning_rate": 3.9162360681835095e-06, "loss": 0.41, "step": 37796 }, { "epoch": 1.7345234271029324, "grad_norm": 0.4620422422885895, "learning_rate": 3.9159967105479604e-06, "loss": 0.3468, "step": 37797 }, { "epoch": 1.734569317608187, "grad_norm": 0.4937390685081482, "learning_rate": 3.915757355519097e-06, "loss": 0.3839, "step": 37798 }, { "epoch": 1.7346152081134414, "grad_norm": 0.4778202772140503, "learning_rate": 3.915518003097492e-06, "loss": 0.3784, "step": 37799 }, { "epoch": 1.7346610986186959, "grad_norm": 0.49725303053855896, "learning_rate": 3.915278653283726e-06, "loss": 0.3772, "step": 37800 }, { "epoch": 1.7347069891239504, "grad_norm": 0.4928148686885834, "learning_rate": 3.915039306078373e-06, "loss": 0.4283, "step": 37801 }, { "epoch": 1.7347528796292049, "grad_norm": 0.4940696358680725, "learning_rate": 3.914799961482008e-06, "loss": 0.3998, "step": 37802 }, { "epoch": 1.7347987701344592, "grad_norm": 0.5216817259788513, "learning_rate": 3.914560619495206e-06, "loss": 0.371, "step": 37803 }, { "epoch": 1.7348446606397137, "grad_norm": 0.48414674401283264, "learning_rate": 3.914321280118544e-06, "loss": 0.3836, "step": 37804 }, { "epoch": 1.7348905511449682, "grad_norm": 0.48860999941825867, "learning_rate": 3.914081943352594e-06, "loss": 0.3954, "step": 37805 }, { "epoch": 1.7349364416502224, "grad_norm": 0.44745299220085144, "learning_rate": 3.913842609197937e-06, "loss": 0.3297, "step": 37806 }, { "epoch": 1.734982332155477, "grad_norm": 0.4577964246273041, "learning_rate": 3.913603277655145e-06, "loss": 0.3199, "step": 37807 }, { "epoch": 1.7350282226607314, "grad_norm": 0.43561914563179016, "learning_rate": 3.913363948724794e-06, "loss": 0.2885, "step": 37808 }, { "epoch": 1.735074113165986, "grad_norm": 0.4532536566257477, "learning_rate": 3.91312462240746e-06, "loss": 0.3231, "step": 37809 }, { "epoch": 1.7351200036712404, "grad_norm": 0.4688645601272583, "learning_rate": 3.9128852987037184e-06, "loss": 0.3641, "step": 37810 }, { "epoch": 1.735165894176495, "grad_norm": 0.5063457489013672, "learning_rate": 3.912645977614143e-06, "loss": 0.3543, "step": 37811 }, { "epoch": 1.7352117846817494, "grad_norm": 0.5305487513542175, "learning_rate": 3.912406659139314e-06, "loss": 0.4038, "step": 37812 }, { "epoch": 1.735257675187004, "grad_norm": 0.5343021154403687, "learning_rate": 3.9121673432798015e-06, "loss": 0.3274, "step": 37813 }, { "epoch": 1.7353035656922584, "grad_norm": 0.4330636262893677, "learning_rate": 3.911928030036182e-06, "loss": 0.3117, "step": 37814 }, { "epoch": 1.7353494561975127, "grad_norm": 0.4431147575378418, "learning_rate": 3.911688719409034e-06, "loss": 0.2912, "step": 37815 }, { "epoch": 1.7353953467027672, "grad_norm": 0.45175331830978394, "learning_rate": 3.911449411398932e-06, "loss": 0.3238, "step": 37816 }, { "epoch": 1.7354412372080217, "grad_norm": 0.49868541955947876, "learning_rate": 3.911210106006449e-06, "loss": 0.4066, "step": 37817 }, { "epoch": 1.7354871277132762, "grad_norm": 0.4436154365539551, "learning_rate": 3.910970803232163e-06, "loss": 0.301, "step": 37818 }, { "epoch": 1.7355330182185305, "grad_norm": 0.4512239992618561, "learning_rate": 3.910731503076648e-06, "loss": 0.3159, "step": 37819 }, { "epoch": 1.735578908723785, "grad_norm": 0.4715718924999237, "learning_rate": 3.910492205540479e-06, "loss": 0.3429, "step": 37820 }, { "epoch": 1.7356247992290394, "grad_norm": 0.4154037535190582, "learning_rate": 3.910252910624234e-06, "loss": 0.2823, "step": 37821 }, { "epoch": 1.735670689734294, "grad_norm": 0.45371514558792114, "learning_rate": 3.910013618328487e-06, "loss": 0.3496, "step": 37822 }, { "epoch": 1.7357165802395484, "grad_norm": 0.5113012194633484, "learning_rate": 3.909774328653812e-06, "loss": 0.4332, "step": 37823 }, { "epoch": 1.735762470744803, "grad_norm": 0.46074530482292175, "learning_rate": 3.909535041600786e-06, "loss": 0.3601, "step": 37824 }, { "epoch": 1.7358083612500574, "grad_norm": 0.4375602900981903, "learning_rate": 3.909295757169985e-06, "loss": 0.2642, "step": 37825 }, { "epoch": 1.735854251755312, "grad_norm": 0.48137366771698, "learning_rate": 3.909056475361983e-06, "loss": 0.3385, "step": 37826 }, { "epoch": 1.7359001422605664, "grad_norm": 0.4824962317943573, "learning_rate": 3.908817196177353e-06, "loss": 0.364, "step": 37827 }, { "epoch": 1.7359460327658207, "grad_norm": 0.468479186296463, "learning_rate": 3.9085779196166775e-06, "loss": 0.3318, "step": 37828 }, { "epoch": 1.7359919232710752, "grad_norm": 0.47652795910835266, "learning_rate": 3.908338645680526e-06, "loss": 0.3164, "step": 37829 }, { "epoch": 1.7360378137763297, "grad_norm": 0.4329882860183716, "learning_rate": 3.908099374369475e-06, "loss": 0.3087, "step": 37830 }, { "epoch": 1.736083704281584, "grad_norm": 0.49736976623535156, "learning_rate": 3.907860105684101e-06, "loss": 0.4145, "step": 37831 }, { "epoch": 1.7361295947868385, "grad_norm": 0.4967638850212097, "learning_rate": 3.907620839624979e-06, "loss": 0.3636, "step": 37832 }, { "epoch": 1.736175485292093, "grad_norm": 0.48689496517181396, "learning_rate": 3.907381576192682e-06, "loss": 0.3768, "step": 37833 }, { "epoch": 1.7362213757973475, "grad_norm": 0.4676749110221863, "learning_rate": 3.907142315387791e-06, "loss": 0.3757, "step": 37834 }, { "epoch": 1.736267266302602, "grad_norm": 0.4551714062690735, "learning_rate": 3.906903057210876e-06, "loss": 0.3577, "step": 37835 }, { "epoch": 1.7363131568078565, "grad_norm": 0.43191415071487427, "learning_rate": 3.906663801662513e-06, "loss": 0.3272, "step": 37836 }, { "epoch": 1.736359047313111, "grad_norm": 0.5016480684280396, "learning_rate": 3.90642454874328e-06, "loss": 0.4546, "step": 37837 }, { "epoch": 1.7364049378183655, "grad_norm": 0.48935431241989136, "learning_rate": 3.906185298453751e-06, "loss": 0.4106, "step": 37838 }, { "epoch": 1.73645082832362, "grad_norm": 0.46445828676223755, "learning_rate": 3.9059460507945e-06, "loss": 0.3447, "step": 37839 }, { "epoch": 1.7364967188288745, "grad_norm": 0.46387794613838196, "learning_rate": 3.905706805766105e-06, "loss": 0.3653, "step": 37840 }, { "epoch": 1.7365426093341287, "grad_norm": 0.45771172642707825, "learning_rate": 3.905467563369139e-06, "loss": 0.3098, "step": 37841 }, { "epoch": 1.7365884998393832, "grad_norm": 0.5334724187850952, "learning_rate": 3.905228323604176e-06, "loss": 0.4786, "step": 37842 }, { "epoch": 1.7366343903446377, "grad_norm": 0.4668572247028351, "learning_rate": 3.904989086471796e-06, "loss": 0.3292, "step": 37843 }, { "epoch": 1.736680280849892, "grad_norm": 0.460836261510849, "learning_rate": 3.9047498519725714e-06, "loss": 0.2868, "step": 37844 }, { "epoch": 1.7367261713551465, "grad_norm": 0.46095919609069824, "learning_rate": 3.9045106201070764e-06, "loss": 0.3335, "step": 37845 }, { "epoch": 1.736772061860401, "grad_norm": 0.47260582447052, "learning_rate": 3.904271390875889e-06, "loss": 0.3854, "step": 37846 }, { "epoch": 1.7368179523656555, "grad_norm": 0.4793805181980133, "learning_rate": 3.904032164279583e-06, "loss": 0.361, "step": 37847 }, { "epoch": 1.73686384287091, "grad_norm": 0.4562554359436035, "learning_rate": 3.903792940318732e-06, "loss": 0.3444, "step": 37848 }, { "epoch": 1.7369097333761645, "grad_norm": 0.4470905363559723, "learning_rate": 3.903553718993915e-06, "loss": 0.3289, "step": 37849 }, { "epoch": 1.736955623881419, "grad_norm": 0.48299121856689453, "learning_rate": 3.903314500305704e-06, "loss": 0.3625, "step": 37850 }, { "epoch": 1.7370015143866735, "grad_norm": 0.4796854555606842, "learning_rate": 3.903075284254676e-06, "loss": 0.3558, "step": 37851 }, { "epoch": 1.737047404891928, "grad_norm": 0.4653775691986084, "learning_rate": 3.902836070841406e-06, "loss": 0.3361, "step": 37852 }, { "epoch": 1.7370932953971823, "grad_norm": 0.4681016504764557, "learning_rate": 3.902596860066469e-06, "loss": 0.343, "step": 37853 }, { "epoch": 1.7371391859024368, "grad_norm": 0.7698867321014404, "learning_rate": 3.902357651930441e-06, "loss": 0.3819, "step": 37854 }, { "epoch": 1.7371850764076913, "grad_norm": 0.4971826374530792, "learning_rate": 3.902118446433893e-06, "loss": 0.389, "step": 37855 }, { "epoch": 1.7372309669129458, "grad_norm": 0.4718954265117645, "learning_rate": 3.901879243577408e-06, "loss": 0.3738, "step": 37856 }, { "epoch": 1.7372768574182, "grad_norm": 0.5012049674987793, "learning_rate": 3.901640043361556e-06, "loss": 0.443, "step": 37857 }, { "epoch": 1.7373227479234545, "grad_norm": 0.4853041470050812, "learning_rate": 3.90140084578691e-06, "loss": 0.3881, "step": 37858 }, { "epoch": 1.737368638428709, "grad_norm": 0.45671334862709045, "learning_rate": 3.90116165085405e-06, "loss": 0.3253, "step": 37859 }, { "epoch": 1.7374145289339635, "grad_norm": 0.5007564425468445, "learning_rate": 3.900922458563551e-06, "loss": 0.3469, "step": 37860 }, { "epoch": 1.737460419439218, "grad_norm": 0.44194164872169495, "learning_rate": 3.9006832689159845e-06, "loss": 0.3252, "step": 37861 }, { "epoch": 1.7375063099444725, "grad_norm": 0.46637746691703796, "learning_rate": 3.9004440819119285e-06, "loss": 0.3578, "step": 37862 }, { "epoch": 1.737552200449727, "grad_norm": 0.4666733741760254, "learning_rate": 3.900204897551958e-06, "loss": 0.3566, "step": 37863 }, { "epoch": 1.7375980909549815, "grad_norm": 0.4632923901081085, "learning_rate": 3.899965715836645e-06, "loss": 0.3248, "step": 37864 }, { "epoch": 1.737643981460236, "grad_norm": 0.45875582098960876, "learning_rate": 3.89972653676657e-06, "loss": 0.3055, "step": 37865 }, { "epoch": 1.7376898719654903, "grad_norm": 0.4747883081436157, "learning_rate": 3.899487360342305e-06, "loss": 0.3704, "step": 37866 }, { "epoch": 1.7377357624707448, "grad_norm": 0.45304328203201294, "learning_rate": 3.899248186564425e-06, "loss": 0.3078, "step": 37867 }, { "epoch": 1.7377816529759993, "grad_norm": 0.5072611570358276, "learning_rate": 3.899009015433506e-06, "loss": 0.4126, "step": 37868 }, { "epoch": 1.7378275434812536, "grad_norm": 0.4828420877456665, "learning_rate": 3.8987698469501236e-06, "loss": 0.3757, "step": 37869 }, { "epoch": 1.737873433986508, "grad_norm": 0.44230419397354126, "learning_rate": 3.898530681114849e-06, "loss": 0.3161, "step": 37870 }, { "epoch": 1.7379193244917626, "grad_norm": 0.4832528233528137, "learning_rate": 3.898291517928264e-06, "loss": 0.3794, "step": 37871 }, { "epoch": 1.737965214997017, "grad_norm": 0.49374139308929443, "learning_rate": 3.8980523573909394e-06, "loss": 0.361, "step": 37872 }, { "epoch": 1.7380111055022716, "grad_norm": 0.5273470282554626, "learning_rate": 3.8978131995034504e-06, "loss": 0.3839, "step": 37873 }, { "epoch": 1.738056996007526, "grad_norm": 0.46764346957206726, "learning_rate": 3.897574044266373e-06, "loss": 0.3841, "step": 37874 }, { "epoch": 1.7381028865127806, "grad_norm": 0.4615870416164398, "learning_rate": 3.897334891680283e-06, "loss": 0.354, "step": 37875 }, { "epoch": 1.738148777018035, "grad_norm": 0.48016002774238586, "learning_rate": 3.897095741745752e-06, "loss": 0.3849, "step": 37876 }, { "epoch": 1.7381946675232895, "grad_norm": 0.4906712472438812, "learning_rate": 3.89685659446336e-06, "loss": 0.373, "step": 37877 }, { "epoch": 1.738240558028544, "grad_norm": 0.4949273467063904, "learning_rate": 3.896617449833681e-06, "loss": 0.4256, "step": 37878 }, { "epoch": 1.7382864485337983, "grad_norm": 0.4806094765663147, "learning_rate": 3.896378307857285e-06, "loss": 0.3768, "step": 37879 }, { "epoch": 1.7383323390390528, "grad_norm": 0.47996315360069275, "learning_rate": 3.8961391685347536e-06, "loss": 0.3747, "step": 37880 }, { "epoch": 1.7383782295443073, "grad_norm": 0.45010027289390564, "learning_rate": 3.895900031866659e-06, "loss": 0.3464, "step": 37881 }, { "epoch": 1.7384241200495616, "grad_norm": 0.48724475502967834, "learning_rate": 3.895660897853575e-06, "loss": 0.4224, "step": 37882 }, { "epoch": 1.738470010554816, "grad_norm": 0.4635736644268036, "learning_rate": 3.895421766496079e-06, "loss": 0.3609, "step": 37883 }, { "epoch": 1.7385159010600706, "grad_norm": 0.42947572469711304, "learning_rate": 3.895182637794746e-06, "loss": 0.3239, "step": 37884 }, { "epoch": 1.738561791565325, "grad_norm": 0.4659508466720581, "learning_rate": 3.8949435117501465e-06, "loss": 0.3622, "step": 37885 }, { "epoch": 1.7386076820705796, "grad_norm": 0.528142511844635, "learning_rate": 3.8947043883628626e-06, "loss": 0.4175, "step": 37886 }, { "epoch": 1.738653572575834, "grad_norm": 0.44024258852005005, "learning_rate": 3.894465267633465e-06, "loss": 0.3277, "step": 37887 }, { "epoch": 1.7386994630810886, "grad_norm": 0.47341403365135193, "learning_rate": 3.894226149562529e-06, "loss": 0.3291, "step": 37888 }, { "epoch": 1.738745353586343, "grad_norm": 0.4834555685520172, "learning_rate": 3.89398703415063e-06, "loss": 0.3265, "step": 37889 }, { "epoch": 1.7387912440915976, "grad_norm": 0.44103726744651794, "learning_rate": 3.893747921398344e-06, "loss": 0.3373, "step": 37890 }, { "epoch": 1.738837134596852, "grad_norm": 0.4566248953342438, "learning_rate": 3.893508811306245e-06, "loss": 0.3048, "step": 37891 }, { "epoch": 1.7388830251021063, "grad_norm": 0.519572377204895, "learning_rate": 3.893269703874906e-06, "loss": 0.427, "step": 37892 }, { "epoch": 1.7389289156073608, "grad_norm": 0.48425421118736267, "learning_rate": 3.893030599104905e-06, "loss": 0.3811, "step": 37893 }, { "epoch": 1.7389748061126153, "grad_norm": 0.4950339198112488, "learning_rate": 3.892791496996818e-06, "loss": 0.3774, "step": 37894 }, { "epoch": 1.7390206966178696, "grad_norm": 0.48766353726387024, "learning_rate": 3.892552397551216e-06, "loss": 0.3651, "step": 37895 }, { "epoch": 1.7390665871231241, "grad_norm": 0.48620134592056274, "learning_rate": 3.892313300768677e-06, "loss": 0.3561, "step": 37896 }, { "epoch": 1.7391124776283786, "grad_norm": 0.4747154116630554, "learning_rate": 3.892074206649775e-06, "loss": 0.3152, "step": 37897 }, { "epoch": 1.7391583681336331, "grad_norm": 0.4623135030269623, "learning_rate": 3.891835115195083e-06, "loss": 0.3372, "step": 37898 }, { "epoch": 1.7392042586388876, "grad_norm": 0.4672326147556305, "learning_rate": 3.89159602640518e-06, "loss": 0.3594, "step": 37899 }, { "epoch": 1.739250149144142, "grad_norm": 0.49602916836738586, "learning_rate": 3.891356940280639e-06, "loss": 0.3748, "step": 37900 }, { "epoch": 1.7392960396493966, "grad_norm": 0.5135919451713562, "learning_rate": 3.891117856822032e-06, "loss": 0.4026, "step": 37901 }, { "epoch": 1.739341930154651, "grad_norm": 0.4555191993713379, "learning_rate": 3.890878776029938e-06, "loss": 0.3252, "step": 37902 }, { "epoch": 1.7393878206599056, "grad_norm": 0.4648953676223755, "learning_rate": 3.890639697904932e-06, "loss": 0.3169, "step": 37903 }, { "epoch": 1.7394337111651599, "grad_norm": 0.4850652515888214, "learning_rate": 3.890400622447586e-06, "loss": 0.4249, "step": 37904 }, { "epoch": 1.7394796016704144, "grad_norm": 0.45225778222084045, "learning_rate": 3.8901615496584764e-06, "loss": 0.3069, "step": 37905 }, { "epoch": 1.7395254921756689, "grad_norm": 0.4264868199825287, "learning_rate": 3.889922479538178e-06, "loss": 0.2836, "step": 37906 }, { "epoch": 1.7395713826809232, "grad_norm": 0.4594583213329315, "learning_rate": 3.889683412087264e-06, "loss": 0.3166, "step": 37907 }, { "epoch": 1.7396172731861776, "grad_norm": 0.44186604022979736, "learning_rate": 3.889444347306313e-06, "loss": 0.3094, "step": 37908 }, { "epoch": 1.7396631636914321, "grad_norm": 0.470024436712265, "learning_rate": 3.889205285195898e-06, "loss": 0.341, "step": 37909 }, { "epoch": 1.7397090541966866, "grad_norm": 0.5121283531188965, "learning_rate": 3.888966225756593e-06, "loss": 0.4008, "step": 37910 }, { "epoch": 1.7397549447019411, "grad_norm": 0.5117537975311279, "learning_rate": 3.888727168988974e-06, "loss": 0.4012, "step": 37911 }, { "epoch": 1.7398008352071956, "grad_norm": 0.5018085837364197, "learning_rate": 3.888488114893616e-06, "loss": 0.4497, "step": 37912 }, { "epoch": 1.7398467257124501, "grad_norm": 0.4352177083492279, "learning_rate": 3.888249063471091e-06, "loss": 0.2829, "step": 37913 }, { "epoch": 1.7398926162177046, "grad_norm": 0.4980044364929199, "learning_rate": 3.888010014721978e-06, "loss": 0.433, "step": 37914 }, { "epoch": 1.7399385067229591, "grad_norm": 0.49845001101493835, "learning_rate": 3.887770968646849e-06, "loss": 0.4092, "step": 37915 }, { "epoch": 1.7399843972282136, "grad_norm": 0.47982701659202576, "learning_rate": 3.887531925246281e-06, "loss": 0.3676, "step": 37916 }, { "epoch": 1.740030287733468, "grad_norm": 0.44795164465904236, "learning_rate": 3.8872928845208465e-06, "loss": 0.3471, "step": 37917 }, { "epoch": 1.7400761782387224, "grad_norm": 0.4580914080142975, "learning_rate": 3.887053846471122e-06, "loss": 0.3083, "step": 37918 }, { "epoch": 1.740122068743977, "grad_norm": 0.5032056570053101, "learning_rate": 3.886814811097683e-06, "loss": 0.4782, "step": 37919 }, { "epoch": 1.7401679592492312, "grad_norm": 0.49358004331588745, "learning_rate": 3.8865757784011e-06, "loss": 0.3868, "step": 37920 }, { "epoch": 1.7402138497544857, "grad_norm": 0.4595424234867096, "learning_rate": 3.886336748381953e-06, "loss": 0.3818, "step": 37921 }, { "epoch": 1.7402597402597402, "grad_norm": 0.48590585589408875, "learning_rate": 3.886097721040816e-06, "loss": 0.4278, "step": 37922 }, { "epoch": 1.7403056307649947, "grad_norm": 0.4943716526031494, "learning_rate": 3.885858696378258e-06, "loss": 0.4097, "step": 37923 }, { "epoch": 1.7403515212702492, "grad_norm": 0.4562951624393463, "learning_rate": 3.885619674394862e-06, "loss": 0.3746, "step": 37924 }, { "epoch": 1.7403974117755037, "grad_norm": 0.4828827977180481, "learning_rate": 3.885380655091198e-06, "loss": 0.3777, "step": 37925 }, { "epoch": 1.7404433022807582, "grad_norm": 0.45610445737838745, "learning_rate": 3.885141638467841e-06, "loss": 0.3376, "step": 37926 }, { "epoch": 1.7404891927860127, "grad_norm": 0.45092371106147766, "learning_rate": 3.884902624525367e-06, "loss": 0.318, "step": 37927 }, { "epoch": 1.7405350832912672, "grad_norm": 0.4492829740047455, "learning_rate": 3.884663613264351e-06, "loss": 0.2999, "step": 37928 }, { "epoch": 1.7405809737965217, "grad_norm": 0.4760184586048126, "learning_rate": 3.884424604685365e-06, "loss": 0.3597, "step": 37929 }, { "epoch": 1.740626864301776, "grad_norm": 0.4797089099884033, "learning_rate": 3.884185598788988e-06, "loss": 0.4083, "step": 37930 }, { "epoch": 1.7406727548070304, "grad_norm": 0.42375004291534424, "learning_rate": 3.883946595575792e-06, "loss": 0.261, "step": 37931 }, { "epoch": 1.740718645312285, "grad_norm": 0.44299307465553284, "learning_rate": 3.883707595046351e-06, "loss": 0.3042, "step": 37932 }, { "epoch": 1.7407645358175392, "grad_norm": 0.5549852848052979, "learning_rate": 3.883468597201241e-06, "loss": 0.4073, "step": 37933 }, { "epoch": 1.7408104263227937, "grad_norm": 0.4480770528316498, "learning_rate": 3.883229602041038e-06, "loss": 0.3319, "step": 37934 }, { "epoch": 1.7408563168280482, "grad_norm": 0.4775012135505676, "learning_rate": 3.882990609566314e-06, "loss": 0.349, "step": 37935 }, { "epoch": 1.7409022073333027, "grad_norm": 0.45379960536956787, "learning_rate": 3.882751619777645e-06, "loss": 0.3435, "step": 37936 }, { "epoch": 1.7409480978385572, "grad_norm": 0.44584015011787415, "learning_rate": 3.882512632675608e-06, "loss": 0.3017, "step": 37937 }, { "epoch": 1.7409939883438117, "grad_norm": 0.48601266741752625, "learning_rate": 3.882273648260773e-06, "loss": 0.4027, "step": 37938 }, { "epoch": 1.7410398788490662, "grad_norm": 0.45220714807510376, "learning_rate": 3.882034666533718e-06, "loss": 0.314, "step": 37939 }, { "epoch": 1.7410857693543207, "grad_norm": 0.47872194647789, "learning_rate": 3.881795687495017e-06, "loss": 0.4294, "step": 37940 }, { "epoch": 1.7411316598595752, "grad_norm": 0.5045514702796936, "learning_rate": 3.881556711145242e-06, "loss": 0.4023, "step": 37941 }, { "epoch": 1.7411775503648295, "grad_norm": 0.496366947889328, "learning_rate": 3.881317737484973e-06, "loss": 0.4119, "step": 37942 }, { "epoch": 1.741223440870084, "grad_norm": 0.4768097996711731, "learning_rate": 3.881078766514782e-06, "loss": 0.4053, "step": 37943 }, { "epoch": 1.7412693313753385, "grad_norm": 0.4879123866558075, "learning_rate": 3.880839798235242e-06, "loss": 0.4046, "step": 37944 }, { "epoch": 1.741315221880593, "grad_norm": 0.4374123811721802, "learning_rate": 3.880600832646929e-06, "loss": 0.3049, "step": 37945 }, { "epoch": 1.7413611123858472, "grad_norm": 0.45772024989128113, "learning_rate": 3.880361869750419e-06, "loss": 0.3988, "step": 37946 }, { "epoch": 1.7414070028911017, "grad_norm": 0.40565404295921326, "learning_rate": 3.880122909546284e-06, "loss": 0.2447, "step": 37947 }, { "epoch": 1.7414528933963562, "grad_norm": 0.47302478551864624, "learning_rate": 3.879883952035102e-06, "loss": 0.3428, "step": 37948 }, { "epoch": 1.7414987839016107, "grad_norm": 0.4119592607021332, "learning_rate": 3.879644997217444e-06, "loss": 0.2555, "step": 37949 }, { "epoch": 1.7415446744068652, "grad_norm": 0.4624257981777191, "learning_rate": 3.879406045093885e-06, "loss": 0.3596, "step": 37950 }, { "epoch": 1.7415905649121197, "grad_norm": 0.4962199926376343, "learning_rate": 3.879167095665002e-06, "loss": 0.3725, "step": 37951 }, { "epoch": 1.7416364554173742, "grad_norm": 0.47904813289642334, "learning_rate": 3.878928148931369e-06, "loss": 0.3225, "step": 37952 }, { "epoch": 1.7416823459226287, "grad_norm": 0.5103809833526611, "learning_rate": 3.8786892048935605e-06, "loss": 0.4498, "step": 37953 }, { "epoch": 1.7417282364278832, "grad_norm": 0.4917394518852234, "learning_rate": 3.8784502635521485e-06, "loss": 0.4135, "step": 37954 }, { "epoch": 1.7417741269331375, "grad_norm": 0.462625116109848, "learning_rate": 3.878211324907711e-06, "loss": 0.3163, "step": 37955 }, { "epoch": 1.741820017438392, "grad_norm": 0.44750088453292847, "learning_rate": 3.877972388960822e-06, "loss": 0.3124, "step": 37956 }, { "epoch": 1.7418659079436465, "grad_norm": 0.4470539093017578, "learning_rate": 3.877733455712052e-06, "loss": 0.3197, "step": 37957 }, { "epoch": 1.7419117984489008, "grad_norm": 0.45596978068351746, "learning_rate": 3.877494525161982e-06, "loss": 0.3208, "step": 37958 }, { "epoch": 1.7419576889541553, "grad_norm": 0.48154351115226746, "learning_rate": 3.877255597311183e-06, "loss": 0.3892, "step": 37959 }, { "epoch": 1.7420035794594098, "grad_norm": 0.4488265812397003, "learning_rate": 3.877016672160229e-06, "loss": 0.3268, "step": 37960 }, { "epoch": 1.7420494699646643, "grad_norm": 0.4380034804344177, "learning_rate": 3.876777749709696e-06, "loss": 0.3005, "step": 37961 }, { "epoch": 1.7420953604699188, "grad_norm": 0.45686468482017517, "learning_rate": 3.876538829960158e-06, "loss": 0.3474, "step": 37962 }, { "epoch": 1.7421412509751733, "grad_norm": 0.47951850295066833, "learning_rate": 3.876299912912188e-06, "loss": 0.4007, "step": 37963 }, { "epoch": 1.7421871414804277, "grad_norm": 0.43697425723075867, "learning_rate": 3.876060998566364e-06, "loss": 0.2841, "step": 37964 }, { "epoch": 1.7422330319856822, "grad_norm": 0.4656594693660736, "learning_rate": 3.875822086923258e-06, "loss": 0.3883, "step": 37965 }, { "epoch": 1.7422789224909367, "grad_norm": 0.4658859372138977, "learning_rate": 3.875583177983444e-06, "loss": 0.387, "step": 37966 }, { "epoch": 1.7423248129961912, "grad_norm": 0.4475056529045105, "learning_rate": 3.8753442717475e-06, "loss": 0.2877, "step": 37967 }, { "epoch": 1.7423707035014455, "grad_norm": 0.4683990776538849, "learning_rate": 3.875105368215997e-06, "loss": 0.3246, "step": 37968 }, { "epoch": 1.7424165940067, "grad_norm": 0.45770785212516785, "learning_rate": 3.87486646738951e-06, "loss": 0.3271, "step": 37969 }, { "epoch": 1.7424624845119545, "grad_norm": 0.49078628420829773, "learning_rate": 3.874627569268615e-06, "loss": 0.3655, "step": 37970 }, { "epoch": 1.7425083750172088, "grad_norm": 0.4394998848438263, "learning_rate": 3.874388673853885e-06, "loss": 0.3003, "step": 37971 }, { "epoch": 1.7425542655224633, "grad_norm": 0.5385946035385132, "learning_rate": 3.874149781145894e-06, "loss": 0.2971, "step": 37972 }, { "epoch": 1.7426001560277178, "grad_norm": 0.46914124488830566, "learning_rate": 3.873910891145218e-06, "loss": 0.3531, "step": 37973 }, { "epoch": 1.7426460465329723, "grad_norm": 0.4598560333251953, "learning_rate": 3.873672003852432e-06, "loss": 0.3665, "step": 37974 }, { "epoch": 1.7426919370382268, "grad_norm": 0.4359232783317566, "learning_rate": 3.8734331192681075e-06, "loss": 0.2751, "step": 37975 }, { "epoch": 1.7427378275434813, "grad_norm": 0.46403488516807556, "learning_rate": 3.873194237392823e-06, "loss": 0.371, "step": 37976 }, { "epoch": 1.7427837180487358, "grad_norm": 0.4754049777984619, "learning_rate": 3.87295535822715e-06, "loss": 0.2499, "step": 37977 }, { "epoch": 1.7428296085539903, "grad_norm": 0.5008671879768372, "learning_rate": 3.872716481771661e-06, "loss": 0.4276, "step": 37978 }, { "epoch": 1.7428754990592448, "grad_norm": 0.5081888437271118, "learning_rate": 3.872477608026936e-06, "loss": 0.4419, "step": 37979 }, { "epoch": 1.7429213895644993, "grad_norm": 0.4507344663143158, "learning_rate": 3.872238736993547e-06, "loss": 0.3403, "step": 37980 }, { "epoch": 1.7429672800697535, "grad_norm": 0.5313845872879028, "learning_rate": 3.871999868672066e-06, "loss": 0.4081, "step": 37981 }, { "epoch": 1.743013170575008, "grad_norm": 0.4921160638332367, "learning_rate": 3.87176100306307e-06, "loss": 0.4115, "step": 37982 }, { "epoch": 1.7430590610802625, "grad_norm": 0.481585830450058, "learning_rate": 3.8715221401671335e-06, "loss": 0.3867, "step": 37983 }, { "epoch": 1.7431049515855168, "grad_norm": 0.4809044301509857, "learning_rate": 3.871283279984828e-06, "loss": 0.4208, "step": 37984 }, { "epoch": 1.7431508420907713, "grad_norm": 0.5015039443969727, "learning_rate": 3.871044422516731e-06, "loss": 0.3829, "step": 37985 }, { "epoch": 1.7431967325960258, "grad_norm": 0.47089001536369324, "learning_rate": 3.870805567763417e-06, "loss": 0.3234, "step": 37986 }, { "epoch": 1.7432426231012803, "grad_norm": 0.4653850793838501, "learning_rate": 3.870566715725459e-06, "loss": 0.3586, "step": 37987 }, { "epoch": 1.7432885136065348, "grad_norm": 0.45627743005752563, "learning_rate": 3.870327866403431e-06, "loss": 0.3234, "step": 37988 }, { "epoch": 1.7433344041117893, "grad_norm": 0.4074941575527191, "learning_rate": 3.870089019797907e-06, "loss": 0.2681, "step": 37989 }, { "epoch": 1.7433802946170438, "grad_norm": 0.48102593421936035, "learning_rate": 3.8698501759094646e-06, "loss": 0.3678, "step": 37990 }, { "epoch": 1.7434261851222983, "grad_norm": 0.47702229022979736, "learning_rate": 3.869611334738673e-06, "loss": 0.3366, "step": 37991 }, { "epoch": 1.7434720756275528, "grad_norm": 0.5347232222557068, "learning_rate": 3.8693724962861116e-06, "loss": 0.3559, "step": 37992 }, { "epoch": 1.743517966132807, "grad_norm": 0.5036343932151794, "learning_rate": 3.869133660552352e-06, "loss": 0.3582, "step": 37993 }, { "epoch": 1.7435638566380616, "grad_norm": 0.4728985130786896, "learning_rate": 3.868894827537968e-06, "loss": 0.3347, "step": 37994 }, { "epoch": 1.743609747143316, "grad_norm": 0.4648227393627167, "learning_rate": 3.868655997243536e-06, "loss": 0.3324, "step": 37995 }, { "epoch": 1.7436556376485703, "grad_norm": 0.46075934171676636, "learning_rate": 3.8684171696696295e-06, "loss": 0.3398, "step": 37996 }, { "epoch": 1.7437015281538248, "grad_norm": 0.4379268288612366, "learning_rate": 3.868178344816822e-06, "loss": 0.3261, "step": 37997 }, { "epoch": 1.7437474186590793, "grad_norm": 0.4336170256137848, "learning_rate": 3.867939522685688e-06, "loss": 0.2966, "step": 37998 }, { "epoch": 1.7437933091643338, "grad_norm": 0.4741629958152771, "learning_rate": 3.8677007032768035e-06, "loss": 0.3554, "step": 37999 }, { "epoch": 1.7438391996695883, "grad_norm": 0.45813441276550293, "learning_rate": 3.867461886590738e-06, "loss": 0.3228, "step": 38000 }, { "epoch": 1.7438850901748428, "grad_norm": 0.49603018164634705, "learning_rate": 3.867223072628073e-06, "loss": 0.3591, "step": 38001 }, { "epoch": 1.7439309806800973, "grad_norm": 0.49939361214637756, "learning_rate": 3.8669842613893785e-06, "loss": 0.3934, "step": 38002 }, { "epoch": 1.7439768711853518, "grad_norm": 0.47434869408607483, "learning_rate": 3.866745452875228e-06, "loss": 0.2877, "step": 38003 }, { "epoch": 1.7440227616906063, "grad_norm": 0.4390914738178253, "learning_rate": 3.866506647086198e-06, "loss": 0.3025, "step": 38004 }, { "epoch": 1.7440686521958608, "grad_norm": 0.47859013080596924, "learning_rate": 3.866267844022862e-06, "loss": 0.4251, "step": 38005 }, { "epoch": 1.744114542701115, "grad_norm": 0.4661463499069214, "learning_rate": 3.866029043685791e-06, "loss": 0.3569, "step": 38006 }, { "epoch": 1.7441604332063696, "grad_norm": 0.4449816346168518, "learning_rate": 3.865790246075565e-06, "loss": 0.3233, "step": 38007 }, { "epoch": 1.744206323711624, "grad_norm": 0.46287795901298523, "learning_rate": 3.865551451192756e-06, "loss": 0.3712, "step": 38008 }, { "epoch": 1.7442522142168784, "grad_norm": 0.4136410057544708, "learning_rate": 3.8653126590379364e-06, "loss": 0.2648, "step": 38009 }, { "epoch": 1.7442981047221329, "grad_norm": 0.488627552986145, "learning_rate": 3.865073869611683e-06, "loss": 0.4028, "step": 38010 }, { "epoch": 1.7443439952273874, "grad_norm": 0.45532307028770447, "learning_rate": 3.8648350829145676e-06, "loss": 0.3242, "step": 38011 }, { "epoch": 1.7443898857326419, "grad_norm": 0.46150466799736023, "learning_rate": 3.864596298947164e-06, "loss": 0.3391, "step": 38012 }, { "epoch": 1.7444357762378964, "grad_norm": 0.4998036026954651, "learning_rate": 3.864357517710051e-06, "loss": 0.3808, "step": 38013 }, { "epoch": 1.7444816667431509, "grad_norm": 0.4728691875934601, "learning_rate": 3.8641187392038e-06, "loss": 0.3578, "step": 38014 }, { "epoch": 1.7445275572484054, "grad_norm": 0.45582035183906555, "learning_rate": 3.863879963428981e-06, "loss": 0.3239, "step": 38015 }, { "epoch": 1.7445734477536599, "grad_norm": 0.44844090938568115, "learning_rate": 3.863641190386176e-06, "loss": 0.332, "step": 38016 }, { "epoch": 1.7446193382589144, "grad_norm": 0.5106685161590576, "learning_rate": 3.863402420075954e-06, "loss": 0.4414, "step": 38017 }, { "epoch": 1.7446652287641689, "grad_norm": 0.4613772928714752, "learning_rate": 3.863163652498891e-06, "loss": 0.3741, "step": 38018 }, { "epoch": 1.7447111192694231, "grad_norm": 0.46264442801475525, "learning_rate": 3.8629248876555595e-06, "loss": 0.3397, "step": 38019 }, { "epoch": 1.7447570097746776, "grad_norm": 0.48016098141670227, "learning_rate": 3.862686125546536e-06, "loss": 0.3738, "step": 38020 }, { "epoch": 1.7448029002799321, "grad_norm": 0.4793284237384796, "learning_rate": 3.862447366172393e-06, "loss": 0.3708, "step": 38021 }, { "epoch": 1.7448487907851864, "grad_norm": 0.4508094787597656, "learning_rate": 3.862208609533703e-06, "loss": 0.3317, "step": 38022 }, { "epoch": 1.744894681290441, "grad_norm": 0.4547170400619507, "learning_rate": 3.861969855631045e-06, "loss": 0.335, "step": 38023 }, { "epoch": 1.7449405717956954, "grad_norm": 0.4202292263507843, "learning_rate": 3.861731104464989e-06, "loss": 0.2755, "step": 38024 }, { "epoch": 1.74498646230095, "grad_norm": 0.46441975235939026, "learning_rate": 3.861492356036111e-06, "loss": 0.3803, "step": 38025 }, { "epoch": 1.7450323528062044, "grad_norm": 0.44824299216270447, "learning_rate": 3.861253610344984e-06, "loss": 0.3549, "step": 38026 }, { "epoch": 1.7450782433114589, "grad_norm": 0.4311133325099945, "learning_rate": 3.861014867392183e-06, "loss": 0.2776, "step": 38027 }, { "epoch": 1.7451241338167134, "grad_norm": 0.49576759338378906, "learning_rate": 3.860776127178281e-06, "loss": 0.4285, "step": 38028 }, { "epoch": 1.7451700243219679, "grad_norm": 0.4565376341342926, "learning_rate": 3.860537389703854e-06, "loss": 0.3213, "step": 38029 }, { "epoch": 1.7452159148272224, "grad_norm": 0.482695072889328, "learning_rate": 3.860298654969475e-06, "loss": 0.334, "step": 38030 }, { "epoch": 1.7452618053324767, "grad_norm": 0.4559727609157562, "learning_rate": 3.8600599229757165e-06, "loss": 0.3173, "step": 38031 }, { "epoch": 1.7453076958377312, "grad_norm": 0.41081032156944275, "learning_rate": 3.859821193723155e-06, "loss": 0.2458, "step": 38032 }, { "epoch": 1.7453535863429857, "grad_norm": 0.4661165475845337, "learning_rate": 3.859582467212365e-06, "loss": 0.2931, "step": 38033 }, { "epoch": 1.7453994768482402, "grad_norm": 0.4763866364955902, "learning_rate": 3.859343743443916e-06, "loss": 0.3963, "step": 38034 }, { "epoch": 1.7454453673534944, "grad_norm": 0.4529687762260437, "learning_rate": 3.859105022418389e-06, "loss": 0.3032, "step": 38035 }, { "epoch": 1.745491257858749, "grad_norm": 0.48756447434425354, "learning_rate": 3.858866304136354e-06, "loss": 0.4326, "step": 38036 }, { "epoch": 1.7455371483640034, "grad_norm": 0.4894818365573883, "learning_rate": 3.858627588598382e-06, "loss": 0.4243, "step": 38037 }, { "epoch": 1.745583038869258, "grad_norm": 0.5077701210975647, "learning_rate": 3.858388875805054e-06, "loss": 0.4161, "step": 38038 }, { "epoch": 1.7456289293745124, "grad_norm": 0.4977981448173523, "learning_rate": 3.8581501657569384e-06, "loss": 0.3705, "step": 38039 }, { "epoch": 1.745674819879767, "grad_norm": 0.4803827702999115, "learning_rate": 3.857911458454612e-06, "loss": 0.3498, "step": 38040 }, { "epoch": 1.7457207103850214, "grad_norm": 0.4764993488788605, "learning_rate": 3.857672753898648e-06, "loss": 0.3306, "step": 38041 }, { "epoch": 1.745766600890276, "grad_norm": 0.45429080724716187, "learning_rate": 3.857434052089621e-06, "loss": 0.3558, "step": 38042 }, { "epoch": 1.7458124913955304, "grad_norm": 0.5428577065467834, "learning_rate": 3.8571953530281025e-06, "loss": 0.44, "step": 38043 }, { "epoch": 1.7458583819007847, "grad_norm": 0.4856216311454773, "learning_rate": 3.8569566567146705e-06, "loss": 0.3831, "step": 38044 }, { "epoch": 1.7459042724060392, "grad_norm": 0.6090908646583557, "learning_rate": 3.856717963149897e-06, "loss": 0.3348, "step": 38045 }, { "epoch": 1.7459501629112937, "grad_norm": 0.47545376420021057, "learning_rate": 3.856479272334355e-06, "loss": 0.3393, "step": 38046 }, { "epoch": 1.745996053416548, "grad_norm": 0.45884227752685547, "learning_rate": 3.85624058426862e-06, "loss": 0.3537, "step": 38047 }, { "epoch": 1.7460419439218025, "grad_norm": 0.5147340893745422, "learning_rate": 3.856001898953265e-06, "loss": 0.3987, "step": 38048 }, { "epoch": 1.746087834427057, "grad_norm": 0.49896541237831116, "learning_rate": 3.8557632163888635e-06, "loss": 0.3771, "step": 38049 }, { "epoch": 1.7461337249323114, "grad_norm": 0.4938095211982727, "learning_rate": 3.8555245365759915e-06, "loss": 0.3527, "step": 38050 }, { "epoch": 1.746179615437566, "grad_norm": 0.4150000512599945, "learning_rate": 3.855285859515223e-06, "loss": 0.2832, "step": 38051 }, { "epoch": 1.7462255059428204, "grad_norm": 0.48580825328826904, "learning_rate": 3.85504718520713e-06, "loss": 0.372, "step": 38052 }, { "epoch": 1.746271396448075, "grad_norm": 0.5207895040512085, "learning_rate": 3.854808513652286e-06, "loss": 0.3678, "step": 38053 }, { "epoch": 1.7463172869533294, "grad_norm": 0.44285261631011963, "learning_rate": 3.854569844851267e-06, "loss": 0.3262, "step": 38054 }, { "epoch": 1.746363177458584, "grad_norm": 0.479082316160202, "learning_rate": 3.8543311788046465e-06, "loss": 0.3351, "step": 38055 }, { "epoch": 1.7464090679638384, "grad_norm": 0.49444156885147095, "learning_rate": 3.8540925155129955e-06, "loss": 0.3765, "step": 38056 }, { "epoch": 1.7464549584690927, "grad_norm": 0.4872749447822571, "learning_rate": 3.8538538549768934e-06, "loss": 0.3967, "step": 38057 }, { "epoch": 1.7465008489743472, "grad_norm": 0.45521658658981323, "learning_rate": 3.85361519719691e-06, "loss": 0.3224, "step": 38058 }, { "epoch": 1.7465467394796017, "grad_norm": 0.4507570266723633, "learning_rate": 3.853376542173619e-06, "loss": 0.3335, "step": 38059 }, { "epoch": 1.746592629984856, "grad_norm": 0.4997755289077759, "learning_rate": 3.853137889907597e-06, "loss": 0.3853, "step": 38060 }, { "epoch": 1.7466385204901105, "grad_norm": 0.5147897601127625, "learning_rate": 3.852899240399416e-06, "loss": 0.3902, "step": 38061 }, { "epoch": 1.746684410995365, "grad_norm": 0.46618714928627014, "learning_rate": 3.852660593649649e-06, "loss": 0.3459, "step": 38062 }, { "epoch": 1.7467303015006195, "grad_norm": 0.47780168056488037, "learning_rate": 3.8524219496588735e-06, "loss": 0.3754, "step": 38063 }, { "epoch": 1.746776192005874, "grad_norm": 0.46038898825645447, "learning_rate": 3.85218330842766e-06, "loss": 0.3218, "step": 38064 }, { "epoch": 1.7468220825111285, "grad_norm": 0.4934273660182953, "learning_rate": 3.851944669956582e-06, "loss": 0.387, "step": 38065 }, { "epoch": 1.746867973016383, "grad_norm": 0.4990277588367462, "learning_rate": 3.851706034246216e-06, "loss": 0.3233, "step": 38066 }, { "epoch": 1.7469138635216375, "grad_norm": 0.44005143642425537, "learning_rate": 3.851467401297135e-06, "loss": 0.2958, "step": 38067 }, { "epoch": 1.746959754026892, "grad_norm": 0.4454624652862549, "learning_rate": 3.851228771109911e-06, "loss": 0.2985, "step": 38068 }, { "epoch": 1.7470056445321465, "grad_norm": 0.44387415051460266, "learning_rate": 3.850990143685121e-06, "loss": 0.3138, "step": 38069 }, { "epoch": 1.7470515350374007, "grad_norm": 0.4584024250507355, "learning_rate": 3.850751519023337e-06, "loss": 0.3072, "step": 38070 }, { "epoch": 1.7470974255426552, "grad_norm": 0.468189537525177, "learning_rate": 3.85051289712513e-06, "loss": 0.3715, "step": 38071 }, { "epoch": 1.7471433160479097, "grad_norm": 0.5106051564216614, "learning_rate": 3.850274277991079e-06, "loss": 0.4245, "step": 38072 }, { "epoch": 1.747189206553164, "grad_norm": 0.4444620609283447, "learning_rate": 3.850035661621756e-06, "loss": 0.3358, "step": 38073 }, { "epoch": 1.7472350970584185, "grad_norm": 0.48902514576911926, "learning_rate": 3.849797048017733e-06, "loss": 0.369, "step": 38074 }, { "epoch": 1.747280987563673, "grad_norm": 0.47217416763305664, "learning_rate": 3.8495584371795865e-06, "loss": 0.3359, "step": 38075 }, { "epoch": 1.7473268780689275, "grad_norm": 0.4627039134502411, "learning_rate": 3.849319829107888e-06, "loss": 0.3189, "step": 38076 }, { "epoch": 1.747372768574182, "grad_norm": 0.48779672384262085, "learning_rate": 3.849081223803211e-06, "loss": 0.3575, "step": 38077 }, { "epoch": 1.7474186590794365, "grad_norm": 0.4559279978275299, "learning_rate": 3.8488426212661325e-06, "loss": 0.3155, "step": 38078 }, { "epoch": 1.747464549584691, "grad_norm": 0.46457433700561523, "learning_rate": 3.848604021497224e-06, "loss": 0.3368, "step": 38079 }, { "epoch": 1.7475104400899455, "grad_norm": 0.49840980768203735, "learning_rate": 3.848365424497058e-06, "loss": 0.3647, "step": 38080 }, { "epoch": 1.7475563305952, "grad_norm": 0.4659261703491211, "learning_rate": 3.84812683026621e-06, "loss": 0.3632, "step": 38081 }, { "epoch": 1.7476022211004543, "grad_norm": 0.454520046710968, "learning_rate": 3.8478882388052555e-06, "loss": 0.326, "step": 38082 }, { "epoch": 1.7476481116057088, "grad_norm": 0.4582139849662781, "learning_rate": 3.847649650114764e-06, "loss": 0.385, "step": 38083 }, { "epoch": 1.7476940021109633, "grad_norm": 0.4651276469230652, "learning_rate": 3.8474110641953115e-06, "loss": 0.3444, "step": 38084 }, { "epoch": 1.7477398926162175, "grad_norm": 0.4231316149234772, "learning_rate": 3.8471724810474734e-06, "loss": 0.2776, "step": 38085 }, { "epoch": 1.747785783121472, "grad_norm": 0.5295804738998413, "learning_rate": 3.84693390067182e-06, "loss": 0.4412, "step": 38086 }, { "epoch": 1.7478316736267265, "grad_norm": 0.4610322415828705, "learning_rate": 3.846695323068926e-06, "loss": 0.3405, "step": 38087 }, { "epoch": 1.747877564131981, "grad_norm": 0.4613892138004303, "learning_rate": 3.846456748239367e-06, "loss": 0.3285, "step": 38088 }, { "epoch": 1.7479234546372355, "grad_norm": 0.4432855546474457, "learning_rate": 3.846218176183715e-06, "loss": 0.3348, "step": 38089 }, { "epoch": 1.74796934514249, "grad_norm": 0.49812015891075134, "learning_rate": 3.845979606902544e-06, "loss": 0.3864, "step": 38090 }, { "epoch": 1.7480152356477445, "grad_norm": 0.5012561678886414, "learning_rate": 3.8457410403964285e-06, "loss": 0.3881, "step": 38091 }, { "epoch": 1.748061126152999, "grad_norm": 0.48442694544792175, "learning_rate": 3.8455024766659415e-06, "loss": 0.3635, "step": 38092 }, { "epoch": 1.7481070166582535, "grad_norm": 0.5005574822425842, "learning_rate": 3.845263915711655e-06, "loss": 0.4273, "step": 38093 }, { "epoch": 1.748152907163508, "grad_norm": 0.46281829476356506, "learning_rate": 3.845025357534147e-06, "loss": 0.3189, "step": 38094 }, { "epoch": 1.7481987976687623, "grad_norm": 0.4967372417449951, "learning_rate": 3.8447868021339864e-06, "loss": 0.4053, "step": 38095 }, { "epoch": 1.7482446881740168, "grad_norm": 0.4903258979320526, "learning_rate": 3.8445482495117496e-06, "loss": 0.3645, "step": 38096 }, { "epoch": 1.7482905786792713, "grad_norm": 0.4525153934955597, "learning_rate": 3.8443096996680095e-06, "loss": 0.3657, "step": 38097 }, { "epoch": 1.7483364691845256, "grad_norm": 0.46167466044425964, "learning_rate": 3.8440711526033404e-06, "loss": 0.3123, "step": 38098 }, { "epoch": 1.74838235968978, "grad_norm": 0.47946059703826904, "learning_rate": 3.843832608318313e-06, "loss": 0.3762, "step": 38099 }, { "epoch": 1.7484282501950346, "grad_norm": 0.481432169675827, "learning_rate": 3.843594066813507e-06, "loss": 0.4092, "step": 38100 }, { "epoch": 1.748474140700289, "grad_norm": 0.49810469150543213, "learning_rate": 3.8433555280894915e-06, "loss": 0.3522, "step": 38101 }, { "epoch": 1.7485200312055436, "grad_norm": 0.46181628108024597, "learning_rate": 3.843116992146838e-06, "loss": 0.3354, "step": 38102 }, { "epoch": 1.748565921710798, "grad_norm": 0.4687594771385193, "learning_rate": 3.842878458986126e-06, "loss": 0.3236, "step": 38103 }, { "epoch": 1.7486118122160526, "grad_norm": 0.5257999897003174, "learning_rate": 3.842639928607925e-06, "loss": 0.345, "step": 38104 }, { "epoch": 1.748657702721307, "grad_norm": 0.4715394079685211, "learning_rate": 3.84240140101281e-06, "loss": 0.3105, "step": 38105 }, { "epoch": 1.7487035932265615, "grad_norm": 0.4646730422973633, "learning_rate": 3.842162876201354e-06, "loss": 0.3191, "step": 38106 }, { "epoch": 1.748749483731816, "grad_norm": 0.4432922303676605, "learning_rate": 3.841924354174131e-06, "loss": 0.278, "step": 38107 }, { "epoch": 1.7487953742370703, "grad_norm": 0.5247595906257629, "learning_rate": 3.8416858349317134e-06, "loss": 0.4545, "step": 38108 }, { "epoch": 1.7488412647423248, "grad_norm": 0.4653107821941376, "learning_rate": 3.841447318474677e-06, "loss": 0.3733, "step": 38109 }, { "epoch": 1.7488871552475793, "grad_norm": 0.454339861869812, "learning_rate": 3.8412088048035945e-06, "loss": 0.3437, "step": 38110 }, { "epoch": 1.7489330457528336, "grad_norm": 0.45869478583335876, "learning_rate": 3.840970293919037e-06, "loss": 0.3529, "step": 38111 }, { "epoch": 1.748978936258088, "grad_norm": 0.4230233430862427, "learning_rate": 3.840731785821583e-06, "loss": 0.2824, "step": 38112 }, { "epoch": 1.7490248267633426, "grad_norm": 0.49271175265312195, "learning_rate": 3.840493280511802e-06, "loss": 0.3704, "step": 38113 }, { "epoch": 1.749070717268597, "grad_norm": 0.466296911239624, "learning_rate": 3.840254777990266e-06, "loss": 0.3688, "step": 38114 }, { "epoch": 1.7491166077738516, "grad_norm": 0.5173819661140442, "learning_rate": 3.840016278257555e-06, "loss": 0.4086, "step": 38115 }, { "epoch": 1.749162498279106, "grad_norm": 0.4452902674674988, "learning_rate": 3.839777781314238e-06, "loss": 0.3332, "step": 38116 }, { "epoch": 1.7492083887843606, "grad_norm": 0.4867652356624603, "learning_rate": 3.83953928716089e-06, "loss": 0.375, "step": 38117 }, { "epoch": 1.749254279289615, "grad_norm": 0.4646952450275421, "learning_rate": 3.839300795798081e-06, "loss": 0.3542, "step": 38118 }, { "epoch": 1.7493001697948696, "grad_norm": 0.49498888850212097, "learning_rate": 3.839062307226389e-06, "loss": 0.4197, "step": 38119 }, { "epoch": 1.7493460603001239, "grad_norm": 0.4770100712776184, "learning_rate": 3.838823821446387e-06, "loss": 0.3374, "step": 38120 }, { "epoch": 1.7493919508053783, "grad_norm": 0.4847092032432556, "learning_rate": 3.838585338458644e-06, "loss": 0.3486, "step": 38121 }, { "epoch": 1.7494378413106328, "grad_norm": 0.5174691081047058, "learning_rate": 3.838346858263739e-06, "loss": 0.3694, "step": 38122 }, { "epoch": 1.7494837318158873, "grad_norm": 0.4751739799976349, "learning_rate": 3.838108380862243e-06, "loss": 0.3836, "step": 38123 }, { "epoch": 1.7495296223211416, "grad_norm": 0.5592171549797058, "learning_rate": 3.837869906254728e-06, "loss": 0.3699, "step": 38124 }, { "epoch": 1.7495755128263961, "grad_norm": 0.44661763310432434, "learning_rate": 3.837631434441771e-06, "loss": 0.3082, "step": 38125 }, { "epoch": 1.7496214033316506, "grad_norm": 0.4721638560295105, "learning_rate": 3.837392965423943e-06, "loss": 0.3444, "step": 38126 }, { "epoch": 1.7496672938369051, "grad_norm": 0.46432891488075256, "learning_rate": 3.837154499201818e-06, "loss": 0.3388, "step": 38127 }, { "epoch": 1.7497131843421596, "grad_norm": 0.4756113886833191, "learning_rate": 3.83691603577597e-06, "loss": 0.3784, "step": 38128 }, { "epoch": 1.749759074847414, "grad_norm": 0.46746310591697693, "learning_rate": 3.836677575146971e-06, "loss": 0.3493, "step": 38129 }, { "epoch": 1.7498049653526686, "grad_norm": 0.47583332657814026, "learning_rate": 3.836439117315393e-06, "loss": 0.3876, "step": 38130 }, { "epoch": 1.749850855857923, "grad_norm": 0.46496549248695374, "learning_rate": 3.836200662281815e-06, "loss": 0.3808, "step": 38131 }, { "epoch": 1.7498967463631776, "grad_norm": 0.4402986764907837, "learning_rate": 3.835962210046807e-06, "loss": 0.3103, "step": 38132 }, { "epoch": 1.7499426368684319, "grad_norm": 0.6384602189064026, "learning_rate": 3.835723760610941e-06, "loss": 0.3013, "step": 38133 }, { "epoch": 1.7499885273736864, "grad_norm": 0.4458513855934143, "learning_rate": 3.835485313974794e-06, "loss": 0.3268, "step": 38134 }, { "epoch": 1.7500344178789409, "grad_norm": 0.46175071597099304, "learning_rate": 3.835246870138936e-06, "loss": 0.3634, "step": 38135 }, { "epoch": 1.7500803083841951, "grad_norm": 0.4617447257041931, "learning_rate": 3.83500842910394e-06, "loss": 0.3779, "step": 38136 }, { "epoch": 1.7501261988894496, "grad_norm": 0.46795785427093506, "learning_rate": 3.834769990870383e-06, "loss": 0.4061, "step": 38137 }, { "epoch": 1.7501720893947041, "grad_norm": 0.45606985688209534, "learning_rate": 3.8345315554388364e-06, "loss": 0.318, "step": 38138 }, { "epoch": 1.7502179798999586, "grad_norm": 0.441455215215683, "learning_rate": 3.834293122809873e-06, "loss": 0.3284, "step": 38139 }, { "epoch": 1.7502638704052131, "grad_norm": 0.4359261989593506, "learning_rate": 3.8340546929840675e-06, "loss": 0.331, "step": 38140 }, { "epoch": 1.7503097609104676, "grad_norm": 0.49580463767051697, "learning_rate": 3.833816265961993e-06, "loss": 0.3246, "step": 38141 }, { "epoch": 1.7503556514157221, "grad_norm": 0.4948081374168396, "learning_rate": 3.833577841744219e-06, "loss": 0.3516, "step": 38142 }, { "epoch": 1.7504015419209766, "grad_norm": 0.4832538962364197, "learning_rate": 3.833339420331324e-06, "loss": 0.381, "step": 38143 }, { "epoch": 1.7504474324262311, "grad_norm": 0.4935844838619232, "learning_rate": 3.8331010017238815e-06, "loss": 0.3631, "step": 38144 }, { "epoch": 1.7504933229314856, "grad_norm": 0.4307432770729065, "learning_rate": 3.832862585922461e-06, "loss": 0.2874, "step": 38145 }, { "epoch": 1.75053921343674, "grad_norm": 0.4742552936077118, "learning_rate": 3.832624172927638e-06, "loss": 0.3332, "step": 38146 }, { "epoch": 1.7505851039419944, "grad_norm": 0.4865756630897522, "learning_rate": 3.8323857627399855e-06, "loss": 0.3702, "step": 38147 }, { "epoch": 1.750630994447249, "grad_norm": 0.4902043640613556, "learning_rate": 3.832147355360077e-06, "loss": 0.3228, "step": 38148 }, { "epoch": 1.7506768849525032, "grad_norm": 0.5285575985908508, "learning_rate": 3.8319089507884845e-06, "loss": 0.4122, "step": 38149 }, { "epoch": 1.7507227754577577, "grad_norm": 0.4738818109035492, "learning_rate": 3.8316705490257836e-06, "loss": 0.3088, "step": 38150 }, { "epoch": 1.7507686659630122, "grad_norm": 0.453727662563324, "learning_rate": 3.831432150072546e-06, "loss": 0.3107, "step": 38151 }, { "epoch": 1.7508145564682667, "grad_norm": 0.5346817374229431, "learning_rate": 3.831193753929343e-06, "loss": 0.2537, "step": 38152 }, { "epoch": 1.7508604469735212, "grad_norm": 0.4578160047531128, "learning_rate": 3.830955360596753e-06, "loss": 0.361, "step": 38153 }, { "epoch": 1.7509063374787757, "grad_norm": 0.45820382237434387, "learning_rate": 3.830716970075345e-06, "loss": 0.309, "step": 38154 }, { "epoch": 1.7509522279840302, "grad_norm": 0.4526442289352417, "learning_rate": 3.830478582365693e-06, "loss": 0.3002, "step": 38155 }, { "epoch": 1.7509981184892847, "grad_norm": 0.5117863416671753, "learning_rate": 3.830240197468373e-06, "loss": 0.4113, "step": 38156 }, { "epoch": 1.7510440089945392, "grad_norm": 0.4909150302410126, "learning_rate": 3.830001815383956e-06, "loss": 0.3647, "step": 38157 }, { "epoch": 1.7510898994997937, "grad_norm": 0.4600817859172821, "learning_rate": 3.829763436113012e-06, "loss": 0.3703, "step": 38158 }, { "epoch": 1.751135790005048, "grad_norm": 0.46346205472946167, "learning_rate": 3.829525059656121e-06, "loss": 0.347, "step": 38159 }, { "epoch": 1.7511816805103024, "grad_norm": 0.49247220158576965, "learning_rate": 3.8292866860138525e-06, "loss": 0.4084, "step": 38160 }, { "epoch": 1.751227571015557, "grad_norm": 0.5336139798164368, "learning_rate": 3.8290483151867785e-06, "loss": 0.4194, "step": 38161 }, { "epoch": 1.7512734615208112, "grad_norm": 0.45903024077415466, "learning_rate": 3.828809947175475e-06, "loss": 0.35, "step": 38162 }, { "epoch": 1.7513193520260657, "grad_norm": 0.503702700138092, "learning_rate": 3.828571581980514e-06, "loss": 0.3654, "step": 38163 }, { "epoch": 1.7513652425313202, "grad_norm": 0.509530782699585, "learning_rate": 3.828333219602466e-06, "loss": 0.365, "step": 38164 }, { "epoch": 1.7514111330365747, "grad_norm": 0.4682491421699524, "learning_rate": 3.82809486004191e-06, "loss": 0.3694, "step": 38165 }, { "epoch": 1.7514570235418292, "grad_norm": 0.4417170584201813, "learning_rate": 3.827856503299416e-06, "loss": 0.2936, "step": 38166 }, { "epoch": 1.7515029140470837, "grad_norm": 0.48972633481025696, "learning_rate": 3.827618149375555e-06, "loss": 0.3991, "step": 38167 }, { "epoch": 1.7515488045523382, "grad_norm": 0.4649178683757782, "learning_rate": 3.827379798270904e-06, "loss": 0.3636, "step": 38168 }, { "epoch": 1.7515946950575927, "grad_norm": 0.5182351469993591, "learning_rate": 3.8271414499860345e-06, "loss": 0.3988, "step": 38169 }, { "epoch": 1.7516405855628472, "grad_norm": 0.46883293986320496, "learning_rate": 3.826903104521517e-06, "loss": 0.3451, "step": 38170 }, { "epoch": 1.7516864760681015, "grad_norm": 0.46674907207489014, "learning_rate": 3.8266647618779315e-06, "loss": 0.3236, "step": 38171 }, { "epoch": 1.751732366573356, "grad_norm": 0.4476717412471771, "learning_rate": 3.826426422055845e-06, "loss": 0.3049, "step": 38172 }, { "epoch": 1.7517782570786105, "grad_norm": 0.47447434067726135, "learning_rate": 3.826188085055831e-06, "loss": 0.3508, "step": 38173 }, { "epoch": 1.7518241475838647, "grad_norm": 0.4956931471824646, "learning_rate": 3.825949750878466e-06, "loss": 0.4407, "step": 38174 }, { "epoch": 1.7518700380891192, "grad_norm": 0.4717135429382324, "learning_rate": 3.8257114195243215e-06, "loss": 0.3339, "step": 38175 }, { "epoch": 1.7519159285943737, "grad_norm": 0.5097389221191406, "learning_rate": 3.825473090993969e-06, "loss": 0.3977, "step": 38176 }, { "epoch": 1.7519618190996282, "grad_norm": 0.43335920572280884, "learning_rate": 3.8252347652879845e-06, "loss": 0.3137, "step": 38177 }, { "epoch": 1.7520077096048827, "grad_norm": 0.49420949816703796, "learning_rate": 3.8249964424069394e-06, "loss": 0.3953, "step": 38178 }, { "epoch": 1.7520536001101372, "grad_norm": 0.5198296308517456, "learning_rate": 3.824758122351405e-06, "loss": 0.4862, "step": 38179 }, { "epoch": 1.7520994906153917, "grad_norm": 0.49265122413635254, "learning_rate": 3.8245198051219585e-06, "loss": 0.4401, "step": 38180 }, { "epoch": 1.7521453811206462, "grad_norm": 0.4839162230491638, "learning_rate": 3.824281490719171e-06, "loss": 0.3413, "step": 38181 }, { "epoch": 1.7521912716259007, "grad_norm": 0.48862046003341675, "learning_rate": 3.824043179143615e-06, "loss": 0.3696, "step": 38182 }, { "epoch": 1.7522371621311552, "grad_norm": 0.44298094511032104, "learning_rate": 3.823804870395863e-06, "loss": 0.2985, "step": 38183 }, { "epoch": 1.7522830526364095, "grad_norm": 0.4870200455188751, "learning_rate": 3.823566564476491e-06, "loss": 0.3229, "step": 38184 }, { "epoch": 1.752328943141664, "grad_norm": 0.4738350212574005, "learning_rate": 3.823328261386069e-06, "loss": 0.3569, "step": 38185 }, { "epoch": 1.7523748336469185, "grad_norm": 0.4842919707298279, "learning_rate": 3.82308996112517e-06, "loss": 0.3548, "step": 38186 }, { "epoch": 1.7524207241521728, "grad_norm": 0.43787020444869995, "learning_rate": 3.82285166369437e-06, "loss": 0.3055, "step": 38187 }, { "epoch": 1.7524666146574273, "grad_norm": 0.49981147050857544, "learning_rate": 3.82261336909424e-06, "loss": 0.3632, "step": 38188 }, { "epoch": 1.7525125051626818, "grad_norm": 0.5303622484207153, "learning_rate": 3.822375077325352e-06, "loss": 0.4237, "step": 38189 }, { "epoch": 1.7525583956679363, "grad_norm": 0.43769294023513794, "learning_rate": 3.822136788388282e-06, "loss": 0.3236, "step": 38190 }, { "epoch": 1.7526042861731908, "grad_norm": 0.463581383228302, "learning_rate": 3.821898502283601e-06, "loss": 0.3084, "step": 38191 }, { "epoch": 1.7526501766784452, "grad_norm": 0.4965786933898926, "learning_rate": 3.82166021901188e-06, "loss": 0.4554, "step": 38192 }, { "epoch": 1.7526960671836997, "grad_norm": 0.4936651885509491, "learning_rate": 3.821421938573697e-06, "loss": 0.3791, "step": 38193 }, { "epoch": 1.7527419576889542, "grad_norm": 0.5074048042297363, "learning_rate": 3.821183660969622e-06, "loss": 0.3743, "step": 38194 }, { "epoch": 1.7527878481942087, "grad_norm": 0.5093694925308228, "learning_rate": 3.820945386200226e-06, "loss": 0.4438, "step": 38195 }, { "epoch": 1.7528337386994632, "grad_norm": 0.4397948086261749, "learning_rate": 3.820707114266086e-06, "loss": 0.3425, "step": 38196 }, { "epoch": 1.7528796292047175, "grad_norm": 0.49644970893859863, "learning_rate": 3.820468845167773e-06, "loss": 0.4014, "step": 38197 }, { "epoch": 1.752925519709972, "grad_norm": 0.4853942394256592, "learning_rate": 3.82023057890586e-06, "loss": 0.3985, "step": 38198 }, { "epoch": 1.7529714102152265, "grad_norm": 0.5195972919464111, "learning_rate": 3.819992315480921e-06, "loss": 0.4851, "step": 38199 }, { "epoch": 1.7530173007204808, "grad_norm": 0.42381638288497925, "learning_rate": 3.819754054893527e-06, "loss": 0.2741, "step": 38200 }, { "epoch": 1.7530631912257353, "grad_norm": 0.4638938009738922, "learning_rate": 3.819515797144251e-06, "loss": 0.3702, "step": 38201 }, { "epoch": 1.7531090817309898, "grad_norm": 0.4675822854042053, "learning_rate": 3.8192775422336685e-06, "loss": 0.3311, "step": 38202 }, { "epoch": 1.7531549722362443, "grad_norm": 0.4215841293334961, "learning_rate": 3.819039290162351e-06, "loss": 0.2788, "step": 38203 }, { "epoch": 1.7532008627414988, "grad_norm": 0.561294674873352, "learning_rate": 3.818801040930871e-06, "loss": 0.4176, "step": 38204 }, { "epoch": 1.7532467532467533, "grad_norm": 0.4887683689594269, "learning_rate": 3.818562794539801e-06, "loss": 0.3823, "step": 38205 }, { "epoch": 1.7532926437520078, "grad_norm": 0.5062733888626099, "learning_rate": 3.818324550989716e-06, "loss": 0.3712, "step": 38206 }, { "epoch": 1.7533385342572623, "grad_norm": 0.4431309103965759, "learning_rate": 3.818086310281185e-06, "loss": 0.3052, "step": 38207 }, { "epoch": 1.7533844247625168, "grad_norm": 0.7142221927642822, "learning_rate": 3.817848072414786e-06, "loss": 0.3849, "step": 38208 }, { "epoch": 1.753430315267771, "grad_norm": 0.44148916006088257, "learning_rate": 3.817609837391088e-06, "loss": 0.303, "step": 38209 }, { "epoch": 1.7534762057730255, "grad_norm": 0.5489116311073303, "learning_rate": 3.817371605210665e-06, "loss": 0.4632, "step": 38210 }, { "epoch": 1.75352209627828, "grad_norm": 0.43401098251342773, "learning_rate": 3.817133375874091e-06, "loss": 0.3572, "step": 38211 }, { "epoch": 1.7535679867835345, "grad_norm": 0.4292687177658081, "learning_rate": 3.816895149381938e-06, "loss": 0.2786, "step": 38212 }, { "epoch": 1.7536138772887888, "grad_norm": 0.4996427595615387, "learning_rate": 3.816656925734777e-06, "loss": 0.384, "step": 38213 }, { "epoch": 1.7536597677940433, "grad_norm": 0.4408896863460541, "learning_rate": 3.816418704933184e-06, "loss": 0.2822, "step": 38214 }, { "epoch": 1.7537056582992978, "grad_norm": 0.4812081456184387, "learning_rate": 3.8161804869777315e-06, "loss": 0.4075, "step": 38215 }, { "epoch": 1.7537515488045523, "grad_norm": 0.47232586145401, "learning_rate": 3.815942271868991e-06, "loss": 0.3126, "step": 38216 }, { "epoch": 1.7537974393098068, "grad_norm": 0.46676433086395264, "learning_rate": 3.815704059607534e-06, "loss": 0.3109, "step": 38217 }, { "epoch": 1.7538433298150613, "grad_norm": 0.4698551595211029, "learning_rate": 3.8154658501939355e-06, "loss": 0.3201, "step": 38218 }, { "epoch": 1.7538892203203158, "grad_norm": 0.4659937620162964, "learning_rate": 3.8152276436287686e-06, "loss": 0.3305, "step": 38219 }, { "epoch": 1.7539351108255703, "grad_norm": 0.4518466591835022, "learning_rate": 3.8149894399126045e-06, "loss": 0.3299, "step": 38220 }, { "epoch": 1.7539810013308248, "grad_norm": 0.42783045768737793, "learning_rate": 3.814751239046017e-06, "loss": 0.2663, "step": 38221 }, { "epoch": 1.754026891836079, "grad_norm": 0.5260459780693054, "learning_rate": 3.814513041029579e-06, "loss": 0.4022, "step": 38222 }, { "epoch": 1.7540727823413336, "grad_norm": 0.48999151587486267, "learning_rate": 3.814274845863861e-06, "loss": 0.3774, "step": 38223 }, { "epoch": 1.754118672846588, "grad_norm": 0.5121983289718628, "learning_rate": 3.81403665354944e-06, "loss": 0.3911, "step": 38224 }, { "epoch": 1.7541645633518423, "grad_norm": 0.47604820132255554, "learning_rate": 3.8137984640868863e-06, "loss": 0.3423, "step": 38225 }, { "epoch": 1.7542104538570968, "grad_norm": 0.471549928188324, "learning_rate": 3.813560277476772e-06, "loss": 0.3698, "step": 38226 }, { "epoch": 1.7542563443623513, "grad_norm": 0.43529465794563293, "learning_rate": 3.8133220937196718e-06, "loss": 0.2943, "step": 38227 }, { "epoch": 1.7543022348676058, "grad_norm": 0.4500519633293152, "learning_rate": 3.813083912816158e-06, "loss": 0.3436, "step": 38228 }, { "epoch": 1.7543481253728603, "grad_norm": 0.4286739230155945, "learning_rate": 3.8128457347668e-06, "loss": 0.2709, "step": 38229 }, { "epoch": 1.7543940158781148, "grad_norm": 0.507461428642273, "learning_rate": 3.8126075595721756e-06, "loss": 0.3778, "step": 38230 }, { "epoch": 1.7544399063833693, "grad_norm": 0.4735884666442871, "learning_rate": 3.812369387232855e-06, "loss": 0.3886, "step": 38231 }, { "epoch": 1.7544857968886238, "grad_norm": 0.4654468595981598, "learning_rate": 3.8121312177494107e-06, "loss": 0.3396, "step": 38232 }, { "epoch": 1.7545316873938783, "grad_norm": 0.4904736280441284, "learning_rate": 3.8118930511224168e-06, "loss": 0.3678, "step": 38233 }, { "epoch": 1.7545775778991328, "grad_norm": 0.44794490933418274, "learning_rate": 3.811654887352445e-06, "loss": 0.3165, "step": 38234 }, { "epoch": 1.754623468404387, "grad_norm": 0.45835959911346436, "learning_rate": 3.8114167264400658e-06, "loss": 0.3273, "step": 38235 }, { "epoch": 1.7546693589096416, "grad_norm": 0.47494685649871826, "learning_rate": 3.8111785683858563e-06, "loss": 0.3811, "step": 38236 }, { "epoch": 1.754715249414896, "grad_norm": 0.5015307068824768, "learning_rate": 3.8109404131903886e-06, "loss": 0.4401, "step": 38237 }, { "epoch": 1.7547611399201504, "grad_norm": 0.45531246066093445, "learning_rate": 3.810702260854231e-06, "loss": 0.3172, "step": 38238 }, { "epoch": 1.7548070304254049, "grad_norm": 0.4644160270690918, "learning_rate": 3.8104641113779606e-06, "loss": 0.3797, "step": 38239 }, { "epoch": 1.7548529209306594, "grad_norm": 0.49182045459747314, "learning_rate": 3.8102259647621494e-06, "loss": 0.347, "step": 38240 }, { "epoch": 1.7548988114359139, "grad_norm": 0.4740894138813019, "learning_rate": 3.8099878210073676e-06, "loss": 0.3179, "step": 38241 }, { "epoch": 1.7549447019411684, "grad_norm": 0.47984302043914795, "learning_rate": 3.80974968011419e-06, "loss": 0.4062, "step": 38242 }, { "epoch": 1.7549905924464229, "grad_norm": 0.48271089792251587, "learning_rate": 3.8095115420831895e-06, "loss": 0.39, "step": 38243 }, { "epoch": 1.7550364829516774, "grad_norm": 0.4634411036968231, "learning_rate": 3.809273406914936e-06, "loss": 0.3611, "step": 38244 }, { "epoch": 1.7550823734569319, "grad_norm": 0.45366501808166504, "learning_rate": 3.809035274610006e-06, "loss": 0.3476, "step": 38245 }, { "epoch": 1.7551282639621864, "grad_norm": 0.454681932926178, "learning_rate": 3.8087971451689708e-06, "loss": 0.3877, "step": 38246 }, { "epoch": 1.7551741544674409, "grad_norm": 0.4686877727508545, "learning_rate": 3.808559018592402e-06, "loss": 0.3166, "step": 38247 }, { "epoch": 1.7552200449726951, "grad_norm": 0.5123105645179749, "learning_rate": 3.8083208948808713e-06, "loss": 0.4276, "step": 38248 }, { "epoch": 1.7552659354779496, "grad_norm": 0.4363461434841156, "learning_rate": 3.8080827740349545e-06, "loss": 0.3271, "step": 38249 }, { "epoch": 1.7553118259832041, "grad_norm": 0.4420645833015442, "learning_rate": 3.8078446560552223e-06, "loss": 0.3274, "step": 38250 }, { "epoch": 1.7553577164884584, "grad_norm": 0.5362547039985657, "learning_rate": 3.807606540942245e-06, "loss": 0.4109, "step": 38251 }, { "epoch": 1.755403606993713, "grad_norm": 0.42891788482666016, "learning_rate": 3.8073684286966e-06, "loss": 0.2917, "step": 38252 }, { "epoch": 1.7554494974989674, "grad_norm": 0.5154009461402893, "learning_rate": 3.807130319318858e-06, "loss": 0.4774, "step": 38253 }, { "epoch": 1.755495388004222, "grad_norm": 0.5612766146659851, "learning_rate": 3.806892212809589e-06, "loss": 0.4641, "step": 38254 }, { "epoch": 1.7555412785094764, "grad_norm": 0.5181286334991455, "learning_rate": 3.80665410916937e-06, "loss": 0.5061, "step": 38255 }, { "epoch": 1.7555871690147309, "grad_norm": 0.531748354434967, "learning_rate": 3.80641600839877e-06, "loss": 0.412, "step": 38256 }, { "epoch": 1.7556330595199854, "grad_norm": 0.4841928482055664, "learning_rate": 3.806177910498361e-06, "loss": 0.3972, "step": 38257 }, { "epoch": 1.7556789500252399, "grad_norm": 0.4579367935657501, "learning_rate": 3.80593981546872e-06, "loss": 0.3325, "step": 38258 }, { "epoch": 1.7557248405304944, "grad_norm": 0.6023992300033569, "learning_rate": 3.8057017233104175e-06, "loss": 0.4487, "step": 38259 }, { "epoch": 1.7557707310357487, "grad_norm": 0.46867311000823975, "learning_rate": 3.8054636340240225e-06, "loss": 0.333, "step": 38260 }, { "epoch": 1.7558166215410032, "grad_norm": 0.45162853598594666, "learning_rate": 3.8052255476101124e-06, "loss": 0.3372, "step": 38261 }, { "epoch": 1.7558625120462577, "grad_norm": 0.46779870986938477, "learning_rate": 3.8049874640692575e-06, "loss": 0.3574, "step": 38262 }, { "epoch": 1.755908402551512, "grad_norm": 0.44593721628189087, "learning_rate": 3.80474938340203e-06, "loss": 0.2959, "step": 38263 }, { "epoch": 1.7559542930567664, "grad_norm": 0.4924672842025757, "learning_rate": 3.804511305609004e-06, "loss": 0.4088, "step": 38264 }, { "epoch": 1.756000183562021, "grad_norm": 0.4211216866970062, "learning_rate": 3.804273230690751e-06, "loss": 0.3098, "step": 38265 }, { "epoch": 1.7560460740672754, "grad_norm": 0.46275594830513, "learning_rate": 3.8040351586478406e-06, "loss": 0.3809, "step": 38266 }, { "epoch": 1.75609196457253, "grad_norm": 0.4629899561405182, "learning_rate": 3.803797089480852e-06, "loss": 0.3583, "step": 38267 }, { "epoch": 1.7561378550777844, "grad_norm": 0.4310542047023773, "learning_rate": 3.8035590231903524e-06, "loss": 0.2912, "step": 38268 }, { "epoch": 1.756183745583039, "grad_norm": 0.47964125871658325, "learning_rate": 3.803320959776915e-06, "loss": 0.3584, "step": 38269 }, { "epoch": 1.7562296360882934, "grad_norm": 0.47627997398376465, "learning_rate": 3.803082899241114e-06, "loss": 0.3475, "step": 38270 }, { "epoch": 1.756275526593548, "grad_norm": 0.4328688979148865, "learning_rate": 3.802844841583521e-06, "loss": 0.2954, "step": 38271 }, { "epoch": 1.7563214170988024, "grad_norm": 0.5056161880493164, "learning_rate": 3.8026067868047068e-06, "loss": 0.3967, "step": 38272 }, { "epoch": 1.7563673076040567, "grad_norm": 0.4901103377342224, "learning_rate": 3.8023687349052467e-06, "loss": 0.398, "step": 38273 }, { "epoch": 1.7564131981093112, "grad_norm": 0.455577552318573, "learning_rate": 3.802130685885712e-06, "loss": 0.3302, "step": 38274 }, { "epoch": 1.7564590886145657, "grad_norm": 0.5018193125724792, "learning_rate": 3.8018926397466736e-06, "loss": 0.4099, "step": 38275 }, { "epoch": 1.75650497911982, "grad_norm": 0.462346613407135, "learning_rate": 3.801654596488707e-06, "loss": 0.3692, "step": 38276 }, { "epoch": 1.7565508696250745, "grad_norm": 0.4271996319293976, "learning_rate": 3.8014165561123827e-06, "loss": 0.2911, "step": 38277 }, { "epoch": 1.756596760130329, "grad_norm": 0.5613418221473694, "learning_rate": 3.801178518618271e-06, "loss": 0.3896, "step": 38278 }, { "epoch": 1.7566426506355834, "grad_norm": 0.4786616861820221, "learning_rate": 3.8009404840069484e-06, "loss": 0.3597, "step": 38279 }, { "epoch": 1.756688541140838, "grad_norm": 0.4913674592971802, "learning_rate": 3.800702452278986e-06, "loss": 0.3986, "step": 38280 }, { "epoch": 1.7567344316460924, "grad_norm": 0.45116549730300903, "learning_rate": 3.800464423434957e-06, "loss": 0.3164, "step": 38281 }, { "epoch": 1.756780322151347, "grad_norm": 0.4954787492752075, "learning_rate": 3.800226397475429e-06, "loss": 0.4334, "step": 38282 }, { "epoch": 1.7568262126566014, "grad_norm": 0.46176910400390625, "learning_rate": 3.7999883744009795e-06, "loss": 0.3436, "step": 38283 }, { "epoch": 1.756872103161856, "grad_norm": 0.4726191461086273, "learning_rate": 3.7997503542121798e-06, "loss": 0.3411, "step": 38284 }, { "epoch": 1.7569179936671104, "grad_norm": 0.47916683554649353, "learning_rate": 3.7995123369096004e-06, "loss": 0.3353, "step": 38285 }, { "epoch": 1.7569638841723647, "grad_norm": 0.45721709728240967, "learning_rate": 3.799274322493816e-06, "loss": 0.3292, "step": 38286 }, { "epoch": 1.7570097746776192, "grad_norm": 0.46589529514312744, "learning_rate": 3.7990363109653984e-06, "loss": 0.3393, "step": 38287 }, { "epoch": 1.7570556651828737, "grad_norm": 0.44673770666122437, "learning_rate": 3.7987983023249163e-06, "loss": 0.324, "step": 38288 }, { "epoch": 1.757101555688128, "grad_norm": 0.4309248924255371, "learning_rate": 3.798560296572948e-06, "loss": 0.3154, "step": 38289 }, { "epoch": 1.7571474461933825, "grad_norm": 0.52397221326828, "learning_rate": 3.7983222937100626e-06, "loss": 0.4381, "step": 38290 }, { "epoch": 1.757193336698637, "grad_norm": 0.4323873519897461, "learning_rate": 3.7980842937368325e-06, "loss": 0.2796, "step": 38291 }, { "epoch": 1.7572392272038915, "grad_norm": 0.4782414734363556, "learning_rate": 3.797846296653831e-06, "loss": 0.3544, "step": 38292 }, { "epoch": 1.757285117709146, "grad_norm": 0.4502119719982147, "learning_rate": 3.7976083024616297e-06, "loss": 0.3156, "step": 38293 }, { "epoch": 1.7573310082144005, "grad_norm": 0.4780610501766205, "learning_rate": 3.7973703111607984e-06, "loss": 0.397, "step": 38294 }, { "epoch": 1.757376898719655, "grad_norm": 0.5238561034202576, "learning_rate": 3.7971323227519147e-06, "loss": 0.3922, "step": 38295 }, { "epoch": 1.7574227892249095, "grad_norm": 0.4771853983402252, "learning_rate": 3.7968943372355477e-06, "loss": 0.4037, "step": 38296 }, { "epoch": 1.757468679730164, "grad_norm": 0.5051767826080322, "learning_rate": 3.796656354612269e-06, "loss": 0.4289, "step": 38297 }, { "epoch": 1.7575145702354182, "grad_norm": 0.4776654839515686, "learning_rate": 3.7964183748826534e-06, "loss": 0.3367, "step": 38298 }, { "epoch": 1.7575604607406727, "grad_norm": 0.4678038954734802, "learning_rate": 3.7961803980472716e-06, "loss": 0.4064, "step": 38299 }, { "epoch": 1.7576063512459272, "grad_norm": 0.47737982869148254, "learning_rate": 3.795942424106694e-06, "loss": 0.3681, "step": 38300 }, { "epoch": 1.7576522417511817, "grad_norm": 0.45834699273109436, "learning_rate": 3.7957044530614966e-06, "loss": 0.3271, "step": 38301 }, { "epoch": 1.757698132256436, "grad_norm": 0.4882853329181671, "learning_rate": 3.79546648491225e-06, "loss": 0.3643, "step": 38302 }, { "epoch": 1.7577440227616905, "grad_norm": 0.443279892206192, "learning_rate": 3.795228519659526e-06, "loss": 0.3291, "step": 38303 }, { "epoch": 1.757789913266945, "grad_norm": 0.4696867763996124, "learning_rate": 3.7949905573038977e-06, "loss": 0.3687, "step": 38304 }, { "epoch": 1.7578358037721995, "grad_norm": 0.48926839232444763, "learning_rate": 3.7947525978459364e-06, "loss": 0.3365, "step": 38305 }, { "epoch": 1.757881694277454, "grad_norm": 0.5036697387695312, "learning_rate": 3.7945146412862145e-06, "loss": 0.3829, "step": 38306 }, { "epoch": 1.7579275847827085, "grad_norm": 0.4913530647754669, "learning_rate": 3.794276687625305e-06, "loss": 0.3769, "step": 38307 }, { "epoch": 1.757973475287963, "grad_norm": 0.45507287979125977, "learning_rate": 3.794038736863779e-06, "loss": 0.3604, "step": 38308 }, { "epoch": 1.7580193657932175, "grad_norm": 0.4666888117790222, "learning_rate": 3.7938007890022084e-06, "loss": 0.349, "step": 38309 }, { "epoch": 1.758065256298472, "grad_norm": 0.49028727412223816, "learning_rate": 3.793562844041168e-06, "loss": 0.3938, "step": 38310 }, { "epoch": 1.7581111468037263, "grad_norm": 0.5133760571479797, "learning_rate": 3.793324901981227e-06, "loss": 0.3208, "step": 38311 }, { "epoch": 1.7581570373089808, "grad_norm": 0.5037102103233337, "learning_rate": 3.79308696282296e-06, "loss": 0.403, "step": 38312 }, { "epoch": 1.7582029278142353, "grad_norm": 0.4614470899105072, "learning_rate": 3.792849026566936e-06, "loss": 0.3263, "step": 38313 }, { "epoch": 1.7582488183194895, "grad_norm": 0.4743252694606781, "learning_rate": 3.7926110932137304e-06, "loss": 0.3867, "step": 38314 }, { "epoch": 1.758294708824744, "grad_norm": 0.4464605152606964, "learning_rate": 3.792373162763915e-06, "loss": 0.2822, "step": 38315 }, { "epoch": 1.7583405993299985, "grad_norm": 0.4677067995071411, "learning_rate": 3.7921352352180577e-06, "loss": 0.337, "step": 38316 }, { "epoch": 1.758386489835253, "grad_norm": 0.4541931450366974, "learning_rate": 3.7918973105767365e-06, "loss": 0.328, "step": 38317 }, { "epoch": 1.7584323803405075, "grad_norm": 0.4460824131965637, "learning_rate": 3.791659388840521e-06, "loss": 0.2998, "step": 38318 }, { "epoch": 1.758478270845762, "grad_norm": 0.5064753890037537, "learning_rate": 3.7914214700099827e-06, "loss": 0.3477, "step": 38319 }, { "epoch": 1.7585241613510165, "grad_norm": 0.4383980929851532, "learning_rate": 3.7911835540856944e-06, "loss": 0.2813, "step": 38320 }, { "epoch": 1.758570051856271, "grad_norm": 0.4959060847759247, "learning_rate": 3.7909456410682288e-06, "loss": 0.3468, "step": 38321 }, { "epoch": 1.7586159423615255, "grad_norm": 0.47594988346099854, "learning_rate": 3.790707730958155e-06, "loss": 0.3899, "step": 38322 }, { "epoch": 1.75866183286678, "grad_norm": 0.48165324330329895, "learning_rate": 3.79046982375605e-06, "loss": 0.4432, "step": 38323 }, { "epoch": 1.7587077233720343, "grad_norm": 0.4453853964805603, "learning_rate": 3.790231919462483e-06, "loss": 0.3513, "step": 38324 }, { "epoch": 1.7587536138772888, "grad_norm": 0.46017348766326904, "learning_rate": 3.7899940180780255e-06, "loss": 0.3282, "step": 38325 }, { "epoch": 1.7587995043825433, "grad_norm": 0.4543053209781647, "learning_rate": 3.7897561196032512e-06, "loss": 0.3499, "step": 38326 }, { "epoch": 1.7588453948877976, "grad_norm": 0.45544353127479553, "learning_rate": 3.7895182240387318e-06, "loss": 0.3782, "step": 38327 }, { "epoch": 1.758891285393052, "grad_norm": 0.5120173692703247, "learning_rate": 3.789280331385038e-06, "loss": 0.4256, "step": 38328 }, { "epoch": 1.7589371758983066, "grad_norm": 0.5207133293151855, "learning_rate": 3.7890424416427436e-06, "loss": 0.4154, "step": 38329 }, { "epoch": 1.758983066403561, "grad_norm": 0.456525057554245, "learning_rate": 3.7888045548124205e-06, "loss": 0.3274, "step": 38330 }, { "epoch": 1.7590289569088156, "grad_norm": 0.46602892875671387, "learning_rate": 3.788566670894637e-06, "loss": 0.3501, "step": 38331 }, { "epoch": 1.75907484741407, "grad_norm": 0.4597901403903961, "learning_rate": 3.788328789889971e-06, "loss": 0.3428, "step": 38332 }, { "epoch": 1.7591207379193246, "grad_norm": 0.4388744831085205, "learning_rate": 3.788090911798993e-06, "loss": 0.3013, "step": 38333 }, { "epoch": 1.759166628424579, "grad_norm": 0.48054590821266174, "learning_rate": 3.7878530366222716e-06, "loss": 0.3544, "step": 38334 }, { "epoch": 1.7592125189298335, "grad_norm": 0.4662417769432068, "learning_rate": 3.787615164360382e-06, "loss": 0.3623, "step": 38335 }, { "epoch": 1.759258409435088, "grad_norm": 0.48504161834716797, "learning_rate": 3.787377295013896e-06, "loss": 0.4041, "step": 38336 }, { "epoch": 1.7593042999403423, "grad_norm": 0.5425921082496643, "learning_rate": 3.7871394285833817e-06, "loss": 0.3966, "step": 38337 }, { "epoch": 1.7593501904455968, "grad_norm": 0.4686540961265564, "learning_rate": 3.7869015650694175e-06, "loss": 0.3232, "step": 38338 }, { "epoch": 1.7593960809508513, "grad_norm": 0.4686158001422882, "learning_rate": 3.7866637044725717e-06, "loss": 0.3438, "step": 38339 }, { "epoch": 1.7594419714561056, "grad_norm": 0.4754941165447235, "learning_rate": 3.7864258467934156e-06, "loss": 0.3807, "step": 38340 }, { "epoch": 1.75948786196136, "grad_norm": 0.4595724940299988, "learning_rate": 3.786187992032524e-06, "loss": 0.3804, "step": 38341 }, { "epoch": 1.7595337524666146, "grad_norm": 0.46796467900276184, "learning_rate": 3.785950140190466e-06, "loss": 0.3638, "step": 38342 }, { "epoch": 1.759579642971869, "grad_norm": 0.5049145817756653, "learning_rate": 3.785712291267814e-06, "loss": 0.4168, "step": 38343 }, { "epoch": 1.7596255334771236, "grad_norm": 0.45139241218566895, "learning_rate": 3.785474445265142e-06, "loss": 0.3092, "step": 38344 }, { "epoch": 1.759671423982378, "grad_norm": 0.4832097291946411, "learning_rate": 3.785236602183021e-06, "loss": 0.3738, "step": 38345 }, { "epoch": 1.7597173144876326, "grad_norm": 0.4438186585903168, "learning_rate": 3.7849987620220226e-06, "loss": 0.2817, "step": 38346 }, { "epoch": 1.759763204992887, "grad_norm": 0.4807962477207184, "learning_rate": 3.784760924782717e-06, "loss": 0.3749, "step": 38347 }, { "epoch": 1.7598090954981416, "grad_norm": 0.500275194644928, "learning_rate": 3.7845230904656793e-06, "loss": 0.311, "step": 38348 }, { "epoch": 1.7598549860033958, "grad_norm": 0.4842105507850647, "learning_rate": 3.7842852590714806e-06, "loss": 0.4042, "step": 38349 }, { "epoch": 1.7599008765086503, "grad_norm": 0.45828014612197876, "learning_rate": 3.7840474306006904e-06, "loss": 0.2946, "step": 38350 }, { "epoch": 1.7599467670139048, "grad_norm": 0.47050970792770386, "learning_rate": 3.7838096050538837e-06, "loss": 0.3763, "step": 38351 }, { "epoch": 1.7599926575191591, "grad_norm": 0.46474409103393555, "learning_rate": 3.783571782431631e-06, "loss": 0.3647, "step": 38352 }, { "epoch": 1.7600385480244136, "grad_norm": 0.47763368487358093, "learning_rate": 3.7833339627345017e-06, "loss": 0.377, "step": 38353 }, { "epoch": 1.7600844385296681, "grad_norm": 0.5136001706123352, "learning_rate": 3.7830961459630728e-06, "loss": 0.3972, "step": 38354 }, { "epoch": 1.7601303290349226, "grad_norm": 0.45928817987442017, "learning_rate": 3.7828583321179136e-06, "loss": 0.3103, "step": 38355 }, { "epoch": 1.7601762195401771, "grad_norm": 0.43964308500289917, "learning_rate": 3.7826205211995946e-06, "loss": 0.3174, "step": 38356 }, { "epoch": 1.7602221100454316, "grad_norm": 0.47442957758903503, "learning_rate": 3.7823827132086897e-06, "loss": 0.3611, "step": 38357 }, { "epoch": 1.760268000550686, "grad_norm": 0.43624040484428406, "learning_rate": 3.782144908145771e-06, "loss": 0.3083, "step": 38358 }, { "epoch": 1.7603138910559406, "grad_norm": 0.5192256569862366, "learning_rate": 3.781907106011407e-06, "loss": 0.4205, "step": 38359 }, { "epoch": 1.760359781561195, "grad_norm": 0.502558171749115, "learning_rate": 3.7816693068061746e-06, "loss": 0.416, "step": 38360 }, { "epoch": 1.7604056720664496, "grad_norm": 0.4580441415309906, "learning_rate": 3.781431510530642e-06, "loss": 0.3364, "step": 38361 }, { "epoch": 1.7604515625717039, "grad_norm": 0.48370984196662903, "learning_rate": 3.781193717185382e-06, "loss": 0.3511, "step": 38362 }, { "epoch": 1.7604974530769584, "grad_norm": 0.46518009901046753, "learning_rate": 3.7809559267709663e-06, "loss": 0.3778, "step": 38363 }, { "epoch": 1.7605433435822129, "grad_norm": 0.43130454421043396, "learning_rate": 3.780718139287968e-06, "loss": 0.3222, "step": 38364 }, { "epoch": 1.7605892340874671, "grad_norm": 0.4705457091331482, "learning_rate": 3.7804803547369552e-06, "loss": 0.3457, "step": 38365 }, { "epoch": 1.7606351245927216, "grad_norm": 0.485038697719574, "learning_rate": 3.7802425731185037e-06, "loss": 0.3747, "step": 38366 }, { "epoch": 1.7606810150979761, "grad_norm": 0.5070366263389587, "learning_rate": 3.7800047944331846e-06, "loss": 0.4022, "step": 38367 }, { "epoch": 1.7607269056032306, "grad_norm": 0.4587084949016571, "learning_rate": 3.779767018681568e-06, "loss": 0.3471, "step": 38368 }, { "epoch": 1.7607727961084851, "grad_norm": 0.43464237451553345, "learning_rate": 3.7795292458642274e-06, "loss": 0.3485, "step": 38369 }, { "epoch": 1.7608186866137396, "grad_norm": 0.47145384550094604, "learning_rate": 3.7792914759817333e-06, "loss": 0.3344, "step": 38370 }, { "epoch": 1.7608645771189941, "grad_norm": 0.49127840995788574, "learning_rate": 3.7790537090346564e-06, "loss": 0.4116, "step": 38371 }, { "epoch": 1.7609104676242486, "grad_norm": 0.42733117938041687, "learning_rate": 3.7788159450235728e-06, "loss": 0.3343, "step": 38372 }, { "epoch": 1.7609563581295031, "grad_norm": 0.4438866376876831, "learning_rate": 3.778578183949051e-06, "loss": 0.322, "step": 38373 }, { "epoch": 1.7610022486347576, "grad_norm": 0.47877466678619385, "learning_rate": 3.7783404258116607e-06, "loss": 0.3544, "step": 38374 }, { "epoch": 1.761048139140012, "grad_norm": 0.4426877796649933, "learning_rate": 3.7781026706119784e-06, "loss": 0.3578, "step": 38375 }, { "epoch": 1.7610940296452664, "grad_norm": 0.4676334261894226, "learning_rate": 3.777864918350573e-06, "loss": 0.3668, "step": 38376 }, { "epoch": 1.761139920150521, "grad_norm": 0.4337848126888275, "learning_rate": 3.777627169028017e-06, "loss": 0.3165, "step": 38377 }, { "epoch": 1.7611858106557752, "grad_norm": 0.49028557538986206, "learning_rate": 3.777389422644881e-06, "loss": 0.388, "step": 38378 }, { "epoch": 1.7612317011610297, "grad_norm": 0.4711563289165497, "learning_rate": 3.777151679201738e-06, "loss": 0.4113, "step": 38379 }, { "epoch": 1.7612775916662842, "grad_norm": 0.4533214867115021, "learning_rate": 3.7769139386991606e-06, "loss": 0.3405, "step": 38380 }, { "epoch": 1.7613234821715387, "grad_norm": 0.45808663964271545, "learning_rate": 3.7766762011377154e-06, "loss": 0.3106, "step": 38381 }, { "epoch": 1.7613693726767932, "grad_norm": 0.46038907766342163, "learning_rate": 3.776438466517981e-06, "loss": 0.3795, "step": 38382 }, { "epoch": 1.7614152631820477, "grad_norm": 0.470052570104599, "learning_rate": 3.776200734840526e-06, "loss": 0.3401, "step": 38383 }, { "epoch": 1.7614611536873022, "grad_norm": 0.5138224959373474, "learning_rate": 3.7759630061059204e-06, "loss": 0.368, "step": 38384 }, { "epoch": 1.7615070441925567, "grad_norm": 0.4590214490890503, "learning_rate": 3.775725280314739e-06, "loss": 0.3594, "step": 38385 }, { "epoch": 1.7615529346978112, "grad_norm": 0.46629631519317627, "learning_rate": 3.775487557467552e-06, "loss": 0.3241, "step": 38386 }, { "epoch": 1.7615988252030654, "grad_norm": 0.48703789710998535, "learning_rate": 3.7752498375649283e-06, "loss": 0.3456, "step": 38387 }, { "epoch": 1.76164471570832, "grad_norm": 0.4532487690448761, "learning_rate": 3.775012120607444e-06, "loss": 0.369, "step": 38388 }, { "epoch": 1.7616906062135744, "grad_norm": 0.4913702607154846, "learning_rate": 3.77477440659567e-06, "loss": 0.3736, "step": 38389 }, { "epoch": 1.761736496718829, "grad_norm": 0.5234541893005371, "learning_rate": 3.7745366955301755e-06, "loss": 0.3576, "step": 38390 }, { "epoch": 1.7617823872240832, "grad_norm": 0.4744754731655121, "learning_rate": 3.7742989874115338e-06, "loss": 0.3805, "step": 38391 }, { "epoch": 1.7618282777293377, "grad_norm": 0.4563060998916626, "learning_rate": 3.7740612822403167e-06, "loss": 0.3087, "step": 38392 }, { "epoch": 1.7618741682345922, "grad_norm": 0.43083345890045166, "learning_rate": 3.773823580017093e-06, "loss": 0.2872, "step": 38393 }, { "epoch": 1.7619200587398467, "grad_norm": 0.5092521905899048, "learning_rate": 3.7735858807424402e-06, "loss": 0.4334, "step": 38394 }, { "epoch": 1.7619659492451012, "grad_norm": 0.46459004282951355, "learning_rate": 3.773348184416925e-06, "loss": 0.3155, "step": 38395 }, { "epoch": 1.7620118397503557, "grad_norm": 0.4866071045398712, "learning_rate": 3.7731104910411183e-06, "loss": 0.3932, "step": 38396 }, { "epoch": 1.7620577302556102, "grad_norm": 0.48907604813575745, "learning_rate": 3.7728728006155955e-06, "loss": 0.3739, "step": 38397 }, { "epoch": 1.7621036207608647, "grad_norm": 0.4651419520378113, "learning_rate": 3.7726351131409267e-06, "loss": 0.3507, "step": 38398 }, { "epoch": 1.7621495112661192, "grad_norm": 0.46683207154273987, "learning_rate": 3.772397428617681e-06, "loss": 0.3305, "step": 38399 }, { "epoch": 1.7621954017713735, "grad_norm": 0.48388731479644775, "learning_rate": 3.772159747046434e-06, "loss": 0.3705, "step": 38400 }, { "epoch": 1.762241292276628, "grad_norm": 0.4656377136707306, "learning_rate": 3.7719220684277553e-06, "loss": 0.3439, "step": 38401 }, { "epoch": 1.7622871827818825, "grad_norm": 0.4899931252002716, "learning_rate": 3.7716843927622144e-06, "loss": 0.3925, "step": 38402 }, { "epoch": 1.7623330732871367, "grad_norm": 0.48617154359817505, "learning_rate": 3.7714467200503867e-06, "loss": 0.4062, "step": 38403 }, { "epoch": 1.7623789637923912, "grad_norm": 0.5124910473823547, "learning_rate": 3.7712090502928417e-06, "loss": 0.4406, "step": 38404 }, { "epoch": 1.7624248542976457, "grad_norm": 0.5015543103218079, "learning_rate": 3.7709713834901505e-06, "loss": 0.4344, "step": 38405 }, { "epoch": 1.7624707448029002, "grad_norm": 0.47744908928871155, "learning_rate": 3.7707337196428863e-06, "loss": 0.3776, "step": 38406 }, { "epoch": 1.7625166353081547, "grad_norm": 0.4517383575439453, "learning_rate": 3.7704960587516194e-06, "loss": 0.2975, "step": 38407 }, { "epoch": 1.7625625258134092, "grad_norm": 0.4984171688556671, "learning_rate": 3.7702584008169184e-06, "loss": 0.4094, "step": 38408 }, { "epoch": 1.7626084163186637, "grad_norm": 0.6187036633491516, "learning_rate": 3.770020745839361e-06, "loss": 0.3674, "step": 38409 }, { "epoch": 1.7626543068239182, "grad_norm": 0.543114423751831, "learning_rate": 3.7697830938195157e-06, "loss": 0.5127, "step": 38410 }, { "epoch": 1.7627001973291727, "grad_norm": 0.5098135471343994, "learning_rate": 3.769545444757953e-06, "loss": 0.4156, "step": 38411 }, { "epoch": 1.7627460878344272, "grad_norm": 0.5018177032470703, "learning_rate": 3.7693077986552445e-06, "loss": 0.4319, "step": 38412 }, { "epoch": 1.7627919783396815, "grad_norm": 0.4527531862258911, "learning_rate": 3.7690701555119635e-06, "loss": 0.3372, "step": 38413 }, { "epoch": 1.762837868844936, "grad_norm": 0.41302385926246643, "learning_rate": 3.7688325153286803e-06, "loss": 0.2613, "step": 38414 }, { "epoch": 1.7628837593501905, "grad_norm": 0.4281408190727234, "learning_rate": 3.768594878105964e-06, "loss": 0.3146, "step": 38415 }, { "epoch": 1.7629296498554448, "grad_norm": 0.5146076679229736, "learning_rate": 3.7683572438443917e-06, "loss": 0.4166, "step": 38416 }, { "epoch": 1.7629755403606993, "grad_norm": 0.4726836085319519, "learning_rate": 3.7681196125445303e-06, "loss": 0.3326, "step": 38417 }, { "epoch": 1.7630214308659538, "grad_norm": 0.4964827001094818, "learning_rate": 3.7678819842069503e-06, "loss": 0.3747, "step": 38418 }, { "epoch": 1.7630673213712083, "grad_norm": 0.48425567150115967, "learning_rate": 3.7676443588322276e-06, "loss": 0.3793, "step": 38419 }, { "epoch": 1.7631132118764627, "grad_norm": 0.4698258936405182, "learning_rate": 3.767406736420931e-06, "loss": 0.3643, "step": 38420 }, { "epoch": 1.7631591023817172, "grad_norm": 0.4701162874698639, "learning_rate": 3.7671691169736312e-06, "loss": 0.3813, "step": 38421 }, { "epoch": 1.7632049928869717, "grad_norm": 0.44547238945961, "learning_rate": 3.766931500490902e-06, "loss": 0.3073, "step": 38422 }, { "epoch": 1.7632508833922262, "grad_norm": 0.4657565951347351, "learning_rate": 3.7666938869733128e-06, "loss": 0.3454, "step": 38423 }, { "epoch": 1.7632967738974807, "grad_norm": 0.47479498386383057, "learning_rate": 3.766456276421434e-06, "loss": 0.3495, "step": 38424 }, { "epoch": 1.7633426644027352, "grad_norm": 0.4870316982269287, "learning_rate": 3.7662186688358403e-06, "loss": 0.3398, "step": 38425 }, { "epoch": 1.7633885549079895, "grad_norm": 0.46854791045188904, "learning_rate": 3.7659810642171013e-06, "loss": 0.33, "step": 38426 }, { "epoch": 1.763434445413244, "grad_norm": 0.48503589630126953, "learning_rate": 3.765743462565787e-06, "loss": 0.3936, "step": 38427 }, { "epoch": 1.7634803359184985, "grad_norm": 0.4690198302268982, "learning_rate": 3.765505863882472e-06, "loss": 0.359, "step": 38428 }, { "epoch": 1.7635262264237528, "grad_norm": 0.4582815170288086, "learning_rate": 3.7652682681677255e-06, "loss": 0.3611, "step": 38429 }, { "epoch": 1.7635721169290073, "grad_norm": 0.4690004289150238, "learning_rate": 3.7650306754221167e-06, "loss": 0.3468, "step": 38430 }, { "epoch": 1.7636180074342618, "grad_norm": 0.46284356713294983, "learning_rate": 3.7647930856462215e-06, "loss": 0.3853, "step": 38431 }, { "epoch": 1.7636638979395163, "grad_norm": 0.49339282512664795, "learning_rate": 3.7645554988406095e-06, "loss": 0.3891, "step": 38432 }, { "epoch": 1.7637097884447708, "grad_norm": 0.46456676721572876, "learning_rate": 3.7643179150058504e-06, "loss": 0.3125, "step": 38433 }, { "epoch": 1.7637556789500253, "grad_norm": 0.449203759431839, "learning_rate": 3.764080334142518e-06, "loss": 0.2988, "step": 38434 }, { "epoch": 1.7638015694552798, "grad_norm": 0.4307943880558014, "learning_rate": 3.7638427562511816e-06, "loss": 0.269, "step": 38435 }, { "epoch": 1.7638474599605343, "grad_norm": 0.4483773410320282, "learning_rate": 3.7636051813324116e-06, "loss": 0.3573, "step": 38436 }, { "epoch": 1.7638933504657888, "grad_norm": 0.44686639308929443, "learning_rate": 3.763367609386783e-06, "loss": 0.3197, "step": 38437 }, { "epoch": 1.763939240971043, "grad_norm": 0.47998306155204773, "learning_rate": 3.7631300404148663e-06, "loss": 0.4057, "step": 38438 }, { "epoch": 1.7639851314762975, "grad_norm": 0.46391916275024414, "learning_rate": 3.762892474417228e-06, "loss": 0.3209, "step": 38439 }, { "epoch": 1.764031021981552, "grad_norm": 0.45572006702423096, "learning_rate": 3.762654911394445e-06, "loss": 0.323, "step": 38440 }, { "epoch": 1.7640769124868063, "grad_norm": 0.4794723689556122, "learning_rate": 3.7624173513470863e-06, "loss": 0.3525, "step": 38441 }, { "epoch": 1.7641228029920608, "grad_norm": 0.48541536927223206, "learning_rate": 3.7621797942757223e-06, "loss": 0.3735, "step": 38442 }, { "epoch": 1.7641686934973153, "grad_norm": 0.4728507399559021, "learning_rate": 3.7619422401809265e-06, "loss": 0.3731, "step": 38443 }, { "epoch": 1.7642145840025698, "grad_norm": 0.48088783025741577, "learning_rate": 3.7617046890632686e-06, "loss": 0.375, "step": 38444 }, { "epoch": 1.7642604745078243, "grad_norm": 0.5236520171165466, "learning_rate": 3.76146714092332e-06, "loss": 0.3701, "step": 38445 }, { "epoch": 1.7643063650130788, "grad_norm": 0.5070548057556152, "learning_rate": 3.7612295957616503e-06, "loss": 0.411, "step": 38446 }, { "epoch": 1.7643522555183333, "grad_norm": 0.4297960102558136, "learning_rate": 3.7609920535788343e-06, "loss": 0.3047, "step": 38447 }, { "epoch": 1.7643981460235878, "grad_norm": 0.4578765630722046, "learning_rate": 3.7607545143754414e-06, "loss": 0.3043, "step": 38448 }, { "epoch": 1.7644440365288423, "grad_norm": 0.4711192846298218, "learning_rate": 3.7605169781520418e-06, "loss": 0.3905, "step": 38449 }, { "epoch": 1.7644899270340968, "grad_norm": 0.43182337284088135, "learning_rate": 3.7602794449092088e-06, "loss": 0.3116, "step": 38450 }, { "epoch": 1.764535817539351, "grad_norm": 0.4665738046169281, "learning_rate": 3.760041914647512e-06, "loss": 0.3858, "step": 38451 }, { "epoch": 1.7645817080446056, "grad_norm": 0.5155542492866516, "learning_rate": 3.7598043873675216e-06, "loss": 0.4002, "step": 38452 }, { "epoch": 1.76462759854986, "grad_norm": 0.5507570505142212, "learning_rate": 3.759566863069812e-06, "loss": 0.3141, "step": 38453 }, { "epoch": 1.7646734890551143, "grad_norm": 0.4773794710636139, "learning_rate": 3.7593293417549526e-06, "loss": 0.3884, "step": 38454 }, { "epoch": 1.7647193795603688, "grad_norm": 0.513459324836731, "learning_rate": 3.759091823423514e-06, "loss": 0.3295, "step": 38455 }, { "epoch": 1.7647652700656233, "grad_norm": 0.4660806357860565, "learning_rate": 3.7588543080760686e-06, "loss": 0.3717, "step": 38456 }, { "epoch": 1.7648111605708778, "grad_norm": 0.4965137541294098, "learning_rate": 3.7586167957131864e-06, "loss": 0.3504, "step": 38457 }, { "epoch": 1.7648570510761323, "grad_norm": 0.43501007556915283, "learning_rate": 3.7583792863354372e-06, "loss": 0.2893, "step": 38458 }, { "epoch": 1.7649029415813868, "grad_norm": 0.45114362239837646, "learning_rate": 3.758141779943396e-06, "loss": 0.3087, "step": 38459 }, { "epoch": 1.7649488320866413, "grad_norm": 0.5181567668914795, "learning_rate": 3.7579042765376337e-06, "loss": 0.4398, "step": 38460 }, { "epoch": 1.7649947225918958, "grad_norm": 0.4726133644580841, "learning_rate": 3.7576667761187157e-06, "loss": 0.4069, "step": 38461 }, { "epoch": 1.7650406130971503, "grad_norm": 0.4695392847061157, "learning_rate": 3.757429278687219e-06, "loss": 0.3408, "step": 38462 }, { "epoch": 1.7650865036024048, "grad_norm": 0.4348408877849579, "learning_rate": 3.7571917842437135e-06, "loss": 0.33, "step": 38463 }, { "epoch": 1.765132394107659, "grad_norm": 0.4362618923187256, "learning_rate": 3.756954292788768e-06, "loss": 0.326, "step": 38464 }, { "epoch": 1.7651782846129136, "grad_norm": 0.43866854906082153, "learning_rate": 3.7567168043229562e-06, "loss": 0.3238, "step": 38465 }, { "epoch": 1.765224175118168, "grad_norm": 0.4723884165287018, "learning_rate": 3.756479318846848e-06, "loss": 0.3629, "step": 38466 }, { "epoch": 1.7652700656234224, "grad_norm": 0.47802889347076416, "learning_rate": 3.756241836361012e-06, "loss": 0.4092, "step": 38467 }, { "epoch": 1.7653159561286769, "grad_norm": 0.5006721019744873, "learning_rate": 3.756004356866025e-06, "loss": 0.3728, "step": 38468 }, { "epoch": 1.7653618466339314, "grad_norm": 0.4914933145046234, "learning_rate": 3.755766880362454e-06, "loss": 0.4036, "step": 38469 }, { "epoch": 1.7654077371391859, "grad_norm": 0.42574670910835266, "learning_rate": 3.75552940685087e-06, "loss": 0.326, "step": 38470 }, { "epoch": 1.7654536276444404, "grad_norm": 0.4239595830440521, "learning_rate": 3.755291936331846e-06, "loss": 0.2976, "step": 38471 }, { "epoch": 1.7654995181496949, "grad_norm": 0.49394649267196655, "learning_rate": 3.755054468805953e-06, "loss": 0.4333, "step": 38472 }, { "epoch": 1.7655454086549494, "grad_norm": 0.47833678126335144, "learning_rate": 3.754817004273758e-06, "loss": 0.3767, "step": 38473 }, { "epoch": 1.7655912991602039, "grad_norm": 0.4767918884754181, "learning_rate": 3.754579542735838e-06, "loss": 0.3681, "step": 38474 }, { "epoch": 1.7656371896654584, "grad_norm": 0.441020667552948, "learning_rate": 3.75434208419276e-06, "loss": 0.3175, "step": 38475 }, { "epoch": 1.7656830801707126, "grad_norm": 0.4671199917793274, "learning_rate": 3.754104628645098e-06, "loss": 0.3389, "step": 38476 }, { "epoch": 1.7657289706759671, "grad_norm": 0.4706002473831177, "learning_rate": 3.753867176093418e-06, "loss": 0.3395, "step": 38477 }, { "epoch": 1.7657748611812216, "grad_norm": 0.4754515588283539, "learning_rate": 3.7536297265382966e-06, "loss": 0.3471, "step": 38478 }, { "epoch": 1.7658207516864761, "grad_norm": 0.5018705725669861, "learning_rate": 3.7533922799803017e-06, "loss": 0.4064, "step": 38479 }, { "epoch": 1.7658666421917304, "grad_norm": 0.47097817063331604, "learning_rate": 3.7531548364200034e-06, "loss": 0.374, "step": 38480 }, { "epoch": 1.765912532696985, "grad_norm": 0.5272670388221741, "learning_rate": 3.7529173958579755e-06, "loss": 0.4541, "step": 38481 }, { "epoch": 1.7659584232022394, "grad_norm": 0.49096012115478516, "learning_rate": 3.752679958294789e-06, "loss": 0.3458, "step": 38482 }, { "epoch": 1.766004313707494, "grad_norm": 0.4396880269050598, "learning_rate": 3.7524425237310107e-06, "loss": 0.2898, "step": 38483 }, { "epoch": 1.7660502042127484, "grad_norm": 0.485205739736557, "learning_rate": 3.7522050921672167e-06, "loss": 0.3428, "step": 38484 }, { "epoch": 1.7660960947180029, "grad_norm": 0.49671176075935364, "learning_rate": 3.7519676636039747e-06, "loss": 0.3844, "step": 38485 }, { "epoch": 1.7661419852232574, "grad_norm": 0.46306127309799194, "learning_rate": 3.7517302380418564e-06, "loss": 0.3151, "step": 38486 }, { "epoch": 1.7661878757285119, "grad_norm": 0.46388280391693115, "learning_rate": 3.7514928154814335e-06, "loss": 0.3477, "step": 38487 }, { "epoch": 1.7662337662337664, "grad_norm": 0.47771385312080383, "learning_rate": 3.7512553959232766e-06, "loss": 0.3615, "step": 38488 }, { "epoch": 1.7662796567390207, "grad_norm": 0.4718706011772156, "learning_rate": 3.751017979367954e-06, "loss": 0.3745, "step": 38489 }, { "epoch": 1.7663255472442752, "grad_norm": 0.45186829566955566, "learning_rate": 3.750780565816041e-06, "loss": 0.2889, "step": 38490 }, { "epoch": 1.7663714377495296, "grad_norm": 0.4666984975337982, "learning_rate": 3.750543155268107e-06, "loss": 0.3121, "step": 38491 }, { "epoch": 1.766417328254784, "grad_norm": 0.48314914107322693, "learning_rate": 3.7503057477247206e-06, "loss": 0.3662, "step": 38492 }, { "epoch": 1.7664632187600384, "grad_norm": 0.4488626718521118, "learning_rate": 3.750068343186456e-06, "loss": 0.3248, "step": 38493 }, { "epoch": 1.766509109265293, "grad_norm": 0.46268755197525024, "learning_rate": 3.7498309416538823e-06, "loss": 0.3524, "step": 38494 }, { "epoch": 1.7665549997705474, "grad_norm": 0.4513900578022003, "learning_rate": 3.7495935431275687e-06, "loss": 0.3227, "step": 38495 }, { "epoch": 1.766600890275802, "grad_norm": 0.4730130732059479, "learning_rate": 3.7493561476080904e-06, "loss": 0.3669, "step": 38496 }, { "epoch": 1.7666467807810564, "grad_norm": 0.43243369460105896, "learning_rate": 3.7491187550960156e-06, "loss": 0.3078, "step": 38497 }, { "epoch": 1.766692671286311, "grad_norm": 0.43778154253959656, "learning_rate": 3.748881365591914e-06, "loss": 0.3274, "step": 38498 }, { "epoch": 1.7667385617915654, "grad_norm": 0.5096520781517029, "learning_rate": 3.7486439790963593e-06, "loss": 0.3694, "step": 38499 }, { "epoch": 1.76678445229682, "grad_norm": 0.4443760812282562, "learning_rate": 3.7484065956099208e-06, "loss": 0.3358, "step": 38500 }, { "epoch": 1.7668303428020744, "grad_norm": 0.466034471988678, "learning_rate": 3.7481692151331673e-06, "loss": 0.3333, "step": 38501 }, { "epoch": 1.7668762333073287, "grad_norm": 0.5084569454193115, "learning_rate": 3.747931837666674e-06, "loss": 0.4001, "step": 38502 }, { "epoch": 1.7669221238125832, "grad_norm": 0.4870623052120209, "learning_rate": 3.747694463211009e-06, "loss": 0.3799, "step": 38503 }, { "epoch": 1.7669680143178377, "grad_norm": 0.41490522027015686, "learning_rate": 3.747457091766743e-06, "loss": 0.2732, "step": 38504 }, { "epoch": 1.767013904823092, "grad_norm": 0.43872538208961487, "learning_rate": 3.747219723334448e-06, "loss": 0.3114, "step": 38505 }, { "epoch": 1.7670597953283464, "grad_norm": 0.4772000014781952, "learning_rate": 3.7469823579146945e-06, "loss": 0.3621, "step": 38506 }, { "epoch": 1.767105685833601, "grad_norm": 0.5230588912963867, "learning_rate": 3.746744995508052e-06, "loss": 0.4489, "step": 38507 }, { "epoch": 1.7671515763388554, "grad_norm": 0.46467316150665283, "learning_rate": 3.7465076361150934e-06, "loss": 0.3333, "step": 38508 }, { "epoch": 1.76719746684411, "grad_norm": 0.4688573181629181, "learning_rate": 3.7462702797363883e-06, "loss": 0.313, "step": 38509 }, { "epoch": 1.7672433573493644, "grad_norm": 0.4833219349384308, "learning_rate": 3.746032926372507e-06, "loss": 0.3831, "step": 38510 }, { "epoch": 1.767289247854619, "grad_norm": 0.4736151099205017, "learning_rate": 3.7457955760240187e-06, "loss": 0.3825, "step": 38511 }, { "epoch": 1.7673351383598734, "grad_norm": 0.45076894760131836, "learning_rate": 3.745558228691498e-06, "loss": 0.3298, "step": 38512 }, { "epoch": 1.767381028865128, "grad_norm": 0.4703231751918793, "learning_rate": 3.745320884375515e-06, "loss": 0.3525, "step": 38513 }, { "epoch": 1.7674269193703824, "grad_norm": 0.4730297923088074, "learning_rate": 3.7450835430766375e-06, "loss": 0.3508, "step": 38514 }, { "epoch": 1.7674728098756367, "grad_norm": 0.4682832360267639, "learning_rate": 3.7448462047954393e-06, "loss": 0.3498, "step": 38515 }, { "epoch": 1.7675187003808912, "grad_norm": 0.45303937792778015, "learning_rate": 3.7446088695324894e-06, "loss": 0.3689, "step": 38516 }, { "epoch": 1.7675645908861457, "grad_norm": 0.4568033516407013, "learning_rate": 3.744371537288357e-06, "loss": 0.3396, "step": 38517 }, { "epoch": 1.7676104813914, "grad_norm": 0.47557491064071655, "learning_rate": 3.744134208063617e-06, "loss": 0.3846, "step": 38518 }, { "epoch": 1.7676563718966545, "grad_norm": 0.4927780330181122, "learning_rate": 3.7438968818588383e-06, "loss": 0.3953, "step": 38519 }, { "epoch": 1.767702262401909, "grad_norm": 0.45397743582725525, "learning_rate": 3.743659558674589e-06, "loss": 0.3453, "step": 38520 }, { "epoch": 1.7677481529071635, "grad_norm": 0.48580628633499146, "learning_rate": 3.7434222385114444e-06, "loss": 0.3895, "step": 38521 }, { "epoch": 1.767794043412418, "grad_norm": 0.5023244619369507, "learning_rate": 3.7431849213699724e-06, "loss": 0.3733, "step": 38522 }, { "epoch": 1.7678399339176725, "grad_norm": 0.4831523299217224, "learning_rate": 3.742947607250741e-06, "loss": 0.4029, "step": 38523 }, { "epoch": 1.767885824422927, "grad_norm": 0.5166612267494202, "learning_rate": 3.7427102961543267e-06, "loss": 0.413, "step": 38524 }, { "epoch": 1.7679317149281815, "grad_norm": 0.4437985420227051, "learning_rate": 3.742472988081297e-06, "loss": 0.309, "step": 38525 }, { "epoch": 1.767977605433436, "grad_norm": 0.44597673416137695, "learning_rate": 3.742235683032222e-06, "loss": 0.3313, "step": 38526 }, { "epoch": 1.7680234959386902, "grad_norm": 0.4579927623271942, "learning_rate": 3.7419983810076744e-06, "loss": 0.3483, "step": 38527 }, { "epoch": 1.7680693864439447, "grad_norm": 0.5150508284568787, "learning_rate": 3.741761082008223e-06, "loss": 0.3084, "step": 38528 }, { "epoch": 1.7681152769491992, "grad_norm": 0.4623219668865204, "learning_rate": 3.741523786034439e-06, "loss": 0.3807, "step": 38529 }, { "epoch": 1.7681611674544535, "grad_norm": 0.4210550785064697, "learning_rate": 3.741286493086893e-06, "loss": 0.28, "step": 38530 }, { "epoch": 1.768207057959708, "grad_norm": 0.4599069356918335, "learning_rate": 3.741049203166157e-06, "loss": 0.36, "step": 38531 }, { "epoch": 1.7682529484649625, "grad_norm": 0.4746827781200409, "learning_rate": 3.7408119162727974e-06, "loss": 0.3854, "step": 38532 }, { "epoch": 1.768298838970217, "grad_norm": 0.49421611428260803, "learning_rate": 3.74057463240739e-06, "loss": 0.349, "step": 38533 }, { "epoch": 1.7683447294754715, "grad_norm": 0.48385533690452576, "learning_rate": 3.7403373515705034e-06, "loss": 0.3301, "step": 38534 }, { "epoch": 1.768390619980726, "grad_norm": 0.5149655342102051, "learning_rate": 3.7401000737627067e-06, "loss": 0.4547, "step": 38535 }, { "epoch": 1.7684365104859805, "grad_norm": 0.5138331055641174, "learning_rate": 3.739862798984573e-06, "loss": 0.4158, "step": 38536 }, { "epoch": 1.768482400991235, "grad_norm": 0.4657612144947052, "learning_rate": 3.739625527236671e-06, "loss": 0.313, "step": 38537 }, { "epoch": 1.7685282914964895, "grad_norm": 0.48610982298851013, "learning_rate": 3.7393882585195697e-06, "loss": 0.3434, "step": 38538 }, { "epoch": 1.768574182001744, "grad_norm": 0.45144006609916687, "learning_rate": 3.7391509928338445e-06, "loss": 0.3643, "step": 38539 }, { "epoch": 1.7686200725069983, "grad_norm": 0.49678999185562134, "learning_rate": 3.7389137301800626e-06, "loss": 0.4023, "step": 38540 }, { "epoch": 1.7686659630122528, "grad_norm": 0.5130404233932495, "learning_rate": 3.738676470558795e-06, "loss": 0.4507, "step": 38541 }, { "epoch": 1.7687118535175073, "grad_norm": 0.4708603620529175, "learning_rate": 3.738439213970612e-06, "loss": 0.3293, "step": 38542 }, { "epoch": 1.7687577440227615, "grad_norm": 0.4638685882091522, "learning_rate": 3.738201960416086e-06, "loss": 0.312, "step": 38543 }, { "epoch": 1.768803634528016, "grad_norm": 0.47997885942459106, "learning_rate": 3.737964709895785e-06, "loss": 0.3685, "step": 38544 }, { "epoch": 1.7688495250332705, "grad_norm": 0.5013919472694397, "learning_rate": 3.7377274624102787e-06, "loss": 0.4057, "step": 38545 }, { "epoch": 1.768895415538525, "grad_norm": 0.5012370944023132, "learning_rate": 3.7374902179601413e-06, "loss": 0.3857, "step": 38546 }, { "epoch": 1.7689413060437795, "grad_norm": 0.4549535810947418, "learning_rate": 3.737252976545942e-06, "loss": 0.3459, "step": 38547 }, { "epoch": 1.768987196549034, "grad_norm": 0.4928508996963501, "learning_rate": 3.7370157381682496e-06, "loss": 0.3592, "step": 38548 }, { "epoch": 1.7690330870542885, "grad_norm": 0.5145472288131714, "learning_rate": 3.7367785028276373e-06, "loss": 0.3304, "step": 38549 }, { "epoch": 1.769078977559543, "grad_norm": 0.49751579761505127, "learning_rate": 3.7365412705246735e-06, "loss": 0.4007, "step": 38550 }, { "epoch": 1.7691248680647975, "grad_norm": 0.504457950592041, "learning_rate": 3.7363040412599284e-06, "loss": 0.3367, "step": 38551 }, { "epoch": 1.769170758570052, "grad_norm": 0.5629599690437317, "learning_rate": 3.7360668150339733e-06, "loss": 0.4776, "step": 38552 }, { "epoch": 1.7692166490753063, "grad_norm": 0.46189168095588684, "learning_rate": 3.7358295918473796e-06, "loss": 0.3324, "step": 38553 }, { "epoch": 1.7692625395805608, "grad_norm": 0.452391654253006, "learning_rate": 3.7355923717007146e-06, "loss": 0.3348, "step": 38554 }, { "epoch": 1.7693084300858153, "grad_norm": 0.528039276599884, "learning_rate": 3.7353551545945528e-06, "loss": 0.4218, "step": 38555 }, { "epoch": 1.7693543205910696, "grad_norm": 0.4705066680908203, "learning_rate": 3.7351179405294624e-06, "loss": 0.359, "step": 38556 }, { "epoch": 1.769400211096324, "grad_norm": 0.4617433547973633, "learning_rate": 3.7348807295060136e-06, "loss": 0.3767, "step": 38557 }, { "epoch": 1.7694461016015786, "grad_norm": 0.45232275128364563, "learning_rate": 3.734643521524778e-06, "loss": 0.2715, "step": 38558 }, { "epoch": 1.769491992106833, "grad_norm": 0.4697965681552887, "learning_rate": 3.7344063165863258e-06, "loss": 0.35, "step": 38559 }, { "epoch": 1.7695378826120876, "grad_norm": 0.4364967942237854, "learning_rate": 3.7341691146912244e-06, "loss": 0.2956, "step": 38560 }, { "epoch": 1.769583773117342, "grad_norm": 0.4612088203430176, "learning_rate": 3.7339319158400494e-06, "loss": 0.3802, "step": 38561 }, { "epoch": 1.7696296636225965, "grad_norm": 0.4626777470111847, "learning_rate": 3.7336947200333683e-06, "loss": 0.327, "step": 38562 }, { "epoch": 1.769675554127851, "grad_norm": 0.4789682924747467, "learning_rate": 3.733457527271751e-06, "loss": 0.3553, "step": 38563 }, { "epoch": 1.7697214446331055, "grad_norm": 0.5410568118095398, "learning_rate": 3.733220337555769e-06, "loss": 0.4301, "step": 38564 }, { "epoch": 1.7697673351383598, "grad_norm": 0.49810272455215454, "learning_rate": 3.732983150885993e-06, "loss": 0.4071, "step": 38565 }, { "epoch": 1.7698132256436143, "grad_norm": 0.4807017147541046, "learning_rate": 3.7327459672629896e-06, "loss": 0.3663, "step": 38566 }, { "epoch": 1.7698591161488688, "grad_norm": 0.5166071057319641, "learning_rate": 3.7325087866873348e-06, "loss": 0.3873, "step": 38567 }, { "epoch": 1.7699050066541233, "grad_norm": 0.44316667318344116, "learning_rate": 3.7322716091595967e-06, "loss": 0.312, "step": 38568 }, { "epoch": 1.7699508971593776, "grad_norm": 0.4966508746147156, "learning_rate": 3.732034434680344e-06, "loss": 0.3535, "step": 38569 }, { "epoch": 1.769996787664632, "grad_norm": 0.45816951990127563, "learning_rate": 3.731797263250149e-06, "loss": 0.373, "step": 38570 }, { "epoch": 1.7700426781698866, "grad_norm": 0.43862634897232056, "learning_rate": 3.7315600948695817e-06, "loss": 0.3178, "step": 38571 }, { "epoch": 1.770088568675141, "grad_norm": 0.4678722321987152, "learning_rate": 3.7313229295392095e-06, "loss": 0.3589, "step": 38572 }, { "epoch": 1.7701344591803956, "grad_norm": 0.4796691834926605, "learning_rate": 3.731085767259609e-06, "loss": 0.3839, "step": 38573 }, { "epoch": 1.77018034968565, "grad_norm": 0.5083514451980591, "learning_rate": 3.7308486080313454e-06, "loss": 0.3955, "step": 38574 }, { "epoch": 1.7702262401909046, "grad_norm": 0.4831717610359192, "learning_rate": 3.730611451854991e-06, "loss": 0.3205, "step": 38575 }, { "epoch": 1.770272130696159, "grad_norm": 0.5091385245323181, "learning_rate": 3.730374298731112e-06, "loss": 0.418, "step": 38576 }, { "epoch": 1.7703180212014136, "grad_norm": 0.47544386982917786, "learning_rate": 3.7301371486602856e-06, "loss": 0.3693, "step": 38577 }, { "epoch": 1.7703639117066678, "grad_norm": 0.49333903193473816, "learning_rate": 3.7299000016430775e-06, "loss": 0.3388, "step": 38578 }, { "epoch": 1.7704098022119223, "grad_norm": 0.456682950258255, "learning_rate": 3.7296628576800585e-06, "loss": 0.3318, "step": 38579 }, { "epoch": 1.7704556927171768, "grad_norm": 0.4832608997821808, "learning_rate": 3.7294257167718003e-06, "loss": 0.3696, "step": 38580 }, { "epoch": 1.7705015832224311, "grad_norm": 0.5153594017028809, "learning_rate": 3.729188578918872e-06, "loss": 0.4137, "step": 38581 }, { "epoch": 1.7705474737276856, "grad_norm": 0.48623257875442505, "learning_rate": 3.728951444121842e-06, "loss": 0.3985, "step": 38582 }, { "epoch": 1.7705933642329401, "grad_norm": 0.471584677696228, "learning_rate": 3.7287143123812853e-06, "loss": 0.3419, "step": 38583 }, { "epoch": 1.7706392547381946, "grad_norm": 0.48362842202186584, "learning_rate": 3.7284771836977695e-06, "loss": 0.363, "step": 38584 }, { "epoch": 1.770685145243449, "grad_norm": 0.46947935223579407, "learning_rate": 3.728240058071863e-06, "loss": 0.3579, "step": 38585 }, { "epoch": 1.7707310357487036, "grad_norm": 0.5046108365058899, "learning_rate": 3.728002935504139e-06, "loss": 0.3986, "step": 38586 }, { "epoch": 1.770776926253958, "grad_norm": 0.4774002134799957, "learning_rate": 3.727765815995167e-06, "loss": 0.3508, "step": 38587 }, { "epoch": 1.7708228167592126, "grad_norm": 0.45514386892318726, "learning_rate": 3.727528699545514e-06, "loss": 0.324, "step": 38588 }, { "epoch": 1.770868707264467, "grad_norm": 0.45323365926742554, "learning_rate": 3.727291586155756e-06, "loss": 0.3402, "step": 38589 }, { "epoch": 1.7709145977697216, "grad_norm": 0.4561525285243988, "learning_rate": 3.727054475826459e-06, "loss": 0.3366, "step": 38590 }, { "epoch": 1.7709604882749759, "grad_norm": 0.4826277494430542, "learning_rate": 3.7268173685581934e-06, "loss": 0.3291, "step": 38591 }, { "epoch": 1.7710063787802304, "grad_norm": 0.4709374010562897, "learning_rate": 3.7265802643515314e-06, "loss": 0.3579, "step": 38592 }, { "epoch": 1.7710522692854849, "grad_norm": 0.43209028244018555, "learning_rate": 3.7263431632070413e-06, "loss": 0.3096, "step": 38593 }, { "epoch": 1.7710981597907391, "grad_norm": 0.4100930392742157, "learning_rate": 3.7261060651252923e-06, "loss": 0.2351, "step": 38594 }, { "epoch": 1.7711440502959936, "grad_norm": 0.4591045379638672, "learning_rate": 3.7258689701068596e-06, "loss": 0.3585, "step": 38595 }, { "epoch": 1.7711899408012481, "grad_norm": 0.477417916059494, "learning_rate": 3.7256318781523086e-06, "loss": 0.3769, "step": 38596 }, { "epoch": 1.7712358313065026, "grad_norm": 0.5884917974472046, "learning_rate": 3.7253947892622088e-06, "loss": 0.4087, "step": 38597 }, { "epoch": 1.7712817218117571, "grad_norm": 0.45911407470703125, "learning_rate": 3.7251577034371346e-06, "loss": 0.3406, "step": 38598 }, { "epoch": 1.7713276123170116, "grad_norm": 0.39112111926078796, "learning_rate": 3.7249206206776534e-06, "loss": 0.253, "step": 38599 }, { "epoch": 1.7713735028222661, "grad_norm": 0.5277090072631836, "learning_rate": 3.724683540984334e-06, "loss": 0.4292, "step": 38600 }, { "epoch": 1.7714193933275206, "grad_norm": 0.46726396679878235, "learning_rate": 3.7244464643577506e-06, "loss": 0.3346, "step": 38601 }, { "epoch": 1.7714652838327751, "grad_norm": 0.48324766755104065, "learning_rate": 3.72420939079847e-06, "loss": 0.313, "step": 38602 }, { "epoch": 1.7715111743380296, "grad_norm": 0.4578716456890106, "learning_rate": 3.723972320307061e-06, "loss": 0.3533, "step": 38603 }, { "epoch": 1.771557064843284, "grad_norm": 0.48574161529541016, "learning_rate": 3.7237352528840983e-06, "loss": 0.3971, "step": 38604 }, { "epoch": 1.7716029553485384, "grad_norm": 0.44975003600120544, "learning_rate": 3.7234981885301492e-06, "loss": 0.3302, "step": 38605 }, { "epoch": 1.771648845853793, "grad_norm": 0.4717950224876404, "learning_rate": 3.723261127245784e-06, "loss": 0.3288, "step": 38606 }, { "epoch": 1.7716947363590472, "grad_norm": 0.4624135196208954, "learning_rate": 3.723024069031572e-06, "loss": 0.3412, "step": 38607 }, { "epoch": 1.7717406268643017, "grad_norm": 0.4835161566734314, "learning_rate": 3.7227870138880856e-06, "loss": 0.4036, "step": 38608 }, { "epoch": 1.7717865173695562, "grad_norm": 0.5001975893974304, "learning_rate": 3.722549961815893e-06, "loss": 0.3976, "step": 38609 }, { "epoch": 1.7718324078748107, "grad_norm": 0.4461921453475952, "learning_rate": 3.722312912815562e-06, "loss": 0.3312, "step": 38610 }, { "epoch": 1.7718782983800652, "grad_norm": 0.48167791962623596, "learning_rate": 3.722075866887668e-06, "loss": 0.3514, "step": 38611 }, { "epoch": 1.7719241888853197, "grad_norm": 0.4678392708301544, "learning_rate": 3.721838824032778e-06, "loss": 0.3124, "step": 38612 }, { "epoch": 1.7719700793905742, "grad_norm": 0.49929267168045044, "learning_rate": 3.7216017842514606e-06, "loss": 0.4381, "step": 38613 }, { "epoch": 1.7720159698958287, "grad_norm": 0.4935516119003296, "learning_rate": 3.721364747544289e-06, "loss": 0.3584, "step": 38614 }, { "epoch": 1.7720618604010832, "grad_norm": 0.4555259644985199, "learning_rate": 3.7211277139118317e-06, "loss": 0.3354, "step": 38615 }, { "epoch": 1.7721077509063374, "grad_norm": 0.47761666774749756, "learning_rate": 3.7208906833546565e-06, "loss": 0.3878, "step": 38616 }, { "epoch": 1.772153641411592, "grad_norm": 0.4073365032672882, "learning_rate": 3.7206536558733388e-06, "loss": 0.2379, "step": 38617 }, { "epoch": 1.7721995319168464, "grad_norm": 0.5379689931869507, "learning_rate": 3.7204166314684442e-06, "loss": 0.4588, "step": 38618 }, { "epoch": 1.7722454224221007, "grad_norm": 0.5229946970939636, "learning_rate": 3.720179610140541e-06, "loss": 0.4252, "step": 38619 }, { "epoch": 1.7722913129273552, "grad_norm": 0.5249634385108948, "learning_rate": 3.7199425918902043e-06, "loss": 0.4164, "step": 38620 }, { "epoch": 1.7723372034326097, "grad_norm": 0.4545176923274994, "learning_rate": 3.7197055767180023e-06, "loss": 0.3598, "step": 38621 }, { "epoch": 1.7723830939378642, "grad_norm": 0.45977604389190674, "learning_rate": 3.7194685646245026e-06, "loss": 0.3393, "step": 38622 }, { "epoch": 1.7724289844431187, "grad_norm": 0.46226322650909424, "learning_rate": 3.719231555610278e-06, "loss": 0.3447, "step": 38623 }, { "epoch": 1.7724748749483732, "grad_norm": 0.4780139625072479, "learning_rate": 3.718994549675897e-06, "loss": 0.3284, "step": 38624 }, { "epoch": 1.7725207654536277, "grad_norm": 0.48105674982070923, "learning_rate": 3.7187575468219284e-06, "loss": 0.3685, "step": 38625 }, { "epoch": 1.7725666559588822, "grad_norm": 0.4751576781272888, "learning_rate": 3.7185205470489455e-06, "loss": 0.3606, "step": 38626 }, { "epoch": 1.7726125464641367, "grad_norm": 0.45569533109664917, "learning_rate": 3.7182835503575164e-06, "loss": 0.367, "step": 38627 }, { "epoch": 1.7726584369693912, "grad_norm": 0.49231454730033875, "learning_rate": 3.7180465567482093e-06, "loss": 0.4382, "step": 38628 }, { "epoch": 1.7727043274746455, "grad_norm": 0.46188369393348694, "learning_rate": 3.717809566221597e-06, "loss": 0.3188, "step": 38629 }, { "epoch": 1.7727502179799, "grad_norm": 0.47314122319221497, "learning_rate": 3.717572578778248e-06, "loss": 0.3766, "step": 38630 }, { "epoch": 1.7727961084851545, "grad_norm": 0.46679896116256714, "learning_rate": 3.7173355944187296e-06, "loss": 0.3345, "step": 38631 }, { "epoch": 1.7728419989904087, "grad_norm": 0.4729922413825989, "learning_rate": 3.717098613143617e-06, "loss": 0.3463, "step": 38632 }, { "epoch": 1.7728878894956632, "grad_norm": 0.4638116657733917, "learning_rate": 3.716861634953477e-06, "loss": 0.3232, "step": 38633 }, { "epoch": 1.7729337800009177, "grad_norm": 0.5012961626052856, "learning_rate": 3.7166246598488785e-06, "loss": 0.4239, "step": 38634 }, { "epoch": 1.7729796705061722, "grad_norm": 0.47806546092033386, "learning_rate": 3.716387687830394e-06, "loss": 0.3657, "step": 38635 }, { "epoch": 1.7730255610114267, "grad_norm": 0.4350389540195465, "learning_rate": 3.7161507188985925e-06, "loss": 0.3039, "step": 38636 }, { "epoch": 1.7730714515166812, "grad_norm": 0.44631901383399963, "learning_rate": 3.715913753054041e-06, "loss": 0.3075, "step": 38637 }, { "epoch": 1.7731173420219357, "grad_norm": 0.478247731924057, "learning_rate": 3.7156767902973134e-06, "loss": 0.291, "step": 38638 }, { "epoch": 1.7731632325271902, "grad_norm": 0.4301947057247162, "learning_rate": 3.7154398306289786e-06, "loss": 0.2781, "step": 38639 }, { "epoch": 1.7732091230324447, "grad_norm": 0.4958086609840393, "learning_rate": 3.715202874049605e-06, "loss": 0.3881, "step": 38640 }, { "epoch": 1.7732550135376992, "grad_norm": 0.4955841302871704, "learning_rate": 3.714965920559761e-06, "loss": 0.4484, "step": 38641 }, { "epoch": 1.7733009040429535, "grad_norm": 0.48201343417167664, "learning_rate": 3.71472897016002e-06, "loss": 0.3414, "step": 38642 }, { "epoch": 1.773346794548208, "grad_norm": 0.43385186791419983, "learning_rate": 3.7144920228509507e-06, "loss": 0.2703, "step": 38643 }, { "epoch": 1.7733926850534625, "grad_norm": 0.44730186462402344, "learning_rate": 3.714255078633121e-06, "loss": 0.3163, "step": 38644 }, { "epoch": 1.7734385755587168, "grad_norm": 0.45939165353775024, "learning_rate": 3.714018137507103e-06, "loss": 0.3636, "step": 38645 }, { "epoch": 1.7734844660639713, "grad_norm": 0.4454156160354614, "learning_rate": 3.7137811994734656e-06, "loss": 0.3249, "step": 38646 }, { "epoch": 1.7735303565692258, "grad_norm": 0.4642527997493744, "learning_rate": 3.7135442645327767e-06, "loss": 0.3777, "step": 38647 }, { "epoch": 1.7735762470744802, "grad_norm": 0.5166807174682617, "learning_rate": 3.7133073326856096e-06, "loss": 0.3876, "step": 38648 }, { "epoch": 1.7736221375797347, "grad_norm": 0.5027990341186523, "learning_rate": 3.713070403932532e-06, "loss": 0.3554, "step": 38649 }, { "epoch": 1.7736680280849892, "grad_norm": 0.4712843596935272, "learning_rate": 3.7128334782741134e-06, "loss": 0.3671, "step": 38650 }, { "epoch": 1.7737139185902437, "grad_norm": 0.48860350251197815, "learning_rate": 3.7125965557109246e-06, "loss": 0.4057, "step": 38651 }, { "epoch": 1.7737598090954982, "grad_norm": 0.41781896352767944, "learning_rate": 3.7123596362435355e-06, "loss": 0.276, "step": 38652 }, { "epoch": 1.7738056996007527, "grad_norm": 0.45310860872268677, "learning_rate": 3.7121227198725125e-06, "loss": 0.363, "step": 38653 }, { "epoch": 1.773851590106007, "grad_norm": 0.45763465762138367, "learning_rate": 3.7118858065984296e-06, "loss": 0.3304, "step": 38654 }, { "epoch": 1.7738974806112615, "grad_norm": 0.4800237715244293, "learning_rate": 3.7116488964218555e-06, "loss": 0.3594, "step": 38655 }, { "epoch": 1.773943371116516, "grad_norm": 0.4406096041202545, "learning_rate": 3.7114119893433574e-06, "loss": 0.3069, "step": 38656 }, { "epoch": 1.7739892616217705, "grad_norm": 0.499500036239624, "learning_rate": 3.7111750853635087e-06, "loss": 0.4048, "step": 38657 }, { "epoch": 1.7740351521270248, "grad_norm": 0.47643256187438965, "learning_rate": 3.710938184482877e-06, "loss": 0.3282, "step": 38658 }, { "epoch": 1.7740810426322793, "grad_norm": 0.4817681312561035, "learning_rate": 3.71070128670203e-06, "loss": 0.3941, "step": 38659 }, { "epoch": 1.7741269331375338, "grad_norm": 0.44607314467430115, "learning_rate": 3.710464392021541e-06, "loss": 0.2939, "step": 38660 }, { "epoch": 1.7741728236427883, "grad_norm": 0.464729905128479, "learning_rate": 3.7102275004419795e-06, "loss": 0.3481, "step": 38661 }, { "epoch": 1.7742187141480428, "grad_norm": 0.44852015376091003, "learning_rate": 3.7099906119639108e-06, "loss": 0.3166, "step": 38662 }, { "epoch": 1.7742646046532973, "grad_norm": 0.45701828598976135, "learning_rate": 3.7097537265879096e-06, "loss": 0.371, "step": 38663 }, { "epoch": 1.7743104951585518, "grad_norm": 0.4807978868484497, "learning_rate": 3.7095168443145435e-06, "loss": 0.3828, "step": 38664 }, { "epoch": 1.7743563856638063, "grad_norm": 0.47495120763778687, "learning_rate": 3.7092799651443813e-06, "loss": 0.376, "step": 38665 }, { "epoch": 1.7744022761690608, "grad_norm": 0.44755518436431885, "learning_rate": 3.709043089077994e-06, "loss": 0.3507, "step": 38666 }, { "epoch": 1.774448166674315, "grad_norm": 0.43564754724502563, "learning_rate": 3.7088062161159504e-06, "loss": 0.268, "step": 38667 }, { "epoch": 1.7744940571795695, "grad_norm": 0.4905352294445038, "learning_rate": 3.708569346258819e-06, "loss": 0.4, "step": 38668 }, { "epoch": 1.774539947684824, "grad_norm": 0.4904656410217285, "learning_rate": 3.708332479507172e-06, "loss": 0.4405, "step": 38669 }, { "epoch": 1.7745858381900783, "grad_norm": 0.4862385392189026, "learning_rate": 3.708095615861578e-06, "loss": 0.3361, "step": 38670 }, { "epoch": 1.7746317286953328, "grad_norm": 0.45290136337280273, "learning_rate": 3.7078587553226057e-06, "loss": 0.3321, "step": 38671 }, { "epoch": 1.7746776192005873, "grad_norm": 0.45138660073280334, "learning_rate": 3.7076218978908262e-06, "loss": 0.3043, "step": 38672 }, { "epoch": 1.7747235097058418, "grad_norm": 0.4899376630783081, "learning_rate": 3.707385043566808e-06, "loss": 0.3472, "step": 38673 }, { "epoch": 1.7747694002110963, "grad_norm": 0.4827486276626587, "learning_rate": 3.707148192351121e-06, "loss": 0.3601, "step": 38674 }, { "epoch": 1.7748152907163508, "grad_norm": 0.4980613589286804, "learning_rate": 3.7069113442443323e-06, "loss": 0.3874, "step": 38675 }, { "epoch": 1.7748611812216053, "grad_norm": 0.4982292056083679, "learning_rate": 3.7066744992470155e-06, "loss": 0.416, "step": 38676 }, { "epoch": 1.7749070717268598, "grad_norm": 0.4814682900905609, "learning_rate": 3.7064376573597386e-06, "loss": 0.3767, "step": 38677 }, { "epoch": 1.7749529622321143, "grad_norm": 0.4303734004497528, "learning_rate": 3.70620081858307e-06, "loss": 0.2975, "step": 38678 }, { "epoch": 1.7749988527373688, "grad_norm": 0.48041343688964844, "learning_rate": 3.705963982917581e-06, "loss": 0.3469, "step": 38679 }, { "epoch": 1.775044743242623, "grad_norm": 0.46200627088546753, "learning_rate": 3.7057271503638404e-06, "loss": 0.3197, "step": 38680 }, { "epoch": 1.7750906337478776, "grad_norm": 0.44478148221969604, "learning_rate": 3.7054903209224146e-06, "loss": 0.3108, "step": 38681 }, { "epoch": 1.775136524253132, "grad_norm": 0.5038337707519531, "learning_rate": 3.705253494593879e-06, "loss": 0.4357, "step": 38682 }, { "epoch": 1.7751824147583863, "grad_norm": 0.5064499378204346, "learning_rate": 3.7050166713788017e-06, "loss": 0.4007, "step": 38683 }, { "epoch": 1.7752283052636408, "grad_norm": 0.4801899492740631, "learning_rate": 3.7047798512777463e-06, "loss": 0.3398, "step": 38684 }, { "epoch": 1.7752741957688953, "grad_norm": 0.4947572946548462, "learning_rate": 3.704543034291289e-06, "loss": 0.4237, "step": 38685 }, { "epoch": 1.7753200862741498, "grad_norm": 0.5049558281898499, "learning_rate": 3.7043062204199966e-06, "loss": 0.3945, "step": 38686 }, { "epoch": 1.7753659767794043, "grad_norm": 0.4876173138618469, "learning_rate": 3.704069409664438e-06, "loss": 0.4194, "step": 38687 }, { "epoch": 1.7754118672846588, "grad_norm": 0.4788006842136383, "learning_rate": 3.7038326020251847e-06, "loss": 0.3779, "step": 38688 }, { "epoch": 1.7754577577899133, "grad_norm": 0.4422045052051544, "learning_rate": 3.7035957975028046e-06, "loss": 0.3291, "step": 38689 }, { "epoch": 1.7755036482951678, "grad_norm": 0.4834446310997009, "learning_rate": 3.7033589960978657e-06, "loss": 0.3616, "step": 38690 }, { "epoch": 1.7755495388004223, "grad_norm": 0.4987007975578308, "learning_rate": 3.703122197810941e-06, "loss": 0.4333, "step": 38691 }, { "epoch": 1.7755954293056768, "grad_norm": 0.5029178261756897, "learning_rate": 3.7028854026425985e-06, "loss": 0.3655, "step": 38692 }, { "epoch": 1.775641319810931, "grad_norm": 0.48140189051628113, "learning_rate": 3.7026486105934057e-06, "loss": 0.3916, "step": 38693 }, { "epoch": 1.7756872103161856, "grad_norm": 0.45967769622802734, "learning_rate": 3.702411821663935e-06, "loss": 0.3664, "step": 38694 }, { "epoch": 1.77573310082144, "grad_norm": 0.4670740067958832, "learning_rate": 3.7021750358547537e-06, "loss": 0.3758, "step": 38695 }, { "epoch": 1.7757789913266944, "grad_norm": 0.4445522129535675, "learning_rate": 3.7019382531664304e-06, "loss": 0.313, "step": 38696 }, { "epoch": 1.7758248818319489, "grad_norm": 0.4579584300518036, "learning_rate": 3.7017014735995378e-06, "loss": 0.3576, "step": 38697 }, { "epoch": 1.7758707723372034, "grad_norm": 0.4726782739162445, "learning_rate": 3.701464697154643e-06, "loss": 0.3711, "step": 38698 }, { "epoch": 1.7759166628424579, "grad_norm": 0.44019168615341187, "learning_rate": 3.7012279238323157e-06, "loss": 0.3156, "step": 38699 }, { "epoch": 1.7759625533477124, "grad_norm": 0.49122607707977295, "learning_rate": 3.7009911536331256e-06, "loss": 0.408, "step": 38700 }, { "epoch": 1.7760084438529669, "grad_norm": 0.48665308952331543, "learning_rate": 3.700754386557642e-06, "loss": 0.3942, "step": 38701 }, { "epoch": 1.7760543343582214, "grad_norm": 0.48522692918777466, "learning_rate": 3.700517622606432e-06, "loss": 0.4371, "step": 38702 }, { "epoch": 1.7761002248634759, "grad_norm": 0.4556894302368164, "learning_rate": 3.700280861780069e-06, "loss": 0.3491, "step": 38703 }, { "epoch": 1.7761461153687303, "grad_norm": 0.4846763014793396, "learning_rate": 3.70004410407912e-06, "loss": 0.3573, "step": 38704 }, { "epoch": 1.7761920058739846, "grad_norm": 0.4335854649543762, "learning_rate": 3.6998073495041566e-06, "loss": 0.285, "step": 38705 }, { "epoch": 1.7762378963792391, "grad_norm": 0.4490160644054413, "learning_rate": 3.6995705980557415e-06, "loss": 0.3462, "step": 38706 }, { "epoch": 1.7762837868844936, "grad_norm": 0.4872885048389435, "learning_rate": 3.6993338497344523e-06, "loss": 0.4052, "step": 38707 }, { "epoch": 1.776329677389748, "grad_norm": 0.484502375125885, "learning_rate": 3.699097104540854e-06, "loss": 0.3651, "step": 38708 }, { "epoch": 1.7763755678950024, "grad_norm": 0.4643760919570923, "learning_rate": 3.698860362475516e-06, "loss": 0.3484, "step": 38709 }, { "epoch": 1.776421458400257, "grad_norm": 0.4756908118724823, "learning_rate": 3.6986236235390084e-06, "loss": 0.3923, "step": 38710 }, { "epoch": 1.7764673489055114, "grad_norm": 0.45895692706108093, "learning_rate": 3.698386887731901e-06, "loss": 0.3717, "step": 38711 }, { "epoch": 1.7765132394107659, "grad_norm": 0.4976475238800049, "learning_rate": 3.69815015505476e-06, "loss": 0.4033, "step": 38712 }, { "epoch": 1.7765591299160204, "grad_norm": 0.5010587573051453, "learning_rate": 3.6979134255081593e-06, "loss": 0.3719, "step": 38713 }, { "epoch": 1.7766050204212749, "grad_norm": 0.4220401346683502, "learning_rate": 3.6976766990926655e-06, "loss": 0.2854, "step": 38714 }, { "epoch": 1.7766509109265294, "grad_norm": 0.4740554988384247, "learning_rate": 3.6974399758088477e-06, "loss": 0.2831, "step": 38715 }, { "epoch": 1.7766968014317839, "grad_norm": 0.5042394995689392, "learning_rate": 3.6972032556572755e-06, "loss": 0.4602, "step": 38716 }, { "epoch": 1.7767426919370384, "grad_norm": 0.5015743970870972, "learning_rate": 3.6969665386385196e-06, "loss": 0.4087, "step": 38717 }, { "epoch": 1.7767885824422927, "grad_norm": 0.6407734751701355, "learning_rate": 3.696729824753145e-06, "loss": 0.3711, "step": 38718 }, { "epoch": 1.7768344729475471, "grad_norm": 0.43201109766960144, "learning_rate": 3.696493114001727e-06, "loss": 0.3092, "step": 38719 }, { "epoch": 1.7768803634528016, "grad_norm": 0.5038405656814575, "learning_rate": 3.6962564063848305e-06, "loss": 0.3617, "step": 38720 }, { "epoch": 1.776926253958056, "grad_norm": 0.5605885982513428, "learning_rate": 3.6960197019030253e-06, "loss": 0.4122, "step": 38721 }, { "epoch": 1.7769721444633104, "grad_norm": 0.4641634225845337, "learning_rate": 3.695783000556882e-06, "loss": 0.324, "step": 38722 }, { "epoch": 1.777018034968565, "grad_norm": 0.5137197971343994, "learning_rate": 3.6955463023469697e-06, "loss": 0.4215, "step": 38723 }, { "epoch": 1.7770639254738194, "grad_norm": 0.45366814732551575, "learning_rate": 3.6953096072738536e-06, "loss": 0.3015, "step": 38724 }, { "epoch": 1.777109815979074, "grad_norm": 0.4421251118183136, "learning_rate": 3.6950729153381094e-06, "loss": 0.3037, "step": 38725 }, { "epoch": 1.7771557064843284, "grad_norm": 0.48795151710510254, "learning_rate": 3.694836226540302e-06, "loss": 0.3866, "step": 38726 }, { "epoch": 1.777201596989583, "grad_norm": 0.47505733370780945, "learning_rate": 3.694599540881001e-06, "loss": 0.3639, "step": 38727 }, { "epoch": 1.7772474874948374, "grad_norm": 0.46536684036254883, "learning_rate": 3.694362858360777e-06, "loss": 0.3741, "step": 38728 }, { "epoch": 1.777293378000092, "grad_norm": 0.47876566648483276, "learning_rate": 3.6941261789801986e-06, "loss": 0.3818, "step": 38729 }, { "epoch": 1.7773392685053464, "grad_norm": 0.4812789559364319, "learning_rate": 3.693889502739833e-06, "loss": 0.3834, "step": 38730 }, { "epoch": 1.7773851590106007, "grad_norm": 0.4632786214351654, "learning_rate": 3.6936528296402525e-06, "loss": 0.3392, "step": 38731 }, { "epoch": 1.7774310495158552, "grad_norm": 0.5048632621765137, "learning_rate": 3.693416159682024e-06, "loss": 0.4023, "step": 38732 }, { "epoch": 1.7774769400211097, "grad_norm": 0.5229439735412598, "learning_rate": 3.693179492865716e-06, "loss": 0.4856, "step": 38733 }, { "epoch": 1.777522830526364, "grad_norm": 0.459413081407547, "learning_rate": 3.6929428291919e-06, "loss": 0.3966, "step": 38734 }, { "epoch": 1.7775687210316184, "grad_norm": 0.4646683633327484, "learning_rate": 3.692706168661145e-06, "loss": 0.3507, "step": 38735 }, { "epoch": 1.777614611536873, "grad_norm": 0.4666522145271301, "learning_rate": 3.692469511274017e-06, "loss": 0.3485, "step": 38736 }, { "epoch": 1.7776605020421274, "grad_norm": 0.4557129144668579, "learning_rate": 3.6922328570310896e-06, "loss": 0.3367, "step": 38737 }, { "epoch": 1.777706392547382, "grad_norm": 0.4737633168697357, "learning_rate": 3.6919962059329285e-06, "loss": 0.3506, "step": 38738 }, { "epoch": 1.7777522830526364, "grad_norm": 0.44450145959854126, "learning_rate": 3.6917595579801036e-06, "loss": 0.2819, "step": 38739 }, { "epoch": 1.777798173557891, "grad_norm": 0.47432535886764526, "learning_rate": 3.6915229131731824e-06, "loss": 0.3699, "step": 38740 }, { "epoch": 1.7778440640631454, "grad_norm": 0.509697437286377, "learning_rate": 3.6912862715127374e-06, "loss": 0.4015, "step": 38741 }, { "epoch": 1.7778899545684, "grad_norm": 0.5004681348800659, "learning_rate": 3.6910496329993358e-06, "loss": 0.3721, "step": 38742 }, { "epoch": 1.7779358450736542, "grad_norm": 0.45762088894844055, "learning_rate": 3.6908129976335454e-06, "loss": 0.3351, "step": 38743 }, { "epoch": 1.7779817355789087, "grad_norm": 0.4737938940525055, "learning_rate": 3.690576365415938e-06, "loss": 0.3768, "step": 38744 }, { "epoch": 1.7780276260841632, "grad_norm": 0.5007230043411255, "learning_rate": 3.6903397363470817e-06, "loss": 0.4158, "step": 38745 }, { "epoch": 1.7780735165894177, "grad_norm": 0.4866727292537689, "learning_rate": 3.690103110427542e-06, "loss": 0.3705, "step": 38746 }, { "epoch": 1.778119407094672, "grad_norm": 0.5128588080406189, "learning_rate": 3.689866487657894e-06, "loss": 0.4548, "step": 38747 }, { "epoch": 1.7781652975999265, "grad_norm": 0.467977911233902, "learning_rate": 3.6896298680387023e-06, "loss": 0.3722, "step": 38748 }, { "epoch": 1.778211188105181, "grad_norm": 0.48930954933166504, "learning_rate": 3.6893932515705372e-06, "loss": 0.403, "step": 38749 }, { "epoch": 1.7782570786104355, "grad_norm": 0.44622647762298584, "learning_rate": 3.689156638253968e-06, "loss": 0.321, "step": 38750 }, { "epoch": 1.77830296911569, "grad_norm": 0.4731053411960602, "learning_rate": 3.688920028089564e-06, "loss": 0.3479, "step": 38751 }, { "epoch": 1.7783488596209445, "grad_norm": 0.4504857063293457, "learning_rate": 3.688683421077892e-06, "loss": 0.344, "step": 38752 }, { "epoch": 1.778394750126199, "grad_norm": 0.4367230236530304, "learning_rate": 3.6884468172195243e-06, "loss": 0.2951, "step": 38753 }, { "epoch": 1.7784406406314535, "grad_norm": 0.46515780687332153, "learning_rate": 3.6882102165150273e-06, "loss": 0.339, "step": 38754 }, { "epoch": 1.778486531136708, "grad_norm": 0.5011507272720337, "learning_rate": 3.687973618964969e-06, "loss": 0.3815, "step": 38755 }, { "epoch": 1.7785324216419622, "grad_norm": 0.4553022086620331, "learning_rate": 3.687737024569922e-06, "loss": 0.3238, "step": 38756 }, { "epoch": 1.7785783121472167, "grad_norm": 0.49520811438560486, "learning_rate": 3.687500433330453e-06, "loss": 0.4362, "step": 38757 }, { "epoch": 1.7786242026524712, "grad_norm": 0.48133385181427, "learning_rate": 3.687263845247131e-06, "loss": 0.3839, "step": 38758 }, { "epoch": 1.7786700931577255, "grad_norm": 0.5521310567855835, "learning_rate": 3.687027260320526e-06, "loss": 0.4693, "step": 38759 }, { "epoch": 1.77871598366298, "grad_norm": 0.45161595940589905, "learning_rate": 3.6867906785512053e-06, "loss": 0.3386, "step": 38760 }, { "epoch": 1.7787618741682345, "grad_norm": 0.4778243601322174, "learning_rate": 3.686554099939737e-06, "loss": 0.3418, "step": 38761 }, { "epoch": 1.778807764673489, "grad_norm": 0.4270366132259369, "learning_rate": 3.6863175244866935e-06, "loss": 0.2846, "step": 38762 }, { "epoch": 1.7788536551787435, "grad_norm": 0.451988160610199, "learning_rate": 3.686080952192642e-06, "loss": 0.3292, "step": 38763 }, { "epoch": 1.778899545683998, "grad_norm": 0.46123743057250977, "learning_rate": 3.68584438305815e-06, "loss": 0.3764, "step": 38764 }, { "epoch": 1.7789454361892525, "grad_norm": 0.47436368465423584, "learning_rate": 3.6856078170837883e-06, "loss": 0.3247, "step": 38765 }, { "epoch": 1.778991326694507, "grad_norm": 0.46316099166870117, "learning_rate": 3.6853712542701248e-06, "loss": 0.2978, "step": 38766 }, { "epoch": 1.7790372171997615, "grad_norm": 0.4840412139892578, "learning_rate": 3.6851346946177267e-06, "loss": 0.3492, "step": 38767 }, { "epoch": 1.779083107705016, "grad_norm": 0.5125594139099121, "learning_rate": 3.6848981381271675e-06, "loss": 0.4203, "step": 38768 }, { "epoch": 1.7791289982102703, "grad_norm": 0.4330027997493744, "learning_rate": 3.6846615847990117e-06, "loss": 0.317, "step": 38769 }, { "epoch": 1.7791748887155248, "grad_norm": 0.4930709898471832, "learning_rate": 3.684425034633831e-06, "loss": 0.4334, "step": 38770 }, { "epoch": 1.7792207792207793, "grad_norm": 0.44742172956466675, "learning_rate": 3.6841884876321916e-06, "loss": 0.3254, "step": 38771 }, { "epoch": 1.7792666697260335, "grad_norm": 0.4393977224826813, "learning_rate": 3.6839519437946644e-06, "loss": 0.3074, "step": 38772 }, { "epoch": 1.779312560231288, "grad_norm": 0.4759564697742462, "learning_rate": 3.683715403121817e-06, "loss": 0.3845, "step": 38773 }, { "epoch": 1.7793584507365425, "grad_norm": 0.5070866346359253, "learning_rate": 3.6834788656142186e-06, "loss": 0.4476, "step": 38774 }, { "epoch": 1.779404341241797, "grad_norm": 0.5410648584365845, "learning_rate": 3.683242331272438e-06, "loss": 0.4626, "step": 38775 }, { "epoch": 1.7794502317470515, "grad_norm": 0.4763728380203247, "learning_rate": 3.6830058000970446e-06, "loss": 0.3513, "step": 38776 }, { "epoch": 1.779496122252306, "grad_norm": 0.47258642315864563, "learning_rate": 3.6827692720886044e-06, "loss": 0.3699, "step": 38777 }, { "epoch": 1.7795420127575605, "grad_norm": 0.43259844183921814, "learning_rate": 3.68253274724769e-06, "loss": 0.2787, "step": 38778 }, { "epoch": 1.779587903262815, "grad_norm": 0.4752890169620514, "learning_rate": 3.6822962255748695e-06, "loss": 0.3359, "step": 38779 }, { "epoch": 1.7796337937680695, "grad_norm": 0.4541025459766388, "learning_rate": 3.6820597070707085e-06, "loss": 0.3366, "step": 38780 }, { "epoch": 1.779679684273324, "grad_norm": 0.4203289747238159, "learning_rate": 3.68182319173578e-06, "loss": 0.311, "step": 38781 }, { "epoch": 1.7797255747785783, "grad_norm": 0.4727313816547394, "learning_rate": 3.68158667957065e-06, "loss": 0.3277, "step": 38782 }, { "epoch": 1.7797714652838328, "grad_norm": 0.44337737560272217, "learning_rate": 3.6813501705758866e-06, "loss": 0.3415, "step": 38783 }, { "epoch": 1.7798173557890873, "grad_norm": 0.4635860025882721, "learning_rate": 3.6811136647520613e-06, "loss": 0.3336, "step": 38784 }, { "epoch": 1.7798632462943416, "grad_norm": 0.4659208059310913, "learning_rate": 3.680877162099742e-06, "loss": 0.3785, "step": 38785 }, { "epoch": 1.779909136799596, "grad_norm": 0.49302011728286743, "learning_rate": 3.6806406626194948e-06, "loss": 0.4302, "step": 38786 }, { "epoch": 1.7799550273048506, "grad_norm": 0.4949454367160797, "learning_rate": 3.6804041663118915e-06, "loss": 0.3986, "step": 38787 }, { "epoch": 1.780000917810105, "grad_norm": 0.529241681098938, "learning_rate": 3.6801676731775003e-06, "loss": 0.4568, "step": 38788 }, { "epoch": 1.7800468083153596, "grad_norm": 0.4531041085720062, "learning_rate": 3.6799311832168873e-06, "loss": 0.3785, "step": 38789 }, { "epoch": 1.780092698820614, "grad_norm": 0.47543302178382874, "learning_rate": 3.679694696430625e-06, "loss": 0.3984, "step": 38790 }, { "epoch": 1.7801385893258685, "grad_norm": 0.5123245120048523, "learning_rate": 3.679458212819281e-06, "loss": 0.4844, "step": 38791 }, { "epoch": 1.780184479831123, "grad_norm": 0.41209524869918823, "learning_rate": 3.6792217323834212e-06, "loss": 0.2648, "step": 38792 }, { "epoch": 1.7802303703363775, "grad_norm": 0.48457223176956177, "learning_rate": 3.678985255123617e-06, "loss": 0.3697, "step": 38793 }, { "epoch": 1.7802762608416318, "grad_norm": 0.44528916478157043, "learning_rate": 3.6787487810404376e-06, "loss": 0.3151, "step": 38794 }, { "epoch": 1.7803221513468863, "grad_norm": 0.4946439266204834, "learning_rate": 3.678512310134448e-06, "loss": 0.3392, "step": 38795 }, { "epoch": 1.7803680418521408, "grad_norm": 0.4705406427383423, "learning_rate": 3.6782758424062216e-06, "loss": 0.3356, "step": 38796 }, { "epoch": 1.780413932357395, "grad_norm": 0.515501081943512, "learning_rate": 3.678039377856324e-06, "loss": 0.3195, "step": 38797 }, { "epoch": 1.7804598228626496, "grad_norm": 0.490554541349411, "learning_rate": 3.6778029164853225e-06, "loss": 0.3691, "step": 38798 }, { "epoch": 1.780505713367904, "grad_norm": 0.45429137349128723, "learning_rate": 3.67756645829379e-06, "loss": 0.2844, "step": 38799 }, { "epoch": 1.7805516038731586, "grad_norm": 0.49002280831336975, "learning_rate": 3.6773300032822924e-06, "loss": 0.4043, "step": 38800 }, { "epoch": 1.780597494378413, "grad_norm": 0.4559251368045807, "learning_rate": 3.6770935514513973e-06, "loss": 0.3291, "step": 38801 }, { "epoch": 1.7806433848836676, "grad_norm": 0.48697417974472046, "learning_rate": 3.6768571028016764e-06, "loss": 0.3625, "step": 38802 }, { "epoch": 1.780689275388922, "grad_norm": 0.4785788655281067, "learning_rate": 3.676620657333696e-06, "loss": 0.3294, "step": 38803 }, { "epoch": 1.7807351658941766, "grad_norm": 0.5466011762619019, "learning_rate": 3.6763842150480256e-06, "loss": 0.4518, "step": 38804 }, { "epoch": 1.780781056399431, "grad_norm": 0.48355022072792053, "learning_rate": 3.676147775945231e-06, "loss": 0.316, "step": 38805 }, { "epoch": 1.7808269469046856, "grad_norm": 0.49268755316734314, "learning_rate": 3.675911340025886e-06, "loss": 0.4082, "step": 38806 }, { "epoch": 1.7808728374099398, "grad_norm": 0.49337977170944214, "learning_rate": 3.6756749072905564e-06, "loss": 0.3769, "step": 38807 }, { "epoch": 1.7809187279151943, "grad_norm": 0.4707299470901489, "learning_rate": 3.675438477739809e-06, "loss": 0.3564, "step": 38808 }, { "epoch": 1.7809646184204488, "grad_norm": 0.4616657495498657, "learning_rate": 3.675202051374215e-06, "loss": 0.3357, "step": 38809 }, { "epoch": 1.7810105089257031, "grad_norm": 0.4482233226299286, "learning_rate": 3.6749656281943426e-06, "loss": 0.2876, "step": 38810 }, { "epoch": 1.7810563994309576, "grad_norm": 0.46304577589035034, "learning_rate": 3.6747292082007568e-06, "loss": 0.35, "step": 38811 }, { "epoch": 1.7811022899362121, "grad_norm": 0.49315279722213745, "learning_rate": 3.6744927913940308e-06, "loss": 0.3942, "step": 38812 }, { "epoch": 1.7811481804414666, "grad_norm": 0.4711878299713135, "learning_rate": 3.674256377774732e-06, "loss": 0.3207, "step": 38813 }, { "epoch": 1.781194070946721, "grad_norm": 0.40258875489234924, "learning_rate": 3.6740199673434274e-06, "loss": 0.2689, "step": 38814 }, { "epoch": 1.7812399614519756, "grad_norm": 0.4317987263202667, "learning_rate": 3.6737835601006868e-06, "loss": 0.3013, "step": 38815 }, { "epoch": 1.78128585195723, "grad_norm": 0.4454007148742676, "learning_rate": 3.673547156047078e-06, "loss": 0.3055, "step": 38816 }, { "epoch": 1.7813317424624846, "grad_norm": 0.4814406931400299, "learning_rate": 3.673310755183168e-06, "loss": 0.3774, "step": 38817 }, { "epoch": 1.781377632967739, "grad_norm": 0.4965106248855591, "learning_rate": 3.6730743575095306e-06, "loss": 0.4161, "step": 38818 }, { "epoch": 1.7814235234729936, "grad_norm": 0.4762606918811798, "learning_rate": 3.6728379630267285e-06, "loss": 0.3598, "step": 38819 }, { "epoch": 1.7814694139782479, "grad_norm": 0.5015295147895813, "learning_rate": 3.67260157173533e-06, "loss": 0.3975, "step": 38820 }, { "epoch": 1.7815153044835024, "grad_norm": 0.4930337965488434, "learning_rate": 3.6723651836359093e-06, "loss": 0.3986, "step": 38821 }, { "epoch": 1.7815611949887569, "grad_norm": 0.5053561925888062, "learning_rate": 3.67212879872903e-06, "loss": 0.3981, "step": 38822 }, { "epoch": 1.7816070854940111, "grad_norm": 0.47690486907958984, "learning_rate": 3.6718924170152613e-06, "loss": 0.3431, "step": 38823 }, { "epoch": 1.7816529759992656, "grad_norm": 0.44550928473472595, "learning_rate": 3.6716560384951733e-06, "loss": 0.3472, "step": 38824 }, { "epoch": 1.7816988665045201, "grad_norm": 0.4321156442165375, "learning_rate": 3.6714196631693333e-06, "loss": 0.3021, "step": 38825 }, { "epoch": 1.7817447570097746, "grad_norm": 0.49916934967041016, "learning_rate": 3.6711832910383072e-06, "loss": 0.3839, "step": 38826 }, { "epoch": 1.7817906475150291, "grad_norm": 0.5040802359580994, "learning_rate": 3.6709469221026685e-06, "loss": 0.4304, "step": 38827 }, { "epoch": 1.7818365380202836, "grad_norm": 0.4874526858329773, "learning_rate": 3.670710556362983e-06, "loss": 0.3768, "step": 38828 }, { "epoch": 1.7818824285255381, "grad_norm": 0.49648502469062805, "learning_rate": 3.6704741938198175e-06, "loss": 0.3865, "step": 38829 }, { "epoch": 1.7819283190307926, "grad_norm": 0.488092839717865, "learning_rate": 3.6702378344737433e-06, "loss": 0.3679, "step": 38830 }, { "epoch": 1.7819742095360471, "grad_norm": 0.43595221638679504, "learning_rate": 3.6700014783253272e-06, "loss": 0.2814, "step": 38831 }, { "epoch": 1.7820201000413014, "grad_norm": 0.5059211254119873, "learning_rate": 3.669765125375136e-06, "loss": 0.4126, "step": 38832 }, { "epoch": 1.782065990546556, "grad_norm": 0.4665108621120453, "learning_rate": 3.669528775623742e-06, "loss": 0.3476, "step": 38833 }, { "epoch": 1.7821118810518104, "grad_norm": 0.48190823197364807, "learning_rate": 3.6692924290717112e-06, "loss": 0.3727, "step": 38834 }, { "epoch": 1.782157771557065, "grad_norm": 0.4643876552581787, "learning_rate": 3.669056085719612e-06, "loss": 0.3117, "step": 38835 }, { "epoch": 1.7822036620623192, "grad_norm": 0.4390116035938263, "learning_rate": 3.6688197455680117e-06, "loss": 0.2814, "step": 38836 }, { "epoch": 1.7822495525675737, "grad_norm": 0.43688899278640747, "learning_rate": 3.668583408617481e-06, "loss": 0.3113, "step": 38837 }, { "epoch": 1.7822954430728282, "grad_norm": 0.4819587469100952, "learning_rate": 3.6683470748685874e-06, "loss": 0.3429, "step": 38838 }, { "epoch": 1.7823413335780827, "grad_norm": 0.4955873191356659, "learning_rate": 3.6681107443218968e-06, "loss": 0.4393, "step": 38839 }, { "epoch": 1.7823872240833372, "grad_norm": 0.4806351065635681, "learning_rate": 3.6678744169779817e-06, "loss": 0.369, "step": 38840 }, { "epoch": 1.7824331145885917, "grad_norm": 0.46430543065071106, "learning_rate": 3.6676380928374076e-06, "loss": 0.3599, "step": 38841 }, { "epoch": 1.7824790050938462, "grad_norm": 0.48802515864372253, "learning_rate": 3.6674017719007416e-06, "loss": 0.3782, "step": 38842 }, { "epoch": 1.7825248955991007, "grad_norm": 0.4700908064842224, "learning_rate": 3.6671654541685554e-06, "loss": 0.3611, "step": 38843 }, { "epoch": 1.7825707861043552, "grad_norm": 0.46627581119537354, "learning_rate": 3.666929139641416e-06, "loss": 0.3305, "step": 38844 }, { "epoch": 1.7826166766096094, "grad_norm": 0.48393192887306213, "learning_rate": 3.66669282831989e-06, "loss": 0.3422, "step": 38845 }, { "epoch": 1.782662567114864, "grad_norm": 0.47725164890289307, "learning_rate": 3.6664565202045477e-06, "loss": 0.4048, "step": 38846 }, { "epoch": 1.7827084576201184, "grad_norm": 0.4746381640434265, "learning_rate": 3.666220215295957e-06, "loss": 0.3328, "step": 38847 }, { "epoch": 1.7827543481253727, "grad_norm": 0.4609190821647644, "learning_rate": 3.665983913594684e-06, "loss": 0.3586, "step": 38848 }, { "epoch": 1.7828002386306272, "grad_norm": 0.467178076505661, "learning_rate": 3.6657476151013004e-06, "loss": 0.3801, "step": 38849 }, { "epoch": 1.7828461291358817, "grad_norm": 0.468646377325058, "learning_rate": 3.665511319816373e-06, "loss": 0.3179, "step": 38850 }, { "epoch": 1.7828920196411362, "grad_norm": 0.46147310733795166, "learning_rate": 3.665275027740468e-06, "loss": 0.3302, "step": 38851 }, { "epoch": 1.7829379101463907, "grad_norm": 0.46726518869400024, "learning_rate": 3.6650387388741564e-06, "loss": 0.3947, "step": 38852 }, { "epoch": 1.7829838006516452, "grad_norm": 0.4900877773761749, "learning_rate": 3.6648024532180055e-06, "loss": 0.3399, "step": 38853 }, { "epoch": 1.7830296911568997, "grad_norm": 0.5002977848052979, "learning_rate": 3.6645661707725812e-06, "loss": 0.4062, "step": 38854 }, { "epoch": 1.7830755816621542, "grad_norm": 0.4390370547771454, "learning_rate": 3.6643298915384562e-06, "loss": 0.3224, "step": 38855 }, { "epoch": 1.7831214721674087, "grad_norm": 0.4955928325653076, "learning_rate": 3.6640936155161955e-06, "loss": 0.3495, "step": 38856 }, { "epoch": 1.7831673626726632, "grad_norm": 0.5008420944213867, "learning_rate": 3.6638573427063677e-06, "loss": 0.389, "step": 38857 }, { "epoch": 1.7832132531779175, "grad_norm": 0.4896942675113678, "learning_rate": 3.663621073109542e-06, "loss": 0.3412, "step": 38858 }, { "epoch": 1.783259143683172, "grad_norm": 0.470429927110672, "learning_rate": 3.663384806726286e-06, "loss": 0.3435, "step": 38859 }, { "epoch": 1.7833050341884265, "grad_norm": 0.45611026883125305, "learning_rate": 3.6631485435571657e-06, "loss": 0.3995, "step": 38860 }, { "epoch": 1.7833509246936807, "grad_norm": 0.4429650902748108, "learning_rate": 3.662912283602753e-06, "loss": 0.3205, "step": 38861 }, { "epoch": 1.7833968151989352, "grad_norm": 0.4269215166568756, "learning_rate": 3.662676026863616e-06, "loss": 0.2734, "step": 38862 }, { "epoch": 1.7834427057041897, "grad_norm": 0.3998905420303345, "learning_rate": 3.662439773340317e-06, "loss": 0.2469, "step": 38863 }, { "epoch": 1.7834885962094442, "grad_norm": 0.45013442635536194, "learning_rate": 3.6622035230334307e-06, "loss": 0.3342, "step": 38864 }, { "epoch": 1.7835344867146987, "grad_norm": 0.45822131633758545, "learning_rate": 3.6619672759435234e-06, "loss": 0.3271, "step": 38865 }, { "epoch": 1.7835803772199532, "grad_norm": 0.4669106602668762, "learning_rate": 3.66173103207116e-06, "loss": 0.2837, "step": 38866 }, { "epoch": 1.7836262677252077, "grad_norm": 0.5053828954696655, "learning_rate": 3.661494791416913e-06, "loss": 0.3949, "step": 38867 }, { "epoch": 1.7836721582304622, "grad_norm": 0.4411364197731018, "learning_rate": 3.661258553981348e-06, "loss": 0.306, "step": 38868 }, { "epoch": 1.7837180487357167, "grad_norm": 0.4495849013328552, "learning_rate": 3.661022319765035e-06, "loss": 0.3022, "step": 38869 }, { "epoch": 1.7837639392409712, "grad_norm": 0.4716343879699707, "learning_rate": 3.6607860887685377e-06, "loss": 0.3653, "step": 38870 }, { "epoch": 1.7838098297462255, "grad_norm": 0.4861082434654236, "learning_rate": 3.6605498609924295e-06, "loss": 0.3693, "step": 38871 }, { "epoch": 1.78385572025148, "grad_norm": 0.5092144012451172, "learning_rate": 3.660313636437276e-06, "loss": 0.3975, "step": 38872 }, { "epoch": 1.7839016107567345, "grad_norm": 0.4512619376182556, "learning_rate": 3.6600774151036445e-06, "loss": 0.3345, "step": 38873 }, { "epoch": 1.7839475012619888, "grad_norm": 0.4386513829231262, "learning_rate": 3.6598411969921043e-06, "loss": 0.2944, "step": 38874 }, { "epoch": 1.7839933917672433, "grad_norm": 0.5139645338058472, "learning_rate": 3.6596049821032242e-06, "loss": 0.3791, "step": 38875 }, { "epoch": 1.7840392822724978, "grad_norm": 0.4300140142440796, "learning_rate": 3.659368770437568e-06, "loss": 0.2792, "step": 38876 }, { "epoch": 1.7840851727777522, "grad_norm": 0.48955217003822327, "learning_rate": 3.6591325619957098e-06, "loss": 0.4023, "step": 38877 }, { "epoch": 1.7841310632830067, "grad_norm": 0.46486377716064453, "learning_rate": 3.6588963567782138e-06, "loss": 0.3222, "step": 38878 }, { "epoch": 1.7841769537882612, "grad_norm": 0.47650933265686035, "learning_rate": 3.6586601547856484e-06, "loss": 0.4016, "step": 38879 }, { "epoch": 1.7842228442935157, "grad_norm": 0.45058494806289673, "learning_rate": 3.658423956018582e-06, "loss": 0.2922, "step": 38880 }, { "epoch": 1.7842687347987702, "grad_norm": 0.47090595960617065, "learning_rate": 3.6581877604775835e-06, "loss": 0.334, "step": 38881 }, { "epoch": 1.7843146253040247, "grad_norm": 0.4944796860218048, "learning_rate": 3.6579515681632174e-06, "loss": 0.423, "step": 38882 }, { "epoch": 1.784360515809279, "grad_norm": 0.44962990283966064, "learning_rate": 3.6577153790760565e-06, "loss": 0.3288, "step": 38883 }, { "epoch": 1.7844064063145335, "grad_norm": 0.5113868713378906, "learning_rate": 3.6574791932166676e-06, "loss": 0.3828, "step": 38884 }, { "epoch": 1.784452296819788, "grad_norm": 0.4317995607852936, "learning_rate": 3.657243010585614e-06, "loss": 0.3175, "step": 38885 }, { "epoch": 1.7844981873250423, "grad_norm": 0.4848055839538574, "learning_rate": 3.6570068311834692e-06, "loss": 0.369, "step": 38886 }, { "epoch": 1.7845440778302968, "grad_norm": 0.46412521600723267, "learning_rate": 3.6567706550108e-06, "loss": 0.3515, "step": 38887 }, { "epoch": 1.7845899683355513, "grad_norm": 0.4597375988960266, "learning_rate": 3.656534482068172e-06, "loss": 0.3519, "step": 38888 }, { "epoch": 1.7846358588408058, "grad_norm": 0.5018007755279541, "learning_rate": 3.6562983123561556e-06, "loss": 0.3984, "step": 38889 }, { "epoch": 1.7846817493460603, "grad_norm": 0.4584071934223175, "learning_rate": 3.6560621458753177e-06, "loss": 0.3693, "step": 38890 }, { "epoch": 1.7847276398513148, "grad_norm": 0.46777623891830444, "learning_rate": 3.655825982626223e-06, "loss": 0.3255, "step": 38891 }, { "epoch": 1.7847735303565693, "grad_norm": 0.4695948362350464, "learning_rate": 3.655589822609446e-06, "loss": 0.4168, "step": 38892 }, { "epoch": 1.7848194208618238, "grad_norm": 0.4416811466217041, "learning_rate": 3.6553536658255506e-06, "loss": 0.3242, "step": 38893 }, { "epoch": 1.7848653113670783, "grad_norm": 0.4720553755760193, "learning_rate": 3.655117512275105e-06, "loss": 0.3679, "step": 38894 }, { "epoch": 1.7849112018723328, "grad_norm": 0.4780040681362152, "learning_rate": 3.654881361958678e-06, "loss": 0.4092, "step": 38895 }, { "epoch": 1.784957092377587, "grad_norm": 0.4819720685482025, "learning_rate": 3.6546452148768363e-06, "loss": 0.3539, "step": 38896 }, { "epoch": 1.7850029828828415, "grad_norm": 0.4870342016220093, "learning_rate": 3.654409071030146e-06, "loss": 0.3948, "step": 38897 }, { "epoch": 1.785048873388096, "grad_norm": 0.49844029545783997, "learning_rate": 3.6541729304191796e-06, "loss": 0.4164, "step": 38898 }, { "epoch": 1.7850947638933503, "grad_norm": 0.44270724058151245, "learning_rate": 3.653936793044503e-06, "loss": 0.3331, "step": 38899 }, { "epoch": 1.7851406543986048, "grad_norm": 0.433416485786438, "learning_rate": 3.653700658906683e-06, "loss": 0.288, "step": 38900 }, { "epoch": 1.7851865449038593, "grad_norm": 0.5079478621482849, "learning_rate": 3.6534645280062874e-06, "loss": 0.3823, "step": 38901 }, { "epoch": 1.7852324354091138, "grad_norm": 0.50795978307724, "learning_rate": 3.6532284003438858e-06, "loss": 0.4769, "step": 38902 }, { "epoch": 1.7852783259143683, "grad_norm": 0.48330941796302795, "learning_rate": 3.6529922759200443e-06, "loss": 0.4197, "step": 38903 }, { "epoch": 1.7853242164196228, "grad_norm": 0.47169798612594604, "learning_rate": 3.652756154735329e-06, "loss": 0.3457, "step": 38904 }, { "epoch": 1.7853701069248773, "grad_norm": 0.4668859541416168, "learning_rate": 3.6525200367903123e-06, "loss": 0.3788, "step": 38905 }, { "epoch": 1.7854159974301318, "grad_norm": 0.5085064768791199, "learning_rate": 3.652283922085561e-06, "loss": 0.4408, "step": 38906 }, { "epoch": 1.7854618879353863, "grad_norm": 0.4649844467639923, "learning_rate": 3.6520478106216383e-06, "loss": 0.3241, "step": 38907 }, { "epoch": 1.7855077784406408, "grad_norm": 0.47638532519340515, "learning_rate": 3.6518117023991175e-06, "loss": 0.3679, "step": 38908 }, { "epoch": 1.785553668945895, "grad_norm": 0.453876256942749, "learning_rate": 3.6515755974185637e-06, "loss": 0.3212, "step": 38909 }, { "epoch": 1.7855995594511496, "grad_norm": 0.4389156699180603, "learning_rate": 3.651339495680544e-06, "loss": 0.3213, "step": 38910 }, { "epoch": 1.785645449956404, "grad_norm": 0.5007078051567078, "learning_rate": 3.651103397185628e-06, "loss": 0.3579, "step": 38911 }, { "epoch": 1.7856913404616583, "grad_norm": 0.42395341396331787, "learning_rate": 3.6508673019343827e-06, "loss": 0.2898, "step": 38912 }, { "epoch": 1.7857372309669128, "grad_norm": 0.47447890043258667, "learning_rate": 3.6506312099273735e-06, "loss": 0.3543, "step": 38913 }, { "epoch": 1.7857831214721673, "grad_norm": 0.4424329102039337, "learning_rate": 3.650395121165173e-06, "loss": 0.3258, "step": 38914 }, { "epoch": 1.7858290119774218, "grad_norm": 0.4706698954105377, "learning_rate": 3.6501590356483457e-06, "loss": 0.3294, "step": 38915 }, { "epoch": 1.7858749024826763, "grad_norm": 0.5204449892044067, "learning_rate": 3.649922953377459e-06, "loss": 0.4157, "step": 38916 }, { "epoch": 1.7859207929879308, "grad_norm": 0.4657539129257202, "learning_rate": 3.649686874353083e-06, "loss": 0.3868, "step": 38917 }, { "epoch": 1.7859666834931853, "grad_norm": 0.4737354815006256, "learning_rate": 3.6494507985757833e-06, "loss": 0.3558, "step": 38918 }, { "epoch": 1.7860125739984398, "grad_norm": 0.4947808086872101, "learning_rate": 3.6492147260461264e-06, "loss": 0.4197, "step": 38919 }, { "epoch": 1.7860584645036943, "grad_norm": 0.4777209758758545, "learning_rate": 3.648978656764684e-06, "loss": 0.3441, "step": 38920 }, { "epoch": 1.7861043550089486, "grad_norm": 0.4520593285560608, "learning_rate": 3.6487425907320216e-06, "loss": 0.3005, "step": 38921 }, { "epoch": 1.786150245514203, "grad_norm": 0.48971161246299744, "learning_rate": 3.6485065279487052e-06, "loss": 0.3684, "step": 38922 }, { "epoch": 1.7861961360194576, "grad_norm": 0.4493216574192047, "learning_rate": 3.6482704684153056e-06, "loss": 0.2938, "step": 38923 }, { "epoch": 1.786242026524712, "grad_norm": 0.5002160668373108, "learning_rate": 3.6480344121323887e-06, "loss": 0.4173, "step": 38924 }, { "epoch": 1.7862879170299664, "grad_norm": 0.47588375210762024, "learning_rate": 3.6477983591005205e-06, "loss": 0.3991, "step": 38925 }, { "epoch": 1.7863338075352209, "grad_norm": 0.46012020111083984, "learning_rate": 3.6475623093202716e-06, "loss": 0.3772, "step": 38926 }, { "epoch": 1.7863796980404754, "grad_norm": 0.5056272149085999, "learning_rate": 3.6473262627922096e-06, "loss": 0.3689, "step": 38927 }, { "epoch": 1.7864255885457299, "grad_norm": 0.4490322768688202, "learning_rate": 3.6470902195169e-06, "loss": 0.3371, "step": 38928 }, { "epoch": 1.7864714790509844, "grad_norm": 0.48112401366233826, "learning_rate": 3.646854179494912e-06, "loss": 0.3808, "step": 38929 }, { "epoch": 1.7865173695562389, "grad_norm": 0.5166167616844177, "learning_rate": 3.6466181427268126e-06, "loss": 0.3785, "step": 38930 }, { "epoch": 1.7865632600614934, "grad_norm": 0.48401951789855957, "learning_rate": 3.6463821092131687e-06, "loss": 0.4247, "step": 38931 }, { "epoch": 1.7866091505667479, "grad_norm": 0.4511071443557739, "learning_rate": 3.6461460789545496e-06, "loss": 0.3534, "step": 38932 }, { "epoch": 1.7866550410720023, "grad_norm": 0.45948219299316406, "learning_rate": 3.645910051951521e-06, "loss": 0.3295, "step": 38933 }, { "epoch": 1.7867009315772566, "grad_norm": 0.4965776205062866, "learning_rate": 3.645674028204653e-06, "loss": 0.3858, "step": 38934 }, { "epoch": 1.7867468220825111, "grad_norm": 0.4239000678062439, "learning_rate": 3.645438007714508e-06, "loss": 0.2756, "step": 38935 }, { "epoch": 1.7867927125877656, "grad_norm": 0.4805695414543152, "learning_rate": 3.64520199048166e-06, "loss": 0.3741, "step": 38936 }, { "epoch": 1.78683860309302, "grad_norm": 0.44096726179122925, "learning_rate": 3.6449659765066736e-06, "loss": 0.3547, "step": 38937 }, { "epoch": 1.7868844935982744, "grad_norm": 0.4842705726623535, "learning_rate": 3.6447299657901146e-06, "loss": 0.33, "step": 38938 }, { "epoch": 1.786930384103529, "grad_norm": 0.5217254161834717, "learning_rate": 3.644493958332554e-06, "loss": 0.3885, "step": 38939 }, { "epoch": 1.7869762746087834, "grad_norm": 0.42562544345855713, "learning_rate": 3.6442579541345573e-06, "loss": 0.2793, "step": 38940 }, { "epoch": 1.7870221651140379, "grad_norm": 0.4950166940689087, "learning_rate": 3.6440219531966905e-06, "loss": 0.3796, "step": 38941 }, { "epoch": 1.7870680556192924, "grad_norm": 0.479701966047287, "learning_rate": 3.643785955519525e-06, "loss": 0.4008, "step": 38942 }, { "epoch": 1.7871139461245469, "grad_norm": 0.5827389359474182, "learning_rate": 3.6435499611036262e-06, "loss": 0.344, "step": 38943 }, { "epoch": 1.7871598366298014, "grad_norm": 0.44716349244117737, "learning_rate": 3.64331396994956e-06, "loss": 0.3113, "step": 38944 }, { "epoch": 1.7872057271350559, "grad_norm": 0.48132508993148804, "learning_rate": 3.6430779820578975e-06, "loss": 0.3503, "step": 38945 }, { "epoch": 1.7872516176403104, "grad_norm": 0.44393736124038696, "learning_rate": 3.642841997429204e-06, "loss": 0.3471, "step": 38946 }, { "epoch": 1.7872975081455647, "grad_norm": 0.4960499107837677, "learning_rate": 3.642606016064045e-06, "loss": 0.4239, "step": 38947 }, { "epoch": 1.7873433986508191, "grad_norm": 0.5084317922592163, "learning_rate": 3.6423700379629923e-06, "loss": 0.3677, "step": 38948 }, { "epoch": 1.7873892891560736, "grad_norm": 0.46935632824897766, "learning_rate": 3.6421340631266105e-06, "loss": 0.3646, "step": 38949 }, { "epoch": 1.787435179661328, "grad_norm": 0.4705304801464081, "learning_rate": 3.641898091555467e-06, "loss": 0.33, "step": 38950 }, { "epoch": 1.7874810701665824, "grad_norm": 0.5175275802612305, "learning_rate": 3.6416621232501315e-06, "loss": 0.4203, "step": 38951 }, { "epoch": 1.787526960671837, "grad_norm": 0.47787007689476013, "learning_rate": 3.6414261582111698e-06, "loss": 0.3764, "step": 38952 }, { "epoch": 1.7875728511770914, "grad_norm": 0.5212996602058411, "learning_rate": 3.6411901964391476e-06, "loss": 0.3889, "step": 38953 }, { "epoch": 1.787618741682346, "grad_norm": 0.4915921092033386, "learning_rate": 3.6409542379346363e-06, "loss": 0.3825, "step": 38954 }, { "epoch": 1.7876646321876004, "grad_norm": 0.4641614258289337, "learning_rate": 3.6407182826982e-06, "loss": 0.3352, "step": 38955 }, { "epoch": 1.787710522692855, "grad_norm": 0.4913608431816101, "learning_rate": 3.640482330730406e-06, "loss": 0.4027, "step": 38956 }, { "epoch": 1.7877564131981094, "grad_norm": 0.4311918020248413, "learning_rate": 3.6402463820318245e-06, "loss": 0.2775, "step": 38957 }, { "epoch": 1.787802303703364, "grad_norm": 0.4445841908454895, "learning_rate": 3.6400104366030214e-06, "loss": 0.3316, "step": 38958 }, { "epoch": 1.7878481942086184, "grad_norm": 0.46535274386405945, "learning_rate": 3.6397744944445634e-06, "loss": 0.3247, "step": 38959 }, { "epoch": 1.7878940847138727, "grad_norm": 0.5080114603042603, "learning_rate": 3.6395385555570184e-06, "loss": 0.4207, "step": 38960 }, { "epoch": 1.7879399752191272, "grad_norm": 0.49811017513275146, "learning_rate": 3.639302619940955e-06, "loss": 0.3645, "step": 38961 }, { "epoch": 1.7879858657243817, "grad_norm": 0.4890167713165283, "learning_rate": 3.6390666875969367e-06, "loss": 0.3981, "step": 38962 }, { "epoch": 1.788031756229636, "grad_norm": 0.46625998616218567, "learning_rate": 3.6388307585255354e-06, "loss": 0.3186, "step": 38963 }, { "epoch": 1.7880776467348904, "grad_norm": 0.4739624559879303, "learning_rate": 3.6385948327273168e-06, "loss": 0.3471, "step": 38964 }, { "epoch": 1.788123537240145, "grad_norm": 0.4863038659095764, "learning_rate": 3.638358910202847e-06, "loss": 0.4081, "step": 38965 }, { "epoch": 1.7881694277453994, "grad_norm": 0.46976757049560547, "learning_rate": 3.638122990952695e-06, "loss": 0.3246, "step": 38966 }, { "epoch": 1.788215318250654, "grad_norm": 0.4728696346282959, "learning_rate": 3.6378870749774276e-06, "loss": 0.359, "step": 38967 }, { "epoch": 1.7882612087559084, "grad_norm": 0.4206938147544861, "learning_rate": 3.637651162277612e-06, "loss": 0.2771, "step": 38968 }, { "epoch": 1.788307099261163, "grad_norm": 0.4470050632953644, "learning_rate": 3.637415252853813e-06, "loss": 0.3245, "step": 38969 }, { "epoch": 1.7883529897664174, "grad_norm": 0.4776450991630554, "learning_rate": 3.637179346706603e-06, "loss": 0.3695, "step": 38970 }, { "epoch": 1.788398880271672, "grad_norm": 0.4869869351387024, "learning_rate": 3.636943443836546e-06, "loss": 0.4111, "step": 38971 }, { "epoch": 1.7884447707769262, "grad_norm": 0.7510921359062195, "learning_rate": 3.636707544244209e-06, "loss": 0.3653, "step": 38972 }, { "epoch": 1.7884906612821807, "grad_norm": 0.45729202032089233, "learning_rate": 3.6364716479301608e-06, "loss": 0.313, "step": 38973 }, { "epoch": 1.7885365517874352, "grad_norm": 0.4710065424442291, "learning_rate": 3.6362357548949677e-06, "loss": 0.3615, "step": 38974 }, { "epoch": 1.7885824422926895, "grad_norm": 0.46029266715049744, "learning_rate": 3.635999865139197e-06, "loss": 0.3311, "step": 38975 }, { "epoch": 1.788628332797944, "grad_norm": 0.4766286313533783, "learning_rate": 3.635763978663417e-06, "loss": 0.3486, "step": 38976 }, { "epoch": 1.7886742233031985, "grad_norm": 0.5313324928283691, "learning_rate": 3.6355280954681938e-06, "loss": 0.4835, "step": 38977 }, { "epoch": 1.788720113808453, "grad_norm": 0.45634564757347107, "learning_rate": 3.6352922155540925e-06, "loss": 0.3235, "step": 38978 }, { "epoch": 1.7887660043137075, "grad_norm": 0.5241215229034424, "learning_rate": 3.6350563389216853e-06, "loss": 0.4652, "step": 38979 }, { "epoch": 1.788811894818962, "grad_norm": 0.49338221549987793, "learning_rate": 3.634820465571537e-06, "loss": 0.3953, "step": 38980 }, { "epoch": 1.7888577853242165, "grad_norm": 0.4995090663433075, "learning_rate": 3.6345845955042125e-06, "loss": 0.3794, "step": 38981 }, { "epoch": 1.788903675829471, "grad_norm": 0.5188331007957458, "learning_rate": 3.6343487287202828e-06, "loss": 0.4912, "step": 38982 }, { "epoch": 1.7889495663347255, "grad_norm": 0.4594517648220062, "learning_rate": 3.6341128652203137e-06, "loss": 0.3146, "step": 38983 }, { "epoch": 1.78899545683998, "grad_norm": 0.45125338435173035, "learning_rate": 3.63387700500487e-06, "loss": 0.2914, "step": 38984 }, { "epoch": 1.7890413473452342, "grad_norm": 0.45458483695983887, "learning_rate": 3.633641148074523e-06, "loss": 0.3191, "step": 38985 }, { "epoch": 1.7890872378504887, "grad_norm": 0.4788898527622223, "learning_rate": 3.633405294429837e-06, "loss": 0.3793, "step": 38986 }, { "epoch": 1.7891331283557432, "grad_norm": 0.4719712734222412, "learning_rate": 3.6331694440713794e-06, "loss": 0.3484, "step": 38987 }, { "epoch": 1.7891790188609975, "grad_norm": 0.4692332446575165, "learning_rate": 3.6329335969997194e-06, "loss": 0.345, "step": 38988 }, { "epoch": 1.789224909366252, "grad_norm": 0.4610622227191925, "learning_rate": 3.6326977532154223e-06, "loss": 0.3435, "step": 38989 }, { "epoch": 1.7892707998715065, "grad_norm": 0.5087058544158936, "learning_rate": 3.632461912719053e-06, "loss": 0.4207, "step": 38990 }, { "epoch": 1.789316690376761, "grad_norm": 0.46746012568473816, "learning_rate": 3.632226075511184e-06, "loss": 0.3435, "step": 38991 }, { "epoch": 1.7893625808820155, "grad_norm": 0.44698840379714966, "learning_rate": 3.63199024159238e-06, "loss": 0.2914, "step": 38992 }, { "epoch": 1.78940847138727, "grad_norm": 0.4543249011039734, "learning_rate": 3.6317544109632053e-06, "loss": 0.3245, "step": 38993 }, { "epoch": 1.7894543618925245, "grad_norm": 0.4537357985973358, "learning_rate": 3.6315185836242316e-06, "loss": 0.3166, "step": 38994 }, { "epoch": 1.789500252397779, "grad_norm": 0.5167016983032227, "learning_rate": 3.6312827595760236e-06, "loss": 0.3891, "step": 38995 }, { "epoch": 1.7895461429030335, "grad_norm": 0.46405622363090515, "learning_rate": 3.631046938819146e-06, "loss": 0.3656, "step": 38996 }, { "epoch": 1.789592033408288, "grad_norm": 0.4733262062072754, "learning_rate": 3.630811121354172e-06, "loss": 0.3798, "step": 38997 }, { "epoch": 1.7896379239135423, "grad_norm": 0.47339415550231934, "learning_rate": 3.630575307181664e-06, "loss": 0.3283, "step": 38998 }, { "epoch": 1.7896838144187968, "grad_norm": 0.47460290789604187, "learning_rate": 3.63033949630219e-06, "loss": 0.3494, "step": 38999 }, { "epoch": 1.7897297049240513, "grad_norm": 0.4535413086414337, "learning_rate": 3.6301036887163155e-06, "loss": 0.3439, "step": 39000 }, { "epoch": 1.7897755954293055, "grad_norm": 0.49388542771339417, "learning_rate": 3.6298678844246106e-06, "loss": 0.3786, "step": 39001 }, { "epoch": 1.78982148593456, "grad_norm": 0.5035730600357056, "learning_rate": 3.6296320834276415e-06, "loss": 0.2787, "step": 39002 }, { "epoch": 1.7898673764398145, "grad_norm": 0.4609580934047699, "learning_rate": 3.629396285725974e-06, "loss": 0.3514, "step": 39003 }, { "epoch": 1.789913266945069, "grad_norm": 0.48449817299842834, "learning_rate": 3.6291604913201766e-06, "loss": 0.4319, "step": 39004 }, { "epoch": 1.7899591574503235, "grad_norm": 0.48680898547172546, "learning_rate": 3.628924700210816e-06, "loss": 0.3513, "step": 39005 }, { "epoch": 1.790005047955578, "grad_norm": 0.4859076738357544, "learning_rate": 3.628688912398456e-06, "loss": 0.3882, "step": 39006 }, { "epoch": 1.7900509384608325, "grad_norm": 0.5115410685539246, "learning_rate": 3.628453127883669e-06, "loss": 0.3573, "step": 39007 }, { "epoch": 1.790096828966087, "grad_norm": 0.4633951485157013, "learning_rate": 3.628217346667019e-06, "loss": 0.3648, "step": 39008 }, { "epoch": 1.7901427194713415, "grad_norm": 0.4623551368713379, "learning_rate": 3.627981568749072e-06, "loss": 0.3704, "step": 39009 }, { "epoch": 1.7901886099765958, "grad_norm": 0.43444451689720154, "learning_rate": 3.6277457941303985e-06, "loss": 0.2962, "step": 39010 }, { "epoch": 1.7902345004818503, "grad_norm": 0.4922162592411041, "learning_rate": 3.6275100228115623e-06, "loss": 0.4003, "step": 39011 }, { "epoch": 1.7902803909871048, "grad_norm": 0.4627329707145691, "learning_rate": 3.6272742547931294e-06, "loss": 0.3272, "step": 39012 }, { "epoch": 1.7903262814923593, "grad_norm": 0.4639261066913605, "learning_rate": 3.6270384900756704e-06, "loss": 0.3468, "step": 39013 }, { "epoch": 1.7903721719976136, "grad_norm": 0.45098528265953064, "learning_rate": 3.6268027286597513e-06, "loss": 0.2997, "step": 39014 }, { "epoch": 1.790418062502868, "grad_norm": 0.5043196678161621, "learning_rate": 3.626566970545937e-06, "loss": 0.4683, "step": 39015 }, { "epoch": 1.7904639530081226, "grad_norm": 0.4593127369880676, "learning_rate": 3.626331215734796e-06, "loss": 0.3349, "step": 39016 }, { "epoch": 1.790509843513377, "grad_norm": 0.4710067808628082, "learning_rate": 3.626095464226895e-06, "loss": 0.3252, "step": 39017 }, { "epoch": 1.7905557340186316, "grad_norm": 0.48087239265441895, "learning_rate": 3.6258597160228e-06, "loss": 0.4204, "step": 39018 }, { "epoch": 1.790601624523886, "grad_norm": 0.4525650143623352, "learning_rate": 3.6256239711230805e-06, "loss": 0.3231, "step": 39019 }, { "epoch": 1.7906475150291405, "grad_norm": 0.4923372268676758, "learning_rate": 3.6253882295283017e-06, "loss": 0.3331, "step": 39020 }, { "epoch": 1.790693405534395, "grad_norm": 0.444284588098526, "learning_rate": 3.625152491239027e-06, "loss": 0.264, "step": 39021 }, { "epoch": 1.7907392960396495, "grad_norm": 0.4350729286670685, "learning_rate": 3.62491675625583e-06, "loss": 0.3228, "step": 39022 }, { "epoch": 1.7907851865449038, "grad_norm": 0.48174792528152466, "learning_rate": 3.624681024579273e-06, "loss": 0.373, "step": 39023 }, { "epoch": 1.7908310770501583, "grad_norm": 0.4338656961917877, "learning_rate": 3.6244452962099235e-06, "loss": 0.331, "step": 39024 }, { "epoch": 1.7908769675554128, "grad_norm": 0.4945756196975708, "learning_rate": 3.62420957114835e-06, "loss": 0.4106, "step": 39025 }, { "epoch": 1.790922858060667, "grad_norm": 0.5030802488327026, "learning_rate": 3.6239738493951183e-06, "loss": 0.3657, "step": 39026 }, { "epoch": 1.7909687485659216, "grad_norm": 0.4823955297470093, "learning_rate": 3.623738130950793e-06, "loss": 0.4072, "step": 39027 }, { "epoch": 1.791014639071176, "grad_norm": 0.47356152534484863, "learning_rate": 3.6235024158159452e-06, "loss": 0.2981, "step": 39028 }, { "epoch": 1.7910605295764306, "grad_norm": 0.49216094613075256, "learning_rate": 3.62326670399114e-06, "loss": 0.34, "step": 39029 }, { "epoch": 1.791106420081685, "grad_norm": 0.46532973647117615, "learning_rate": 3.623030995476943e-06, "loss": 0.317, "step": 39030 }, { "epoch": 1.7911523105869396, "grad_norm": 0.44334882497787476, "learning_rate": 3.6227952902739223e-06, "loss": 0.3013, "step": 39031 }, { "epoch": 1.791198201092194, "grad_norm": 0.4597153961658478, "learning_rate": 3.622559588382645e-06, "loss": 0.3174, "step": 39032 }, { "epoch": 1.7912440915974486, "grad_norm": 0.4653659462928772, "learning_rate": 3.622323889803676e-06, "loss": 0.3389, "step": 39033 }, { "epoch": 1.791289982102703, "grad_norm": 0.43161216378211975, "learning_rate": 3.622088194537582e-06, "loss": 0.3173, "step": 39034 }, { "epoch": 1.7913358726079576, "grad_norm": 0.45239779353141785, "learning_rate": 3.621852502584933e-06, "loss": 0.3332, "step": 39035 }, { "epoch": 1.7913817631132118, "grad_norm": 0.464103102684021, "learning_rate": 3.621616813946294e-06, "loss": 0.3302, "step": 39036 }, { "epoch": 1.7914276536184663, "grad_norm": 0.4936441481113434, "learning_rate": 3.6213811286222295e-06, "loss": 0.3725, "step": 39037 }, { "epoch": 1.7914735441237208, "grad_norm": 0.4331585764884949, "learning_rate": 3.62114544661331e-06, "loss": 0.313, "step": 39038 }, { "epoch": 1.7915194346289751, "grad_norm": 0.4732377827167511, "learning_rate": 3.6209097679201004e-06, "loss": 0.3492, "step": 39039 }, { "epoch": 1.7915653251342296, "grad_norm": 0.4589652419090271, "learning_rate": 3.620674092543166e-06, "loss": 0.328, "step": 39040 }, { "epoch": 1.7916112156394841, "grad_norm": 0.49584299325942993, "learning_rate": 3.620438420483078e-06, "loss": 0.3991, "step": 39041 }, { "epoch": 1.7916571061447386, "grad_norm": 0.4257907569408417, "learning_rate": 3.6202027517403983e-06, "loss": 0.2823, "step": 39042 }, { "epoch": 1.791702996649993, "grad_norm": 0.5027692317962646, "learning_rate": 3.6199670863156946e-06, "loss": 0.3663, "step": 39043 }, { "epoch": 1.7917488871552476, "grad_norm": 0.4778556525707245, "learning_rate": 3.6197314242095355e-06, "loss": 0.3867, "step": 39044 }, { "epoch": 1.791794777660502, "grad_norm": 0.46024858951568604, "learning_rate": 3.6194957654224874e-06, "loss": 0.3309, "step": 39045 }, { "epoch": 1.7918406681657566, "grad_norm": 0.44204387068748474, "learning_rate": 3.619260109955115e-06, "loss": 0.3388, "step": 39046 }, { "epoch": 1.791886558671011, "grad_norm": 0.4797074794769287, "learning_rate": 3.6190244578079874e-06, "loss": 0.3699, "step": 39047 }, { "epoch": 1.7919324491762656, "grad_norm": 0.5228578448295593, "learning_rate": 3.61878880898167e-06, "loss": 0.3833, "step": 39048 }, { "epoch": 1.7919783396815199, "grad_norm": 0.44349056482315063, "learning_rate": 3.618553163476728e-06, "loss": 0.354, "step": 39049 }, { "epoch": 1.7920242301867744, "grad_norm": 0.4855322539806366, "learning_rate": 3.6183175212937316e-06, "loss": 0.3612, "step": 39050 }, { "epoch": 1.7920701206920289, "grad_norm": 0.4381566047668457, "learning_rate": 3.618081882433245e-06, "loss": 0.3054, "step": 39051 }, { "epoch": 1.7921160111972831, "grad_norm": 0.4582786560058594, "learning_rate": 3.617846246895834e-06, "loss": 0.3125, "step": 39052 }, { "epoch": 1.7921619017025376, "grad_norm": 0.4975954294204712, "learning_rate": 3.6176106146820687e-06, "loss": 0.3563, "step": 39053 }, { "epoch": 1.7922077922077921, "grad_norm": 0.4677339494228363, "learning_rate": 3.617374985792513e-06, "loss": 0.3566, "step": 39054 }, { "epoch": 1.7922536827130466, "grad_norm": 0.45274174213409424, "learning_rate": 3.6171393602277317e-06, "loss": 0.3276, "step": 39055 }, { "epoch": 1.7922995732183011, "grad_norm": 0.4715751111507416, "learning_rate": 3.6169037379882964e-06, "loss": 0.3506, "step": 39056 }, { "epoch": 1.7923454637235556, "grad_norm": 0.4670037627220154, "learning_rate": 3.616668119074771e-06, "loss": 0.3582, "step": 39057 }, { "epoch": 1.7923913542288101, "grad_norm": 0.48028427362442017, "learning_rate": 3.6164325034877212e-06, "loss": 0.3835, "step": 39058 }, { "epoch": 1.7924372447340646, "grad_norm": 0.4367833137512207, "learning_rate": 3.616196891227715e-06, "loss": 0.2766, "step": 39059 }, { "epoch": 1.7924831352393191, "grad_norm": 0.45414289832115173, "learning_rate": 3.6159612822953194e-06, "loss": 0.3225, "step": 39060 }, { "epoch": 1.7925290257445734, "grad_norm": 0.4873010516166687, "learning_rate": 3.6157256766910975e-06, "loss": 0.3948, "step": 39061 }, { "epoch": 1.792574916249828, "grad_norm": 0.5049685835838318, "learning_rate": 3.6154900744156206e-06, "loss": 0.4023, "step": 39062 }, { "epoch": 1.7926208067550824, "grad_norm": 0.48493272066116333, "learning_rate": 3.615254475469454e-06, "loss": 0.3748, "step": 39063 }, { "epoch": 1.7926666972603367, "grad_norm": 0.4843459129333496, "learning_rate": 3.6150188798531626e-06, "loss": 0.3768, "step": 39064 }, { "epoch": 1.7927125877655912, "grad_norm": 0.41103413701057434, "learning_rate": 3.6147832875673115e-06, "loss": 0.2656, "step": 39065 }, { "epoch": 1.7927584782708457, "grad_norm": 0.4740593731403351, "learning_rate": 3.6145476986124716e-06, "loss": 0.3596, "step": 39066 }, { "epoch": 1.7928043687761002, "grad_norm": 0.43567797541618347, "learning_rate": 3.6143121129892067e-06, "loss": 0.3372, "step": 39067 }, { "epoch": 1.7928502592813547, "grad_norm": 0.6817604303359985, "learning_rate": 3.614076530698083e-06, "loss": 0.4235, "step": 39068 }, { "epoch": 1.7928961497866092, "grad_norm": 0.45818445086479187, "learning_rate": 3.6138409517396694e-06, "loss": 0.3514, "step": 39069 }, { "epoch": 1.7929420402918637, "grad_norm": 0.511385440826416, "learning_rate": 3.6136053761145307e-06, "loss": 0.4487, "step": 39070 }, { "epoch": 1.7929879307971182, "grad_norm": 0.4798769950866699, "learning_rate": 3.613369803823231e-06, "loss": 0.3972, "step": 39071 }, { "epoch": 1.7930338213023727, "grad_norm": 0.44799596071243286, "learning_rate": 3.613134234866341e-06, "loss": 0.3007, "step": 39072 }, { "epoch": 1.7930797118076272, "grad_norm": 0.49659913778305054, "learning_rate": 3.612898669244427e-06, "loss": 0.3843, "step": 39073 }, { "epoch": 1.7931256023128814, "grad_norm": 0.4618549048900604, "learning_rate": 3.6126631069580514e-06, "loss": 0.3815, "step": 39074 }, { "epoch": 1.793171492818136, "grad_norm": 0.4708144962787628, "learning_rate": 3.6124275480077846e-06, "loss": 0.3607, "step": 39075 }, { "epoch": 1.7932173833233904, "grad_norm": 0.46132534742355347, "learning_rate": 3.612191992394192e-06, "loss": 0.3375, "step": 39076 }, { "epoch": 1.7932632738286447, "grad_norm": 0.4657265543937683, "learning_rate": 3.6119564401178374e-06, "loss": 0.3272, "step": 39077 }, { "epoch": 1.7933091643338992, "grad_norm": 0.5100289583206177, "learning_rate": 3.611720891179291e-06, "loss": 0.3804, "step": 39078 }, { "epoch": 1.7933550548391537, "grad_norm": 0.4835840165615082, "learning_rate": 3.6114853455791187e-06, "loss": 0.3905, "step": 39079 }, { "epoch": 1.7934009453444082, "grad_norm": 0.47456595301628113, "learning_rate": 3.6112498033178844e-06, "loss": 0.3666, "step": 39080 }, { "epoch": 1.7934468358496627, "grad_norm": 0.47419407963752747, "learning_rate": 3.6110142643961564e-06, "loss": 0.3849, "step": 39081 }, { "epoch": 1.7934927263549172, "grad_norm": 0.4323647916316986, "learning_rate": 3.610778728814502e-06, "loss": 0.2864, "step": 39082 }, { "epoch": 1.7935386168601717, "grad_norm": 0.49279850721359253, "learning_rate": 3.6105431965734832e-06, "loss": 0.4066, "step": 39083 }, { "epoch": 1.7935845073654262, "grad_norm": 0.4407173693180084, "learning_rate": 3.610307667673672e-06, "loss": 0.3092, "step": 39084 }, { "epoch": 1.7936303978706807, "grad_norm": 0.4560415744781494, "learning_rate": 3.610072142115634e-06, "loss": 0.3203, "step": 39085 }, { "epoch": 1.7936762883759352, "grad_norm": 0.5087391138076782, "learning_rate": 3.609836619899929e-06, "loss": 0.4515, "step": 39086 }, { "epoch": 1.7937221788811895, "grad_norm": 0.49655935168266296, "learning_rate": 3.6096011010271315e-06, "loss": 0.416, "step": 39087 }, { "epoch": 1.793768069386444, "grad_norm": 0.4519485533237457, "learning_rate": 3.6093655854978043e-06, "loss": 0.358, "step": 39088 }, { "epoch": 1.7938139598916985, "grad_norm": 0.4522759020328522, "learning_rate": 3.609130073312513e-06, "loss": 0.3599, "step": 39089 }, { "epoch": 1.7938598503969527, "grad_norm": 0.4722626507282257, "learning_rate": 3.6088945644718254e-06, "loss": 0.3417, "step": 39090 }, { "epoch": 1.7939057409022072, "grad_norm": 0.4826270639896393, "learning_rate": 3.608659058976308e-06, "loss": 0.3995, "step": 39091 }, { "epoch": 1.7939516314074617, "grad_norm": 0.43914300203323364, "learning_rate": 3.608423556826524e-06, "loss": 0.3073, "step": 39092 }, { "epoch": 1.7939975219127162, "grad_norm": 0.5353196263313293, "learning_rate": 3.6081880580230445e-06, "loss": 0.4576, "step": 39093 }, { "epoch": 1.7940434124179707, "grad_norm": 0.4994310736656189, "learning_rate": 3.6079525625664335e-06, "loss": 0.4249, "step": 39094 }, { "epoch": 1.7940893029232252, "grad_norm": 0.4786619544029236, "learning_rate": 3.6077170704572556e-06, "loss": 0.3972, "step": 39095 }, { "epoch": 1.7941351934284797, "grad_norm": 0.42954838275909424, "learning_rate": 3.60748158169608e-06, "loss": 0.2709, "step": 39096 }, { "epoch": 1.7941810839337342, "grad_norm": 0.47246503829956055, "learning_rate": 3.607246096283472e-06, "loss": 0.3517, "step": 39097 }, { "epoch": 1.7942269744389887, "grad_norm": 0.4846789836883545, "learning_rate": 3.607010614219998e-06, "loss": 0.3869, "step": 39098 }, { "epoch": 1.794272864944243, "grad_norm": 0.4607219696044922, "learning_rate": 3.606775135506221e-06, "loss": 0.3122, "step": 39099 }, { "epoch": 1.7943187554494975, "grad_norm": 0.4825912117958069, "learning_rate": 3.6065396601427115e-06, "loss": 0.3736, "step": 39100 }, { "epoch": 1.794364645954752, "grad_norm": 0.4601112902164459, "learning_rate": 3.6063041881300353e-06, "loss": 0.2945, "step": 39101 }, { "epoch": 1.7944105364600065, "grad_norm": 0.4749048948287964, "learning_rate": 3.606068719468757e-06, "loss": 0.3611, "step": 39102 }, { "epoch": 1.7944564269652608, "grad_norm": 0.4865274727344513, "learning_rate": 3.605833254159444e-06, "loss": 0.4343, "step": 39103 }, { "epoch": 1.7945023174705153, "grad_norm": 0.4507814049720764, "learning_rate": 3.605597792202662e-06, "loss": 0.3232, "step": 39104 }, { "epoch": 1.7945482079757697, "grad_norm": 0.4889148771762848, "learning_rate": 3.605362333598975e-06, "loss": 0.3817, "step": 39105 }, { "epoch": 1.7945940984810242, "grad_norm": 0.5521024465560913, "learning_rate": 3.6051268783489533e-06, "loss": 0.421, "step": 39106 }, { "epoch": 1.7946399889862787, "grad_norm": 0.45276516675949097, "learning_rate": 3.604891426453163e-06, "loss": 0.3036, "step": 39107 }, { "epoch": 1.7946858794915332, "grad_norm": 0.5247492790222168, "learning_rate": 3.6046559779121654e-06, "loss": 0.3812, "step": 39108 }, { "epoch": 1.7947317699967877, "grad_norm": 0.5347162485122681, "learning_rate": 3.604420532726531e-06, "loss": 0.4396, "step": 39109 }, { "epoch": 1.7947776605020422, "grad_norm": 0.44859734177589417, "learning_rate": 3.604185090896826e-06, "loss": 0.2907, "step": 39110 }, { "epoch": 1.7948235510072967, "grad_norm": 0.4471120238304138, "learning_rate": 3.6039496524236126e-06, "loss": 0.2985, "step": 39111 }, { "epoch": 1.794869441512551, "grad_norm": 0.47846418619155884, "learning_rate": 3.6037142173074625e-06, "loss": 0.3386, "step": 39112 }, { "epoch": 1.7949153320178055, "grad_norm": 0.5252884030342102, "learning_rate": 3.6034787855489382e-06, "loss": 0.3861, "step": 39113 }, { "epoch": 1.79496122252306, "grad_norm": 0.4815131425857544, "learning_rate": 3.603243357148605e-06, "loss": 0.344, "step": 39114 }, { "epoch": 1.7950071130283143, "grad_norm": 0.4899124801158905, "learning_rate": 3.603007932107032e-06, "loss": 0.3709, "step": 39115 }, { "epoch": 1.7950530035335688, "grad_norm": 0.4672159254550934, "learning_rate": 3.6027725104247845e-06, "loss": 0.3206, "step": 39116 }, { "epoch": 1.7950988940388233, "grad_norm": 0.4296744167804718, "learning_rate": 3.6025370921024267e-06, "loss": 0.2646, "step": 39117 }, { "epoch": 1.7951447845440778, "grad_norm": 0.4530632495880127, "learning_rate": 3.6023016771405273e-06, "loss": 0.3545, "step": 39118 }, { "epoch": 1.7951906750493323, "grad_norm": 0.45011207461357117, "learning_rate": 3.6020662655396516e-06, "loss": 0.3062, "step": 39119 }, { "epoch": 1.7952365655545868, "grad_norm": 0.4747127592563629, "learning_rate": 3.601830857300363e-06, "loss": 0.3605, "step": 39120 }, { "epoch": 1.7952824560598413, "grad_norm": 0.45756855607032776, "learning_rate": 3.6015954524232323e-06, "loss": 0.3371, "step": 39121 }, { "epoch": 1.7953283465650958, "grad_norm": 0.49475839734077454, "learning_rate": 3.6013600509088232e-06, "loss": 0.4214, "step": 39122 }, { "epoch": 1.7953742370703503, "grad_norm": 0.5522712469100952, "learning_rate": 3.6011246527577e-06, "loss": 0.5556, "step": 39123 }, { "epoch": 1.7954201275756048, "grad_norm": 0.4747956693172455, "learning_rate": 3.6008892579704317e-06, "loss": 0.3296, "step": 39124 }, { "epoch": 1.795466018080859, "grad_norm": 0.49176618456840515, "learning_rate": 3.6006538665475838e-06, "loss": 0.3699, "step": 39125 }, { "epoch": 1.7955119085861135, "grad_norm": 0.4626230299472809, "learning_rate": 3.600418478489719e-06, "loss": 0.3345, "step": 39126 }, { "epoch": 1.795557799091368, "grad_norm": 0.48788586258888245, "learning_rate": 3.6001830937974085e-06, "loss": 0.4083, "step": 39127 }, { "epoch": 1.7956036895966223, "grad_norm": 0.464763879776001, "learning_rate": 3.5999477124712158e-06, "loss": 0.3071, "step": 39128 }, { "epoch": 1.7956495801018768, "grad_norm": 0.47066062688827515, "learning_rate": 3.599712334511708e-06, "loss": 0.3639, "step": 39129 }, { "epoch": 1.7956954706071313, "grad_norm": 0.5380046963691711, "learning_rate": 3.5994769599194466e-06, "loss": 0.4034, "step": 39130 }, { "epoch": 1.7957413611123858, "grad_norm": 0.4737645983695984, "learning_rate": 3.599241588695004e-06, "loss": 0.3861, "step": 39131 }, { "epoch": 1.7957872516176403, "grad_norm": 0.4689282178878784, "learning_rate": 3.5990062208389432e-06, "loss": 0.3842, "step": 39132 }, { "epoch": 1.7958331421228948, "grad_norm": 0.4675632417201996, "learning_rate": 3.5987708563518286e-06, "loss": 0.3605, "step": 39133 }, { "epoch": 1.7958790326281493, "grad_norm": 0.4577837288379669, "learning_rate": 3.598535495234229e-06, "loss": 0.3186, "step": 39134 }, { "epoch": 1.7959249231334038, "grad_norm": 0.47404882311820984, "learning_rate": 3.59830013748671e-06, "loss": 0.4139, "step": 39135 }, { "epoch": 1.7959708136386583, "grad_norm": 0.4490429162979126, "learning_rate": 3.5980647831098337e-06, "loss": 0.3329, "step": 39136 }, { "epoch": 1.7960167041439128, "grad_norm": 0.4627262055873871, "learning_rate": 3.5978294321041716e-06, "loss": 0.3382, "step": 39137 }, { "epoch": 1.796062594649167, "grad_norm": 0.48307734727859497, "learning_rate": 3.5975940844702873e-06, "loss": 0.3752, "step": 39138 }, { "epoch": 1.7961084851544216, "grad_norm": 0.4380726218223572, "learning_rate": 3.5973587402087452e-06, "loss": 0.3177, "step": 39139 }, { "epoch": 1.796154375659676, "grad_norm": 0.4519878327846527, "learning_rate": 3.5971233993201138e-06, "loss": 0.3018, "step": 39140 }, { "epoch": 1.7962002661649303, "grad_norm": 0.4569428861141205, "learning_rate": 3.5968880618049583e-06, "loss": 0.361, "step": 39141 }, { "epoch": 1.7962461566701848, "grad_norm": 0.48241445422172546, "learning_rate": 3.5966527276638417e-06, "loss": 0.3313, "step": 39142 }, { "epoch": 1.7962920471754393, "grad_norm": 0.45483192801475525, "learning_rate": 3.596417396897335e-06, "loss": 0.3306, "step": 39143 }, { "epoch": 1.7963379376806938, "grad_norm": 0.4606817066669464, "learning_rate": 3.596182069506e-06, "loss": 0.3619, "step": 39144 }, { "epoch": 1.7963838281859483, "grad_norm": 0.4854259490966797, "learning_rate": 3.5959467454904048e-06, "loss": 0.3707, "step": 39145 }, { "epoch": 1.7964297186912028, "grad_norm": 0.5069246888160706, "learning_rate": 3.595711424851115e-06, "loss": 0.4497, "step": 39146 }, { "epoch": 1.7964756091964573, "grad_norm": 0.5490174889564514, "learning_rate": 3.5954761075886955e-06, "loss": 0.3823, "step": 39147 }, { "epoch": 1.7965214997017118, "grad_norm": 0.5013524889945984, "learning_rate": 3.595240793703711e-06, "loss": 0.3303, "step": 39148 }, { "epoch": 1.7965673902069663, "grad_norm": 0.49147921800613403, "learning_rate": 3.5950054831967306e-06, "loss": 0.3872, "step": 39149 }, { "epoch": 1.7966132807122206, "grad_norm": 0.43856003880500793, "learning_rate": 3.5947701760683197e-06, "loss": 0.3059, "step": 39150 }, { "epoch": 1.796659171217475, "grad_norm": 0.4646542966365814, "learning_rate": 3.594534872319041e-06, "loss": 0.2933, "step": 39151 }, { "epoch": 1.7967050617227296, "grad_norm": 0.4598725140094757, "learning_rate": 3.5942995719494632e-06, "loss": 0.3446, "step": 39152 }, { "epoch": 1.7967509522279839, "grad_norm": 0.4479806423187256, "learning_rate": 3.5940642749601523e-06, "loss": 0.3003, "step": 39153 }, { "epoch": 1.7967968427332384, "grad_norm": 0.446286141872406, "learning_rate": 3.5938289813516713e-06, "loss": 0.3056, "step": 39154 }, { "epoch": 1.7968427332384929, "grad_norm": 0.4434756636619568, "learning_rate": 3.5935936911245895e-06, "loss": 0.3224, "step": 39155 }, { "epoch": 1.7968886237437474, "grad_norm": 0.4779556095600128, "learning_rate": 3.5933584042794705e-06, "loss": 0.3583, "step": 39156 }, { "epoch": 1.7969345142490019, "grad_norm": 0.4515618681907654, "learning_rate": 3.593123120816878e-06, "loss": 0.3115, "step": 39157 }, { "epoch": 1.7969804047542564, "grad_norm": 0.502406656742096, "learning_rate": 3.5928878407373836e-06, "loss": 0.4788, "step": 39158 }, { "epoch": 1.7970262952595109, "grad_norm": 0.46805617213249207, "learning_rate": 3.5926525640415493e-06, "loss": 0.3382, "step": 39159 }, { "epoch": 1.7970721857647654, "grad_norm": 0.4763306677341461, "learning_rate": 3.5924172907299403e-06, "loss": 0.426, "step": 39160 }, { "epoch": 1.7971180762700198, "grad_norm": 0.6413353681564331, "learning_rate": 3.5921820208031246e-06, "loss": 0.3616, "step": 39161 }, { "epoch": 1.7971639667752743, "grad_norm": 0.47044244408607483, "learning_rate": 3.591946754261667e-06, "loss": 0.3395, "step": 39162 }, { "epoch": 1.7972098572805286, "grad_norm": 0.48470616340637207, "learning_rate": 3.591711491106133e-06, "loss": 0.3707, "step": 39163 }, { "epoch": 1.7972557477857831, "grad_norm": 0.5120109915733337, "learning_rate": 3.591476231337086e-06, "loss": 0.3909, "step": 39164 }, { "epoch": 1.7973016382910376, "grad_norm": 0.4529309570789337, "learning_rate": 3.5912409749550964e-06, "loss": 0.3021, "step": 39165 }, { "epoch": 1.797347528796292, "grad_norm": 0.45821020007133484, "learning_rate": 3.5910057219607275e-06, "loss": 0.3116, "step": 39166 }, { "epoch": 1.7973934193015464, "grad_norm": 0.6176456809043884, "learning_rate": 3.5907704723545443e-06, "loss": 0.3359, "step": 39167 }, { "epoch": 1.7974393098068009, "grad_norm": 0.5167381167411804, "learning_rate": 3.590535226137114e-06, "loss": 0.4386, "step": 39168 }, { "epoch": 1.7974852003120554, "grad_norm": 0.4389282166957855, "learning_rate": 3.5902999833090025e-06, "loss": 0.314, "step": 39169 }, { "epoch": 1.7975310908173099, "grad_norm": 0.4669302701950073, "learning_rate": 3.5900647438707713e-06, "loss": 0.2942, "step": 39170 }, { "epoch": 1.7975769813225644, "grad_norm": 0.49780312180519104, "learning_rate": 3.589829507822993e-06, "loss": 0.4061, "step": 39171 }, { "epoch": 1.7976228718278189, "grad_norm": 0.5416058897972107, "learning_rate": 3.589594275166228e-06, "loss": 0.4354, "step": 39172 }, { "epoch": 1.7976687623330734, "grad_norm": 0.5023727416992188, "learning_rate": 3.5893590459010443e-06, "loss": 0.4201, "step": 39173 }, { "epoch": 1.7977146528383279, "grad_norm": 0.4762422442436218, "learning_rate": 3.5891238200280064e-06, "loss": 0.3606, "step": 39174 }, { "epoch": 1.7977605433435824, "grad_norm": 0.4768833816051483, "learning_rate": 3.588888597547682e-06, "loss": 0.363, "step": 39175 }, { "epoch": 1.7978064338488366, "grad_norm": 0.43534204363822937, "learning_rate": 3.5886533784606327e-06, "loss": 0.2905, "step": 39176 }, { "epoch": 1.7978523243540911, "grad_norm": 0.48054373264312744, "learning_rate": 3.588418162767428e-06, "loss": 0.3591, "step": 39177 }, { "epoch": 1.7978982148593456, "grad_norm": 0.47345200181007385, "learning_rate": 3.588182950468633e-06, "loss": 0.3539, "step": 39178 }, { "epoch": 1.7979441053646, "grad_norm": 0.47555145621299744, "learning_rate": 3.587947741564809e-06, "loss": 0.3893, "step": 39179 }, { "epoch": 1.7979899958698544, "grad_norm": 0.4615512490272522, "learning_rate": 3.5877125360565277e-06, "loss": 0.3358, "step": 39180 }, { "epoch": 1.798035886375109, "grad_norm": 0.4767516553401947, "learning_rate": 3.5874773339443517e-06, "loss": 0.3412, "step": 39181 }, { "epoch": 1.7980817768803634, "grad_norm": 0.4808574914932251, "learning_rate": 3.5872421352288465e-06, "loss": 0.3538, "step": 39182 }, { "epoch": 1.798127667385618, "grad_norm": 0.4516178071498871, "learning_rate": 3.587006939910579e-06, "loss": 0.3174, "step": 39183 }, { "epoch": 1.7981735578908724, "grad_norm": 0.5146178603172302, "learning_rate": 3.586771747990113e-06, "loss": 0.4089, "step": 39184 }, { "epoch": 1.798219448396127, "grad_norm": 0.48138606548309326, "learning_rate": 3.586536559468014e-06, "loss": 0.3112, "step": 39185 }, { "epoch": 1.7982653389013814, "grad_norm": 0.47517290711402893, "learning_rate": 3.5863013743448504e-06, "loss": 0.3524, "step": 39186 }, { "epoch": 1.798311229406636, "grad_norm": 0.4804094731807709, "learning_rate": 3.5860661926211848e-06, "loss": 0.3652, "step": 39187 }, { "epoch": 1.7983571199118902, "grad_norm": 0.42782214283943176, "learning_rate": 3.585831014297584e-06, "loss": 0.2905, "step": 39188 }, { "epoch": 1.7984030104171447, "grad_norm": 0.46503114700317383, "learning_rate": 3.5855958393746137e-06, "loss": 0.3095, "step": 39189 }, { "epoch": 1.7984489009223992, "grad_norm": 0.4764620363712311, "learning_rate": 3.5853606678528387e-06, "loss": 0.3768, "step": 39190 }, { "epoch": 1.7984947914276537, "grad_norm": 0.5156913995742798, "learning_rate": 3.5851254997328233e-06, "loss": 0.4519, "step": 39191 }, { "epoch": 1.798540681932908, "grad_norm": 0.5220249891281128, "learning_rate": 3.5848903350151364e-06, "loss": 0.3099, "step": 39192 }, { "epoch": 1.7985865724381624, "grad_norm": 0.48183974623680115, "learning_rate": 3.5846551737003417e-06, "loss": 0.3724, "step": 39193 }, { "epoch": 1.798632462943417, "grad_norm": 0.4872881770133972, "learning_rate": 3.584420015789003e-06, "loss": 0.3658, "step": 39194 }, { "epoch": 1.7986783534486714, "grad_norm": 0.45851632952690125, "learning_rate": 3.5841848612816892e-06, "loss": 0.3264, "step": 39195 }, { "epoch": 1.798724243953926, "grad_norm": 0.47330886125564575, "learning_rate": 3.583949710178964e-06, "loss": 0.372, "step": 39196 }, { "epoch": 1.7987701344591804, "grad_norm": 0.47900545597076416, "learning_rate": 3.5837145624813923e-06, "loss": 0.3632, "step": 39197 }, { "epoch": 1.798816024964435, "grad_norm": 0.4801865816116333, "learning_rate": 3.5834794181895395e-06, "loss": 0.3722, "step": 39198 }, { "epoch": 1.7988619154696894, "grad_norm": 0.4970323443412781, "learning_rate": 3.583244277303972e-06, "loss": 0.3688, "step": 39199 }, { "epoch": 1.798907805974944, "grad_norm": 0.4540555477142334, "learning_rate": 3.5830091398252554e-06, "loss": 0.3148, "step": 39200 }, { "epoch": 1.7989536964801982, "grad_norm": 0.43361538648605347, "learning_rate": 3.5827740057539524e-06, "loss": 0.3166, "step": 39201 }, { "epoch": 1.7989995869854527, "grad_norm": 0.4528296887874603, "learning_rate": 3.582538875090633e-06, "loss": 0.3316, "step": 39202 }, { "epoch": 1.7990454774907072, "grad_norm": 0.5063188672065735, "learning_rate": 3.5823037478358594e-06, "loss": 0.3972, "step": 39203 }, { "epoch": 1.7990913679959615, "grad_norm": 0.4593522548675537, "learning_rate": 3.5820686239901982e-06, "loss": 0.3319, "step": 39204 }, { "epoch": 1.799137258501216, "grad_norm": 0.44395455718040466, "learning_rate": 3.581833503554214e-06, "loss": 0.3233, "step": 39205 }, { "epoch": 1.7991831490064705, "grad_norm": 0.46326175332069397, "learning_rate": 3.581598386528473e-06, "loss": 0.3467, "step": 39206 }, { "epoch": 1.799229039511725, "grad_norm": 0.4977503716945648, "learning_rate": 3.581363272913539e-06, "loss": 0.4487, "step": 39207 }, { "epoch": 1.7992749300169795, "grad_norm": 0.46444225311279297, "learning_rate": 3.5811281627099803e-06, "loss": 0.3423, "step": 39208 }, { "epoch": 1.799320820522234, "grad_norm": 0.45476028323173523, "learning_rate": 3.58089305591836e-06, "loss": 0.3373, "step": 39209 }, { "epoch": 1.7993667110274885, "grad_norm": 0.4991852343082428, "learning_rate": 3.580657952539244e-06, "loss": 0.4728, "step": 39210 }, { "epoch": 1.799412601532743, "grad_norm": 0.49933263659477234, "learning_rate": 3.5804228525731977e-06, "loss": 0.4309, "step": 39211 }, { "epoch": 1.7994584920379975, "grad_norm": 0.5126903653144836, "learning_rate": 3.5801877560207863e-06, "loss": 0.4679, "step": 39212 }, { "epoch": 1.799504382543252, "grad_norm": 0.4869692623615265, "learning_rate": 3.579952662882574e-06, "loss": 0.3939, "step": 39213 }, { "epoch": 1.7995502730485062, "grad_norm": 0.4746963381767273, "learning_rate": 3.579717573159129e-06, "loss": 0.3677, "step": 39214 }, { "epoch": 1.7995961635537607, "grad_norm": 0.48424577713012695, "learning_rate": 3.579482486851015e-06, "loss": 0.3564, "step": 39215 }, { "epoch": 1.7996420540590152, "grad_norm": 0.47278037667274475, "learning_rate": 3.5792474039587965e-06, "loss": 0.3499, "step": 39216 }, { "epoch": 1.7996879445642695, "grad_norm": 0.4383920431137085, "learning_rate": 3.5790123244830398e-06, "loss": 0.3128, "step": 39217 }, { "epoch": 1.799733835069524, "grad_norm": 0.5058495998382568, "learning_rate": 3.578777248424311e-06, "loss": 0.4028, "step": 39218 }, { "epoch": 1.7997797255747785, "grad_norm": 0.48429593443870544, "learning_rate": 3.5785421757831717e-06, "loss": 0.3683, "step": 39219 }, { "epoch": 1.799825616080033, "grad_norm": 0.46059587597846985, "learning_rate": 3.5783071065601936e-06, "loss": 0.3534, "step": 39220 }, { "epoch": 1.7998715065852875, "grad_norm": 0.494500070810318, "learning_rate": 3.5780720407559366e-06, "loss": 0.387, "step": 39221 }, { "epoch": 1.799917397090542, "grad_norm": 0.4394480884075165, "learning_rate": 3.5778369783709652e-06, "loss": 0.3126, "step": 39222 }, { "epoch": 1.7999632875957965, "grad_norm": 0.47112613916397095, "learning_rate": 3.5776019194058496e-06, "loss": 0.392, "step": 39223 }, { "epoch": 1.800009178101051, "grad_norm": 0.5653722882270813, "learning_rate": 3.5773668638611526e-06, "loss": 0.4628, "step": 39224 }, { "epoch": 1.8000550686063055, "grad_norm": 0.4742116928100586, "learning_rate": 3.5771318117374382e-06, "loss": 0.3573, "step": 39225 }, { "epoch": 1.80010095911156, "grad_norm": 0.46739962697029114, "learning_rate": 3.5768967630352736e-06, "loss": 0.3554, "step": 39226 }, { "epoch": 1.8001468496168143, "grad_norm": 0.4979287385940552, "learning_rate": 3.5766617177552234e-06, "loss": 0.4271, "step": 39227 }, { "epoch": 1.8001927401220688, "grad_norm": 0.4359785318374634, "learning_rate": 3.576426675897852e-06, "loss": 0.3175, "step": 39228 }, { "epoch": 1.8002386306273233, "grad_norm": 0.4938175678253174, "learning_rate": 3.5761916374637237e-06, "loss": 0.4281, "step": 39229 }, { "epoch": 1.8002845211325775, "grad_norm": 0.47553664445877075, "learning_rate": 3.5759566024534066e-06, "loss": 0.3468, "step": 39230 }, { "epoch": 1.800330411637832, "grad_norm": 0.4512454569339752, "learning_rate": 3.5757215708674647e-06, "loss": 0.3218, "step": 39231 }, { "epoch": 1.8003763021430865, "grad_norm": 0.48382073640823364, "learning_rate": 3.5754865427064623e-06, "loss": 0.3793, "step": 39232 }, { "epoch": 1.800422192648341, "grad_norm": 0.5018970966339111, "learning_rate": 3.575251517970966e-06, "loss": 0.4885, "step": 39233 }, { "epoch": 1.8004680831535955, "grad_norm": 0.45490139722824097, "learning_rate": 3.5750164966615397e-06, "loss": 0.3094, "step": 39234 }, { "epoch": 1.80051397365885, "grad_norm": 0.4589248597621918, "learning_rate": 3.5747814787787473e-06, "loss": 0.3045, "step": 39235 }, { "epoch": 1.8005598641641045, "grad_norm": 0.46435531973838806, "learning_rate": 3.574546464323157e-06, "loss": 0.3371, "step": 39236 }, { "epoch": 1.800605754669359, "grad_norm": 0.44815996289253235, "learning_rate": 3.5743114532953335e-06, "loss": 0.3346, "step": 39237 }, { "epoch": 1.8006516451746135, "grad_norm": 0.45247188210487366, "learning_rate": 3.5740764456958395e-06, "loss": 0.3192, "step": 39238 }, { "epoch": 1.8006975356798678, "grad_norm": 0.5225508809089661, "learning_rate": 3.5738414415252424e-06, "loss": 0.4473, "step": 39239 }, { "epoch": 1.8007434261851223, "grad_norm": 0.44614115357398987, "learning_rate": 3.573606440784107e-06, "loss": 0.3072, "step": 39240 }, { "epoch": 1.8007893166903768, "grad_norm": 0.4881133735179901, "learning_rate": 3.573371443472996e-06, "loss": 0.3637, "step": 39241 }, { "epoch": 1.800835207195631, "grad_norm": 0.5156430602073669, "learning_rate": 3.5731364495924793e-06, "loss": 0.4287, "step": 39242 }, { "epoch": 1.8008810977008856, "grad_norm": 0.4470885992050171, "learning_rate": 3.5729014591431177e-06, "loss": 0.3381, "step": 39243 }, { "epoch": 1.80092698820614, "grad_norm": 0.4537081718444824, "learning_rate": 3.5726664721254757e-06, "loss": 0.3387, "step": 39244 }, { "epoch": 1.8009728787113946, "grad_norm": 0.44785383343696594, "learning_rate": 3.572431488540123e-06, "loss": 0.3373, "step": 39245 }, { "epoch": 1.801018769216649, "grad_norm": 0.4318164587020874, "learning_rate": 3.572196508387622e-06, "loss": 0.3189, "step": 39246 }, { "epoch": 1.8010646597219035, "grad_norm": 0.4455227255821228, "learning_rate": 3.571961531668536e-06, "loss": 0.3223, "step": 39247 }, { "epoch": 1.801110550227158, "grad_norm": 0.4470912516117096, "learning_rate": 3.571726558383433e-06, "loss": 0.3165, "step": 39248 }, { "epoch": 1.8011564407324125, "grad_norm": 0.48179107904434204, "learning_rate": 3.5714915885328773e-06, "loss": 0.4318, "step": 39249 }, { "epoch": 1.801202331237667, "grad_norm": 0.47391849756240845, "learning_rate": 3.571256622117432e-06, "loss": 0.3477, "step": 39250 }, { "epoch": 1.8012482217429215, "grad_norm": 0.47040629386901855, "learning_rate": 3.5710216591376646e-06, "loss": 0.4035, "step": 39251 }, { "epoch": 1.8012941122481758, "grad_norm": 0.4455244243144989, "learning_rate": 3.570786699594139e-06, "loss": 0.3391, "step": 39252 }, { "epoch": 1.8013400027534303, "grad_norm": 0.4383828639984131, "learning_rate": 3.5705517434874193e-06, "loss": 0.3174, "step": 39253 }, { "epoch": 1.8013858932586848, "grad_norm": 0.5050778985023499, "learning_rate": 3.5703167908180734e-06, "loss": 0.4118, "step": 39254 }, { "epoch": 1.801431783763939, "grad_norm": 0.48553112149238586, "learning_rate": 3.570081841586664e-06, "loss": 0.4189, "step": 39255 }, { "epoch": 1.8014776742691936, "grad_norm": 0.479735791683197, "learning_rate": 3.569846895793754e-06, "loss": 0.3649, "step": 39256 }, { "epoch": 1.801523564774448, "grad_norm": 0.4841137230396271, "learning_rate": 3.569611953439913e-06, "loss": 0.3941, "step": 39257 }, { "epoch": 1.8015694552797026, "grad_norm": 0.4801064729690552, "learning_rate": 3.5693770145257046e-06, "loss": 0.3781, "step": 39258 }, { "epoch": 1.801615345784957, "grad_norm": 0.4492424428462982, "learning_rate": 3.569142079051691e-06, "loss": 0.2972, "step": 39259 }, { "epoch": 1.8016612362902116, "grad_norm": 0.45552802085876465, "learning_rate": 3.568907147018441e-06, "loss": 0.3026, "step": 39260 }, { "epoch": 1.801707126795466, "grad_norm": 0.4365113079547882, "learning_rate": 3.5686722184265167e-06, "loss": 0.3017, "step": 39261 }, { "epoch": 1.8017530173007206, "grad_norm": 0.4780440032482147, "learning_rate": 3.5684372932764843e-06, "loss": 0.3656, "step": 39262 }, { "epoch": 1.801798907805975, "grad_norm": 0.4473513960838318, "learning_rate": 3.568202371568907e-06, "loss": 0.3178, "step": 39263 }, { "epoch": 1.8018447983112296, "grad_norm": 0.4802020192146301, "learning_rate": 3.5679674533043533e-06, "loss": 0.3675, "step": 39264 }, { "epoch": 1.8018906888164838, "grad_norm": 0.44099804759025574, "learning_rate": 3.5677325384833854e-06, "loss": 0.2683, "step": 39265 }, { "epoch": 1.8019365793217383, "grad_norm": 0.4765324294567108, "learning_rate": 3.5674976271065666e-06, "loss": 0.3488, "step": 39266 }, { "epoch": 1.8019824698269928, "grad_norm": 0.49827513098716736, "learning_rate": 3.5672627191744656e-06, "loss": 0.3293, "step": 39267 }, { "epoch": 1.8020283603322471, "grad_norm": 0.48104366660118103, "learning_rate": 3.5670278146876457e-06, "loss": 0.351, "step": 39268 }, { "epoch": 1.8020742508375016, "grad_norm": 0.45449671149253845, "learning_rate": 3.566792913646671e-06, "loss": 0.3244, "step": 39269 }, { "epoch": 1.802120141342756, "grad_norm": 0.48197853565216064, "learning_rate": 3.566558016052108e-06, "loss": 0.3993, "step": 39270 }, { "epoch": 1.8021660318480106, "grad_norm": 0.4611546993255615, "learning_rate": 3.56632312190452e-06, "loss": 0.3567, "step": 39271 }, { "epoch": 1.802211922353265, "grad_norm": 0.533311128616333, "learning_rate": 3.566088231204471e-06, "loss": 0.4052, "step": 39272 }, { "epoch": 1.8022578128585196, "grad_norm": 0.4383503496646881, "learning_rate": 3.565853343952529e-06, "loss": 0.3416, "step": 39273 }, { "epoch": 1.802303703363774, "grad_norm": 0.4511098265647888, "learning_rate": 3.565618460149257e-06, "loss": 0.366, "step": 39274 }, { "epoch": 1.8023495938690286, "grad_norm": 0.41594845056533813, "learning_rate": 3.565383579795219e-06, "loss": 0.2797, "step": 39275 }, { "epoch": 1.802395484374283, "grad_norm": 0.4979259967803955, "learning_rate": 3.5651487028909815e-06, "loss": 0.4277, "step": 39276 }, { "epoch": 1.8024413748795374, "grad_norm": 0.512932300567627, "learning_rate": 3.564913829437109e-06, "loss": 0.4053, "step": 39277 }, { "epoch": 1.8024872653847919, "grad_norm": 0.5362216234207153, "learning_rate": 3.5646789594341636e-06, "loss": 0.4779, "step": 39278 }, { "epoch": 1.8025331558900464, "grad_norm": 0.4687674343585968, "learning_rate": 3.564444092882714e-06, "loss": 0.3484, "step": 39279 }, { "epoch": 1.8025790463953009, "grad_norm": 0.48018693923950195, "learning_rate": 3.564209229783324e-06, "loss": 0.3652, "step": 39280 }, { "epoch": 1.8026249369005551, "grad_norm": 0.5012412071228027, "learning_rate": 3.5639743701365558e-06, "loss": 0.3301, "step": 39281 }, { "epoch": 1.8026708274058096, "grad_norm": 0.4927400052547455, "learning_rate": 3.5637395139429776e-06, "loss": 0.4132, "step": 39282 }, { "epoch": 1.8027167179110641, "grad_norm": 0.488741397857666, "learning_rate": 3.563504661203152e-06, "loss": 0.346, "step": 39283 }, { "epoch": 1.8027626084163186, "grad_norm": 0.45236778259277344, "learning_rate": 3.563269811917642e-06, "loss": 0.318, "step": 39284 }, { "epoch": 1.8028084989215731, "grad_norm": 0.43687424063682556, "learning_rate": 3.563034966087018e-06, "loss": 0.3018, "step": 39285 }, { "epoch": 1.8028543894268276, "grad_norm": 0.449699342250824, "learning_rate": 3.5628001237118415e-06, "loss": 0.3258, "step": 39286 }, { "epoch": 1.8029002799320821, "grad_norm": 0.49896955490112305, "learning_rate": 3.5625652847926745e-06, "loss": 0.3916, "step": 39287 }, { "epoch": 1.8029461704373366, "grad_norm": 0.4704723060131073, "learning_rate": 3.5623304493300858e-06, "loss": 0.3645, "step": 39288 }, { "epoch": 1.8029920609425911, "grad_norm": 0.4715275466442108, "learning_rate": 3.5620956173246396e-06, "loss": 0.3821, "step": 39289 }, { "epoch": 1.8030379514478454, "grad_norm": 0.44830822944641113, "learning_rate": 3.5618607887768976e-06, "loss": 0.359, "step": 39290 }, { "epoch": 1.8030838419531, "grad_norm": 0.48768895864486694, "learning_rate": 3.5616259636874284e-06, "loss": 0.3403, "step": 39291 }, { "epoch": 1.8031297324583544, "grad_norm": 0.46323302388191223, "learning_rate": 3.5613911420567937e-06, "loss": 0.3589, "step": 39292 }, { "epoch": 1.8031756229636087, "grad_norm": 0.4593289792537689, "learning_rate": 3.5611563238855606e-06, "loss": 0.3523, "step": 39293 }, { "epoch": 1.8032215134688632, "grad_norm": 0.4646664559841156, "learning_rate": 3.5609215091742894e-06, "loss": 0.3654, "step": 39294 }, { "epoch": 1.8032674039741177, "grad_norm": 0.47845137119293213, "learning_rate": 3.56068669792355e-06, "loss": 0.402, "step": 39295 }, { "epoch": 1.8033132944793722, "grad_norm": 0.45610469579696655, "learning_rate": 3.5604518901339047e-06, "loss": 0.3212, "step": 39296 }, { "epoch": 1.8033591849846267, "grad_norm": 0.45342209935188293, "learning_rate": 3.560217085805918e-06, "loss": 0.3303, "step": 39297 }, { "epoch": 1.8034050754898812, "grad_norm": 0.4680362045764923, "learning_rate": 3.559982284940155e-06, "loss": 0.3553, "step": 39298 }, { "epoch": 1.8034509659951357, "grad_norm": 0.4510551691055298, "learning_rate": 3.5597474875371807e-06, "loss": 0.3319, "step": 39299 }, { "epoch": 1.8034968565003902, "grad_norm": 0.4679715037345886, "learning_rate": 3.5595126935975565e-06, "loss": 0.3931, "step": 39300 }, { "epoch": 1.8035427470056447, "grad_norm": 0.4478391408920288, "learning_rate": 3.5592779031218516e-06, "loss": 0.3035, "step": 39301 }, { "epoch": 1.8035886375108992, "grad_norm": 0.4669497311115265, "learning_rate": 3.55904311611063e-06, "loss": 0.3131, "step": 39302 }, { "epoch": 1.8036345280161534, "grad_norm": 0.46841856837272644, "learning_rate": 3.5588083325644525e-06, "loss": 0.3568, "step": 39303 }, { "epoch": 1.803680418521408, "grad_norm": 0.4601190686225891, "learning_rate": 3.558573552483887e-06, "loss": 0.3624, "step": 39304 }, { "epoch": 1.8037263090266624, "grad_norm": 0.4474479556083679, "learning_rate": 3.558338775869498e-06, "loss": 0.3176, "step": 39305 }, { "epoch": 1.8037721995319167, "grad_norm": 0.49226072430610657, "learning_rate": 3.558104002721847e-06, "loss": 0.4149, "step": 39306 }, { "epoch": 1.8038180900371712, "grad_norm": 0.4580143392086029, "learning_rate": 3.5578692330415033e-06, "loss": 0.3644, "step": 39307 }, { "epoch": 1.8038639805424257, "grad_norm": 0.47971004247665405, "learning_rate": 3.557634466829029e-06, "loss": 0.3454, "step": 39308 }, { "epoch": 1.8039098710476802, "grad_norm": 0.4903052747249603, "learning_rate": 3.557399704084986e-06, "loss": 0.3939, "step": 39309 }, { "epoch": 1.8039557615529347, "grad_norm": 0.4632745683193207, "learning_rate": 3.5571649448099434e-06, "loss": 0.3599, "step": 39310 }, { "epoch": 1.8040016520581892, "grad_norm": 0.4452897608280182, "learning_rate": 3.5569301890044638e-06, "loss": 0.3073, "step": 39311 }, { "epoch": 1.8040475425634437, "grad_norm": 0.48747268319129944, "learning_rate": 3.556695436669111e-06, "loss": 0.3791, "step": 39312 }, { "epoch": 1.8040934330686982, "grad_norm": 0.46609342098236084, "learning_rate": 3.5564606878044505e-06, "loss": 0.3628, "step": 39313 }, { "epoch": 1.8041393235739527, "grad_norm": 0.4931948184967041, "learning_rate": 3.5562259424110467e-06, "loss": 0.4007, "step": 39314 }, { "epoch": 1.8041852140792072, "grad_norm": 0.43707239627838135, "learning_rate": 3.5559912004894616e-06, "loss": 0.3075, "step": 39315 }, { "epoch": 1.8042311045844615, "grad_norm": 0.46779289841651917, "learning_rate": 3.555756462040264e-06, "loss": 0.3916, "step": 39316 }, { "epoch": 1.804276995089716, "grad_norm": 0.4659048020839691, "learning_rate": 3.555521727064017e-06, "loss": 0.3492, "step": 39317 }, { "epoch": 1.8043228855949704, "grad_norm": 0.4051838219165802, "learning_rate": 3.555286995561282e-06, "loss": 0.2534, "step": 39318 }, { "epoch": 1.8043687761002247, "grad_norm": 0.4775054454803467, "learning_rate": 3.555052267532628e-06, "loss": 0.3838, "step": 39319 }, { "epoch": 1.8044146666054792, "grad_norm": 0.49684667587280273, "learning_rate": 3.554817542978617e-06, "loss": 0.38, "step": 39320 }, { "epoch": 1.8044605571107337, "grad_norm": 0.47997039556503296, "learning_rate": 3.5545828218998112e-06, "loss": 0.3848, "step": 39321 }, { "epoch": 1.8045064476159882, "grad_norm": 0.476949542760849, "learning_rate": 3.5543481042967805e-06, "loss": 0.3535, "step": 39322 }, { "epoch": 1.8045523381212427, "grad_norm": 0.466804563999176, "learning_rate": 3.554113390170085e-06, "loss": 0.3486, "step": 39323 }, { "epoch": 1.8045982286264972, "grad_norm": 0.47255849838256836, "learning_rate": 3.5538786795202907e-06, "loss": 0.3825, "step": 39324 }, { "epoch": 1.8046441191317517, "grad_norm": 0.4365609288215637, "learning_rate": 3.553643972347962e-06, "loss": 0.2882, "step": 39325 }, { "epoch": 1.8046900096370062, "grad_norm": 0.475857138633728, "learning_rate": 3.5534092686536637e-06, "loss": 0.3609, "step": 39326 }, { "epoch": 1.8047359001422607, "grad_norm": 0.4938315749168396, "learning_rate": 3.5531745684379594e-06, "loss": 0.4078, "step": 39327 }, { "epoch": 1.804781790647515, "grad_norm": 0.44474807381629944, "learning_rate": 3.5529398717014113e-06, "loss": 0.3269, "step": 39328 }, { "epoch": 1.8048276811527695, "grad_norm": 0.4790645241737366, "learning_rate": 3.5527051784445884e-06, "loss": 0.36, "step": 39329 }, { "epoch": 1.804873571658024, "grad_norm": 0.471356600522995, "learning_rate": 3.552470488668054e-06, "loss": 0.3692, "step": 39330 }, { "epoch": 1.8049194621632783, "grad_norm": 0.48099663853645325, "learning_rate": 3.5522358023723672e-06, "loss": 0.3841, "step": 39331 }, { "epoch": 1.8049653526685328, "grad_norm": 0.4951941668987274, "learning_rate": 3.5520011195580994e-06, "loss": 0.423, "step": 39332 }, { "epoch": 1.8050112431737872, "grad_norm": 0.43567320704460144, "learning_rate": 3.551766440225811e-06, "loss": 0.3142, "step": 39333 }, { "epoch": 1.8050571336790417, "grad_norm": 0.44170209765434265, "learning_rate": 3.551531764376067e-06, "loss": 0.3099, "step": 39334 }, { "epoch": 1.8051030241842962, "grad_norm": 0.4575502574443817, "learning_rate": 3.5512970920094324e-06, "loss": 0.3487, "step": 39335 }, { "epoch": 1.8051489146895507, "grad_norm": 0.44578108191490173, "learning_rate": 3.5510624231264717e-06, "loss": 0.3134, "step": 39336 }, { "epoch": 1.8051948051948052, "grad_norm": 0.5043043494224548, "learning_rate": 3.5508277577277463e-06, "loss": 0.3656, "step": 39337 }, { "epoch": 1.8052406957000597, "grad_norm": 0.4623585343360901, "learning_rate": 3.5505930958138246e-06, "loss": 0.3224, "step": 39338 }, { "epoch": 1.8052865862053142, "grad_norm": 0.4920845627784729, "learning_rate": 3.5503584373852694e-06, "loss": 0.3543, "step": 39339 }, { "epoch": 1.8053324767105687, "grad_norm": 0.4601145386695862, "learning_rate": 3.550123782442643e-06, "loss": 0.3085, "step": 39340 }, { "epoch": 1.805378367215823, "grad_norm": 0.4655556082725525, "learning_rate": 3.549889130986513e-06, "loss": 0.3584, "step": 39341 }, { "epoch": 1.8054242577210775, "grad_norm": 0.4654712975025177, "learning_rate": 3.549654483017442e-06, "loss": 0.3731, "step": 39342 }, { "epoch": 1.805470148226332, "grad_norm": 0.4468616247177124, "learning_rate": 3.5494198385359915e-06, "loss": 0.2959, "step": 39343 }, { "epoch": 1.8055160387315863, "grad_norm": 0.4409073293209076, "learning_rate": 3.5491851975427307e-06, "loss": 0.3208, "step": 39344 }, { "epoch": 1.8055619292368408, "grad_norm": 0.44044968485832214, "learning_rate": 3.548950560038222e-06, "loss": 0.3097, "step": 39345 }, { "epoch": 1.8056078197420953, "grad_norm": 0.486675500869751, "learning_rate": 3.5487159260230274e-06, "loss": 0.36, "step": 39346 }, { "epoch": 1.8056537102473498, "grad_norm": 0.4384382665157318, "learning_rate": 3.5484812954977153e-06, "loss": 0.3169, "step": 39347 }, { "epoch": 1.8056996007526043, "grad_norm": 0.525726854801178, "learning_rate": 3.548246668462847e-06, "loss": 0.4639, "step": 39348 }, { "epoch": 1.8057454912578588, "grad_norm": 0.5005178451538086, "learning_rate": 3.548012044918985e-06, "loss": 0.4902, "step": 39349 }, { "epoch": 1.8057913817631133, "grad_norm": 0.4615524113178253, "learning_rate": 3.5477774248666972e-06, "loss": 0.3306, "step": 39350 }, { "epoch": 1.8058372722683678, "grad_norm": 0.44902145862579346, "learning_rate": 3.5475428083065478e-06, "loss": 0.3294, "step": 39351 }, { "epoch": 1.8058831627736223, "grad_norm": 0.4812130928039551, "learning_rate": 3.5473081952390976e-06, "loss": 0.4089, "step": 39352 }, { "epoch": 1.8059290532788768, "grad_norm": 0.46974262595176697, "learning_rate": 3.547073585664914e-06, "loss": 0.3825, "step": 39353 }, { "epoch": 1.805974943784131, "grad_norm": 0.4758954346179962, "learning_rate": 3.54683897958456e-06, "loss": 0.3981, "step": 39354 }, { "epoch": 1.8060208342893855, "grad_norm": 0.467261403799057, "learning_rate": 3.546604376998597e-06, "loss": 0.3829, "step": 39355 }, { "epoch": 1.80606672479464, "grad_norm": 0.47148534655570984, "learning_rate": 3.546369777907596e-06, "loss": 0.3446, "step": 39356 }, { "epoch": 1.8061126152998943, "grad_norm": 0.48792725801467896, "learning_rate": 3.5461351823121144e-06, "loss": 0.3627, "step": 39357 }, { "epoch": 1.8061585058051488, "grad_norm": 0.49115055799484253, "learning_rate": 3.5459005902127196e-06, "loss": 0.3435, "step": 39358 }, { "epoch": 1.8062043963104033, "grad_norm": 0.5524871945381165, "learning_rate": 3.545666001609973e-06, "loss": 0.26, "step": 39359 }, { "epoch": 1.8062502868156578, "grad_norm": 0.47939959168434143, "learning_rate": 3.5454314165044424e-06, "loss": 0.395, "step": 39360 }, { "epoch": 1.8062961773209123, "grad_norm": 0.44979482889175415, "learning_rate": 3.5451968348966904e-06, "loss": 0.2927, "step": 39361 }, { "epoch": 1.8063420678261668, "grad_norm": 0.5196375846862793, "learning_rate": 3.54496225678728e-06, "loss": 0.4358, "step": 39362 }, { "epoch": 1.8063879583314213, "grad_norm": 0.47665971517562866, "learning_rate": 3.544727682176776e-06, "loss": 0.3448, "step": 39363 }, { "epoch": 1.8064338488366758, "grad_norm": 0.4489773213863373, "learning_rate": 3.5444931110657444e-06, "loss": 0.3692, "step": 39364 }, { "epoch": 1.8064797393419303, "grad_norm": 0.46411487460136414, "learning_rate": 3.5442585434547438e-06, "loss": 0.3509, "step": 39365 }, { "epoch": 1.8065256298471846, "grad_norm": 0.46334949135780334, "learning_rate": 3.5440239793443453e-06, "loss": 0.3336, "step": 39366 }, { "epoch": 1.806571520352439, "grad_norm": 0.5342000722885132, "learning_rate": 3.543789418735109e-06, "loss": 0.3344, "step": 39367 }, { "epoch": 1.8066174108576936, "grad_norm": 0.46767657995224, "learning_rate": 3.5435548616275984e-06, "loss": 0.3667, "step": 39368 }, { "epoch": 1.806663301362948, "grad_norm": 0.48962831497192383, "learning_rate": 3.54332030802238e-06, "loss": 0.4198, "step": 39369 }, { "epoch": 1.8067091918682023, "grad_norm": 0.4334319829940796, "learning_rate": 3.5430857579200166e-06, "loss": 0.3215, "step": 39370 }, { "epoch": 1.8067550823734568, "grad_norm": 0.47955387830734253, "learning_rate": 3.54285121132107e-06, "loss": 0.3411, "step": 39371 }, { "epoch": 1.8068009728787113, "grad_norm": 0.4400034546852112, "learning_rate": 3.5426166682261088e-06, "loss": 0.3161, "step": 39372 }, { "epoch": 1.8068468633839658, "grad_norm": 0.4896361827850342, "learning_rate": 3.542382128635694e-06, "loss": 0.3996, "step": 39373 }, { "epoch": 1.8068927538892203, "grad_norm": 0.4794846475124359, "learning_rate": 3.542147592550389e-06, "loss": 0.3863, "step": 39374 }, { "epoch": 1.8069386443944748, "grad_norm": 0.40737876296043396, "learning_rate": 3.5419130599707603e-06, "loss": 0.2705, "step": 39375 }, { "epoch": 1.8069845348997293, "grad_norm": 0.5043819546699524, "learning_rate": 3.54167853089737e-06, "loss": 0.3775, "step": 39376 }, { "epoch": 1.8070304254049838, "grad_norm": 0.4714910686016083, "learning_rate": 3.5414440053307807e-06, "loss": 0.364, "step": 39377 }, { "epoch": 1.8070763159102383, "grad_norm": 0.46037811040878296, "learning_rate": 3.541209483271562e-06, "loss": 0.3614, "step": 39378 }, { "epoch": 1.8071222064154926, "grad_norm": 0.46615785360336304, "learning_rate": 3.540974964720273e-06, "loss": 0.3823, "step": 39379 }, { "epoch": 1.807168096920747, "grad_norm": 0.45109298825263977, "learning_rate": 3.5407404496774755e-06, "loss": 0.34, "step": 39380 }, { "epoch": 1.8072139874260016, "grad_norm": 0.43808576464653015, "learning_rate": 3.54050593814374e-06, "loss": 0.3272, "step": 39381 }, { "epoch": 1.8072598779312559, "grad_norm": 0.4434596002101898, "learning_rate": 3.540271430119626e-06, "loss": 0.3229, "step": 39382 }, { "epoch": 1.8073057684365104, "grad_norm": 0.459803968667984, "learning_rate": 3.540036925605699e-06, "loss": 0.3423, "step": 39383 }, { "epoch": 1.8073516589417649, "grad_norm": 0.49817344546318054, "learning_rate": 3.539802424602522e-06, "loss": 0.4044, "step": 39384 }, { "epoch": 1.8073975494470194, "grad_norm": 0.4696045219898224, "learning_rate": 3.53956792711066e-06, "loss": 0.3688, "step": 39385 }, { "epoch": 1.8074434399522739, "grad_norm": 0.46462154388427734, "learning_rate": 3.539333433130674e-06, "loss": 0.3407, "step": 39386 }, { "epoch": 1.8074893304575284, "grad_norm": 0.46397772431373596, "learning_rate": 3.539098942663132e-06, "loss": 0.3251, "step": 39387 }, { "epoch": 1.8075352209627829, "grad_norm": 0.47480034828186035, "learning_rate": 3.538864455708596e-06, "loss": 0.3022, "step": 39388 }, { "epoch": 1.8075811114680373, "grad_norm": 0.4780513644218445, "learning_rate": 3.5386299722676287e-06, "loss": 0.3282, "step": 39389 }, { "epoch": 1.8076270019732918, "grad_norm": 0.5047063231468201, "learning_rate": 3.538395492340796e-06, "loss": 0.3823, "step": 39390 }, { "epoch": 1.8076728924785463, "grad_norm": 0.4939988851547241, "learning_rate": 3.5381610159286615e-06, "loss": 0.3479, "step": 39391 }, { "epoch": 1.8077187829838006, "grad_norm": 0.480477511882782, "learning_rate": 3.5379265430317877e-06, "loss": 0.3687, "step": 39392 }, { "epoch": 1.8077646734890551, "grad_norm": 0.5041800737380981, "learning_rate": 3.537692073650738e-06, "loss": 0.3896, "step": 39393 }, { "epoch": 1.8078105639943096, "grad_norm": 0.46361982822418213, "learning_rate": 3.5374576077860784e-06, "loss": 0.3301, "step": 39394 }, { "epoch": 1.807856454499564, "grad_norm": 0.46986833214759827, "learning_rate": 3.537223145438372e-06, "loss": 0.4069, "step": 39395 }, { "epoch": 1.8079023450048184, "grad_norm": 0.45814529061317444, "learning_rate": 3.5369886866081815e-06, "loss": 0.3544, "step": 39396 }, { "epoch": 1.8079482355100729, "grad_norm": 0.4591224193572998, "learning_rate": 3.536754231296072e-06, "loss": 0.3537, "step": 39397 }, { "epoch": 1.8079941260153274, "grad_norm": 0.450774222612381, "learning_rate": 3.5365197795026075e-06, "loss": 0.3323, "step": 39398 }, { "epoch": 1.8080400165205819, "grad_norm": 0.469013512134552, "learning_rate": 3.5362853312283484e-06, "loss": 0.3306, "step": 39399 }, { "epoch": 1.8080859070258364, "grad_norm": 0.49245598912239075, "learning_rate": 3.536050886473864e-06, "loss": 0.4283, "step": 39400 }, { "epoch": 1.8081317975310909, "grad_norm": 0.45118993520736694, "learning_rate": 3.535816445239715e-06, "loss": 0.3218, "step": 39401 }, { "epoch": 1.8081776880363454, "grad_norm": 0.46114370226860046, "learning_rate": 3.5355820075264625e-06, "loss": 0.3748, "step": 39402 }, { "epoch": 1.8082235785415999, "grad_norm": 0.4903721809387207, "learning_rate": 3.5353475733346754e-06, "loss": 0.423, "step": 39403 }, { "epoch": 1.8082694690468544, "grad_norm": 0.4909888803958893, "learning_rate": 3.5351131426649156e-06, "loss": 0.3463, "step": 39404 }, { "epoch": 1.8083153595521086, "grad_norm": 0.4454742968082428, "learning_rate": 3.5348787155177444e-06, "loss": 0.2991, "step": 39405 }, { "epoch": 1.8083612500573631, "grad_norm": 0.4895438253879547, "learning_rate": 3.5346442918937297e-06, "loss": 0.4121, "step": 39406 }, { "epoch": 1.8084071405626176, "grad_norm": 0.4469958245754242, "learning_rate": 3.5344098717934318e-06, "loss": 0.3253, "step": 39407 }, { "epoch": 1.808453031067872, "grad_norm": 0.4424133598804474, "learning_rate": 3.534175455217414e-06, "loss": 0.3299, "step": 39408 }, { "epoch": 1.8084989215731264, "grad_norm": 0.44898784160614014, "learning_rate": 3.5339410421662435e-06, "loss": 0.3382, "step": 39409 }, { "epoch": 1.808544812078381, "grad_norm": 0.45608261227607727, "learning_rate": 3.5337066326404823e-06, "loss": 0.3448, "step": 39410 }, { "epoch": 1.8085907025836354, "grad_norm": 0.4978122413158417, "learning_rate": 3.5334722266406927e-06, "loss": 0.3897, "step": 39411 }, { "epoch": 1.80863659308889, "grad_norm": 0.5066713094711304, "learning_rate": 3.5332378241674414e-06, "loss": 0.382, "step": 39412 }, { "epoch": 1.8086824835941444, "grad_norm": 0.4606563150882721, "learning_rate": 3.53300342522129e-06, "loss": 0.3644, "step": 39413 }, { "epoch": 1.808728374099399, "grad_norm": 0.4733734726905823, "learning_rate": 3.5327690298027993e-06, "loss": 0.3553, "step": 39414 }, { "epoch": 1.8087742646046534, "grad_norm": 0.4480231702327728, "learning_rate": 3.5325346379125385e-06, "loss": 0.3402, "step": 39415 }, { "epoch": 1.808820155109908, "grad_norm": 0.4656098783016205, "learning_rate": 3.532300249551069e-06, "loss": 0.356, "step": 39416 }, { "epoch": 1.8088660456151622, "grad_norm": 0.46409595012664795, "learning_rate": 3.5320658647189534e-06, "loss": 0.35, "step": 39417 }, { "epoch": 1.8089119361204167, "grad_norm": 0.49375781416893005, "learning_rate": 3.5318314834167565e-06, "loss": 0.3652, "step": 39418 }, { "epoch": 1.8089578266256712, "grad_norm": 0.46811777353286743, "learning_rate": 3.5315971056450416e-06, "loss": 0.3565, "step": 39419 }, { "epoch": 1.8090037171309254, "grad_norm": 0.45803651213645935, "learning_rate": 3.5313627314043703e-06, "loss": 0.3253, "step": 39420 }, { "epoch": 1.80904960763618, "grad_norm": 0.48711204528808594, "learning_rate": 3.53112836069531e-06, "loss": 0.3519, "step": 39421 }, { "epoch": 1.8090954981414344, "grad_norm": 0.42546218633651733, "learning_rate": 3.530893993518424e-06, "loss": 0.3217, "step": 39422 }, { "epoch": 1.809141388646689, "grad_norm": 0.46847471594810486, "learning_rate": 3.53065962987427e-06, "loss": 0.3682, "step": 39423 }, { "epoch": 1.8091872791519434, "grad_norm": 0.44000476598739624, "learning_rate": 3.530425269763419e-06, "loss": 0.3155, "step": 39424 }, { "epoch": 1.809233169657198, "grad_norm": 0.4481617212295532, "learning_rate": 3.5301909131864316e-06, "loss": 0.3478, "step": 39425 }, { "epoch": 1.8092790601624524, "grad_norm": 0.4471161961555481, "learning_rate": 3.5299565601438707e-06, "loss": 0.334, "step": 39426 }, { "epoch": 1.809324950667707, "grad_norm": 0.44624075293540955, "learning_rate": 3.5297222106362994e-06, "loss": 0.3, "step": 39427 }, { "epoch": 1.8093708411729614, "grad_norm": 0.46490681171417236, "learning_rate": 3.529487864664283e-06, "loss": 0.3534, "step": 39428 }, { "epoch": 1.809416731678216, "grad_norm": 0.4589875638484955, "learning_rate": 3.5292535222283842e-06, "loss": 0.31, "step": 39429 }, { "epoch": 1.8094626221834702, "grad_norm": 0.49487200379371643, "learning_rate": 3.5290191833291644e-06, "loss": 0.3905, "step": 39430 }, { "epoch": 1.8095085126887247, "grad_norm": 0.4707028269767761, "learning_rate": 3.5287848479671915e-06, "loss": 0.3588, "step": 39431 }, { "epoch": 1.8095544031939792, "grad_norm": 0.5007294416427612, "learning_rate": 3.5285505161430266e-06, "loss": 0.4416, "step": 39432 }, { "epoch": 1.8096002936992335, "grad_norm": 0.4758753478527069, "learning_rate": 3.5283161878572318e-06, "loss": 0.3899, "step": 39433 }, { "epoch": 1.809646184204488, "grad_norm": 0.4538108706474304, "learning_rate": 3.528081863110374e-06, "loss": 0.3132, "step": 39434 }, { "epoch": 1.8096920747097425, "grad_norm": 0.4754985570907593, "learning_rate": 3.5278475419030145e-06, "loss": 0.3737, "step": 39435 }, { "epoch": 1.809737965214997, "grad_norm": 0.46852758526802063, "learning_rate": 3.527613224235714e-06, "loss": 0.3627, "step": 39436 }, { "epoch": 1.8097838557202515, "grad_norm": 0.44394734501838684, "learning_rate": 3.5273789101090426e-06, "loss": 0.3329, "step": 39437 }, { "epoch": 1.809829746225506, "grad_norm": 0.479248970746994, "learning_rate": 3.5271445995235593e-06, "loss": 0.3426, "step": 39438 }, { "epoch": 1.8098756367307605, "grad_norm": 0.4800010919570923, "learning_rate": 3.526910292479827e-06, "loss": 0.3421, "step": 39439 }, { "epoch": 1.809921527236015, "grad_norm": 0.4706912338733673, "learning_rate": 3.5266759889784126e-06, "loss": 0.383, "step": 39440 }, { "epoch": 1.8099674177412695, "grad_norm": 0.46369218826293945, "learning_rate": 3.526441689019877e-06, "loss": 0.3164, "step": 39441 }, { "epoch": 1.810013308246524, "grad_norm": 0.44786131381988525, "learning_rate": 3.5262073926047815e-06, "loss": 0.309, "step": 39442 }, { "epoch": 1.8100591987517782, "grad_norm": 0.47272443771362305, "learning_rate": 3.525973099733695e-06, "loss": 0.3603, "step": 39443 }, { "epoch": 1.8101050892570327, "grad_norm": 0.465867280960083, "learning_rate": 3.5257388104071784e-06, "loss": 0.3342, "step": 39444 }, { "epoch": 1.8101509797622872, "grad_norm": 0.4450068473815918, "learning_rate": 3.525504524625791e-06, "loss": 0.3257, "step": 39445 }, { "epoch": 1.8101968702675415, "grad_norm": 0.4671347737312317, "learning_rate": 3.525270242390103e-06, "loss": 0.347, "step": 39446 }, { "epoch": 1.810242760772796, "grad_norm": 0.46415403485298157, "learning_rate": 3.525035963700675e-06, "loss": 0.3755, "step": 39447 }, { "epoch": 1.8102886512780505, "grad_norm": 0.45591476559638977, "learning_rate": 3.5248016885580675e-06, "loss": 0.3422, "step": 39448 }, { "epoch": 1.810334541783305, "grad_norm": 0.47728389501571655, "learning_rate": 3.5245674169628487e-06, "loss": 0.3878, "step": 39449 }, { "epoch": 1.8103804322885595, "grad_norm": 0.48697972297668457, "learning_rate": 3.5243331489155785e-06, "loss": 0.377, "step": 39450 }, { "epoch": 1.810426322793814, "grad_norm": 0.45997846126556396, "learning_rate": 3.52409888441682e-06, "loss": 0.3149, "step": 39451 }, { "epoch": 1.8104722132990685, "grad_norm": 0.5034828186035156, "learning_rate": 3.52386462346714e-06, "loss": 0.3945, "step": 39452 }, { "epoch": 1.810518103804323, "grad_norm": 0.4767049252986908, "learning_rate": 3.523630366067099e-06, "loss": 0.354, "step": 39453 }, { "epoch": 1.8105639943095775, "grad_norm": 0.4790947735309601, "learning_rate": 3.5233961122172604e-06, "loss": 0.389, "step": 39454 }, { "epoch": 1.8106098848148318, "grad_norm": 0.4993637800216675, "learning_rate": 3.5231618619181897e-06, "loss": 0.4014, "step": 39455 }, { "epoch": 1.8106557753200863, "grad_norm": 0.48440149426460266, "learning_rate": 3.5229276151704477e-06, "loss": 0.3964, "step": 39456 }, { "epoch": 1.8107016658253408, "grad_norm": 0.456769198179245, "learning_rate": 3.5226933719745994e-06, "loss": 0.3339, "step": 39457 }, { "epoch": 1.8107475563305953, "grad_norm": 0.4684979021549225, "learning_rate": 3.5224591323312044e-06, "loss": 0.4113, "step": 39458 }, { "epoch": 1.8107934468358495, "grad_norm": 0.4555327594280243, "learning_rate": 3.5222248962408314e-06, "loss": 0.3255, "step": 39459 }, { "epoch": 1.810839337341104, "grad_norm": 0.45053476095199585, "learning_rate": 3.521990663704041e-06, "loss": 0.3169, "step": 39460 }, { "epoch": 1.8108852278463585, "grad_norm": 0.4760741889476776, "learning_rate": 3.521756434721396e-06, "loss": 0.3577, "step": 39461 }, { "epoch": 1.810931118351613, "grad_norm": 0.872757077217102, "learning_rate": 3.521522209293461e-06, "loss": 0.3982, "step": 39462 }, { "epoch": 1.8109770088568675, "grad_norm": 0.45069801807403564, "learning_rate": 3.5212879874207986e-06, "loss": 0.3297, "step": 39463 }, { "epoch": 1.811022899362122, "grad_norm": 0.48185452818870544, "learning_rate": 3.5210537691039694e-06, "loss": 0.408, "step": 39464 }, { "epoch": 1.8110687898673765, "grad_norm": 0.4508371353149414, "learning_rate": 3.5208195543435407e-06, "loss": 0.3422, "step": 39465 }, { "epoch": 1.811114680372631, "grad_norm": 0.4416200518608093, "learning_rate": 3.520585343140076e-06, "loss": 0.2842, "step": 39466 }, { "epoch": 1.8111605708778855, "grad_norm": 0.47118598222732544, "learning_rate": 3.5203511354941333e-06, "loss": 0.3642, "step": 39467 }, { "epoch": 1.8112064613831398, "grad_norm": 0.4564858078956604, "learning_rate": 3.5201169314062816e-06, "loss": 0.3088, "step": 39468 }, { "epoch": 1.8112523518883943, "grad_norm": 0.4718751609325409, "learning_rate": 3.5198827308770812e-06, "loss": 0.3295, "step": 39469 }, { "epoch": 1.8112982423936488, "grad_norm": 0.4917490482330322, "learning_rate": 3.5196485339070947e-06, "loss": 0.347, "step": 39470 }, { "epoch": 1.811344132898903, "grad_norm": 0.4902651309967041, "learning_rate": 3.5194143404968874e-06, "loss": 0.3817, "step": 39471 }, { "epoch": 1.8113900234041576, "grad_norm": 0.47088849544525146, "learning_rate": 3.5191801506470213e-06, "loss": 0.3771, "step": 39472 }, { "epoch": 1.811435913909412, "grad_norm": 0.4848845601081848, "learning_rate": 3.518945964358057e-06, "loss": 0.4023, "step": 39473 }, { "epoch": 1.8114818044146666, "grad_norm": 0.506091296672821, "learning_rate": 3.5187117816305632e-06, "loss": 0.4172, "step": 39474 }, { "epoch": 1.811527694919921, "grad_norm": 0.4335012137889862, "learning_rate": 3.5184776024650996e-06, "loss": 0.2788, "step": 39475 }, { "epoch": 1.8115735854251755, "grad_norm": 0.4631637930870056, "learning_rate": 3.518243426862229e-06, "loss": 0.3762, "step": 39476 }, { "epoch": 1.81161947593043, "grad_norm": 0.4419138431549072, "learning_rate": 3.518009254822516e-06, "loss": 0.2997, "step": 39477 }, { "epoch": 1.8116653664356845, "grad_norm": 0.45471277832984924, "learning_rate": 3.5177750863465234e-06, "loss": 0.3216, "step": 39478 }, { "epoch": 1.811711256940939, "grad_norm": 0.47711506485939026, "learning_rate": 3.517540921434811e-06, "loss": 0.3999, "step": 39479 }, { "epoch": 1.8117571474461935, "grad_norm": 0.4533890187740326, "learning_rate": 3.517306760087947e-06, "loss": 0.2658, "step": 39480 }, { "epoch": 1.8118030379514478, "grad_norm": 0.4536905288696289, "learning_rate": 3.5170726023064935e-06, "loss": 0.3946, "step": 39481 }, { "epoch": 1.8118489284567023, "grad_norm": 0.4744662344455719, "learning_rate": 3.51683844809101e-06, "loss": 0.4002, "step": 39482 }, { "epoch": 1.8118948189619568, "grad_norm": 0.45185065269470215, "learning_rate": 3.5166042974420637e-06, "loss": 0.3079, "step": 39483 }, { "epoch": 1.811940709467211, "grad_norm": 0.46054282784461975, "learning_rate": 3.5163701503602154e-06, "loss": 0.321, "step": 39484 }, { "epoch": 1.8119865999724656, "grad_norm": 0.4445771872997284, "learning_rate": 3.516136006846027e-06, "loss": 0.3468, "step": 39485 }, { "epoch": 1.81203249047772, "grad_norm": 0.4504624307155609, "learning_rate": 3.5159018669000655e-06, "loss": 0.2975, "step": 39486 }, { "epoch": 1.8120783809829746, "grad_norm": 0.48114365339279175, "learning_rate": 3.5156677305228913e-06, "loss": 0.394, "step": 39487 }, { "epoch": 1.812124271488229, "grad_norm": 0.45713376998901367, "learning_rate": 3.5154335977150665e-06, "loss": 0.3418, "step": 39488 }, { "epoch": 1.8121701619934836, "grad_norm": 0.4792759120464325, "learning_rate": 3.515199468477156e-06, "loss": 0.3454, "step": 39489 }, { "epoch": 1.812216052498738, "grad_norm": 0.480305552482605, "learning_rate": 3.514965342809723e-06, "loss": 0.3518, "step": 39490 }, { "epoch": 1.8122619430039926, "grad_norm": 0.4601585566997528, "learning_rate": 3.51473122071333e-06, "loss": 0.3818, "step": 39491 }, { "epoch": 1.812307833509247, "grad_norm": 0.496074378490448, "learning_rate": 3.5144971021885375e-06, "loss": 0.3787, "step": 39492 }, { "epoch": 1.8123537240145016, "grad_norm": 0.45184215903282166, "learning_rate": 3.5142629872359123e-06, "loss": 0.2937, "step": 39493 }, { "epoch": 1.8123996145197558, "grad_norm": 0.4206579923629761, "learning_rate": 3.514028875856016e-06, "loss": 0.273, "step": 39494 }, { "epoch": 1.8124455050250103, "grad_norm": 0.4677528440952301, "learning_rate": 3.5137947680494085e-06, "loss": 0.2968, "step": 39495 }, { "epoch": 1.8124913955302648, "grad_norm": 0.46379512548446655, "learning_rate": 3.5135606638166573e-06, "loss": 0.348, "step": 39496 }, { "epoch": 1.8125372860355191, "grad_norm": 0.44144296646118164, "learning_rate": 3.5133265631583246e-06, "loss": 0.3031, "step": 39497 }, { "epoch": 1.8125831765407736, "grad_norm": 0.496247798204422, "learning_rate": 3.513092466074971e-06, "loss": 0.4057, "step": 39498 }, { "epoch": 1.812629067046028, "grad_norm": 0.4584907591342926, "learning_rate": 3.512858372567162e-06, "loss": 0.3355, "step": 39499 }, { "epoch": 1.8126749575512826, "grad_norm": 0.4650803506374359, "learning_rate": 3.5126242826354583e-06, "loss": 0.3202, "step": 39500 }, { "epoch": 1.812720848056537, "grad_norm": 0.4555077850818634, "learning_rate": 3.5123901962804217e-06, "loss": 0.3461, "step": 39501 }, { "epoch": 1.8127667385617916, "grad_norm": 0.4598619341850281, "learning_rate": 3.51215611350262e-06, "loss": 0.3265, "step": 39502 }, { "epoch": 1.812812629067046, "grad_norm": 0.557809591293335, "learning_rate": 3.511922034302613e-06, "loss": 0.4538, "step": 39503 }, { "epoch": 1.8128585195723006, "grad_norm": 0.4756147861480713, "learning_rate": 3.5116879586809627e-06, "loss": 0.3297, "step": 39504 }, { "epoch": 1.812904410077555, "grad_norm": 0.4510299861431122, "learning_rate": 3.5114538866382342e-06, "loss": 0.3422, "step": 39505 }, { "epoch": 1.8129503005828094, "grad_norm": 0.45085158944129944, "learning_rate": 3.5112198181749903e-06, "loss": 0.316, "step": 39506 }, { "epoch": 1.8129961910880639, "grad_norm": 0.44742006063461304, "learning_rate": 3.510985753291789e-06, "loss": 0.305, "step": 39507 }, { "epoch": 1.8130420815933184, "grad_norm": 0.44663214683532715, "learning_rate": 3.5107516919892003e-06, "loss": 0.2969, "step": 39508 }, { "epoch": 1.8130879720985726, "grad_norm": 0.5223094820976257, "learning_rate": 3.510517634267784e-06, "loss": 0.3692, "step": 39509 }, { "epoch": 1.8131338626038271, "grad_norm": 0.48271459341049194, "learning_rate": 3.510283580128101e-06, "loss": 0.4002, "step": 39510 }, { "epoch": 1.8131797531090816, "grad_norm": 0.454841673374176, "learning_rate": 3.510049529570717e-06, "loss": 0.3208, "step": 39511 }, { "epoch": 1.8132256436143361, "grad_norm": 0.5297150611877441, "learning_rate": 3.5098154825961933e-06, "loss": 0.3868, "step": 39512 }, { "epoch": 1.8132715341195906, "grad_norm": 0.4768126308917999, "learning_rate": 3.509581439205092e-06, "loss": 0.3791, "step": 39513 }, { "epoch": 1.8133174246248451, "grad_norm": 0.47430965304374695, "learning_rate": 3.5093473993979785e-06, "loss": 0.3676, "step": 39514 }, { "epoch": 1.8133633151300996, "grad_norm": 0.44978076219558716, "learning_rate": 3.5091133631754136e-06, "loss": 0.3432, "step": 39515 }, { "epoch": 1.8134092056353541, "grad_norm": 0.44673579931259155, "learning_rate": 3.5088793305379593e-06, "loss": 0.3359, "step": 39516 }, { "epoch": 1.8134550961406086, "grad_norm": 0.484850138425827, "learning_rate": 3.508645301486181e-06, "loss": 0.3923, "step": 39517 }, { "epoch": 1.8135009866458631, "grad_norm": 0.4703046977519989, "learning_rate": 3.50841127602064e-06, "loss": 0.3233, "step": 39518 }, { "epoch": 1.8135468771511174, "grad_norm": 0.6792561411857605, "learning_rate": 3.5081772541418984e-06, "loss": 0.2875, "step": 39519 }, { "epoch": 1.813592767656372, "grad_norm": 0.4948335289955139, "learning_rate": 3.5079432358505208e-06, "loss": 0.4213, "step": 39520 }, { "epoch": 1.8136386581616264, "grad_norm": 0.44160768389701843, "learning_rate": 3.507709221147069e-06, "loss": 0.3116, "step": 39521 }, { "epoch": 1.8136845486668807, "grad_norm": 0.5229886174201965, "learning_rate": 3.507475210032104e-06, "loss": 0.4239, "step": 39522 }, { "epoch": 1.8137304391721352, "grad_norm": 0.4474203884601593, "learning_rate": 3.5072412025061897e-06, "loss": 0.3137, "step": 39523 }, { "epoch": 1.8137763296773897, "grad_norm": 0.5003618001937866, "learning_rate": 3.5070071985698907e-06, "loss": 0.3556, "step": 39524 }, { "epoch": 1.8138222201826442, "grad_norm": 0.49806690216064453, "learning_rate": 3.506773198223768e-06, "loss": 0.4491, "step": 39525 }, { "epoch": 1.8138681106878987, "grad_norm": 0.5106737017631531, "learning_rate": 3.5065392014683834e-06, "loss": 0.3634, "step": 39526 }, { "epoch": 1.8139140011931532, "grad_norm": 0.44777336716651917, "learning_rate": 3.506305208304302e-06, "loss": 0.2697, "step": 39527 }, { "epoch": 1.8139598916984077, "grad_norm": 0.4923311769962311, "learning_rate": 3.506071218732085e-06, "loss": 0.3538, "step": 39528 }, { "epoch": 1.8140057822036622, "grad_norm": 0.47008460760116577, "learning_rate": 3.505837232752293e-06, "loss": 0.3446, "step": 39529 }, { "epoch": 1.8140516727089167, "grad_norm": 0.465802937746048, "learning_rate": 3.5056032503654935e-06, "loss": 0.3525, "step": 39530 }, { "epoch": 1.8140975632141711, "grad_norm": 0.44240227341651917, "learning_rate": 3.505369271572246e-06, "loss": 0.292, "step": 39531 }, { "epoch": 1.8141434537194254, "grad_norm": 0.49259674549102783, "learning_rate": 3.5051352963731128e-06, "loss": 0.3902, "step": 39532 }, { "epoch": 1.81418934422468, "grad_norm": 0.4702433943748474, "learning_rate": 3.5049013247686585e-06, "loss": 0.3459, "step": 39533 }, { "epoch": 1.8142352347299344, "grad_norm": 0.4792638123035431, "learning_rate": 3.5046673567594445e-06, "loss": 0.3794, "step": 39534 }, { "epoch": 1.8142811252351887, "grad_norm": 0.4250314235687256, "learning_rate": 3.5044333923460327e-06, "loss": 0.2885, "step": 39535 }, { "epoch": 1.8143270157404432, "grad_norm": 0.5330049395561218, "learning_rate": 3.5041994315289873e-06, "loss": 0.487, "step": 39536 }, { "epoch": 1.8143729062456977, "grad_norm": 0.5002133250236511, "learning_rate": 3.5039654743088707e-06, "loss": 0.3617, "step": 39537 }, { "epoch": 1.8144187967509522, "grad_norm": 0.4636618494987488, "learning_rate": 3.503731520686243e-06, "loss": 0.3323, "step": 39538 }, { "epoch": 1.8144646872562067, "grad_norm": 0.43951013684272766, "learning_rate": 3.5034975706616702e-06, "loss": 0.2902, "step": 39539 }, { "epoch": 1.8145105777614612, "grad_norm": 0.4752309024333954, "learning_rate": 3.5032636242357138e-06, "loss": 0.3893, "step": 39540 }, { "epoch": 1.8145564682667157, "grad_norm": 0.46331796050071716, "learning_rate": 3.503029681408935e-06, "loss": 0.3157, "step": 39541 }, { "epoch": 1.8146023587719702, "grad_norm": 0.45354852080345154, "learning_rate": 3.502795742181898e-06, "loss": 0.3448, "step": 39542 }, { "epoch": 1.8146482492772247, "grad_norm": 0.40669703483581543, "learning_rate": 3.5025618065551647e-06, "loss": 0.2301, "step": 39543 }, { "epoch": 1.814694139782479, "grad_norm": 0.4454670250415802, "learning_rate": 3.5023278745292965e-06, "loss": 0.3272, "step": 39544 }, { "epoch": 1.8147400302877335, "grad_norm": 0.49089691042900085, "learning_rate": 3.5020939461048586e-06, "loss": 0.4403, "step": 39545 }, { "epoch": 1.814785920792988, "grad_norm": 0.48121514916419983, "learning_rate": 3.501860021282412e-06, "loss": 0.4, "step": 39546 }, { "epoch": 1.8148318112982424, "grad_norm": 0.5374740958213806, "learning_rate": 3.5016261000625173e-06, "loss": 0.4917, "step": 39547 }, { "epoch": 1.8148777018034967, "grad_norm": 0.43312427401542664, "learning_rate": 3.5013921824457408e-06, "loss": 0.2955, "step": 39548 }, { "epoch": 1.8149235923087512, "grad_norm": 0.41755566000938416, "learning_rate": 3.5011582684326435e-06, "loss": 0.2531, "step": 39549 }, { "epoch": 1.8149694828140057, "grad_norm": 0.4619131088256836, "learning_rate": 3.500924358023785e-06, "loss": 0.3508, "step": 39550 }, { "epoch": 1.8150153733192602, "grad_norm": 0.4567030966281891, "learning_rate": 3.5006904512197326e-06, "loss": 0.3725, "step": 39551 }, { "epoch": 1.8150612638245147, "grad_norm": 0.46277812123298645, "learning_rate": 3.5004565480210464e-06, "loss": 0.352, "step": 39552 }, { "epoch": 1.8151071543297692, "grad_norm": 0.46202561259269714, "learning_rate": 3.5002226484282873e-06, "loss": 0.3186, "step": 39553 }, { "epoch": 1.8151530448350237, "grad_norm": 0.45498454570770264, "learning_rate": 3.4999887524420206e-06, "loss": 0.3371, "step": 39554 }, { "epoch": 1.8151989353402782, "grad_norm": 0.44943398237228394, "learning_rate": 3.4997548600628084e-06, "loss": 0.3429, "step": 39555 }, { "epoch": 1.8152448258455327, "grad_norm": 0.4773716926574707, "learning_rate": 3.4995209712912117e-06, "loss": 0.3838, "step": 39556 }, { "epoch": 1.815290716350787, "grad_norm": 0.469123512506485, "learning_rate": 3.4992870861277928e-06, "loss": 0.3748, "step": 39557 }, { "epoch": 1.8153366068560415, "grad_norm": 0.49857503175735474, "learning_rate": 3.499053204573115e-06, "loss": 0.3868, "step": 39558 }, { "epoch": 1.815382497361296, "grad_norm": 0.47888579964637756, "learning_rate": 3.4988193266277415e-06, "loss": 0.3547, "step": 39559 }, { "epoch": 1.8154283878665503, "grad_norm": 0.48772895336151123, "learning_rate": 3.4985854522922306e-06, "loss": 0.3711, "step": 39560 }, { "epoch": 1.8154742783718048, "grad_norm": 0.4486040771007538, "learning_rate": 3.498351581567151e-06, "loss": 0.3202, "step": 39561 }, { "epoch": 1.8155201688770592, "grad_norm": 0.4901120066642761, "learning_rate": 3.498117714453061e-06, "loss": 0.4046, "step": 39562 }, { "epoch": 1.8155660593823137, "grad_norm": 0.4414820969104767, "learning_rate": 3.497883850950523e-06, "loss": 0.2689, "step": 39563 }, { "epoch": 1.8156119498875682, "grad_norm": 0.4740545153617859, "learning_rate": 3.497649991060102e-06, "loss": 0.3197, "step": 39564 }, { "epoch": 1.8156578403928227, "grad_norm": 0.44108715653419495, "learning_rate": 3.4974161347823582e-06, "loss": 0.3159, "step": 39565 }, { "epoch": 1.8157037308980772, "grad_norm": 0.48185884952545166, "learning_rate": 3.4971822821178524e-06, "loss": 0.3494, "step": 39566 }, { "epoch": 1.8157496214033317, "grad_norm": 0.4806457459926605, "learning_rate": 3.496948433067151e-06, "loss": 0.38, "step": 39567 }, { "epoch": 1.8157955119085862, "grad_norm": 0.4542843699455261, "learning_rate": 3.496714587630814e-06, "loss": 0.3339, "step": 39568 }, { "epoch": 1.8158414024138407, "grad_norm": 0.4481528103351593, "learning_rate": 3.496480745809403e-06, "loss": 0.3208, "step": 39569 }, { "epoch": 1.815887292919095, "grad_norm": 0.462266206741333, "learning_rate": 3.4962469076034823e-06, "loss": 0.3661, "step": 39570 }, { "epoch": 1.8159331834243495, "grad_norm": 0.5014846920967102, "learning_rate": 3.4960130730136135e-06, "loss": 0.3898, "step": 39571 }, { "epoch": 1.815979073929604, "grad_norm": 0.4919555187225342, "learning_rate": 3.495779242040356e-06, "loss": 0.3668, "step": 39572 }, { "epoch": 1.8160249644348583, "grad_norm": 0.4625389277935028, "learning_rate": 3.4955454146842773e-06, "loss": 0.3419, "step": 39573 }, { "epoch": 1.8160708549401128, "grad_norm": 0.5019527673721313, "learning_rate": 3.495311590945937e-06, "loss": 0.4436, "step": 39574 }, { "epoch": 1.8161167454453673, "grad_norm": 0.44494757056236267, "learning_rate": 3.4950777708258955e-06, "loss": 0.3392, "step": 39575 }, { "epoch": 1.8161626359506218, "grad_norm": 0.5316281318664551, "learning_rate": 3.4948439543247185e-06, "loss": 0.3736, "step": 39576 }, { "epoch": 1.8162085264558763, "grad_norm": 0.4386516809463501, "learning_rate": 3.494610141442967e-06, "loss": 0.3147, "step": 39577 }, { "epoch": 1.8162544169611308, "grad_norm": 0.48993417620658875, "learning_rate": 3.4943763321812007e-06, "loss": 0.3961, "step": 39578 }, { "epoch": 1.8163003074663853, "grad_norm": 0.44720199704170227, "learning_rate": 3.4941425265399873e-06, "loss": 0.2872, "step": 39579 }, { "epoch": 1.8163461979716398, "grad_norm": 0.4452161192893982, "learning_rate": 3.4939087245198855e-06, "loss": 0.3391, "step": 39580 }, { "epoch": 1.8163920884768943, "grad_norm": 0.4964442849159241, "learning_rate": 3.493674926121455e-06, "loss": 0.4122, "step": 39581 }, { "epoch": 1.8164379789821488, "grad_norm": 0.48770570755004883, "learning_rate": 3.493441131345263e-06, "loss": 0.3681, "step": 39582 }, { "epoch": 1.816483869487403, "grad_norm": 0.47472161054611206, "learning_rate": 3.4932073401918697e-06, "loss": 0.3521, "step": 39583 }, { "epoch": 1.8165297599926575, "grad_norm": 0.4979342222213745, "learning_rate": 3.492973552661837e-06, "loss": 0.4258, "step": 39584 }, { "epoch": 1.816575650497912, "grad_norm": 0.48419591784477234, "learning_rate": 3.4927397687557273e-06, "loss": 0.3359, "step": 39585 }, { "epoch": 1.8166215410031663, "grad_norm": 0.46609562635421753, "learning_rate": 3.492505988474103e-06, "loss": 0.3275, "step": 39586 }, { "epoch": 1.8166674315084208, "grad_norm": 0.45768651366233826, "learning_rate": 3.492272211817526e-06, "loss": 0.3579, "step": 39587 }, { "epoch": 1.8167133220136753, "grad_norm": 0.44072335958480835, "learning_rate": 3.492038438786556e-06, "loss": 0.342, "step": 39588 }, { "epoch": 1.8167592125189298, "grad_norm": 0.4868362843990326, "learning_rate": 3.4918046693817608e-06, "loss": 0.3388, "step": 39589 }, { "epoch": 1.8168051030241843, "grad_norm": 0.46968239545822144, "learning_rate": 3.4915709036036983e-06, "loss": 0.3913, "step": 39590 }, { "epoch": 1.8168509935294388, "grad_norm": 0.4459097981452942, "learning_rate": 3.4913371414529313e-06, "loss": 0.3265, "step": 39591 }, { "epoch": 1.8168968840346933, "grad_norm": 0.46137523651123047, "learning_rate": 3.4911033829300232e-06, "loss": 0.3474, "step": 39592 }, { "epoch": 1.8169427745399478, "grad_norm": 0.48670485615730286, "learning_rate": 3.4908696280355346e-06, "loss": 0.3889, "step": 39593 }, { "epoch": 1.8169886650452023, "grad_norm": 0.4040960371494293, "learning_rate": 3.4906358767700273e-06, "loss": 0.2715, "step": 39594 }, { "epoch": 1.8170345555504566, "grad_norm": 0.4574456214904785, "learning_rate": 3.4904021291340663e-06, "loss": 0.344, "step": 39595 }, { "epoch": 1.817080446055711, "grad_norm": 0.4248758852481842, "learning_rate": 3.4901683851282108e-06, "loss": 0.286, "step": 39596 }, { "epoch": 1.8171263365609656, "grad_norm": 0.4577995836734772, "learning_rate": 3.4899346447530234e-06, "loss": 0.3684, "step": 39597 }, { "epoch": 1.8171722270662198, "grad_norm": 0.4645821750164032, "learning_rate": 3.489700908009067e-06, "loss": 0.3707, "step": 39598 }, { "epoch": 1.8172181175714743, "grad_norm": 0.4526720941066742, "learning_rate": 3.4894671748969045e-06, "loss": 0.3757, "step": 39599 }, { "epoch": 1.8172640080767288, "grad_norm": 0.4931142032146454, "learning_rate": 3.489233445417094e-06, "loss": 0.4197, "step": 39600 }, { "epoch": 1.8173098985819833, "grad_norm": 0.4752224385738373, "learning_rate": 3.488999719570203e-06, "loss": 0.3769, "step": 39601 }, { "epoch": 1.8173557890872378, "grad_norm": 0.4186309576034546, "learning_rate": 3.48876599735679e-06, "loss": 0.2721, "step": 39602 }, { "epoch": 1.8174016795924923, "grad_norm": 0.5018913745880127, "learning_rate": 3.4885322787774154e-06, "loss": 0.3865, "step": 39603 }, { "epoch": 1.8174475700977468, "grad_norm": 0.4992775321006775, "learning_rate": 3.488298563832646e-06, "loss": 0.3606, "step": 39604 }, { "epoch": 1.8174934606030013, "grad_norm": 0.46387362480163574, "learning_rate": 3.488064852523042e-06, "loss": 0.3069, "step": 39605 }, { "epoch": 1.8175393511082558, "grad_norm": 0.4529910683631897, "learning_rate": 3.487831144849163e-06, "loss": 0.3333, "step": 39606 }, { "epoch": 1.8175852416135103, "grad_norm": 0.43581998348236084, "learning_rate": 3.4875974408115743e-06, "loss": 0.291, "step": 39607 }, { "epoch": 1.8176311321187646, "grad_norm": 0.4874977469444275, "learning_rate": 3.4873637404108364e-06, "loss": 0.3811, "step": 39608 }, { "epoch": 1.817677022624019, "grad_norm": 0.45020362734794617, "learning_rate": 3.487130043647509e-06, "loss": 0.3546, "step": 39609 }, { "epoch": 1.8177229131292736, "grad_norm": 0.4834462106227875, "learning_rate": 3.4868963505221587e-06, "loss": 0.3446, "step": 39610 }, { "epoch": 1.8177688036345279, "grad_norm": 0.48098015785217285, "learning_rate": 3.486662661035345e-06, "loss": 0.3807, "step": 39611 }, { "epoch": 1.8178146941397824, "grad_norm": 0.46889030933380127, "learning_rate": 3.486428975187629e-06, "loss": 0.3222, "step": 39612 }, { "epoch": 1.8178605846450369, "grad_norm": 0.5324971079826355, "learning_rate": 3.486195292979575e-06, "loss": 0.3927, "step": 39613 }, { "epoch": 1.8179064751502914, "grad_norm": 0.4587648808956146, "learning_rate": 3.485961614411743e-06, "loss": 0.3361, "step": 39614 }, { "epoch": 1.8179523656555459, "grad_norm": 0.48495808243751526, "learning_rate": 3.485727939484693e-06, "loss": 0.3672, "step": 39615 }, { "epoch": 1.8179982561608004, "grad_norm": 0.46631932258605957, "learning_rate": 3.4854942681989925e-06, "loss": 0.3236, "step": 39616 }, { "epoch": 1.8180441466660548, "grad_norm": 0.5099856853485107, "learning_rate": 3.4852606005552e-06, "loss": 0.3846, "step": 39617 }, { "epoch": 1.8180900371713093, "grad_norm": 0.46327638626098633, "learning_rate": 3.4850269365538765e-06, "loss": 0.337, "step": 39618 }, { "epoch": 1.8181359276765638, "grad_norm": 0.47863510251045227, "learning_rate": 3.484793276195586e-06, "loss": 0.3854, "step": 39619 }, { "epoch": 1.8181818181818183, "grad_norm": 0.44759026169776917, "learning_rate": 3.4845596194808896e-06, "loss": 0.2916, "step": 39620 }, { "epoch": 1.8182277086870726, "grad_norm": 0.5106325149536133, "learning_rate": 3.484325966410349e-06, "loss": 0.4466, "step": 39621 }, { "epoch": 1.8182735991923271, "grad_norm": 0.496844083070755, "learning_rate": 3.484092316984524e-06, "loss": 0.3674, "step": 39622 }, { "epoch": 1.8183194896975816, "grad_norm": 0.511273205280304, "learning_rate": 3.483858671203982e-06, "loss": 0.4868, "step": 39623 }, { "epoch": 1.818365380202836, "grad_norm": 0.4763088822364807, "learning_rate": 3.4836250290692797e-06, "loss": 0.3836, "step": 39624 }, { "epoch": 1.8184112707080904, "grad_norm": 0.44941946864128113, "learning_rate": 3.483391390580979e-06, "loss": 0.3035, "step": 39625 }, { "epoch": 1.8184571612133449, "grad_norm": 0.5031437873840332, "learning_rate": 3.483157755739645e-06, "loss": 0.3765, "step": 39626 }, { "epoch": 1.8185030517185994, "grad_norm": 0.5027937293052673, "learning_rate": 3.482924124545838e-06, "loss": 0.4134, "step": 39627 }, { "epoch": 1.8185489422238539, "grad_norm": 0.725335955619812, "learning_rate": 3.482690497000119e-06, "loss": 0.3855, "step": 39628 }, { "epoch": 1.8185948327291084, "grad_norm": 0.4535479247570038, "learning_rate": 3.4824568731030516e-06, "loss": 0.3283, "step": 39629 }, { "epoch": 1.8186407232343629, "grad_norm": 0.48404204845428467, "learning_rate": 3.4822232528551958e-06, "loss": 0.3548, "step": 39630 }, { "epoch": 1.8186866137396174, "grad_norm": 0.4953702688217163, "learning_rate": 3.4819896362571126e-06, "loss": 0.3959, "step": 39631 }, { "epoch": 1.8187325042448719, "grad_norm": 0.45875510573387146, "learning_rate": 3.4817560233093674e-06, "loss": 0.3477, "step": 39632 }, { "epoch": 1.8187783947501261, "grad_norm": 0.5192646384239197, "learning_rate": 3.4815224140125194e-06, "loss": 0.4271, "step": 39633 }, { "epoch": 1.8188242852553806, "grad_norm": 0.49065494537353516, "learning_rate": 3.481288808367129e-06, "loss": 0.3562, "step": 39634 }, { "epoch": 1.8188701757606351, "grad_norm": 0.5205522775650024, "learning_rate": 3.4810552063737617e-06, "loss": 0.4019, "step": 39635 }, { "epoch": 1.8189160662658896, "grad_norm": 0.47686171531677246, "learning_rate": 3.480821608032977e-06, "loss": 0.3451, "step": 39636 }, { "epoch": 1.818961956771144, "grad_norm": 0.46159252524375916, "learning_rate": 3.480588013345335e-06, "loss": 0.3408, "step": 39637 }, { "epoch": 1.8190078472763984, "grad_norm": 0.4689064025878906, "learning_rate": 3.4803544223114017e-06, "loss": 0.361, "step": 39638 }, { "epoch": 1.819053737781653, "grad_norm": 0.46284064650535583, "learning_rate": 3.4801208349317357e-06, "loss": 0.3181, "step": 39639 }, { "epoch": 1.8190996282869074, "grad_norm": 0.5090509057044983, "learning_rate": 3.4798872512068983e-06, "loss": 0.39, "step": 39640 }, { "epoch": 1.819145518792162, "grad_norm": 0.5186017155647278, "learning_rate": 3.479653671137454e-06, "loss": 0.492, "step": 39641 }, { "epoch": 1.8191914092974164, "grad_norm": 0.4499276876449585, "learning_rate": 3.479420094723962e-06, "loss": 0.3489, "step": 39642 }, { "epoch": 1.819237299802671, "grad_norm": 0.4829350411891937, "learning_rate": 3.4791865219669828e-06, "loss": 0.3446, "step": 39643 }, { "epoch": 1.8192831903079254, "grad_norm": 0.4944691061973572, "learning_rate": 3.4789529528670827e-06, "loss": 0.4094, "step": 39644 }, { "epoch": 1.81932908081318, "grad_norm": 0.4556099474430084, "learning_rate": 3.4787193874248217e-06, "loss": 0.3385, "step": 39645 }, { "epoch": 1.8193749713184342, "grad_norm": 0.48509979248046875, "learning_rate": 3.4784858256407567e-06, "loss": 0.3911, "step": 39646 }, { "epoch": 1.8194208618236887, "grad_norm": 0.4380609393119812, "learning_rate": 3.478252267515455e-06, "loss": 0.2747, "step": 39647 }, { "epoch": 1.8194667523289432, "grad_norm": 0.43105247616767883, "learning_rate": 3.478018713049477e-06, "loss": 0.2827, "step": 39648 }, { "epoch": 1.8195126428341974, "grad_norm": 0.4686887562274933, "learning_rate": 3.4777851622433828e-06, "loss": 0.3793, "step": 39649 }, { "epoch": 1.819558533339452, "grad_norm": 0.42547518014907837, "learning_rate": 3.477551615097735e-06, "loss": 0.2996, "step": 39650 }, { "epoch": 1.8196044238447064, "grad_norm": 0.49002817273139954, "learning_rate": 3.477318071613096e-06, "loss": 0.4055, "step": 39651 }, { "epoch": 1.819650314349961, "grad_norm": 0.45447611808776855, "learning_rate": 3.477084531790024e-06, "loss": 0.3375, "step": 39652 }, { "epoch": 1.8196962048552154, "grad_norm": 0.47052499651908875, "learning_rate": 3.4768509956290854e-06, "loss": 0.3714, "step": 39653 }, { "epoch": 1.81974209536047, "grad_norm": 0.426779568195343, "learning_rate": 3.4766174631308393e-06, "loss": 0.3238, "step": 39654 }, { "epoch": 1.8197879858657244, "grad_norm": 0.5042814016342163, "learning_rate": 3.4763839342958473e-06, "loss": 0.4499, "step": 39655 }, { "epoch": 1.819833876370979, "grad_norm": 0.4874879717826843, "learning_rate": 3.4761504091246695e-06, "loss": 0.3832, "step": 39656 }, { "epoch": 1.8198797668762334, "grad_norm": 0.4270375370979309, "learning_rate": 3.4759168876178707e-06, "loss": 0.3033, "step": 39657 }, { "epoch": 1.819925657381488, "grad_norm": 0.4860326051712036, "learning_rate": 3.4756833697760107e-06, "loss": 0.3467, "step": 39658 }, { "epoch": 1.8199715478867422, "grad_norm": 0.464802086353302, "learning_rate": 3.4754498555996484e-06, "loss": 0.3607, "step": 39659 }, { "epoch": 1.8200174383919967, "grad_norm": 0.5333609580993652, "learning_rate": 3.4752163450893506e-06, "loss": 0.4076, "step": 39660 }, { "epoch": 1.8200633288972512, "grad_norm": 0.4679904282093048, "learning_rate": 3.4749828382456764e-06, "loss": 0.3054, "step": 39661 }, { "epoch": 1.8201092194025055, "grad_norm": 0.4843520522117615, "learning_rate": 3.4747493350691864e-06, "loss": 0.3493, "step": 39662 }, { "epoch": 1.82015510990776, "grad_norm": 0.4795750677585602, "learning_rate": 3.4745158355604435e-06, "loss": 0.3948, "step": 39663 }, { "epoch": 1.8202010004130145, "grad_norm": 0.4739072620868683, "learning_rate": 3.474282339720009e-06, "loss": 0.3475, "step": 39664 }, { "epoch": 1.820246890918269, "grad_norm": 0.4935581386089325, "learning_rate": 3.474048847548441e-06, "loss": 0.3389, "step": 39665 }, { "epoch": 1.8202927814235235, "grad_norm": 0.45583951473236084, "learning_rate": 3.4738153590463065e-06, "loss": 0.338, "step": 39666 }, { "epoch": 1.820338671928778, "grad_norm": 0.4920426905155182, "learning_rate": 3.4735818742141653e-06, "loss": 0.4099, "step": 39667 }, { "epoch": 1.8203845624340325, "grad_norm": 0.4615534543991089, "learning_rate": 3.473348393052575e-06, "loss": 0.3699, "step": 39668 }, { "epoch": 1.820430452939287, "grad_norm": 0.4846288859844208, "learning_rate": 3.473114915562102e-06, "loss": 0.3577, "step": 39669 }, { "epoch": 1.8204763434445415, "grad_norm": 0.4416280686855316, "learning_rate": 3.4728814417433054e-06, "loss": 0.2895, "step": 39670 }, { "epoch": 1.8205222339497957, "grad_norm": 0.47974348068237305, "learning_rate": 3.4726479715967466e-06, "loss": 0.3443, "step": 39671 }, { "epoch": 1.8205681244550502, "grad_norm": 0.4343111217021942, "learning_rate": 3.472414505122988e-06, "loss": 0.2719, "step": 39672 }, { "epoch": 1.8206140149603047, "grad_norm": 0.551262617111206, "learning_rate": 3.4721810423225904e-06, "loss": 0.4476, "step": 39673 }, { "epoch": 1.8206599054655592, "grad_norm": 0.46013858914375305, "learning_rate": 3.471947583196113e-06, "loss": 0.327, "step": 39674 }, { "epoch": 1.8207057959708135, "grad_norm": 0.44072896242141724, "learning_rate": 3.4717141277441217e-06, "loss": 0.2941, "step": 39675 }, { "epoch": 1.820751686476068, "grad_norm": 0.4894869029521942, "learning_rate": 3.471480675967176e-06, "loss": 0.3947, "step": 39676 }, { "epoch": 1.8207975769813225, "grad_norm": 0.4676510989665985, "learning_rate": 3.4712472278658348e-06, "loss": 0.357, "step": 39677 }, { "epoch": 1.820843467486577, "grad_norm": 0.46327951550483704, "learning_rate": 3.4710137834406634e-06, "loss": 0.3319, "step": 39678 }, { "epoch": 1.8208893579918315, "grad_norm": 0.5456066131591797, "learning_rate": 3.470780342692221e-06, "loss": 0.4153, "step": 39679 }, { "epoch": 1.820935248497086, "grad_norm": 0.4520506262779236, "learning_rate": 3.4705469056210673e-06, "loss": 0.3432, "step": 39680 }, { "epoch": 1.8209811390023405, "grad_norm": 0.4608404040336609, "learning_rate": 3.470313472227768e-06, "loss": 0.341, "step": 39681 }, { "epoch": 1.821027029507595, "grad_norm": 0.5059654712677002, "learning_rate": 3.4700800425128813e-06, "loss": 0.4258, "step": 39682 }, { "epoch": 1.8210729200128495, "grad_norm": 0.44469496607780457, "learning_rate": 3.469846616476968e-06, "loss": 0.2854, "step": 39683 }, { "epoch": 1.8211188105181038, "grad_norm": 0.4834306836128235, "learning_rate": 3.4696131941205925e-06, "loss": 0.3455, "step": 39684 }, { "epoch": 1.8211647010233583, "grad_norm": 0.48792532086372375, "learning_rate": 3.469379775444314e-06, "loss": 0.3789, "step": 39685 }, { "epoch": 1.8212105915286128, "grad_norm": 0.5039854645729065, "learning_rate": 3.4691463604486937e-06, "loss": 0.3293, "step": 39686 }, { "epoch": 1.821256482033867, "grad_norm": 0.4919971525669098, "learning_rate": 3.468912949134291e-06, "loss": 0.3886, "step": 39687 }, { "epoch": 1.8213023725391215, "grad_norm": 0.47926145792007446, "learning_rate": 3.4686795415016723e-06, "loss": 0.3757, "step": 39688 }, { "epoch": 1.821348263044376, "grad_norm": 0.433773934841156, "learning_rate": 3.4684461375513965e-06, "loss": 0.2907, "step": 39689 }, { "epoch": 1.8213941535496305, "grad_norm": 0.44634684920310974, "learning_rate": 3.468212737284021e-06, "loss": 0.3116, "step": 39690 }, { "epoch": 1.821440044054885, "grad_norm": 0.47449058294296265, "learning_rate": 3.4679793407001135e-06, "loss": 0.3359, "step": 39691 }, { "epoch": 1.8214859345601395, "grad_norm": 0.48565399646759033, "learning_rate": 3.4677459478002316e-06, "loss": 0.3563, "step": 39692 }, { "epoch": 1.821531825065394, "grad_norm": 0.48586633801460266, "learning_rate": 3.467512558584936e-06, "loss": 0.347, "step": 39693 }, { "epoch": 1.8215777155706485, "grad_norm": 0.426209956407547, "learning_rate": 3.4672791730547907e-06, "loss": 0.3191, "step": 39694 }, { "epoch": 1.821623606075903, "grad_norm": 0.50107741355896, "learning_rate": 3.467045791210354e-06, "loss": 0.4237, "step": 39695 }, { "epoch": 1.8216694965811575, "grad_norm": 0.5386788249015808, "learning_rate": 3.4668124130521874e-06, "loss": 0.2958, "step": 39696 }, { "epoch": 1.8217153870864118, "grad_norm": 0.453836053609848, "learning_rate": 3.4665790385808545e-06, "loss": 0.3383, "step": 39697 }, { "epoch": 1.8217612775916663, "grad_norm": 0.43805840611457825, "learning_rate": 3.466345667796915e-06, "loss": 0.2965, "step": 39698 }, { "epoch": 1.8218071680969208, "grad_norm": 0.48212528228759766, "learning_rate": 3.4661123007009294e-06, "loss": 0.3358, "step": 39699 }, { "epoch": 1.821853058602175, "grad_norm": 0.43631258606910706, "learning_rate": 3.4658789372934608e-06, "loss": 0.2946, "step": 39700 }, { "epoch": 1.8218989491074296, "grad_norm": 0.4848869740962982, "learning_rate": 3.4656455775750684e-06, "loss": 0.3886, "step": 39701 }, { "epoch": 1.821944839612684, "grad_norm": 0.47595199942588806, "learning_rate": 3.465412221546313e-06, "loss": 0.3449, "step": 39702 }, { "epoch": 1.8219907301179386, "grad_norm": 0.4694417417049408, "learning_rate": 3.4651788692077587e-06, "loss": 0.3416, "step": 39703 }, { "epoch": 1.822036620623193, "grad_norm": 0.4631176292896271, "learning_rate": 3.4649455205599646e-06, "loss": 0.3132, "step": 39704 }, { "epoch": 1.8220825111284475, "grad_norm": 0.4707501530647278, "learning_rate": 3.4647121756034913e-06, "loss": 0.3813, "step": 39705 }, { "epoch": 1.822128401633702, "grad_norm": 0.532524824142456, "learning_rate": 3.4644788343389014e-06, "loss": 0.4556, "step": 39706 }, { "epoch": 1.8221742921389565, "grad_norm": 0.44256794452667236, "learning_rate": 3.4642454967667555e-06, "loss": 0.3314, "step": 39707 }, { "epoch": 1.822220182644211, "grad_norm": 0.5229761600494385, "learning_rate": 3.4640121628876122e-06, "loss": 0.4271, "step": 39708 }, { "epoch": 1.8222660731494655, "grad_norm": 0.47077512741088867, "learning_rate": 3.463778832702037e-06, "loss": 0.3966, "step": 39709 }, { "epoch": 1.8223119636547198, "grad_norm": 0.4409750699996948, "learning_rate": 3.463545506210589e-06, "loss": 0.292, "step": 39710 }, { "epoch": 1.8223578541599743, "grad_norm": 0.48615244030952454, "learning_rate": 3.463312183413828e-06, "loss": 0.4027, "step": 39711 }, { "epoch": 1.8224037446652288, "grad_norm": 0.44817638397216797, "learning_rate": 3.463078864312317e-06, "loss": 0.333, "step": 39712 }, { "epoch": 1.822449635170483, "grad_norm": 0.43933331966400146, "learning_rate": 3.4628455489066172e-06, "loss": 0.3531, "step": 39713 }, { "epoch": 1.8224955256757376, "grad_norm": 0.4364699721336365, "learning_rate": 3.4626122371972866e-06, "loss": 0.3056, "step": 39714 }, { "epoch": 1.822541416180992, "grad_norm": 0.4388059973716736, "learning_rate": 3.4623789291848897e-06, "loss": 0.3113, "step": 39715 }, { "epoch": 1.8225873066862466, "grad_norm": 0.47817790508270264, "learning_rate": 3.462145624869986e-06, "loss": 0.3662, "step": 39716 }, { "epoch": 1.822633197191501, "grad_norm": 0.4888109266757965, "learning_rate": 3.461912324253135e-06, "loss": 0.3798, "step": 39717 }, { "epoch": 1.8226790876967556, "grad_norm": 0.47051936388015747, "learning_rate": 3.4616790273349017e-06, "loss": 0.3636, "step": 39718 }, { "epoch": 1.82272497820201, "grad_norm": 0.42735761404037476, "learning_rate": 3.4614457341158446e-06, "loss": 0.3046, "step": 39719 }, { "epoch": 1.8227708687072646, "grad_norm": 0.4589006006717682, "learning_rate": 3.4612124445965245e-06, "loss": 0.3529, "step": 39720 }, { "epoch": 1.822816759212519, "grad_norm": 0.4616396725177765, "learning_rate": 3.460979158777502e-06, "loss": 0.3573, "step": 39721 }, { "epoch": 1.8228626497177733, "grad_norm": 0.44229286909103394, "learning_rate": 3.4607458766593403e-06, "loss": 0.3098, "step": 39722 }, { "epoch": 1.8229085402230278, "grad_norm": 0.47998663783073425, "learning_rate": 3.460512598242599e-06, "loss": 0.3398, "step": 39723 }, { "epoch": 1.8229544307282823, "grad_norm": 0.4813268780708313, "learning_rate": 3.4602793235278364e-06, "loss": 0.3262, "step": 39724 }, { "epoch": 1.8230003212335368, "grad_norm": 0.4682084023952484, "learning_rate": 3.4600460525156187e-06, "loss": 0.3211, "step": 39725 }, { "epoch": 1.8230462117387911, "grad_norm": 0.48063281178474426, "learning_rate": 3.4598127852065037e-06, "loss": 0.3826, "step": 39726 }, { "epoch": 1.8230921022440456, "grad_norm": 0.4624738097190857, "learning_rate": 3.459579521601052e-06, "loss": 0.3566, "step": 39727 }, { "epoch": 1.8231379927493, "grad_norm": 0.459591269493103, "learning_rate": 3.4593462616998263e-06, "loss": 0.3184, "step": 39728 }, { "epoch": 1.8231838832545546, "grad_norm": 0.47669076919555664, "learning_rate": 3.459113005503387e-06, "loss": 0.3811, "step": 39729 }, { "epoch": 1.823229773759809, "grad_norm": 0.4679020643234253, "learning_rate": 3.4588797530122923e-06, "loss": 0.3628, "step": 39730 }, { "epoch": 1.8232756642650636, "grad_norm": 0.5673671364784241, "learning_rate": 3.4586465042271077e-06, "loss": 0.4215, "step": 39731 }, { "epoch": 1.823321554770318, "grad_norm": 0.5005654692649841, "learning_rate": 3.4584132591483914e-06, "loss": 0.4395, "step": 39732 }, { "epoch": 1.8233674452755726, "grad_norm": 0.45318859815597534, "learning_rate": 3.4581800177767042e-06, "loss": 0.3503, "step": 39733 }, { "epoch": 1.823413335780827, "grad_norm": 0.488800585269928, "learning_rate": 3.4579467801126078e-06, "loss": 0.3922, "step": 39734 }, { "epoch": 1.8234592262860814, "grad_norm": 0.4423036277294159, "learning_rate": 3.4577135461566636e-06, "loss": 0.3122, "step": 39735 }, { "epoch": 1.8235051167913359, "grad_norm": 0.48434388637542725, "learning_rate": 3.4574803159094294e-06, "loss": 0.3617, "step": 39736 }, { "epoch": 1.8235510072965904, "grad_norm": 0.5010190010070801, "learning_rate": 3.45724708937147e-06, "loss": 0.3892, "step": 39737 }, { "epoch": 1.8235968978018446, "grad_norm": 0.43173491954803467, "learning_rate": 3.457013866543344e-06, "loss": 0.2992, "step": 39738 }, { "epoch": 1.8236427883070991, "grad_norm": 0.48391714692115784, "learning_rate": 3.456780647425611e-06, "loss": 0.3222, "step": 39739 }, { "epoch": 1.8236886788123536, "grad_norm": 0.41770222783088684, "learning_rate": 3.4565474320188354e-06, "loss": 0.2762, "step": 39740 }, { "epoch": 1.8237345693176081, "grad_norm": 0.43311479687690735, "learning_rate": 3.456314220323576e-06, "loss": 0.2665, "step": 39741 }, { "epoch": 1.8237804598228626, "grad_norm": 0.468510240316391, "learning_rate": 3.4560810123403933e-06, "loss": 0.3814, "step": 39742 }, { "epoch": 1.8238263503281171, "grad_norm": 0.43205875158309937, "learning_rate": 3.4558478080698487e-06, "loss": 0.2997, "step": 39743 }, { "epoch": 1.8238722408333716, "grad_norm": 0.6720637083053589, "learning_rate": 3.455614607512503e-06, "loss": 0.4674, "step": 39744 }, { "epoch": 1.8239181313386261, "grad_norm": 0.443794846534729, "learning_rate": 3.4553814106689143e-06, "loss": 0.3176, "step": 39745 }, { "epoch": 1.8239640218438806, "grad_norm": 0.4892192482948303, "learning_rate": 3.455148217539649e-06, "loss": 0.3601, "step": 39746 }, { "epoch": 1.8240099123491351, "grad_norm": 0.4681258797645569, "learning_rate": 3.4549150281252635e-06, "loss": 0.3648, "step": 39747 }, { "epoch": 1.8240558028543894, "grad_norm": 0.4436596632003784, "learning_rate": 3.4546818424263194e-06, "loss": 0.3328, "step": 39748 }, { "epoch": 1.824101693359644, "grad_norm": 0.4754773676395416, "learning_rate": 3.454448660443378e-06, "loss": 0.3998, "step": 39749 }, { "epoch": 1.8241475838648984, "grad_norm": 0.5269674062728882, "learning_rate": 3.4542154821770006e-06, "loss": 0.4163, "step": 39750 }, { "epoch": 1.8241934743701527, "grad_norm": 0.43985024094581604, "learning_rate": 3.453982307627747e-06, "loss": 0.3125, "step": 39751 }, { "epoch": 1.8242393648754072, "grad_norm": 0.49593809247016907, "learning_rate": 3.453749136796176e-06, "loss": 0.3766, "step": 39752 }, { "epoch": 1.8242852553806617, "grad_norm": 0.4544205963611603, "learning_rate": 3.453515969682852e-06, "loss": 0.2961, "step": 39753 }, { "epoch": 1.8243311458859162, "grad_norm": 0.490241140127182, "learning_rate": 3.453282806288334e-06, "loss": 0.3405, "step": 39754 }, { "epoch": 1.8243770363911707, "grad_norm": 0.4823947250843048, "learning_rate": 3.4530496466131817e-06, "loss": 0.3778, "step": 39755 }, { "epoch": 1.8244229268964252, "grad_norm": 0.48063433170318604, "learning_rate": 3.452816490657958e-06, "loss": 0.3695, "step": 39756 }, { "epoch": 1.8244688174016797, "grad_norm": 0.45817458629608154, "learning_rate": 3.4525833384232222e-06, "loss": 0.3478, "step": 39757 }, { "epoch": 1.8245147079069342, "grad_norm": 0.4703139662742615, "learning_rate": 3.452350189909534e-06, "loss": 0.3877, "step": 39758 }, { "epoch": 1.8245605984121887, "grad_norm": 0.46057528257369995, "learning_rate": 3.4521170451174566e-06, "loss": 0.3769, "step": 39759 }, { "epoch": 1.824606488917443, "grad_norm": 0.6564388871192932, "learning_rate": 3.4518839040475486e-06, "loss": 0.3968, "step": 39760 }, { "epoch": 1.8246523794226974, "grad_norm": 0.4606430232524872, "learning_rate": 3.4516507667003695e-06, "loss": 0.3093, "step": 39761 }, { "epoch": 1.824698269927952, "grad_norm": 0.47507789731025696, "learning_rate": 3.4514176330764837e-06, "loss": 0.356, "step": 39762 }, { "epoch": 1.8247441604332064, "grad_norm": 0.4683626592159271, "learning_rate": 3.4511845031764496e-06, "loss": 0.3369, "step": 39763 }, { "epoch": 1.8247900509384607, "grad_norm": 0.4159708023071289, "learning_rate": 3.4509513770008273e-06, "loss": 0.2911, "step": 39764 }, { "epoch": 1.8248359414437152, "grad_norm": 0.4397841989994049, "learning_rate": 3.450718254550178e-06, "loss": 0.3243, "step": 39765 }, { "epoch": 1.8248818319489697, "grad_norm": 0.4882018268108368, "learning_rate": 3.4504851358250637e-06, "loss": 0.3496, "step": 39766 }, { "epoch": 1.8249277224542242, "grad_norm": 0.4699988067150116, "learning_rate": 3.4502520208260403e-06, "loss": 0.3518, "step": 39767 }, { "epoch": 1.8249736129594787, "grad_norm": 0.45117104053497314, "learning_rate": 3.4500189095536752e-06, "loss": 0.3208, "step": 39768 }, { "epoch": 1.8250195034647332, "grad_norm": 0.45605313777923584, "learning_rate": 3.449785802008524e-06, "loss": 0.3329, "step": 39769 }, { "epoch": 1.8250653939699877, "grad_norm": 0.4631207287311554, "learning_rate": 3.4495526981911488e-06, "loss": 0.3425, "step": 39770 }, { "epoch": 1.8251112844752422, "grad_norm": 0.4506734609603882, "learning_rate": 3.4493195981021097e-06, "loss": 0.3316, "step": 39771 }, { "epoch": 1.8251571749804967, "grad_norm": 0.4647207260131836, "learning_rate": 3.449086501741969e-06, "loss": 0.3362, "step": 39772 }, { "epoch": 1.825203065485751, "grad_norm": 0.4753653407096863, "learning_rate": 3.4488534091112828e-06, "loss": 0.3367, "step": 39773 }, { "epoch": 1.8252489559910055, "grad_norm": 0.513957142829895, "learning_rate": 3.448620320210617e-06, "loss": 0.3708, "step": 39774 }, { "epoch": 1.82529484649626, "grad_norm": 0.444884717464447, "learning_rate": 3.44838723504053e-06, "loss": 0.3327, "step": 39775 }, { "epoch": 1.8253407370015142, "grad_norm": 0.4631721079349518, "learning_rate": 3.4481541536015805e-06, "loss": 0.3553, "step": 39776 }, { "epoch": 1.8253866275067687, "grad_norm": 0.4578433036804199, "learning_rate": 3.4479210758943316e-06, "loss": 0.3217, "step": 39777 }, { "epoch": 1.8254325180120232, "grad_norm": 0.4629529118537903, "learning_rate": 3.4476880019193425e-06, "loss": 0.3538, "step": 39778 }, { "epoch": 1.8254784085172777, "grad_norm": 0.4903775751590729, "learning_rate": 3.447454931677172e-06, "loss": 0.3959, "step": 39779 }, { "epoch": 1.8255242990225322, "grad_norm": 0.4303648769855499, "learning_rate": 3.4472218651683853e-06, "loss": 0.2586, "step": 39780 }, { "epoch": 1.8255701895277867, "grad_norm": 0.4740014672279358, "learning_rate": 3.4469888023935396e-06, "loss": 0.3578, "step": 39781 }, { "epoch": 1.8256160800330412, "grad_norm": 0.4490221440792084, "learning_rate": 3.4467557433531927e-06, "loss": 0.2949, "step": 39782 }, { "epoch": 1.8256619705382957, "grad_norm": 0.5850761532783508, "learning_rate": 3.446522688047911e-06, "loss": 0.411, "step": 39783 }, { "epoch": 1.8257078610435502, "grad_norm": 0.4485396146774292, "learning_rate": 3.4462896364782505e-06, "loss": 0.3439, "step": 39784 }, { "epoch": 1.8257537515488047, "grad_norm": 0.4747643768787384, "learning_rate": 3.4460565886447746e-06, "loss": 0.3826, "step": 39785 }, { "epoch": 1.825799642054059, "grad_norm": 0.503736138343811, "learning_rate": 3.4458235445480404e-06, "loss": 0.3549, "step": 39786 }, { "epoch": 1.8258455325593135, "grad_norm": 0.46490034461021423, "learning_rate": 3.4455905041886113e-06, "loss": 0.3133, "step": 39787 }, { "epoch": 1.825891423064568, "grad_norm": 0.4739399552345276, "learning_rate": 3.4453574675670463e-06, "loss": 0.3926, "step": 39788 }, { "epoch": 1.8259373135698223, "grad_norm": 0.4864806830883026, "learning_rate": 3.4451244346839043e-06, "loss": 0.4054, "step": 39789 }, { "epoch": 1.8259832040750767, "grad_norm": 0.44184496998786926, "learning_rate": 3.4448914055397488e-06, "loss": 0.3281, "step": 39790 }, { "epoch": 1.8260290945803312, "grad_norm": 0.4813521206378937, "learning_rate": 3.4446583801351386e-06, "loss": 0.4117, "step": 39791 }, { "epoch": 1.8260749850855857, "grad_norm": 0.43435606360435486, "learning_rate": 3.4444253584706334e-06, "loss": 0.3239, "step": 39792 }, { "epoch": 1.8261208755908402, "grad_norm": 0.4891532361507416, "learning_rate": 3.4441923405467947e-06, "loss": 0.3756, "step": 39793 }, { "epoch": 1.8261667660960947, "grad_norm": 0.45238667726516724, "learning_rate": 3.4439593263641835e-06, "loss": 0.3422, "step": 39794 }, { "epoch": 1.8262126566013492, "grad_norm": 0.46222174167633057, "learning_rate": 3.4437263159233557e-06, "loss": 0.3502, "step": 39795 }, { "epoch": 1.8262585471066037, "grad_norm": 0.43490520119667053, "learning_rate": 3.443493309224878e-06, "loss": 0.2861, "step": 39796 }, { "epoch": 1.8263044376118582, "grad_norm": 0.5129135251045227, "learning_rate": 3.4432603062693073e-06, "loss": 0.3775, "step": 39797 }, { "epoch": 1.8263503281171127, "grad_norm": 0.47910308837890625, "learning_rate": 3.443027307057203e-06, "loss": 0.3496, "step": 39798 }, { "epoch": 1.826396218622367, "grad_norm": 0.47066253423690796, "learning_rate": 3.442794311589128e-06, "loss": 0.3689, "step": 39799 }, { "epoch": 1.8264421091276215, "grad_norm": 0.4875509738922119, "learning_rate": 3.4425613198656417e-06, "loss": 0.3551, "step": 39800 }, { "epoch": 1.826487999632876, "grad_norm": 0.4715254604816437, "learning_rate": 3.442328331887301e-06, "loss": 0.3456, "step": 39801 }, { "epoch": 1.8265338901381303, "grad_norm": 0.4701811969280243, "learning_rate": 3.4420953476546724e-06, "loss": 0.3607, "step": 39802 }, { "epoch": 1.8265797806433848, "grad_norm": 0.4681417644023895, "learning_rate": 3.4418623671683114e-06, "loss": 0.3312, "step": 39803 }, { "epoch": 1.8266256711486393, "grad_norm": 0.46512049436569214, "learning_rate": 3.4416293904287777e-06, "loss": 0.3428, "step": 39804 }, { "epoch": 1.8266715616538938, "grad_norm": 0.46033889055252075, "learning_rate": 3.441396417436636e-06, "loss": 0.3423, "step": 39805 }, { "epoch": 1.8267174521591483, "grad_norm": 0.476309210062027, "learning_rate": 3.441163448192444e-06, "loss": 0.34, "step": 39806 }, { "epoch": 1.8267633426644028, "grad_norm": 0.49267256259918213, "learning_rate": 3.4409304826967606e-06, "loss": 0.3852, "step": 39807 }, { "epoch": 1.8268092331696573, "grad_norm": 0.4587656557559967, "learning_rate": 3.440697520950148e-06, "loss": 0.3373, "step": 39808 }, { "epoch": 1.8268551236749118, "grad_norm": 0.45361581444740295, "learning_rate": 3.4404645629531665e-06, "loss": 0.3508, "step": 39809 }, { "epoch": 1.8269010141801663, "grad_norm": 0.4888070225715637, "learning_rate": 3.440231608706373e-06, "loss": 0.402, "step": 39810 }, { "epoch": 1.8269469046854205, "grad_norm": 0.4693959653377533, "learning_rate": 3.4399986582103324e-06, "loss": 0.3154, "step": 39811 }, { "epoch": 1.826992795190675, "grad_norm": 0.49927473068237305, "learning_rate": 3.4397657114656025e-06, "loss": 0.3884, "step": 39812 }, { "epoch": 1.8270386856959295, "grad_norm": 0.41470178961753845, "learning_rate": 3.439532768472743e-06, "loss": 0.2709, "step": 39813 }, { "epoch": 1.827084576201184, "grad_norm": 0.46712177991867065, "learning_rate": 3.4392998292323153e-06, "loss": 0.3596, "step": 39814 }, { "epoch": 1.8271304667064383, "grad_norm": 0.47077158093452454, "learning_rate": 3.439066893744879e-06, "loss": 0.3682, "step": 39815 }, { "epoch": 1.8271763572116928, "grad_norm": 0.5060456991195679, "learning_rate": 3.4388339620109947e-06, "loss": 0.4576, "step": 39816 }, { "epoch": 1.8272222477169473, "grad_norm": 0.469989538192749, "learning_rate": 3.4386010340312192e-06, "loss": 0.357, "step": 39817 }, { "epoch": 1.8272681382222018, "grad_norm": 0.4360527992248535, "learning_rate": 3.4383681098061185e-06, "loss": 0.2945, "step": 39818 }, { "epoch": 1.8273140287274563, "grad_norm": 0.46837764978408813, "learning_rate": 3.4381351893362486e-06, "loss": 0.3872, "step": 39819 }, { "epoch": 1.8273599192327108, "grad_norm": 0.4442499279975891, "learning_rate": 3.43790227262217e-06, "loss": 0.2968, "step": 39820 }, { "epoch": 1.8274058097379653, "grad_norm": 0.443032830953598, "learning_rate": 3.437669359664444e-06, "loss": 0.2933, "step": 39821 }, { "epoch": 1.8274517002432198, "grad_norm": 0.41946423053741455, "learning_rate": 3.4374364504636305e-06, "loss": 0.2815, "step": 39822 }, { "epoch": 1.8274975907484743, "grad_norm": 0.49188661575317383, "learning_rate": 3.4372035450202872e-06, "loss": 0.389, "step": 39823 }, { "epoch": 1.8275434812537286, "grad_norm": 0.4841740131378174, "learning_rate": 3.4369706433349793e-06, "loss": 0.3602, "step": 39824 }, { "epoch": 1.827589371758983, "grad_norm": 0.48956382274627686, "learning_rate": 3.4367377454082628e-06, "loss": 0.3884, "step": 39825 }, { "epoch": 1.8276352622642376, "grad_norm": 0.49822378158569336, "learning_rate": 3.4365048512406962e-06, "loss": 0.4251, "step": 39826 }, { "epoch": 1.8276811527694918, "grad_norm": 0.5036949515342712, "learning_rate": 3.4362719608328443e-06, "loss": 0.3642, "step": 39827 }, { "epoch": 1.8277270432747463, "grad_norm": 0.47843703627586365, "learning_rate": 3.4360390741852646e-06, "loss": 0.3588, "step": 39828 }, { "epoch": 1.8277729337800008, "grad_norm": 0.4904283285140991, "learning_rate": 3.4358061912985163e-06, "loss": 0.3453, "step": 39829 }, { "epoch": 1.8278188242852553, "grad_norm": 0.44428929686546326, "learning_rate": 3.4355733121731614e-06, "loss": 0.3184, "step": 39830 }, { "epoch": 1.8278647147905098, "grad_norm": 0.514350414276123, "learning_rate": 3.4353404368097585e-06, "loss": 0.4048, "step": 39831 }, { "epoch": 1.8279106052957643, "grad_norm": 0.48060178756713867, "learning_rate": 3.435107565208867e-06, "loss": 0.3627, "step": 39832 }, { "epoch": 1.8279564958010188, "grad_norm": 0.5014491677284241, "learning_rate": 3.4348746973710495e-06, "loss": 0.4083, "step": 39833 }, { "epoch": 1.8280023863062733, "grad_norm": 0.4820750951766968, "learning_rate": 3.4346418332968644e-06, "loss": 0.3833, "step": 39834 }, { "epoch": 1.8280482768115278, "grad_norm": 0.43934452533721924, "learning_rate": 3.4344089729868705e-06, "loss": 0.3061, "step": 39835 }, { "epoch": 1.8280941673167823, "grad_norm": 0.43806639313697815, "learning_rate": 3.4341761164416297e-06, "loss": 0.2665, "step": 39836 }, { "epoch": 1.8281400578220366, "grad_norm": 0.45623186230659485, "learning_rate": 3.433943263661702e-06, "loss": 0.2958, "step": 39837 }, { "epoch": 1.828185948327291, "grad_norm": 0.46764761209487915, "learning_rate": 3.433710414647644e-06, "loss": 0.359, "step": 39838 }, { "epoch": 1.8282318388325456, "grad_norm": 0.44870442152023315, "learning_rate": 3.43347756940002e-06, "loss": 0.2754, "step": 39839 }, { "epoch": 1.8282777293377999, "grad_norm": 0.5139065980911255, "learning_rate": 3.433244727919388e-06, "loss": 0.4244, "step": 39840 }, { "epoch": 1.8283236198430544, "grad_norm": 0.4684973955154419, "learning_rate": 3.433011890206307e-06, "loss": 0.34, "step": 39841 }, { "epoch": 1.8283695103483089, "grad_norm": 0.43400582671165466, "learning_rate": 3.4327790562613387e-06, "loss": 0.3011, "step": 39842 }, { "epoch": 1.8284154008535634, "grad_norm": 0.44612035155296326, "learning_rate": 3.432546226085043e-06, "loss": 0.3126, "step": 39843 }, { "epoch": 1.8284612913588179, "grad_norm": 0.4521428942680359, "learning_rate": 3.4323133996779755e-06, "loss": 0.342, "step": 39844 }, { "epoch": 1.8285071818640724, "grad_norm": 0.4564052224159241, "learning_rate": 3.4320805770407028e-06, "loss": 0.3322, "step": 39845 }, { "epoch": 1.8285530723693268, "grad_norm": 0.49334412813186646, "learning_rate": 3.431847758173782e-06, "loss": 0.3541, "step": 39846 }, { "epoch": 1.8285989628745813, "grad_norm": 0.5017808675765991, "learning_rate": 3.431614943077769e-06, "loss": 0.4264, "step": 39847 }, { "epoch": 1.8286448533798358, "grad_norm": 0.4284824728965759, "learning_rate": 3.43138213175323e-06, "loss": 0.2848, "step": 39848 }, { "epoch": 1.8286907438850901, "grad_norm": 0.4694102704524994, "learning_rate": 3.431149324200721e-06, "loss": 0.3354, "step": 39849 }, { "epoch": 1.8287366343903446, "grad_norm": 0.479158878326416, "learning_rate": 3.430916520420803e-06, "loss": 0.3564, "step": 39850 }, { "epoch": 1.8287825248955991, "grad_norm": 0.5006948113441467, "learning_rate": 3.4306837204140346e-06, "loss": 0.374, "step": 39851 }, { "epoch": 1.8288284154008536, "grad_norm": 0.46287286281585693, "learning_rate": 3.430450924180978e-06, "loss": 0.3488, "step": 39852 }, { "epoch": 1.8288743059061079, "grad_norm": 0.48436230421066284, "learning_rate": 3.4302181317221905e-06, "loss": 0.3773, "step": 39853 }, { "epoch": 1.8289201964113624, "grad_norm": 0.4432602524757385, "learning_rate": 3.4299853430382318e-06, "loss": 0.3326, "step": 39854 }, { "epoch": 1.8289660869166169, "grad_norm": 0.4736188054084778, "learning_rate": 3.429752558129664e-06, "loss": 0.3141, "step": 39855 }, { "epoch": 1.8290119774218714, "grad_norm": 0.48585787415504456, "learning_rate": 3.429519776997047e-06, "loss": 0.3587, "step": 39856 }, { "epoch": 1.8290578679271259, "grad_norm": 0.5037298202514648, "learning_rate": 3.4292869996409374e-06, "loss": 0.3874, "step": 39857 }, { "epoch": 1.8291037584323804, "grad_norm": 0.47064104676246643, "learning_rate": 3.4290542260618987e-06, "loss": 0.3342, "step": 39858 }, { "epoch": 1.8291496489376349, "grad_norm": 0.4904780685901642, "learning_rate": 3.4288214562604876e-06, "loss": 0.3616, "step": 39859 }, { "epoch": 1.8291955394428894, "grad_norm": 0.42936259508132935, "learning_rate": 3.428588690237263e-06, "loss": 0.2876, "step": 39860 }, { "epoch": 1.8292414299481439, "grad_norm": 0.44854018092155457, "learning_rate": 3.4283559279927895e-06, "loss": 0.3426, "step": 39861 }, { "epoch": 1.8292873204533981, "grad_norm": 0.4780154228210449, "learning_rate": 3.428123169527624e-06, "loss": 0.3507, "step": 39862 }, { "epoch": 1.8293332109586526, "grad_norm": 0.4782347083091736, "learning_rate": 3.4278904148423247e-06, "loss": 0.3607, "step": 39863 }, { "epoch": 1.8293791014639071, "grad_norm": 0.4915837347507477, "learning_rate": 3.4276576639374542e-06, "loss": 0.3442, "step": 39864 }, { "epoch": 1.8294249919691614, "grad_norm": 0.47087234258651733, "learning_rate": 3.4274249168135702e-06, "loss": 0.3416, "step": 39865 }, { "epoch": 1.829470882474416, "grad_norm": 0.4413788914680481, "learning_rate": 3.427192173471231e-06, "loss": 0.3168, "step": 39866 }, { "epoch": 1.8295167729796704, "grad_norm": 0.5047722458839417, "learning_rate": 3.4269594339110014e-06, "loss": 0.3886, "step": 39867 }, { "epoch": 1.829562663484925, "grad_norm": 0.43959546089172363, "learning_rate": 3.426726698133438e-06, "loss": 0.3056, "step": 39868 }, { "epoch": 1.8296085539901794, "grad_norm": 0.4711773991584778, "learning_rate": 3.4264939661390973e-06, "loss": 0.3377, "step": 39869 }, { "epoch": 1.829654444495434, "grad_norm": 0.4531530439853668, "learning_rate": 3.426261237928544e-06, "loss": 0.3111, "step": 39870 }, { "epoch": 1.8297003350006884, "grad_norm": 0.4908592402935028, "learning_rate": 3.4260285135023353e-06, "loss": 0.3614, "step": 39871 }, { "epoch": 1.829746225505943, "grad_norm": 0.4415791928768158, "learning_rate": 3.425795792861031e-06, "loss": 0.3429, "step": 39872 }, { "epoch": 1.8297921160111974, "grad_norm": 0.48063236474990845, "learning_rate": 3.4255630760051916e-06, "loss": 0.3899, "step": 39873 }, { "epoch": 1.829838006516452, "grad_norm": 0.5113020539283752, "learning_rate": 3.4253303629353762e-06, "loss": 0.4156, "step": 39874 }, { "epoch": 1.8298838970217062, "grad_norm": 0.46374186873435974, "learning_rate": 3.425097653652142e-06, "loss": 0.3063, "step": 39875 }, { "epoch": 1.8299297875269607, "grad_norm": 0.4975281357765198, "learning_rate": 3.4248649481560535e-06, "loss": 0.3633, "step": 39876 }, { "epoch": 1.8299756780322152, "grad_norm": 0.4965936541557312, "learning_rate": 3.4246322464476676e-06, "loss": 0.3655, "step": 39877 }, { "epoch": 1.8300215685374694, "grad_norm": 0.4819747805595398, "learning_rate": 3.424399548527543e-06, "loss": 0.3423, "step": 39878 }, { "epoch": 1.830067459042724, "grad_norm": 0.48193904757499695, "learning_rate": 3.4241668543962403e-06, "loss": 0.384, "step": 39879 }, { "epoch": 1.8301133495479784, "grad_norm": 0.4784378409385681, "learning_rate": 3.42393416405432e-06, "loss": 0.3506, "step": 39880 }, { "epoch": 1.830159240053233, "grad_norm": 0.49161818623542786, "learning_rate": 3.423701477502338e-06, "loss": 0.4141, "step": 39881 }, { "epoch": 1.8302051305584874, "grad_norm": 0.4426211416721344, "learning_rate": 3.4234687947408586e-06, "loss": 0.3017, "step": 39882 }, { "epoch": 1.830251021063742, "grad_norm": 0.5334861874580383, "learning_rate": 3.4232361157704395e-06, "loss": 0.4947, "step": 39883 }, { "epoch": 1.8302969115689964, "grad_norm": 0.4907417893409729, "learning_rate": 3.4230034405916397e-06, "loss": 0.379, "step": 39884 }, { "epoch": 1.830342802074251, "grad_norm": 0.43633753061294556, "learning_rate": 3.4227707692050178e-06, "loss": 0.2903, "step": 39885 }, { "epoch": 1.8303886925795054, "grad_norm": 0.44748276472091675, "learning_rate": 3.4225381016111354e-06, "loss": 0.3034, "step": 39886 }, { "epoch": 1.83043458308476, "grad_norm": 0.45776447653770447, "learning_rate": 3.422305437810551e-06, "loss": 0.3642, "step": 39887 }, { "epoch": 1.8304804735900142, "grad_norm": 0.461580365896225, "learning_rate": 3.422072777803822e-06, "loss": 0.3569, "step": 39888 }, { "epoch": 1.8305263640952687, "grad_norm": 0.46489113569259644, "learning_rate": 3.421840121591512e-06, "loss": 0.34, "step": 39889 }, { "epoch": 1.8305722546005232, "grad_norm": 0.48917025327682495, "learning_rate": 3.4216074691741795e-06, "loss": 0.3658, "step": 39890 }, { "epoch": 1.8306181451057775, "grad_norm": 0.4313655197620392, "learning_rate": 3.42137482055238e-06, "loss": 0.2858, "step": 39891 }, { "epoch": 1.830664035611032, "grad_norm": 0.4934520125389099, "learning_rate": 3.4211421757266784e-06, "loss": 0.3974, "step": 39892 }, { "epoch": 1.8307099261162865, "grad_norm": 0.4365563690662384, "learning_rate": 3.4209095346976305e-06, "loss": 0.2897, "step": 39893 }, { "epoch": 1.830755816621541, "grad_norm": 0.4462529718875885, "learning_rate": 3.420676897465796e-06, "loss": 0.2998, "step": 39894 }, { "epoch": 1.8308017071267955, "grad_norm": 0.4518919289112091, "learning_rate": 3.420444264031737e-06, "loss": 0.3034, "step": 39895 }, { "epoch": 1.83084759763205, "grad_norm": 0.45529159903526306, "learning_rate": 3.4202116343960103e-06, "loss": 0.3316, "step": 39896 }, { "epoch": 1.8308934881373045, "grad_norm": 0.47908708453178406, "learning_rate": 3.4199790085591734e-06, "loss": 0.403, "step": 39897 }, { "epoch": 1.830939378642559, "grad_norm": 0.4375518560409546, "learning_rate": 3.4197463865217918e-06, "loss": 0.281, "step": 39898 }, { "epoch": 1.8309852691478135, "grad_norm": 0.504301130771637, "learning_rate": 3.4195137682844205e-06, "loss": 0.3515, "step": 39899 }, { "epoch": 1.8310311596530677, "grad_norm": 0.4544859230518341, "learning_rate": 3.4192811538476184e-06, "loss": 0.3547, "step": 39900 }, { "epoch": 1.8310770501583222, "grad_norm": 0.5025752186775208, "learning_rate": 3.419048543211948e-06, "loss": 0.4656, "step": 39901 }, { "epoch": 1.8311229406635767, "grad_norm": 0.46962785720825195, "learning_rate": 3.418815936377966e-06, "loss": 0.3573, "step": 39902 }, { "epoch": 1.8311688311688312, "grad_norm": 0.4398167133331299, "learning_rate": 3.4185833333462317e-06, "loss": 0.3252, "step": 39903 }, { "epoch": 1.8312147216740855, "grad_norm": 0.4696708023548126, "learning_rate": 3.4183507341173073e-06, "loss": 0.3451, "step": 39904 }, { "epoch": 1.83126061217934, "grad_norm": 0.499142587184906, "learning_rate": 3.4181181386917497e-06, "loss": 0.4089, "step": 39905 }, { "epoch": 1.8313065026845945, "grad_norm": 0.43776917457580566, "learning_rate": 3.417885547070118e-06, "loss": 0.3156, "step": 39906 }, { "epoch": 1.831352393189849, "grad_norm": 0.45756399631500244, "learning_rate": 3.417652959252973e-06, "loss": 0.313, "step": 39907 }, { "epoch": 1.8313982836951035, "grad_norm": 0.5262242555618286, "learning_rate": 3.417420375240873e-06, "loss": 0.3805, "step": 39908 }, { "epoch": 1.831444174200358, "grad_norm": 0.4564991891384125, "learning_rate": 3.4171877950343755e-06, "loss": 0.3181, "step": 39909 }, { "epoch": 1.8314900647056125, "grad_norm": 0.4668429493904114, "learning_rate": 3.4169552186340444e-06, "loss": 0.3389, "step": 39910 }, { "epoch": 1.831535955210867, "grad_norm": 0.4947572350502014, "learning_rate": 3.4167226460404357e-06, "loss": 0.399, "step": 39911 }, { "epoch": 1.8315818457161215, "grad_norm": 0.45476171374320984, "learning_rate": 3.4164900772541087e-06, "loss": 0.3436, "step": 39912 }, { "epoch": 1.8316277362213758, "grad_norm": 0.4689728319644928, "learning_rate": 3.4162575122756248e-06, "loss": 0.352, "step": 39913 }, { "epoch": 1.8316736267266303, "grad_norm": 0.47441449761390686, "learning_rate": 3.416024951105541e-06, "loss": 0.3581, "step": 39914 }, { "epoch": 1.8317195172318848, "grad_norm": 0.48141568899154663, "learning_rate": 3.4157923937444177e-06, "loss": 0.3907, "step": 39915 }, { "epoch": 1.831765407737139, "grad_norm": 0.4737670123577118, "learning_rate": 3.4155598401928126e-06, "loss": 0.3807, "step": 39916 }, { "epoch": 1.8318112982423935, "grad_norm": 0.5022450089454651, "learning_rate": 3.4153272904512867e-06, "loss": 0.4341, "step": 39917 }, { "epoch": 1.831857188747648, "grad_norm": 0.4362969696521759, "learning_rate": 3.4150947445203985e-06, "loss": 0.3085, "step": 39918 }, { "epoch": 1.8319030792529025, "grad_norm": 0.45722270011901855, "learning_rate": 3.414862202400706e-06, "loss": 0.3178, "step": 39919 }, { "epoch": 1.831948969758157, "grad_norm": 0.5092957615852356, "learning_rate": 3.414629664092771e-06, "loss": 0.3642, "step": 39920 }, { "epoch": 1.8319948602634115, "grad_norm": 0.4718768298625946, "learning_rate": 3.4143971295971514e-06, "loss": 0.3758, "step": 39921 }, { "epoch": 1.832040750768666, "grad_norm": 0.4614443778991699, "learning_rate": 3.4141645989144055e-06, "loss": 0.3404, "step": 39922 }, { "epoch": 1.8320866412739205, "grad_norm": 0.44428136944770813, "learning_rate": 3.4139320720450943e-06, "loss": 0.3066, "step": 39923 }, { "epoch": 1.832132531779175, "grad_norm": 0.5068833231925964, "learning_rate": 3.413699548989775e-06, "loss": 0.3566, "step": 39924 }, { "epoch": 1.8321784222844295, "grad_norm": 0.4967723488807678, "learning_rate": 3.4134670297490064e-06, "loss": 0.3694, "step": 39925 }, { "epoch": 1.8322243127896838, "grad_norm": 0.46272847056388855, "learning_rate": 3.413234514323351e-06, "loss": 0.3386, "step": 39926 }, { "epoch": 1.8322702032949383, "grad_norm": 0.4361560046672821, "learning_rate": 3.4130020027133653e-06, "loss": 0.2914, "step": 39927 }, { "epoch": 1.8323160938001928, "grad_norm": 0.46583291888237, "learning_rate": 3.4127694949196083e-06, "loss": 0.3607, "step": 39928 }, { "epoch": 1.832361984305447, "grad_norm": 0.4693363606929779, "learning_rate": 3.41253699094264e-06, "loss": 0.3481, "step": 39929 }, { "epoch": 1.8324078748107016, "grad_norm": 0.43972843885421753, "learning_rate": 3.4123044907830205e-06, "loss": 0.3416, "step": 39930 }, { "epoch": 1.832453765315956, "grad_norm": 0.45557788014411926, "learning_rate": 3.412071994441305e-06, "loss": 0.3111, "step": 39931 }, { "epoch": 1.8324996558212105, "grad_norm": 0.44942227005958557, "learning_rate": 3.411839501918057e-06, "loss": 0.2779, "step": 39932 }, { "epoch": 1.832545546326465, "grad_norm": 0.4538784921169281, "learning_rate": 3.4116070132138335e-06, "loss": 0.3231, "step": 39933 }, { "epoch": 1.8325914368317195, "grad_norm": 0.47214362025260925, "learning_rate": 3.4113745283291934e-06, "loss": 0.3921, "step": 39934 }, { "epoch": 1.832637327336974, "grad_norm": 0.4535367786884308, "learning_rate": 3.4111420472646965e-06, "loss": 0.3406, "step": 39935 }, { "epoch": 1.8326832178422285, "grad_norm": 0.4443082809448242, "learning_rate": 3.4109095700209022e-06, "loss": 0.3302, "step": 39936 }, { "epoch": 1.832729108347483, "grad_norm": 0.4912261366844177, "learning_rate": 3.4106770965983675e-06, "loss": 0.3939, "step": 39937 }, { "epoch": 1.8327749988527373, "grad_norm": 0.4652918875217438, "learning_rate": 3.4104446269976543e-06, "loss": 0.3852, "step": 39938 }, { "epoch": 1.8328208893579918, "grad_norm": 0.44766363501548767, "learning_rate": 3.4102121612193196e-06, "loss": 0.3308, "step": 39939 }, { "epoch": 1.8328667798632463, "grad_norm": 0.48971042037010193, "learning_rate": 3.409979699263921e-06, "loss": 0.3374, "step": 39940 }, { "epoch": 1.8329126703685008, "grad_norm": 0.4273504912853241, "learning_rate": 3.409747241132021e-06, "loss": 0.3065, "step": 39941 }, { "epoch": 1.832958560873755, "grad_norm": 0.4703972637653351, "learning_rate": 3.4095147868241772e-06, "loss": 0.367, "step": 39942 }, { "epoch": 1.8330044513790096, "grad_norm": 0.48062074184417725, "learning_rate": 3.4092823363409477e-06, "loss": 0.3815, "step": 39943 }, { "epoch": 1.833050341884264, "grad_norm": 0.4576103985309601, "learning_rate": 3.409049889682893e-06, "loss": 0.3412, "step": 39944 }, { "epoch": 1.8330962323895186, "grad_norm": 0.490282267332077, "learning_rate": 3.4088174468505713e-06, "loss": 0.3745, "step": 39945 }, { "epoch": 1.833142122894773, "grad_norm": 0.45636287331581116, "learning_rate": 3.408585007844539e-06, "loss": 0.3342, "step": 39946 }, { "epoch": 1.8331880134000276, "grad_norm": 0.4622512757778168, "learning_rate": 3.40835257266536e-06, "loss": 0.3209, "step": 39947 }, { "epoch": 1.833233903905282, "grad_norm": 0.49249333143234253, "learning_rate": 3.408120141313591e-06, "loss": 0.3949, "step": 39948 }, { "epoch": 1.8332797944105366, "grad_norm": 0.4692722260951996, "learning_rate": 3.40788771378979e-06, "loss": 0.3698, "step": 39949 }, { "epoch": 1.833325684915791, "grad_norm": 0.4519585967063904, "learning_rate": 3.407655290094516e-06, "loss": 0.3191, "step": 39950 }, { "epoch": 1.8333715754210453, "grad_norm": 0.4598078429698944, "learning_rate": 3.4074228702283287e-06, "loss": 0.326, "step": 39951 }, { "epoch": 1.8334174659262998, "grad_norm": 0.4811152219772339, "learning_rate": 3.407190454191788e-06, "loss": 0.3936, "step": 39952 }, { "epoch": 1.8334633564315543, "grad_norm": 0.43513578176498413, "learning_rate": 3.406958041985448e-06, "loss": 0.3211, "step": 39953 }, { "epoch": 1.8335092469368086, "grad_norm": 0.4456690549850464, "learning_rate": 3.4067256336098744e-06, "loss": 0.2995, "step": 39954 }, { "epoch": 1.833555137442063, "grad_norm": 0.4899369776248932, "learning_rate": 3.406493229065622e-06, "loss": 0.361, "step": 39955 }, { "epoch": 1.8336010279473176, "grad_norm": 0.4825911521911621, "learning_rate": 3.40626082835325e-06, "loss": 0.3508, "step": 39956 }, { "epoch": 1.833646918452572, "grad_norm": 0.4323596656322479, "learning_rate": 3.406028431473318e-06, "loss": 0.2819, "step": 39957 }, { "epoch": 1.8336928089578266, "grad_norm": 0.4366082549095154, "learning_rate": 3.4057960384263857e-06, "loss": 0.3289, "step": 39958 }, { "epoch": 1.833738699463081, "grad_norm": 0.47617030143737793, "learning_rate": 3.405563649213009e-06, "loss": 0.3799, "step": 39959 }, { "epoch": 1.8337845899683356, "grad_norm": 0.48050934076309204, "learning_rate": 3.40533126383375e-06, "loss": 0.4027, "step": 39960 }, { "epoch": 1.83383048047359, "grad_norm": 0.4609653353691101, "learning_rate": 3.405098882289165e-06, "loss": 0.3463, "step": 39961 }, { "epoch": 1.8338763709788446, "grad_norm": 0.5062091946601868, "learning_rate": 3.404866504579813e-06, "loss": 0.3904, "step": 39962 }, { "epoch": 1.833922261484099, "grad_norm": 0.4677774906158447, "learning_rate": 3.4046341307062547e-06, "loss": 0.3391, "step": 39963 }, { "epoch": 1.8339681519893534, "grad_norm": 0.47597381472587585, "learning_rate": 3.4044017606690485e-06, "loss": 0.3594, "step": 39964 }, { "epoch": 1.8340140424946079, "grad_norm": 0.508150577545166, "learning_rate": 3.4041693944687515e-06, "loss": 0.4189, "step": 39965 }, { "epoch": 1.8340599329998624, "grad_norm": 0.4767831265926361, "learning_rate": 3.403937032105924e-06, "loss": 0.3583, "step": 39966 }, { "epoch": 1.8341058235051166, "grad_norm": 0.473952054977417, "learning_rate": 3.403704673581124e-06, "loss": 0.3991, "step": 39967 }, { "epoch": 1.8341517140103711, "grad_norm": 0.5166555643081665, "learning_rate": 3.4034723188949092e-06, "loss": 0.4863, "step": 39968 }, { "epoch": 1.8341976045156256, "grad_norm": 0.48794183135032654, "learning_rate": 3.4032399680478413e-06, "loss": 0.4529, "step": 39969 }, { "epoch": 1.8342434950208801, "grad_norm": 0.46893325448036194, "learning_rate": 3.403007621040477e-06, "loss": 0.3336, "step": 39970 }, { "epoch": 1.8342893855261346, "grad_norm": 0.4389420747756958, "learning_rate": 3.4027752778733747e-06, "loss": 0.3264, "step": 39971 }, { "epoch": 1.8343352760313891, "grad_norm": 0.46775805950164795, "learning_rate": 3.402542938547094e-06, "loss": 0.3536, "step": 39972 }, { "epoch": 1.8343811665366436, "grad_norm": 0.47164833545684814, "learning_rate": 3.4023106030621943e-06, "loss": 0.4151, "step": 39973 }, { "epoch": 1.8344270570418981, "grad_norm": 0.48729804158210754, "learning_rate": 3.4020782714192314e-06, "loss": 0.3967, "step": 39974 }, { "epoch": 1.8344729475471526, "grad_norm": 0.48050767183303833, "learning_rate": 3.401845943618768e-06, "loss": 0.3341, "step": 39975 }, { "epoch": 1.8345188380524071, "grad_norm": 0.47942933440208435, "learning_rate": 3.4016136196613596e-06, "loss": 0.3809, "step": 39976 }, { "epoch": 1.8345647285576614, "grad_norm": 0.4591526687145233, "learning_rate": 3.401381299547566e-06, "loss": 0.3164, "step": 39977 }, { "epoch": 1.834610619062916, "grad_norm": 0.44258183240890503, "learning_rate": 3.4011489832779465e-06, "loss": 0.3123, "step": 39978 }, { "epoch": 1.8346565095681704, "grad_norm": 0.45343753695487976, "learning_rate": 3.4009166708530596e-06, "loss": 0.3209, "step": 39979 }, { "epoch": 1.8347024000734247, "grad_norm": 0.4733816087245941, "learning_rate": 3.400684362273462e-06, "loss": 0.3789, "step": 39980 }, { "epoch": 1.8347482905786792, "grad_norm": 0.5549359321594238, "learning_rate": 3.4004520575397137e-06, "loss": 0.4606, "step": 39981 }, { "epoch": 1.8347941810839337, "grad_norm": 0.4625313878059387, "learning_rate": 3.4002197566523748e-06, "loss": 0.3515, "step": 39982 }, { "epoch": 1.8348400715891882, "grad_norm": 0.5009883046150208, "learning_rate": 3.399987459612002e-06, "loss": 0.3661, "step": 39983 }, { "epoch": 1.8348859620944427, "grad_norm": 0.43865346908569336, "learning_rate": 3.399755166419152e-06, "loss": 0.31, "step": 39984 }, { "epoch": 1.8349318525996972, "grad_norm": 0.4846648573875427, "learning_rate": 3.3995228770743882e-06, "loss": 0.369, "step": 39985 }, { "epoch": 1.8349777431049517, "grad_norm": 0.4774952530860901, "learning_rate": 3.399290591578267e-06, "loss": 0.4023, "step": 39986 }, { "epoch": 1.8350236336102062, "grad_norm": 0.48021814227104187, "learning_rate": 3.399058309931346e-06, "loss": 0.3413, "step": 39987 }, { "epoch": 1.8350695241154606, "grad_norm": 0.48949164152145386, "learning_rate": 3.398826032134185e-06, "loss": 0.3826, "step": 39988 }, { "epoch": 1.835115414620715, "grad_norm": 0.44999539852142334, "learning_rate": 3.3985937581873416e-06, "loss": 0.3031, "step": 39989 }, { "epoch": 1.8351613051259694, "grad_norm": 0.4809440076351166, "learning_rate": 3.398361488091373e-06, "loss": 0.3567, "step": 39990 }, { "epoch": 1.835207195631224, "grad_norm": 0.49652642011642456, "learning_rate": 3.398129221846842e-06, "loss": 0.3798, "step": 39991 }, { "epoch": 1.8352530861364784, "grad_norm": 0.5037232041358948, "learning_rate": 3.397896959454304e-06, "loss": 0.398, "step": 39992 }, { "epoch": 1.8352989766417327, "grad_norm": 0.46513646841049194, "learning_rate": 3.397664700914318e-06, "loss": 0.3739, "step": 39993 }, { "epoch": 1.8353448671469872, "grad_norm": 0.4537631869316101, "learning_rate": 3.397432446227443e-06, "loss": 0.383, "step": 39994 }, { "epoch": 1.8353907576522417, "grad_norm": 0.4685698449611664, "learning_rate": 3.3972001953942373e-06, "loss": 0.3475, "step": 39995 }, { "epoch": 1.8354366481574962, "grad_norm": 0.5096879601478577, "learning_rate": 3.3969679484152575e-06, "loss": 0.3597, "step": 39996 }, { "epoch": 1.8354825386627507, "grad_norm": 0.48225119709968567, "learning_rate": 3.3967357052910653e-06, "loss": 0.4235, "step": 39997 }, { "epoch": 1.8355284291680052, "grad_norm": 0.4618787467479706, "learning_rate": 3.3965034660222186e-06, "loss": 0.354, "step": 39998 }, { "epoch": 1.8355743196732597, "grad_norm": 0.4447525143623352, "learning_rate": 3.396271230609274e-06, "loss": 0.3368, "step": 39999 }, { "epoch": 1.8356202101785142, "grad_norm": 0.5123956203460693, "learning_rate": 3.3960389990527908e-06, "loss": 0.3257, "step": 40000 }, { "epoch": 1.8356661006837687, "grad_norm": 0.4180506765842438, "learning_rate": 3.395806771353328e-06, "loss": 0.2989, "step": 40001 }, { "epoch": 1.835711991189023, "grad_norm": 0.4920891523361206, "learning_rate": 3.395574547511442e-06, "loss": 0.4188, "step": 40002 }, { "epoch": 1.8357578816942774, "grad_norm": 0.48719072341918945, "learning_rate": 3.3953423275276963e-06, "loss": 0.381, "step": 40003 }, { "epoch": 1.835803772199532, "grad_norm": 0.44342103600502014, "learning_rate": 3.395110111402644e-06, "loss": 0.2798, "step": 40004 }, { "epoch": 1.8358496627047862, "grad_norm": 0.4603930711746216, "learning_rate": 3.3948778991368435e-06, "loss": 0.3287, "step": 40005 }, { "epoch": 1.8358955532100407, "grad_norm": 0.5122914910316467, "learning_rate": 3.3946456907308577e-06, "loss": 0.4086, "step": 40006 }, { "epoch": 1.8359414437152952, "grad_norm": 0.4727425277233124, "learning_rate": 3.394413486185242e-06, "loss": 0.3528, "step": 40007 }, { "epoch": 1.8359873342205497, "grad_norm": 0.44000017642974854, "learning_rate": 3.394181285500554e-06, "loss": 0.3033, "step": 40008 }, { "epoch": 1.8360332247258042, "grad_norm": 0.4536668360233307, "learning_rate": 3.393949088677354e-06, "loss": 0.3525, "step": 40009 }, { "epoch": 1.8360791152310587, "grad_norm": 0.4706602990627289, "learning_rate": 3.3937168957161996e-06, "loss": 0.3976, "step": 40010 }, { "epoch": 1.8361250057363132, "grad_norm": 0.48928114771842957, "learning_rate": 3.3934847066176475e-06, "loss": 0.3642, "step": 40011 }, { "epoch": 1.8361708962415677, "grad_norm": 0.44901615381240845, "learning_rate": 3.3932525213822594e-06, "loss": 0.2943, "step": 40012 }, { "epoch": 1.8362167867468222, "grad_norm": 0.47857481241226196, "learning_rate": 3.393020340010592e-06, "loss": 0.348, "step": 40013 }, { "epoch": 1.8362626772520767, "grad_norm": 0.4598817229270935, "learning_rate": 3.392788162503203e-06, "loss": 0.3114, "step": 40014 }, { "epoch": 1.836308567757331, "grad_norm": 0.5410502552986145, "learning_rate": 3.3925559888606506e-06, "loss": 0.3703, "step": 40015 }, { "epoch": 1.8363544582625855, "grad_norm": 0.5212858319282532, "learning_rate": 3.3923238190834952e-06, "loss": 0.4095, "step": 40016 }, { "epoch": 1.83640034876784, "grad_norm": 0.4732181131839752, "learning_rate": 3.392091653172293e-06, "loss": 0.3397, "step": 40017 }, { "epoch": 1.8364462392730942, "grad_norm": 0.40973031520843506, "learning_rate": 3.391859491127601e-06, "loss": 0.269, "step": 40018 }, { "epoch": 1.8364921297783487, "grad_norm": 0.5094745755195618, "learning_rate": 3.3916273329499816e-06, "loss": 0.4242, "step": 40019 }, { "epoch": 1.8365380202836032, "grad_norm": 0.4824801981449127, "learning_rate": 3.3913951786399905e-06, "loss": 0.3583, "step": 40020 }, { "epoch": 1.8365839107888577, "grad_norm": 0.47572124004364014, "learning_rate": 3.3911630281981853e-06, "loss": 0.3883, "step": 40021 }, { "epoch": 1.8366298012941122, "grad_norm": 0.43510639667510986, "learning_rate": 3.390930881625126e-06, "loss": 0.3041, "step": 40022 }, { "epoch": 1.8366756917993667, "grad_norm": 0.466013640165329, "learning_rate": 3.3906987389213714e-06, "loss": 0.3411, "step": 40023 }, { "epoch": 1.8367215823046212, "grad_norm": 0.4450278580188751, "learning_rate": 3.390466600087475e-06, "loss": 0.2795, "step": 40024 }, { "epoch": 1.8367674728098757, "grad_norm": 0.4858452081680298, "learning_rate": 3.390234465124003e-06, "loss": 0.3821, "step": 40025 }, { "epoch": 1.8368133633151302, "grad_norm": 0.4891620874404907, "learning_rate": 3.390002334031507e-06, "loss": 0.3647, "step": 40026 }, { "epoch": 1.8368592538203845, "grad_norm": 0.5070965886116028, "learning_rate": 3.3897702068105447e-06, "loss": 0.337, "step": 40027 }, { "epoch": 1.836905144325639, "grad_norm": 0.4650188982486725, "learning_rate": 3.3895380834616796e-06, "loss": 0.384, "step": 40028 }, { "epoch": 1.8369510348308935, "grad_norm": 0.44363224506378174, "learning_rate": 3.389305963985467e-06, "loss": 0.3625, "step": 40029 }, { "epoch": 1.836996925336148, "grad_norm": 0.4735911786556244, "learning_rate": 3.389073848382464e-06, "loss": 0.4323, "step": 40030 }, { "epoch": 1.8370428158414023, "grad_norm": 0.4961511194705963, "learning_rate": 3.3888417366532312e-06, "loss": 0.3957, "step": 40031 }, { "epoch": 1.8370887063466568, "grad_norm": 0.45797258615493774, "learning_rate": 3.3886096287983254e-06, "loss": 0.3411, "step": 40032 }, { "epoch": 1.8371345968519113, "grad_norm": 0.4434041380882263, "learning_rate": 3.388377524818303e-06, "loss": 0.3089, "step": 40033 }, { "epoch": 1.8371804873571658, "grad_norm": 0.5157313346862793, "learning_rate": 3.3881454247137263e-06, "loss": 0.4628, "step": 40034 }, { "epoch": 1.8372263778624203, "grad_norm": 0.44423922896385193, "learning_rate": 3.3879133284851507e-06, "loss": 0.2925, "step": 40035 }, { "epoch": 1.8372722683676748, "grad_norm": 0.503124475479126, "learning_rate": 3.387681236133134e-06, "loss": 0.4036, "step": 40036 }, { "epoch": 1.8373181588729293, "grad_norm": 0.4812272787094116, "learning_rate": 3.387449147658236e-06, "loss": 0.3848, "step": 40037 }, { "epoch": 1.8373640493781838, "grad_norm": 0.4713943302631378, "learning_rate": 3.3872170630610136e-06, "loss": 0.3592, "step": 40038 }, { "epoch": 1.8374099398834383, "grad_norm": 0.4816942811012268, "learning_rate": 3.3869849823420233e-06, "loss": 0.3589, "step": 40039 }, { "epoch": 1.8374558303886925, "grad_norm": 0.46792346239089966, "learning_rate": 3.3867529055018273e-06, "loss": 0.3496, "step": 40040 }, { "epoch": 1.837501720893947, "grad_norm": 0.4468141198158264, "learning_rate": 3.3865208325409816e-06, "loss": 0.3265, "step": 40041 }, { "epoch": 1.8375476113992015, "grad_norm": 0.5125146508216858, "learning_rate": 3.3862887634600423e-06, "loss": 0.4246, "step": 40042 }, { "epoch": 1.8375935019044558, "grad_norm": 0.4288843870162964, "learning_rate": 3.3860566982595706e-06, "loss": 0.2954, "step": 40043 }, { "epoch": 1.8376393924097103, "grad_norm": 0.4696047008037567, "learning_rate": 3.3858246369401234e-06, "loss": 0.3496, "step": 40044 }, { "epoch": 1.8376852829149648, "grad_norm": 0.5032174587249756, "learning_rate": 3.385592579502258e-06, "loss": 0.4147, "step": 40045 }, { "epoch": 1.8377311734202193, "grad_norm": 0.4543551802635193, "learning_rate": 3.385360525946531e-06, "loss": 0.3246, "step": 40046 }, { "epoch": 1.8377770639254738, "grad_norm": 0.5061081647872925, "learning_rate": 3.385128476273506e-06, "loss": 0.4123, "step": 40047 }, { "epoch": 1.8378229544307283, "grad_norm": 0.5673410296440125, "learning_rate": 3.384896430483736e-06, "loss": 0.3442, "step": 40048 }, { "epoch": 1.8378688449359828, "grad_norm": 0.48610663414001465, "learning_rate": 3.3846643885777776e-06, "loss": 0.3161, "step": 40049 }, { "epoch": 1.8379147354412373, "grad_norm": 0.48675185441970825, "learning_rate": 3.3844323505561944e-06, "loss": 0.3947, "step": 40050 }, { "epoch": 1.8379606259464918, "grad_norm": 0.4737752676010132, "learning_rate": 3.3842003164195414e-06, "loss": 0.3529, "step": 40051 }, { "epoch": 1.8380065164517463, "grad_norm": 0.48218345642089844, "learning_rate": 3.3839682861683754e-06, "loss": 0.3773, "step": 40052 }, { "epoch": 1.8380524069570006, "grad_norm": 0.49974116683006287, "learning_rate": 3.3837362598032564e-06, "loss": 0.3468, "step": 40053 }, { "epoch": 1.838098297462255, "grad_norm": 0.47853338718414307, "learning_rate": 3.383504237324742e-06, "loss": 0.3706, "step": 40054 }, { "epoch": 1.8381441879675096, "grad_norm": 0.4632374942302704, "learning_rate": 3.383272218733388e-06, "loss": 0.2881, "step": 40055 }, { "epoch": 1.8381900784727638, "grad_norm": 0.45719119906425476, "learning_rate": 3.383040204029756e-06, "loss": 0.377, "step": 40056 }, { "epoch": 1.8382359689780183, "grad_norm": 0.4733566641807556, "learning_rate": 3.382808193214402e-06, "loss": 0.3559, "step": 40057 }, { "epoch": 1.8382818594832728, "grad_norm": 0.48723509907722473, "learning_rate": 3.3825761862878826e-06, "loss": 0.3596, "step": 40058 }, { "epoch": 1.8383277499885273, "grad_norm": 0.45164161920547485, "learning_rate": 3.3823441832507574e-06, "loss": 0.3423, "step": 40059 }, { "epoch": 1.8383736404937818, "grad_norm": 0.5071130394935608, "learning_rate": 3.382112184103585e-06, "loss": 0.36, "step": 40060 }, { "epoch": 1.8384195309990363, "grad_norm": 0.5026705861091614, "learning_rate": 3.3818801888469193e-06, "loss": 0.3787, "step": 40061 }, { "epoch": 1.8384654215042908, "grad_norm": 0.4690433740615845, "learning_rate": 3.3816481974813233e-06, "loss": 0.3274, "step": 40062 }, { "epoch": 1.8385113120095453, "grad_norm": 0.45508500933647156, "learning_rate": 3.381416210007353e-06, "loss": 0.3035, "step": 40063 }, { "epoch": 1.8385572025147998, "grad_norm": 0.47315606474876404, "learning_rate": 3.3811842264255647e-06, "loss": 0.3813, "step": 40064 }, { "epoch": 1.8386030930200543, "grad_norm": 0.4633285701274872, "learning_rate": 3.3809522467365185e-06, "loss": 0.3739, "step": 40065 }, { "epoch": 1.8386489835253086, "grad_norm": 0.47519776225090027, "learning_rate": 3.3807202709407704e-06, "loss": 0.3888, "step": 40066 }, { "epoch": 1.838694874030563, "grad_norm": 0.46747300028800964, "learning_rate": 3.3804882990388776e-06, "loss": 0.3697, "step": 40067 }, { "epoch": 1.8387407645358176, "grad_norm": 0.4418640732765198, "learning_rate": 3.3802563310314018e-06, "loss": 0.3264, "step": 40068 }, { "epoch": 1.8387866550410719, "grad_norm": 0.4547782838344574, "learning_rate": 3.3800243669188994e-06, "loss": 0.3229, "step": 40069 }, { "epoch": 1.8388325455463264, "grad_norm": 0.4975554049015045, "learning_rate": 3.3797924067019227e-06, "loss": 0.4063, "step": 40070 }, { "epoch": 1.8388784360515809, "grad_norm": 0.4877837896347046, "learning_rate": 3.379560450381037e-06, "loss": 0.3302, "step": 40071 }, { "epoch": 1.8389243265568354, "grad_norm": 0.44827812910079956, "learning_rate": 3.379328497956797e-06, "loss": 0.3152, "step": 40072 }, { "epoch": 1.8389702170620899, "grad_norm": 0.4726530611515045, "learning_rate": 3.379096549429759e-06, "loss": 0.371, "step": 40073 }, { "epoch": 1.8390161075673443, "grad_norm": 0.4561325013637543, "learning_rate": 3.3788646048004837e-06, "loss": 0.3226, "step": 40074 }, { "epoch": 1.8390619980725988, "grad_norm": 0.4795105755329132, "learning_rate": 3.3786326640695266e-06, "loss": 0.3675, "step": 40075 }, { "epoch": 1.8391078885778533, "grad_norm": 0.45084723830223083, "learning_rate": 3.3784007272374453e-06, "loss": 0.3233, "step": 40076 }, { "epoch": 1.8391537790831078, "grad_norm": 0.5003656148910522, "learning_rate": 3.3781687943047995e-06, "loss": 0.4007, "step": 40077 }, { "epoch": 1.8391996695883621, "grad_norm": 0.437844455242157, "learning_rate": 3.3779368652721467e-06, "loss": 0.3367, "step": 40078 }, { "epoch": 1.8392455600936166, "grad_norm": 0.4764179587364197, "learning_rate": 3.377704940140043e-06, "loss": 0.373, "step": 40079 }, { "epoch": 1.8392914505988711, "grad_norm": 0.42635950446128845, "learning_rate": 3.3774730189090464e-06, "loss": 0.3024, "step": 40080 }, { "epoch": 1.8393373411041256, "grad_norm": 0.49560028314590454, "learning_rate": 3.3772411015797165e-06, "loss": 0.324, "step": 40081 }, { "epoch": 1.8393832316093799, "grad_norm": 0.4866359233856201, "learning_rate": 3.377009188152609e-06, "loss": 0.4122, "step": 40082 }, { "epoch": 1.8394291221146344, "grad_norm": 0.44516223669052124, "learning_rate": 3.37677727862828e-06, "loss": 0.3139, "step": 40083 }, { "epoch": 1.8394750126198889, "grad_norm": 0.43794363737106323, "learning_rate": 3.3765453730072916e-06, "loss": 0.3193, "step": 40084 }, { "epoch": 1.8395209031251434, "grad_norm": 0.4666297733783722, "learning_rate": 3.376313471290199e-06, "loss": 0.3556, "step": 40085 }, { "epoch": 1.8395667936303979, "grad_norm": 0.4612573981285095, "learning_rate": 3.37608157347756e-06, "loss": 0.3044, "step": 40086 }, { "epoch": 1.8396126841356524, "grad_norm": 0.47304508090019226, "learning_rate": 3.3758496795699323e-06, "loss": 0.3371, "step": 40087 }, { "epoch": 1.8396585746409069, "grad_norm": 0.482916921377182, "learning_rate": 3.3756177895678733e-06, "loss": 0.3614, "step": 40088 }, { "epoch": 1.8397044651461614, "grad_norm": 0.4894770383834839, "learning_rate": 3.3753859034719393e-06, "loss": 0.4044, "step": 40089 }, { "epoch": 1.8397503556514159, "grad_norm": 0.44892576336860657, "learning_rate": 3.3751540212826916e-06, "loss": 0.3172, "step": 40090 }, { "epoch": 1.8397962461566701, "grad_norm": 0.49046286940574646, "learning_rate": 3.3749221430006866e-06, "loss": 0.4191, "step": 40091 }, { "epoch": 1.8398421366619246, "grad_norm": 0.46104514598846436, "learning_rate": 3.374690268626478e-06, "loss": 0.3533, "step": 40092 }, { "epoch": 1.8398880271671791, "grad_norm": 0.45382875204086304, "learning_rate": 3.374458398160628e-06, "loss": 0.3229, "step": 40093 }, { "epoch": 1.8399339176724334, "grad_norm": 0.47095879912376404, "learning_rate": 3.374226531603693e-06, "loss": 0.3532, "step": 40094 }, { "epoch": 1.839979808177688, "grad_norm": 0.4647656977176666, "learning_rate": 3.3739946689562286e-06, "loss": 0.3423, "step": 40095 }, { "epoch": 1.8400256986829424, "grad_norm": 0.49705958366394043, "learning_rate": 3.373762810218796e-06, "loss": 0.4279, "step": 40096 }, { "epoch": 1.840071589188197, "grad_norm": 0.47082722187042236, "learning_rate": 3.3735309553919494e-06, "loss": 0.3739, "step": 40097 }, { "epoch": 1.8401174796934514, "grad_norm": 0.4391995966434479, "learning_rate": 3.3732991044762452e-06, "loss": 0.3108, "step": 40098 }, { "epoch": 1.840163370198706, "grad_norm": 0.5253812074661255, "learning_rate": 3.3730672574722463e-06, "loss": 0.5041, "step": 40099 }, { "epoch": 1.8402092607039604, "grad_norm": 0.45840689539909363, "learning_rate": 3.3728354143805063e-06, "loss": 0.3031, "step": 40100 }, { "epoch": 1.840255151209215, "grad_norm": 0.4417295455932617, "learning_rate": 3.3726035752015833e-06, "loss": 0.3125, "step": 40101 }, { "epoch": 1.8403010417144694, "grad_norm": 0.44637274742126465, "learning_rate": 3.3723717399360355e-06, "loss": 0.3617, "step": 40102 }, { "epoch": 1.840346932219724, "grad_norm": 0.48672690987586975, "learning_rate": 3.3721399085844208e-06, "loss": 0.3894, "step": 40103 }, { "epoch": 1.8403928227249782, "grad_norm": 0.47997018694877625, "learning_rate": 3.3719080811472934e-06, "loss": 0.3617, "step": 40104 }, { "epoch": 1.8404387132302327, "grad_norm": 0.5309159755706787, "learning_rate": 3.371676257625215e-06, "loss": 0.4415, "step": 40105 }, { "epoch": 1.8404846037354872, "grad_norm": 0.4676118493080139, "learning_rate": 3.3714444380187415e-06, "loss": 0.3464, "step": 40106 }, { "epoch": 1.8405304942407414, "grad_norm": 0.4376239478588104, "learning_rate": 3.3712126223284292e-06, "loss": 0.3585, "step": 40107 }, { "epoch": 1.840576384745996, "grad_norm": 0.5081982016563416, "learning_rate": 3.370980810554837e-06, "loss": 0.4088, "step": 40108 }, { "epoch": 1.8406222752512504, "grad_norm": 0.433517187833786, "learning_rate": 3.3707490026985225e-06, "loss": 0.287, "step": 40109 }, { "epoch": 1.840668165756505, "grad_norm": 0.4637381136417389, "learning_rate": 3.3705171987600394e-06, "loss": 0.3651, "step": 40110 }, { "epoch": 1.8407140562617594, "grad_norm": 0.42848601937294006, "learning_rate": 3.370285398739951e-06, "loss": 0.2665, "step": 40111 }, { "epoch": 1.840759946767014, "grad_norm": 0.4670964777469635, "learning_rate": 3.370053602638812e-06, "loss": 0.3781, "step": 40112 }, { "epoch": 1.8408058372722684, "grad_norm": 0.48157966136932373, "learning_rate": 3.36982181045718e-06, "loss": 0.3828, "step": 40113 }, { "epoch": 1.840851727777523, "grad_norm": 0.4564974904060364, "learning_rate": 3.3695900221956103e-06, "loss": 0.3036, "step": 40114 }, { "epoch": 1.8408976182827774, "grad_norm": 0.4663805663585663, "learning_rate": 3.369358237854663e-06, "loss": 0.3081, "step": 40115 }, { "epoch": 1.8409435087880317, "grad_norm": 0.45845264196395874, "learning_rate": 3.369126457434895e-06, "loss": 0.375, "step": 40116 }, { "epoch": 1.8409893992932862, "grad_norm": 0.5117235779762268, "learning_rate": 3.368894680936861e-06, "loss": 0.4131, "step": 40117 }, { "epoch": 1.8410352897985407, "grad_norm": 0.48235753178596497, "learning_rate": 3.368662908361122e-06, "loss": 0.3301, "step": 40118 }, { "epoch": 1.8410811803037952, "grad_norm": 0.4922463893890381, "learning_rate": 3.368431139708235e-06, "loss": 0.4437, "step": 40119 }, { "epoch": 1.8411270708090495, "grad_norm": 0.494913250207901, "learning_rate": 3.3681993749787528e-06, "loss": 0.3971, "step": 40120 }, { "epoch": 1.841172961314304, "grad_norm": 0.4681732654571533, "learning_rate": 3.367967614173239e-06, "loss": 0.3196, "step": 40121 }, { "epoch": 1.8412188518195585, "grad_norm": 0.4512799382209778, "learning_rate": 3.367735857292247e-06, "loss": 0.3567, "step": 40122 }, { "epoch": 1.841264742324813, "grad_norm": 0.46655550599098206, "learning_rate": 3.3675041043363348e-06, "loss": 0.321, "step": 40123 }, { "epoch": 1.8413106328300675, "grad_norm": 0.4183458685874939, "learning_rate": 3.3672723553060615e-06, "loss": 0.2795, "step": 40124 }, { "epoch": 1.841356523335322, "grad_norm": 0.4655725657939911, "learning_rate": 3.367040610201982e-06, "loss": 0.3598, "step": 40125 }, { "epoch": 1.8414024138405765, "grad_norm": 0.47940540313720703, "learning_rate": 3.3668088690246525e-06, "loss": 0.3774, "step": 40126 }, { "epoch": 1.841448304345831, "grad_norm": 0.4671495258808136, "learning_rate": 3.3665771317746343e-06, "loss": 0.3299, "step": 40127 }, { "epoch": 1.8414941948510855, "grad_norm": 0.4431859850883484, "learning_rate": 3.366345398452483e-06, "loss": 0.3199, "step": 40128 }, { "epoch": 1.8415400853563397, "grad_norm": 0.49852409958839417, "learning_rate": 3.366113669058754e-06, "loss": 0.3868, "step": 40129 }, { "epoch": 1.8415859758615942, "grad_norm": 0.4486108124256134, "learning_rate": 3.365881943594007e-06, "loss": 0.3232, "step": 40130 }, { "epoch": 1.8416318663668487, "grad_norm": 0.4684428870677948, "learning_rate": 3.3656502220587988e-06, "loss": 0.3312, "step": 40131 }, { "epoch": 1.841677756872103, "grad_norm": 0.5015115141868591, "learning_rate": 3.3654185044536838e-06, "loss": 0.4206, "step": 40132 }, { "epoch": 1.8417236473773575, "grad_norm": 0.4728427827358246, "learning_rate": 3.3651867907792226e-06, "loss": 0.3735, "step": 40133 }, { "epoch": 1.841769537882612, "grad_norm": 0.4692358076572418, "learning_rate": 3.364955081035972e-06, "loss": 0.3534, "step": 40134 }, { "epoch": 1.8418154283878665, "grad_norm": 0.5298478603363037, "learning_rate": 3.364723375224487e-06, "loss": 0.4887, "step": 40135 }, { "epoch": 1.841861318893121, "grad_norm": 0.4001864790916443, "learning_rate": 3.364491673345327e-06, "loss": 0.2354, "step": 40136 }, { "epoch": 1.8419072093983755, "grad_norm": 0.4565076231956482, "learning_rate": 3.364259975399049e-06, "loss": 0.3357, "step": 40137 }, { "epoch": 1.84195309990363, "grad_norm": 0.47771820425987244, "learning_rate": 3.364028281386208e-06, "loss": 0.3482, "step": 40138 }, { "epoch": 1.8419989904088845, "grad_norm": 0.43700429797172546, "learning_rate": 3.363796591307364e-06, "loss": 0.3108, "step": 40139 }, { "epoch": 1.842044880914139, "grad_norm": 0.46043312549591064, "learning_rate": 3.3635649051630724e-06, "loss": 0.3275, "step": 40140 }, { "epoch": 1.8420907714193935, "grad_norm": 0.49332576990127563, "learning_rate": 3.3633332229538888e-06, "loss": 0.3946, "step": 40141 }, { "epoch": 1.8421366619246478, "grad_norm": 0.48606953024864197, "learning_rate": 3.3631015446803743e-06, "loss": 0.3843, "step": 40142 }, { "epoch": 1.8421825524299023, "grad_norm": 0.46592041850090027, "learning_rate": 3.362869870343084e-06, "loss": 0.3852, "step": 40143 }, { "epoch": 1.8422284429351568, "grad_norm": 0.46210891008377075, "learning_rate": 3.362638199942575e-06, "loss": 0.3676, "step": 40144 }, { "epoch": 1.842274333440411, "grad_norm": 0.4840258061885834, "learning_rate": 3.3624065334794038e-06, "loss": 0.3515, "step": 40145 }, { "epoch": 1.8423202239456655, "grad_norm": 0.47677192091941833, "learning_rate": 3.3621748709541287e-06, "loss": 0.3967, "step": 40146 }, { "epoch": 1.84236611445092, "grad_norm": 0.4676186442375183, "learning_rate": 3.3619432123673057e-06, "loss": 0.338, "step": 40147 }, { "epoch": 1.8424120049561745, "grad_norm": 0.5050968527793884, "learning_rate": 3.3617115577194904e-06, "loss": 0.3246, "step": 40148 }, { "epoch": 1.842457895461429, "grad_norm": 0.4328094720840454, "learning_rate": 3.3614799070112448e-06, "loss": 0.3241, "step": 40149 }, { "epoch": 1.8425037859666835, "grad_norm": 0.4765894114971161, "learning_rate": 3.3612482602431216e-06, "loss": 0.4017, "step": 40150 }, { "epoch": 1.842549676471938, "grad_norm": 0.4436030983924866, "learning_rate": 3.361016617415679e-06, "loss": 0.322, "step": 40151 }, { "epoch": 1.8425955669771925, "grad_norm": 0.43980351090431213, "learning_rate": 3.3607849785294744e-06, "loss": 0.2932, "step": 40152 }, { "epoch": 1.842641457482447, "grad_norm": 0.47944140434265137, "learning_rate": 3.3605533435850646e-06, "loss": 0.3966, "step": 40153 }, { "epoch": 1.8426873479877015, "grad_norm": 0.46652156114578247, "learning_rate": 3.360321712583005e-06, "loss": 0.3319, "step": 40154 }, { "epoch": 1.8427332384929558, "grad_norm": 0.44899260997772217, "learning_rate": 3.360090085523856e-06, "loss": 0.3314, "step": 40155 }, { "epoch": 1.8427791289982103, "grad_norm": 0.4919794797897339, "learning_rate": 3.359858462408173e-06, "loss": 0.3534, "step": 40156 }, { "epoch": 1.8428250195034648, "grad_norm": 0.42925024032592773, "learning_rate": 3.359626843236512e-06, "loss": 0.2904, "step": 40157 }, { "epoch": 1.842870910008719, "grad_norm": 0.6350916624069214, "learning_rate": 3.359395228009431e-06, "loss": 0.3624, "step": 40158 }, { "epoch": 1.8429168005139736, "grad_norm": 0.48383086919784546, "learning_rate": 3.359163616727487e-06, "loss": 0.3685, "step": 40159 }, { "epoch": 1.842962691019228, "grad_norm": 0.5049861073493958, "learning_rate": 3.3589320093912357e-06, "loss": 0.4063, "step": 40160 }, { "epoch": 1.8430085815244825, "grad_norm": 0.44479402899742126, "learning_rate": 3.3587004060012364e-06, "loss": 0.2988, "step": 40161 }, { "epoch": 1.843054472029737, "grad_norm": 0.4592954218387604, "learning_rate": 3.3584688065580438e-06, "loss": 0.2894, "step": 40162 }, { "epoch": 1.8431003625349915, "grad_norm": 0.4851171672344208, "learning_rate": 3.358237211062214e-06, "loss": 0.3329, "step": 40163 }, { "epoch": 1.843146253040246, "grad_norm": 0.504614531993866, "learning_rate": 3.3580056195143074e-06, "loss": 0.3513, "step": 40164 }, { "epoch": 1.8431921435455005, "grad_norm": 0.4932461977005005, "learning_rate": 3.3577740319148792e-06, "loss": 0.3722, "step": 40165 }, { "epoch": 1.843238034050755, "grad_norm": 0.4877512454986572, "learning_rate": 3.357542448264486e-06, "loss": 0.3156, "step": 40166 }, { "epoch": 1.8432839245560093, "grad_norm": 0.4547358751296997, "learning_rate": 3.3573108685636844e-06, "loss": 0.3297, "step": 40167 }, { "epoch": 1.8433298150612638, "grad_norm": 0.4972550868988037, "learning_rate": 3.3570792928130325e-06, "loss": 0.3372, "step": 40168 }, { "epoch": 1.8433757055665183, "grad_norm": 0.4829994738101959, "learning_rate": 3.356847721013084e-06, "loss": 0.3804, "step": 40169 }, { "epoch": 1.8434215960717728, "grad_norm": 0.5092733502388, "learning_rate": 3.3566161531644004e-06, "loss": 0.2968, "step": 40170 }, { "epoch": 1.843467486577027, "grad_norm": 0.4555870592594147, "learning_rate": 3.3563845892675363e-06, "loss": 0.3331, "step": 40171 }, { "epoch": 1.8435133770822816, "grad_norm": 0.45496371388435364, "learning_rate": 3.3561530293230473e-06, "loss": 0.357, "step": 40172 }, { "epoch": 1.843559267587536, "grad_norm": 0.49132633209228516, "learning_rate": 3.3559214733314925e-06, "loss": 0.3814, "step": 40173 }, { "epoch": 1.8436051580927906, "grad_norm": 0.5186668634414673, "learning_rate": 3.355689921293428e-06, "loss": 0.3899, "step": 40174 }, { "epoch": 1.843651048598045, "grad_norm": 0.46897244453430176, "learning_rate": 3.355458373209408e-06, "loss": 0.3547, "step": 40175 }, { "epoch": 1.8436969391032996, "grad_norm": 0.4857618808746338, "learning_rate": 3.3552268290799937e-06, "loss": 0.4085, "step": 40176 }, { "epoch": 1.843742829608554, "grad_norm": 0.5028660893440247, "learning_rate": 3.35499528890574e-06, "loss": 0.3995, "step": 40177 }, { "epoch": 1.8437887201138086, "grad_norm": 0.5205161571502686, "learning_rate": 3.3547637526872028e-06, "loss": 0.4577, "step": 40178 }, { "epoch": 1.843834610619063, "grad_norm": 0.5011703968048096, "learning_rate": 3.3545322204249384e-06, "loss": 0.4123, "step": 40179 }, { "epoch": 1.8438805011243173, "grad_norm": 0.47178688645362854, "learning_rate": 3.3543006921195064e-06, "loss": 0.384, "step": 40180 }, { "epoch": 1.8439263916295718, "grad_norm": 0.4854075014591217, "learning_rate": 3.3540691677714622e-06, "loss": 0.4124, "step": 40181 }, { "epoch": 1.8439722821348263, "grad_norm": 0.4899073839187622, "learning_rate": 3.3538376473813603e-06, "loss": 0.3615, "step": 40182 }, { "epoch": 1.8440181726400806, "grad_norm": 0.4421781897544861, "learning_rate": 3.3536061309497602e-06, "loss": 0.3044, "step": 40183 }, { "epoch": 1.844064063145335, "grad_norm": 0.43765202164649963, "learning_rate": 3.3533746184772188e-06, "loss": 0.3155, "step": 40184 }, { "epoch": 1.8441099536505896, "grad_norm": 0.44694969058036804, "learning_rate": 3.3531431099642887e-06, "loss": 0.3042, "step": 40185 }, { "epoch": 1.844155844155844, "grad_norm": 0.4438680112361908, "learning_rate": 3.352911605411532e-06, "loss": 0.3261, "step": 40186 }, { "epoch": 1.8442017346610986, "grad_norm": 0.4938766062259674, "learning_rate": 3.3526801048195035e-06, "loss": 0.3868, "step": 40187 }, { "epoch": 1.844247625166353, "grad_norm": 0.48017898201942444, "learning_rate": 3.3524486081887575e-06, "loss": 0.3381, "step": 40188 }, { "epoch": 1.8442935156716076, "grad_norm": 0.42440158128738403, "learning_rate": 3.352217115519855e-06, "loss": 0.314, "step": 40189 }, { "epoch": 1.844339406176862, "grad_norm": 0.4698415696620941, "learning_rate": 3.351985626813349e-06, "loss": 0.3545, "step": 40190 }, { "epoch": 1.8443852966821166, "grad_norm": 0.436884343624115, "learning_rate": 3.3517541420697952e-06, "loss": 0.2957, "step": 40191 }, { "epoch": 1.844431187187371, "grad_norm": 0.46428993344306946, "learning_rate": 3.3515226612897555e-06, "loss": 0.3316, "step": 40192 }, { "epoch": 1.8444770776926254, "grad_norm": 0.45856600999832153, "learning_rate": 3.3512911844737834e-06, "loss": 0.3529, "step": 40193 }, { "epoch": 1.8445229681978799, "grad_norm": 0.4722120761871338, "learning_rate": 3.351059711622434e-06, "loss": 0.3573, "step": 40194 }, { "epoch": 1.8445688587031344, "grad_norm": 0.46723583340644836, "learning_rate": 3.3508282427362672e-06, "loss": 0.3437, "step": 40195 }, { "epoch": 1.8446147492083886, "grad_norm": 0.4561867415904999, "learning_rate": 3.350596777815838e-06, "loss": 0.3107, "step": 40196 }, { "epoch": 1.8446606397136431, "grad_norm": 0.4521401524543762, "learning_rate": 3.3503653168617007e-06, "loss": 0.2826, "step": 40197 }, { "epoch": 1.8447065302188976, "grad_norm": 0.5296445488929749, "learning_rate": 3.350133859874416e-06, "loss": 0.4357, "step": 40198 }, { "epoch": 1.8447524207241521, "grad_norm": 0.4156520366668701, "learning_rate": 3.3499024068545393e-06, "loss": 0.2797, "step": 40199 }, { "epoch": 1.8447983112294066, "grad_norm": 0.45170968770980835, "learning_rate": 3.349670957802625e-06, "loss": 0.332, "step": 40200 }, { "epoch": 1.8448442017346611, "grad_norm": 0.45238637924194336, "learning_rate": 3.3494395127192324e-06, "loss": 0.3183, "step": 40201 }, { "epoch": 1.8448900922399156, "grad_norm": 0.47506338357925415, "learning_rate": 3.349208071604917e-06, "loss": 0.3732, "step": 40202 }, { "epoch": 1.8449359827451701, "grad_norm": 0.4922519624233246, "learning_rate": 3.3489766344602326e-06, "loss": 0.3427, "step": 40203 }, { "epoch": 1.8449818732504246, "grad_norm": 0.44472962617874146, "learning_rate": 3.3487452012857412e-06, "loss": 0.2997, "step": 40204 }, { "epoch": 1.845027763755679, "grad_norm": 0.5015663504600525, "learning_rate": 3.348513772081996e-06, "loss": 0.3654, "step": 40205 }, { "epoch": 1.8450736542609334, "grad_norm": 0.4426925778388977, "learning_rate": 3.348282346849552e-06, "loss": 0.2898, "step": 40206 }, { "epoch": 1.845119544766188, "grad_norm": 0.4806230366230011, "learning_rate": 3.3480509255889695e-06, "loss": 0.3556, "step": 40207 }, { "epoch": 1.8451654352714424, "grad_norm": 0.5011109709739685, "learning_rate": 3.3478195083008034e-06, "loss": 0.3942, "step": 40208 }, { "epoch": 1.8452113257766967, "grad_norm": 0.46690890192985535, "learning_rate": 3.34758809498561e-06, "loss": 0.3474, "step": 40209 }, { "epoch": 1.8452572162819512, "grad_norm": 0.4850022494792938, "learning_rate": 3.3473566856439443e-06, "loss": 0.4063, "step": 40210 }, { "epoch": 1.8453031067872057, "grad_norm": 0.4774045944213867, "learning_rate": 3.3471252802763656e-06, "loss": 0.3288, "step": 40211 }, { "epoch": 1.8453489972924602, "grad_norm": 0.4652615785598755, "learning_rate": 3.3468938788834284e-06, "loss": 0.3268, "step": 40212 }, { "epoch": 1.8453948877977147, "grad_norm": 0.47561171650886536, "learning_rate": 3.346662481465688e-06, "loss": 0.4047, "step": 40213 }, { "epoch": 1.8454407783029692, "grad_norm": 0.5156642198562622, "learning_rate": 3.3464310880237038e-06, "loss": 0.4138, "step": 40214 }, { "epoch": 1.8454866688082237, "grad_norm": 0.5278937220573425, "learning_rate": 3.346199698558032e-06, "loss": 0.4586, "step": 40215 }, { "epoch": 1.8455325593134781, "grad_norm": 0.47812986373901367, "learning_rate": 3.3459683130692267e-06, "loss": 0.3479, "step": 40216 }, { "epoch": 1.8455784498187326, "grad_norm": 0.48146477341651917, "learning_rate": 3.345736931557846e-06, "loss": 0.3689, "step": 40217 }, { "epoch": 1.845624340323987, "grad_norm": 0.4844745695590973, "learning_rate": 3.3455055540244462e-06, "loss": 0.3814, "step": 40218 }, { "epoch": 1.8456702308292414, "grad_norm": 0.46681469678878784, "learning_rate": 3.3452741804695823e-06, "loss": 0.3221, "step": 40219 }, { "epoch": 1.845716121334496, "grad_norm": 0.46164578199386597, "learning_rate": 3.3450428108938127e-06, "loss": 0.2937, "step": 40220 }, { "epoch": 1.8457620118397502, "grad_norm": 0.5032753348350525, "learning_rate": 3.344811445297693e-06, "loss": 0.3943, "step": 40221 }, { "epoch": 1.8458079023450047, "grad_norm": 0.44146233797073364, "learning_rate": 3.344580083681778e-06, "loss": 0.3139, "step": 40222 }, { "epoch": 1.8458537928502592, "grad_norm": 0.47278133034706116, "learning_rate": 3.344348726046627e-06, "loss": 0.3575, "step": 40223 }, { "epoch": 1.8458996833555137, "grad_norm": 0.4297723174095154, "learning_rate": 3.3441173723927944e-06, "loss": 0.2986, "step": 40224 }, { "epoch": 1.8459455738607682, "grad_norm": 0.44753748178482056, "learning_rate": 3.343886022720836e-06, "loss": 0.3506, "step": 40225 }, { "epoch": 1.8459914643660227, "grad_norm": 0.4846854507923126, "learning_rate": 3.3436546770313107e-06, "loss": 0.3588, "step": 40226 }, { "epoch": 1.8460373548712772, "grad_norm": 0.4135468304157257, "learning_rate": 3.3434233353247724e-06, "loss": 0.2632, "step": 40227 }, { "epoch": 1.8460832453765317, "grad_norm": 0.4615086317062378, "learning_rate": 3.3431919976017768e-06, "loss": 0.3176, "step": 40228 }, { "epoch": 1.8461291358817862, "grad_norm": 0.4799922704696655, "learning_rate": 3.342960663862883e-06, "loss": 0.3589, "step": 40229 }, { "epoch": 1.8461750263870407, "grad_norm": 0.4557317793369293, "learning_rate": 3.342729334108646e-06, "loss": 0.3121, "step": 40230 }, { "epoch": 1.846220916892295, "grad_norm": 0.4909101724624634, "learning_rate": 3.342498008339621e-06, "loss": 0.3941, "step": 40231 }, { "epoch": 1.8462668073975494, "grad_norm": 0.46094390749931335, "learning_rate": 3.3422666865563658e-06, "loss": 0.3567, "step": 40232 }, { "epoch": 1.846312697902804, "grad_norm": 0.49138572812080383, "learning_rate": 3.3420353687594365e-06, "loss": 0.4046, "step": 40233 }, { "epoch": 1.8463585884080582, "grad_norm": 0.49135449528694153, "learning_rate": 3.3418040549493863e-06, "loss": 0.3534, "step": 40234 }, { "epoch": 1.8464044789133127, "grad_norm": 0.5155145525932312, "learning_rate": 3.3415727451267767e-06, "loss": 0.4434, "step": 40235 }, { "epoch": 1.8464503694185672, "grad_norm": 0.43690621852874756, "learning_rate": 3.3413414392921605e-06, "loss": 0.2789, "step": 40236 }, { "epoch": 1.8464962599238217, "grad_norm": 0.43873971700668335, "learning_rate": 3.341110137446094e-06, "loss": 0.3014, "step": 40237 }, { "epoch": 1.8465421504290762, "grad_norm": 0.4224017560482025, "learning_rate": 3.340878839589135e-06, "loss": 0.3039, "step": 40238 }, { "epoch": 1.8465880409343307, "grad_norm": 0.48476657271385193, "learning_rate": 3.340647545721839e-06, "loss": 0.3586, "step": 40239 }, { "epoch": 1.8466339314395852, "grad_norm": 0.4460589587688446, "learning_rate": 3.3404162558447595e-06, "loss": 0.3212, "step": 40240 }, { "epoch": 1.8466798219448397, "grad_norm": 0.4813358187675476, "learning_rate": 3.340184969958458e-06, "loss": 0.3462, "step": 40241 }, { "epoch": 1.8467257124500942, "grad_norm": 0.4737553596496582, "learning_rate": 3.3399536880634863e-06, "loss": 0.3371, "step": 40242 }, { "epoch": 1.8467716029553487, "grad_norm": 0.4661926031112671, "learning_rate": 3.3397224101604032e-06, "loss": 0.3431, "step": 40243 }, { "epoch": 1.846817493460603, "grad_norm": 0.47274938225746155, "learning_rate": 3.339491136249763e-06, "loss": 0.3791, "step": 40244 }, { "epoch": 1.8468633839658575, "grad_norm": 0.4470342695713043, "learning_rate": 3.339259866332123e-06, "loss": 0.3085, "step": 40245 }, { "epoch": 1.846909274471112, "grad_norm": 0.44906309247016907, "learning_rate": 3.3390286004080388e-06, "loss": 0.348, "step": 40246 }, { "epoch": 1.8469551649763662, "grad_norm": 0.47322139143943787, "learning_rate": 3.338797338478065e-06, "loss": 0.3477, "step": 40247 }, { "epoch": 1.8470010554816207, "grad_norm": 0.45633676648139954, "learning_rate": 3.3385660805427612e-06, "loss": 0.3317, "step": 40248 }, { "epoch": 1.8470469459868752, "grad_norm": 0.4716099202632904, "learning_rate": 3.3383348266026815e-06, "loss": 0.3829, "step": 40249 }, { "epoch": 1.8470928364921297, "grad_norm": 0.520809531211853, "learning_rate": 3.33810357665838e-06, "loss": 0.4052, "step": 40250 }, { "epoch": 1.8471387269973842, "grad_norm": 0.47469982504844666, "learning_rate": 3.3378723307104167e-06, "loss": 0.3602, "step": 40251 }, { "epoch": 1.8471846175026387, "grad_norm": 0.4338953495025635, "learning_rate": 3.3376410887593464e-06, "loss": 0.2839, "step": 40252 }, { "epoch": 1.8472305080078932, "grad_norm": 0.5152711868286133, "learning_rate": 3.3374098508057227e-06, "loss": 0.4096, "step": 40253 }, { "epoch": 1.8472763985131477, "grad_norm": 0.46566447615623474, "learning_rate": 3.337178616850106e-06, "loss": 0.3282, "step": 40254 }, { "epoch": 1.8473222890184022, "grad_norm": 0.4541769027709961, "learning_rate": 3.336947386893049e-06, "loss": 0.3584, "step": 40255 }, { "epoch": 1.8473681795236565, "grad_norm": 0.4525289833545685, "learning_rate": 3.336716160935106e-06, "loss": 0.3516, "step": 40256 }, { "epoch": 1.847414070028911, "grad_norm": 0.48179036378860474, "learning_rate": 3.336484938976839e-06, "loss": 0.3348, "step": 40257 }, { "epoch": 1.8474599605341655, "grad_norm": 0.4722668528556824, "learning_rate": 3.3362537210188e-06, "loss": 0.3466, "step": 40258 }, { "epoch": 1.8475058510394198, "grad_norm": 0.43033891916275024, "learning_rate": 3.336022507061545e-06, "loss": 0.2623, "step": 40259 }, { "epoch": 1.8475517415446743, "grad_norm": 0.4403129816055298, "learning_rate": 3.3357912971056318e-06, "loss": 0.3273, "step": 40260 }, { "epoch": 1.8475976320499288, "grad_norm": 0.47655537724494934, "learning_rate": 3.335560091151615e-06, "loss": 0.356, "step": 40261 }, { "epoch": 1.8476435225551833, "grad_norm": 0.5336896181106567, "learning_rate": 3.3353288892000486e-06, "loss": 0.4452, "step": 40262 }, { "epoch": 1.8476894130604378, "grad_norm": 0.4733862578868866, "learning_rate": 3.3350976912514932e-06, "loss": 0.4135, "step": 40263 }, { "epoch": 1.8477353035656923, "grad_norm": 0.43277111649513245, "learning_rate": 3.3348664973065026e-06, "loss": 0.3023, "step": 40264 }, { "epoch": 1.8477811940709468, "grad_norm": 0.49459731578826904, "learning_rate": 3.3346353073656307e-06, "loss": 0.3522, "step": 40265 }, { "epoch": 1.8478270845762013, "grad_norm": 0.4786777198314667, "learning_rate": 3.334404121429437e-06, "loss": 0.3697, "step": 40266 }, { "epoch": 1.8478729750814558, "grad_norm": 0.4714864194393158, "learning_rate": 3.3341729394984755e-06, "loss": 0.3509, "step": 40267 }, { "epoch": 1.8479188655867103, "grad_norm": 0.44262534379959106, "learning_rate": 3.3339417615733004e-06, "loss": 0.2833, "step": 40268 }, { "epoch": 1.8479647560919645, "grad_norm": 0.46255114674568176, "learning_rate": 3.3337105876544718e-06, "loss": 0.335, "step": 40269 }, { "epoch": 1.848010646597219, "grad_norm": 0.4758565127849579, "learning_rate": 3.333479417742544e-06, "loss": 0.3614, "step": 40270 }, { "epoch": 1.8480565371024735, "grad_norm": 0.4858417510986328, "learning_rate": 3.333248251838069e-06, "loss": 0.4198, "step": 40271 }, { "epoch": 1.8481024276077278, "grad_norm": 0.4302600026130676, "learning_rate": 3.333017089941608e-06, "loss": 0.2885, "step": 40272 }, { "epoch": 1.8481483181129823, "grad_norm": 0.5097487568855286, "learning_rate": 3.3327859320537153e-06, "loss": 0.4601, "step": 40273 }, { "epoch": 1.8481942086182368, "grad_norm": 0.4900307357311249, "learning_rate": 3.3325547781749455e-06, "loss": 0.3324, "step": 40274 }, { "epoch": 1.8482400991234913, "grad_norm": 0.4893048405647278, "learning_rate": 3.332323628305855e-06, "loss": 0.3806, "step": 40275 }, { "epoch": 1.8482859896287458, "grad_norm": 0.5331499576568604, "learning_rate": 3.3320924824470008e-06, "loss": 0.4681, "step": 40276 }, { "epoch": 1.8483318801340003, "grad_norm": 0.48338252305984497, "learning_rate": 3.3318613405989376e-06, "loss": 0.2752, "step": 40277 }, { "epoch": 1.8483777706392548, "grad_norm": 0.4524998366832733, "learning_rate": 3.3316302027622188e-06, "loss": 0.3381, "step": 40278 }, { "epoch": 1.8484236611445093, "grad_norm": 0.48449498414993286, "learning_rate": 3.3313990689374054e-06, "loss": 0.3887, "step": 40279 }, { "epoch": 1.8484695516497638, "grad_norm": 0.48849570751190186, "learning_rate": 3.3311679391250507e-06, "loss": 0.3984, "step": 40280 }, { "epoch": 1.8485154421550183, "grad_norm": 0.5508852005004883, "learning_rate": 3.3309368133257093e-06, "loss": 0.4589, "step": 40281 }, { "epoch": 1.8485613326602726, "grad_norm": 0.5098567605018616, "learning_rate": 3.3307056915399383e-06, "loss": 0.4117, "step": 40282 }, { "epoch": 1.848607223165527, "grad_norm": 0.43602102994918823, "learning_rate": 3.330474573768294e-06, "loss": 0.3172, "step": 40283 }, { "epoch": 1.8486531136707816, "grad_norm": 0.506976842880249, "learning_rate": 3.33024346001133e-06, "loss": 0.443, "step": 40284 }, { "epoch": 1.8486990041760358, "grad_norm": 0.45459091663360596, "learning_rate": 3.3300123502696047e-06, "loss": 0.3248, "step": 40285 }, { "epoch": 1.8487448946812903, "grad_norm": 0.5261184573173523, "learning_rate": 3.3297812445436728e-06, "loss": 0.3752, "step": 40286 }, { "epoch": 1.8487907851865448, "grad_norm": 0.44434213638305664, "learning_rate": 3.3295501428340886e-06, "loss": 0.301, "step": 40287 }, { "epoch": 1.8488366756917993, "grad_norm": 0.4724124073982239, "learning_rate": 3.3293190451414104e-06, "loss": 0.3754, "step": 40288 }, { "epoch": 1.8488825661970538, "grad_norm": 0.5689067244529724, "learning_rate": 3.3290879514661926e-06, "loss": 0.4046, "step": 40289 }, { "epoch": 1.8489284567023083, "grad_norm": 0.5080040097236633, "learning_rate": 3.328856861808989e-06, "loss": 0.4193, "step": 40290 }, { "epoch": 1.8489743472075628, "grad_norm": 0.47288674116134644, "learning_rate": 3.3286257761703588e-06, "loss": 0.369, "step": 40291 }, { "epoch": 1.8490202377128173, "grad_norm": 0.46641141176223755, "learning_rate": 3.328394694550858e-06, "loss": 0.322, "step": 40292 }, { "epoch": 1.8490661282180718, "grad_norm": 0.46567028760910034, "learning_rate": 3.3281636169510366e-06, "loss": 0.3303, "step": 40293 }, { "epoch": 1.849112018723326, "grad_norm": 0.4862552583217621, "learning_rate": 3.327932543371456e-06, "loss": 0.3449, "step": 40294 }, { "epoch": 1.8491579092285806, "grad_norm": 0.5060927271842957, "learning_rate": 3.32770147381267e-06, "loss": 0.3841, "step": 40295 }, { "epoch": 1.849203799733835, "grad_norm": 0.4582996666431427, "learning_rate": 3.327470408275234e-06, "loss": 0.3368, "step": 40296 }, { "epoch": 1.8492496902390896, "grad_norm": 0.41669654846191406, "learning_rate": 3.327239346759704e-06, "loss": 0.2736, "step": 40297 }, { "epoch": 1.8492955807443439, "grad_norm": 0.5514963269233704, "learning_rate": 3.3270082892666354e-06, "loss": 0.3806, "step": 40298 }, { "epoch": 1.8493414712495984, "grad_norm": 0.5049227476119995, "learning_rate": 3.326777235796582e-06, "loss": 0.4137, "step": 40299 }, { "epoch": 1.8493873617548529, "grad_norm": 0.4483928084373474, "learning_rate": 3.3265461863501036e-06, "loss": 0.2924, "step": 40300 }, { "epoch": 1.8494332522601074, "grad_norm": 0.4311830401420593, "learning_rate": 3.3263151409277527e-06, "loss": 0.2638, "step": 40301 }, { "epoch": 1.8494791427653618, "grad_norm": 0.5194509029388428, "learning_rate": 3.3260840995300853e-06, "loss": 0.4682, "step": 40302 }, { "epoch": 1.8495250332706163, "grad_norm": 0.4780785143375397, "learning_rate": 3.325853062157658e-06, "loss": 0.3734, "step": 40303 }, { "epoch": 1.8495709237758708, "grad_norm": 0.4819948971271515, "learning_rate": 3.3256220288110263e-06, "loss": 0.3773, "step": 40304 }, { "epoch": 1.8496168142811253, "grad_norm": 0.47465479373931885, "learning_rate": 3.3253909994907425e-06, "loss": 0.3794, "step": 40305 }, { "epoch": 1.8496627047863798, "grad_norm": 0.4980343282222748, "learning_rate": 3.3251599741973674e-06, "loss": 0.3844, "step": 40306 }, { "epoch": 1.8497085952916341, "grad_norm": 0.48781460523605347, "learning_rate": 3.324928952931453e-06, "loss": 0.4202, "step": 40307 }, { "epoch": 1.8497544857968886, "grad_norm": 0.4286201298236847, "learning_rate": 3.3246979356935573e-06, "loss": 0.2888, "step": 40308 }, { "epoch": 1.8498003763021431, "grad_norm": 0.4626065492630005, "learning_rate": 3.324466922484232e-06, "loss": 0.3528, "step": 40309 }, { "epoch": 1.8498462668073974, "grad_norm": 0.5187747478485107, "learning_rate": 3.3242359133040366e-06, "loss": 0.4055, "step": 40310 }, { "epoch": 1.8498921573126519, "grad_norm": 0.681985080242157, "learning_rate": 3.324004908153524e-06, "loss": 0.3167, "step": 40311 }, { "epoch": 1.8499380478179064, "grad_norm": 0.4886026382446289, "learning_rate": 3.3237739070332496e-06, "loss": 0.3698, "step": 40312 }, { "epoch": 1.8499839383231609, "grad_norm": 0.47821518778800964, "learning_rate": 3.3235429099437723e-06, "loss": 0.3825, "step": 40313 }, { "epoch": 1.8500298288284154, "grad_norm": 0.5394052863121033, "learning_rate": 3.3233119168856454e-06, "loss": 0.4648, "step": 40314 }, { "epoch": 1.8500757193336699, "grad_norm": 0.4873966574668884, "learning_rate": 3.323080927859421e-06, "loss": 0.3259, "step": 40315 }, { "epoch": 1.8501216098389244, "grad_norm": 0.4885316491127014, "learning_rate": 3.3228499428656603e-06, "loss": 0.4246, "step": 40316 }, { "epoch": 1.8501675003441789, "grad_norm": 0.4414200782775879, "learning_rate": 3.322618961904916e-06, "loss": 0.2915, "step": 40317 }, { "epoch": 1.8502133908494334, "grad_norm": 0.5047275424003601, "learning_rate": 3.3223879849777426e-06, "loss": 0.3513, "step": 40318 }, { "epoch": 1.8502592813546879, "grad_norm": 0.5115063786506653, "learning_rate": 3.3221570120846974e-06, "loss": 0.4235, "step": 40319 }, { "epoch": 1.8503051718599421, "grad_norm": 0.5689840912818909, "learning_rate": 3.3219260432263357e-06, "loss": 0.3681, "step": 40320 }, { "epoch": 1.8503510623651966, "grad_norm": 0.517750084400177, "learning_rate": 3.3216950784032097e-06, "loss": 0.388, "step": 40321 }, { "epoch": 1.8503969528704511, "grad_norm": 0.49009838700294495, "learning_rate": 3.321464117615879e-06, "loss": 0.3593, "step": 40322 }, { "epoch": 1.8504428433757054, "grad_norm": 0.4579828977584839, "learning_rate": 3.3212331608648986e-06, "loss": 0.3769, "step": 40323 }, { "epoch": 1.85048873388096, "grad_norm": 0.4815659523010254, "learning_rate": 3.32100220815082e-06, "loss": 0.3907, "step": 40324 }, { "epoch": 1.8505346243862144, "grad_norm": 0.4690357744693756, "learning_rate": 3.320771259474204e-06, "loss": 0.3501, "step": 40325 }, { "epoch": 1.850580514891469, "grad_norm": 0.4366646409034729, "learning_rate": 3.320540314835602e-06, "loss": 0.2868, "step": 40326 }, { "epoch": 1.8506264053967234, "grad_norm": 0.49968916177749634, "learning_rate": 3.3203093742355685e-06, "loss": 0.3309, "step": 40327 }, { "epoch": 1.850672295901978, "grad_norm": 0.4759405553340912, "learning_rate": 3.3200784376746632e-06, "loss": 0.3633, "step": 40328 }, { "epoch": 1.8507181864072324, "grad_norm": 0.4625864028930664, "learning_rate": 3.3198475051534386e-06, "loss": 0.3927, "step": 40329 }, { "epoch": 1.850764076912487, "grad_norm": 0.4834825396537781, "learning_rate": 3.3196165766724497e-06, "loss": 0.3378, "step": 40330 }, { "epoch": 1.8508099674177414, "grad_norm": 0.4236920177936554, "learning_rate": 3.319385652232253e-06, "loss": 0.3097, "step": 40331 }, { "epoch": 1.850855857922996, "grad_norm": 0.5523902773857117, "learning_rate": 3.3191547318334043e-06, "loss": 0.4958, "step": 40332 }, { "epoch": 1.8509017484282502, "grad_norm": 0.43546047806739807, "learning_rate": 3.318923815476456e-06, "loss": 0.2831, "step": 40333 }, { "epoch": 1.8509476389335047, "grad_norm": 0.47303926944732666, "learning_rate": 3.318692903161967e-06, "loss": 0.3639, "step": 40334 }, { "epoch": 1.8509935294387592, "grad_norm": 0.43903714418411255, "learning_rate": 3.3184619948904905e-06, "loss": 0.3352, "step": 40335 }, { "epoch": 1.8510394199440134, "grad_norm": 0.5200567245483398, "learning_rate": 3.318231090662582e-06, "loss": 0.3702, "step": 40336 }, { "epoch": 1.851085310449268, "grad_norm": 0.47132304310798645, "learning_rate": 3.318000190478798e-06, "loss": 0.3095, "step": 40337 }, { "epoch": 1.8511312009545224, "grad_norm": 0.44837722182273865, "learning_rate": 3.317769294339692e-06, "loss": 0.2888, "step": 40338 }, { "epoch": 1.851177091459777, "grad_norm": 0.44245871901512146, "learning_rate": 3.31753840224582e-06, "loss": 0.324, "step": 40339 }, { "epoch": 1.8512229819650314, "grad_norm": 0.5134694576263428, "learning_rate": 3.317307514197737e-06, "loss": 0.4247, "step": 40340 }, { "epoch": 1.851268872470286, "grad_norm": 0.44494450092315674, "learning_rate": 3.3170766301959985e-06, "loss": 0.3162, "step": 40341 }, { "epoch": 1.8513147629755404, "grad_norm": 0.47726595401763916, "learning_rate": 3.31684575024116e-06, "loss": 0.3303, "step": 40342 }, { "epoch": 1.851360653480795, "grad_norm": 0.44975194334983826, "learning_rate": 3.3166148743337734e-06, "loss": 0.323, "step": 40343 }, { "epoch": 1.8514065439860494, "grad_norm": 0.5017104148864746, "learning_rate": 3.3163840024743998e-06, "loss": 0.4164, "step": 40344 }, { "epoch": 1.8514524344913037, "grad_norm": 0.4444129168987274, "learning_rate": 3.316153134663591e-06, "loss": 0.3172, "step": 40345 }, { "epoch": 1.8514983249965582, "grad_norm": 0.5043652057647705, "learning_rate": 3.315922270901901e-06, "loss": 0.393, "step": 40346 }, { "epoch": 1.8515442155018127, "grad_norm": 0.4725661873817444, "learning_rate": 3.315691411189888e-06, "loss": 0.3351, "step": 40347 }, { "epoch": 1.851590106007067, "grad_norm": 0.4680109918117523, "learning_rate": 3.315460555528105e-06, "loss": 0.3614, "step": 40348 }, { "epoch": 1.8516359965123215, "grad_norm": 0.44956454634666443, "learning_rate": 3.3152297039171066e-06, "loss": 0.3262, "step": 40349 }, { "epoch": 1.851681887017576, "grad_norm": 0.447170615196228, "learning_rate": 3.31499885635745e-06, "loss": 0.3391, "step": 40350 }, { "epoch": 1.8517277775228305, "grad_norm": 0.44262704253196716, "learning_rate": 3.31476801284969e-06, "loss": 0.2813, "step": 40351 }, { "epoch": 1.851773668028085, "grad_norm": 0.4534452557563782, "learning_rate": 3.31453717339438e-06, "loss": 0.373, "step": 40352 }, { "epoch": 1.8518195585333395, "grad_norm": 0.45108890533447266, "learning_rate": 3.314306337992077e-06, "loss": 0.3058, "step": 40353 }, { "epoch": 1.851865449038594, "grad_norm": 0.46963900327682495, "learning_rate": 3.314075506643335e-06, "loss": 0.3667, "step": 40354 }, { "epoch": 1.8519113395438485, "grad_norm": 0.5113492608070374, "learning_rate": 3.3138446793487077e-06, "loss": 0.3574, "step": 40355 }, { "epoch": 1.851957230049103, "grad_norm": 0.47289562225341797, "learning_rate": 3.3136138561087537e-06, "loss": 0.347, "step": 40356 }, { "epoch": 1.8520031205543575, "grad_norm": 0.45599907636642456, "learning_rate": 3.313383036924026e-06, "loss": 0.333, "step": 40357 }, { "epoch": 1.8520490110596117, "grad_norm": 0.4962487816810608, "learning_rate": 3.3131522217950794e-06, "loss": 0.4054, "step": 40358 }, { "epoch": 1.8520949015648662, "grad_norm": 0.4903612434864044, "learning_rate": 3.3129214107224696e-06, "loss": 0.3681, "step": 40359 }, { "epoch": 1.8521407920701207, "grad_norm": 0.4585355520248413, "learning_rate": 3.312690603706752e-06, "loss": 0.3355, "step": 40360 }, { "epoch": 1.852186682575375, "grad_norm": 0.5049539804458618, "learning_rate": 3.3124598007484793e-06, "loss": 0.3997, "step": 40361 }, { "epoch": 1.8522325730806295, "grad_norm": 0.4485749900341034, "learning_rate": 3.3122290018482094e-06, "loss": 0.3335, "step": 40362 }, { "epoch": 1.852278463585884, "grad_norm": 0.5551913976669312, "learning_rate": 3.311998207006496e-06, "loss": 0.4805, "step": 40363 }, { "epoch": 1.8523243540911385, "grad_norm": 0.46636343002319336, "learning_rate": 3.311767416223892e-06, "loss": 0.3563, "step": 40364 }, { "epoch": 1.852370244596393, "grad_norm": 0.474102646112442, "learning_rate": 3.311536629500957e-06, "loss": 0.3618, "step": 40365 }, { "epoch": 1.8524161351016475, "grad_norm": 0.49373120069503784, "learning_rate": 3.311305846838243e-06, "loss": 0.3934, "step": 40366 }, { "epoch": 1.852462025606902, "grad_norm": 0.4369644522666931, "learning_rate": 3.3110750682363054e-06, "loss": 0.3107, "step": 40367 }, { "epoch": 1.8525079161121565, "grad_norm": 0.4594891369342804, "learning_rate": 3.3108442936956997e-06, "loss": 0.3058, "step": 40368 }, { "epoch": 1.852553806617411, "grad_norm": 0.47270017862319946, "learning_rate": 3.31061352321698e-06, "loss": 0.3534, "step": 40369 }, { "epoch": 1.8525996971226655, "grad_norm": 0.4611585736274719, "learning_rate": 3.3103827568006996e-06, "loss": 0.3446, "step": 40370 }, { "epoch": 1.8526455876279198, "grad_norm": 0.49016016721725464, "learning_rate": 3.3101519944474183e-06, "loss": 0.373, "step": 40371 }, { "epoch": 1.8526914781331743, "grad_norm": 0.4962975084781647, "learning_rate": 3.3099212361576873e-06, "loss": 0.3924, "step": 40372 }, { "epoch": 1.8527373686384287, "grad_norm": 0.44242244958877563, "learning_rate": 3.309690481932062e-06, "loss": 0.3512, "step": 40373 }, { "epoch": 1.852783259143683, "grad_norm": 0.4601365327835083, "learning_rate": 3.3094597317710977e-06, "loss": 0.3653, "step": 40374 }, { "epoch": 1.8528291496489375, "grad_norm": 0.4547540843486786, "learning_rate": 3.309228985675349e-06, "loss": 0.304, "step": 40375 }, { "epoch": 1.852875040154192, "grad_norm": 0.45722106099128723, "learning_rate": 3.3089982436453714e-06, "loss": 0.3573, "step": 40376 }, { "epoch": 1.8529209306594465, "grad_norm": 0.4594864845275879, "learning_rate": 3.308767505681718e-06, "loss": 0.3615, "step": 40377 }, { "epoch": 1.852966821164701, "grad_norm": 0.4543708264827728, "learning_rate": 3.3085367717849465e-06, "loss": 0.3179, "step": 40378 }, { "epoch": 1.8530127116699555, "grad_norm": 0.4803710877895355, "learning_rate": 3.3083060419556102e-06, "loss": 0.2553, "step": 40379 }, { "epoch": 1.85305860217521, "grad_norm": 0.45421984791755676, "learning_rate": 3.308075316194263e-06, "loss": 0.3481, "step": 40380 }, { "epoch": 1.8531044926804645, "grad_norm": 0.48005250096321106, "learning_rate": 3.3078445945014614e-06, "loss": 0.3431, "step": 40381 }, { "epoch": 1.853150383185719, "grad_norm": 0.4485762417316437, "learning_rate": 3.3076138768777597e-06, "loss": 0.3273, "step": 40382 }, { "epoch": 1.8531962736909733, "grad_norm": 0.431786447763443, "learning_rate": 3.3073831633237117e-06, "loss": 0.3012, "step": 40383 }, { "epoch": 1.8532421641962278, "grad_norm": 0.4938342273235321, "learning_rate": 3.3071524538398735e-06, "loss": 0.343, "step": 40384 }, { "epoch": 1.8532880547014823, "grad_norm": 0.4701195955276489, "learning_rate": 3.3069217484268e-06, "loss": 0.3463, "step": 40385 }, { "epoch": 1.8533339452067368, "grad_norm": 0.47680822014808655, "learning_rate": 3.3066910470850432e-06, "loss": 0.3482, "step": 40386 }, { "epoch": 1.853379835711991, "grad_norm": 0.4916174113750458, "learning_rate": 3.3064603498151617e-06, "loss": 0.39, "step": 40387 }, { "epoch": 1.8534257262172456, "grad_norm": 0.47379714250564575, "learning_rate": 3.3062296566177087e-06, "loss": 0.3868, "step": 40388 }, { "epoch": 1.8534716167225, "grad_norm": 0.4527122676372528, "learning_rate": 3.305998967493238e-06, "loss": 0.3119, "step": 40389 }, { "epoch": 1.8535175072277545, "grad_norm": 0.47671982645988464, "learning_rate": 3.305768282442306e-06, "loss": 0.324, "step": 40390 }, { "epoch": 1.853563397733009, "grad_norm": 0.4962898790836334, "learning_rate": 3.305537601465466e-06, "loss": 0.3897, "step": 40391 }, { "epoch": 1.8536092882382635, "grad_norm": 0.5074235796928406, "learning_rate": 3.305306924563272e-06, "loss": 0.3738, "step": 40392 }, { "epoch": 1.853655178743518, "grad_norm": 0.45273494720458984, "learning_rate": 3.3050762517362824e-06, "loss": 0.3228, "step": 40393 }, { "epoch": 1.8537010692487725, "grad_norm": 0.49693411588668823, "learning_rate": 3.3048455829850486e-06, "loss": 0.3997, "step": 40394 }, { "epoch": 1.853746959754027, "grad_norm": 0.4722270369529724, "learning_rate": 3.3046149183101252e-06, "loss": 0.358, "step": 40395 }, { "epoch": 1.8537928502592813, "grad_norm": 0.47275233268737793, "learning_rate": 3.304384257712069e-06, "loss": 0.3652, "step": 40396 }, { "epoch": 1.8538387407645358, "grad_norm": 0.46848970651626587, "learning_rate": 3.304153601191434e-06, "loss": 0.3751, "step": 40397 }, { "epoch": 1.8538846312697903, "grad_norm": 0.4266578257083893, "learning_rate": 3.3039229487487717e-06, "loss": 0.281, "step": 40398 }, { "epoch": 1.8539305217750446, "grad_norm": 0.4510553181171417, "learning_rate": 3.3036923003846417e-06, "loss": 0.3301, "step": 40399 }, { "epoch": 1.853976412280299, "grad_norm": 0.4405747354030609, "learning_rate": 3.3034616560995963e-06, "loss": 0.2968, "step": 40400 }, { "epoch": 1.8540223027855536, "grad_norm": 0.507162868976593, "learning_rate": 3.3032310158941893e-06, "loss": 0.4171, "step": 40401 }, { "epoch": 1.854068193290808, "grad_norm": 0.4775787591934204, "learning_rate": 3.303000379768977e-06, "loss": 0.3502, "step": 40402 }, { "epoch": 1.8541140837960626, "grad_norm": 0.4950236678123474, "learning_rate": 3.3027697477245135e-06, "loss": 0.375, "step": 40403 }, { "epoch": 1.854159974301317, "grad_norm": 0.4665931761264801, "learning_rate": 3.3025391197613503e-06, "loss": 0.3595, "step": 40404 }, { "epoch": 1.8542058648065716, "grad_norm": 0.46330899000167847, "learning_rate": 3.3023084958800487e-06, "loss": 0.341, "step": 40405 }, { "epoch": 1.854251755311826, "grad_norm": 0.4906168282032013, "learning_rate": 3.302077876081158e-06, "loss": 0.3849, "step": 40406 }, { "epoch": 1.8542976458170806, "grad_norm": 0.4449165463447571, "learning_rate": 3.301847260365234e-06, "loss": 0.306, "step": 40407 }, { "epoch": 1.854343536322335, "grad_norm": 0.4921450614929199, "learning_rate": 3.3016166487328293e-06, "loss": 0.3547, "step": 40408 }, { "epoch": 1.8543894268275893, "grad_norm": 0.4533887803554535, "learning_rate": 3.301386041184503e-06, "loss": 0.3449, "step": 40409 }, { "epoch": 1.8544353173328438, "grad_norm": 0.40465766191482544, "learning_rate": 3.301155437720807e-06, "loss": 0.2498, "step": 40410 }, { "epoch": 1.8544812078380983, "grad_norm": 0.4959333837032318, "learning_rate": 3.300924838342295e-06, "loss": 0.3238, "step": 40411 }, { "epoch": 1.8545270983433526, "grad_norm": 0.5592528581619263, "learning_rate": 3.300694243049524e-06, "loss": 0.3555, "step": 40412 }, { "epoch": 1.854572988848607, "grad_norm": 0.49321016669273376, "learning_rate": 3.300463651843046e-06, "loss": 0.3944, "step": 40413 }, { "epoch": 1.8546188793538616, "grad_norm": 0.4886530637741089, "learning_rate": 3.3002330647234147e-06, "loss": 0.3601, "step": 40414 }, { "epoch": 1.854664769859116, "grad_norm": 0.46757471561431885, "learning_rate": 3.3000024816911892e-06, "loss": 0.3713, "step": 40415 }, { "epoch": 1.8547106603643706, "grad_norm": 0.6948164701461792, "learning_rate": 3.2997719027469205e-06, "loss": 0.2637, "step": 40416 }, { "epoch": 1.854756550869625, "grad_norm": 0.5493487119674683, "learning_rate": 3.299541327891163e-06, "loss": 0.392, "step": 40417 }, { "epoch": 1.8548024413748796, "grad_norm": 0.47073256969451904, "learning_rate": 3.299310757124473e-06, "loss": 0.3356, "step": 40418 }, { "epoch": 1.854848331880134, "grad_norm": 0.4546600878238678, "learning_rate": 3.2990801904474035e-06, "loss": 0.3254, "step": 40419 }, { "epoch": 1.8548942223853886, "grad_norm": 0.5111712217330933, "learning_rate": 3.2988496278605078e-06, "loss": 0.4136, "step": 40420 }, { "epoch": 1.854940112890643, "grad_norm": 0.49000680446624756, "learning_rate": 3.2986190693643437e-06, "loss": 0.415, "step": 40421 }, { "epoch": 1.8549860033958974, "grad_norm": 0.47147336602211, "learning_rate": 3.298388514959464e-06, "loss": 0.3591, "step": 40422 }, { "epoch": 1.8550318939011519, "grad_norm": 0.4704801142215729, "learning_rate": 3.2981579646464213e-06, "loss": 0.3326, "step": 40423 }, { "epoch": 1.8550777844064064, "grad_norm": 0.48693814873695374, "learning_rate": 3.2979274184257725e-06, "loss": 0.3991, "step": 40424 }, { "epoch": 1.8551236749116606, "grad_norm": 0.5025617480278015, "learning_rate": 3.297696876298072e-06, "loss": 0.3588, "step": 40425 }, { "epoch": 1.8551695654169151, "grad_norm": 0.48962709307670593, "learning_rate": 3.2974663382638705e-06, "loss": 0.3623, "step": 40426 }, { "epoch": 1.8552154559221696, "grad_norm": 0.5061325430870056, "learning_rate": 3.2972358043237286e-06, "loss": 0.4197, "step": 40427 }, { "epoch": 1.8552613464274241, "grad_norm": 0.54630446434021, "learning_rate": 3.2970052744781956e-06, "loss": 0.4393, "step": 40428 }, { "epoch": 1.8553072369326786, "grad_norm": 0.44675150513648987, "learning_rate": 3.2967747487278256e-06, "loss": 0.3368, "step": 40429 }, { "epoch": 1.8553531274379331, "grad_norm": 0.45063328742980957, "learning_rate": 3.2965442270731774e-06, "loss": 0.3318, "step": 40430 }, { "epoch": 1.8553990179431876, "grad_norm": 0.4517485201358795, "learning_rate": 3.2963137095148022e-06, "loss": 0.3408, "step": 40431 }, { "epoch": 1.8554449084484421, "grad_norm": 0.4778563380241394, "learning_rate": 3.2960831960532535e-06, "loss": 0.3267, "step": 40432 }, { "epoch": 1.8554907989536966, "grad_norm": 0.46979695558547974, "learning_rate": 3.295852686689088e-06, "loss": 0.3883, "step": 40433 }, { "epoch": 1.855536689458951, "grad_norm": 0.4589351415634155, "learning_rate": 3.2956221814228593e-06, "loss": 0.3199, "step": 40434 }, { "epoch": 1.8555825799642054, "grad_norm": 0.49148833751678467, "learning_rate": 3.2953916802551193e-06, "loss": 0.4179, "step": 40435 }, { "epoch": 1.85562847046946, "grad_norm": 0.48249709606170654, "learning_rate": 3.2951611831864266e-06, "loss": 0.3449, "step": 40436 }, { "epoch": 1.8556743609747142, "grad_norm": 0.4216356873512268, "learning_rate": 3.2949306902173327e-06, "loss": 0.2628, "step": 40437 }, { "epoch": 1.8557202514799687, "grad_norm": 0.4652479588985443, "learning_rate": 3.294700201348393e-06, "loss": 0.3744, "step": 40438 }, { "epoch": 1.8557661419852232, "grad_norm": 0.4533633887767792, "learning_rate": 3.2944697165801598e-06, "loss": 0.3312, "step": 40439 }, { "epoch": 1.8558120324904777, "grad_norm": 0.47569209337234497, "learning_rate": 3.294239235913189e-06, "loss": 0.3598, "step": 40440 }, { "epoch": 1.8558579229957322, "grad_norm": 0.4762534201145172, "learning_rate": 3.294008759348035e-06, "loss": 0.3888, "step": 40441 }, { "epoch": 1.8559038135009867, "grad_norm": 0.4759160876274109, "learning_rate": 3.2937782868852497e-06, "loss": 0.4382, "step": 40442 }, { "epoch": 1.8559497040062412, "grad_norm": 0.49226096272468567, "learning_rate": 3.293547818525391e-06, "loss": 0.3813, "step": 40443 }, { "epoch": 1.8559955945114956, "grad_norm": 0.45572954416275024, "learning_rate": 3.2933173542690112e-06, "loss": 0.3551, "step": 40444 }, { "epoch": 1.8560414850167501, "grad_norm": 0.47553253173828125, "learning_rate": 3.293086894116663e-06, "loss": 0.3827, "step": 40445 }, { "epoch": 1.8560873755220046, "grad_norm": 0.44638678431510925, "learning_rate": 3.2928564380689036e-06, "loss": 0.2872, "step": 40446 }, { "epoch": 1.856133266027259, "grad_norm": 0.4659976363182068, "learning_rate": 3.2926259861262855e-06, "loss": 0.3679, "step": 40447 }, { "epoch": 1.8561791565325134, "grad_norm": 0.4820985496044159, "learning_rate": 3.2923955382893614e-06, "loss": 0.3361, "step": 40448 }, { "epoch": 1.856225047037768, "grad_norm": 0.4595310389995575, "learning_rate": 3.2921650945586905e-06, "loss": 0.3569, "step": 40449 }, { "epoch": 1.8562709375430222, "grad_norm": 0.5015496015548706, "learning_rate": 3.2919346549348215e-06, "loss": 0.3795, "step": 40450 }, { "epoch": 1.8563168280482767, "grad_norm": 0.46801725029945374, "learning_rate": 3.2917042194183093e-06, "loss": 0.3564, "step": 40451 }, { "epoch": 1.8563627185535312, "grad_norm": 0.4736338257789612, "learning_rate": 3.2914737880097103e-06, "loss": 0.3722, "step": 40452 }, { "epoch": 1.8564086090587857, "grad_norm": 0.49746474623680115, "learning_rate": 3.2912433607095782e-06, "loss": 0.3622, "step": 40453 }, { "epoch": 1.8564544995640402, "grad_norm": 0.45160624384880066, "learning_rate": 3.2910129375184665e-06, "loss": 0.3278, "step": 40454 }, { "epoch": 1.8565003900692947, "grad_norm": 0.5271814465522766, "learning_rate": 3.290782518436929e-06, "loss": 0.4645, "step": 40455 }, { "epoch": 1.8565462805745492, "grad_norm": 0.4491087794303894, "learning_rate": 3.2905521034655207e-06, "loss": 0.3067, "step": 40456 }, { "epoch": 1.8565921710798037, "grad_norm": 0.4486611485481262, "learning_rate": 3.2903216926047925e-06, "loss": 0.338, "step": 40457 }, { "epoch": 1.8566380615850582, "grad_norm": 0.4695137143135071, "learning_rate": 3.2900912858553035e-06, "loss": 0.3674, "step": 40458 }, { "epoch": 1.8566839520903127, "grad_norm": 0.45977285504341125, "learning_rate": 3.289860883217605e-06, "loss": 0.3595, "step": 40459 }, { "epoch": 1.856729842595567, "grad_norm": 0.4911457598209381, "learning_rate": 3.289630484692251e-06, "loss": 0.3421, "step": 40460 }, { "epoch": 1.8567757331008214, "grad_norm": 0.4917941391468048, "learning_rate": 3.289400090279796e-06, "loss": 0.3954, "step": 40461 }, { "epoch": 1.856821623606076, "grad_norm": 0.4886419475078583, "learning_rate": 3.2891696999807944e-06, "loss": 0.3747, "step": 40462 }, { "epoch": 1.8568675141113302, "grad_norm": 0.4697672426700592, "learning_rate": 3.288939313795798e-06, "loss": 0.292, "step": 40463 }, { "epoch": 1.8569134046165847, "grad_norm": 0.4708062708377838, "learning_rate": 3.288708931725364e-06, "loss": 0.3285, "step": 40464 }, { "epoch": 1.8569592951218392, "grad_norm": 0.47901931405067444, "learning_rate": 3.2884785537700452e-06, "loss": 0.3298, "step": 40465 }, { "epoch": 1.8570051856270937, "grad_norm": 0.4290608763694763, "learning_rate": 3.2882481799303946e-06, "loss": 0.2688, "step": 40466 }, { "epoch": 1.8570510761323482, "grad_norm": 0.45043766498565674, "learning_rate": 3.288017810206967e-06, "loss": 0.3236, "step": 40467 }, { "epoch": 1.8570969666376027, "grad_norm": 0.4941132366657257, "learning_rate": 3.2877874446003167e-06, "loss": 0.4004, "step": 40468 }, { "epoch": 1.8571428571428572, "grad_norm": 0.478247731924057, "learning_rate": 3.2875570831109956e-06, "loss": 0.34, "step": 40469 }, { "epoch": 1.8571887476481117, "grad_norm": 0.5179359316825867, "learning_rate": 3.2873267257395607e-06, "loss": 0.3823, "step": 40470 }, { "epoch": 1.8572346381533662, "grad_norm": 0.4760451018810272, "learning_rate": 3.2870963724865656e-06, "loss": 0.3535, "step": 40471 }, { "epoch": 1.8572805286586205, "grad_norm": 0.49085113406181335, "learning_rate": 3.286866023352562e-06, "loss": 0.3616, "step": 40472 }, { "epoch": 1.857326419163875, "grad_norm": 0.44673463702201843, "learning_rate": 3.286635678338104e-06, "loss": 0.3649, "step": 40473 }, { "epoch": 1.8573723096691295, "grad_norm": 0.46099433302879333, "learning_rate": 3.286405337443747e-06, "loss": 0.3279, "step": 40474 }, { "epoch": 1.857418200174384, "grad_norm": 0.44094225764274597, "learning_rate": 3.2861750006700453e-06, "loss": 0.3351, "step": 40475 }, { "epoch": 1.8574640906796382, "grad_norm": 0.47792744636535645, "learning_rate": 3.28594466801755e-06, "loss": 0.4039, "step": 40476 }, { "epoch": 1.8575099811848927, "grad_norm": 0.478287935256958, "learning_rate": 3.2857143394868184e-06, "loss": 0.3688, "step": 40477 }, { "epoch": 1.8575558716901472, "grad_norm": 0.48180848360061646, "learning_rate": 3.285484015078403e-06, "loss": 0.3792, "step": 40478 }, { "epoch": 1.8576017621954017, "grad_norm": 0.48857712745666504, "learning_rate": 3.2852536947928547e-06, "loss": 0.3795, "step": 40479 }, { "epoch": 1.8576476527006562, "grad_norm": 0.4339185655117035, "learning_rate": 3.2850233786307327e-06, "loss": 0.2812, "step": 40480 }, { "epoch": 1.8576935432059107, "grad_norm": 0.5463237762451172, "learning_rate": 3.2847930665925876e-06, "loss": 0.3457, "step": 40481 }, { "epoch": 1.8577394337111652, "grad_norm": 0.4906579554080963, "learning_rate": 3.2845627586789732e-06, "loss": 0.3874, "step": 40482 }, { "epoch": 1.8577853242164197, "grad_norm": 0.47759681940078735, "learning_rate": 3.2843324548904453e-06, "loss": 0.3848, "step": 40483 }, { "epoch": 1.8578312147216742, "grad_norm": 0.4853752851486206, "learning_rate": 3.2841021552275553e-06, "loss": 0.4042, "step": 40484 }, { "epoch": 1.8578771052269285, "grad_norm": 0.4175878167152405, "learning_rate": 3.283871859690857e-06, "loss": 0.3003, "step": 40485 }, { "epoch": 1.857922995732183, "grad_norm": 0.5099867582321167, "learning_rate": 3.2836415682809077e-06, "loss": 0.4504, "step": 40486 }, { "epoch": 1.8579688862374375, "grad_norm": 0.47399142384529114, "learning_rate": 3.2834112809982576e-06, "loss": 0.3742, "step": 40487 }, { "epoch": 1.8580147767426918, "grad_norm": 0.4545018970966339, "learning_rate": 3.2831809978434608e-06, "loss": 0.3144, "step": 40488 }, { "epoch": 1.8580606672479463, "grad_norm": 0.48586878180503845, "learning_rate": 3.2829507188170733e-06, "loss": 0.35, "step": 40489 }, { "epoch": 1.8581065577532008, "grad_norm": 0.4599045515060425, "learning_rate": 3.282720443919647e-06, "loss": 0.347, "step": 40490 }, { "epoch": 1.8581524482584553, "grad_norm": 0.49765047430992126, "learning_rate": 3.2824901731517343e-06, "loss": 0.3953, "step": 40491 }, { "epoch": 1.8581983387637098, "grad_norm": 0.5206354260444641, "learning_rate": 3.2822599065138927e-06, "loss": 0.4812, "step": 40492 }, { "epoch": 1.8582442292689643, "grad_norm": 0.4445689618587494, "learning_rate": 3.2820296440066747e-06, "loss": 0.2921, "step": 40493 }, { "epoch": 1.8582901197742188, "grad_norm": 0.47815993428230286, "learning_rate": 3.2817993856306307e-06, "loss": 0.3638, "step": 40494 }, { "epoch": 1.8583360102794733, "grad_norm": 0.42235100269317627, "learning_rate": 3.2815691313863186e-06, "loss": 0.2681, "step": 40495 }, { "epoch": 1.8583819007847278, "grad_norm": 0.42937490344047546, "learning_rate": 3.2813388812742897e-06, "loss": 0.2933, "step": 40496 }, { "epoch": 1.8584277912899823, "grad_norm": 0.47735100984573364, "learning_rate": 3.281108635295098e-06, "loss": 0.379, "step": 40497 }, { "epoch": 1.8584736817952365, "grad_norm": 0.4386392831802368, "learning_rate": 3.280878393449299e-06, "loss": 0.3121, "step": 40498 }, { "epoch": 1.858519572300491, "grad_norm": 0.4631252586841583, "learning_rate": 3.280648155737444e-06, "loss": 0.3436, "step": 40499 }, { "epoch": 1.8585654628057455, "grad_norm": 0.45992547273635864, "learning_rate": 3.2804179221600862e-06, "loss": 0.4004, "step": 40500 }, { "epoch": 1.8586113533109998, "grad_norm": 0.508155345916748, "learning_rate": 3.2801876927177823e-06, "loss": 0.4546, "step": 40501 }, { "epoch": 1.8586572438162543, "grad_norm": 0.4374437928199768, "learning_rate": 3.279957467411084e-06, "loss": 0.3227, "step": 40502 }, { "epoch": 1.8587031343215088, "grad_norm": 0.4607063829898834, "learning_rate": 3.2797272462405456e-06, "loss": 0.3417, "step": 40503 }, { "epoch": 1.8587490248267633, "grad_norm": 0.4352935552597046, "learning_rate": 3.2794970292067186e-06, "loss": 0.297, "step": 40504 }, { "epoch": 1.8587949153320178, "grad_norm": 0.49879348278045654, "learning_rate": 3.2792668163101597e-06, "loss": 0.3855, "step": 40505 }, { "epoch": 1.8588408058372723, "grad_norm": 0.46867433190345764, "learning_rate": 3.279036607551421e-06, "loss": 0.3317, "step": 40506 }, { "epoch": 1.8588866963425268, "grad_norm": 0.4629073441028595, "learning_rate": 3.278806402931054e-06, "loss": 0.3186, "step": 40507 }, { "epoch": 1.8589325868477813, "grad_norm": 0.42354097962379456, "learning_rate": 3.2785762024496166e-06, "loss": 0.2919, "step": 40508 }, { "epoch": 1.8589784773530358, "grad_norm": 0.4578912556171417, "learning_rate": 3.27834600610766e-06, "loss": 0.2997, "step": 40509 }, { "epoch": 1.8590243678582903, "grad_norm": 0.4417024552822113, "learning_rate": 3.2781158139057367e-06, "loss": 0.2944, "step": 40510 }, { "epoch": 1.8590702583635446, "grad_norm": 0.4865308403968811, "learning_rate": 3.2778856258444023e-06, "loss": 0.3874, "step": 40511 }, { "epoch": 1.859116148868799, "grad_norm": 0.4919701814651489, "learning_rate": 3.27765544192421e-06, "loss": 0.4023, "step": 40512 }, { "epoch": 1.8591620393740536, "grad_norm": 0.4876241385936737, "learning_rate": 3.2774252621457105e-06, "loss": 0.3991, "step": 40513 }, { "epoch": 1.8592079298793078, "grad_norm": 0.4662734270095825, "learning_rate": 3.2771950865094614e-06, "loss": 0.3734, "step": 40514 }, { "epoch": 1.8592538203845623, "grad_norm": 0.45262831449508667, "learning_rate": 3.276964915016016e-06, "loss": 0.3714, "step": 40515 }, { "epoch": 1.8592997108898168, "grad_norm": 0.4360681474208832, "learning_rate": 3.2767347476659227e-06, "loss": 0.2844, "step": 40516 }, { "epoch": 1.8593456013950713, "grad_norm": 0.4859558045864105, "learning_rate": 3.276504584459741e-06, "loss": 0.3359, "step": 40517 }, { "epoch": 1.8593914919003258, "grad_norm": 0.5471981167793274, "learning_rate": 3.2762744253980205e-06, "loss": 0.4847, "step": 40518 }, { "epoch": 1.8594373824055803, "grad_norm": 0.4872472286224365, "learning_rate": 3.2760442704813162e-06, "loss": 0.4234, "step": 40519 }, { "epoch": 1.8594832729108348, "grad_norm": 0.48090410232543945, "learning_rate": 3.275814119710182e-06, "loss": 0.3834, "step": 40520 }, { "epoch": 1.8595291634160893, "grad_norm": 0.46259573101997375, "learning_rate": 3.2755839730851707e-06, "loss": 0.3672, "step": 40521 }, { "epoch": 1.8595750539213438, "grad_norm": 0.44553032517433167, "learning_rate": 3.2753538306068335e-06, "loss": 0.3021, "step": 40522 }, { "epoch": 1.859620944426598, "grad_norm": 0.4630652666091919, "learning_rate": 3.275123692275728e-06, "loss": 0.3453, "step": 40523 }, { "epoch": 1.8596668349318526, "grad_norm": 0.48131272196769714, "learning_rate": 3.2748935580924057e-06, "loss": 0.366, "step": 40524 }, { "epoch": 1.859712725437107, "grad_norm": 0.4392104744911194, "learning_rate": 3.2746634280574196e-06, "loss": 0.3137, "step": 40525 }, { "epoch": 1.8597586159423614, "grad_norm": 0.504638135433197, "learning_rate": 3.2744333021713237e-06, "loss": 0.4026, "step": 40526 }, { "epoch": 1.8598045064476159, "grad_norm": 0.47061944007873535, "learning_rate": 3.2742031804346707e-06, "loss": 0.4089, "step": 40527 }, { "epoch": 1.8598503969528704, "grad_norm": 0.51091468334198, "learning_rate": 3.273973062848013e-06, "loss": 0.4545, "step": 40528 }, { "epoch": 1.8598962874581249, "grad_norm": 0.45529428124427795, "learning_rate": 3.273742949411908e-06, "loss": 0.3668, "step": 40529 }, { "epoch": 1.8599421779633794, "grad_norm": 0.4591984450817108, "learning_rate": 3.2735128401269055e-06, "loss": 0.3225, "step": 40530 }, { "epoch": 1.8599880684686338, "grad_norm": 0.4552486836910248, "learning_rate": 3.2732827349935585e-06, "loss": 0.335, "step": 40531 }, { "epoch": 1.8600339589738883, "grad_norm": 0.4579552412033081, "learning_rate": 3.273052634012423e-06, "loss": 0.3444, "step": 40532 }, { "epoch": 1.8600798494791428, "grad_norm": 0.4971858263015747, "learning_rate": 3.2728225371840507e-06, "loss": 0.3721, "step": 40533 }, { "epoch": 1.8601257399843973, "grad_norm": 0.49052736163139343, "learning_rate": 3.272592444508993e-06, "loss": 0.3941, "step": 40534 }, { "epoch": 1.8601716304896518, "grad_norm": 0.48033231496810913, "learning_rate": 3.2723623559878083e-06, "loss": 0.3236, "step": 40535 }, { "epoch": 1.8602175209949061, "grad_norm": 0.47988998889923096, "learning_rate": 3.2721322716210457e-06, "loss": 0.3573, "step": 40536 }, { "epoch": 1.8602634115001606, "grad_norm": 0.48498162627220154, "learning_rate": 3.271902191409261e-06, "loss": 0.3692, "step": 40537 }, { "epoch": 1.860309302005415, "grad_norm": 0.46444427967071533, "learning_rate": 3.271672115353003e-06, "loss": 0.3292, "step": 40538 }, { "epoch": 1.8603551925106694, "grad_norm": 0.4641346037387848, "learning_rate": 3.2714420434528305e-06, "loss": 0.3411, "step": 40539 }, { "epoch": 1.8604010830159239, "grad_norm": 0.4939061999320984, "learning_rate": 3.2712119757092936e-06, "loss": 0.4078, "step": 40540 }, { "epoch": 1.8604469735211784, "grad_norm": 0.47170281410217285, "learning_rate": 3.2709819121229457e-06, "loss": 0.3402, "step": 40541 }, { "epoch": 1.8604928640264329, "grad_norm": 0.5147426724433899, "learning_rate": 3.2707518526943416e-06, "loss": 0.4621, "step": 40542 }, { "epoch": 1.8605387545316874, "grad_norm": 0.45042237639427185, "learning_rate": 3.270521797424034e-06, "loss": 0.3181, "step": 40543 }, { "epoch": 1.8605846450369419, "grad_norm": 0.43745186924934387, "learning_rate": 3.2702917463125727e-06, "loss": 0.2944, "step": 40544 }, { "epoch": 1.8606305355421964, "grad_norm": 0.4619556963443756, "learning_rate": 3.2700616993605166e-06, "loss": 0.3252, "step": 40545 }, { "epoch": 1.8606764260474509, "grad_norm": 0.5352588295936584, "learning_rate": 3.269831656568416e-06, "loss": 0.3448, "step": 40546 }, { "epoch": 1.8607223165527054, "grad_norm": 0.4807080626487732, "learning_rate": 3.269601617936823e-06, "loss": 0.3714, "step": 40547 }, { "epoch": 1.8607682070579599, "grad_norm": 0.45593440532684326, "learning_rate": 3.2693715834662933e-06, "loss": 0.3477, "step": 40548 }, { "epoch": 1.8608140975632141, "grad_norm": 0.4112125337123871, "learning_rate": 3.269141553157378e-06, "loss": 0.281, "step": 40549 }, { "epoch": 1.8608599880684686, "grad_norm": 0.5036879181861877, "learning_rate": 3.2689115270106296e-06, "loss": 0.3991, "step": 40550 }, { "epoch": 1.8609058785737231, "grad_norm": 0.44279763102531433, "learning_rate": 3.2686815050266047e-06, "loss": 0.2933, "step": 40551 }, { "epoch": 1.8609517690789774, "grad_norm": 0.5018121004104614, "learning_rate": 3.2684514872058536e-06, "loss": 0.3782, "step": 40552 }, { "epoch": 1.860997659584232, "grad_norm": 0.5405827164649963, "learning_rate": 3.2682214735489305e-06, "loss": 0.4422, "step": 40553 }, { "epoch": 1.8610435500894864, "grad_norm": 0.4483787417411804, "learning_rate": 3.267991464056388e-06, "loss": 0.2834, "step": 40554 }, { "epoch": 1.861089440594741, "grad_norm": 0.4710089862346649, "learning_rate": 3.26776145872878e-06, "loss": 0.3488, "step": 40555 }, { "epoch": 1.8611353310999954, "grad_norm": 0.5031662583351135, "learning_rate": 3.2675314575666567e-06, "loss": 0.382, "step": 40556 }, { "epoch": 1.86118122160525, "grad_norm": 0.4852766692638397, "learning_rate": 3.2673014605705756e-06, "loss": 0.4361, "step": 40557 }, { "epoch": 1.8612271121105044, "grad_norm": 0.44936904311180115, "learning_rate": 3.2670714677410877e-06, "loss": 0.3462, "step": 40558 }, { "epoch": 1.861273002615759, "grad_norm": 0.4642120897769928, "learning_rate": 3.266841479078745e-06, "loss": 0.3416, "step": 40559 }, { "epoch": 1.8613188931210134, "grad_norm": 0.4379352927207947, "learning_rate": 3.2666114945841022e-06, "loss": 0.2628, "step": 40560 }, { "epoch": 1.8613647836262677, "grad_norm": 0.47870543599128723, "learning_rate": 3.266381514257712e-06, "loss": 0.3578, "step": 40561 }, { "epoch": 1.8614106741315222, "grad_norm": 0.5182487964630127, "learning_rate": 3.266151538100125e-06, "loss": 0.4522, "step": 40562 }, { "epoch": 1.8614565646367767, "grad_norm": 0.4527634382247925, "learning_rate": 3.2659215661118994e-06, "loss": 0.3019, "step": 40563 }, { "epoch": 1.8615024551420312, "grad_norm": 0.44533082842826843, "learning_rate": 3.2656915982935843e-06, "loss": 0.3166, "step": 40564 }, { "epoch": 1.8615483456472854, "grad_norm": 0.48734965920448303, "learning_rate": 3.265461634645731e-06, "loss": 0.3316, "step": 40565 }, { "epoch": 1.86159423615254, "grad_norm": 0.46406134963035583, "learning_rate": 3.2652316751688974e-06, "loss": 0.4028, "step": 40566 }, { "epoch": 1.8616401266577944, "grad_norm": 0.41516074538230896, "learning_rate": 3.265001719863634e-06, "loss": 0.2588, "step": 40567 }, { "epoch": 1.861686017163049, "grad_norm": 0.4555928409099579, "learning_rate": 3.264771768730495e-06, "loss": 0.361, "step": 40568 }, { "epoch": 1.8617319076683034, "grad_norm": 0.48160600662231445, "learning_rate": 3.26454182177003e-06, "loss": 0.4037, "step": 40569 }, { "epoch": 1.861777798173558, "grad_norm": 0.616723895072937, "learning_rate": 3.2643118789827953e-06, "loss": 0.3449, "step": 40570 }, { "epoch": 1.8618236886788124, "grad_norm": 0.45792356133461, "learning_rate": 3.2640819403693423e-06, "loss": 0.363, "step": 40571 }, { "epoch": 1.861869579184067, "grad_norm": 0.4561545252799988, "learning_rate": 3.263852005930223e-06, "loss": 0.3254, "step": 40572 }, { "epoch": 1.8619154696893214, "grad_norm": 0.45796236395835876, "learning_rate": 3.2636220756659937e-06, "loss": 0.3573, "step": 40573 }, { "epoch": 1.8619613601945757, "grad_norm": 0.4802989065647125, "learning_rate": 3.2633921495772053e-06, "loss": 0.3506, "step": 40574 }, { "epoch": 1.8620072506998302, "grad_norm": 0.6010094285011292, "learning_rate": 3.263162227664409e-06, "loss": 0.5173, "step": 40575 }, { "epoch": 1.8620531412050847, "grad_norm": 0.44536447525024414, "learning_rate": 3.2629323099281617e-06, "loss": 0.2904, "step": 40576 }, { "epoch": 1.862099031710339, "grad_norm": 0.44536781311035156, "learning_rate": 3.2627023963690128e-06, "loss": 0.2703, "step": 40577 }, { "epoch": 1.8621449222155935, "grad_norm": 0.4282838702201843, "learning_rate": 3.2624724869875147e-06, "loss": 0.2965, "step": 40578 }, { "epoch": 1.862190812720848, "grad_norm": 0.44729307293891907, "learning_rate": 3.2622425817842242e-06, "loss": 0.3085, "step": 40579 }, { "epoch": 1.8622367032261025, "grad_norm": 0.47040292620658875, "learning_rate": 3.262012680759692e-06, "loss": 0.3117, "step": 40580 }, { "epoch": 1.862282593731357, "grad_norm": 0.48567116260528564, "learning_rate": 3.261782783914469e-06, "loss": 0.3847, "step": 40581 }, { "epoch": 1.8623284842366115, "grad_norm": 0.4573099613189697, "learning_rate": 3.2615528912491113e-06, "loss": 0.3193, "step": 40582 }, { "epoch": 1.862374374741866, "grad_norm": 0.4340274930000305, "learning_rate": 3.2613230027641696e-06, "loss": 0.2949, "step": 40583 }, { "epoch": 1.8624202652471205, "grad_norm": 0.4891645610332489, "learning_rate": 3.2610931184601958e-06, "loss": 0.37, "step": 40584 }, { "epoch": 1.862466155752375, "grad_norm": 0.47471436858177185, "learning_rate": 3.2608632383377474e-06, "loss": 0.3567, "step": 40585 }, { "epoch": 1.8625120462576294, "grad_norm": 0.49920758605003357, "learning_rate": 3.260633362397373e-06, "loss": 0.378, "step": 40586 }, { "epoch": 1.8625579367628837, "grad_norm": 0.48607760667800903, "learning_rate": 3.260403490639624e-06, "loss": 0.4201, "step": 40587 }, { "epoch": 1.8626038272681382, "grad_norm": 0.49308910965919495, "learning_rate": 3.2601736230650583e-06, "loss": 0.3755, "step": 40588 }, { "epoch": 1.8626497177733927, "grad_norm": 0.4899759888648987, "learning_rate": 3.2599437596742255e-06, "loss": 0.3856, "step": 40589 }, { "epoch": 1.862695608278647, "grad_norm": 0.45371419191360474, "learning_rate": 3.259713900467678e-06, "loss": 0.319, "step": 40590 }, { "epoch": 1.8627414987839015, "grad_norm": 0.44646432995796204, "learning_rate": 3.2594840454459695e-06, "loss": 0.3402, "step": 40591 }, { "epoch": 1.862787389289156, "grad_norm": 0.46981915831565857, "learning_rate": 3.2592541946096536e-06, "loss": 0.3567, "step": 40592 }, { "epoch": 1.8628332797944105, "grad_norm": 0.4852912724018097, "learning_rate": 3.259024347959279e-06, "loss": 0.3918, "step": 40593 }, { "epoch": 1.862879170299665, "grad_norm": 0.47390133142471313, "learning_rate": 3.258794505495404e-06, "loss": 0.3559, "step": 40594 }, { "epoch": 1.8629250608049195, "grad_norm": 0.45345208048820496, "learning_rate": 3.258564667218579e-06, "loss": 0.3533, "step": 40595 }, { "epoch": 1.862970951310174, "grad_norm": 0.4535030126571655, "learning_rate": 3.2583348331293545e-06, "loss": 0.3477, "step": 40596 }, { "epoch": 1.8630168418154285, "grad_norm": 0.4521172046661377, "learning_rate": 3.2581050032282866e-06, "loss": 0.333, "step": 40597 }, { "epoch": 1.863062732320683, "grad_norm": 0.43522173166275024, "learning_rate": 3.2578751775159267e-06, "loss": 0.3031, "step": 40598 }, { "epoch": 1.8631086228259375, "grad_norm": 0.4725697636604309, "learning_rate": 3.2576453559928245e-06, "loss": 0.3582, "step": 40599 }, { "epoch": 1.8631545133311918, "grad_norm": 0.47011974453926086, "learning_rate": 3.2574155386595373e-06, "loss": 0.3827, "step": 40600 }, { "epoch": 1.8632004038364463, "grad_norm": 0.45221254229545593, "learning_rate": 3.257185725516616e-06, "loss": 0.3121, "step": 40601 }, { "epoch": 1.8632462943417007, "grad_norm": 0.4872588813304901, "learning_rate": 3.2569559165646136e-06, "loss": 0.3795, "step": 40602 }, { "epoch": 1.863292184846955, "grad_norm": 0.4899713397026062, "learning_rate": 3.2567261118040804e-06, "loss": 0.4439, "step": 40603 }, { "epoch": 1.8633380753522095, "grad_norm": 0.4765571057796478, "learning_rate": 3.2564963112355717e-06, "loss": 0.3656, "step": 40604 }, { "epoch": 1.863383965857464, "grad_norm": 0.4983557164669037, "learning_rate": 3.256266514859639e-06, "loss": 0.3777, "step": 40605 }, { "epoch": 1.8634298563627185, "grad_norm": 0.45017385482788086, "learning_rate": 3.256036722676833e-06, "loss": 0.3547, "step": 40606 }, { "epoch": 1.863475746867973, "grad_norm": 0.4200134575366974, "learning_rate": 3.2558069346877117e-06, "loss": 0.2997, "step": 40607 }, { "epoch": 1.8635216373732275, "grad_norm": 0.45535385608673096, "learning_rate": 3.255577150892823e-06, "loss": 0.2901, "step": 40608 }, { "epoch": 1.863567527878482, "grad_norm": 0.4469875395298004, "learning_rate": 3.2553473712927185e-06, "loss": 0.3252, "step": 40609 }, { "epoch": 1.8636134183837365, "grad_norm": 0.47536611557006836, "learning_rate": 3.2551175958879556e-06, "loss": 0.3495, "step": 40610 }, { "epoch": 1.863659308888991, "grad_norm": 0.4794268310070038, "learning_rate": 3.254887824679084e-06, "loss": 0.3242, "step": 40611 }, { "epoch": 1.8637051993942453, "grad_norm": 0.45534518361091614, "learning_rate": 3.254658057666654e-06, "loss": 0.3257, "step": 40612 }, { "epoch": 1.8637510898994998, "grad_norm": 0.5086663961410522, "learning_rate": 3.254428294851223e-06, "loss": 0.4096, "step": 40613 }, { "epoch": 1.8637969804047543, "grad_norm": 0.4981512725353241, "learning_rate": 3.254198536233341e-06, "loss": 0.3848, "step": 40614 }, { "epoch": 1.8638428709100086, "grad_norm": 0.46958738565444946, "learning_rate": 3.253968781813558e-06, "loss": 0.3654, "step": 40615 }, { "epoch": 1.863888761415263, "grad_norm": 0.4885769188404083, "learning_rate": 3.253739031592431e-06, "loss": 0.3853, "step": 40616 }, { "epoch": 1.8639346519205175, "grad_norm": 0.5148939490318298, "learning_rate": 3.253509285570511e-06, "loss": 0.3992, "step": 40617 }, { "epoch": 1.863980542425772, "grad_norm": 0.4883454442024231, "learning_rate": 3.253279543748349e-06, "loss": 0.3275, "step": 40618 }, { "epoch": 1.8640264329310265, "grad_norm": 0.4816766381263733, "learning_rate": 3.2530498061264994e-06, "loss": 0.3657, "step": 40619 }, { "epoch": 1.864072323436281, "grad_norm": 0.46653613448143005, "learning_rate": 3.2528200727055128e-06, "loss": 0.3817, "step": 40620 }, { "epoch": 1.8641182139415355, "grad_norm": 0.44647395610809326, "learning_rate": 3.2525903434859413e-06, "loss": 0.3254, "step": 40621 }, { "epoch": 1.86416410444679, "grad_norm": 0.4859084486961365, "learning_rate": 3.25236061846834e-06, "loss": 0.4367, "step": 40622 }, { "epoch": 1.8642099949520445, "grad_norm": 0.48087742924690247, "learning_rate": 3.2521308976532605e-06, "loss": 0.393, "step": 40623 }, { "epoch": 1.864255885457299, "grad_norm": 0.5647234320640564, "learning_rate": 3.251901181041253e-06, "loss": 0.4576, "step": 40624 }, { "epoch": 1.8643017759625533, "grad_norm": 0.4743705987930298, "learning_rate": 3.2516714686328725e-06, "loss": 0.377, "step": 40625 }, { "epoch": 1.8643476664678078, "grad_norm": 0.46758297085762024, "learning_rate": 3.251441760428671e-06, "loss": 0.3771, "step": 40626 }, { "epoch": 1.8643935569730623, "grad_norm": 0.4738147258758545, "learning_rate": 3.251212056429197e-06, "loss": 0.3869, "step": 40627 }, { "epoch": 1.8644394474783166, "grad_norm": 0.4917566776275635, "learning_rate": 3.250982356635009e-06, "loss": 0.4008, "step": 40628 }, { "epoch": 1.864485337983571, "grad_norm": 0.4821847379207611, "learning_rate": 3.250752661046657e-06, "loss": 0.3543, "step": 40629 }, { "epoch": 1.8645312284888256, "grad_norm": 0.4127359688282013, "learning_rate": 3.25052296966469e-06, "loss": 0.2379, "step": 40630 }, { "epoch": 1.86457711899408, "grad_norm": 0.44102901220321655, "learning_rate": 3.250293282489665e-06, "loss": 0.2933, "step": 40631 }, { "epoch": 1.8646230094993346, "grad_norm": 0.4247455894947052, "learning_rate": 3.250063599522133e-06, "loss": 0.2902, "step": 40632 }, { "epoch": 1.864668900004589, "grad_norm": 0.4821682274341583, "learning_rate": 3.249833920762644e-06, "loss": 0.371, "step": 40633 }, { "epoch": 1.8647147905098436, "grad_norm": 0.5072263479232788, "learning_rate": 3.2496042462117533e-06, "loss": 0.3563, "step": 40634 }, { "epoch": 1.864760681015098, "grad_norm": 0.4682324230670929, "learning_rate": 3.2493745758700116e-06, "loss": 0.3776, "step": 40635 }, { "epoch": 1.8648065715203526, "grad_norm": 0.47768300771713257, "learning_rate": 3.2491449097379725e-06, "loss": 0.4293, "step": 40636 }, { "epoch": 1.864852462025607, "grad_norm": 0.49399155378341675, "learning_rate": 3.2489152478161843e-06, "loss": 0.3825, "step": 40637 }, { "epoch": 1.8648983525308613, "grad_norm": 0.48155924677848816, "learning_rate": 3.248685590105205e-06, "loss": 0.3794, "step": 40638 }, { "epoch": 1.8649442430361158, "grad_norm": 0.43247830867767334, "learning_rate": 3.2484559366055834e-06, "loss": 0.3257, "step": 40639 }, { "epoch": 1.8649901335413703, "grad_norm": 0.4682311415672302, "learning_rate": 3.2482262873178714e-06, "loss": 0.3321, "step": 40640 }, { "epoch": 1.8650360240466246, "grad_norm": 0.5370655655860901, "learning_rate": 3.247996642242624e-06, "loss": 0.3883, "step": 40641 }, { "epoch": 1.865081914551879, "grad_norm": 0.48995935916900635, "learning_rate": 3.247767001380392e-06, "loss": 0.4086, "step": 40642 }, { "epoch": 1.8651278050571336, "grad_norm": 0.4899439811706543, "learning_rate": 3.2475373647317245e-06, "loss": 0.3775, "step": 40643 }, { "epoch": 1.865173695562388, "grad_norm": 0.4759605824947357, "learning_rate": 3.2473077322971793e-06, "loss": 0.3963, "step": 40644 }, { "epoch": 1.8652195860676426, "grad_norm": 0.4989371597766876, "learning_rate": 3.247078104077305e-06, "loss": 0.4227, "step": 40645 }, { "epoch": 1.865265476572897, "grad_norm": 0.4951787292957306, "learning_rate": 3.246848480072654e-06, "loss": 0.3427, "step": 40646 }, { "epoch": 1.8653113670781516, "grad_norm": 0.5179735422134399, "learning_rate": 3.24661886028378e-06, "loss": 0.3529, "step": 40647 }, { "epoch": 1.865357257583406, "grad_norm": 0.42660200595855713, "learning_rate": 3.2463892447112344e-06, "loss": 0.2982, "step": 40648 }, { "epoch": 1.8654031480886606, "grad_norm": 0.4724346995353699, "learning_rate": 3.2461596333555666e-06, "loss": 0.3793, "step": 40649 }, { "epoch": 1.8654490385939149, "grad_norm": 0.4424279034137726, "learning_rate": 3.2459300262173342e-06, "loss": 0.3076, "step": 40650 }, { "epoch": 1.8654949290991694, "grad_norm": 0.4944688677787781, "learning_rate": 3.2457004232970875e-06, "loss": 0.3178, "step": 40651 }, { "epoch": 1.8655408196044239, "grad_norm": 0.4386030435562134, "learning_rate": 3.245470824595374e-06, "loss": 0.3235, "step": 40652 }, { "epoch": 1.8655867101096784, "grad_norm": 0.4818091094493866, "learning_rate": 3.245241230112752e-06, "loss": 0.3868, "step": 40653 }, { "epoch": 1.8656326006149326, "grad_norm": 0.508457362651825, "learning_rate": 3.2450116398497712e-06, "loss": 0.4103, "step": 40654 }, { "epoch": 1.8656784911201871, "grad_norm": 0.4548834562301636, "learning_rate": 3.2447820538069817e-06, "loss": 0.3171, "step": 40655 }, { "epoch": 1.8657243816254416, "grad_norm": 0.47062554955482483, "learning_rate": 3.2445524719849387e-06, "loss": 0.3422, "step": 40656 }, { "epoch": 1.8657702721306961, "grad_norm": 0.4630744159221649, "learning_rate": 3.2443228943841932e-06, "loss": 0.3536, "step": 40657 }, { "epoch": 1.8658161626359506, "grad_norm": 0.47432541847229004, "learning_rate": 3.244093321005295e-06, "loss": 0.3958, "step": 40658 }, { "epoch": 1.8658620531412051, "grad_norm": 0.4837040603160858, "learning_rate": 3.2438637518487997e-06, "loss": 0.3682, "step": 40659 }, { "epoch": 1.8659079436464596, "grad_norm": 0.45332592725753784, "learning_rate": 3.2436341869152584e-06, "loss": 0.3025, "step": 40660 }, { "epoch": 1.8659538341517141, "grad_norm": 0.4613487422466278, "learning_rate": 3.243404626205221e-06, "loss": 0.3473, "step": 40661 }, { "epoch": 1.8659997246569686, "grad_norm": 0.5561777353286743, "learning_rate": 3.2431750697192426e-06, "loss": 0.5404, "step": 40662 }, { "epoch": 1.866045615162223, "grad_norm": 0.5318770408630371, "learning_rate": 3.242945517457874e-06, "loss": 0.4351, "step": 40663 }, { "epoch": 1.8660915056674774, "grad_norm": 0.5948088765144348, "learning_rate": 3.2427159694216637e-06, "loss": 0.4462, "step": 40664 }, { "epoch": 1.8661373961727319, "grad_norm": 0.4870043992996216, "learning_rate": 3.24248642561117e-06, "loss": 0.3482, "step": 40665 }, { "epoch": 1.8661832866779862, "grad_norm": 0.4509469270706177, "learning_rate": 3.242256886026941e-06, "loss": 0.3209, "step": 40666 }, { "epoch": 1.8662291771832407, "grad_norm": 0.4758235216140747, "learning_rate": 3.24202735066953e-06, "loss": 0.3175, "step": 40667 }, { "epoch": 1.8662750676884952, "grad_norm": 0.4881424605846405, "learning_rate": 3.2417978195394874e-06, "loss": 0.3469, "step": 40668 }, { "epoch": 1.8663209581937497, "grad_norm": 0.4640918970108032, "learning_rate": 3.2415682926373667e-06, "loss": 0.3168, "step": 40669 }, { "epoch": 1.8663668486990042, "grad_norm": 0.48344627022743225, "learning_rate": 3.241338769963719e-06, "loss": 0.3809, "step": 40670 }, { "epoch": 1.8664127392042587, "grad_norm": 0.4823472499847412, "learning_rate": 3.2411092515190952e-06, "loss": 0.3963, "step": 40671 }, { "epoch": 1.8664586297095132, "grad_norm": 0.48238396644592285, "learning_rate": 3.24087973730405e-06, "loss": 0.3965, "step": 40672 }, { "epoch": 1.8665045202147676, "grad_norm": 0.481507271528244, "learning_rate": 3.2406502273191355e-06, "loss": 0.3515, "step": 40673 }, { "epoch": 1.8665504107200221, "grad_norm": 0.4900887906551361, "learning_rate": 3.2404207215648986e-06, "loss": 0.415, "step": 40674 }, { "epoch": 1.8665963012252766, "grad_norm": 0.4664182662963867, "learning_rate": 3.240191220041896e-06, "loss": 0.3403, "step": 40675 }, { "epoch": 1.866642191730531, "grad_norm": 0.46869534254074097, "learning_rate": 3.2399617227506793e-06, "loss": 0.3513, "step": 40676 }, { "epoch": 1.8666880822357854, "grad_norm": 0.4409872889518738, "learning_rate": 3.2397322296917972e-06, "loss": 0.2962, "step": 40677 }, { "epoch": 1.86673397274104, "grad_norm": 0.4614500403404236, "learning_rate": 3.2395027408658047e-06, "loss": 0.3405, "step": 40678 }, { "epoch": 1.8667798632462942, "grad_norm": 0.4410739243030548, "learning_rate": 3.2392732562732533e-06, "loss": 0.3003, "step": 40679 }, { "epoch": 1.8668257537515487, "grad_norm": 0.4981977045536041, "learning_rate": 3.239043775914691e-06, "loss": 0.4197, "step": 40680 }, { "epoch": 1.8668716442568032, "grad_norm": 0.45733121037483215, "learning_rate": 3.2388142997906745e-06, "loss": 0.317, "step": 40681 }, { "epoch": 1.8669175347620577, "grad_norm": 0.4625927209854126, "learning_rate": 3.2385848279017546e-06, "loss": 0.3599, "step": 40682 }, { "epoch": 1.8669634252673122, "grad_norm": 0.4650525152683258, "learning_rate": 3.2383553602484806e-06, "loss": 0.345, "step": 40683 }, { "epoch": 1.8670093157725667, "grad_norm": 0.45474961400032043, "learning_rate": 3.2381258968314076e-06, "loss": 0.3273, "step": 40684 }, { "epoch": 1.8670552062778212, "grad_norm": 0.4604373872280121, "learning_rate": 3.237896437651085e-06, "loss": 0.3825, "step": 40685 }, { "epoch": 1.8671010967830757, "grad_norm": 0.48153409361839294, "learning_rate": 3.2376669827080643e-06, "loss": 0.3252, "step": 40686 }, { "epoch": 1.8671469872883302, "grad_norm": 0.454925537109375, "learning_rate": 3.2374375320028993e-06, "loss": 0.3119, "step": 40687 }, { "epoch": 1.8671928777935847, "grad_norm": 0.4897676706314087, "learning_rate": 3.237208085536141e-06, "loss": 0.3642, "step": 40688 }, { "epoch": 1.867238768298839, "grad_norm": 0.4363034963607788, "learning_rate": 3.2369786433083406e-06, "loss": 0.2901, "step": 40689 }, { "epoch": 1.8672846588040934, "grad_norm": 0.4592253267765045, "learning_rate": 3.23674920532005e-06, "loss": 0.3765, "step": 40690 }, { "epoch": 1.867330549309348, "grad_norm": 0.4582436978816986, "learning_rate": 3.2365197715718223e-06, "loss": 0.3372, "step": 40691 }, { "epoch": 1.8673764398146022, "grad_norm": 0.4628022611141205, "learning_rate": 3.236290342064205e-06, "loss": 0.3311, "step": 40692 }, { "epoch": 1.8674223303198567, "grad_norm": 0.4433578550815582, "learning_rate": 3.2360609167977555e-06, "loss": 0.335, "step": 40693 }, { "epoch": 1.8674682208251112, "grad_norm": 0.461154967546463, "learning_rate": 3.2358314957730218e-06, "loss": 0.3267, "step": 40694 }, { "epoch": 1.8675141113303657, "grad_norm": 0.5114935040473938, "learning_rate": 3.235602078990556e-06, "loss": 0.4301, "step": 40695 }, { "epoch": 1.8675600018356202, "grad_norm": 0.4680079221725464, "learning_rate": 3.235372666450912e-06, "loss": 0.3603, "step": 40696 }, { "epoch": 1.8676058923408747, "grad_norm": 0.3999522626399994, "learning_rate": 3.235143258154639e-06, "loss": 0.2853, "step": 40697 }, { "epoch": 1.8676517828461292, "grad_norm": 0.4605233669281006, "learning_rate": 3.2349138541022896e-06, "loss": 0.3368, "step": 40698 }, { "epoch": 1.8676976733513837, "grad_norm": 0.41906335949897766, "learning_rate": 3.2346844542944154e-06, "loss": 0.2735, "step": 40699 }, { "epoch": 1.8677435638566382, "grad_norm": 0.48962685465812683, "learning_rate": 3.2344550587315683e-06, "loss": 0.3979, "step": 40700 }, { "epoch": 1.8677894543618925, "grad_norm": 0.4788654148578644, "learning_rate": 3.234225667414299e-06, "loss": 0.3561, "step": 40701 }, { "epoch": 1.867835344867147, "grad_norm": 0.4790509343147278, "learning_rate": 3.233996280343158e-06, "loss": 0.3845, "step": 40702 }, { "epoch": 1.8678812353724015, "grad_norm": 0.46765217185020447, "learning_rate": 3.2337668975187005e-06, "loss": 0.3424, "step": 40703 }, { "epoch": 1.8679271258776557, "grad_norm": 0.4677450954914093, "learning_rate": 3.2335375189414763e-06, "loss": 0.323, "step": 40704 }, { "epoch": 1.8679730163829102, "grad_norm": 0.44587984681129456, "learning_rate": 3.233308144612036e-06, "loss": 0.2896, "step": 40705 }, { "epoch": 1.8680189068881647, "grad_norm": 0.4748303294181824, "learning_rate": 3.2330787745309324e-06, "loss": 0.2968, "step": 40706 }, { "epoch": 1.8680647973934192, "grad_norm": 0.4378405809402466, "learning_rate": 3.2328494086987173e-06, "loss": 0.2884, "step": 40707 }, { "epoch": 1.8681106878986737, "grad_norm": 0.46037033200263977, "learning_rate": 3.2326200471159395e-06, "loss": 0.3358, "step": 40708 }, { "epoch": 1.8681565784039282, "grad_norm": 0.4521106779575348, "learning_rate": 3.2323906897831546e-06, "loss": 0.3252, "step": 40709 }, { "epoch": 1.8682024689091827, "grad_norm": 0.49802979826927185, "learning_rate": 3.232161336700912e-06, "loss": 0.3849, "step": 40710 }, { "epoch": 1.8682483594144372, "grad_norm": 0.48265233635902405, "learning_rate": 3.231931987869763e-06, "loss": 0.3574, "step": 40711 }, { "epoch": 1.8682942499196917, "grad_norm": 0.47643181681632996, "learning_rate": 3.23170264329026e-06, "loss": 0.35, "step": 40712 }, { "epoch": 1.8683401404249462, "grad_norm": 0.46723902225494385, "learning_rate": 3.2314733029629543e-06, "loss": 0.3498, "step": 40713 }, { "epoch": 1.8683860309302005, "grad_norm": 0.5109745860099792, "learning_rate": 3.2312439668883954e-06, "loss": 0.4404, "step": 40714 }, { "epoch": 1.868431921435455, "grad_norm": 0.4574137330055237, "learning_rate": 3.231014635067138e-06, "loss": 0.3384, "step": 40715 }, { "epoch": 1.8684778119407095, "grad_norm": 0.5264354944229126, "learning_rate": 3.2307853074997327e-06, "loss": 0.4929, "step": 40716 }, { "epoch": 1.8685237024459638, "grad_norm": 0.4549841582775116, "learning_rate": 3.2305559841867284e-06, "loss": 0.317, "step": 40717 }, { "epoch": 1.8685695929512183, "grad_norm": 0.5012626051902771, "learning_rate": 3.2303266651286803e-06, "loss": 0.3953, "step": 40718 }, { "epoch": 1.8686154834564728, "grad_norm": 0.46170321106910706, "learning_rate": 3.230097350326138e-06, "loss": 0.3788, "step": 40719 }, { "epoch": 1.8686613739617273, "grad_norm": 0.48067739605903625, "learning_rate": 3.2298680397796523e-06, "loss": 0.4375, "step": 40720 }, { "epoch": 1.8687072644669818, "grad_norm": 0.4140966236591339, "learning_rate": 3.2296387334897754e-06, "loss": 0.2683, "step": 40721 }, { "epoch": 1.8687531549722363, "grad_norm": 0.4758281707763672, "learning_rate": 3.229409431457059e-06, "loss": 0.3765, "step": 40722 }, { "epoch": 1.8687990454774908, "grad_norm": 0.4974440336227417, "learning_rate": 3.2291801336820523e-06, "loss": 0.4189, "step": 40723 }, { "epoch": 1.8688449359827453, "grad_norm": 0.4582734704017639, "learning_rate": 3.2289508401653102e-06, "loss": 0.3569, "step": 40724 }, { "epoch": 1.8688908264879998, "grad_norm": 0.44212210178375244, "learning_rate": 3.228721550907383e-06, "loss": 0.3011, "step": 40725 }, { "epoch": 1.8689367169932543, "grad_norm": 0.46050137281417847, "learning_rate": 3.2284922659088192e-06, "loss": 0.3383, "step": 40726 }, { "epoch": 1.8689826074985085, "grad_norm": 0.46425989270210266, "learning_rate": 3.228262985170174e-06, "loss": 0.3399, "step": 40727 }, { "epoch": 1.869028498003763, "grad_norm": 0.4565785527229309, "learning_rate": 3.2280337086919977e-06, "loss": 0.3678, "step": 40728 }, { "epoch": 1.8690743885090175, "grad_norm": 0.46119794249534607, "learning_rate": 3.2278044364748384e-06, "loss": 0.3426, "step": 40729 }, { "epoch": 1.8691202790142718, "grad_norm": 0.4771638512611389, "learning_rate": 3.2275751685192523e-06, "loss": 0.3586, "step": 40730 }, { "epoch": 1.8691661695195263, "grad_norm": 0.4892936050891876, "learning_rate": 3.2273459048257894e-06, "loss": 0.3837, "step": 40731 }, { "epoch": 1.8692120600247808, "grad_norm": 0.43033915758132935, "learning_rate": 3.2271166453949987e-06, "loss": 0.3314, "step": 40732 }, { "epoch": 1.8692579505300353, "grad_norm": 0.44673073291778564, "learning_rate": 3.2268873902274328e-06, "loss": 0.2924, "step": 40733 }, { "epoch": 1.8693038410352898, "grad_norm": 0.4479477107524872, "learning_rate": 3.226658139323644e-06, "loss": 0.318, "step": 40734 }, { "epoch": 1.8693497315405443, "grad_norm": 0.5080916285514832, "learning_rate": 3.2264288926841823e-06, "loss": 0.355, "step": 40735 }, { "epoch": 1.8693956220457988, "grad_norm": 0.49496474862098694, "learning_rate": 3.2261996503095977e-06, "loss": 0.3956, "step": 40736 }, { "epoch": 1.8694415125510533, "grad_norm": 0.5329535007476807, "learning_rate": 3.225970412200445e-06, "loss": 0.4474, "step": 40737 }, { "epoch": 1.8694874030563078, "grad_norm": 0.46709516644477844, "learning_rate": 3.2257411783572734e-06, "loss": 0.3553, "step": 40738 }, { "epoch": 1.869533293561562, "grad_norm": 0.4768551290035248, "learning_rate": 3.225511948780634e-06, "loss": 0.3523, "step": 40739 }, { "epoch": 1.8695791840668166, "grad_norm": 0.4548031985759735, "learning_rate": 3.2252827234710786e-06, "loss": 0.3476, "step": 40740 }, { "epoch": 1.869625074572071, "grad_norm": 0.48026639223098755, "learning_rate": 3.2250535024291585e-06, "loss": 0.4007, "step": 40741 }, { "epoch": 1.8696709650773256, "grad_norm": 0.4627038538455963, "learning_rate": 3.2248242856554234e-06, "loss": 0.3371, "step": 40742 }, { "epoch": 1.8697168555825798, "grad_norm": 0.5128834247589111, "learning_rate": 3.2245950731504265e-06, "loss": 0.4841, "step": 40743 }, { "epoch": 1.8697627460878343, "grad_norm": 0.49921223521232605, "learning_rate": 3.224365864914718e-06, "loss": 0.3757, "step": 40744 }, { "epoch": 1.8698086365930888, "grad_norm": 0.49540871381759644, "learning_rate": 3.2241366609488478e-06, "loss": 0.3381, "step": 40745 }, { "epoch": 1.8698545270983433, "grad_norm": 0.5509725213050842, "learning_rate": 3.22390746125337e-06, "loss": 0.4604, "step": 40746 }, { "epoch": 1.8699004176035978, "grad_norm": 0.4361392557621002, "learning_rate": 3.2236782658288344e-06, "loss": 0.3221, "step": 40747 }, { "epoch": 1.8699463081088523, "grad_norm": 0.4969726800918579, "learning_rate": 3.2234490746757907e-06, "loss": 0.3893, "step": 40748 }, { "epoch": 1.8699921986141068, "grad_norm": 0.4852102994918823, "learning_rate": 3.2232198877947923e-06, "loss": 0.3675, "step": 40749 }, { "epoch": 1.8700380891193613, "grad_norm": 0.5214864611625671, "learning_rate": 3.2229907051863895e-06, "loss": 0.4187, "step": 40750 }, { "epoch": 1.8700839796246158, "grad_norm": 0.5014107823371887, "learning_rate": 3.2227615268511316e-06, "loss": 0.4053, "step": 40751 }, { "epoch": 1.87012987012987, "grad_norm": 0.5290302038192749, "learning_rate": 3.2225323527895725e-06, "loss": 0.3582, "step": 40752 }, { "epoch": 1.8701757606351246, "grad_norm": 0.43171751499176025, "learning_rate": 3.2223031830022623e-06, "loss": 0.3452, "step": 40753 }, { "epoch": 1.870221651140379, "grad_norm": 0.5005162954330444, "learning_rate": 3.222074017489751e-06, "loss": 0.3972, "step": 40754 }, { "epoch": 1.8702675416456334, "grad_norm": 0.4486680030822754, "learning_rate": 3.221844856252592e-06, "loss": 0.3196, "step": 40755 }, { "epoch": 1.8703134321508879, "grad_norm": 0.5238682627677917, "learning_rate": 3.2216156992913337e-06, "loss": 0.4265, "step": 40756 }, { "epoch": 1.8703593226561424, "grad_norm": 0.4576411545276642, "learning_rate": 3.2213865466065276e-06, "loss": 0.3246, "step": 40757 }, { "epoch": 1.8704052131613969, "grad_norm": 0.4694410264492035, "learning_rate": 3.221157398198727e-06, "loss": 0.3393, "step": 40758 }, { "epoch": 1.8704511036666513, "grad_norm": 0.4602578580379486, "learning_rate": 3.2209282540684815e-06, "loss": 0.3472, "step": 40759 }, { "epoch": 1.8704969941719058, "grad_norm": 0.498900830745697, "learning_rate": 3.2206991142163414e-06, "loss": 0.3917, "step": 40760 }, { "epoch": 1.8705428846771603, "grad_norm": 0.5118532180786133, "learning_rate": 3.220469978642859e-06, "loss": 0.3872, "step": 40761 }, { "epoch": 1.8705887751824148, "grad_norm": 0.4716373383998871, "learning_rate": 3.2202408473485847e-06, "loss": 0.3007, "step": 40762 }, { "epoch": 1.8706346656876693, "grad_norm": 0.4739513099193573, "learning_rate": 3.2200117203340675e-06, "loss": 0.3495, "step": 40763 }, { "epoch": 1.8706805561929238, "grad_norm": 0.4786577820777893, "learning_rate": 3.219782597599863e-06, "loss": 0.3826, "step": 40764 }, { "epoch": 1.8707264466981781, "grad_norm": 0.4648756980895996, "learning_rate": 3.219553479146519e-06, "loss": 0.3167, "step": 40765 }, { "epoch": 1.8707723372034326, "grad_norm": 0.4703684151172638, "learning_rate": 3.2193243649745865e-06, "loss": 0.354, "step": 40766 }, { "epoch": 1.870818227708687, "grad_norm": 0.4296219050884247, "learning_rate": 3.2190952550846157e-06, "loss": 0.3106, "step": 40767 }, { "epoch": 1.8708641182139414, "grad_norm": 0.4723515808582306, "learning_rate": 3.21886614947716e-06, "loss": 0.3527, "step": 40768 }, { "epoch": 1.8709100087191959, "grad_norm": 0.49085506796836853, "learning_rate": 3.2186370481527697e-06, "loss": 0.376, "step": 40769 }, { "epoch": 1.8709558992244504, "grad_norm": 0.4819372594356537, "learning_rate": 3.218407951111993e-06, "loss": 0.3669, "step": 40770 }, { "epoch": 1.8710017897297049, "grad_norm": 0.45792436599731445, "learning_rate": 3.2181788583553847e-06, "loss": 0.3404, "step": 40771 }, { "epoch": 1.8710476802349594, "grad_norm": 0.45170557498931885, "learning_rate": 3.2179497698834936e-06, "loss": 0.3165, "step": 40772 }, { "epoch": 1.8710935707402139, "grad_norm": 0.4720611274242401, "learning_rate": 3.217720685696869e-06, "loss": 0.3269, "step": 40773 }, { "epoch": 1.8711394612454684, "grad_norm": 0.5174765586853027, "learning_rate": 3.2174916057960657e-06, "loss": 0.431, "step": 40774 }, { "epoch": 1.8711853517507229, "grad_norm": 0.43505552411079407, "learning_rate": 3.217262530181633e-06, "loss": 0.2947, "step": 40775 }, { "epoch": 1.8712312422559774, "grad_norm": 0.49187368154525757, "learning_rate": 3.2170334588541195e-06, "loss": 0.397, "step": 40776 }, { "epoch": 1.8712771327612319, "grad_norm": 0.45217183232307434, "learning_rate": 3.216804391814079e-06, "loss": 0.3569, "step": 40777 }, { "epoch": 1.8713230232664861, "grad_norm": 0.515798807144165, "learning_rate": 3.216575329062061e-06, "loss": 0.3655, "step": 40778 }, { "epoch": 1.8713689137717406, "grad_norm": 0.48925110697746277, "learning_rate": 3.2163462705986146e-06, "loss": 0.3839, "step": 40779 }, { "epoch": 1.8714148042769951, "grad_norm": 0.4585813283920288, "learning_rate": 3.2161172164242947e-06, "loss": 0.3579, "step": 40780 }, { "epoch": 1.8714606947822494, "grad_norm": 0.4877050817012787, "learning_rate": 3.21588816653965e-06, "loss": 0.4291, "step": 40781 }, { "epoch": 1.871506585287504, "grad_norm": 0.45397013425827026, "learning_rate": 3.2156591209452294e-06, "loss": 0.3347, "step": 40782 }, { "epoch": 1.8715524757927584, "grad_norm": 0.470426470041275, "learning_rate": 3.2154300796415873e-06, "loss": 0.3607, "step": 40783 }, { "epoch": 1.871598366298013, "grad_norm": 0.4714231789112091, "learning_rate": 3.2152010426292725e-06, "loss": 0.3864, "step": 40784 }, { "epoch": 1.8716442568032674, "grad_norm": 0.49862444400787354, "learning_rate": 3.2149720099088335e-06, "loss": 0.3903, "step": 40785 }, { "epoch": 1.871690147308522, "grad_norm": 0.48576605319976807, "learning_rate": 3.214742981480826e-06, "loss": 0.341, "step": 40786 }, { "epoch": 1.8717360378137764, "grad_norm": 0.5064970254898071, "learning_rate": 3.2145139573457982e-06, "loss": 0.4283, "step": 40787 }, { "epoch": 1.871781928319031, "grad_norm": 0.46467825770378113, "learning_rate": 3.214284937504298e-06, "loss": 0.3244, "step": 40788 }, { "epoch": 1.8718278188242854, "grad_norm": 0.46187037229537964, "learning_rate": 3.2140559219568816e-06, "loss": 0.3546, "step": 40789 }, { "epoch": 1.8718737093295397, "grad_norm": 0.46945130825042725, "learning_rate": 3.213826910704097e-06, "loss": 0.365, "step": 40790 }, { "epoch": 1.8719195998347942, "grad_norm": 0.44744163751602173, "learning_rate": 3.2135979037464937e-06, "loss": 0.335, "step": 40791 }, { "epoch": 1.8719654903400487, "grad_norm": 0.46984878182411194, "learning_rate": 3.2133689010846247e-06, "loss": 0.3903, "step": 40792 }, { "epoch": 1.872011380845303, "grad_norm": 0.491263747215271, "learning_rate": 3.2131399027190402e-06, "loss": 0.4119, "step": 40793 }, { "epoch": 1.8720572713505574, "grad_norm": 0.49642932415008545, "learning_rate": 3.2129109086502875e-06, "loss": 0.4466, "step": 40794 }, { "epoch": 1.872103161855812, "grad_norm": 0.4380612373352051, "learning_rate": 3.212681918878922e-06, "loss": 0.2798, "step": 40795 }, { "epoch": 1.8721490523610664, "grad_norm": 0.49369558691978455, "learning_rate": 3.212452933405493e-06, "loss": 0.4024, "step": 40796 }, { "epoch": 1.872194942866321, "grad_norm": 0.4712003469467163, "learning_rate": 3.2122239522305497e-06, "loss": 0.3895, "step": 40797 }, { "epoch": 1.8722408333715754, "grad_norm": 0.5141485333442688, "learning_rate": 3.211994975354643e-06, "loss": 0.4379, "step": 40798 }, { "epoch": 1.87228672387683, "grad_norm": 0.47650057077407837, "learning_rate": 3.2117660027783255e-06, "loss": 0.4034, "step": 40799 }, { "epoch": 1.8723326143820844, "grad_norm": 0.48307308554649353, "learning_rate": 3.2115370345021457e-06, "loss": 0.3513, "step": 40800 }, { "epoch": 1.872378504887339, "grad_norm": 0.4398488998413086, "learning_rate": 3.2113080705266532e-06, "loss": 0.3352, "step": 40801 }, { "epoch": 1.8724243953925934, "grad_norm": 0.43941035866737366, "learning_rate": 3.211079110852402e-06, "loss": 0.3154, "step": 40802 }, { "epoch": 1.8724702858978477, "grad_norm": 0.4663524627685547, "learning_rate": 3.2108501554799413e-06, "loss": 0.3112, "step": 40803 }, { "epoch": 1.8725161764031022, "grad_norm": 0.5122995972633362, "learning_rate": 3.2106212044098204e-06, "loss": 0.3462, "step": 40804 }, { "epoch": 1.8725620669083567, "grad_norm": 0.4860347509384155, "learning_rate": 3.2103922576425918e-06, "loss": 0.3854, "step": 40805 }, { "epoch": 1.872607957413611, "grad_norm": 0.4439956843852997, "learning_rate": 3.2101633151788046e-06, "loss": 0.3431, "step": 40806 }, { "epoch": 1.8726538479188655, "grad_norm": 0.4780729413032532, "learning_rate": 3.209934377019007e-06, "loss": 0.3713, "step": 40807 }, { "epoch": 1.87269973842412, "grad_norm": 0.42868635058403015, "learning_rate": 3.2097054431637563e-06, "loss": 0.2993, "step": 40808 }, { "epoch": 1.8727456289293745, "grad_norm": 0.4646168053150177, "learning_rate": 3.2094765136135985e-06, "loss": 0.402, "step": 40809 }, { "epoch": 1.872791519434629, "grad_norm": 0.48864880204200745, "learning_rate": 3.2092475883690814e-06, "loss": 0.3893, "step": 40810 }, { "epoch": 1.8728374099398835, "grad_norm": 0.46659091114997864, "learning_rate": 3.209018667430761e-06, "loss": 0.3604, "step": 40811 }, { "epoch": 1.872883300445138, "grad_norm": 0.4875173270702362, "learning_rate": 3.208789750799186e-06, "loss": 0.3908, "step": 40812 }, { "epoch": 1.8729291909503925, "grad_norm": 0.46357446908950806, "learning_rate": 3.2085608384749052e-06, "loss": 0.3046, "step": 40813 }, { "epoch": 1.872975081455647, "grad_norm": 0.45003461837768555, "learning_rate": 3.2083319304584704e-06, "loss": 0.3257, "step": 40814 }, { "epoch": 1.8730209719609014, "grad_norm": 0.48325875401496887, "learning_rate": 3.2081030267504327e-06, "loss": 0.3697, "step": 40815 }, { "epoch": 1.8730668624661557, "grad_norm": 0.456366628408432, "learning_rate": 3.207874127351339e-06, "loss": 0.3577, "step": 40816 }, { "epoch": 1.8731127529714102, "grad_norm": 0.4352400600910187, "learning_rate": 3.2076452322617452e-06, "loss": 0.2966, "step": 40817 }, { "epoch": 1.8731586434766647, "grad_norm": 0.4380384385585785, "learning_rate": 3.2074163414821984e-06, "loss": 0.3234, "step": 40818 }, { "epoch": 1.873204533981919, "grad_norm": 0.45777013897895813, "learning_rate": 3.2071874550132488e-06, "loss": 0.3723, "step": 40819 }, { "epoch": 1.8732504244871735, "grad_norm": 0.4996108114719391, "learning_rate": 3.2069585728554482e-06, "loss": 0.3907, "step": 40820 }, { "epoch": 1.873296314992428, "grad_norm": 0.4152979552745819, "learning_rate": 3.206729695009346e-06, "loss": 0.2805, "step": 40821 }, { "epoch": 1.8733422054976825, "grad_norm": 0.46005675196647644, "learning_rate": 3.206500821475492e-06, "loss": 0.3365, "step": 40822 }, { "epoch": 1.873388096002937, "grad_norm": 0.453836053609848, "learning_rate": 3.206271952254439e-06, "loss": 0.3421, "step": 40823 }, { "epoch": 1.8734339865081915, "grad_norm": 0.445949912071228, "learning_rate": 3.206043087346735e-06, "loss": 0.2786, "step": 40824 }, { "epoch": 1.873479877013446, "grad_norm": 0.49423670768737793, "learning_rate": 3.2058142267529314e-06, "loss": 0.388, "step": 40825 }, { "epoch": 1.8735257675187005, "grad_norm": 0.5177964568138123, "learning_rate": 3.205585370473579e-06, "loss": 0.4089, "step": 40826 }, { "epoch": 1.873571658023955, "grad_norm": 0.5953711867332458, "learning_rate": 3.205356518509227e-06, "loss": 0.3317, "step": 40827 }, { "epoch": 1.8736175485292093, "grad_norm": 0.4540679156780243, "learning_rate": 3.2051276708604244e-06, "loss": 0.3396, "step": 40828 }, { "epoch": 1.8736634390344638, "grad_norm": 0.4874948263168335, "learning_rate": 3.2048988275277258e-06, "loss": 0.3846, "step": 40829 }, { "epoch": 1.8737093295397182, "grad_norm": 0.47497689723968506, "learning_rate": 3.2046699885116794e-06, "loss": 0.4012, "step": 40830 }, { "epoch": 1.8737552200449727, "grad_norm": 0.4538644552230835, "learning_rate": 3.204441153812834e-06, "loss": 0.3195, "step": 40831 }, { "epoch": 1.873801110550227, "grad_norm": 0.43262597918510437, "learning_rate": 3.204212323431739e-06, "loss": 0.2884, "step": 40832 }, { "epoch": 1.8738470010554815, "grad_norm": 0.4953249394893646, "learning_rate": 3.203983497368949e-06, "loss": 0.4047, "step": 40833 }, { "epoch": 1.873892891560736, "grad_norm": 0.47787341475486755, "learning_rate": 3.2037546756250114e-06, "loss": 0.4127, "step": 40834 }, { "epoch": 1.8739387820659905, "grad_norm": 0.462568074464798, "learning_rate": 3.2035258582004757e-06, "loss": 0.3298, "step": 40835 }, { "epoch": 1.873984672571245, "grad_norm": 0.45099908113479614, "learning_rate": 3.2032970450958952e-06, "loss": 0.3368, "step": 40836 }, { "epoch": 1.8740305630764995, "grad_norm": 0.45052874088287354, "learning_rate": 3.2030682363118177e-06, "loss": 0.3168, "step": 40837 }, { "epoch": 1.874076453581754, "grad_norm": 0.49139031767845154, "learning_rate": 3.2028394318487923e-06, "loss": 0.3865, "step": 40838 }, { "epoch": 1.8741223440870085, "grad_norm": 0.4749772548675537, "learning_rate": 3.2026106317073722e-06, "loss": 0.3881, "step": 40839 }, { "epoch": 1.874168234592263, "grad_norm": 0.43694987893104553, "learning_rate": 3.2023818358881063e-06, "loss": 0.3048, "step": 40840 }, { "epoch": 1.8742141250975173, "grad_norm": 0.48097196221351624, "learning_rate": 3.202153044391544e-06, "loss": 0.3467, "step": 40841 }, { "epoch": 1.8742600156027718, "grad_norm": 0.4816828966140747, "learning_rate": 3.2019242572182376e-06, "loss": 0.3548, "step": 40842 }, { "epoch": 1.8743059061080263, "grad_norm": 0.5016543865203857, "learning_rate": 3.2016954743687355e-06, "loss": 0.4121, "step": 40843 }, { "epoch": 1.8743517966132806, "grad_norm": 0.5262869596481323, "learning_rate": 3.2014666958435857e-06, "loss": 0.4247, "step": 40844 }, { "epoch": 1.874397687118535, "grad_norm": 0.5393515229225159, "learning_rate": 3.201237921643344e-06, "loss": 0.4274, "step": 40845 }, { "epoch": 1.8744435776237895, "grad_norm": 0.45059239864349365, "learning_rate": 3.2010091517685572e-06, "loss": 0.3048, "step": 40846 }, { "epoch": 1.874489468129044, "grad_norm": 0.49902456998825073, "learning_rate": 3.200780386219774e-06, "loss": 0.3616, "step": 40847 }, { "epoch": 1.8745353586342985, "grad_norm": 0.45046401023864746, "learning_rate": 3.200551624997547e-06, "loss": 0.3449, "step": 40848 }, { "epoch": 1.874581249139553, "grad_norm": 0.4514561891555786, "learning_rate": 3.200322868102426e-06, "loss": 0.3045, "step": 40849 }, { "epoch": 1.8746271396448075, "grad_norm": 0.5060063600540161, "learning_rate": 3.200094115534958e-06, "loss": 0.3806, "step": 40850 }, { "epoch": 1.874673030150062, "grad_norm": 0.5106275677680969, "learning_rate": 3.1998653672956983e-06, "loss": 0.4362, "step": 40851 }, { "epoch": 1.8747189206553165, "grad_norm": 0.4617348313331604, "learning_rate": 3.1996366233851944e-06, "loss": 0.3821, "step": 40852 }, { "epoch": 1.874764811160571, "grad_norm": 0.48427310585975647, "learning_rate": 3.1994078838039932e-06, "loss": 0.3649, "step": 40853 }, { "epoch": 1.8748107016658253, "grad_norm": 0.531186044216156, "learning_rate": 3.1991791485526504e-06, "loss": 0.3839, "step": 40854 }, { "epoch": 1.8748565921710798, "grad_norm": 0.473624050617218, "learning_rate": 3.198950417631713e-06, "loss": 0.3645, "step": 40855 }, { "epoch": 1.8749024826763343, "grad_norm": 0.43522363901138306, "learning_rate": 3.1987216910417308e-06, "loss": 0.2711, "step": 40856 }, { "epoch": 1.8749483731815886, "grad_norm": 0.4460555613040924, "learning_rate": 3.1984929687832555e-06, "loss": 0.2942, "step": 40857 }, { "epoch": 1.874994263686843, "grad_norm": 0.44664376974105835, "learning_rate": 3.1982642508568357e-06, "loss": 0.3197, "step": 40858 }, { "epoch": 1.8750401541920976, "grad_norm": 0.4478331506252289, "learning_rate": 3.19803553726302e-06, "loss": 0.2979, "step": 40859 }, { "epoch": 1.875086044697352, "grad_norm": 0.4893341660499573, "learning_rate": 3.197806828002362e-06, "loss": 0.3687, "step": 40860 }, { "epoch": 1.8751319352026066, "grad_norm": 0.501018226146698, "learning_rate": 3.1975781230754093e-06, "loss": 0.3704, "step": 40861 }, { "epoch": 1.875177825707861, "grad_norm": 0.4379594326019287, "learning_rate": 3.1973494224827116e-06, "loss": 0.2966, "step": 40862 }, { "epoch": 1.8752237162131156, "grad_norm": 0.46309545636177063, "learning_rate": 3.1971207262248206e-06, "loss": 0.3093, "step": 40863 }, { "epoch": 1.87526960671837, "grad_norm": 0.4987621307373047, "learning_rate": 3.1968920343022857e-06, "loss": 0.3722, "step": 40864 }, { "epoch": 1.8753154972236246, "grad_norm": 0.4991621673107147, "learning_rate": 3.196663346715656e-06, "loss": 0.3766, "step": 40865 }, { "epoch": 1.875361387728879, "grad_norm": 0.46523022651672363, "learning_rate": 3.19643466346548e-06, "loss": 0.3028, "step": 40866 }, { "epoch": 1.8754072782341333, "grad_norm": 0.48662468791007996, "learning_rate": 3.1962059845523113e-06, "loss": 0.3469, "step": 40867 }, { "epoch": 1.8754531687393878, "grad_norm": 0.47083377838134766, "learning_rate": 3.1959773099766977e-06, "loss": 0.3278, "step": 40868 }, { "epoch": 1.8754990592446423, "grad_norm": 0.4565489888191223, "learning_rate": 3.1957486397391892e-06, "loss": 0.356, "step": 40869 }, { "epoch": 1.8755449497498966, "grad_norm": 0.4829057455062866, "learning_rate": 3.195519973840335e-06, "loss": 0.3353, "step": 40870 }, { "epoch": 1.875590840255151, "grad_norm": 0.5008401274681091, "learning_rate": 3.195291312280687e-06, "loss": 0.3817, "step": 40871 }, { "epoch": 1.8756367307604056, "grad_norm": 0.5003997683525085, "learning_rate": 3.1950626550607923e-06, "loss": 0.4231, "step": 40872 }, { "epoch": 1.87568262126566, "grad_norm": 0.46256083250045776, "learning_rate": 3.194834002181203e-06, "loss": 0.347, "step": 40873 }, { "epoch": 1.8757285117709146, "grad_norm": 0.47234123945236206, "learning_rate": 3.1946053536424704e-06, "loss": 0.4095, "step": 40874 }, { "epoch": 1.875774402276169, "grad_norm": 0.4805777370929718, "learning_rate": 3.1943767094451382e-06, "loss": 0.3817, "step": 40875 }, { "epoch": 1.8758202927814236, "grad_norm": 0.43107733130455017, "learning_rate": 3.1941480695897626e-06, "loss": 0.2919, "step": 40876 }, { "epoch": 1.875866183286678, "grad_norm": 0.48459169268608093, "learning_rate": 3.1939194340768907e-06, "loss": 0.3633, "step": 40877 }, { "epoch": 1.8759120737919326, "grad_norm": 0.46466493606567383, "learning_rate": 3.193690802907072e-06, "loss": 0.3403, "step": 40878 }, { "epoch": 1.8759579642971869, "grad_norm": 0.4675995409488678, "learning_rate": 3.193462176080857e-06, "loss": 0.354, "step": 40879 }, { "epoch": 1.8760038548024414, "grad_norm": 0.4871419668197632, "learning_rate": 3.1932335535987962e-06, "loss": 0.3496, "step": 40880 }, { "epoch": 1.8760497453076959, "grad_norm": 0.47044265270233154, "learning_rate": 3.1930049354614356e-06, "loss": 0.3739, "step": 40881 }, { "epoch": 1.8760956358129501, "grad_norm": 0.4486694037914276, "learning_rate": 3.1927763216693307e-06, "loss": 0.3186, "step": 40882 }, { "epoch": 1.8761415263182046, "grad_norm": 0.48766204714775085, "learning_rate": 3.1925477122230282e-06, "loss": 0.3911, "step": 40883 }, { "epoch": 1.8761874168234591, "grad_norm": 0.44279924035072327, "learning_rate": 3.1923191071230763e-06, "loss": 0.283, "step": 40884 }, { "epoch": 1.8762333073287136, "grad_norm": 0.41964468359947205, "learning_rate": 3.192090506370028e-06, "loss": 0.2896, "step": 40885 }, { "epoch": 1.8762791978339681, "grad_norm": 0.477951318025589, "learning_rate": 3.1918619099644314e-06, "loss": 0.38, "step": 40886 }, { "epoch": 1.8763250883392226, "grad_norm": 0.46327531337738037, "learning_rate": 3.1916333179068344e-06, "loss": 0.3224, "step": 40887 }, { "epoch": 1.8763709788444771, "grad_norm": 0.4303705394268036, "learning_rate": 3.1914047301977903e-06, "loss": 0.2808, "step": 40888 }, { "epoch": 1.8764168693497316, "grad_norm": 0.5067707300186157, "learning_rate": 3.191176146837847e-06, "loss": 0.4242, "step": 40889 }, { "epoch": 1.8764627598549861, "grad_norm": 0.4907696545124054, "learning_rate": 3.190947567827554e-06, "loss": 0.3859, "step": 40890 }, { "epoch": 1.8765086503602406, "grad_norm": 0.6529475450515747, "learning_rate": 3.190718993167461e-06, "loss": 0.3242, "step": 40891 }, { "epoch": 1.876554540865495, "grad_norm": 0.48794758319854736, "learning_rate": 3.190490422858118e-06, "loss": 0.3517, "step": 40892 }, { "epoch": 1.8766004313707494, "grad_norm": 0.4901316165924072, "learning_rate": 3.190261856900072e-06, "loss": 0.3943, "step": 40893 }, { "epoch": 1.8766463218760039, "grad_norm": 0.45873603224754333, "learning_rate": 3.1900332952938782e-06, "loss": 0.3328, "step": 40894 }, { "epoch": 1.8766922123812582, "grad_norm": 0.41371187567710876, "learning_rate": 3.189804738040082e-06, "loss": 0.2929, "step": 40895 }, { "epoch": 1.8767381028865127, "grad_norm": 0.47034165263175964, "learning_rate": 3.189576185139236e-06, "loss": 0.3617, "step": 40896 }, { "epoch": 1.8767839933917672, "grad_norm": 0.4725523293018341, "learning_rate": 3.1893476365918847e-06, "loss": 0.3388, "step": 40897 }, { "epoch": 1.8768298838970217, "grad_norm": 0.48386532068252563, "learning_rate": 3.1891190923985827e-06, "loss": 0.3824, "step": 40898 }, { "epoch": 1.8768757744022762, "grad_norm": 0.46412011981010437, "learning_rate": 3.188890552559878e-06, "loss": 0.3354, "step": 40899 }, { "epoch": 1.8769216649075307, "grad_norm": 0.48253268003463745, "learning_rate": 3.1886620170763183e-06, "loss": 0.3564, "step": 40900 }, { "epoch": 1.8769675554127851, "grad_norm": 0.5229529738426208, "learning_rate": 3.1884334859484563e-06, "loss": 0.4508, "step": 40901 }, { "epoch": 1.8770134459180396, "grad_norm": 0.4572031795978546, "learning_rate": 3.18820495917684e-06, "loss": 0.3096, "step": 40902 }, { "epoch": 1.8770593364232941, "grad_norm": 0.45701250433921814, "learning_rate": 3.187976436762017e-06, "loss": 0.3153, "step": 40903 }, { "epoch": 1.8771052269285486, "grad_norm": 0.4692665636539459, "learning_rate": 3.18774791870454e-06, "loss": 0.3121, "step": 40904 }, { "epoch": 1.877151117433803, "grad_norm": 0.479989618062973, "learning_rate": 3.1875194050049585e-06, "loss": 0.3533, "step": 40905 }, { "epoch": 1.8771970079390574, "grad_norm": 0.4459161162376404, "learning_rate": 3.187290895663819e-06, "loss": 0.306, "step": 40906 }, { "epoch": 1.877242898444312, "grad_norm": 0.45808088779449463, "learning_rate": 3.187062390681674e-06, "loss": 0.3459, "step": 40907 }, { "epoch": 1.8772887889495662, "grad_norm": 0.41858750581741333, "learning_rate": 3.1868338900590717e-06, "loss": 0.3018, "step": 40908 }, { "epoch": 1.8773346794548207, "grad_norm": 0.4467853009700775, "learning_rate": 3.1866053937965596e-06, "loss": 0.3082, "step": 40909 }, { "epoch": 1.8773805699600752, "grad_norm": 0.43415582180023193, "learning_rate": 3.186376901894691e-06, "loss": 0.2892, "step": 40910 }, { "epoch": 1.8774264604653297, "grad_norm": 0.5375862717628479, "learning_rate": 3.1861484143540144e-06, "loss": 0.4403, "step": 40911 }, { "epoch": 1.8774723509705842, "grad_norm": 0.503567099571228, "learning_rate": 3.1859199311750765e-06, "loss": 0.314, "step": 40912 }, { "epoch": 1.8775182414758387, "grad_norm": 0.4473890960216522, "learning_rate": 3.18569145235843e-06, "loss": 0.3093, "step": 40913 }, { "epoch": 1.8775641319810932, "grad_norm": 0.4627523124217987, "learning_rate": 3.1854629779046232e-06, "loss": 0.3559, "step": 40914 }, { "epoch": 1.8776100224863477, "grad_norm": 0.4521934986114502, "learning_rate": 3.1852345078142027e-06, "loss": 0.3672, "step": 40915 }, { "epoch": 1.8776559129916022, "grad_norm": 0.4474486708641052, "learning_rate": 3.1850060420877226e-06, "loss": 0.3161, "step": 40916 }, { "epoch": 1.8777018034968564, "grad_norm": 0.45620644092559814, "learning_rate": 3.184777580725731e-06, "loss": 0.3643, "step": 40917 }, { "epoch": 1.877747694002111, "grad_norm": 0.46167102456092834, "learning_rate": 3.184549123728774e-06, "loss": 0.3207, "step": 40918 }, { "epoch": 1.8777935845073654, "grad_norm": 0.43175017833709717, "learning_rate": 3.1843206710974053e-06, "loss": 0.3455, "step": 40919 }, { "epoch": 1.87783947501262, "grad_norm": 0.7083204984664917, "learning_rate": 3.184092222832172e-06, "loss": 0.3604, "step": 40920 }, { "epoch": 1.8778853655178742, "grad_norm": 0.474498450756073, "learning_rate": 3.183863778933623e-06, "loss": 0.3942, "step": 40921 }, { "epoch": 1.8779312560231287, "grad_norm": 0.47159677743911743, "learning_rate": 3.1836353394023088e-06, "loss": 0.3494, "step": 40922 }, { "epoch": 1.8779771465283832, "grad_norm": 0.4765138328075409, "learning_rate": 3.1834069042387794e-06, "loss": 0.4053, "step": 40923 }, { "epoch": 1.8780230370336377, "grad_norm": 0.4741875231266022, "learning_rate": 3.1831784734435806e-06, "loss": 0.341, "step": 40924 }, { "epoch": 1.8780689275388922, "grad_norm": 0.4413011372089386, "learning_rate": 3.182950047017266e-06, "loss": 0.2939, "step": 40925 }, { "epoch": 1.8781148180441467, "grad_norm": 0.476239413022995, "learning_rate": 3.182721624960383e-06, "loss": 0.3774, "step": 40926 }, { "epoch": 1.8781607085494012, "grad_norm": 0.47231870889663696, "learning_rate": 3.1824932072734803e-06, "loss": 0.3491, "step": 40927 }, { "epoch": 1.8782065990546557, "grad_norm": 0.512829601764679, "learning_rate": 3.1822647939571084e-06, "loss": 0.4153, "step": 40928 }, { "epoch": 1.8782524895599102, "grad_norm": 0.4762999415397644, "learning_rate": 3.1820363850118165e-06, "loss": 0.3358, "step": 40929 }, { "epoch": 1.8782983800651645, "grad_norm": 0.45543399453163147, "learning_rate": 3.181807980438153e-06, "loss": 0.3569, "step": 40930 }, { "epoch": 1.878344270570419, "grad_norm": 0.4704321622848511, "learning_rate": 3.1815795802366654e-06, "loss": 0.2989, "step": 40931 }, { "epoch": 1.8783901610756735, "grad_norm": 0.4486519992351532, "learning_rate": 3.181351184407907e-06, "loss": 0.3274, "step": 40932 }, { "epoch": 1.8784360515809277, "grad_norm": 0.4495529234409332, "learning_rate": 3.181122792952425e-06, "loss": 0.3044, "step": 40933 }, { "epoch": 1.8784819420861822, "grad_norm": 0.46509629487991333, "learning_rate": 3.180894405870768e-06, "loss": 0.3178, "step": 40934 }, { "epoch": 1.8785278325914367, "grad_norm": 0.4702320694923401, "learning_rate": 3.1806660231634867e-06, "loss": 0.4017, "step": 40935 }, { "epoch": 1.8785737230966912, "grad_norm": 0.4962367117404938, "learning_rate": 3.1804376448311293e-06, "loss": 0.388, "step": 40936 }, { "epoch": 1.8786196136019457, "grad_norm": 0.4944383502006531, "learning_rate": 3.1802092708742426e-06, "loss": 0.3905, "step": 40937 }, { "epoch": 1.8786655041072002, "grad_norm": 0.4689026474952698, "learning_rate": 3.1799809012933806e-06, "loss": 0.3787, "step": 40938 }, { "epoch": 1.8787113946124547, "grad_norm": 0.45295166969299316, "learning_rate": 3.1797525360890903e-06, "loss": 0.3055, "step": 40939 }, { "epoch": 1.8787572851177092, "grad_norm": 0.4577741324901581, "learning_rate": 3.17952417526192e-06, "loss": 0.3134, "step": 40940 }, { "epoch": 1.8788031756229637, "grad_norm": 0.49297085404396057, "learning_rate": 3.1792958188124203e-06, "loss": 0.351, "step": 40941 }, { "epoch": 1.8788490661282182, "grad_norm": 0.45363950729370117, "learning_rate": 3.179067466741139e-06, "loss": 0.3225, "step": 40942 }, { "epoch": 1.8788949566334725, "grad_norm": 0.49120309948921204, "learning_rate": 3.1788391190486253e-06, "loss": 0.3848, "step": 40943 }, { "epoch": 1.878940847138727, "grad_norm": 0.444976806640625, "learning_rate": 3.178610775735429e-06, "loss": 0.3348, "step": 40944 }, { "epoch": 1.8789867376439815, "grad_norm": 0.4482094347476959, "learning_rate": 3.1783824368020997e-06, "loss": 0.3154, "step": 40945 }, { "epoch": 1.8790326281492358, "grad_norm": 0.5034446120262146, "learning_rate": 3.1781541022491836e-06, "loss": 0.3338, "step": 40946 }, { "epoch": 1.8790785186544903, "grad_norm": 0.5176222324371338, "learning_rate": 3.1779257720772337e-06, "loss": 0.4615, "step": 40947 }, { "epoch": 1.8791244091597448, "grad_norm": 0.4652732312679291, "learning_rate": 3.1776974462867972e-06, "loss": 0.323, "step": 40948 }, { "epoch": 1.8791702996649993, "grad_norm": 0.4904159605503082, "learning_rate": 3.177469124878423e-06, "loss": 0.3534, "step": 40949 }, { "epoch": 1.8792161901702538, "grad_norm": 0.5092717409133911, "learning_rate": 3.17724080785266e-06, "loss": 0.4567, "step": 40950 }, { "epoch": 1.8792620806755083, "grad_norm": 0.4551880359649658, "learning_rate": 3.177012495210058e-06, "loss": 0.3277, "step": 40951 }, { "epoch": 1.8793079711807628, "grad_norm": 0.48032402992248535, "learning_rate": 3.176784186951164e-06, "loss": 0.3606, "step": 40952 }, { "epoch": 1.8793538616860173, "grad_norm": 0.5019930005073547, "learning_rate": 3.1765558830765306e-06, "loss": 0.3446, "step": 40953 }, { "epoch": 1.8793997521912718, "grad_norm": 0.457221120595932, "learning_rate": 3.1763275835867048e-06, "loss": 0.346, "step": 40954 }, { "epoch": 1.8794456426965263, "grad_norm": 0.425237774848938, "learning_rate": 3.1760992884822344e-06, "loss": 0.2834, "step": 40955 }, { "epoch": 1.8794915332017805, "grad_norm": 0.48285725712776184, "learning_rate": 3.17587099776367e-06, "loss": 0.4012, "step": 40956 }, { "epoch": 1.879537423707035, "grad_norm": 0.4501810073852539, "learning_rate": 3.1756427114315613e-06, "loss": 0.3587, "step": 40957 }, { "epoch": 1.8795833142122895, "grad_norm": 0.4360103905200958, "learning_rate": 3.175414429486454e-06, "loss": 0.2812, "step": 40958 }, { "epoch": 1.8796292047175438, "grad_norm": 0.4685761034488678, "learning_rate": 3.1751861519289008e-06, "loss": 0.3559, "step": 40959 }, { "epoch": 1.8796750952227983, "grad_norm": 0.4544246196746826, "learning_rate": 3.174957878759448e-06, "loss": 0.3458, "step": 40960 }, { "epoch": 1.8797209857280528, "grad_norm": 0.47219225764274597, "learning_rate": 3.1747296099786475e-06, "loss": 0.3163, "step": 40961 }, { "epoch": 1.8797668762333073, "grad_norm": 0.456236332654953, "learning_rate": 3.1745013455870433e-06, "loss": 0.317, "step": 40962 }, { "epoch": 1.8798127667385618, "grad_norm": 0.5169612169265747, "learning_rate": 3.1742730855851896e-06, "loss": 0.4315, "step": 40963 }, { "epoch": 1.8798586572438163, "grad_norm": 0.46480226516723633, "learning_rate": 3.174044829973632e-06, "loss": 0.3369, "step": 40964 }, { "epoch": 1.8799045477490708, "grad_norm": 0.4524090886116028, "learning_rate": 3.1738165787529198e-06, "loss": 0.3061, "step": 40965 }, { "epoch": 1.8799504382543253, "grad_norm": 0.4854663908481598, "learning_rate": 3.1735883319236027e-06, "loss": 0.4362, "step": 40966 }, { "epoch": 1.8799963287595798, "grad_norm": 0.4476641118526459, "learning_rate": 3.1733600894862303e-06, "loss": 0.2879, "step": 40967 }, { "epoch": 1.880042219264834, "grad_norm": 0.4982832670211792, "learning_rate": 3.173131851441348e-06, "loss": 0.3742, "step": 40968 }, { "epoch": 1.8800881097700886, "grad_norm": 0.47871583700180054, "learning_rate": 3.1729036177895084e-06, "loss": 0.3744, "step": 40969 }, { "epoch": 1.880134000275343, "grad_norm": 0.5018702149391174, "learning_rate": 3.1726753885312593e-06, "loss": 0.4088, "step": 40970 }, { "epoch": 1.8801798907805973, "grad_norm": 0.49285808205604553, "learning_rate": 3.172447163667148e-06, "loss": 0.4066, "step": 40971 }, { "epoch": 1.8802257812858518, "grad_norm": 0.4498308300971985, "learning_rate": 3.1722189431977264e-06, "loss": 0.3391, "step": 40972 }, { "epoch": 1.8802716717911063, "grad_norm": 0.4803049862384796, "learning_rate": 3.171990727123541e-06, "loss": 0.3504, "step": 40973 }, { "epoch": 1.8803175622963608, "grad_norm": 0.46708038449287415, "learning_rate": 3.171762515445138e-06, "loss": 0.3738, "step": 40974 }, { "epoch": 1.8803634528016153, "grad_norm": 0.4638459384441376, "learning_rate": 3.1715343081630725e-06, "loss": 0.3382, "step": 40975 }, { "epoch": 1.8804093433068698, "grad_norm": 0.5472952723503113, "learning_rate": 3.1713061052778887e-06, "loss": 0.3378, "step": 40976 }, { "epoch": 1.8804552338121243, "grad_norm": 0.39143460988998413, "learning_rate": 3.1710779067901358e-06, "loss": 0.2337, "step": 40977 }, { "epoch": 1.8805011243173788, "grad_norm": 0.44960275292396545, "learning_rate": 3.1708497127003646e-06, "loss": 0.3567, "step": 40978 }, { "epoch": 1.8805470148226333, "grad_norm": 0.4635041356086731, "learning_rate": 3.170621523009123e-06, "loss": 0.3112, "step": 40979 }, { "epoch": 1.8805929053278878, "grad_norm": 0.4710862934589386, "learning_rate": 3.1703933377169567e-06, "loss": 0.3256, "step": 40980 }, { "epoch": 1.880638795833142, "grad_norm": 0.42451804876327515, "learning_rate": 3.1701651568244193e-06, "loss": 0.3285, "step": 40981 }, { "epoch": 1.8806846863383966, "grad_norm": 0.49581775069236755, "learning_rate": 3.1699369803320566e-06, "loss": 0.4141, "step": 40982 }, { "epoch": 1.880730576843651, "grad_norm": 0.4580715000629425, "learning_rate": 3.169708808240417e-06, "loss": 0.3457, "step": 40983 }, { "epoch": 1.8807764673489054, "grad_norm": 0.5822945833206177, "learning_rate": 3.1694806405500517e-06, "loss": 0.4298, "step": 40984 }, { "epoch": 1.8808223578541599, "grad_norm": 0.46547731757164, "learning_rate": 3.1692524772615076e-06, "loss": 0.3195, "step": 40985 }, { "epoch": 1.8808682483594144, "grad_norm": 0.4505749046802521, "learning_rate": 3.1690243183753313e-06, "loss": 0.3176, "step": 40986 }, { "epoch": 1.8809141388646688, "grad_norm": 0.46239930391311646, "learning_rate": 3.1687961638920763e-06, "loss": 0.3257, "step": 40987 }, { "epoch": 1.8809600293699233, "grad_norm": 0.46887993812561035, "learning_rate": 3.1685680138122876e-06, "loss": 0.3266, "step": 40988 }, { "epoch": 1.8810059198751778, "grad_norm": 0.44191545248031616, "learning_rate": 3.168339868136513e-06, "loss": 0.3081, "step": 40989 }, { "epoch": 1.8810518103804323, "grad_norm": 0.470470666885376, "learning_rate": 3.168111726865305e-06, "loss": 0.3695, "step": 40990 }, { "epoch": 1.8810977008856868, "grad_norm": 0.4549897313117981, "learning_rate": 3.1678835899992107e-06, "loss": 0.3018, "step": 40991 }, { "epoch": 1.8811435913909413, "grad_norm": 0.43316784501075745, "learning_rate": 3.1676554575387763e-06, "loss": 0.302, "step": 40992 }, { "epoch": 1.8811894818961958, "grad_norm": 0.4470137357711792, "learning_rate": 3.1674273294845535e-06, "loss": 0.3216, "step": 40993 }, { "epoch": 1.8812353724014501, "grad_norm": 0.46059149503707886, "learning_rate": 3.167199205837089e-06, "loss": 0.3283, "step": 40994 }, { "epoch": 1.8812812629067046, "grad_norm": 0.44102296233177185, "learning_rate": 3.166971086596933e-06, "loss": 0.3092, "step": 40995 }, { "epoch": 1.881327153411959, "grad_norm": 0.49818354845046997, "learning_rate": 3.1667429717646304e-06, "loss": 0.426, "step": 40996 }, { "epoch": 1.8813730439172134, "grad_norm": 0.4413798451423645, "learning_rate": 3.1665148613407348e-06, "loss": 0.3063, "step": 40997 }, { "epoch": 1.8814189344224679, "grad_norm": 0.4837610423564911, "learning_rate": 3.166286755325792e-06, "loss": 0.4054, "step": 40998 }, { "epoch": 1.8814648249277224, "grad_norm": 0.4820041060447693, "learning_rate": 3.1660586537203497e-06, "loss": 0.3746, "step": 40999 }, { "epoch": 1.8815107154329769, "grad_norm": 0.4675370752811432, "learning_rate": 3.1658305565249594e-06, "loss": 0.3214, "step": 41000 }, { "epoch": 1.8815566059382314, "grad_norm": 0.48801398277282715, "learning_rate": 3.165602463740166e-06, "loss": 0.3843, "step": 41001 }, { "epoch": 1.8816024964434859, "grad_norm": 0.49266982078552246, "learning_rate": 3.165374375366519e-06, "loss": 0.3881, "step": 41002 }, { "epoch": 1.8816483869487404, "grad_norm": 0.473346471786499, "learning_rate": 3.1651462914045695e-06, "loss": 0.3793, "step": 41003 }, { "epoch": 1.8816942774539949, "grad_norm": 0.47626182436943054, "learning_rate": 3.164918211854864e-06, "loss": 0.381, "step": 41004 }, { "epoch": 1.8817401679592494, "grad_norm": 0.4672994315624237, "learning_rate": 3.1646901367179495e-06, "loss": 0.3598, "step": 41005 }, { "epoch": 1.8817860584645036, "grad_norm": 0.4838709831237793, "learning_rate": 3.164462065994378e-06, "loss": 0.3767, "step": 41006 }, { "epoch": 1.8818319489697581, "grad_norm": 0.4869977533817291, "learning_rate": 3.1642339996846947e-06, "loss": 0.4107, "step": 41007 }, { "epoch": 1.8818778394750126, "grad_norm": 0.4710145592689514, "learning_rate": 3.164005937789448e-06, "loss": 0.3517, "step": 41008 }, { "epoch": 1.8819237299802671, "grad_norm": 0.48436930775642395, "learning_rate": 3.1637778803091906e-06, "loss": 0.3879, "step": 41009 }, { "epoch": 1.8819696204855214, "grad_norm": 0.4550233483314514, "learning_rate": 3.1635498272444663e-06, "loss": 0.3587, "step": 41010 }, { "epoch": 1.882015510990776, "grad_norm": 0.4757569134235382, "learning_rate": 3.1633217785958236e-06, "loss": 0.4098, "step": 41011 }, { "epoch": 1.8820614014960304, "grad_norm": 0.5197775363922119, "learning_rate": 3.163093734363815e-06, "loss": 0.4454, "step": 41012 }, { "epoch": 1.882107292001285, "grad_norm": 0.4945377707481384, "learning_rate": 3.162865694548985e-06, "loss": 0.3993, "step": 41013 }, { "epoch": 1.8821531825065394, "grad_norm": 0.46583765745162964, "learning_rate": 3.1626376591518827e-06, "loss": 0.3524, "step": 41014 }, { "epoch": 1.882199073011794, "grad_norm": 0.4493637979030609, "learning_rate": 3.1624096281730575e-06, "loss": 0.294, "step": 41015 }, { "epoch": 1.8822449635170484, "grad_norm": 0.47812148928642273, "learning_rate": 3.162181601613058e-06, "loss": 0.3623, "step": 41016 }, { "epoch": 1.882290854022303, "grad_norm": 0.44607001543045044, "learning_rate": 3.1619535794724295e-06, "loss": 0.3312, "step": 41017 }, { "epoch": 1.8823367445275574, "grad_norm": 0.5113836526870728, "learning_rate": 3.161725561751724e-06, "loss": 0.4008, "step": 41018 }, { "epoch": 1.8823826350328117, "grad_norm": 0.4481378495693207, "learning_rate": 3.1614975484514886e-06, "loss": 0.2826, "step": 41019 }, { "epoch": 1.8824285255380662, "grad_norm": 0.4838877320289612, "learning_rate": 3.16126953957227e-06, "loss": 0.3627, "step": 41020 }, { "epoch": 1.8824744160433207, "grad_norm": 0.44637173414230347, "learning_rate": 3.1610415351146195e-06, "loss": 0.3072, "step": 41021 }, { "epoch": 1.882520306548575, "grad_norm": 0.4835214912891388, "learning_rate": 3.1608135350790836e-06, "loss": 0.374, "step": 41022 }, { "epoch": 1.8825661970538294, "grad_norm": 0.49482837319374084, "learning_rate": 3.1605855394662087e-06, "loss": 0.3754, "step": 41023 }, { "epoch": 1.882612087559084, "grad_norm": 0.5678021311759949, "learning_rate": 3.160357548276547e-06, "loss": 0.3885, "step": 41024 }, { "epoch": 1.8826579780643384, "grad_norm": 0.5226348638534546, "learning_rate": 3.1601295615106444e-06, "loss": 0.4199, "step": 41025 }, { "epoch": 1.882703868569593, "grad_norm": 0.4846934378147125, "learning_rate": 3.1599015791690503e-06, "loss": 0.371, "step": 41026 }, { "epoch": 1.8827497590748474, "grad_norm": 0.4854075610637665, "learning_rate": 3.15967360125231e-06, "loss": 0.3882, "step": 41027 }, { "epoch": 1.882795649580102, "grad_norm": 0.494087278842926, "learning_rate": 3.159445627760976e-06, "loss": 0.3795, "step": 41028 }, { "epoch": 1.8828415400853564, "grad_norm": 0.5188537836074829, "learning_rate": 3.1592176586955937e-06, "loss": 0.4179, "step": 41029 }, { "epoch": 1.882887430590611, "grad_norm": 0.45918014645576477, "learning_rate": 3.15898969405671e-06, "loss": 0.3781, "step": 41030 }, { "epoch": 1.8829333210958654, "grad_norm": 0.4742754399776459, "learning_rate": 3.1587617338448782e-06, "loss": 0.37, "step": 41031 }, { "epoch": 1.8829792116011197, "grad_norm": 0.5049639344215393, "learning_rate": 3.1585337780606427e-06, "loss": 0.3932, "step": 41032 }, { "epoch": 1.8830251021063742, "grad_norm": 0.4636766314506531, "learning_rate": 3.15830582670455e-06, "loss": 0.3197, "step": 41033 }, { "epoch": 1.8830709926116287, "grad_norm": 0.48075205087661743, "learning_rate": 3.158077879777152e-06, "loss": 0.3689, "step": 41034 }, { "epoch": 1.883116883116883, "grad_norm": 0.4679269790649414, "learning_rate": 3.157849937278996e-06, "loss": 0.3965, "step": 41035 }, { "epoch": 1.8831627736221375, "grad_norm": 0.4851629137992859, "learning_rate": 3.1576219992106282e-06, "loss": 0.3489, "step": 41036 }, { "epoch": 1.883208664127392, "grad_norm": 0.4370095729827881, "learning_rate": 3.157394065572599e-06, "loss": 0.3428, "step": 41037 }, { "epoch": 1.8832545546326465, "grad_norm": 0.4753059148788452, "learning_rate": 3.157166136365456e-06, "loss": 0.3576, "step": 41038 }, { "epoch": 1.883300445137901, "grad_norm": 0.5060460567474365, "learning_rate": 3.156938211589744e-06, "loss": 0.4029, "step": 41039 }, { "epoch": 1.8833463356431555, "grad_norm": 0.45769980549812317, "learning_rate": 3.156710291246017e-06, "loss": 0.3456, "step": 41040 }, { "epoch": 1.88339222614841, "grad_norm": 0.4897744059562683, "learning_rate": 3.156482375334819e-06, "loss": 0.3776, "step": 41041 }, { "epoch": 1.8834381166536645, "grad_norm": 0.4726833403110504, "learning_rate": 3.1562544638566983e-06, "loss": 0.3617, "step": 41042 }, { "epoch": 1.883484007158919, "grad_norm": 0.4847007989883423, "learning_rate": 3.156026556812205e-06, "loss": 0.3881, "step": 41043 }, { "epoch": 1.8835298976641734, "grad_norm": 0.4632507562637329, "learning_rate": 3.155798654201886e-06, "loss": 0.3046, "step": 41044 }, { "epoch": 1.8835757881694277, "grad_norm": 0.4409697949886322, "learning_rate": 3.1555707560262865e-06, "loss": 0.2842, "step": 41045 }, { "epoch": 1.8836216786746822, "grad_norm": 0.49303868412971497, "learning_rate": 3.1553428622859595e-06, "loss": 0.3752, "step": 41046 }, { "epoch": 1.8836675691799367, "grad_norm": 0.4547823369503021, "learning_rate": 3.1551149729814513e-06, "loss": 0.331, "step": 41047 }, { "epoch": 1.883713459685191, "grad_norm": 0.505851149559021, "learning_rate": 3.1548870881133074e-06, "loss": 0.4515, "step": 41048 }, { "epoch": 1.8837593501904455, "grad_norm": 0.4911642372608185, "learning_rate": 3.154659207682079e-06, "loss": 0.4169, "step": 41049 }, { "epoch": 1.8838052406957, "grad_norm": 0.49238285422325134, "learning_rate": 3.1544313316883134e-06, "loss": 0.3733, "step": 41050 }, { "epoch": 1.8838511312009545, "grad_norm": 0.47842520475387573, "learning_rate": 3.1542034601325556e-06, "loss": 0.3821, "step": 41051 }, { "epoch": 1.883897021706209, "grad_norm": 0.4504016041755676, "learning_rate": 3.153975593015358e-06, "loss": 0.3187, "step": 41052 }, { "epoch": 1.8839429122114635, "grad_norm": 0.48080721497535706, "learning_rate": 3.153747730337267e-06, "loss": 0.3358, "step": 41053 }, { "epoch": 1.883988802716718, "grad_norm": 0.4527626633644104, "learning_rate": 3.1535198720988276e-06, "loss": 0.339, "step": 41054 }, { "epoch": 1.8840346932219725, "grad_norm": 0.4792722165584564, "learning_rate": 3.153292018300592e-06, "loss": 0.3841, "step": 41055 }, { "epoch": 1.884080583727227, "grad_norm": 0.4764436185359955, "learning_rate": 3.1530641689431064e-06, "loss": 0.3289, "step": 41056 }, { "epoch": 1.8841264742324813, "grad_norm": 0.49971815943717957, "learning_rate": 3.1528363240269168e-06, "loss": 0.3801, "step": 41057 }, { "epoch": 1.8841723647377357, "grad_norm": 0.46734774112701416, "learning_rate": 3.1526084835525743e-06, "loss": 0.3466, "step": 41058 }, { "epoch": 1.8842182552429902, "grad_norm": 0.47285333275794983, "learning_rate": 3.1523806475206253e-06, "loss": 0.3479, "step": 41059 }, { "epoch": 1.8842641457482445, "grad_norm": 0.45007196068763733, "learning_rate": 3.152152815931618e-06, "loss": 0.3375, "step": 41060 }, { "epoch": 1.884310036253499, "grad_norm": 0.4664475619792938, "learning_rate": 3.151924988786098e-06, "loss": 0.3346, "step": 41061 }, { "epoch": 1.8843559267587535, "grad_norm": 0.4767315089702606, "learning_rate": 3.151697166084617e-06, "loss": 0.3529, "step": 41062 }, { "epoch": 1.884401817264008, "grad_norm": 0.4740633964538574, "learning_rate": 3.1514693478277207e-06, "loss": 0.3967, "step": 41063 }, { "epoch": 1.8844477077692625, "grad_norm": 0.4482031464576721, "learning_rate": 3.1512415340159565e-06, "loss": 0.3454, "step": 41064 }, { "epoch": 1.884493598274517, "grad_norm": 0.4896005094051361, "learning_rate": 3.151013724649874e-06, "loss": 0.411, "step": 41065 }, { "epoch": 1.8845394887797715, "grad_norm": 0.4585939943790436, "learning_rate": 3.1507859197300194e-06, "loss": 0.356, "step": 41066 }, { "epoch": 1.884585379285026, "grad_norm": 0.4270206689834595, "learning_rate": 3.15055811925694e-06, "loss": 0.2783, "step": 41067 }, { "epoch": 1.8846312697902805, "grad_norm": 0.563872754573822, "learning_rate": 3.150330323231186e-06, "loss": 0.3809, "step": 41068 }, { "epoch": 1.884677160295535, "grad_norm": 0.4903283715248108, "learning_rate": 3.150102531653304e-06, "loss": 0.3609, "step": 41069 }, { "epoch": 1.8847230508007893, "grad_norm": 0.47412610054016113, "learning_rate": 3.1498747445238403e-06, "loss": 0.3893, "step": 41070 }, { "epoch": 1.8847689413060438, "grad_norm": 0.4898699223995209, "learning_rate": 3.1496469618433453e-06, "loss": 0.3793, "step": 41071 }, { "epoch": 1.8848148318112983, "grad_norm": 0.43060028553009033, "learning_rate": 3.1494191836123643e-06, "loss": 0.3091, "step": 41072 }, { "epoch": 1.8848607223165526, "grad_norm": 0.5120705366134644, "learning_rate": 3.1491914098314458e-06, "loss": 0.4497, "step": 41073 }, { "epoch": 1.884906612821807, "grad_norm": 0.44911623001098633, "learning_rate": 3.1489636405011386e-06, "loss": 0.3402, "step": 41074 }, { "epoch": 1.8849525033270615, "grad_norm": 0.49238765239715576, "learning_rate": 3.148735875621991e-06, "loss": 0.3687, "step": 41075 }, { "epoch": 1.884998393832316, "grad_norm": 0.4685404598712921, "learning_rate": 3.1485081151945464e-06, "loss": 0.32, "step": 41076 }, { "epoch": 1.8850442843375705, "grad_norm": 0.51351398229599, "learning_rate": 3.1482803592193574e-06, "loss": 0.4102, "step": 41077 }, { "epoch": 1.885090174842825, "grad_norm": 0.4447522461414337, "learning_rate": 3.1480526076969697e-06, "loss": 0.2875, "step": 41078 }, { "epoch": 1.8851360653480795, "grad_norm": 0.4599219262599945, "learning_rate": 3.14782486062793e-06, "loss": 0.3353, "step": 41079 }, { "epoch": 1.885181955853334, "grad_norm": 0.4919465482234955, "learning_rate": 3.1475971180127877e-06, "loss": 0.37, "step": 41080 }, { "epoch": 1.8852278463585885, "grad_norm": 0.4829567074775696, "learning_rate": 3.14736937985209e-06, "loss": 0.358, "step": 41081 }, { "epoch": 1.885273736863843, "grad_norm": 0.4866565763950348, "learning_rate": 3.147141646146382e-06, "loss": 0.3741, "step": 41082 }, { "epoch": 1.8853196273690973, "grad_norm": 0.4796319901943207, "learning_rate": 3.146913916896216e-06, "loss": 0.3211, "step": 41083 }, { "epoch": 1.8853655178743518, "grad_norm": 0.5006620287895203, "learning_rate": 3.1466861921021367e-06, "loss": 0.3985, "step": 41084 }, { "epoch": 1.8854114083796063, "grad_norm": 0.47266310453414917, "learning_rate": 3.1464584717646917e-06, "loss": 0.2927, "step": 41085 }, { "epoch": 1.8854572988848606, "grad_norm": 0.44848233461380005, "learning_rate": 3.1462307558844298e-06, "loss": 0.2782, "step": 41086 }, { "epoch": 1.885503189390115, "grad_norm": 0.4683627784252167, "learning_rate": 3.1460030444618973e-06, "loss": 0.388, "step": 41087 }, { "epoch": 1.8855490798953696, "grad_norm": 0.43223220109939575, "learning_rate": 3.1457753374976406e-06, "loss": 0.2855, "step": 41088 }, { "epoch": 1.885594970400624, "grad_norm": 0.4398265480995178, "learning_rate": 3.145547634992212e-06, "loss": 0.2903, "step": 41089 }, { "epoch": 1.8856408609058786, "grad_norm": 0.47206953167915344, "learning_rate": 3.145319936946155e-06, "loss": 0.3422, "step": 41090 }, { "epoch": 1.885686751411133, "grad_norm": 0.46676135063171387, "learning_rate": 3.1450922433600175e-06, "loss": 0.3552, "step": 41091 }, { "epoch": 1.8857326419163876, "grad_norm": 0.45267415046691895, "learning_rate": 3.144864554234349e-06, "loss": 0.316, "step": 41092 }, { "epoch": 1.885778532421642, "grad_norm": 0.48060664534568787, "learning_rate": 3.1446368695696955e-06, "loss": 0.4128, "step": 41093 }, { "epoch": 1.8858244229268966, "grad_norm": 0.474813312292099, "learning_rate": 3.1444091893666044e-06, "loss": 0.3638, "step": 41094 }, { "epoch": 1.8858703134321508, "grad_norm": 0.4663175642490387, "learning_rate": 3.144181513625622e-06, "loss": 0.3778, "step": 41095 }, { "epoch": 1.8859162039374053, "grad_norm": 0.4853506088256836, "learning_rate": 3.143953842347299e-06, "loss": 0.3867, "step": 41096 }, { "epoch": 1.8859620944426598, "grad_norm": 0.4963219165802002, "learning_rate": 3.143726175532183e-06, "loss": 0.4107, "step": 41097 }, { "epoch": 1.8860079849479143, "grad_norm": 0.4717501997947693, "learning_rate": 3.143498513180816e-06, "loss": 0.3985, "step": 41098 }, { "epoch": 1.8860538754531686, "grad_norm": 0.488343745470047, "learning_rate": 3.1432708552937513e-06, "loss": 0.3744, "step": 41099 }, { "epoch": 1.886099765958423, "grad_norm": 0.43021586537361145, "learning_rate": 3.143043201871534e-06, "loss": 0.3084, "step": 41100 }, { "epoch": 1.8861456564636776, "grad_norm": 0.4764815866947174, "learning_rate": 3.1428155529147108e-06, "loss": 0.4293, "step": 41101 }, { "epoch": 1.886191546968932, "grad_norm": 0.4853217303752899, "learning_rate": 3.142587908423832e-06, "loss": 0.422, "step": 41102 }, { "epoch": 1.8862374374741866, "grad_norm": 0.45550957322120667, "learning_rate": 3.142360268399442e-06, "loss": 0.3156, "step": 41103 }, { "epoch": 1.886283327979441, "grad_norm": 0.45159295201301575, "learning_rate": 3.1421326328420877e-06, "loss": 0.3281, "step": 41104 }, { "epoch": 1.8863292184846956, "grad_norm": 0.4706265330314636, "learning_rate": 3.1419050017523196e-06, "loss": 0.3621, "step": 41105 }, { "epoch": 1.88637510898995, "grad_norm": 0.4876958131790161, "learning_rate": 3.1416773751306837e-06, "loss": 0.3979, "step": 41106 }, { "epoch": 1.8864209994952046, "grad_norm": 0.4814624488353729, "learning_rate": 3.1414497529777265e-06, "loss": 0.2631, "step": 41107 }, { "epoch": 1.8864668900004589, "grad_norm": 0.4697282314300537, "learning_rate": 3.141222135293997e-06, "loss": 0.3668, "step": 41108 }, { "epoch": 1.8865127805057134, "grad_norm": 0.4303424656391144, "learning_rate": 3.1409945220800416e-06, "loss": 0.2821, "step": 41109 }, { "epoch": 1.8865586710109679, "grad_norm": 0.5009654760360718, "learning_rate": 3.1407669133364053e-06, "loss": 0.3547, "step": 41110 }, { "epoch": 1.8866045615162221, "grad_norm": 0.49545609951019287, "learning_rate": 3.1405393090636406e-06, "loss": 0.3871, "step": 41111 }, { "epoch": 1.8866504520214766, "grad_norm": 0.5043784379959106, "learning_rate": 3.1403117092622908e-06, "loss": 0.4654, "step": 41112 }, { "epoch": 1.8866963425267311, "grad_norm": 0.4593999981880188, "learning_rate": 3.140084113932904e-06, "loss": 0.3082, "step": 41113 }, { "epoch": 1.8867422330319856, "grad_norm": 0.470515638589859, "learning_rate": 3.139856523076029e-06, "loss": 0.3537, "step": 41114 }, { "epoch": 1.8867881235372401, "grad_norm": 0.5026741027832031, "learning_rate": 3.139628936692212e-06, "loss": 0.3472, "step": 41115 }, { "epoch": 1.8868340140424946, "grad_norm": 0.4406096339225769, "learning_rate": 3.1394013547819986e-06, "loss": 0.293, "step": 41116 }, { "epoch": 1.8868799045477491, "grad_norm": 0.4873218238353729, "learning_rate": 3.1391737773459393e-06, "loss": 0.3715, "step": 41117 }, { "epoch": 1.8869257950530036, "grad_norm": 0.4864557087421417, "learning_rate": 3.1389462043845797e-06, "loss": 0.4058, "step": 41118 }, { "epoch": 1.8869716855582581, "grad_norm": 0.47774580121040344, "learning_rate": 3.138718635898466e-06, "loss": 0.3288, "step": 41119 }, { "epoch": 1.8870175760635126, "grad_norm": 0.4742848575115204, "learning_rate": 3.138491071888148e-06, "loss": 0.3673, "step": 41120 }, { "epoch": 1.887063466568767, "grad_norm": 0.5039926767349243, "learning_rate": 3.138263512354171e-06, "loss": 0.4256, "step": 41121 }, { "epoch": 1.8871093570740214, "grad_norm": 0.5023134350776672, "learning_rate": 3.138035957297082e-06, "loss": 0.3829, "step": 41122 }, { "epoch": 1.8871552475792759, "grad_norm": 0.4216409921646118, "learning_rate": 3.1378084067174298e-06, "loss": 0.3106, "step": 41123 }, { "epoch": 1.8872011380845302, "grad_norm": 0.5173583030700684, "learning_rate": 3.137580860615761e-06, "loss": 0.4224, "step": 41124 }, { "epoch": 1.8872470285897847, "grad_norm": 0.46903419494628906, "learning_rate": 3.137353318992622e-06, "loss": 0.3681, "step": 41125 }, { "epoch": 1.8872929190950392, "grad_norm": 0.4946678578853607, "learning_rate": 3.137125781848558e-06, "loss": 0.397, "step": 41126 }, { "epoch": 1.8873388096002937, "grad_norm": 0.4374212324619293, "learning_rate": 3.136898249184121e-06, "loss": 0.3165, "step": 41127 }, { "epoch": 1.8873847001055482, "grad_norm": 0.4598016142845154, "learning_rate": 3.136670720999856e-06, "loss": 0.3139, "step": 41128 }, { "epoch": 1.8874305906108026, "grad_norm": 0.4759996831417084, "learning_rate": 3.1364431972963083e-06, "loss": 0.368, "step": 41129 }, { "epoch": 1.8874764811160571, "grad_norm": 0.46867337822914124, "learning_rate": 3.1362156780740278e-06, "loss": 0.3628, "step": 41130 }, { "epoch": 1.8875223716213116, "grad_norm": 0.4929969608783722, "learning_rate": 3.1359881633335604e-06, "loss": 0.3668, "step": 41131 }, { "epoch": 1.8875682621265661, "grad_norm": 0.46970969438552856, "learning_rate": 3.1357606530754516e-06, "loss": 0.3651, "step": 41132 }, { "epoch": 1.8876141526318206, "grad_norm": 0.5175648331642151, "learning_rate": 3.135533147300251e-06, "loss": 0.4367, "step": 41133 }, { "epoch": 1.887660043137075, "grad_norm": 0.4960569441318512, "learning_rate": 3.1353056460085053e-06, "loss": 0.3887, "step": 41134 }, { "epoch": 1.8877059336423294, "grad_norm": 0.4537118971347809, "learning_rate": 3.1350781492007596e-06, "loss": 0.3115, "step": 41135 }, { "epoch": 1.887751824147584, "grad_norm": 0.45467454195022583, "learning_rate": 3.134850656877564e-06, "loss": 0.3477, "step": 41136 }, { "epoch": 1.8877977146528382, "grad_norm": 0.49898117780685425, "learning_rate": 3.1346231690394642e-06, "loss": 0.3703, "step": 41137 }, { "epoch": 1.8878436051580927, "grad_norm": 0.5164281129837036, "learning_rate": 3.1343956856870038e-06, "loss": 0.4199, "step": 41138 }, { "epoch": 1.8878894956633472, "grad_norm": 0.4712604880332947, "learning_rate": 3.1341682068207356e-06, "loss": 0.38, "step": 41139 }, { "epoch": 1.8879353861686017, "grad_norm": 0.47745200991630554, "learning_rate": 3.133940732441204e-06, "loss": 0.3138, "step": 41140 }, { "epoch": 1.8879812766738562, "grad_norm": 0.46911901235580444, "learning_rate": 3.133713262548955e-06, "loss": 0.3767, "step": 41141 }, { "epoch": 1.8880271671791107, "grad_norm": 0.43628227710723877, "learning_rate": 3.1334857971445383e-06, "loss": 0.2957, "step": 41142 }, { "epoch": 1.8880730576843652, "grad_norm": 0.49944472312927246, "learning_rate": 3.1332583362284983e-06, "loss": 0.3996, "step": 41143 }, { "epoch": 1.8881189481896197, "grad_norm": 0.5038762092590332, "learning_rate": 3.133030879801382e-06, "loss": 0.4479, "step": 41144 }, { "epoch": 1.8881648386948742, "grad_norm": 0.4964870810508728, "learning_rate": 3.1328034278637383e-06, "loss": 0.4066, "step": 41145 }, { "epoch": 1.8882107292001284, "grad_norm": 0.4704322814941406, "learning_rate": 3.1325759804161125e-06, "loss": 0.3971, "step": 41146 }, { "epoch": 1.888256619705383, "grad_norm": 0.42865249514579773, "learning_rate": 3.132348537459051e-06, "loss": 0.28, "step": 41147 }, { "epoch": 1.8883025102106374, "grad_norm": 0.5313159227371216, "learning_rate": 3.1321210989931026e-06, "loss": 0.4086, "step": 41148 }, { "epoch": 1.8883484007158917, "grad_norm": 0.47520172595977783, "learning_rate": 3.131893665018815e-06, "loss": 0.4053, "step": 41149 }, { "epoch": 1.8883942912211462, "grad_norm": 0.45428359508514404, "learning_rate": 3.1316662355367318e-06, "loss": 0.351, "step": 41150 }, { "epoch": 1.8884401817264007, "grad_norm": 0.49090445041656494, "learning_rate": 3.1314388105474024e-06, "loss": 0.4065, "step": 41151 }, { "epoch": 1.8884860722316552, "grad_norm": 0.47239595651626587, "learning_rate": 3.131211390051373e-06, "loss": 0.373, "step": 41152 }, { "epoch": 1.8885319627369097, "grad_norm": 0.4654393494129181, "learning_rate": 3.1309839740491888e-06, "loss": 0.3068, "step": 41153 }, { "epoch": 1.8885778532421642, "grad_norm": 0.464420348405838, "learning_rate": 3.1307565625413992e-06, "loss": 0.3623, "step": 41154 }, { "epoch": 1.8886237437474187, "grad_norm": 0.46534568071365356, "learning_rate": 3.130529155528551e-06, "loss": 0.3512, "step": 41155 }, { "epoch": 1.8886696342526732, "grad_norm": 0.4454861283302307, "learning_rate": 3.1303017530111895e-06, "loss": 0.2926, "step": 41156 }, { "epoch": 1.8887155247579277, "grad_norm": 0.491476446390152, "learning_rate": 3.130074354989863e-06, "loss": 0.3885, "step": 41157 }, { "epoch": 1.8887614152631822, "grad_norm": 0.4414600133895874, "learning_rate": 3.1298469614651173e-06, "loss": 0.3222, "step": 41158 }, { "epoch": 1.8888073057684365, "grad_norm": 0.4600023031234741, "learning_rate": 3.129619572437499e-06, "loss": 0.3221, "step": 41159 }, { "epoch": 1.888853196273691, "grad_norm": 0.47206708788871765, "learning_rate": 3.129392187907554e-06, "loss": 0.3631, "step": 41160 }, { "epoch": 1.8888990867789455, "grad_norm": 0.4247168302536011, "learning_rate": 3.129164807875832e-06, "loss": 0.2685, "step": 41161 }, { "epoch": 1.8889449772841997, "grad_norm": 0.45369696617126465, "learning_rate": 3.1289374323428777e-06, "loss": 0.3454, "step": 41162 }, { "epoch": 1.8889908677894542, "grad_norm": 0.503206193447113, "learning_rate": 3.128710061309238e-06, "loss": 0.4105, "step": 41163 }, { "epoch": 1.8890367582947087, "grad_norm": 0.4733751714229584, "learning_rate": 3.128482694775461e-06, "loss": 0.4266, "step": 41164 }, { "epoch": 1.8890826487999632, "grad_norm": 0.5182019472122192, "learning_rate": 3.1282553327420927e-06, "loss": 0.4561, "step": 41165 }, { "epoch": 1.8891285393052177, "grad_norm": 0.5134178400039673, "learning_rate": 3.1280279752096787e-06, "loss": 0.4314, "step": 41166 }, { "epoch": 1.8891744298104722, "grad_norm": 0.47933104634284973, "learning_rate": 3.127800622178767e-06, "loss": 0.3303, "step": 41167 }, { "epoch": 1.8892203203157267, "grad_norm": 0.47664764523506165, "learning_rate": 3.127573273649904e-06, "loss": 0.3569, "step": 41168 }, { "epoch": 1.8892662108209812, "grad_norm": 0.4786016345024109, "learning_rate": 3.127345929623634e-06, "loss": 0.3714, "step": 41169 }, { "epoch": 1.8893121013262357, "grad_norm": 0.5402268171310425, "learning_rate": 3.1271185901005085e-06, "loss": 0.274, "step": 41170 }, { "epoch": 1.8893579918314902, "grad_norm": 0.452197790145874, "learning_rate": 3.126891255081072e-06, "loss": 0.3019, "step": 41171 }, { "epoch": 1.8894038823367445, "grad_norm": 0.46202072501182556, "learning_rate": 3.1266639245658693e-06, "loss": 0.3485, "step": 41172 }, { "epoch": 1.889449772841999, "grad_norm": 0.4801209270954132, "learning_rate": 3.12643659855545e-06, "loss": 0.3533, "step": 41173 }, { "epoch": 1.8894956633472535, "grad_norm": 0.4799911677837372, "learning_rate": 3.1262092770503594e-06, "loss": 0.3139, "step": 41174 }, { "epoch": 1.8895415538525078, "grad_norm": 0.5055767893791199, "learning_rate": 3.1259819600511413e-06, "loss": 0.3819, "step": 41175 }, { "epoch": 1.8895874443577623, "grad_norm": 0.47848111391067505, "learning_rate": 3.1257546475583477e-06, "loss": 0.3693, "step": 41176 }, { "epoch": 1.8896333348630168, "grad_norm": 0.46945539116859436, "learning_rate": 3.125527339572523e-06, "loss": 0.3297, "step": 41177 }, { "epoch": 1.8896792253682713, "grad_norm": 0.4737134277820587, "learning_rate": 3.125300036094212e-06, "loss": 0.3501, "step": 41178 }, { "epoch": 1.8897251158735258, "grad_norm": 0.5007602572441101, "learning_rate": 3.125072737123964e-06, "loss": 0.3514, "step": 41179 }, { "epoch": 1.8897710063787803, "grad_norm": 0.46914568543434143, "learning_rate": 3.1248454426623243e-06, "loss": 0.3264, "step": 41180 }, { "epoch": 1.8898168968840348, "grad_norm": 0.4685961902141571, "learning_rate": 3.1246181527098372e-06, "loss": 0.3648, "step": 41181 }, { "epoch": 1.8898627873892893, "grad_norm": 0.45776647329330444, "learning_rate": 3.1243908672670535e-06, "loss": 0.3161, "step": 41182 }, { "epoch": 1.8899086778945438, "grad_norm": 0.49521705508232117, "learning_rate": 3.1241635863345182e-06, "loss": 0.3615, "step": 41183 }, { "epoch": 1.889954568399798, "grad_norm": 0.5050050020217896, "learning_rate": 3.123936309912776e-06, "loss": 0.3841, "step": 41184 }, { "epoch": 1.8900004589050525, "grad_norm": 0.4600909650325775, "learning_rate": 3.1237090380023766e-06, "loss": 0.324, "step": 41185 }, { "epoch": 1.890046349410307, "grad_norm": 0.44461163878440857, "learning_rate": 3.1234817706038646e-06, "loss": 0.3337, "step": 41186 }, { "epoch": 1.8900922399155615, "grad_norm": 0.47812700271606445, "learning_rate": 3.123254507717784e-06, "loss": 0.3798, "step": 41187 }, { "epoch": 1.8901381304208158, "grad_norm": 0.4376629590988159, "learning_rate": 3.123027249344688e-06, "loss": 0.3105, "step": 41188 }, { "epoch": 1.8901840209260703, "grad_norm": 0.4467106759548187, "learning_rate": 3.1227999954851174e-06, "loss": 0.3124, "step": 41189 }, { "epoch": 1.8902299114313248, "grad_norm": 0.45303601026535034, "learning_rate": 3.1225727461396207e-06, "loss": 0.335, "step": 41190 }, { "epoch": 1.8902758019365793, "grad_norm": 0.43162739276885986, "learning_rate": 3.1223455013087413e-06, "loss": 0.3018, "step": 41191 }, { "epoch": 1.8903216924418338, "grad_norm": 0.5173735618591309, "learning_rate": 3.122118260993031e-06, "loss": 0.4223, "step": 41192 }, { "epoch": 1.8903675829470883, "grad_norm": 0.49723488092422485, "learning_rate": 3.1218910251930334e-06, "loss": 0.4246, "step": 41193 }, { "epoch": 1.8904134734523428, "grad_norm": 0.47979727387428284, "learning_rate": 3.121663793909294e-06, "loss": 0.3515, "step": 41194 }, { "epoch": 1.8904593639575973, "grad_norm": 0.47939544916152954, "learning_rate": 3.1214365671423613e-06, "loss": 0.3536, "step": 41195 }, { "epoch": 1.8905052544628518, "grad_norm": 0.489620178937912, "learning_rate": 3.121209344892781e-06, "loss": 0.3915, "step": 41196 }, { "epoch": 1.890551144968106, "grad_norm": 0.42947274446487427, "learning_rate": 3.1209821271610975e-06, "loss": 0.2993, "step": 41197 }, { "epoch": 1.8905970354733606, "grad_norm": 0.47395825386047363, "learning_rate": 3.1207549139478604e-06, "loss": 0.3827, "step": 41198 }, { "epoch": 1.890642925978615, "grad_norm": 0.45382463932037354, "learning_rate": 3.1205277052536153e-06, "loss": 0.3151, "step": 41199 }, { "epoch": 1.8906888164838693, "grad_norm": 0.4740877151489258, "learning_rate": 3.1203005010789062e-06, "loss": 0.35, "step": 41200 }, { "epoch": 1.8907347069891238, "grad_norm": 0.49499279260635376, "learning_rate": 3.1200733014242822e-06, "loss": 0.3585, "step": 41201 }, { "epoch": 1.8907805974943783, "grad_norm": 0.49770835041999817, "learning_rate": 3.119846106290289e-06, "loss": 0.4134, "step": 41202 }, { "epoch": 1.8908264879996328, "grad_norm": 0.43906986713409424, "learning_rate": 3.1196189156774704e-06, "loss": 0.2694, "step": 41203 }, { "epoch": 1.8908723785048873, "grad_norm": 0.4907604157924652, "learning_rate": 3.119391729586377e-06, "loss": 0.4021, "step": 41204 }, { "epoch": 1.8909182690101418, "grad_norm": 0.4510626196861267, "learning_rate": 3.1191645480175525e-06, "loss": 0.3285, "step": 41205 }, { "epoch": 1.8909641595153963, "grad_norm": 0.46524322032928467, "learning_rate": 3.118937370971543e-06, "loss": 0.3512, "step": 41206 }, { "epoch": 1.8910100500206508, "grad_norm": 0.443034827709198, "learning_rate": 3.1187101984488964e-06, "loss": 0.301, "step": 41207 }, { "epoch": 1.8910559405259053, "grad_norm": 0.438945472240448, "learning_rate": 3.1184830304501577e-06, "loss": 0.2988, "step": 41208 }, { "epoch": 1.8911018310311598, "grad_norm": 0.4736744165420532, "learning_rate": 3.1182558669758722e-06, "loss": 0.3938, "step": 41209 }, { "epoch": 1.891147721536414, "grad_norm": 0.4627220630645752, "learning_rate": 3.1180287080265896e-06, "loss": 0.3261, "step": 41210 }, { "epoch": 1.8911936120416686, "grad_norm": 0.4554779827594757, "learning_rate": 3.1178015536028537e-06, "loss": 0.3777, "step": 41211 }, { "epoch": 1.891239502546923, "grad_norm": 0.4913612902164459, "learning_rate": 3.1175744037052087e-06, "loss": 0.3542, "step": 41212 }, { "epoch": 1.8912853930521774, "grad_norm": 0.46813344955444336, "learning_rate": 3.1173472583342052e-06, "loss": 0.3623, "step": 41213 }, { "epoch": 1.8913312835574319, "grad_norm": 0.47300752997398376, "learning_rate": 3.1171201174903874e-06, "loss": 0.3688, "step": 41214 }, { "epoch": 1.8913771740626864, "grad_norm": 0.4756969213485718, "learning_rate": 3.1168929811743e-06, "loss": 0.3585, "step": 41215 }, { "epoch": 1.8914230645679408, "grad_norm": 0.49719470739364624, "learning_rate": 3.116665849386492e-06, "loss": 0.3913, "step": 41216 }, { "epoch": 1.8914689550731953, "grad_norm": 0.4546404778957367, "learning_rate": 3.116438722127508e-06, "loss": 0.3677, "step": 41217 }, { "epoch": 1.8915148455784498, "grad_norm": 0.49796062707901, "learning_rate": 3.1162115993978935e-06, "loss": 0.3948, "step": 41218 }, { "epoch": 1.8915607360837043, "grad_norm": 0.45443177223205566, "learning_rate": 3.1159844811981966e-06, "loss": 0.3339, "step": 41219 }, { "epoch": 1.8916066265889588, "grad_norm": 0.4904322028160095, "learning_rate": 3.1157573675289627e-06, "loss": 0.4108, "step": 41220 }, { "epoch": 1.8916525170942133, "grad_norm": 0.48188769817352295, "learning_rate": 3.1155302583907367e-06, "loss": 0.3704, "step": 41221 }, { "epoch": 1.8916984075994678, "grad_norm": 0.4205312132835388, "learning_rate": 3.115303153784066e-06, "loss": 0.2688, "step": 41222 }, { "epoch": 1.891744298104722, "grad_norm": 0.4634118676185608, "learning_rate": 3.115076053709497e-06, "loss": 0.4037, "step": 41223 }, { "epoch": 1.8917901886099766, "grad_norm": 0.45413535833358765, "learning_rate": 3.114848958167575e-06, "loss": 0.3101, "step": 41224 }, { "epoch": 1.891836079115231, "grad_norm": 0.5066378116607666, "learning_rate": 3.1146218671588445e-06, "loss": 0.4179, "step": 41225 }, { "epoch": 1.8918819696204854, "grad_norm": 0.5349546074867249, "learning_rate": 3.114394780683855e-06, "loss": 0.4787, "step": 41226 }, { "epoch": 1.8919278601257399, "grad_norm": 0.465223103761673, "learning_rate": 3.1141676987431512e-06, "loss": 0.3077, "step": 41227 }, { "epoch": 1.8919737506309944, "grad_norm": 0.4973445534706116, "learning_rate": 3.113940621337278e-06, "loss": 0.3871, "step": 41228 }, { "epoch": 1.8920196411362489, "grad_norm": 0.5124292969703674, "learning_rate": 3.113713548466783e-06, "loss": 0.3834, "step": 41229 }, { "epoch": 1.8920655316415034, "grad_norm": 0.46148982644081116, "learning_rate": 3.1134864801322117e-06, "loss": 0.3496, "step": 41230 }, { "epoch": 1.8921114221467579, "grad_norm": 0.4394114911556244, "learning_rate": 3.113259416334108e-06, "loss": 0.2817, "step": 41231 }, { "epoch": 1.8921573126520124, "grad_norm": 0.48006749153137207, "learning_rate": 3.1130323570730236e-06, "loss": 0.3798, "step": 41232 }, { "epoch": 1.8922032031572669, "grad_norm": 0.4865095317363739, "learning_rate": 3.1128053023494985e-06, "loss": 0.3768, "step": 41233 }, { "epoch": 1.8922490936625214, "grad_norm": 0.47790881991386414, "learning_rate": 3.11257825216408e-06, "loss": 0.3464, "step": 41234 }, { "epoch": 1.8922949841677756, "grad_norm": 0.4721854329109192, "learning_rate": 3.112351206517317e-06, "loss": 0.3443, "step": 41235 }, { "epoch": 1.8923408746730301, "grad_norm": 0.4781986176967621, "learning_rate": 3.1121241654097536e-06, "loss": 0.3303, "step": 41236 }, { "epoch": 1.8923867651782846, "grad_norm": 0.46346694231033325, "learning_rate": 3.1118971288419346e-06, "loss": 0.3485, "step": 41237 }, { "epoch": 1.892432655683539, "grad_norm": 0.49548622965812683, "learning_rate": 3.1116700968144088e-06, "loss": 0.3765, "step": 41238 }, { "epoch": 1.8924785461887934, "grad_norm": 0.519116997718811, "learning_rate": 3.1114430693277194e-06, "loss": 0.3722, "step": 41239 }, { "epoch": 1.892524436694048, "grad_norm": 0.46004951000213623, "learning_rate": 3.1112160463824116e-06, "loss": 0.3107, "step": 41240 }, { "epoch": 1.8925703271993024, "grad_norm": 0.5163196921348572, "learning_rate": 3.1109890279790355e-06, "loss": 0.3601, "step": 41241 }, { "epoch": 1.892616217704557, "grad_norm": 0.4503540098667145, "learning_rate": 3.110762014118134e-06, "loss": 0.3353, "step": 41242 }, { "epoch": 1.8926621082098114, "grad_norm": 0.47328004240989685, "learning_rate": 3.110535004800253e-06, "loss": 0.3617, "step": 41243 }, { "epoch": 1.892707998715066, "grad_norm": 0.5275623798370361, "learning_rate": 3.11030800002594e-06, "loss": 0.4354, "step": 41244 }, { "epoch": 1.8927538892203204, "grad_norm": 0.4528310298919678, "learning_rate": 3.11008099979574e-06, "loss": 0.298, "step": 41245 }, { "epoch": 1.892799779725575, "grad_norm": 0.512881338596344, "learning_rate": 3.1098540041101965e-06, "loss": 0.3709, "step": 41246 }, { "epoch": 1.8928456702308294, "grad_norm": 0.5487818121910095, "learning_rate": 3.109627012969859e-06, "loss": 0.3622, "step": 41247 }, { "epoch": 1.8928915607360837, "grad_norm": 0.5063799619674683, "learning_rate": 3.1094000263752734e-06, "loss": 0.3653, "step": 41248 }, { "epoch": 1.8929374512413382, "grad_norm": 0.45389240980148315, "learning_rate": 3.1091730443269817e-06, "loss": 0.3706, "step": 41249 }, { "epoch": 1.8929833417465927, "grad_norm": 0.4750155508518219, "learning_rate": 3.108946066825533e-06, "loss": 0.3347, "step": 41250 }, { "epoch": 1.893029232251847, "grad_norm": 0.4781514108181, "learning_rate": 3.1087190938714733e-06, "loss": 0.411, "step": 41251 }, { "epoch": 1.8930751227571014, "grad_norm": 0.47568875551223755, "learning_rate": 3.1084921254653443e-06, "loss": 0.3692, "step": 41252 }, { "epoch": 1.893121013262356, "grad_norm": 0.4677448570728302, "learning_rate": 3.1082651616076975e-06, "loss": 0.3596, "step": 41253 }, { "epoch": 1.8931669037676104, "grad_norm": 0.4624066948890686, "learning_rate": 3.1080382022990763e-06, "loss": 0.3178, "step": 41254 }, { "epoch": 1.893212794272865, "grad_norm": 0.4592594504356384, "learning_rate": 3.107811247540026e-06, "loss": 0.3263, "step": 41255 }, { "epoch": 1.8932586847781194, "grad_norm": 0.44488954544067383, "learning_rate": 3.1075842973310893e-06, "loss": 0.3203, "step": 41256 }, { "epoch": 1.893304575283374, "grad_norm": 0.46758103370666504, "learning_rate": 3.107357351672818e-06, "loss": 0.3454, "step": 41257 }, { "epoch": 1.8933504657886284, "grad_norm": 0.4891447424888611, "learning_rate": 3.107130410565754e-06, "loss": 0.3765, "step": 41258 }, { "epoch": 1.893396356293883, "grad_norm": 0.4446899890899658, "learning_rate": 3.1069034740104443e-06, "loss": 0.2949, "step": 41259 }, { "epoch": 1.8934422467991374, "grad_norm": 0.49757182598114014, "learning_rate": 3.106676542007434e-06, "loss": 0.4049, "step": 41260 }, { "epoch": 1.8934881373043917, "grad_norm": 0.45456501841545105, "learning_rate": 3.10644961455727e-06, "loss": 0.328, "step": 41261 }, { "epoch": 1.8935340278096462, "grad_norm": 0.4532433748245239, "learning_rate": 3.106222691660494e-06, "loss": 0.3442, "step": 41262 }, { "epoch": 1.8935799183149007, "grad_norm": 0.44578391313552856, "learning_rate": 3.1059957733176575e-06, "loss": 0.3056, "step": 41263 }, { "epoch": 1.893625808820155, "grad_norm": 0.4677031338214874, "learning_rate": 3.1057688595293035e-06, "loss": 0.405, "step": 41264 }, { "epoch": 1.8936716993254095, "grad_norm": 0.45704105496406555, "learning_rate": 3.105541950295976e-06, "loss": 0.3168, "step": 41265 }, { "epoch": 1.893717589830664, "grad_norm": 0.5308483242988586, "learning_rate": 3.1053150456182235e-06, "loss": 0.4504, "step": 41266 }, { "epoch": 1.8937634803359185, "grad_norm": 0.4926720857620239, "learning_rate": 3.1050881454965902e-06, "loss": 0.4562, "step": 41267 }, { "epoch": 1.893809370841173, "grad_norm": 0.46268779039382935, "learning_rate": 3.10486124993162e-06, "loss": 0.334, "step": 41268 }, { "epoch": 1.8938552613464275, "grad_norm": 0.48834484815597534, "learning_rate": 3.1046343589238624e-06, "loss": 0.3575, "step": 41269 }, { "epoch": 1.893901151851682, "grad_norm": 0.44369015097618103, "learning_rate": 3.1044074724738614e-06, "loss": 0.3197, "step": 41270 }, { "epoch": 1.8939470423569364, "grad_norm": 0.4686090350151062, "learning_rate": 3.10418059058216e-06, "loss": 0.3683, "step": 41271 }, { "epoch": 1.893992932862191, "grad_norm": 0.45741167664527893, "learning_rate": 3.103953713249308e-06, "loss": 0.3265, "step": 41272 }, { "epoch": 1.8940388233674452, "grad_norm": 0.4879246652126312, "learning_rate": 3.1037268404758487e-06, "loss": 0.3848, "step": 41273 }, { "epoch": 1.8940847138726997, "grad_norm": 0.6793796420097351, "learning_rate": 3.1034999722623256e-06, "loss": 0.4359, "step": 41274 }, { "epoch": 1.8941306043779542, "grad_norm": 0.4598791003227234, "learning_rate": 3.103273108609289e-06, "loss": 0.3038, "step": 41275 }, { "epoch": 1.8941764948832087, "grad_norm": 0.48693540692329407, "learning_rate": 3.103046249517282e-06, "loss": 0.3599, "step": 41276 }, { "epoch": 1.894222385388463, "grad_norm": 0.42047783732414246, "learning_rate": 3.102819394986848e-06, "loss": 0.2703, "step": 41277 }, { "epoch": 1.8942682758937175, "grad_norm": 0.488725870847702, "learning_rate": 3.102592545018536e-06, "loss": 0.3713, "step": 41278 }, { "epoch": 1.894314166398972, "grad_norm": 0.41957828402519226, "learning_rate": 3.1023656996128905e-06, "loss": 0.2784, "step": 41279 }, { "epoch": 1.8943600569042265, "grad_norm": 0.5231638550758362, "learning_rate": 3.1021388587704553e-06, "loss": 0.4678, "step": 41280 }, { "epoch": 1.894405947409481, "grad_norm": 0.49268701672554016, "learning_rate": 3.1019120224917785e-06, "loss": 0.3901, "step": 41281 }, { "epoch": 1.8944518379147355, "grad_norm": 0.4971865713596344, "learning_rate": 3.1016851907774036e-06, "loss": 0.3854, "step": 41282 }, { "epoch": 1.89449772841999, "grad_norm": 0.4569043219089508, "learning_rate": 3.101458363627875e-06, "loss": 0.3276, "step": 41283 }, { "epoch": 1.8945436189252445, "grad_norm": 0.4339468777179718, "learning_rate": 3.101231541043742e-06, "loss": 0.2848, "step": 41284 }, { "epoch": 1.894589509430499, "grad_norm": 0.4613548517227173, "learning_rate": 3.101004723025548e-06, "loss": 0.335, "step": 41285 }, { "epoch": 1.8946353999357533, "grad_norm": 0.46577420830726624, "learning_rate": 3.100777909573837e-06, "loss": 0.3385, "step": 41286 }, { "epoch": 1.8946812904410077, "grad_norm": 0.5025984048843384, "learning_rate": 3.100551100689157e-06, "loss": 0.3984, "step": 41287 }, { "epoch": 1.8947271809462622, "grad_norm": 0.45395809412002563, "learning_rate": 3.100324296372052e-06, "loss": 0.2846, "step": 41288 }, { "epoch": 1.8947730714515165, "grad_norm": 0.616338312625885, "learning_rate": 3.100097496623068e-06, "loss": 0.3821, "step": 41289 }, { "epoch": 1.894818961956771, "grad_norm": 0.5045035481452942, "learning_rate": 3.0998707014427463e-06, "loss": 0.4621, "step": 41290 }, { "epoch": 1.8948648524620255, "grad_norm": 0.5059893131256104, "learning_rate": 3.0996439108316396e-06, "loss": 0.4147, "step": 41291 }, { "epoch": 1.89491074296728, "grad_norm": 0.4637081027030945, "learning_rate": 3.0994171247902887e-06, "loss": 0.3254, "step": 41292 }, { "epoch": 1.8949566334725345, "grad_norm": 0.4381686747074127, "learning_rate": 3.099190343319239e-06, "loss": 0.3061, "step": 41293 }, { "epoch": 1.895002523977789, "grad_norm": 0.4915980398654938, "learning_rate": 3.0989635664190374e-06, "loss": 0.3628, "step": 41294 }, { "epoch": 1.8950484144830435, "grad_norm": 0.44747138023376465, "learning_rate": 3.0987367940902284e-06, "loss": 0.2774, "step": 41295 }, { "epoch": 1.895094304988298, "grad_norm": 0.46230950951576233, "learning_rate": 3.0985100263333555e-06, "loss": 0.3386, "step": 41296 }, { "epoch": 1.8951401954935525, "grad_norm": 0.4835541844367981, "learning_rate": 3.0982832631489677e-06, "loss": 0.3719, "step": 41297 }, { "epoch": 1.895186085998807, "grad_norm": 0.47275274991989136, "learning_rate": 3.09805650453761e-06, "loss": 0.3345, "step": 41298 }, { "epoch": 1.8952319765040613, "grad_norm": 0.4877709746360779, "learning_rate": 3.0978297504998226e-06, "loss": 0.3793, "step": 41299 }, { "epoch": 1.8952778670093158, "grad_norm": 0.4718673825263977, "learning_rate": 3.097603001036157e-06, "loss": 0.316, "step": 41300 }, { "epoch": 1.8953237575145703, "grad_norm": 0.5149658918380737, "learning_rate": 3.097376256147155e-06, "loss": 0.3889, "step": 41301 }, { "epoch": 1.8953696480198245, "grad_norm": 0.47058600187301636, "learning_rate": 3.097149515833362e-06, "loss": 0.3772, "step": 41302 }, { "epoch": 1.895415538525079, "grad_norm": 0.5015552043914795, "learning_rate": 3.0969227800953242e-06, "loss": 0.4107, "step": 41303 }, { "epoch": 1.8954614290303335, "grad_norm": 0.49075189232826233, "learning_rate": 3.0966960489335874e-06, "loss": 0.415, "step": 41304 }, { "epoch": 1.895507319535588, "grad_norm": 0.48438775539398193, "learning_rate": 3.0964693223486936e-06, "loss": 0.3718, "step": 41305 }, { "epoch": 1.8955532100408425, "grad_norm": 0.5056449174880981, "learning_rate": 3.096242600341192e-06, "loss": 0.4169, "step": 41306 }, { "epoch": 1.895599100546097, "grad_norm": 0.47764265537261963, "learning_rate": 3.096015882911626e-06, "loss": 0.3948, "step": 41307 }, { "epoch": 1.8956449910513515, "grad_norm": 0.455504447221756, "learning_rate": 3.0957891700605404e-06, "loss": 0.3168, "step": 41308 }, { "epoch": 1.895690881556606, "grad_norm": 0.4424023926258087, "learning_rate": 3.0955624617884815e-06, "loss": 0.3338, "step": 41309 }, { "epoch": 1.8957367720618605, "grad_norm": 0.4814959466457367, "learning_rate": 3.095335758095994e-06, "loss": 0.4062, "step": 41310 }, { "epoch": 1.895782662567115, "grad_norm": 0.47504523396492004, "learning_rate": 3.0951090589836215e-06, "loss": 0.3702, "step": 41311 }, { "epoch": 1.8958285530723693, "grad_norm": 0.47444871068000793, "learning_rate": 3.0948823644519118e-06, "loss": 0.3807, "step": 41312 }, { "epoch": 1.8958744435776238, "grad_norm": 0.4389927387237549, "learning_rate": 3.094655674501409e-06, "loss": 0.3003, "step": 41313 }, { "epoch": 1.8959203340828783, "grad_norm": 0.448261559009552, "learning_rate": 3.0944289891326564e-06, "loss": 0.323, "step": 41314 }, { "epoch": 1.8959662245881326, "grad_norm": 0.5008325576782227, "learning_rate": 3.094202308346202e-06, "loss": 0.3856, "step": 41315 }, { "epoch": 1.896012115093387, "grad_norm": 0.48241421580314636, "learning_rate": 3.0939756321425896e-06, "loss": 0.4205, "step": 41316 }, { "epoch": 1.8960580055986416, "grad_norm": 0.46519938111305237, "learning_rate": 3.0937489605223625e-06, "loss": 0.3458, "step": 41317 }, { "epoch": 1.896103896103896, "grad_norm": 0.5029230713844299, "learning_rate": 3.0935222934860686e-06, "loss": 0.4233, "step": 41318 }, { "epoch": 1.8961497866091506, "grad_norm": 0.520279586315155, "learning_rate": 3.093295631034253e-06, "loss": 0.4367, "step": 41319 }, { "epoch": 1.896195677114405, "grad_norm": 0.48823970556259155, "learning_rate": 3.093068973167458e-06, "loss": 0.4057, "step": 41320 }, { "epoch": 1.8962415676196596, "grad_norm": 0.492681086063385, "learning_rate": 3.0928423198862317e-06, "loss": 0.3797, "step": 41321 }, { "epoch": 1.896287458124914, "grad_norm": 0.4697648584842682, "learning_rate": 3.092615671191117e-06, "loss": 0.351, "step": 41322 }, { "epoch": 1.8963333486301686, "grad_norm": 0.45003649592399597, "learning_rate": 3.0923890270826606e-06, "loss": 0.3104, "step": 41323 }, { "epoch": 1.8963792391354228, "grad_norm": 0.4379443824291229, "learning_rate": 3.092162387561405e-06, "loss": 0.2739, "step": 41324 }, { "epoch": 1.8964251296406773, "grad_norm": 0.4780958592891693, "learning_rate": 3.091935752627898e-06, "loss": 0.4001, "step": 41325 }, { "epoch": 1.8964710201459318, "grad_norm": 0.5721418857574463, "learning_rate": 3.0917091222826827e-06, "loss": 0.3488, "step": 41326 }, { "epoch": 1.896516910651186, "grad_norm": 0.4809184968471527, "learning_rate": 3.0914824965263035e-06, "loss": 0.3193, "step": 41327 }, { "epoch": 1.8965628011564406, "grad_norm": 0.47580093145370483, "learning_rate": 3.091255875359308e-06, "loss": 0.3565, "step": 41328 }, { "epoch": 1.896608691661695, "grad_norm": 0.42898058891296387, "learning_rate": 3.0910292587822398e-06, "loss": 0.2872, "step": 41329 }, { "epoch": 1.8966545821669496, "grad_norm": 0.5047294497489929, "learning_rate": 3.0908026467956424e-06, "loss": 0.369, "step": 41330 }, { "epoch": 1.896700472672204, "grad_norm": 0.532032310962677, "learning_rate": 3.0905760394000635e-06, "loss": 0.4555, "step": 41331 }, { "epoch": 1.8967463631774586, "grad_norm": 0.4287479817867279, "learning_rate": 3.090349436596046e-06, "loss": 0.3268, "step": 41332 }, { "epoch": 1.896792253682713, "grad_norm": 0.49266374111175537, "learning_rate": 3.090122838384134e-06, "loss": 0.3498, "step": 41333 }, { "epoch": 1.8968381441879676, "grad_norm": 0.4340055286884308, "learning_rate": 3.089896244764876e-06, "loss": 0.3046, "step": 41334 }, { "epoch": 1.896884034693222, "grad_norm": 0.49657538533210754, "learning_rate": 3.0896696557388143e-06, "loss": 0.3749, "step": 41335 }, { "epoch": 1.8969299251984766, "grad_norm": 0.4315204918384552, "learning_rate": 3.089443071306494e-06, "loss": 0.2831, "step": 41336 }, { "epoch": 1.8969758157037309, "grad_norm": 0.4750882685184479, "learning_rate": 3.08921649146846e-06, "loss": 0.3881, "step": 41337 }, { "epoch": 1.8970217062089854, "grad_norm": 0.4684036374092102, "learning_rate": 3.088989916225258e-06, "loss": 0.3728, "step": 41338 }, { "epoch": 1.8970675967142399, "grad_norm": 0.4727766215801239, "learning_rate": 3.088763345577429e-06, "loss": 0.3652, "step": 41339 }, { "epoch": 1.8971134872194941, "grad_norm": 0.4784647226333618, "learning_rate": 3.088536779525524e-06, "loss": 0.3527, "step": 41340 }, { "epoch": 1.8971593777247486, "grad_norm": 0.4774829149246216, "learning_rate": 3.088310218070084e-06, "loss": 0.3589, "step": 41341 }, { "epoch": 1.8972052682300031, "grad_norm": 0.466403603553772, "learning_rate": 3.088083661211655e-06, "loss": 0.3461, "step": 41342 }, { "epoch": 1.8972511587352576, "grad_norm": 0.46614235639572144, "learning_rate": 3.0878571089507813e-06, "loss": 0.347, "step": 41343 }, { "epoch": 1.8972970492405121, "grad_norm": 0.4803677499294281, "learning_rate": 3.0876305612880074e-06, "loss": 0.3675, "step": 41344 }, { "epoch": 1.8973429397457666, "grad_norm": 0.4387318789958954, "learning_rate": 3.0874040182238787e-06, "loss": 0.2991, "step": 41345 }, { "epoch": 1.8973888302510211, "grad_norm": 0.45115095376968384, "learning_rate": 3.0871774797589394e-06, "loss": 0.3493, "step": 41346 }, { "epoch": 1.8974347207562756, "grad_norm": 0.4409339427947998, "learning_rate": 3.086950945893735e-06, "loss": 0.3319, "step": 41347 }, { "epoch": 1.8974806112615301, "grad_norm": 0.4229160249233246, "learning_rate": 3.086724416628808e-06, "loss": 0.2514, "step": 41348 }, { "epoch": 1.8975265017667846, "grad_norm": 0.4249306917190552, "learning_rate": 3.086497891964707e-06, "loss": 0.2648, "step": 41349 }, { "epoch": 1.8975723922720389, "grad_norm": 0.44979265332221985, "learning_rate": 3.0862713719019745e-06, "loss": 0.2969, "step": 41350 }, { "epoch": 1.8976182827772934, "grad_norm": 0.5150836706161499, "learning_rate": 3.0860448564411545e-06, "loss": 0.4053, "step": 41351 }, { "epoch": 1.8976641732825479, "grad_norm": 0.4928245544433594, "learning_rate": 3.0858183455827935e-06, "loss": 0.3899, "step": 41352 }, { "epoch": 1.8977100637878022, "grad_norm": 0.5218529105186462, "learning_rate": 3.085591839327435e-06, "loss": 0.4374, "step": 41353 }, { "epoch": 1.8977559542930567, "grad_norm": 0.47860127687454224, "learning_rate": 3.0853653376756245e-06, "loss": 0.4128, "step": 41354 }, { "epoch": 1.8978018447983112, "grad_norm": 0.4865467846393585, "learning_rate": 3.085138840627903e-06, "loss": 0.3441, "step": 41355 }, { "epoch": 1.8978477353035657, "grad_norm": 0.4956628382205963, "learning_rate": 3.0849123481848207e-06, "loss": 0.3607, "step": 41356 }, { "epoch": 1.8978936258088202, "grad_norm": 0.4887450635433197, "learning_rate": 3.0846858603469204e-06, "loss": 0.3904, "step": 41357 }, { "epoch": 1.8979395163140746, "grad_norm": 0.48269015550613403, "learning_rate": 3.084459377114745e-06, "loss": 0.396, "step": 41358 }, { "epoch": 1.8979854068193291, "grad_norm": 0.4926266372203827, "learning_rate": 3.0842328984888402e-06, "loss": 0.3542, "step": 41359 }, { "epoch": 1.8980312973245836, "grad_norm": 0.4579259753227234, "learning_rate": 3.0840064244697522e-06, "loss": 0.3229, "step": 41360 }, { "epoch": 1.8980771878298381, "grad_norm": 0.48977261781692505, "learning_rate": 3.0837799550580203e-06, "loss": 0.3927, "step": 41361 }, { "epoch": 1.8981230783350924, "grad_norm": 0.471478670835495, "learning_rate": 3.0835534902541963e-06, "loss": 0.3622, "step": 41362 }, { "epoch": 1.898168968840347, "grad_norm": 0.47682449221611023, "learning_rate": 3.0833270300588207e-06, "loss": 0.3209, "step": 41363 }, { "epoch": 1.8982148593456014, "grad_norm": 0.44824767112731934, "learning_rate": 3.083100574472438e-06, "loss": 0.3045, "step": 41364 }, { "epoch": 1.898260749850856, "grad_norm": 0.4394513666629791, "learning_rate": 3.082874123495594e-06, "loss": 0.3019, "step": 41365 }, { "epoch": 1.8983066403561102, "grad_norm": 0.59175705909729, "learning_rate": 3.082647677128833e-06, "loss": 0.317, "step": 41366 }, { "epoch": 1.8983525308613647, "grad_norm": 0.4853340983390808, "learning_rate": 3.082421235372698e-06, "loss": 0.3961, "step": 41367 }, { "epoch": 1.8983984213666192, "grad_norm": 0.44530442357063293, "learning_rate": 3.0821947982277357e-06, "loss": 0.3147, "step": 41368 }, { "epoch": 1.8984443118718737, "grad_norm": 0.5093839168548584, "learning_rate": 3.0819683656944897e-06, "loss": 0.3993, "step": 41369 }, { "epoch": 1.8984902023771282, "grad_norm": 0.47955745458602905, "learning_rate": 3.0817419377735025e-06, "loss": 0.3868, "step": 41370 }, { "epoch": 1.8985360928823827, "grad_norm": 0.45728230476379395, "learning_rate": 3.0815155144653224e-06, "loss": 0.3433, "step": 41371 }, { "epoch": 1.8985819833876372, "grad_norm": 0.5199812650680542, "learning_rate": 3.081289095770492e-06, "loss": 0.4399, "step": 41372 }, { "epoch": 1.8986278738928917, "grad_norm": 0.4781954288482666, "learning_rate": 3.081062681689555e-06, "loss": 0.3955, "step": 41373 }, { "epoch": 1.8986737643981462, "grad_norm": 0.46593138575553894, "learning_rate": 3.0808362722230573e-06, "loss": 0.3334, "step": 41374 }, { "epoch": 1.8987196549034004, "grad_norm": 0.5000401139259338, "learning_rate": 3.080609867371543e-06, "loss": 0.3672, "step": 41375 }, { "epoch": 1.898765545408655, "grad_norm": 0.4434860050678253, "learning_rate": 3.0803834671355537e-06, "loss": 0.3083, "step": 41376 }, { "epoch": 1.8988114359139094, "grad_norm": 0.45429325103759766, "learning_rate": 3.0801570715156385e-06, "loss": 0.3505, "step": 41377 }, { "epoch": 1.8988573264191637, "grad_norm": 0.45842641592025757, "learning_rate": 3.0799306805123396e-06, "loss": 0.3183, "step": 41378 }, { "epoch": 1.8989032169244182, "grad_norm": 0.4495009183883667, "learning_rate": 3.079704294126201e-06, "loss": 0.3245, "step": 41379 }, { "epoch": 1.8989491074296727, "grad_norm": 0.45997318625450134, "learning_rate": 3.079477912357768e-06, "loss": 0.3187, "step": 41380 }, { "epoch": 1.8989949979349272, "grad_norm": 0.5034911036491394, "learning_rate": 3.079251535207585e-06, "loss": 0.3867, "step": 41381 }, { "epoch": 1.8990408884401817, "grad_norm": 0.513751745223999, "learning_rate": 3.0790251626761924e-06, "loss": 0.3804, "step": 41382 }, { "epoch": 1.8990867789454362, "grad_norm": 0.4622642993927002, "learning_rate": 3.0787987947641416e-06, "loss": 0.3111, "step": 41383 }, { "epoch": 1.8991326694506907, "grad_norm": 0.45239636301994324, "learning_rate": 3.0785724314719724e-06, "loss": 0.3239, "step": 41384 }, { "epoch": 1.8991785599559452, "grad_norm": 0.4533463418483734, "learning_rate": 3.0783460728002294e-06, "loss": 0.3062, "step": 41385 }, { "epoch": 1.8992244504611997, "grad_norm": 0.46175503730773926, "learning_rate": 3.078119718749459e-06, "loss": 0.3166, "step": 41386 }, { "epoch": 1.8992703409664542, "grad_norm": 0.463453471660614, "learning_rate": 3.077893369320204e-06, "loss": 0.2907, "step": 41387 }, { "epoch": 1.8993162314717085, "grad_norm": 0.46581870317459106, "learning_rate": 3.077667024513008e-06, "loss": 0.364, "step": 41388 }, { "epoch": 1.899362121976963, "grad_norm": 0.47171932458877563, "learning_rate": 3.0774406843284156e-06, "loss": 0.3762, "step": 41389 }, { "epoch": 1.8994080124822175, "grad_norm": 0.4148736298084259, "learning_rate": 3.0772143487669726e-06, "loss": 0.2718, "step": 41390 }, { "epoch": 1.8994539029874717, "grad_norm": 0.471902996301651, "learning_rate": 3.0769880178292224e-06, "loss": 0.3556, "step": 41391 }, { "epoch": 1.8994997934927262, "grad_norm": 0.46473681926727295, "learning_rate": 3.076761691515707e-06, "loss": 0.3541, "step": 41392 }, { "epoch": 1.8995456839979807, "grad_norm": 0.4966817796230316, "learning_rate": 3.076535369826975e-06, "loss": 0.3616, "step": 41393 }, { "epoch": 1.8995915745032352, "grad_norm": 0.47986501455307007, "learning_rate": 3.076309052763568e-06, "loss": 0.3725, "step": 41394 }, { "epoch": 1.8996374650084897, "grad_norm": 0.48514416813850403, "learning_rate": 3.0760827403260295e-06, "loss": 0.3471, "step": 41395 }, { "epoch": 1.8996833555137442, "grad_norm": 0.46562033891677856, "learning_rate": 3.0758564325149067e-06, "loss": 0.3352, "step": 41396 }, { "epoch": 1.8997292460189987, "grad_norm": 0.5356138944625854, "learning_rate": 3.0756301293307407e-06, "loss": 0.4204, "step": 41397 }, { "epoch": 1.8997751365242532, "grad_norm": 0.5223627686500549, "learning_rate": 3.0754038307740754e-06, "loss": 0.3927, "step": 41398 }, { "epoch": 1.8998210270295077, "grad_norm": 0.5011340379714966, "learning_rate": 3.0751775368454584e-06, "loss": 0.415, "step": 41399 }, { "epoch": 1.8998669175347622, "grad_norm": 0.4960937201976776, "learning_rate": 3.0749512475454317e-06, "loss": 0.374, "step": 41400 }, { "epoch": 1.8999128080400165, "grad_norm": 0.4673342704772949, "learning_rate": 3.074724962874539e-06, "loss": 0.3275, "step": 41401 }, { "epoch": 1.899958698545271, "grad_norm": 0.4454406499862671, "learning_rate": 3.0744986828333263e-06, "loss": 0.3398, "step": 41402 }, { "epoch": 1.9000045890505255, "grad_norm": 0.5034834146499634, "learning_rate": 3.074272407422336e-06, "loss": 0.403, "step": 41403 }, { "epoch": 1.9000504795557798, "grad_norm": 0.4703139662742615, "learning_rate": 3.0740461366421113e-06, "loss": 0.3676, "step": 41404 }, { "epoch": 1.9000963700610343, "grad_norm": 0.49312445521354675, "learning_rate": 3.0738198704931997e-06, "loss": 0.3433, "step": 41405 }, { "epoch": 1.9001422605662888, "grad_norm": 0.5127609372138977, "learning_rate": 3.073593608976143e-06, "loss": 0.3545, "step": 41406 }, { "epoch": 1.9001881510715433, "grad_norm": 0.48808130621910095, "learning_rate": 3.0733673520914853e-06, "loss": 0.4447, "step": 41407 }, { "epoch": 1.9002340415767978, "grad_norm": 0.44427812099456787, "learning_rate": 3.073141099839772e-06, "loss": 0.2883, "step": 41408 }, { "epoch": 1.9002799320820523, "grad_norm": 0.40720510482788086, "learning_rate": 3.0729148522215456e-06, "loss": 0.3052, "step": 41409 }, { "epoch": 1.9003258225873068, "grad_norm": 0.5038220882415771, "learning_rate": 3.07268860923735e-06, "loss": 0.3989, "step": 41410 }, { "epoch": 1.9003717130925613, "grad_norm": 0.49350884556770325, "learning_rate": 3.0724623708877315e-06, "loss": 0.3693, "step": 41411 }, { "epoch": 1.9004176035978158, "grad_norm": 0.4786263704299927, "learning_rate": 3.0722361371732323e-06, "loss": 0.3814, "step": 41412 }, { "epoch": 1.90046349410307, "grad_norm": 0.4418644607067108, "learning_rate": 3.072009908094395e-06, "loss": 0.3395, "step": 41413 }, { "epoch": 1.9005093846083245, "grad_norm": 0.4409025311470032, "learning_rate": 3.0717836836517675e-06, "loss": 0.3663, "step": 41414 }, { "epoch": 1.900555275113579, "grad_norm": 0.47292205691337585, "learning_rate": 3.071557463845891e-06, "loss": 0.32, "step": 41415 }, { "epoch": 1.9006011656188333, "grad_norm": 0.41654813289642334, "learning_rate": 3.0713312486773096e-06, "loss": 0.2831, "step": 41416 }, { "epoch": 1.9006470561240878, "grad_norm": 0.4755242168903351, "learning_rate": 3.071105038146569e-06, "loss": 0.3581, "step": 41417 }, { "epoch": 1.9006929466293423, "grad_norm": 0.5076202750205994, "learning_rate": 3.0708788322542115e-06, "loss": 0.4556, "step": 41418 }, { "epoch": 1.9007388371345968, "grad_norm": 0.4687141478061676, "learning_rate": 3.0706526310007817e-06, "loss": 0.3359, "step": 41419 }, { "epoch": 1.9007847276398513, "grad_norm": 0.4413447082042694, "learning_rate": 3.0704264343868216e-06, "loss": 0.2843, "step": 41420 }, { "epoch": 1.9008306181451058, "grad_norm": 0.42279112339019775, "learning_rate": 3.0702002424128787e-06, "loss": 0.2658, "step": 41421 }, { "epoch": 1.9008765086503603, "grad_norm": 0.4256211221218109, "learning_rate": 3.0699740550794956e-06, "loss": 0.3106, "step": 41422 }, { "epoch": 1.9009223991556148, "grad_norm": 0.4700208604335785, "learning_rate": 3.0697478723872144e-06, "loss": 0.3451, "step": 41423 }, { "epoch": 1.9009682896608693, "grad_norm": 0.4352574348449707, "learning_rate": 3.0695216943365814e-06, "loss": 0.2805, "step": 41424 }, { "epoch": 1.9010141801661238, "grad_norm": 0.428152471780777, "learning_rate": 3.0692955209281396e-06, "loss": 0.2823, "step": 41425 }, { "epoch": 1.901060070671378, "grad_norm": 0.49111267924308777, "learning_rate": 3.0690693521624306e-06, "loss": 0.404, "step": 41426 }, { "epoch": 1.9011059611766326, "grad_norm": 0.45435822010040283, "learning_rate": 3.0688431880400025e-06, "loss": 0.3155, "step": 41427 }, { "epoch": 1.901151851681887, "grad_norm": 0.4784923493862152, "learning_rate": 3.068617028561397e-06, "loss": 0.3716, "step": 41428 }, { "epoch": 1.9011977421871413, "grad_norm": 0.488253116607666, "learning_rate": 3.0683908737271566e-06, "loss": 0.3995, "step": 41429 }, { "epoch": 1.9012436326923958, "grad_norm": 0.450344055891037, "learning_rate": 3.068164723537828e-06, "loss": 0.3403, "step": 41430 }, { "epoch": 1.9012895231976503, "grad_norm": 0.41516032814979553, "learning_rate": 3.067938577993953e-06, "loss": 0.2663, "step": 41431 }, { "epoch": 1.9013354137029048, "grad_norm": 0.48087143898010254, "learning_rate": 3.0677124370960747e-06, "loss": 0.361, "step": 41432 }, { "epoch": 1.9013813042081593, "grad_norm": 0.49434953927993774, "learning_rate": 3.067486300844741e-06, "loss": 0.3945, "step": 41433 }, { "epoch": 1.9014271947134138, "grad_norm": 0.45705169439315796, "learning_rate": 3.0672601692404914e-06, "loss": 0.3053, "step": 41434 }, { "epoch": 1.9014730852186683, "grad_norm": 0.49436134099960327, "learning_rate": 3.0670340422838696e-06, "loss": 0.2696, "step": 41435 }, { "epoch": 1.9015189757239228, "grad_norm": 0.5084503293037415, "learning_rate": 3.066807919975422e-06, "loss": 0.4113, "step": 41436 }, { "epoch": 1.9015648662291773, "grad_norm": 0.5224766731262207, "learning_rate": 3.0665818023156924e-06, "loss": 0.4401, "step": 41437 }, { "epoch": 1.9016107567344318, "grad_norm": 0.47182920575141907, "learning_rate": 3.0663556893052216e-06, "loss": 0.3173, "step": 41438 }, { "epoch": 1.901656647239686, "grad_norm": 0.4812453091144562, "learning_rate": 3.0661295809445556e-06, "loss": 0.3652, "step": 41439 }, { "epoch": 1.9017025377449406, "grad_norm": 0.4881761372089386, "learning_rate": 3.065903477234239e-06, "loss": 0.366, "step": 41440 }, { "epoch": 1.901748428250195, "grad_norm": 0.4897553324699402, "learning_rate": 3.065677378174811e-06, "loss": 0.3015, "step": 41441 }, { "epoch": 1.9017943187554494, "grad_norm": 0.4538304805755615, "learning_rate": 3.0654512837668203e-06, "loss": 0.358, "step": 41442 }, { "epoch": 1.9018402092607039, "grad_norm": 0.5251752138137817, "learning_rate": 3.06522519401081e-06, "loss": 0.4637, "step": 41443 }, { "epoch": 1.9018860997659583, "grad_norm": 0.5094059109687805, "learning_rate": 3.0649991089073205e-06, "loss": 0.374, "step": 41444 }, { "epoch": 1.9019319902712128, "grad_norm": 0.46599093079566956, "learning_rate": 3.0647730284568987e-06, "loss": 0.3759, "step": 41445 }, { "epoch": 1.9019778807764673, "grad_norm": 0.48446768522262573, "learning_rate": 3.064546952660087e-06, "loss": 0.3129, "step": 41446 }, { "epoch": 1.9020237712817218, "grad_norm": 0.4529275894165039, "learning_rate": 3.0643208815174265e-06, "loss": 0.3102, "step": 41447 }, { "epoch": 1.9020696617869763, "grad_norm": 0.46129462122917175, "learning_rate": 3.0640948150294657e-06, "loss": 0.3313, "step": 41448 }, { "epoch": 1.9021155522922308, "grad_norm": 0.4423235356807709, "learning_rate": 3.063868753196746e-06, "loss": 0.3182, "step": 41449 }, { "epoch": 1.9021614427974853, "grad_norm": 0.477204829454422, "learning_rate": 3.0636426960198096e-06, "loss": 0.3557, "step": 41450 }, { "epoch": 1.9022073333027396, "grad_norm": 0.4457971751689911, "learning_rate": 3.0634166434992028e-06, "loss": 0.3309, "step": 41451 }, { "epoch": 1.902253223807994, "grad_norm": 0.501105010509491, "learning_rate": 3.063190595635468e-06, "loss": 0.451, "step": 41452 }, { "epoch": 1.9022991143132486, "grad_norm": 0.4747656285762787, "learning_rate": 3.062964552429148e-06, "loss": 0.3975, "step": 41453 }, { "epoch": 1.902345004818503, "grad_norm": 0.48892056941986084, "learning_rate": 3.0627385138807853e-06, "loss": 0.3437, "step": 41454 }, { "epoch": 1.9023908953237574, "grad_norm": 0.4740126430988312, "learning_rate": 3.0625124799909277e-06, "loss": 0.3934, "step": 41455 }, { "epoch": 1.9024367858290119, "grad_norm": 0.48836010694503784, "learning_rate": 3.062286450760115e-06, "loss": 0.4109, "step": 41456 }, { "epoch": 1.9024826763342664, "grad_norm": 0.5288827419281006, "learning_rate": 3.0620604261888897e-06, "loss": 0.4777, "step": 41457 }, { "epoch": 1.9025285668395209, "grad_norm": 0.4669460654258728, "learning_rate": 3.0618344062778007e-06, "loss": 0.3591, "step": 41458 }, { "epoch": 1.9025744573447754, "grad_norm": 0.49849891662597656, "learning_rate": 3.061608391027387e-06, "loss": 0.3892, "step": 41459 }, { "epoch": 1.9026203478500299, "grad_norm": 0.4655285179615021, "learning_rate": 3.0613823804381927e-06, "loss": 0.3949, "step": 41460 }, { "epoch": 1.9026662383552844, "grad_norm": 0.5049852132797241, "learning_rate": 3.061156374510763e-06, "loss": 0.3645, "step": 41461 }, { "epoch": 1.9027121288605389, "grad_norm": 0.4669059216976166, "learning_rate": 3.0609303732456397e-06, "loss": 0.3256, "step": 41462 }, { "epoch": 1.9027580193657934, "grad_norm": 0.47471845149993896, "learning_rate": 3.060704376643365e-06, "loss": 0.3812, "step": 41463 }, { "epoch": 1.9028039098710476, "grad_norm": 0.4622300863265991, "learning_rate": 3.060478384704487e-06, "loss": 0.3192, "step": 41464 }, { "epoch": 1.9028498003763021, "grad_norm": 0.4324879050254822, "learning_rate": 3.0602523974295462e-06, "loss": 0.3165, "step": 41465 }, { "epoch": 1.9028956908815566, "grad_norm": 0.5104799270629883, "learning_rate": 3.060026414819085e-06, "loss": 0.3818, "step": 41466 }, { "epoch": 1.902941581386811, "grad_norm": 0.4826866686344147, "learning_rate": 3.059800436873649e-06, "loss": 0.3372, "step": 41467 }, { "epoch": 1.9029874718920654, "grad_norm": 0.5172342658042908, "learning_rate": 3.0595744635937804e-06, "loss": 0.4503, "step": 41468 }, { "epoch": 1.90303336239732, "grad_norm": 0.5518525838851929, "learning_rate": 3.0593484949800214e-06, "loss": 0.4825, "step": 41469 }, { "epoch": 1.9030792529025744, "grad_norm": 0.48422640562057495, "learning_rate": 3.0591225310329176e-06, "loss": 0.3271, "step": 41470 }, { "epoch": 1.903125143407829, "grad_norm": 0.4536167085170746, "learning_rate": 3.0588965717530123e-06, "loss": 0.3303, "step": 41471 }, { "epoch": 1.9031710339130834, "grad_norm": 0.4635917544364929, "learning_rate": 3.058670617140847e-06, "loss": 0.3398, "step": 41472 }, { "epoch": 1.903216924418338, "grad_norm": 0.48019567131996155, "learning_rate": 3.058444667196967e-06, "loss": 0.3531, "step": 41473 }, { "epoch": 1.9032628149235924, "grad_norm": 0.42461317777633667, "learning_rate": 3.058218721921915e-06, "loss": 0.265, "step": 41474 }, { "epoch": 1.903308705428847, "grad_norm": 0.4585840106010437, "learning_rate": 3.0579927813162323e-06, "loss": 0.3601, "step": 41475 }, { "epoch": 1.9033545959341014, "grad_norm": 0.45265302062034607, "learning_rate": 3.0577668453804665e-06, "loss": 0.3083, "step": 41476 }, { "epoch": 1.9034004864393557, "grad_norm": 0.49232858419418335, "learning_rate": 3.0575409141151587e-06, "loss": 0.3907, "step": 41477 }, { "epoch": 1.9034463769446102, "grad_norm": 0.49260351061820984, "learning_rate": 3.0573149875208492e-06, "loss": 0.3712, "step": 41478 }, { "epoch": 1.9034922674498647, "grad_norm": 0.4469263255596161, "learning_rate": 3.0570890655980857e-06, "loss": 0.3363, "step": 41479 }, { "epoch": 1.903538157955119, "grad_norm": 0.47698330879211426, "learning_rate": 3.0568631483474098e-06, "loss": 0.4058, "step": 41480 }, { "epoch": 1.9035840484603734, "grad_norm": 0.4693165123462677, "learning_rate": 3.056637235769364e-06, "loss": 0.3347, "step": 41481 }, { "epoch": 1.903629938965628, "grad_norm": 0.4870064854621887, "learning_rate": 3.0564113278644935e-06, "loss": 0.3602, "step": 41482 }, { "epoch": 1.9036758294708824, "grad_norm": 0.48844945430755615, "learning_rate": 3.05618542463334e-06, "loss": 0.3658, "step": 41483 }, { "epoch": 1.903721719976137, "grad_norm": 0.44118231534957886, "learning_rate": 3.0559595260764472e-06, "loss": 0.3132, "step": 41484 }, { "epoch": 1.9037676104813914, "grad_norm": 0.506024956703186, "learning_rate": 3.0557336321943566e-06, "loss": 0.4547, "step": 41485 }, { "epoch": 1.903813500986646, "grad_norm": 0.4778393507003784, "learning_rate": 3.0555077429876147e-06, "loss": 0.3469, "step": 41486 }, { "epoch": 1.9038593914919004, "grad_norm": 0.5090705752372742, "learning_rate": 3.0552818584567623e-06, "loss": 0.4252, "step": 41487 }, { "epoch": 1.903905281997155, "grad_norm": 0.4378887414932251, "learning_rate": 3.0550559786023433e-06, "loss": 0.263, "step": 41488 }, { "epoch": 1.9039511725024094, "grad_norm": 0.4595848321914673, "learning_rate": 3.0548301034249014e-06, "loss": 0.298, "step": 41489 }, { "epoch": 1.9039970630076637, "grad_norm": 0.4499305188655853, "learning_rate": 3.054604232924979e-06, "loss": 0.3078, "step": 41490 }, { "epoch": 1.9040429535129182, "grad_norm": 0.5040925145149231, "learning_rate": 3.0543783671031173e-06, "loss": 0.4087, "step": 41491 }, { "epoch": 1.9040888440181727, "grad_norm": 0.4440968632698059, "learning_rate": 3.0541525059598644e-06, "loss": 0.2989, "step": 41492 }, { "epoch": 1.904134734523427, "grad_norm": 0.4526048004627228, "learning_rate": 3.0539266494957596e-06, "loss": 0.2973, "step": 41493 }, { "epoch": 1.9041806250286815, "grad_norm": 0.44277554750442505, "learning_rate": 3.0537007977113462e-06, "loss": 0.3152, "step": 41494 }, { "epoch": 1.904226515533936, "grad_norm": 0.45494958758354187, "learning_rate": 3.0534749506071694e-06, "loss": 0.3488, "step": 41495 }, { "epoch": 1.9042724060391905, "grad_norm": 0.5186036229133606, "learning_rate": 3.053249108183771e-06, "loss": 0.3605, "step": 41496 }, { "epoch": 1.904318296544445, "grad_norm": 0.44100144505500793, "learning_rate": 3.0530232704416923e-06, "loss": 0.327, "step": 41497 }, { "epoch": 1.9043641870496995, "grad_norm": 0.4666369557380676, "learning_rate": 3.0527974373814796e-06, "loss": 0.3326, "step": 41498 }, { "epoch": 1.904410077554954, "grad_norm": 0.4460645318031311, "learning_rate": 3.052571609003676e-06, "loss": 0.3105, "step": 41499 }, { "epoch": 1.9044559680602084, "grad_norm": 0.486577570438385, "learning_rate": 3.05234578530882e-06, "loss": 0.3805, "step": 41500 }, { "epoch": 1.904501858565463, "grad_norm": 0.46949371695518494, "learning_rate": 3.0521199662974595e-06, "loss": 0.3586, "step": 41501 }, { "epoch": 1.9045477490707172, "grad_norm": 0.4689438045024872, "learning_rate": 3.0518941519701356e-06, "loss": 0.3397, "step": 41502 }, { "epoch": 1.9045936395759717, "grad_norm": 0.5158957839012146, "learning_rate": 3.0516683423273906e-06, "loss": 0.4308, "step": 41503 }, { "epoch": 1.9046395300812262, "grad_norm": 0.9596680402755737, "learning_rate": 3.05144253736977e-06, "loss": 0.3019, "step": 41504 }, { "epoch": 1.9046854205864805, "grad_norm": 0.4527713656425476, "learning_rate": 3.0512167370978142e-06, "loss": 0.3126, "step": 41505 }, { "epoch": 1.904731311091735, "grad_norm": 0.4414074122905731, "learning_rate": 3.0509909415120655e-06, "loss": 0.31, "step": 41506 }, { "epoch": 1.9047772015969895, "grad_norm": 0.4565839171409607, "learning_rate": 3.05076515061307e-06, "loss": 0.3144, "step": 41507 }, { "epoch": 1.904823092102244, "grad_norm": 0.4432392716407776, "learning_rate": 3.050539364401369e-06, "loss": 0.3499, "step": 41508 }, { "epoch": 1.9048689826074985, "grad_norm": 0.49031075835227966, "learning_rate": 3.0503135828775056e-06, "loss": 0.3893, "step": 41509 }, { "epoch": 1.904914873112753, "grad_norm": 0.5095597505569458, "learning_rate": 3.0500878060420224e-06, "loss": 0.4622, "step": 41510 }, { "epoch": 1.9049607636180075, "grad_norm": 0.4964854121208191, "learning_rate": 3.0498620338954634e-06, "loss": 0.3939, "step": 41511 }, { "epoch": 1.905006654123262, "grad_norm": 0.49824872612953186, "learning_rate": 3.0496362664383684e-06, "loss": 0.3285, "step": 41512 }, { "epoch": 1.9050525446285165, "grad_norm": 0.46538910269737244, "learning_rate": 3.049410503671285e-06, "loss": 0.3906, "step": 41513 }, { "epoch": 1.905098435133771, "grad_norm": 0.4694610834121704, "learning_rate": 3.049184745594753e-06, "loss": 0.3739, "step": 41514 }, { "epoch": 1.9051443256390252, "grad_norm": 0.4684291481971741, "learning_rate": 3.048958992209316e-06, "loss": 0.3693, "step": 41515 }, { "epoch": 1.9051902161442797, "grad_norm": 0.5311850905418396, "learning_rate": 3.0487332435155175e-06, "loss": 0.3912, "step": 41516 }, { "epoch": 1.9052361066495342, "grad_norm": 0.4864659607410431, "learning_rate": 3.0485074995139e-06, "loss": 0.3635, "step": 41517 }, { "epoch": 1.9052819971547885, "grad_norm": 0.4855727255344391, "learning_rate": 3.0482817602050053e-06, "loss": 0.3532, "step": 41518 }, { "epoch": 1.905327887660043, "grad_norm": 0.4549660086631775, "learning_rate": 3.0480560255893754e-06, "loss": 0.3275, "step": 41519 }, { "epoch": 1.9053737781652975, "grad_norm": 0.4462898373603821, "learning_rate": 3.0478302956675566e-06, "loss": 0.3293, "step": 41520 }, { "epoch": 1.905419668670552, "grad_norm": 0.47286492586135864, "learning_rate": 3.047604570440091e-06, "loss": 0.3492, "step": 41521 }, { "epoch": 1.9054655591758065, "grad_norm": 0.45767542719841003, "learning_rate": 3.0473788499075167e-06, "loss": 0.35, "step": 41522 }, { "epoch": 1.905511449681061, "grad_norm": 0.48942410945892334, "learning_rate": 3.0471531340703824e-06, "loss": 0.3771, "step": 41523 }, { "epoch": 1.9055573401863155, "grad_norm": 0.4900580048561096, "learning_rate": 3.0469274229292288e-06, "loss": 0.3848, "step": 41524 }, { "epoch": 1.90560323069157, "grad_norm": 0.461749404668808, "learning_rate": 3.0467017164845967e-06, "loss": 0.3521, "step": 41525 }, { "epoch": 1.9056491211968245, "grad_norm": 0.45963889360427856, "learning_rate": 3.046476014737032e-06, "loss": 0.3239, "step": 41526 }, { "epoch": 1.905695011702079, "grad_norm": 0.4606442451477051, "learning_rate": 3.0462503176870755e-06, "loss": 0.3311, "step": 41527 }, { "epoch": 1.9057409022073333, "grad_norm": 0.4645827114582062, "learning_rate": 3.0460246253352688e-06, "loss": 0.3332, "step": 41528 }, { "epoch": 1.9057867927125878, "grad_norm": 0.451529860496521, "learning_rate": 3.045798937682158e-06, "loss": 0.3052, "step": 41529 }, { "epoch": 1.9058326832178423, "grad_norm": 0.4676174223423004, "learning_rate": 3.0455732547282835e-06, "loss": 0.343, "step": 41530 }, { "epoch": 1.9058785737230965, "grad_norm": 0.44746118783950806, "learning_rate": 3.045347576474188e-06, "loss": 0.3352, "step": 41531 }, { "epoch": 1.905924464228351, "grad_norm": 0.45558294653892517, "learning_rate": 3.045121902920416e-06, "loss": 0.3718, "step": 41532 }, { "epoch": 1.9059703547336055, "grad_norm": 0.4658662676811218, "learning_rate": 3.044896234067508e-06, "loss": 0.3726, "step": 41533 }, { "epoch": 1.90601624523886, "grad_norm": 0.46545907855033875, "learning_rate": 3.044670569916006e-06, "loss": 0.3307, "step": 41534 }, { "epoch": 1.9060621357441145, "grad_norm": 0.4706931412220001, "learning_rate": 3.0444449104664565e-06, "loss": 0.4031, "step": 41535 }, { "epoch": 1.906108026249369, "grad_norm": 0.4775809943675995, "learning_rate": 3.0442192557193996e-06, "loss": 0.3692, "step": 41536 }, { "epoch": 1.9061539167546235, "grad_norm": 0.48859256505966187, "learning_rate": 3.0439936056753766e-06, "loss": 0.3996, "step": 41537 }, { "epoch": 1.906199807259878, "grad_norm": 0.5069428086280823, "learning_rate": 3.0437679603349336e-06, "loss": 0.3975, "step": 41538 }, { "epoch": 1.9062456977651325, "grad_norm": 0.4373435378074646, "learning_rate": 3.0435423196986104e-06, "loss": 0.2881, "step": 41539 }, { "epoch": 1.9062915882703868, "grad_norm": 0.462407648563385, "learning_rate": 3.0433166837669483e-06, "loss": 0.3268, "step": 41540 }, { "epoch": 1.9063374787756413, "grad_norm": 0.4256834387779236, "learning_rate": 3.0430910525404946e-06, "loss": 0.3198, "step": 41541 }, { "epoch": 1.9063833692808958, "grad_norm": 0.47236669063568115, "learning_rate": 3.042865426019789e-06, "loss": 0.3555, "step": 41542 }, { "epoch": 1.9064292597861503, "grad_norm": 0.5328798294067383, "learning_rate": 3.042639804205374e-06, "loss": 0.4622, "step": 41543 }, { "epoch": 1.9064751502914046, "grad_norm": 0.44350722432136536, "learning_rate": 3.042414187097793e-06, "loss": 0.3081, "step": 41544 }, { "epoch": 1.906521040796659, "grad_norm": 0.4943241775035858, "learning_rate": 3.0421885746975877e-06, "loss": 0.429, "step": 41545 }, { "epoch": 1.9065669313019136, "grad_norm": 0.49184250831604004, "learning_rate": 3.0419629670053e-06, "loss": 0.375, "step": 41546 }, { "epoch": 1.906612821807168, "grad_norm": 0.4664251506328583, "learning_rate": 3.0417373640214753e-06, "loss": 0.347, "step": 41547 }, { "epoch": 1.9066587123124226, "grad_norm": 0.4462032616138458, "learning_rate": 3.041511765746653e-06, "loss": 0.3153, "step": 41548 }, { "epoch": 1.906704602817677, "grad_norm": 0.4971274137496948, "learning_rate": 3.041286172181378e-06, "loss": 0.4329, "step": 41549 }, { "epoch": 1.9067504933229316, "grad_norm": 0.4704916775226593, "learning_rate": 3.0410605833261884e-06, "loss": 0.3769, "step": 41550 }, { "epoch": 1.906796383828186, "grad_norm": 0.4660366475582123, "learning_rate": 3.040834999181633e-06, "loss": 0.3592, "step": 41551 }, { "epoch": 1.9068422743334406, "grad_norm": 0.45621976256370544, "learning_rate": 3.0406094197482504e-06, "loss": 0.3179, "step": 41552 }, { "epoch": 1.9068881648386948, "grad_norm": 0.49223658442497253, "learning_rate": 3.0403838450265827e-06, "loss": 0.3954, "step": 41553 }, { "epoch": 1.9069340553439493, "grad_norm": 0.4706885516643524, "learning_rate": 3.0401582750171745e-06, "loss": 0.3353, "step": 41554 }, { "epoch": 1.9069799458492038, "grad_norm": 0.4801173210144043, "learning_rate": 3.0399327097205674e-06, "loss": 0.3773, "step": 41555 }, { "epoch": 1.907025836354458, "grad_norm": 0.47125449776649475, "learning_rate": 3.039707149137301e-06, "loss": 0.3602, "step": 41556 }, { "epoch": 1.9070717268597126, "grad_norm": 0.4658561646938324, "learning_rate": 3.0394815932679224e-06, "loss": 0.3842, "step": 41557 }, { "epoch": 1.907117617364967, "grad_norm": 0.4492603540420532, "learning_rate": 3.039256042112972e-06, "loss": 0.3212, "step": 41558 }, { "epoch": 1.9071635078702216, "grad_norm": 0.5101969242095947, "learning_rate": 3.039030495672991e-06, "loss": 0.4037, "step": 41559 }, { "epoch": 1.907209398375476, "grad_norm": 0.4976857900619507, "learning_rate": 3.0388049539485233e-06, "loss": 0.3637, "step": 41560 }, { "epoch": 1.9072552888807306, "grad_norm": 0.6312056183815002, "learning_rate": 3.038579416940111e-06, "loss": 0.4498, "step": 41561 }, { "epoch": 1.907301179385985, "grad_norm": 0.455485075712204, "learning_rate": 3.038353884648294e-06, "loss": 0.379, "step": 41562 }, { "epoch": 1.9073470698912396, "grad_norm": 0.4763627350330353, "learning_rate": 3.03812835707362e-06, "loss": 0.348, "step": 41563 }, { "epoch": 1.907392960396494, "grad_norm": 0.49052444100379944, "learning_rate": 3.037902834216626e-06, "loss": 0.4205, "step": 41564 }, { "epoch": 1.9074388509017486, "grad_norm": 0.5040299296379089, "learning_rate": 3.037677316077857e-06, "loss": 0.4231, "step": 41565 }, { "epoch": 1.9074847414070029, "grad_norm": 0.49479496479034424, "learning_rate": 3.0374518026578553e-06, "loss": 0.3828, "step": 41566 }, { "epoch": 1.9075306319122574, "grad_norm": 0.4606206715106964, "learning_rate": 3.0372262939571628e-06, "loss": 0.3282, "step": 41567 }, { "epoch": 1.9075765224175119, "grad_norm": 0.49075305461883545, "learning_rate": 3.03700078997632e-06, "loss": 0.3659, "step": 41568 }, { "epoch": 1.9076224129227661, "grad_norm": 0.44100430607795715, "learning_rate": 3.0367752907158723e-06, "loss": 0.3251, "step": 41569 }, { "epoch": 1.9076683034280206, "grad_norm": 0.45194581151008606, "learning_rate": 3.03654979617636e-06, "loss": 0.3443, "step": 41570 }, { "epoch": 1.9077141939332751, "grad_norm": 0.47992345690727234, "learning_rate": 3.036324306358324e-06, "loss": 0.395, "step": 41571 }, { "epoch": 1.9077600844385296, "grad_norm": 0.4880546033382416, "learning_rate": 3.0360988212623103e-06, "loss": 0.3724, "step": 41572 }, { "epoch": 1.9078059749437841, "grad_norm": 0.43381908535957336, "learning_rate": 3.0358733408888597e-06, "loss": 0.2904, "step": 41573 }, { "epoch": 1.9078518654490386, "grad_norm": 0.5021708011627197, "learning_rate": 3.035647865238512e-06, "loss": 0.3332, "step": 41574 }, { "epoch": 1.9078977559542931, "grad_norm": 0.4859614074230194, "learning_rate": 3.035422394311812e-06, "loss": 0.3976, "step": 41575 }, { "epoch": 1.9079436464595476, "grad_norm": 0.47483667731285095, "learning_rate": 3.035196928109302e-06, "loss": 0.3871, "step": 41576 }, { "epoch": 1.9079895369648021, "grad_norm": 0.4652535915374756, "learning_rate": 3.0349714666315213e-06, "loss": 0.3413, "step": 41577 }, { "epoch": 1.9080354274700566, "grad_norm": 0.48963284492492676, "learning_rate": 3.034746009879015e-06, "loss": 0.3906, "step": 41578 }, { "epoch": 1.9080813179753109, "grad_norm": 0.4341690242290497, "learning_rate": 3.034520557852325e-06, "loss": 0.2858, "step": 41579 }, { "epoch": 1.9081272084805654, "grad_norm": 0.4465838372707367, "learning_rate": 3.034295110551991e-06, "loss": 0.3297, "step": 41580 }, { "epoch": 1.9081730989858199, "grad_norm": 0.4731297791004181, "learning_rate": 3.0340696679785586e-06, "loss": 0.3259, "step": 41581 }, { "epoch": 1.9082189894910742, "grad_norm": 0.42460453510284424, "learning_rate": 3.033844230132569e-06, "loss": 0.2947, "step": 41582 }, { "epoch": 1.9082648799963287, "grad_norm": 0.4718386232852936, "learning_rate": 3.033618797014562e-06, "loss": 0.3608, "step": 41583 }, { "epoch": 1.9083107705015832, "grad_norm": 0.4700298607349396, "learning_rate": 3.0333933686250795e-06, "loss": 0.3575, "step": 41584 }, { "epoch": 1.9083566610068377, "grad_norm": 0.4496673047542572, "learning_rate": 3.033167944964668e-06, "loss": 0.303, "step": 41585 }, { "epoch": 1.9084025515120921, "grad_norm": 0.5110210180282593, "learning_rate": 3.032942526033866e-06, "loss": 0.4337, "step": 41586 }, { "epoch": 1.9084484420173466, "grad_norm": 0.5274921655654907, "learning_rate": 3.0327171118332154e-06, "loss": 0.4293, "step": 41587 }, { "epoch": 1.9084943325226011, "grad_norm": 0.4800852835178375, "learning_rate": 3.032491702363261e-06, "loss": 0.3814, "step": 41588 }, { "epoch": 1.9085402230278556, "grad_norm": 0.4119489789009094, "learning_rate": 3.0322662976245426e-06, "loss": 0.2756, "step": 41589 }, { "epoch": 1.9085861135331101, "grad_norm": 0.44783815741539, "learning_rate": 3.0320408976176017e-06, "loss": 0.3424, "step": 41590 }, { "epoch": 1.9086320040383644, "grad_norm": 0.4968620538711548, "learning_rate": 3.0318155023429827e-06, "loss": 0.3651, "step": 41591 }, { "epoch": 1.908677894543619, "grad_norm": 0.4413295388221741, "learning_rate": 3.031590111801226e-06, "loss": 0.3041, "step": 41592 }, { "epoch": 1.9087237850488734, "grad_norm": 0.450089693069458, "learning_rate": 3.0313647259928714e-06, "loss": 0.2814, "step": 41593 }, { "epoch": 1.9087696755541277, "grad_norm": 0.4679228365421295, "learning_rate": 3.0311393449184654e-06, "loss": 0.4171, "step": 41594 }, { "epoch": 1.9088155660593822, "grad_norm": 0.4476267993450165, "learning_rate": 3.030913968578549e-06, "loss": 0.3235, "step": 41595 }, { "epoch": 1.9088614565646367, "grad_norm": 0.4413772523403168, "learning_rate": 3.030688596973661e-06, "loss": 0.3116, "step": 41596 }, { "epoch": 1.9089073470698912, "grad_norm": 0.5042796730995178, "learning_rate": 3.0304632301043467e-06, "loss": 0.4061, "step": 41597 }, { "epoch": 1.9089532375751457, "grad_norm": 0.4520305097103119, "learning_rate": 3.030237867971146e-06, "loss": 0.3337, "step": 41598 }, { "epoch": 1.9089991280804002, "grad_norm": 0.47830358147621155, "learning_rate": 3.030012510574601e-06, "loss": 0.3555, "step": 41599 }, { "epoch": 1.9090450185856547, "grad_norm": 0.46305590867996216, "learning_rate": 3.0297871579152548e-06, "loss": 0.36, "step": 41600 }, { "epoch": 1.9090909090909092, "grad_norm": 0.47261208295822144, "learning_rate": 3.0295618099936497e-06, "loss": 0.3959, "step": 41601 }, { "epoch": 1.9091367995961637, "grad_norm": 0.46703213453292847, "learning_rate": 3.0293364668103254e-06, "loss": 0.3588, "step": 41602 }, { "epoch": 1.9091826901014182, "grad_norm": 0.4855630099773407, "learning_rate": 3.029111128365825e-06, "loss": 0.4047, "step": 41603 }, { "epoch": 1.9092285806066724, "grad_norm": 0.41291651129722595, "learning_rate": 3.0288857946606915e-06, "loss": 0.2298, "step": 41604 }, { "epoch": 1.909274471111927, "grad_norm": 0.4846917986869812, "learning_rate": 3.028660465695463e-06, "loss": 0.4055, "step": 41605 }, { "epoch": 1.9093203616171814, "grad_norm": 0.4999304711818695, "learning_rate": 3.028435141470686e-06, "loss": 0.3673, "step": 41606 }, { "epoch": 1.9093662521224357, "grad_norm": 0.4918630123138428, "learning_rate": 3.0282098219869e-06, "loss": 0.4025, "step": 41607 }, { "epoch": 1.9094121426276902, "grad_norm": 0.42343443632125854, "learning_rate": 3.0279845072446463e-06, "loss": 0.2835, "step": 41608 }, { "epoch": 1.9094580331329447, "grad_norm": 0.48952850699424744, "learning_rate": 3.0277591972444687e-06, "loss": 0.4068, "step": 41609 }, { "epoch": 1.9095039236381992, "grad_norm": 0.4705795347690582, "learning_rate": 3.027533891986907e-06, "loss": 0.3842, "step": 41610 }, { "epoch": 1.9095498141434537, "grad_norm": 0.47345244884490967, "learning_rate": 3.027308591472502e-06, "loss": 0.3762, "step": 41611 }, { "epoch": 1.9095957046487082, "grad_norm": 0.4783681035041809, "learning_rate": 3.027083295701801e-06, "loss": 0.3787, "step": 41612 }, { "epoch": 1.9096415951539627, "grad_norm": 0.4678381383419037, "learning_rate": 3.0268580046753405e-06, "loss": 0.336, "step": 41613 }, { "epoch": 1.9096874856592172, "grad_norm": 0.4978208839893341, "learning_rate": 3.026632718393662e-06, "loss": 0.3297, "step": 41614 }, { "epoch": 1.9097333761644717, "grad_norm": 0.4917984902858734, "learning_rate": 3.0264074368573102e-06, "loss": 0.3473, "step": 41615 }, { "epoch": 1.9097792666697262, "grad_norm": 0.49002933502197266, "learning_rate": 3.0261821600668263e-06, "loss": 0.3713, "step": 41616 }, { "epoch": 1.9098251571749805, "grad_norm": 0.4661906957626343, "learning_rate": 3.0259568880227515e-06, "loss": 0.3526, "step": 41617 }, { "epoch": 1.909871047680235, "grad_norm": 0.47650644183158875, "learning_rate": 3.025731620725626e-06, "loss": 0.3893, "step": 41618 }, { "epoch": 1.9099169381854895, "grad_norm": 0.4874667823314667, "learning_rate": 3.0255063581759936e-06, "loss": 0.3733, "step": 41619 }, { "epoch": 1.9099628286907437, "grad_norm": 0.517134428024292, "learning_rate": 3.025281100374396e-06, "loss": 0.4524, "step": 41620 }, { "epoch": 1.9100087191959982, "grad_norm": 0.45107442140579224, "learning_rate": 3.0250558473213716e-06, "loss": 0.3223, "step": 41621 }, { "epoch": 1.9100546097012527, "grad_norm": 0.48389163613319397, "learning_rate": 3.0248305990174666e-06, "loss": 0.3836, "step": 41622 }, { "epoch": 1.9101005002065072, "grad_norm": 0.4673812985420227, "learning_rate": 3.0246053554632206e-06, "loss": 0.3427, "step": 41623 }, { "epoch": 1.9101463907117617, "grad_norm": 0.4295503497123718, "learning_rate": 3.0243801166591747e-06, "loss": 0.3094, "step": 41624 }, { "epoch": 1.9101922812170162, "grad_norm": 0.4560742676258087, "learning_rate": 3.0241548826058715e-06, "loss": 0.34, "step": 41625 }, { "epoch": 1.9102381717222707, "grad_norm": 0.4905715584754944, "learning_rate": 3.0239296533038526e-06, "loss": 0.3703, "step": 41626 }, { "epoch": 1.9102840622275252, "grad_norm": 0.45267176628112793, "learning_rate": 3.0237044287536564e-06, "loss": 0.3427, "step": 41627 }, { "epoch": 1.9103299527327797, "grad_norm": 0.42638611793518066, "learning_rate": 3.023479208955831e-06, "loss": 0.2823, "step": 41628 }, { "epoch": 1.910375843238034, "grad_norm": 0.4465063214302063, "learning_rate": 3.023253993910913e-06, "loss": 0.3162, "step": 41629 }, { "epoch": 1.9104217337432885, "grad_norm": 0.4348711669445038, "learning_rate": 3.0230287836194445e-06, "loss": 0.2949, "step": 41630 }, { "epoch": 1.910467624248543, "grad_norm": 0.4795144498348236, "learning_rate": 3.022803578081969e-06, "loss": 0.3501, "step": 41631 }, { "epoch": 1.9105135147537975, "grad_norm": 0.47305795550346375, "learning_rate": 3.022578377299027e-06, "loss": 0.3515, "step": 41632 }, { "epoch": 1.9105594052590518, "grad_norm": 0.47762468457221985, "learning_rate": 3.0223531812711572e-06, "loss": 0.3419, "step": 41633 }, { "epoch": 1.9106052957643063, "grad_norm": 0.461089551448822, "learning_rate": 3.0221279899989077e-06, "loss": 0.339, "step": 41634 }, { "epoch": 1.9106511862695608, "grad_norm": 0.5005724430084229, "learning_rate": 3.0219028034828146e-06, "loss": 0.4052, "step": 41635 }, { "epoch": 1.9106970767748153, "grad_norm": 0.4699056148529053, "learning_rate": 3.021677621723419e-06, "loss": 0.3698, "step": 41636 }, { "epoch": 1.9107429672800698, "grad_norm": 0.43832463026046753, "learning_rate": 3.021452444721267e-06, "loss": 0.2763, "step": 41637 }, { "epoch": 1.9107888577853243, "grad_norm": 0.4869318902492523, "learning_rate": 3.0212272724768967e-06, "loss": 0.4142, "step": 41638 }, { "epoch": 1.9108347482905788, "grad_norm": 0.4772319197654724, "learning_rate": 3.021002104990849e-06, "loss": 0.3594, "step": 41639 }, { "epoch": 1.9108806387958333, "grad_norm": 0.44488105177879333, "learning_rate": 3.0207769422636678e-06, "loss": 0.3165, "step": 41640 }, { "epoch": 1.9109265293010878, "grad_norm": 0.459980845451355, "learning_rate": 3.0205517842958944e-06, "loss": 0.2696, "step": 41641 }, { "epoch": 1.910972419806342, "grad_norm": 0.4743506610393524, "learning_rate": 3.0203266310880663e-06, "loss": 0.341, "step": 41642 }, { "epoch": 1.9110183103115965, "grad_norm": 0.5056712627410889, "learning_rate": 3.02010148264073e-06, "loss": 0.4365, "step": 41643 }, { "epoch": 1.911064200816851, "grad_norm": 0.5320311784744263, "learning_rate": 3.019876338954425e-06, "loss": 0.4278, "step": 41644 }, { "epoch": 1.9111100913221053, "grad_norm": 0.5276470184326172, "learning_rate": 3.019651200029691e-06, "loss": 0.3651, "step": 41645 }, { "epoch": 1.9111559818273598, "grad_norm": 0.45133671164512634, "learning_rate": 3.0194260658670727e-06, "loss": 0.3141, "step": 41646 }, { "epoch": 1.9112018723326143, "grad_norm": 0.4967919886112213, "learning_rate": 3.019200936467109e-06, "loss": 0.4217, "step": 41647 }, { "epoch": 1.9112477628378688, "grad_norm": 0.45508357882499695, "learning_rate": 3.0189758118303416e-06, "loss": 0.3468, "step": 41648 }, { "epoch": 1.9112936533431233, "grad_norm": 0.4873710572719574, "learning_rate": 3.0187506919573105e-06, "loss": 0.4072, "step": 41649 }, { "epoch": 1.9113395438483778, "grad_norm": 0.4491294324398041, "learning_rate": 3.018525576848561e-06, "loss": 0.2981, "step": 41650 }, { "epoch": 1.9113854343536323, "grad_norm": 0.43335095047950745, "learning_rate": 3.018300466504632e-06, "loss": 0.2955, "step": 41651 }, { "epoch": 1.9114313248588868, "grad_norm": 0.4938606321811676, "learning_rate": 3.018075360926063e-06, "loss": 0.3983, "step": 41652 }, { "epoch": 1.9114772153641413, "grad_norm": 0.47704455256462097, "learning_rate": 3.0178502601134e-06, "loss": 0.3579, "step": 41653 }, { "epoch": 1.9115231058693958, "grad_norm": 0.4614299237728119, "learning_rate": 3.01762516406718e-06, "loss": 0.3354, "step": 41654 }, { "epoch": 1.91156899637465, "grad_norm": 0.4565369784832001, "learning_rate": 3.0174000727879443e-06, "loss": 0.3175, "step": 41655 }, { "epoch": 1.9116148868799046, "grad_norm": 0.45117250084877014, "learning_rate": 3.017174986276239e-06, "loss": 0.328, "step": 41656 }, { "epoch": 1.911660777385159, "grad_norm": 0.4690169394016266, "learning_rate": 3.016949904532601e-06, "loss": 0.3103, "step": 41657 }, { "epoch": 1.9117066678904133, "grad_norm": 0.49757808446884155, "learning_rate": 3.01672482755757e-06, "loss": 0.4029, "step": 41658 }, { "epoch": 1.9117525583956678, "grad_norm": 0.4536018967628479, "learning_rate": 3.0164997553516922e-06, "loss": 0.2825, "step": 41659 }, { "epoch": 1.9117984489009223, "grad_norm": 0.47895774245262146, "learning_rate": 3.016274687915507e-06, "loss": 0.3648, "step": 41660 }, { "epoch": 1.9118443394061768, "grad_norm": 0.4861104190349579, "learning_rate": 3.016049625249553e-06, "loss": 0.3718, "step": 41661 }, { "epoch": 1.9118902299114313, "grad_norm": 0.48282304406166077, "learning_rate": 3.015824567354375e-06, "loss": 0.3599, "step": 41662 }, { "epoch": 1.9119361204166858, "grad_norm": 0.4301174581050873, "learning_rate": 3.0155995142305134e-06, "loss": 0.2783, "step": 41663 }, { "epoch": 1.9119820109219403, "grad_norm": 0.4923028349876404, "learning_rate": 3.0153744658785057e-06, "loss": 0.3815, "step": 41664 }, { "epoch": 1.9120279014271948, "grad_norm": 0.448930561542511, "learning_rate": 3.0151494222988985e-06, "loss": 0.3544, "step": 41665 }, { "epoch": 1.9120737919324493, "grad_norm": 0.5043471455574036, "learning_rate": 3.0149243834922305e-06, "loss": 0.3809, "step": 41666 }, { "epoch": 1.9121196824377038, "grad_norm": 0.4646277129650116, "learning_rate": 3.014699349459042e-06, "loss": 0.3822, "step": 41667 }, { "epoch": 1.912165572942958, "grad_norm": 0.4866785705089569, "learning_rate": 3.014474320199876e-06, "loss": 0.3686, "step": 41668 }, { "epoch": 1.9122114634482126, "grad_norm": 0.4378274977207184, "learning_rate": 3.0142492957152724e-06, "loss": 0.2878, "step": 41669 }, { "epoch": 1.912257353953467, "grad_norm": 0.45598047971725464, "learning_rate": 3.0140242760057702e-06, "loss": 0.3592, "step": 41670 }, { "epoch": 1.9123032444587214, "grad_norm": 0.4920518398284912, "learning_rate": 3.0137992610719152e-06, "loss": 0.3779, "step": 41671 }, { "epoch": 1.9123491349639758, "grad_norm": 0.46314477920532227, "learning_rate": 3.013574250914246e-06, "loss": 0.3552, "step": 41672 }, { "epoch": 1.9123950254692303, "grad_norm": 0.44451725482940674, "learning_rate": 3.013349245533303e-06, "loss": 0.3237, "step": 41673 }, { "epoch": 1.9124409159744848, "grad_norm": 0.43934786319732666, "learning_rate": 3.0131242449296287e-06, "loss": 0.3014, "step": 41674 }, { "epoch": 1.9124868064797393, "grad_norm": 0.4652961790561676, "learning_rate": 3.0128992491037638e-06, "loss": 0.3463, "step": 41675 }, { "epoch": 1.9125326969849938, "grad_norm": 0.486237108707428, "learning_rate": 3.012674258056247e-06, "loss": 0.3393, "step": 41676 }, { "epoch": 1.9125785874902483, "grad_norm": 0.48892784118652344, "learning_rate": 3.0124492717876236e-06, "loss": 0.3788, "step": 41677 }, { "epoch": 1.9126244779955028, "grad_norm": 0.45125612616539, "learning_rate": 3.0122242902984334e-06, "loss": 0.3546, "step": 41678 }, { "epoch": 1.9126703685007573, "grad_norm": 0.49366915225982666, "learning_rate": 3.0119993135892127e-06, "loss": 0.3437, "step": 41679 }, { "epoch": 1.9127162590060116, "grad_norm": 0.47382959723472595, "learning_rate": 3.0117743416605088e-06, "loss": 0.3591, "step": 41680 }, { "epoch": 1.912762149511266, "grad_norm": 0.4751995801925659, "learning_rate": 3.01154937451286e-06, "loss": 0.3841, "step": 41681 }, { "epoch": 1.9128080400165206, "grad_norm": 0.4567111134529114, "learning_rate": 3.0113244121468077e-06, "loss": 0.3456, "step": 41682 }, { "epoch": 1.9128539305217749, "grad_norm": 0.4498870074748993, "learning_rate": 3.0110994545628913e-06, "loss": 0.3216, "step": 41683 }, { "epoch": 1.9128998210270294, "grad_norm": 0.4735798239707947, "learning_rate": 3.0108745017616537e-06, "loss": 0.3751, "step": 41684 }, { "epoch": 1.9129457115322839, "grad_norm": 0.4554518759250641, "learning_rate": 3.0106495537436353e-06, "loss": 0.3378, "step": 41685 }, { "epoch": 1.9129916020375384, "grad_norm": 0.47784242033958435, "learning_rate": 3.010424610509375e-06, "loss": 0.3506, "step": 41686 }, { "epoch": 1.9130374925427929, "grad_norm": 0.4618084132671356, "learning_rate": 3.0101996720594177e-06, "loss": 0.3247, "step": 41687 }, { "epoch": 1.9130833830480474, "grad_norm": 0.4639759063720703, "learning_rate": 3.0099747383943023e-06, "loss": 0.3454, "step": 41688 }, { "epoch": 1.9131292735533019, "grad_norm": 0.48087120056152344, "learning_rate": 3.009749809514568e-06, "loss": 0.3583, "step": 41689 }, { "epoch": 1.9131751640585564, "grad_norm": 0.4728133976459503, "learning_rate": 3.0095248854207586e-06, "loss": 0.4158, "step": 41690 }, { "epoch": 1.9132210545638109, "grad_norm": 0.4833429753780365, "learning_rate": 3.0092999661134137e-06, "loss": 0.3768, "step": 41691 }, { "epoch": 1.9132669450690654, "grad_norm": 0.459972083568573, "learning_rate": 3.009075051593072e-06, "loss": 0.3203, "step": 41692 }, { "epoch": 1.9133128355743196, "grad_norm": 0.4629722535610199, "learning_rate": 3.008850141860279e-06, "loss": 0.3023, "step": 41693 }, { "epoch": 1.9133587260795741, "grad_norm": 0.4597342014312744, "learning_rate": 3.0086252369155723e-06, "loss": 0.3535, "step": 41694 }, { "epoch": 1.9134046165848286, "grad_norm": 0.4700259268283844, "learning_rate": 3.0084003367594927e-06, "loss": 0.3451, "step": 41695 }, { "epoch": 1.913450507090083, "grad_norm": 0.4755796492099762, "learning_rate": 3.0081754413925824e-06, "loss": 0.3337, "step": 41696 }, { "epoch": 1.9134963975953374, "grad_norm": 0.4516092538833618, "learning_rate": 3.0079505508153817e-06, "loss": 0.3425, "step": 41697 }, { "epoch": 1.913542288100592, "grad_norm": 0.5098990201950073, "learning_rate": 3.0077256650284292e-06, "loss": 0.3783, "step": 41698 }, { "epoch": 1.9135881786058464, "grad_norm": 0.4617146849632263, "learning_rate": 3.0075007840322697e-06, "loss": 0.3262, "step": 41699 }, { "epoch": 1.913634069111101, "grad_norm": 0.44453758001327515, "learning_rate": 3.0072759078274427e-06, "loss": 0.3092, "step": 41700 }, { "epoch": 1.9136799596163554, "grad_norm": 0.48229146003723145, "learning_rate": 3.0070510364144856e-06, "loss": 0.3955, "step": 41701 }, { "epoch": 1.91372585012161, "grad_norm": 0.45639634132385254, "learning_rate": 3.006826169793944e-06, "loss": 0.3374, "step": 41702 }, { "epoch": 1.9137717406268644, "grad_norm": 0.47794729471206665, "learning_rate": 3.0066013079663557e-06, "loss": 0.3892, "step": 41703 }, { "epoch": 1.913817631132119, "grad_norm": 0.4468208849430084, "learning_rate": 3.0063764509322614e-06, "loss": 0.32, "step": 41704 }, { "epoch": 1.9138635216373734, "grad_norm": 0.4624806046485901, "learning_rate": 3.006151598692204e-06, "loss": 0.3383, "step": 41705 }, { "epoch": 1.9139094121426277, "grad_norm": 0.47448474168777466, "learning_rate": 3.005926751246722e-06, "loss": 0.3646, "step": 41706 }, { "epoch": 1.9139553026478822, "grad_norm": 0.4930984675884247, "learning_rate": 3.0057019085963557e-06, "loss": 0.3728, "step": 41707 }, { "epoch": 1.9140011931531367, "grad_norm": 0.5671778917312622, "learning_rate": 3.005477070741648e-06, "loss": 0.3373, "step": 41708 }, { "epoch": 1.914047083658391, "grad_norm": 0.4597375690937042, "learning_rate": 3.005252237683139e-06, "loss": 0.3482, "step": 41709 }, { "epoch": 1.9140929741636454, "grad_norm": 0.44170600175857544, "learning_rate": 3.0050274094213674e-06, "loss": 0.2914, "step": 41710 }, { "epoch": 1.9141388646689, "grad_norm": 0.45526987314224243, "learning_rate": 3.004802585956876e-06, "loss": 0.3254, "step": 41711 }, { "epoch": 1.9141847551741544, "grad_norm": 0.43136322498321533, "learning_rate": 3.0045777672902053e-06, "loss": 0.2934, "step": 41712 }, { "epoch": 1.914230645679409, "grad_norm": 0.4659397006034851, "learning_rate": 3.0043529534218956e-06, "loss": 0.3284, "step": 41713 }, { "epoch": 1.9142765361846634, "grad_norm": 0.49269405007362366, "learning_rate": 3.004128144352485e-06, "loss": 0.3695, "step": 41714 }, { "epoch": 1.914322426689918, "grad_norm": 0.4972403943538666, "learning_rate": 3.003903340082518e-06, "loss": 0.3705, "step": 41715 }, { "epoch": 1.9143683171951724, "grad_norm": 0.45791342854499817, "learning_rate": 3.0036785406125334e-06, "loss": 0.3819, "step": 41716 }, { "epoch": 1.914414207700427, "grad_norm": 0.4808557331562042, "learning_rate": 3.003453745943071e-06, "loss": 0.3487, "step": 41717 }, { "epoch": 1.9144600982056812, "grad_norm": 0.4396786689758301, "learning_rate": 3.003228956074673e-06, "loss": 0.3376, "step": 41718 }, { "epoch": 1.9145059887109357, "grad_norm": 0.45059651136398315, "learning_rate": 3.0030041710078794e-06, "loss": 0.321, "step": 41719 }, { "epoch": 1.9145518792161902, "grad_norm": 0.4912230372428894, "learning_rate": 3.0027793907432285e-06, "loss": 0.3637, "step": 41720 }, { "epoch": 1.9145977697214447, "grad_norm": 0.5117926001548767, "learning_rate": 3.0025546152812646e-06, "loss": 0.4122, "step": 41721 }, { "epoch": 1.914643660226699, "grad_norm": 0.4468610882759094, "learning_rate": 3.002329844622528e-06, "loss": 0.309, "step": 41722 }, { "epoch": 1.9146895507319535, "grad_norm": 0.47212472558021545, "learning_rate": 3.002105078767554e-06, "loss": 0.3654, "step": 41723 }, { "epoch": 1.914735441237208, "grad_norm": 0.4743143618106842, "learning_rate": 3.0018803177168885e-06, "loss": 0.3495, "step": 41724 }, { "epoch": 1.9147813317424625, "grad_norm": 0.5651989579200745, "learning_rate": 3.0016555614710715e-06, "loss": 0.457, "step": 41725 }, { "epoch": 1.914827222247717, "grad_norm": 0.4660301208496094, "learning_rate": 3.00143081003064e-06, "loss": 0.3527, "step": 41726 }, { "epoch": 1.9148731127529715, "grad_norm": 0.5289551019668579, "learning_rate": 3.001206063396138e-06, "loss": 0.4389, "step": 41727 }, { "epoch": 1.914919003258226, "grad_norm": 0.4840894937515259, "learning_rate": 3.000981321568105e-06, "loss": 0.3913, "step": 41728 }, { "epoch": 1.9149648937634804, "grad_norm": 0.48058992624282837, "learning_rate": 3.000756584547079e-06, "loss": 0.3857, "step": 41729 }, { "epoch": 1.915010784268735, "grad_norm": 0.4991056025028229, "learning_rate": 3.0005318523336038e-06, "loss": 0.376, "step": 41730 }, { "epoch": 1.9150566747739892, "grad_norm": 0.4563962519168854, "learning_rate": 3.0003071249282184e-06, "loss": 0.3283, "step": 41731 }, { "epoch": 1.9151025652792437, "grad_norm": 0.465788871049881, "learning_rate": 3.0000824023314624e-06, "loss": 0.3161, "step": 41732 }, { "epoch": 1.9151484557844982, "grad_norm": 0.5125126838684082, "learning_rate": 2.9998576845438783e-06, "loss": 0.4973, "step": 41733 }, { "epoch": 1.9151943462897525, "grad_norm": 0.42360371351242065, "learning_rate": 2.999632971566005e-06, "loss": 0.2756, "step": 41734 }, { "epoch": 1.915240236795007, "grad_norm": 0.477347195148468, "learning_rate": 2.9994082633983812e-06, "loss": 0.3666, "step": 41735 }, { "epoch": 1.9152861273002615, "grad_norm": 0.4850514233112335, "learning_rate": 2.999183560041551e-06, "loss": 0.3948, "step": 41736 }, { "epoch": 1.915332017805516, "grad_norm": 0.42634090781211853, "learning_rate": 2.998958861496053e-06, "loss": 0.284, "step": 41737 }, { "epoch": 1.9153779083107705, "grad_norm": 0.4769398868083954, "learning_rate": 2.998734167762426e-06, "loss": 0.4056, "step": 41738 }, { "epoch": 1.915423798816025, "grad_norm": 0.4478975236415863, "learning_rate": 2.9985094788412134e-06, "loss": 0.3403, "step": 41739 }, { "epoch": 1.9154696893212795, "grad_norm": 0.4741719961166382, "learning_rate": 2.998284794732953e-06, "loss": 0.3343, "step": 41740 }, { "epoch": 1.915515579826534, "grad_norm": 0.4893762767314911, "learning_rate": 2.998060115438185e-06, "loss": 0.3787, "step": 41741 }, { "epoch": 1.9155614703317885, "grad_norm": 0.4750825762748718, "learning_rate": 2.997835440957452e-06, "loss": 0.3804, "step": 41742 }, { "epoch": 1.915607360837043, "grad_norm": 0.43534931540489197, "learning_rate": 2.9976107712912926e-06, "loss": 0.2666, "step": 41743 }, { "epoch": 1.9156532513422972, "grad_norm": 0.43634799122810364, "learning_rate": 2.9973861064402463e-06, "loss": 0.2858, "step": 41744 }, { "epoch": 1.9156991418475517, "grad_norm": 0.45257246494293213, "learning_rate": 2.997161446404856e-06, "loss": 0.3376, "step": 41745 }, { "epoch": 1.9157450323528062, "grad_norm": 0.45347413420677185, "learning_rate": 2.9969367911856597e-06, "loss": 0.3504, "step": 41746 }, { "epoch": 1.9157909228580605, "grad_norm": 0.4663369357585907, "learning_rate": 2.9967121407831985e-06, "loss": 0.3057, "step": 41747 }, { "epoch": 1.915836813363315, "grad_norm": 0.4768209457397461, "learning_rate": 2.996487495198011e-06, "loss": 0.3583, "step": 41748 }, { "epoch": 1.9158827038685695, "grad_norm": 0.5064505934715271, "learning_rate": 2.9962628544306406e-06, "loss": 0.4014, "step": 41749 }, { "epoch": 1.915928594373824, "grad_norm": 0.4451788067817688, "learning_rate": 2.9960382184816243e-06, "loss": 0.3206, "step": 41750 }, { "epoch": 1.9159744848790785, "grad_norm": 0.4536818861961365, "learning_rate": 2.9958135873515026e-06, "loss": 0.327, "step": 41751 }, { "epoch": 1.916020375384333, "grad_norm": 0.45812085270881653, "learning_rate": 2.9955889610408183e-06, "loss": 0.3518, "step": 41752 }, { "epoch": 1.9160662658895875, "grad_norm": 0.44672730565071106, "learning_rate": 2.9953643395501103e-06, "loss": 0.3318, "step": 41753 }, { "epoch": 1.916112156394842, "grad_norm": 0.46509045362472534, "learning_rate": 2.995139722879917e-06, "loss": 0.3783, "step": 41754 }, { "epoch": 1.9161580469000965, "grad_norm": 0.46337124705314636, "learning_rate": 2.994915111030781e-06, "loss": 0.3425, "step": 41755 }, { "epoch": 1.916203937405351, "grad_norm": 0.492946058511734, "learning_rate": 2.9946905040032413e-06, "loss": 0.358, "step": 41756 }, { "epoch": 1.9162498279106053, "grad_norm": 0.466013103723526, "learning_rate": 2.994465901797836e-06, "loss": 0.3318, "step": 41757 }, { "epoch": 1.9162957184158598, "grad_norm": 0.4845457077026367, "learning_rate": 2.9942413044151096e-06, "loss": 0.3806, "step": 41758 }, { "epoch": 1.9163416089211143, "grad_norm": 0.47627297043800354, "learning_rate": 2.9940167118556e-06, "loss": 0.3617, "step": 41759 }, { "epoch": 1.9163874994263685, "grad_norm": 0.4811996817588806, "learning_rate": 2.9937921241198457e-06, "loss": 0.3874, "step": 41760 }, { "epoch": 1.916433389931623, "grad_norm": 0.474230021238327, "learning_rate": 2.993567541208389e-06, "loss": 0.3687, "step": 41761 }, { "epoch": 1.9164792804368775, "grad_norm": 0.47917526960372925, "learning_rate": 2.9933429631217698e-06, "loss": 0.3279, "step": 41762 }, { "epoch": 1.916525170942132, "grad_norm": 0.5076526999473572, "learning_rate": 2.993118389860525e-06, "loss": 0.3134, "step": 41763 }, { "epoch": 1.9165710614473865, "grad_norm": 0.4887468218803406, "learning_rate": 2.992893821425199e-06, "loss": 0.3605, "step": 41764 }, { "epoch": 1.916616951952641, "grad_norm": 0.4685856103897095, "learning_rate": 2.9926692578163296e-06, "loss": 0.3477, "step": 41765 }, { "epoch": 1.9166628424578955, "grad_norm": 0.5036227107048035, "learning_rate": 2.992444699034457e-06, "loss": 0.3543, "step": 41766 }, { "epoch": 1.91670873296315, "grad_norm": 0.463027685880661, "learning_rate": 2.9922201450801213e-06, "loss": 0.3557, "step": 41767 }, { "epoch": 1.9167546234684045, "grad_norm": 0.4274784326553345, "learning_rate": 2.9919955959538626e-06, "loss": 0.3065, "step": 41768 }, { "epoch": 1.9168005139736588, "grad_norm": 0.4676278829574585, "learning_rate": 2.9917710516562193e-06, "loss": 0.3318, "step": 41769 }, { "epoch": 1.9168464044789133, "grad_norm": 0.44727200269699097, "learning_rate": 2.991546512187736e-06, "loss": 0.2994, "step": 41770 }, { "epoch": 1.9168922949841678, "grad_norm": 0.44487136602401733, "learning_rate": 2.9913219775489477e-06, "loss": 0.3059, "step": 41771 }, { "epoch": 1.916938185489422, "grad_norm": 0.48411113023757935, "learning_rate": 2.9910974477403942e-06, "loss": 0.3739, "step": 41772 }, { "epoch": 1.9169840759946766, "grad_norm": 0.46533793210983276, "learning_rate": 2.99087292276262e-06, "loss": 0.3762, "step": 41773 }, { "epoch": 1.917029966499931, "grad_norm": 0.49383974075317383, "learning_rate": 2.990648402616162e-06, "loss": 0.4357, "step": 41774 }, { "epoch": 1.9170758570051856, "grad_norm": 0.4654548168182373, "learning_rate": 2.990423887301559e-06, "loss": 0.3421, "step": 41775 }, { "epoch": 1.91712174751044, "grad_norm": 0.44804930686950684, "learning_rate": 2.9901993768193536e-06, "loss": 0.3477, "step": 41776 }, { "epoch": 1.9171676380156946, "grad_norm": 0.4503944218158722, "learning_rate": 2.989974871170085e-06, "loss": 0.3107, "step": 41777 }, { "epoch": 1.917213528520949, "grad_norm": 0.445602148771286, "learning_rate": 2.9897503703542913e-06, "loss": 0.277, "step": 41778 }, { "epoch": 1.9172594190262036, "grad_norm": 0.464530348777771, "learning_rate": 2.989525874372512e-06, "loss": 0.3392, "step": 41779 }, { "epoch": 1.917305309531458, "grad_norm": 0.48807087540626526, "learning_rate": 2.9893013832252904e-06, "loss": 0.3583, "step": 41780 }, { "epoch": 1.9173512000367126, "grad_norm": 0.5158286094665527, "learning_rate": 2.9890768969131646e-06, "loss": 0.4432, "step": 41781 }, { "epoch": 1.9173970905419668, "grad_norm": 0.46204522252082825, "learning_rate": 2.9888524154366728e-06, "loss": 0.3314, "step": 41782 }, { "epoch": 1.9174429810472213, "grad_norm": 0.4104793965816498, "learning_rate": 2.988627938796358e-06, "loss": 0.2535, "step": 41783 }, { "epoch": 1.9174888715524758, "grad_norm": 0.47452694177627563, "learning_rate": 2.9884034669927575e-06, "loss": 0.3315, "step": 41784 }, { "epoch": 1.91753476205773, "grad_norm": 0.4363560974597931, "learning_rate": 2.9881790000264097e-06, "loss": 0.3186, "step": 41785 }, { "epoch": 1.9175806525629846, "grad_norm": 0.4924233853816986, "learning_rate": 2.9879545378978593e-06, "loss": 0.4069, "step": 41786 }, { "epoch": 1.917626543068239, "grad_norm": 0.4382440447807312, "learning_rate": 2.9877300806076424e-06, "loss": 0.2962, "step": 41787 }, { "epoch": 1.9176724335734936, "grad_norm": 0.4669543504714966, "learning_rate": 2.9875056281562986e-06, "loss": 0.3679, "step": 41788 }, { "epoch": 1.917718324078748, "grad_norm": 0.4740910828113556, "learning_rate": 2.9872811805443704e-06, "loss": 0.3815, "step": 41789 }, { "epoch": 1.9177642145840026, "grad_norm": 0.4820399880409241, "learning_rate": 2.9870567377723948e-06, "loss": 0.3027, "step": 41790 }, { "epoch": 1.917810105089257, "grad_norm": 0.4799916744232178, "learning_rate": 2.9868322998409105e-06, "loss": 0.3652, "step": 41791 }, { "epoch": 1.9178559955945116, "grad_norm": 0.4766339063644409, "learning_rate": 2.9866078667504626e-06, "loss": 0.3562, "step": 41792 }, { "epoch": 1.917901886099766, "grad_norm": 0.48064756393432617, "learning_rate": 2.986383438501586e-06, "loss": 0.35, "step": 41793 }, { "epoch": 1.9179477766050206, "grad_norm": 0.4538975954055786, "learning_rate": 2.9861590150948194e-06, "loss": 0.3556, "step": 41794 }, { "epoch": 1.9179936671102749, "grad_norm": 0.46717819571495056, "learning_rate": 2.9859345965307074e-06, "loss": 0.3627, "step": 41795 }, { "epoch": 1.9180395576155294, "grad_norm": 0.47148364782333374, "learning_rate": 2.985710182809787e-06, "loss": 0.3388, "step": 41796 }, { "epoch": 1.9180854481207839, "grad_norm": 0.4607082009315491, "learning_rate": 2.9854857739325973e-06, "loss": 0.333, "step": 41797 }, { "epoch": 1.9181313386260381, "grad_norm": 0.48574763536453247, "learning_rate": 2.9852613698996786e-06, "loss": 0.3607, "step": 41798 }, { "epoch": 1.9181772291312926, "grad_norm": 0.48474326729774475, "learning_rate": 2.985036970711571e-06, "loss": 0.3352, "step": 41799 }, { "epoch": 1.9182231196365471, "grad_norm": 0.47861436009407043, "learning_rate": 2.9848125763688114e-06, "loss": 0.358, "step": 41800 }, { "epoch": 1.9182690101418016, "grad_norm": 0.47306203842163086, "learning_rate": 2.9845881868719432e-06, "loss": 0.3671, "step": 41801 }, { "epoch": 1.9183149006470561, "grad_norm": 0.4566236734390259, "learning_rate": 2.984363802221505e-06, "loss": 0.3141, "step": 41802 }, { "epoch": 1.9183607911523106, "grad_norm": 0.4790041148662567, "learning_rate": 2.984139422418034e-06, "loss": 0.3489, "step": 41803 }, { "epoch": 1.9184066816575651, "grad_norm": 0.49721789360046387, "learning_rate": 2.9839150474620725e-06, "loss": 0.4003, "step": 41804 }, { "epoch": 1.9184525721628196, "grad_norm": 0.44512462615966797, "learning_rate": 2.9836906773541596e-06, "loss": 0.3274, "step": 41805 }, { "epoch": 1.9184984626680741, "grad_norm": 0.41029343008995056, "learning_rate": 2.983466312094832e-06, "loss": 0.2645, "step": 41806 }, { "epoch": 1.9185443531733284, "grad_norm": 0.4615787863731384, "learning_rate": 2.9832419516846333e-06, "loss": 0.3531, "step": 41807 }, { "epoch": 1.9185902436785829, "grad_norm": 0.4936063885688782, "learning_rate": 2.9830175961241013e-06, "loss": 0.3863, "step": 41808 }, { "epoch": 1.9186361341838374, "grad_norm": 0.49395230412483215, "learning_rate": 2.9827932454137748e-06, "loss": 0.3607, "step": 41809 }, { "epoch": 1.9186820246890919, "grad_norm": 0.46430283784866333, "learning_rate": 2.9825688995541946e-06, "loss": 0.3398, "step": 41810 }, { "epoch": 1.9187279151943462, "grad_norm": 0.4747788906097412, "learning_rate": 2.9823445585458995e-06, "loss": 0.407, "step": 41811 }, { "epoch": 1.9187738056996007, "grad_norm": 0.48410215973854065, "learning_rate": 2.9821202223894286e-06, "loss": 0.3696, "step": 41812 }, { "epoch": 1.9188196962048552, "grad_norm": 0.4767077565193176, "learning_rate": 2.98189589108532e-06, "loss": 0.3338, "step": 41813 }, { "epoch": 1.9188655867101096, "grad_norm": 0.47598183155059814, "learning_rate": 2.9816715646341176e-06, "loss": 0.3367, "step": 41814 }, { "epoch": 1.9189114772153641, "grad_norm": 0.4904116094112396, "learning_rate": 2.981447243036357e-06, "loss": 0.3998, "step": 41815 }, { "epoch": 1.9189573677206186, "grad_norm": 0.4456970691680908, "learning_rate": 2.9812229262925773e-06, "loss": 0.3401, "step": 41816 }, { "epoch": 1.9190032582258731, "grad_norm": 0.4662283957004547, "learning_rate": 2.980998614403321e-06, "loss": 0.3272, "step": 41817 }, { "epoch": 1.9190491487311276, "grad_norm": 0.44600093364715576, "learning_rate": 2.9807743073691264e-06, "loss": 0.2907, "step": 41818 }, { "epoch": 1.9190950392363821, "grad_norm": 0.4765022099018097, "learning_rate": 2.9805500051905304e-06, "loss": 0.3398, "step": 41819 }, { "epoch": 1.9191409297416364, "grad_norm": 0.4777287542819977, "learning_rate": 2.980325707868076e-06, "loss": 0.3532, "step": 41820 }, { "epoch": 1.919186820246891, "grad_norm": 0.4977973699569702, "learning_rate": 2.9801014154023002e-06, "loss": 0.4108, "step": 41821 }, { "epoch": 1.9192327107521454, "grad_norm": 0.5042879581451416, "learning_rate": 2.9798771277937412e-06, "loss": 0.4072, "step": 41822 }, { "epoch": 1.9192786012573997, "grad_norm": 0.5028910040855408, "learning_rate": 2.9796528450429424e-06, "loss": 0.3816, "step": 41823 }, { "epoch": 1.9193244917626542, "grad_norm": 0.4690171480178833, "learning_rate": 2.979428567150441e-06, "loss": 0.2964, "step": 41824 }, { "epoch": 1.9193703822679087, "grad_norm": 0.4921649694442749, "learning_rate": 2.9792042941167744e-06, "loss": 0.3734, "step": 41825 }, { "epoch": 1.9194162727731632, "grad_norm": 0.4507807791233063, "learning_rate": 2.978980025942485e-06, "loss": 0.2937, "step": 41826 }, { "epoch": 1.9194621632784177, "grad_norm": 0.45530906319618225, "learning_rate": 2.9787557626281118e-06, "loss": 0.3335, "step": 41827 }, { "epoch": 1.9195080537836722, "grad_norm": 0.47488731145858765, "learning_rate": 2.9785315041741897e-06, "loss": 0.3433, "step": 41828 }, { "epoch": 1.9195539442889267, "grad_norm": 0.46775805950164795, "learning_rate": 2.978307250581264e-06, "loss": 0.3538, "step": 41829 }, { "epoch": 1.9195998347941812, "grad_norm": 0.47717270255088806, "learning_rate": 2.978083001849871e-06, "loss": 0.3824, "step": 41830 }, { "epoch": 1.9196457252994357, "grad_norm": 0.4396953582763672, "learning_rate": 2.9778587579805496e-06, "loss": 0.3133, "step": 41831 }, { "epoch": 1.9196916158046902, "grad_norm": 0.4536491334438324, "learning_rate": 2.97763451897384e-06, "loss": 0.3469, "step": 41832 }, { "epoch": 1.9197375063099444, "grad_norm": 0.5234580636024475, "learning_rate": 2.977410284830282e-06, "loss": 0.4376, "step": 41833 }, { "epoch": 1.919783396815199, "grad_norm": 0.48807504773139954, "learning_rate": 2.9771860555504113e-06, "loss": 0.3963, "step": 41834 }, { "epoch": 1.9198292873204534, "grad_norm": 0.4543924927711487, "learning_rate": 2.976961831134772e-06, "loss": 0.3082, "step": 41835 }, { "epoch": 1.9198751778257077, "grad_norm": 0.4713936746120453, "learning_rate": 2.976737611583902e-06, "loss": 0.3292, "step": 41836 }, { "epoch": 1.9199210683309622, "grad_norm": 0.41817507147789, "learning_rate": 2.976513396898336e-06, "loss": 0.2741, "step": 41837 }, { "epoch": 1.9199669588362167, "grad_norm": 0.44113749265670776, "learning_rate": 2.976289187078619e-06, "loss": 0.2849, "step": 41838 }, { "epoch": 1.9200128493414712, "grad_norm": 0.7265335917472839, "learning_rate": 2.9760649821252885e-06, "loss": 0.4523, "step": 41839 }, { "epoch": 1.9200587398467257, "grad_norm": 0.4962233006954193, "learning_rate": 2.9758407820388812e-06, "loss": 0.3799, "step": 41840 }, { "epoch": 1.9201046303519802, "grad_norm": 0.46318161487579346, "learning_rate": 2.9756165868199387e-06, "loss": 0.3694, "step": 41841 }, { "epoch": 1.9201505208572347, "grad_norm": 0.459846168756485, "learning_rate": 2.9753923964690003e-06, "loss": 0.3117, "step": 41842 }, { "epoch": 1.9201964113624892, "grad_norm": 0.4549297094345093, "learning_rate": 2.9751682109866015e-06, "loss": 0.3496, "step": 41843 }, { "epoch": 1.9202423018677437, "grad_norm": 0.5336573719978333, "learning_rate": 2.9749440303732857e-06, "loss": 0.3525, "step": 41844 }, { "epoch": 1.9202881923729982, "grad_norm": 0.459121972322464, "learning_rate": 2.9747198546295914e-06, "loss": 0.3433, "step": 41845 }, { "epoch": 1.9203340828782525, "grad_norm": 0.47601741552352905, "learning_rate": 2.9744956837560563e-06, "loss": 0.3341, "step": 41846 }, { "epoch": 1.920379973383507, "grad_norm": 0.4690493643283844, "learning_rate": 2.974271517753219e-06, "loss": 0.3385, "step": 41847 }, { "epoch": 1.9204258638887615, "grad_norm": 0.4414122402667999, "learning_rate": 2.9740473566216198e-06, "loss": 0.3003, "step": 41848 }, { "epoch": 1.9204717543940157, "grad_norm": 0.5038902759552002, "learning_rate": 2.973823200361797e-06, "loss": 0.3752, "step": 41849 }, { "epoch": 1.9205176448992702, "grad_norm": 0.4800187051296234, "learning_rate": 2.973599048974289e-06, "loss": 0.3849, "step": 41850 }, { "epoch": 1.9205635354045247, "grad_norm": 0.45802515745162964, "learning_rate": 2.973374902459637e-06, "loss": 0.3291, "step": 41851 }, { "epoch": 1.9206094259097792, "grad_norm": 0.4946646988391876, "learning_rate": 2.97315076081838e-06, "loss": 0.3672, "step": 41852 }, { "epoch": 1.9206553164150337, "grad_norm": 0.4805428087711334, "learning_rate": 2.9729266240510528e-06, "loss": 0.3404, "step": 41853 }, { "epoch": 1.9207012069202882, "grad_norm": 0.502227246761322, "learning_rate": 2.972702492158199e-06, "loss": 0.3473, "step": 41854 }, { "epoch": 1.9207470974255427, "grad_norm": 0.4485898017883301, "learning_rate": 2.9724783651403564e-06, "loss": 0.3062, "step": 41855 }, { "epoch": 1.9207929879307972, "grad_norm": 0.4735080897808075, "learning_rate": 2.972254242998061e-06, "loss": 0.3378, "step": 41856 }, { "epoch": 1.9208388784360517, "grad_norm": 0.459975928068161, "learning_rate": 2.9720301257318564e-06, "loss": 0.351, "step": 41857 }, { "epoch": 1.920884768941306, "grad_norm": 0.5321691632270813, "learning_rate": 2.97180601334228e-06, "loss": 0.4083, "step": 41858 }, { "epoch": 1.9209306594465605, "grad_norm": 0.44991692900657654, "learning_rate": 2.9715819058298678e-06, "loss": 0.2747, "step": 41859 }, { "epoch": 1.920976549951815, "grad_norm": 0.5082188248634338, "learning_rate": 2.971357803195162e-06, "loss": 0.4257, "step": 41860 }, { "epoch": 1.9210224404570693, "grad_norm": 0.4544338881969452, "learning_rate": 2.971133705438701e-06, "loss": 0.3695, "step": 41861 }, { "epoch": 1.9210683309623238, "grad_norm": 0.4720914959907532, "learning_rate": 2.9709096125610216e-06, "loss": 0.3757, "step": 41862 }, { "epoch": 1.9211142214675783, "grad_norm": 0.44792866706848145, "learning_rate": 2.970685524562665e-06, "loss": 0.3271, "step": 41863 }, { "epoch": 1.9211601119728328, "grad_norm": 0.46573078632354736, "learning_rate": 2.97046144144417e-06, "loss": 0.3257, "step": 41864 }, { "epoch": 1.9212060024780873, "grad_norm": 0.49484550952911377, "learning_rate": 2.9702373632060726e-06, "loss": 0.404, "step": 41865 }, { "epoch": 1.9212518929833418, "grad_norm": 0.49919289350509644, "learning_rate": 2.9700132898489152e-06, "loss": 0.4288, "step": 41866 }, { "epoch": 1.9212977834885963, "grad_norm": 0.48735877871513367, "learning_rate": 2.969789221373236e-06, "loss": 0.3925, "step": 41867 }, { "epoch": 1.9213436739938508, "grad_norm": 0.5008126497268677, "learning_rate": 2.9695651577795715e-06, "loss": 0.385, "step": 41868 }, { "epoch": 1.9213895644991053, "grad_norm": 0.49752306938171387, "learning_rate": 2.9693410990684625e-06, "loss": 0.3901, "step": 41869 }, { "epoch": 1.9214354550043597, "grad_norm": 0.47773054242134094, "learning_rate": 2.9691170452404485e-06, "loss": 0.3294, "step": 41870 }, { "epoch": 1.921481345509614, "grad_norm": 0.45417356491088867, "learning_rate": 2.9688929962960635e-06, "loss": 0.3488, "step": 41871 }, { "epoch": 1.9215272360148685, "grad_norm": 0.467722088098526, "learning_rate": 2.9686689522358526e-06, "loss": 0.2979, "step": 41872 }, { "epoch": 1.921573126520123, "grad_norm": 0.6165967583656311, "learning_rate": 2.968444913060352e-06, "loss": 0.3496, "step": 41873 }, { "epoch": 1.9216190170253773, "grad_norm": 0.5154542326927185, "learning_rate": 2.968220878770098e-06, "loss": 0.4205, "step": 41874 }, { "epoch": 1.9216649075306318, "grad_norm": 0.5117013454437256, "learning_rate": 2.967996849365634e-06, "loss": 0.4294, "step": 41875 }, { "epoch": 1.9217107980358863, "grad_norm": 0.4553329348564148, "learning_rate": 2.9677728248474957e-06, "loss": 0.3397, "step": 41876 }, { "epoch": 1.9217566885411408, "grad_norm": 0.4612378180027008, "learning_rate": 2.967548805216223e-06, "loss": 0.3517, "step": 41877 }, { "epoch": 1.9218025790463953, "grad_norm": 0.48482367396354675, "learning_rate": 2.967324790472351e-06, "loss": 0.3515, "step": 41878 }, { "epoch": 1.9218484695516498, "grad_norm": 0.46882227063179016, "learning_rate": 2.967100780616423e-06, "loss": 0.3729, "step": 41879 }, { "epoch": 1.9218943600569043, "grad_norm": 0.4302375316619873, "learning_rate": 2.966876775648978e-06, "loss": 0.2818, "step": 41880 }, { "epoch": 1.9219402505621588, "grad_norm": 0.45599886775016785, "learning_rate": 2.9666527755705498e-06, "loss": 0.349, "step": 41881 }, { "epoch": 1.9219861410674133, "grad_norm": 0.45980632305145264, "learning_rate": 2.9664287803816814e-06, "loss": 0.349, "step": 41882 }, { "epoch": 1.9220320315726678, "grad_norm": 0.43737518787384033, "learning_rate": 2.96620479008291e-06, "loss": 0.2985, "step": 41883 }, { "epoch": 1.922077922077922, "grad_norm": 0.43544867634773254, "learning_rate": 2.965980804674773e-06, "loss": 0.3108, "step": 41884 }, { "epoch": 1.9221238125831765, "grad_norm": 0.4507075548171997, "learning_rate": 2.9657568241578117e-06, "loss": 0.3197, "step": 41885 }, { "epoch": 1.922169703088431, "grad_norm": 0.5068269968032837, "learning_rate": 2.965532848532563e-06, "loss": 0.3928, "step": 41886 }, { "epoch": 1.9222155935936853, "grad_norm": 0.4722825288772583, "learning_rate": 2.9653088777995637e-06, "loss": 0.369, "step": 41887 }, { "epoch": 1.9222614840989398, "grad_norm": 0.45635172724723816, "learning_rate": 2.965084911959356e-06, "loss": 0.2801, "step": 41888 }, { "epoch": 1.9223073746041943, "grad_norm": 0.44466546177864075, "learning_rate": 2.9648609510124777e-06, "loss": 0.2994, "step": 41889 }, { "epoch": 1.9223532651094488, "grad_norm": 0.4614938497543335, "learning_rate": 2.964636994959465e-06, "loss": 0.3196, "step": 41890 }, { "epoch": 1.9223991556147033, "grad_norm": 0.4615688920021057, "learning_rate": 2.964413043800859e-06, "loss": 0.3464, "step": 41891 }, { "epoch": 1.9224450461199578, "grad_norm": 0.47216251492500305, "learning_rate": 2.9641890975371973e-06, "loss": 0.366, "step": 41892 }, { "epoch": 1.9224909366252123, "grad_norm": 0.43224194645881653, "learning_rate": 2.9639651561690163e-06, "loss": 0.2864, "step": 41893 }, { "epoch": 1.9225368271304668, "grad_norm": 0.4847928583621979, "learning_rate": 2.9637412196968585e-06, "loss": 0.4007, "step": 41894 }, { "epoch": 1.9225827176357213, "grad_norm": 0.468631774187088, "learning_rate": 2.9635172881212605e-06, "loss": 0.3392, "step": 41895 }, { "epoch": 1.9226286081409756, "grad_norm": 0.4627031087875366, "learning_rate": 2.963293361442759e-06, "loss": 0.3608, "step": 41896 }, { "epoch": 1.92267449864623, "grad_norm": 0.48067599534988403, "learning_rate": 2.963069439661895e-06, "loss": 0.3703, "step": 41897 }, { "epoch": 1.9227203891514846, "grad_norm": 0.49436983466148376, "learning_rate": 2.9628455227792076e-06, "loss": 0.3745, "step": 41898 }, { "epoch": 1.922766279656739, "grad_norm": 0.4585495889186859, "learning_rate": 2.962621610795231e-06, "loss": 0.3511, "step": 41899 }, { "epoch": 1.9228121701619934, "grad_norm": 0.46161770820617676, "learning_rate": 2.962397703710508e-06, "loss": 0.3551, "step": 41900 }, { "epoch": 1.9228580606672478, "grad_norm": 0.4922403395175934, "learning_rate": 2.962173801525576e-06, "loss": 0.4351, "step": 41901 }, { "epoch": 1.9229039511725023, "grad_norm": 0.43618670105934143, "learning_rate": 2.9619499042409717e-06, "loss": 0.2918, "step": 41902 }, { "epoch": 1.9229498416777568, "grad_norm": 0.4862567186355591, "learning_rate": 2.9617260118572354e-06, "loss": 0.3236, "step": 41903 }, { "epoch": 1.9229957321830113, "grad_norm": 0.4898208677768707, "learning_rate": 2.961502124374905e-06, "loss": 0.4289, "step": 41904 }, { "epoch": 1.9230416226882658, "grad_norm": 0.5007339715957642, "learning_rate": 2.9612782417945174e-06, "loss": 0.3542, "step": 41905 }, { "epoch": 1.9230875131935203, "grad_norm": 0.467084139585495, "learning_rate": 2.9610543641166135e-06, "loss": 0.3916, "step": 41906 }, { "epoch": 1.9231334036987748, "grad_norm": 0.5827847123146057, "learning_rate": 2.9608304913417298e-06, "loss": 0.3337, "step": 41907 }, { "epoch": 1.9231792942040293, "grad_norm": 0.4145813584327698, "learning_rate": 2.960606623470403e-06, "loss": 0.2769, "step": 41908 }, { "epoch": 1.9232251847092836, "grad_norm": 0.4832856059074402, "learning_rate": 2.960382760503177e-06, "loss": 0.3366, "step": 41909 }, { "epoch": 1.923271075214538, "grad_norm": 0.5123738050460815, "learning_rate": 2.9601589024405857e-06, "loss": 0.4106, "step": 41910 }, { "epoch": 1.9233169657197926, "grad_norm": 0.47792401909828186, "learning_rate": 2.9599350492831693e-06, "loss": 0.3562, "step": 41911 }, { "epoch": 1.9233628562250469, "grad_norm": 0.4488510191440582, "learning_rate": 2.959711201031463e-06, "loss": 0.3299, "step": 41912 }, { "epoch": 1.9234087467303014, "grad_norm": 0.4758816361427307, "learning_rate": 2.9594873576860094e-06, "loss": 0.3644, "step": 41913 }, { "epoch": 1.9234546372355559, "grad_norm": 0.48227789998054504, "learning_rate": 2.959263519247344e-06, "loss": 0.3515, "step": 41914 }, { "epoch": 1.9235005277408104, "grad_norm": 0.42213302850723267, "learning_rate": 2.9590396857160044e-06, "loss": 0.2736, "step": 41915 }, { "epoch": 1.9235464182460649, "grad_norm": 0.4177272617816925, "learning_rate": 2.9588158570925318e-06, "loss": 0.2875, "step": 41916 }, { "epoch": 1.9235923087513194, "grad_norm": 0.4797302484512329, "learning_rate": 2.9585920333774626e-06, "loss": 0.353, "step": 41917 }, { "epoch": 1.9236381992565739, "grad_norm": 0.4814547002315521, "learning_rate": 2.9583682145713343e-06, "loss": 0.3811, "step": 41918 }, { "epoch": 1.9236840897618284, "grad_norm": 0.4413779079914093, "learning_rate": 2.9581444006746874e-06, "loss": 0.3512, "step": 41919 }, { "epoch": 1.9237299802670829, "grad_norm": 0.4780769348144531, "learning_rate": 2.9579205916880588e-06, "loss": 0.3942, "step": 41920 }, { "epoch": 1.9237758707723374, "grad_norm": 0.459751695394516, "learning_rate": 2.9576967876119842e-06, "loss": 0.3266, "step": 41921 }, { "epoch": 1.9238217612775916, "grad_norm": 0.5005755424499512, "learning_rate": 2.957472988447007e-06, "loss": 0.4192, "step": 41922 }, { "epoch": 1.9238676517828461, "grad_norm": 0.5024755001068115, "learning_rate": 2.957249194193662e-06, "loss": 0.3712, "step": 41923 }, { "epoch": 1.9239135422881006, "grad_norm": 0.5143392086029053, "learning_rate": 2.9570254048524875e-06, "loss": 0.4486, "step": 41924 }, { "epoch": 1.923959432793355, "grad_norm": 0.49715369939804077, "learning_rate": 2.956801620424023e-06, "loss": 0.4206, "step": 41925 }, { "epoch": 1.9240053232986094, "grad_norm": 0.46908146142959595, "learning_rate": 2.9565778409088052e-06, "loss": 0.394, "step": 41926 }, { "epoch": 1.924051213803864, "grad_norm": 0.45585912466049194, "learning_rate": 2.9563540663073724e-06, "loss": 0.3059, "step": 41927 }, { "epoch": 1.9240971043091184, "grad_norm": 0.4791746437549591, "learning_rate": 2.956130296620264e-06, "loss": 0.4014, "step": 41928 }, { "epoch": 1.924142994814373, "grad_norm": 0.47247791290283203, "learning_rate": 2.9559065318480172e-06, "loss": 0.3084, "step": 41929 }, { "epoch": 1.9241888853196274, "grad_norm": 0.4580712914466858, "learning_rate": 2.9556827719911673e-06, "loss": 0.3355, "step": 41930 }, { "epoch": 1.924234775824882, "grad_norm": 0.5332343578338623, "learning_rate": 2.9554590170502585e-06, "loss": 0.4631, "step": 41931 }, { "epoch": 1.9242806663301364, "grad_norm": 0.4268239140510559, "learning_rate": 2.9552352670258246e-06, "loss": 0.3028, "step": 41932 }, { "epoch": 1.9243265568353909, "grad_norm": 0.4749758541584015, "learning_rate": 2.9550115219184034e-06, "loss": 0.3715, "step": 41933 }, { "epoch": 1.9243724473406454, "grad_norm": 0.5002425909042358, "learning_rate": 2.9547877817285353e-06, "loss": 0.3831, "step": 41934 }, { "epoch": 1.9244183378458997, "grad_norm": 0.6400960087776184, "learning_rate": 2.9545640464567573e-06, "loss": 0.382, "step": 41935 }, { "epoch": 1.9244642283511542, "grad_norm": 0.46105068922042847, "learning_rate": 2.9543403161036048e-06, "loss": 0.3584, "step": 41936 }, { "epoch": 1.9245101188564087, "grad_norm": 0.48778751492500305, "learning_rate": 2.9541165906696207e-06, "loss": 0.374, "step": 41937 }, { "epoch": 1.924556009361663, "grad_norm": 0.4511970579624176, "learning_rate": 2.9538928701553403e-06, "loss": 0.3096, "step": 41938 }, { "epoch": 1.9246018998669174, "grad_norm": 0.4820083677768707, "learning_rate": 2.9536691545613007e-06, "loss": 0.3392, "step": 41939 }, { "epoch": 1.924647790372172, "grad_norm": 0.4615422785282135, "learning_rate": 2.9534454438880422e-06, "loss": 0.3515, "step": 41940 }, { "epoch": 1.9246936808774264, "grad_norm": 0.5101310610771179, "learning_rate": 2.9532217381361006e-06, "loss": 0.3376, "step": 41941 }, { "epoch": 1.924739571382681, "grad_norm": 0.4846838414669037, "learning_rate": 2.9529980373060155e-06, "loss": 0.3956, "step": 41942 }, { "epoch": 1.9247854618879354, "grad_norm": 0.4240683615207672, "learning_rate": 2.9527743413983222e-06, "loss": 0.2859, "step": 41943 }, { "epoch": 1.92483135239319, "grad_norm": 0.46952730417251587, "learning_rate": 2.9525506504135624e-06, "loss": 0.3716, "step": 41944 }, { "epoch": 1.9248772428984444, "grad_norm": 0.49186116456985474, "learning_rate": 2.9523269643522718e-06, "loss": 0.39, "step": 41945 }, { "epoch": 1.924923133403699, "grad_norm": 0.4616750180721283, "learning_rate": 2.9521032832149878e-06, "loss": 0.2902, "step": 41946 }, { "epoch": 1.9249690239089532, "grad_norm": 0.4390864670276642, "learning_rate": 2.95187960700225e-06, "loss": 0.3061, "step": 41947 }, { "epoch": 1.9250149144142077, "grad_norm": 0.4713165760040283, "learning_rate": 2.951655935714595e-06, "loss": 0.3829, "step": 41948 }, { "epoch": 1.9250608049194622, "grad_norm": 0.4831652343273163, "learning_rate": 2.9514322693525604e-06, "loss": 0.3231, "step": 41949 }, { "epoch": 1.9251066954247165, "grad_norm": 0.4459077715873718, "learning_rate": 2.9512086079166855e-06, "loss": 0.3233, "step": 41950 }, { "epoch": 1.925152585929971, "grad_norm": 0.4912387728691101, "learning_rate": 2.9509849514075075e-06, "loss": 0.3933, "step": 41951 }, { "epoch": 1.9251984764352255, "grad_norm": 0.4813597798347473, "learning_rate": 2.950761299825562e-06, "loss": 0.3937, "step": 41952 }, { "epoch": 1.92524436694048, "grad_norm": 0.47595614194869995, "learning_rate": 2.9505376531713904e-06, "loss": 0.36, "step": 41953 }, { "epoch": 1.9252902574457345, "grad_norm": 0.4626908302307129, "learning_rate": 2.9503140114455296e-06, "loss": 0.3046, "step": 41954 }, { "epoch": 1.925336147950989, "grad_norm": 0.4720257818698883, "learning_rate": 2.9500903746485155e-06, "loss": 0.4229, "step": 41955 }, { "epoch": 1.9253820384562434, "grad_norm": 0.47402459383010864, "learning_rate": 2.9498667427808875e-06, "loss": 0.2719, "step": 41956 }, { "epoch": 1.925427928961498, "grad_norm": 0.4400371015071869, "learning_rate": 2.949643115843184e-06, "loss": 0.3466, "step": 41957 }, { "epoch": 1.9254738194667524, "grad_norm": 0.4624444842338562, "learning_rate": 2.9494194938359385e-06, "loss": 0.3417, "step": 41958 }, { "epoch": 1.925519709972007, "grad_norm": 0.46192869544029236, "learning_rate": 2.949195876759694e-06, "loss": 0.3461, "step": 41959 }, { "epoch": 1.9255656004772612, "grad_norm": 0.45404961705207825, "learning_rate": 2.9489722646149864e-06, "loss": 0.331, "step": 41960 }, { "epoch": 1.9256114909825157, "grad_norm": 0.46741676330566406, "learning_rate": 2.9487486574023525e-06, "loss": 0.3695, "step": 41961 }, { "epoch": 1.9256573814877702, "grad_norm": 0.44783446192741394, "learning_rate": 2.948525055122332e-06, "loss": 0.3213, "step": 41962 }, { "epoch": 1.9257032719930245, "grad_norm": 0.47805196046829224, "learning_rate": 2.948301457775461e-06, "loss": 0.3278, "step": 41963 }, { "epoch": 1.925749162498279, "grad_norm": 0.4132208526134491, "learning_rate": 2.9480778653622754e-06, "loss": 0.2631, "step": 41964 }, { "epoch": 1.9257950530035335, "grad_norm": 0.46063268184661865, "learning_rate": 2.9478542778833163e-06, "loss": 0.3333, "step": 41965 }, { "epoch": 1.925840943508788, "grad_norm": 0.4590644836425781, "learning_rate": 2.947630695339121e-06, "loss": 0.3227, "step": 41966 }, { "epoch": 1.9258868340140425, "grad_norm": 0.46503424644470215, "learning_rate": 2.9474071177302244e-06, "loss": 0.3523, "step": 41967 }, { "epoch": 1.925932724519297, "grad_norm": 0.41096875071525574, "learning_rate": 2.9471835450571674e-06, "loss": 0.2591, "step": 41968 }, { "epoch": 1.9259786150245515, "grad_norm": 0.4484167695045471, "learning_rate": 2.946959977320486e-06, "loss": 0.3343, "step": 41969 }, { "epoch": 1.926024505529806, "grad_norm": 0.470647931098938, "learning_rate": 2.9467364145207155e-06, "loss": 0.316, "step": 41970 }, { "epoch": 1.9260703960350605, "grad_norm": 0.46141326427459717, "learning_rate": 2.946512856658399e-06, "loss": 0.3207, "step": 41971 }, { "epoch": 1.926116286540315, "grad_norm": 0.43813079595565796, "learning_rate": 2.94628930373407e-06, "loss": 0.2788, "step": 41972 }, { "epoch": 1.9261621770455692, "grad_norm": 0.46059638261795044, "learning_rate": 2.946065755748265e-06, "loss": 0.3586, "step": 41973 }, { "epoch": 1.9262080675508237, "grad_norm": 0.4555175006389618, "learning_rate": 2.9458422127015263e-06, "loss": 0.3288, "step": 41974 }, { "epoch": 1.9262539580560782, "grad_norm": 0.48617950081825256, "learning_rate": 2.9456186745943884e-06, "loss": 0.3446, "step": 41975 }, { "epoch": 1.9262998485613325, "grad_norm": 0.48549461364746094, "learning_rate": 2.9453951414273885e-06, "loss": 0.3991, "step": 41976 }, { "epoch": 1.926345739066587, "grad_norm": 0.4570874869823456, "learning_rate": 2.9451716132010645e-06, "loss": 0.3232, "step": 41977 }, { "epoch": 1.9263916295718415, "grad_norm": 0.4721803069114685, "learning_rate": 2.9449480899159554e-06, "loss": 0.3737, "step": 41978 }, { "epoch": 1.926437520077096, "grad_norm": 0.5005239248275757, "learning_rate": 2.944724571572597e-06, "loss": 0.3825, "step": 41979 }, { "epoch": 1.9264834105823505, "grad_norm": 0.48354828357696533, "learning_rate": 2.9445010581715256e-06, "loss": 0.3402, "step": 41980 }, { "epoch": 1.926529301087605, "grad_norm": 0.47847437858581543, "learning_rate": 2.9442775497132823e-06, "loss": 0.3259, "step": 41981 }, { "epoch": 1.9265751915928595, "grad_norm": 0.4790976643562317, "learning_rate": 2.944054046198403e-06, "loss": 0.3718, "step": 41982 }, { "epoch": 1.926621082098114, "grad_norm": 0.5011014938354492, "learning_rate": 2.943830547627423e-06, "loss": 0.3964, "step": 41983 }, { "epoch": 1.9266669726033685, "grad_norm": 0.46058952808380127, "learning_rate": 2.943607054000884e-06, "loss": 0.3353, "step": 41984 }, { "epoch": 1.9267128631086228, "grad_norm": 0.4711500406265259, "learning_rate": 2.94338356531932e-06, "loss": 0.3217, "step": 41985 }, { "epoch": 1.9267587536138773, "grad_norm": 0.46995511651039124, "learning_rate": 2.9431600815832677e-06, "loss": 0.3688, "step": 41986 }, { "epoch": 1.9268046441191318, "grad_norm": 0.43752768635749817, "learning_rate": 2.942936602793268e-06, "loss": 0.3252, "step": 41987 }, { "epoch": 1.9268505346243863, "grad_norm": 0.4626831114292145, "learning_rate": 2.942713128949857e-06, "loss": 0.3409, "step": 41988 }, { "epoch": 1.9268964251296405, "grad_norm": 0.4503333270549774, "learning_rate": 2.94248966005357e-06, "loss": 0.3439, "step": 41989 }, { "epoch": 1.926942315634895, "grad_norm": 0.43884652853012085, "learning_rate": 2.942266196104948e-06, "loss": 0.2921, "step": 41990 }, { "epoch": 1.9269882061401495, "grad_norm": 0.44349929690361023, "learning_rate": 2.942042737104526e-06, "loss": 0.2878, "step": 41991 }, { "epoch": 1.927034096645404, "grad_norm": 0.4687816798686981, "learning_rate": 2.9418192830528393e-06, "loss": 0.3561, "step": 41992 }, { "epoch": 1.9270799871506585, "grad_norm": 0.522201657295227, "learning_rate": 2.9415958339504306e-06, "loss": 0.4591, "step": 41993 }, { "epoch": 1.927125877655913, "grad_norm": 0.5221208333969116, "learning_rate": 2.9413723897978335e-06, "loss": 0.3825, "step": 41994 }, { "epoch": 1.9271717681611675, "grad_norm": 0.4742391109466553, "learning_rate": 2.9411489505955844e-06, "loss": 0.3464, "step": 41995 }, { "epoch": 1.927217658666422, "grad_norm": 0.48604616522789, "learning_rate": 2.940925516344224e-06, "loss": 0.4005, "step": 41996 }, { "epoch": 1.9272635491716765, "grad_norm": 0.448048859834671, "learning_rate": 2.9407020870442877e-06, "loss": 0.3451, "step": 41997 }, { "epoch": 1.9273094396769308, "grad_norm": 0.46839645504951477, "learning_rate": 2.9404786626963123e-06, "loss": 0.3255, "step": 41998 }, { "epoch": 1.9273553301821853, "grad_norm": 0.46611514687538147, "learning_rate": 2.9402552433008365e-06, "loss": 0.3933, "step": 41999 }, { "epoch": 1.9274012206874398, "grad_norm": 0.4442223310470581, "learning_rate": 2.9400318288583964e-06, "loss": 0.3179, "step": 42000 }, { "epoch": 1.927447111192694, "grad_norm": 0.45116138458251953, "learning_rate": 2.9398084193695282e-06, "loss": 0.3284, "step": 42001 }, { "epoch": 1.9274930016979486, "grad_norm": 0.6890984773635864, "learning_rate": 2.939585014834772e-06, "loss": 0.3524, "step": 42002 }, { "epoch": 1.927538892203203, "grad_norm": 0.46744847297668457, "learning_rate": 2.939361615254664e-06, "loss": 0.3175, "step": 42003 }, { "epoch": 1.9275847827084576, "grad_norm": 0.48233115673065186, "learning_rate": 2.93913822062974e-06, "loss": 0.4167, "step": 42004 }, { "epoch": 1.927630673213712, "grad_norm": 0.47306227684020996, "learning_rate": 2.9389148309605385e-06, "loss": 0.3463, "step": 42005 }, { "epoch": 1.9276765637189666, "grad_norm": 0.4628267288208008, "learning_rate": 2.9386914462475966e-06, "loss": 0.3095, "step": 42006 }, { "epoch": 1.927722454224221, "grad_norm": 0.49226096272468567, "learning_rate": 2.9384680664914522e-06, "loss": 0.3859, "step": 42007 }, { "epoch": 1.9277683447294756, "grad_norm": 0.4826112389564514, "learning_rate": 2.9382446916926384e-06, "loss": 0.403, "step": 42008 }, { "epoch": 1.92781423523473, "grad_norm": 0.47766876220703125, "learning_rate": 2.9380213218516974e-06, "loss": 0.3695, "step": 42009 }, { "epoch": 1.9278601257399846, "grad_norm": 0.4814397990703583, "learning_rate": 2.937797956969165e-06, "loss": 0.3894, "step": 42010 }, { "epoch": 1.9279060162452388, "grad_norm": 0.4574735760688782, "learning_rate": 2.937574597045576e-06, "loss": 0.3496, "step": 42011 }, { "epoch": 1.9279519067504933, "grad_norm": 0.4615628719329834, "learning_rate": 2.937351242081471e-06, "loss": 0.3254, "step": 42012 }, { "epoch": 1.9279977972557478, "grad_norm": 0.4787447154521942, "learning_rate": 2.9371278920773843e-06, "loss": 0.3396, "step": 42013 }, { "epoch": 1.928043687761002, "grad_norm": 0.45925071835517883, "learning_rate": 2.936904547033852e-06, "loss": 0.3463, "step": 42014 }, { "epoch": 1.9280895782662566, "grad_norm": 0.4711998701095581, "learning_rate": 2.936681206951417e-06, "loss": 0.3873, "step": 42015 }, { "epoch": 1.928135468771511, "grad_norm": 0.44571447372436523, "learning_rate": 2.936457871830611e-06, "loss": 0.2821, "step": 42016 }, { "epoch": 1.9281813592767656, "grad_norm": 0.4529707133769989, "learning_rate": 2.9362345416719705e-06, "loss": 0.3361, "step": 42017 }, { "epoch": 1.92822724978202, "grad_norm": 0.47458595037460327, "learning_rate": 2.9360112164760367e-06, "loss": 0.3524, "step": 42018 }, { "epoch": 1.9282731402872746, "grad_norm": 0.5064071416854858, "learning_rate": 2.9357878962433446e-06, "loss": 0.387, "step": 42019 }, { "epoch": 1.928319030792529, "grad_norm": 0.4533954858779907, "learning_rate": 2.93556458097443e-06, "loss": 0.3327, "step": 42020 }, { "epoch": 1.9283649212977836, "grad_norm": 0.407736212015152, "learning_rate": 2.9353412706698316e-06, "loss": 0.231, "step": 42021 }, { "epoch": 1.928410811803038, "grad_norm": 0.4988231658935547, "learning_rate": 2.9351179653300865e-06, "loss": 0.3689, "step": 42022 }, { "epoch": 1.9284567023082926, "grad_norm": 0.4432070553302765, "learning_rate": 2.9348946649557286e-06, "loss": 0.3154, "step": 42023 }, { "epoch": 1.9285025928135469, "grad_norm": 0.47020474076271057, "learning_rate": 2.9346713695472996e-06, "loss": 0.3731, "step": 42024 }, { "epoch": 1.9285484833188014, "grad_norm": 0.4891517162322998, "learning_rate": 2.934448079105334e-06, "loss": 0.4265, "step": 42025 }, { "epoch": 1.9285943738240559, "grad_norm": 0.47520044445991516, "learning_rate": 2.9342247936303677e-06, "loss": 0.37, "step": 42026 }, { "epoch": 1.9286402643293101, "grad_norm": 0.5230405330657959, "learning_rate": 2.9340015131229393e-06, "loss": 0.4124, "step": 42027 }, { "epoch": 1.9286861548345646, "grad_norm": 0.4929693937301636, "learning_rate": 2.933778237583586e-06, "loss": 0.4079, "step": 42028 }, { "epoch": 1.9287320453398191, "grad_norm": 0.4549710154533386, "learning_rate": 2.9335549670128414e-06, "loss": 0.375, "step": 42029 }, { "epoch": 1.9287779358450736, "grad_norm": 0.46801477670669556, "learning_rate": 2.9333317014112473e-06, "loss": 0.3408, "step": 42030 }, { "epoch": 1.9288238263503281, "grad_norm": 0.5093133449554443, "learning_rate": 2.9331084407793385e-06, "loss": 0.4043, "step": 42031 }, { "epoch": 1.9288697168555826, "grad_norm": 0.4378516674041748, "learning_rate": 2.9328851851176498e-06, "loss": 0.3051, "step": 42032 }, { "epoch": 1.9289156073608371, "grad_norm": 0.48876699805259705, "learning_rate": 2.9326619344267217e-06, "loss": 0.3963, "step": 42033 }, { "epoch": 1.9289614978660916, "grad_norm": 0.4867330491542816, "learning_rate": 2.932438688707089e-06, "loss": 0.3718, "step": 42034 }, { "epoch": 1.929007388371346, "grad_norm": 0.4951794445514679, "learning_rate": 2.932215447959286e-06, "loss": 0.3927, "step": 42035 }, { "epoch": 1.9290532788766004, "grad_norm": 0.4565521776676178, "learning_rate": 2.9319922121838546e-06, "loss": 0.3338, "step": 42036 }, { "epoch": 1.9290991693818549, "grad_norm": 0.4589291512966156, "learning_rate": 2.93176898138133e-06, "loss": 0.3593, "step": 42037 }, { "epoch": 1.9291450598871094, "grad_norm": 0.5452494025230408, "learning_rate": 2.931545755552246e-06, "loss": 0.4368, "step": 42038 }, { "epoch": 1.9291909503923637, "grad_norm": 0.5167415142059326, "learning_rate": 2.931322534697143e-06, "loss": 0.3852, "step": 42039 }, { "epoch": 1.9292368408976182, "grad_norm": 0.4778722822666168, "learning_rate": 2.9310993188165563e-06, "loss": 0.3808, "step": 42040 }, { "epoch": 1.9292827314028727, "grad_norm": 0.4628783166408539, "learning_rate": 2.9308761079110233e-06, "loss": 0.3551, "step": 42041 }, { "epoch": 1.9293286219081272, "grad_norm": 0.5074269771575928, "learning_rate": 2.930652901981079e-06, "loss": 0.3506, "step": 42042 }, { "epoch": 1.9293745124133816, "grad_norm": 0.5508561730384827, "learning_rate": 2.930429701027262e-06, "loss": 0.4317, "step": 42043 }, { "epoch": 1.9294204029186361, "grad_norm": 0.5162907838821411, "learning_rate": 2.9302065050501093e-06, "loss": 0.4355, "step": 42044 }, { "epoch": 1.9294662934238906, "grad_norm": 0.4954761266708374, "learning_rate": 2.929983314050154e-06, "loss": 0.3991, "step": 42045 }, { "epoch": 1.9295121839291451, "grad_norm": 0.4895554780960083, "learning_rate": 2.9297601280279374e-06, "loss": 0.3984, "step": 42046 }, { "epoch": 1.9295580744343996, "grad_norm": 0.45133498311042786, "learning_rate": 2.9295369469839944e-06, "loss": 0.3221, "step": 42047 }, { "epoch": 1.9296039649396541, "grad_norm": 0.4288400411605835, "learning_rate": 2.929313770918861e-06, "loss": 0.2783, "step": 42048 }, { "epoch": 1.9296498554449084, "grad_norm": 0.4393431842327118, "learning_rate": 2.9290905998330755e-06, "loss": 0.3051, "step": 42049 }, { "epoch": 1.929695745950163, "grad_norm": 0.4939703941345215, "learning_rate": 2.9288674337271726e-06, "loss": 0.3434, "step": 42050 }, { "epoch": 1.9297416364554174, "grad_norm": 0.481050044298172, "learning_rate": 2.928644272601689e-06, "loss": 0.3526, "step": 42051 }, { "epoch": 1.9297875269606717, "grad_norm": 0.4991039037704468, "learning_rate": 2.9284211164571637e-06, "loss": 0.4258, "step": 42052 }, { "epoch": 1.9298334174659262, "grad_norm": 0.4867273271083832, "learning_rate": 2.9281979652941317e-06, "loss": 0.3689, "step": 42053 }, { "epoch": 1.9298793079711807, "grad_norm": 0.5187358856201172, "learning_rate": 2.9279748191131284e-06, "loss": 0.3522, "step": 42054 }, { "epoch": 1.9299251984764352, "grad_norm": 0.428314208984375, "learning_rate": 2.927751677914693e-06, "loss": 0.2793, "step": 42055 }, { "epoch": 1.9299710889816897, "grad_norm": 0.4773090183734894, "learning_rate": 2.9275285416993617e-06, "loss": 0.3876, "step": 42056 }, { "epoch": 1.9300169794869442, "grad_norm": 0.4792625904083252, "learning_rate": 2.9273054104676666e-06, "loss": 0.3763, "step": 42057 }, { "epoch": 1.9300628699921987, "grad_norm": 0.44843921065330505, "learning_rate": 2.9270822842201506e-06, "loss": 0.3027, "step": 42058 }, { "epoch": 1.9301087604974532, "grad_norm": 0.42734670639038086, "learning_rate": 2.9268591629573486e-06, "loss": 0.2932, "step": 42059 }, { "epoch": 1.9301546510027077, "grad_norm": 0.4823648929595947, "learning_rate": 2.926636046679793e-06, "loss": 0.4101, "step": 42060 }, { "epoch": 1.9302005415079622, "grad_norm": 0.4839698374271393, "learning_rate": 2.9264129353880254e-06, "loss": 0.3662, "step": 42061 }, { "epoch": 1.9302464320132164, "grad_norm": 0.4839797019958496, "learning_rate": 2.9261898290825797e-06, "loss": 0.3816, "step": 42062 }, { "epoch": 1.930292322518471, "grad_norm": 0.49361205101013184, "learning_rate": 2.9259667277639925e-06, "loss": 0.3881, "step": 42063 }, { "epoch": 1.9303382130237254, "grad_norm": 0.42431896924972534, "learning_rate": 2.9257436314328014e-06, "loss": 0.2716, "step": 42064 }, { "epoch": 1.9303841035289797, "grad_norm": 0.45804792642593384, "learning_rate": 2.9255205400895426e-06, "loss": 0.3051, "step": 42065 }, { "epoch": 1.9304299940342342, "grad_norm": 0.5154539346694946, "learning_rate": 2.9252974537347496e-06, "loss": 0.4405, "step": 42066 }, { "epoch": 1.9304758845394887, "grad_norm": 0.4379839301109314, "learning_rate": 2.9250743723689633e-06, "loss": 0.2845, "step": 42067 }, { "epoch": 1.9305217750447432, "grad_norm": 0.49869784712791443, "learning_rate": 2.924851295992719e-06, "loss": 0.4407, "step": 42068 }, { "epoch": 1.9305676655499977, "grad_norm": 0.4674595892429352, "learning_rate": 2.9246282246065516e-06, "loss": 0.369, "step": 42069 }, { "epoch": 1.9306135560552522, "grad_norm": 0.5784079432487488, "learning_rate": 2.9244051582109982e-06, "loss": 0.3863, "step": 42070 }, { "epoch": 1.9306594465605067, "grad_norm": 0.49187976121902466, "learning_rate": 2.9241820968065958e-06, "loss": 0.4153, "step": 42071 }, { "epoch": 1.9307053370657612, "grad_norm": 0.43748101592063904, "learning_rate": 2.9239590403938787e-06, "loss": 0.3271, "step": 42072 }, { "epoch": 1.9307512275710157, "grad_norm": 0.44551801681518555, "learning_rate": 2.9237359889733867e-06, "loss": 0.3017, "step": 42073 }, { "epoch": 1.93079711807627, "grad_norm": 0.4959730803966522, "learning_rate": 2.923512942545654e-06, "loss": 0.3916, "step": 42074 }, { "epoch": 1.9308430085815245, "grad_norm": 0.48266884684562683, "learning_rate": 2.923289901111218e-06, "loss": 0.3724, "step": 42075 }, { "epoch": 1.930888899086779, "grad_norm": 0.4773469865322113, "learning_rate": 2.923066864670613e-06, "loss": 0.4333, "step": 42076 }, { "epoch": 1.9309347895920335, "grad_norm": 0.506525993347168, "learning_rate": 2.922843833224378e-06, "loss": 0.3461, "step": 42077 }, { "epoch": 1.9309806800972877, "grad_norm": 0.47661933302879333, "learning_rate": 2.9226208067730477e-06, "loss": 0.3589, "step": 42078 }, { "epoch": 1.9310265706025422, "grad_norm": 0.4811803102493286, "learning_rate": 2.9223977853171563e-06, "loss": 0.3798, "step": 42079 }, { "epoch": 1.9310724611077967, "grad_norm": 0.4394163191318512, "learning_rate": 2.922174768857245e-06, "loss": 0.3513, "step": 42080 }, { "epoch": 1.9311183516130512, "grad_norm": 0.4961719214916229, "learning_rate": 2.9219517573938482e-06, "loss": 0.4458, "step": 42081 }, { "epoch": 1.9311642421183057, "grad_norm": 0.4551577568054199, "learning_rate": 2.9217287509274996e-06, "loss": 0.3326, "step": 42082 }, { "epoch": 1.9312101326235602, "grad_norm": 0.4592211842536926, "learning_rate": 2.9215057494587385e-06, "loss": 0.3372, "step": 42083 }, { "epoch": 1.9312560231288147, "grad_norm": 0.46909746527671814, "learning_rate": 2.9212827529881003e-06, "loss": 0.3431, "step": 42084 }, { "epoch": 1.9313019136340692, "grad_norm": 0.49250999093055725, "learning_rate": 2.9210597615161195e-06, "loss": 0.3983, "step": 42085 }, { "epoch": 1.9313478041393237, "grad_norm": 0.5084668397903442, "learning_rate": 2.920836775043335e-06, "loss": 0.4409, "step": 42086 }, { "epoch": 1.931393694644578, "grad_norm": 0.4608757197856903, "learning_rate": 2.9206137935702827e-06, "loss": 0.3799, "step": 42087 }, { "epoch": 1.9314395851498325, "grad_norm": 0.4868035912513733, "learning_rate": 2.920390817097496e-06, "loss": 0.381, "step": 42088 }, { "epoch": 1.931485475655087, "grad_norm": 0.4886019229888916, "learning_rate": 2.9201678456255145e-06, "loss": 0.4257, "step": 42089 }, { "epoch": 1.9315313661603413, "grad_norm": 0.4507116973400116, "learning_rate": 2.9199448791548724e-06, "loss": 0.3259, "step": 42090 }, { "epoch": 1.9315772566655958, "grad_norm": 0.5034294724464417, "learning_rate": 2.919721917686106e-06, "loss": 0.4467, "step": 42091 }, { "epoch": 1.9316231471708503, "grad_norm": 0.47825857996940613, "learning_rate": 2.9194989612197526e-06, "loss": 0.4231, "step": 42092 }, { "epoch": 1.9316690376761048, "grad_norm": 0.4530491530895233, "learning_rate": 2.9192760097563477e-06, "loss": 0.3012, "step": 42093 }, { "epoch": 1.9317149281813593, "grad_norm": 0.4639139473438263, "learning_rate": 2.9190530632964253e-06, "loss": 0.3418, "step": 42094 }, { "epoch": 1.9317608186866138, "grad_norm": 0.4808740019798279, "learning_rate": 2.918830121840526e-06, "loss": 0.3815, "step": 42095 }, { "epoch": 1.9318067091918683, "grad_norm": 0.5169860124588013, "learning_rate": 2.918607185389183e-06, "loss": 0.389, "step": 42096 }, { "epoch": 1.9318525996971228, "grad_norm": 0.4533408582210541, "learning_rate": 2.918384253942931e-06, "loss": 0.3058, "step": 42097 }, { "epoch": 1.9318984902023772, "grad_norm": 0.9880781173706055, "learning_rate": 2.918161327502309e-06, "loss": 0.3605, "step": 42098 }, { "epoch": 1.9319443807076317, "grad_norm": 0.5079941153526306, "learning_rate": 2.9179384060678527e-06, "loss": 0.3942, "step": 42099 }, { "epoch": 1.931990271212886, "grad_norm": 0.5017287135124207, "learning_rate": 2.917715489640097e-06, "loss": 0.3507, "step": 42100 }, { "epoch": 1.9320361617181405, "grad_norm": 0.47653111815452576, "learning_rate": 2.917492578219579e-06, "loss": 0.3328, "step": 42101 }, { "epoch": 1.932082052223395, "grad_norm": 0.45364201068878174, "learning_rate": 2.917269671806834e-06, "loss": 0.3674, "step": 42102 }, { "epoch": 1.9321279427286493, "grad_norm": 0.4628329575061798, "learning_rate": 2.917046770402396e-06, "loss": 0.374, "step": 42103 }, { "epoch": 1.9321738332339038, "grad_norm": 0.4694858193397522, "learning_rate": 2.9168238740068044e-06, "loss": 0.3445, "step": 42104 }, { "epoch": 1.9322197237391583, "grad_norm": 0.45285314321517944, "learning_rate": 2.916600982620595e-06, "loss": 0.3015, "step": 42105 }, { "epoch": 1.9322656142444128, "grad_norm": 0.4394005537033081, "learning_rate": 2.9163780962443024e-06, "loss": 0.3378, "step": 42106 }, { "epoch": 1.9323115047496673, "grad_norm": 0.4401160180568695, "learning_rate": 2.91615521487846e-06, "loss": 0.2752, "step": 42107 }, { "epoch": 1.9323573952549218, "grad_norm": 0.4532160460948944, "learning_rate": 2.915932338523609e-06, "loss": 0.3035, "step": 42108 }, { "epoch": 1.9324032857601763, "grad_norm": 0.4551438093185425, "learning_rate": 2.9157094671802842e-06, "loss": 0.3206, "step": 42109 }, { "epoch": 1.9324491762654308, "grad_norm": 0.5063810348510742, "learning_rate": 2.915486600849019e-06, "loss": 0.4034, "step": 42110 }, { "epoch": 1.9324950667706853, "grad_norm": 0.46457839012145996, "learning_rate": 2.915263739530351e-06, "loss": 0.3331, "step": 42111 }, { "epoch": 1.9325409572759396, "grad_norm": 0.5064998865127563, "learning_rate": 2.9150408832248155e-06, "loss": 0.3894, "step": 42112 }, { "epoch": 1.932586847781194, "grad_norm": 0.4744535982608795, "learning_rate": 2.914818031932948e-06, "loss": 0.3337, "step": 42113 }, { "epoch": 1.9326327382864485, "grad_norm": 0.42398813366889954, "learning_rate": 2.9145951856552857e-06, "loss": 0.2739, "step": 42114 }, { "epoch": 1.932678628791703, "grad_norm": 0.4621594548225403, "learning_rate": 2.914372344392364e-06, "loss": 0.3178, "step": 42115 }, { "epoch": 1.9327245192969573, "grad_norm": 0.4785335958003998, "learning_rate": 2.914149508144716e-06, "loss": 0.3504, "step": 42116 }, { "epoch": 1.9327704098022118, "grad_norm": 0.4840576648712158, "learning_rate": 2.9139266769128834e-06, "loss": 0.3824, "step": 42117 }, { "epoch": 1.9328163003074663, "grad_norm": 0.5077571868896484, "learning_rate": 2.913703850697398e-06, "loss": 0.4273, "step": 42118 }, { "epoch": 1.9328621908127208, "grad_norm": 0.4889448583126068, "learning_rate": 2.9134810294987942e-06, "loss": 0.3989, "step": 42119 }, { "epoch": 1.9329080813179753, "grad_norm": 0.44574835896492004, "learning_rate": 2.9132582133176124e-06, "loss": 0.3015, "step": 42120 }, { "epoch": 1.9329539718232298, "grad_norm": 0.47418269515037537, "learning_rate": 2.913035402154386e-06, "loss": 0.3664, "step": 42121 }, { "epoch": 1.9329998623284843, "grad_norm": 0.4595847725868225, "learning_rate": 2.912812596009651e-06, "loss": 0.3344, "step": 42122 }, { "epoch": 1.9330457528337388, "grad_norm": 0.5350896716117859, "learning_rate": 2.9125897948839423e-06, "loss": 0.3602, "step": 42123 }, { "epoch": 1.9330916433389933, "grad_norm": 0.45919641852378845, "learning_rate": 2.912366998777797e-06, "loss": 0.3457, "step": 42124 }, { "epoch": 1.9331375338442476, "grad_norm": 0.4299490451812744, "learning_rate": 2.9121442076917472e-06, "loss": 0.3044, "step": 42125 }, { "epoch": 1.933183424349502, "grad_norm": 0.5153296589851379, "learning_rate": 2.911921421626335e-06, "loss": 0.4377, "step": 42126 }, { "epoch": 1.9332293148547566, "grad_norm": 0.47546374797821045, "learning_rate": 2.9116986405820918e-06, "loss": 0.3638, "step": 42127 }, { "epoch": 1.9332752053600109, "grad_norm": 0.4855397641658783, "learning_rate": 2.9114758645595526e-06, "loss": 0.3811, "step": 42128 }, { "epoch": 1.9333210958652653, "grad_norm": 0.4485084116458893, "learning_rate": 2.9112530935592564e-06, "loss": 0.3402, "step": 42129 }, { "epoch": 1.9333669863705198, "grad_norm": 0.45368972420692444, "learning_rate": 2.9110303275817377e-06, "loss": 0.3126, "step": 42130 }, { "epoch": 1.9334128768757743, "grad_norm": 0.49389320611953735, "learning_rate": 2.9108075666275323e-06, "loss": 0.3814, "step": 42131 }, { "epoch": 1.9334587673810288, "grad_norm": 0.4625241756439209, "learning_rate": 2.9105848106971745e-06, "loss": 0.3067, "step": 42132 }, { "epoch": 1.9335046578862833, "grad_norm": 0.48028871417045593, "learning_rate": 2.9103620597912006e-06, "loss": 0.3749, "step": 42133 }, { "epoch": 1.9335505483915378, "grad_norm": 0.5282205939292908, "learning_rate": 2.9101393139101455e-06, "loss": 0.3922, "step": 42134 }, { "epoch": 1.9335964388967923, "grad_norm": 0.4563572108745575, "learning_rate": 2.9099165730545465e-06, "loss": 0.3453, "step": 42135 }, { "epoch": 1.9336423294020468, "grad_norm": 0.4855148494243622, "learning_rate": 2.9096938372249396e-06, "loss": 0.3414, "step": 42136 }, { "epoch": 1.9336882199073013, "grad_norm": 0.45987820625305176, "learning_rate": 2.9094711064218566e-06, "loss": 0.3296, "step": 42137 }, { "epoch": 1.9337341104125556, "grad_norm": 0.5211333632469177, "learning_rate": 2.9092483806458383e-06, "loss": 0.4557, "step": 42138 }, { "epoch": 1.93378000091781, "grad_norm": 0.4933227300643921, "learning_rate": 2.9090256598974175e-06, "loss": 0.3568, "step": 42139 }, { "epoch": 1.9338258914230646, "grad_norm": 0.4733366072177887, "learning_rate": 2.90880294417713e-06, "loss": 0.4067, "step": 42140 }, { "epoch": 1.9338717819283189, "grad_norm": 0.46922239661216736, "learning_rate": 2.9085802334855097e-06, "loss": 0.3291, "step": 42141 }, { "epoch": 1.9339176724335734, "grad_norm": 0.4737309515476227, "learning_rate": 2.908357527823095e-06, "loss": 0.2983, "step": 42142 }, { "epoch": 1.9339635629388279, "grad_norm": 0.5814620852470398, "learning_rate": 2.9081348271904214e-06, "loss": 0.4987, "step": 42143 }, { "epoch": 1.9340094534440824, "grad_norm": 0.4887355864048004, "learning_rate": 2.907912131588023e-06, "loss": 0.4084, "step": 42144 }, { "epoch": 1.9340553439493369, "grad_norm": 0.5126840472221375, "learning_rate": 2.9076894410164352e-06, "loss": 0.3431, "step": 42145 }, { "epoch": 1.9341012344545914, "grad_norm": 0.5245996713638306, "learning_rate": 2.9074667554761946e-06, "loss": 0.4325, "step": 42146 }, { "epoch": 1.9341471249598459, "grad_norm": 0.4735458493232727, "learning_rate": 2.9072440749678333e-06, "loss": 0.3502, "step": 42147 }, { "epoch": 1.9341930154651004, "grad_norm": 0.43617480993270874, "learning_rate": 2.907021399491892e-06, "loss": 0.3062, "step": 42148 }, { "epoch": 1.9342389059703549, "grad_norm": 0.47177228331565857, "learning_rate": 2.906798729048904e-06, "loss": 0.3343, "step": 42149 }, { "epoch": 1.9342847964756094, "grad_norm": 0.45170366764068604, "learning_rate": 2.9065760636394014e-06, "loss": 0.3282, "step": 42150 }, { "epoch": 1.9343306869808636, "grad_norm": 0.4893967807292938, "learning_rate": 2.9063534032639256e-06, "loss": 0.3518, "step": 42151 }, { "epoch": 1.9343765774861181, "grad_norm": 0.4224891662597656, "learning_rate": 2.9061307479230087e-06, "loss": 0.272, "step": 42152 }, { "epoch": 1.9344224679913726, "grad_norm": 0.4226889908313751, "learning_rate": 2.9059080976171868e-06, "loss": 0.2654, "step": 42153 }, { "epoch": 1.934468358496627, "grad_norm": 0.47182178497314453, "learning_rate": 2.905685452346995e-06, "loss": 0.3401, "step": 42154 }, { "epoch": 1.9345142490018814, "grad_norm": 0.46092697978019714, "learning_rate": 2.905462812112968e-06, "loss": 0.3444, "step": 42155 }, { "epoch": 1.934560139507136, "grad_norm": 0.4311964511871338, "learning_rate": 2.9052401769156403e-06, "loss": 0.2945, "step": 42156 }, { "epoch": 1.9346060300123904, "grad_norm": 0.4491490125656128, "learning_rate": 2.905017546755551e-06, "loss": 0.3244, "step": 42157 }, { "epoch": 1.934651920517645, "grad_norm": 0.4752199351787567, "learning_rate": 2.904794921633233e-06, "loss": 0.3521, "step": 42158 }, { "epoch": 1.9346978110228994, "grad_norm": 0.4533317983150482, "learning_rate": 2.9045723015492207e-06, "loss": 0.3309, "step": 42159 }, { "epoch": 1.934743701528154, "grad_norm": 0.4792214334011078, "learning_rate": 2.9043496865040523e-06, "loss": 0.425, "step": 42160 }, { "epoch": 1.9347895920334084, "grad_norm": 0.533702552318573, "learning_rate": 2.904127076498261e-06, "loss": 0.4237, "step": 42161 }, { "epoch": 1.9348354825386629, "grad_norm": 0.4997898042201996, "learning_rate": 2.9039044715323806e-06, "loss": 0.39, "step": 42162 }, { "epoch": 1.9348813730439172, "grad_norm": 0.4483516812324524, "learning_rate": 2.9036818716069514e-06, "loss": 0.3369, "step": 42163 }, { "epoch": 1.9349272635491717, "grad_norm": 0.4723590612411499, "learning_rate": 2.9034592767225046e-06, "loss": 0.37, "step": 42164 }, { "epoch": 1.9349731540544262, "grad_norm": 0.5056209564208984, "learning_rate": 2.903236686879577e-06, "loss": 0.3748, "step": 42165 }, { "epoch": 1.9350190445596807, "grad_norm": 0.4600323736667633, "learning_rate": 2.9030141020787034e-06, "loss": 0.3644, "step": 42166 }, { "epoch": 1.935064935064935, "grad_norm": 0.4863569438457489, "learning_rate": 2.9027915223204194e-06, "loss": 0.3845, "step": 42167 }, { "epoch": 1.9351108255701894, "grad_norm": 0.4348300099372864, "learning_rate": 2.9025689476052576e-06, "loss": 0.2912, "step": 42168 }, { "epoch": 1.935156716075444, "grad_norm": 0.488645076751709, "learning_rate": 2.902346377933757e-06, "loss": 0.35, "step": 42169 }, { "epoch": 1.9352026065806984, "grad_norm": 0.41446781158447266, "learning_rate": 2.9021238133064523e-06, "loss": 0.2736, "step": 42170 }, { "epoch": 1.935248497085953, "grad_norm": 0.43505948781967163, "learning_rate": 2.901901253723877e-06, "loss": 0.2999, "step": 42171 }, { "epoch": 1.9352943875912074, "grad_norm": 0.4395577609539032, "learning_rate": 2.9016786991865654e-06, "loss": 0.3156, "step": 42172 }, { "epoch": 1.935340278096462, "grad_norm": 0.44843775033950806, "learning_rate": 2.9014561496950555e-06, "loss": 0.3446, "step": 42173 }, { "epoch": 1.9353861686017164, "grad_norm": 0.4655916392803192, "learning_rate": 2.9012336052498824e-06, "loss": 0.2908, "step": 42174 }, { "epoch": 1.935432059106971, "grad_norm": 0.45491644740104675, "learning_rate": 2.901011065851579e-06, "loss": 0.2982, "step": 42175 }, { "epoch": 1.9354779496122252, "grad_norm": 0.5015460848808289, "learning_rate": 2.9007885315006823e-06, "loss": 0.4545, "step": 42176 }, { "epoch": 1.9355238401174797, "grad_norm": 0.4443851113319397, "learning_rate": 2.9005660021977266e-06, "loss": 0.3232, "step": 42177 }, { "epoch": 1.9355697306227342, "grad_norm": 0.454862505197525, "learning_rate": 2.900343477943245e-06, "loss": 0.3256, "step": 42178 }, { "epoch": 1.9356156211279885, "grad_norm": 0.4940004348754883, "learning_rate": 2.900120958737776e-06, "loss": 0.3937, "step": 42179 }, { "epoch": 1.935661511633243, "grad_norm": 0.40973758697509766, "learning_rate": 2.899898444581854e-06, "loss": 0.288, "step": 42180 }, { "epoch": 1.9357074021384975, "grad_norm": 0.4419251084327698, "learning_rate": 2.8996759354760113e-06, "loss": 0.2838, "step": 42181 }, { "epoch": 1.935753292643752, "grad_norm": 0.47457218170166016, "learning_rate": 2.8994534314207866e-06, "loss": 0.369, "step": 42182 }, { "epoch": 1.9357991831490065, "grad_norm": 0.46906277537345886, "learning_rate": 2.8992309324167145e-06, "loss": 0.3306, "step": 42183 }, { "epoch": 1.935845073654261, "grad_norm": 0.48183688521385193, "learning_rate": 2.8990084384643265e-06, "loss": 0.357, "step": 42184 }, { "epoch": 1.9358909641595154, "grad_norm": 0.49267420172691345, "learning_rate": 2.898785949564162e-06, "loss": 0.3492, "step": 42185 }, { "epoch": 1.93593685466477, "grad_norm": 0.4676329791545868, "learning_rate": 2.8985634657167538e-06, "loss": 0.3551, "step": 42186 }, { "epoch": 1.9359827451700244, "grad_norm": 0.4613717198371887, "learning_rate": 2.8983409869226375e-06, "loss": 0.3206, "step": 42187 }, { "epoch": 1.936028635675279, "grad_norm": 0.512630820274353, "learning_rate": 2.898118513182349e-06, "loss": 0.426, "step": 42188 }, { "epoch": 1.9360745261805332, "grad_norm": 0.4612993597984314, "learning_rate": 2.89789604449642e-06, "loss": 0.3273, "step": 42189 }, { "epoch": 1.9361204166857877, "grad_norm": 0.421529084444046, "learning_rate": 2.8976735808653866e-06, "loss": 0.2942, "step": 42190 }, { "epoch": 1.9361663071910422, "grad_norm": 0.4599722623825073, "learning_rate": 2.897451122289787e-06, "loss": 0.3489, "step": 42191 }, { "epoch": 1.9362121976962965, "grad_norm": 0.4721481502056122, "learning_rate": 2.8972286687701535e-06, "loss": 0.368, "step": 42192 }, { "epoch": 1.936258088201551, "grad_norm": 0.43672046065330505, "learning_rate": 2.8970062203070195e-06, "loss": 0.3314, "step": 42193 }, { "epoch": 1.9363039787068055, "grad_norm": 0.4383222758769989, "learning_rate": 2.8967837769009237e-06, "loss": 0.3222, "step": 42194 }, { "epoch": 1.93634986921206, "grad_norm": 0.48983079195022583, "learning_rate": 2.8965613385523994e-06, "loss": 0.4209, "step": 42195 }, { "epoch": 1.9363957597173145, "grad_norm": 0.4640178680419922, "learning_rate": 2.896338905261981e-06, "loss": 0.3223, "step": 42196 }, { "epoch": 1.936441650222569, "grad_norm": 0.46103790402412415, "learning_rate": 2.8961164770302036e-06, "loss": 0.3175, "step": 42197 }, { "epoch": 1.9364875407278235, "grad_norm": 0.48561206459999084, "learning_rate": 2.895894053857602e-06, "loss": 0.3811, "step": 42198 }, { "epoch": 1.936533431233078, "grad_norm": 0.5179795622825623, "learning_rate": 2.895671635744709e-06, "loss": 0.3793, "step": 42199 }, { "epoch": 1.9365793217383325, "grad_norm": 0.4862802028656006, "learning_rate": 2.895449222692064e-06, "loss": 0.3763, "step": 42200 }, { "epoch": 1.9366252122435867, "grad_norm": 0.4537147879600525, "learning_rate": 2.8952268147001993e-06, "loss": 0.3344, "step": 42201 }, { "epoch": 1.9366711027488412, "grad_norm": 0.4523382782936096, "learning_rate": 2.895004411769648e-06, "loss": 0.3027, "step": 42202 }, { "epoch": 1.9367169932540957, "grad_norm": 0.4754458963871002, "learning_rate": 2.894782013900949e-06, "loss": 0.3542, "step": 42203 }, { "epoch": 1.9367628837593502, "grad_norm": 0.48246800899505615, "learning_rate": 2.8945596210946336e-06, "loss": 0.4005, "step": 42204 }, { "epoch": 1.9368087742646045, "grad_norm": 0.4990043342113495, "learning_rate": 2.8943372333512386e-06, "loss": 0.3951, "step": 42205 }, { "epoch": 1.936854664769859, "grad_norm": 0.4921986162662506, "learning_rate": 2.894114850671296e-06, "loss": 0.3757, "step": 42206 }, { "epoch": 1.9369005552751135, "grad_norm": 0.5044657588005066, "learning_rate": 2.8938924730553443e-06, "loss": 0.4283, "step": 42207 }, { "epoch": 1.936946445780368, "grad_norm": 0.4628879427909851, "learning_rate": 2.893670100503917e-06, "loss": 0.3071, "step": 42208 }, { "epoch": 1.9369923362856225, "grad_norm": 0.5120128989219666, "learning_rate": 2.8934477330175475e-06, "loss": 0.3984, "step": 42209 }, { "epoch": 1.937038226790877, "grad_norm": 0.4499504268169403, "learning_rate": 2.8932253705967716e-06, "loss": 0.3186, "step": 42210 }, { "epoch": 1.9370841172961315, "grad_norm": 0.5084688663482666, "learning_rate": 2.8930030132421243e-06, "loss": 0.4366, "step": 42211 }, { "epoch": 1.937130007801386, "grad_norm": 0.4980149567127228, "learning_rate": 2.8927806609541375e-06, "loss": 0.4157, "step": 42212 }, { "epoch": 1.9371758983066405, "grad_norm": 0.5168014168739319, "learning_rate": 2.89255831373335e-06, "loss": 0.4309, "step": 42213 }, { "epoch": 1.9372217888118948, "grad_norm": 0.46546173095703125, "learning_rate": 2.8923359715802946e-06, "loss": 0.3502, "step": 42214 }, { "epoch": 1.9372676793171493, "grad_norm": 0.47357630729675293, "learning_rate": 2.892113634495505e-06, "loss": 0.353, "step": 42215 }, { "epoch": 1.9373135698224038, "grad_norm": 0.4673360288143158, "learning_rate": 2.891891302479517e-06, "loss": 0.3532, "step": 42216 }, { "epoch": 1.937359460327658, "grad_norm": 0.46716147661209106, "learning_rate": 2.8916689755328663e-06, "loss": 0.3076, "step": 42217 }, { "epoch": 1.9374053508329125, "grad_norm": 0.47978776693344116, "learning_rate": 2.891446653656086e-06, "loss": 0.3718, "step": 42218 }, { "epoch": 1.937451241338167, "grad_norm": 0.4397413432598114, "learning_rate": 2.891224336849711e-06, "loss": 0.3502, "step": 42219 }, { "epoch": 1.9374971318434215, "grad_norm": 0.47159865498542786, "learning_rate": 2.891002025114276e-06, "loss": 0.3427, "step": 42220 }, { "epoch": 1.937543022348676, "grad_norm": 0.4388810396194458, "learning_rate": 2.890779718450314e-06, "loss": 0.3008, "step": 42221 }, { "epoch": 1.9375889128539305, "grad_norm": 0.4507732391357422, "learning_rate": 2.890557416858363e-06, "loss": 0.3549, "step": 42222 }, { "epoch": 1.937634803359185, "grad_norm": 0.47572746872901917, "learning_rate": 2.8903351203389546e-06, "loss": 0.3611, "step": 42223 }, { "epoch": 1.9376806938644395, "grad_norm": 0.49364981055259705, "learning_rate": 2.890112828892623e-06, "loss": 0.3834, "step": 42224 }, { "epoch": 1.937726584369694, "grad_norm": 0.5216743350028992, "learning_rate": 2.8898905425199066e-06, "loss": 0.3894, "step": 42225 }, { "epoch": 1.9377724748749485, "grad_norm": 0.4627477526664734, "learning_rate": 2.8896682612213365e-06, "loss": 0.3594, "step": 42226 }, { "epoch": 1.9378183653802028, "grad_norm": 0.42021045088768005, "learning_rate": 2.889445984997447e-06, "loss": 0.27, "step": 42227 }, { "epoch": 1.9378642558854573, "grad_norm": 0.43709009885787964, "learning_rate": 2.8892237138487755e-06, "loss": 0.2957, "step": 42228 }, { "epoch": 1.9379101463907118, "grad_norm": 0.5263662934303284, "learning_rate": 2.8890014477758547e-06, "loss": 0.4336, "step": 42229 }, { "epoch": 1.937956036895966, "grad_norm": 0.43518704175949097, "learning_rate": 2.8887791867792198e-06, "loss": 0.2909, "step": 42230 }, { "epoch": 1.9380019274012206, "grad_norm": 0.4150220453739166, "learning_rate": 2.8885569308594043e-06, "loss": 0.2697, "step": 42231 }, { "epoch": 1.938047817906475, "grad_norm": 0.46325165033340454, "learning_rate": 2.8883346800169427e-06, "loss": 0.38, "step": 42232 }, { "epoch": 1.9380937084117296, "grad_norm": 0.4877205193042755, "learning_rate": 2.888112434252368e-06, "loss": 0.3826, "step": 42233 }, { "epoch": 1.938139598916984, "grad_norm": 0.4559682309627533, "learning_rate": 2.887890193566218e-06, "loss": 0.3603, "step": 42234 }, { "epoch": 1.9381854894222386, "grad_norm": 0.4480857849121094, "learning_rate": 2.887667957959026e-06, "loss": 0.3135, "step": 42235 }, { "epoch": 1.938231379927493, "grad_norm": 0.4790278673171997, "learning_rate": 2.887445727431325e-06, "loss": 0.3739, "step": 42236 }, { "epoch": 1.9382772704327476, "grad_norm": 0.49222835898399353, "learning_rate": 2.8872235019836493e-06, "loss": 0.3651, "step": 42237 }, { "epoch": 1.938323160938002, "grad_norm": 0.48999857902526855, "learning_rate": 2.887001281616536e-06, "loss": 0.3783, "step": 42238 }, { "epoch": 1.9383690514432566, "grad_norm": 0.47584909200668335, "learning_rate": 2.8867790663305175e-06, "loss": 0.3516, "step": 42239 }, { "epoch": 1.9384149419485108, "grad_norm": 0.4508305788040161, "learning_rate": 2.886556856126128e-06, "loss": 0.3236, "step": 42240 }, { "epoch": 1.9384608324537653, "grad_norm": 0.48666274547576904, "learning_rate": 2.886334651003903e-06, "loss": 0.3935, "step": 42241 }, { "epoch": 1.9385067229590198, "grad_norm": 0.4965948462486267, "learning_rate": 2.8861124509643756e-06, "loss": 0.3818, "step": 42242 }, { "epoch": 1.938552613464274, "grad_norm": 0.5081051588058472, "learning_rate": 2.8858902560080794e-06, "loss": 0.3848, "step": 42243 }, { "epoch": 1.9385985039695286, "grad_norm": 0.46162158250808716, "learning_rate": 2.885668066135551e-06, "loss": 0.3491, "step": 42244 }, { "epoch": 1.938644394474783, "grad_norm": 0.5153376460075378, "learning_rate": 2.885445881347324e-06, "loss": 0.4676, "step": 42245 }, { "epoch": 1.9386902849800376, "grad_norm": 0.6601130962371826, "learning_rate": 2.88522370164393e-06, "loss": 0.3797, "step": 42246 }, { "epoch": 1.938736175485292, "grad_norm": 0.5284818410873413, "learning_rate": 2.885001527025908e-06, "loss": 0.3569, "step": 42247 }, { "epoch": 1.9387820659905466, "grad_norm": 0.4753550887107849, "learning_rate": 2.88477935749379e-06, "loss": 0.3459, "step": 42248 }, { "epoch": 1.938827956495801, "grad_norm": 0.4834121763706207, "learning_rate": 2.8845571930481076e-06, "loss": 0.3823, "step": 42249 }, { "epoch": 1.9388738470010556, "grad_norm": 0.4619996249675751, "learning_rate": 2.8843350336893993e-06, "loss": 0.317, "step": 42250 }, { "epoch": 1.93891973750631, "grad_norm": 0.4547794461250305, "learning_rate": 2.884112879418197e-06, "loss": 0.3214, "step": 42251 }, { "epoch": 1.9389656280115644, "grad_norm": 0.41677114367485046, "learning_rate": 2.8838907302350365e-06, "loss": 0.261, "step": 42252 }, { "epoch": 1.9390115185168189, "grad_norm": 0.5085619688034058, "learning_rate": 2.8836685861404514e-06, "loss": 0.4129, "step": 42253 }, { "epoch": 1.9390574090220734, "grad_norm": 0.43450894951820374, "learning_rate": 2.8834464471349745e-06, "loss": 0.2874, "step": 42254 }, { "epoch": 1.9391032995273279, "grad_norm": 0.48946666717529297, "learning_rate": 2.8832243132191386e-06, "loss": 0.4237, "step": 42255 }, { "epoch": 1.9391491900325821, "grad_norm": 0.43609315156936646, "learning_rate": 2.8830021843934824e-06, "loss": 0.31, "step": 42256 }, { "epoch": 1.9391950805378366, "grad_norm": 0.4435378313064575, "learning_rate": 2.8827800606585375e-06, "loss": 0.3235, "step": 42257 }, { "epoch": 1.9392409710430911, "grad_norm": 0.5002389550209045, "learning_rate": 2.8825579420148373e-06, "loss": 0.353, "step": 42258 }, { "epoch": 1.9392868615483456, "grad_norm": 0.423209547996521, "learning_rate": 2.8823358284629186e-06, "loss": 0.3048, "step": 42259 }, { "epoch": 1.9393327520536001, "grad_norm": 0.4849652051925659, "learning_rate": 2.8821137200033133e-06, "loss": 0.3664, "step": 42260 }, { "epoch": 1.9393786425588546, "grad_norm": 0.40150266885757446, "learning_rate": 2.8818916166365567e-06, "loss": 0.2702, "step": 42261 }, { "epoch": 1.9394245330641091, "grad_norm": 0.45437896251678467, "learning_rate": 2.8816695183631828e-06, "loss": 0.3259, "step": 42262 }, { "epoch": 1.9394704235693636, "grad_norm": 0.47784993052482605, "learning_rate": 2.8814474251837245e-06, "loss": 0.4095, "step": 42263 }, { "epoch": 1.939516314074618, "grad_norm": 0.46958816051483154, "learning_rate": 2.881225337098714e-06, "loss": 0.3461, "step": 42264 }, { "epoch": 1.9395622045798724, "grad_norm": 0.48354607820510864, "learning_rate": 2.88100325410869e-06, "loss": 0.3589, "step": 42265 }, { "epoch": 1.9396080950851269, "grad_norm": 0.46869584918022156, "learning_rate": 2.8807811762141845e-06, "loss": 0.3217, "step": 42266 }, { "epoch": 1.9396539855903814, "grad_norm": 0.4924481213092804, "learning_rate": 2.8805591034157297e-06, "loss": 0.3991, "step": 42267 }, { "epoch": 1.9396998760956357, "grad_norm": 0.44264158606529236, "learning_rate": 2.8803370357138637e-06, "loss": 0.3226, "step": 42268 }, { "epoch": 1.9397457666008902, "grad_norm": 0.47411057353019714, "learning_rate": 2.8801149731091175e-06, "loss": 0.317, "step": 42269 }, { "epoch": 1.9397916571061447, "grad_norm": 0.4653431177139282, "learning_rate": 2.879892915602026e-06, "loss": 0.3211, "step": 42270 }, { "epoch": 1.9398375476113991, "grad_norm": 0.5193920135498047, "learning_rate": 2.8796708631931214e-06, "loss": 0.3977, "step": 42271 }, { "epoch": 1.9398834381166536, "grad_norm": 0.44486159086227417, "learning_rate": 2.8794488158829404e-06, "loss": 0.3235, "step": 42272 }, { "epoch": 1.9399293286219081, "grad_norm": 0.45401468873023987, "learning_rate": 2.879226773672016e-06, "loss": 0.3565, "step": 42273 }, { "epoch": 1.9399752191271626, "grad_norm": 0.47701025009155273, "learning_rate": 2.879004736560882e-06, "loss": 0.3566, "step": 42274 }, { "epoch": 1.9400211096324171, "grad_norm": 0.4512176811695099, "learning_rate": 2.8787827045500715e-06, "loss": 0.3301, "step": 42275 }, { "epoch": 1.9400670001376716, "grad_norm": 0.47241300344467163, "learning_rate": 2.87856067764012e-06, "loss": 0.3312, "step": 42276 }, { "epoch": 1.9401128906429261, "grad_norm": 0.4834900200366974, "learning_rate": 2.8783386558315583e-06, "loss": 0.3443, "step": 42277 }, { "epoch": 1.9401587811481804, "grad_norm": 0.45314115285873413, "learning_rate": 2.878116639124924e-06, "loss": 0.3138, "step": 42278 }, { "epoch": 1.940204671653435, "grad_norm": 0.49480968713760376, "learning_rate": 2.8778946275207505e-06, "loss": 0.3862, "step": 42279 }, { "epoch": 1.9402505621586894, "grad_norm": 0.46920135617256165, "learning_rate": 2.877672621019568e-06, "loss": 0.3603, "step": 42280 }, { "epoch": 1.9402964526639437, "grad_norm": 0.43961387872695923, "learning_rate": 2.877450619621915e-06, "loss": 0.2966, "step": 42281 }, { "epoch": 1.9403423431691982, "grad_norm": 0.45799311995506287, "learning_rate": 2.877228623328323e-06, "loss": 0.3085, "step": 42282 }, { "epoch": 1.9403882336744527, "grad_norm": 0.4498348534107208, "learning_rate": 2.877006632139327e-06, "loss": 0.3467, "step": 42283 }, { "epoch": 1.9404341241797072, "grad_norm": 0.48230624198913574, "learning_rate": 2.876784646055459e-06, "loss": 0.377, "step": 42284 }, { "epoch": 1.9404800146849617, "grad_norm": 0.46398574113845825, "learning_rate": 2.876562665077255e-06, "loss": 0.3226, "step": 42285 }, { "epoch": 1.9405259051902162, "grad_norm": 0.47819042205810547, "learning_rate": 2.8763406892052454e-06, "loss": 0.3826, "step": 42286 }, { "epoch": 1.9405717956954707, "grad_norm": 0.47676002979278564, "learning_rate": 2.8761187184399673e-06, "loss": 0.3439, "step": 42287 }, { "epoch": 1.9406176862007252, "grad_norm": 0.4551865756511688, "learning_rate": 2.875896752781955e-06, "loss": 0.3104, "step": 42288 }, { "epoch": 1.9406635767059797, "grad_norm": 0.4709368646144867, "learning_rate": 2.8756747922317375e-06, "loss": 0.3958, "step": 42289 }, { "epoch": 1.940709467211234, "grad_norm": 0.44992244243621826, "learning_rate": 2.875452836789854e-06, "loss": 0.3333, "step": 42290 }, { "epoch": 1.9407553577164884, "grad_norm": 0.497011661529541, "learning_rate": 2.875230886456835e-06, "loss": 0.442, "step": 42291 }, { "epoch": 1.940801248221743, "grad_norm": 0.4552088677883148, "learning_rate": 2.8750089412332137e-06, "loss": 0.3703, "step": 42292 }, { "epoch": 1.9408471387269974, "grad_norm": 0.5278934240341187, "learning_rate": 2.874787001119528e-06, "loss": 0.4657, "step": 42293 }, { "epoch": 1.9408930292322517, "grad_norm": 0.4521971344947815, "learning_rate": 2.874565066116308e-06, "loss": 0.3412, "step": 42294 }, { "epoch": 1.9409389197375062, "grad_norm": 0.463529497385025, "learning_rate": 2.874343136224088e-06, "loss": 0.343, "step": 42295 }, { "epoch": 1.9409848102427607, "grad_norm": 0.4611636996269226, "learning_rate": 2.8741212114434024e-06, "loss": 0.3536, "step": 42296 }, { "epoch": 1.9410307007480152, "grad_norm": 0.45311644673347473, "learning_rate": 2.873899291774784e-06, "loss": 0.3092, "step": 42297 }, { "epoch": 1.9410765912532697, "grad_norm": 0.536088764667511, "learning_rate": 2.873677377218765e-06, "loss": 0.4206, "step": 42298 }, { "epoch": 1.9411224817585242, "grad_norm": 0.46567708253860474, "learning_rate": 2.873455467775883e-06, "loss": 0.3675, "step": 42299 }, { "epoch": 1.9411683722637787, "grad_norm": 0.4789806604385376, "learning_rate": 2.873233563446669e-06, "loss": 0.4028, "step": 42300 }, { "epoch": 1.9412142627690332, "grad_norm": 0.4266676604747772, "learning_rate": 2.8730116642316553e-06, "loss": 0.2885, "step": 42301 }, { "epoch": 1.9412601532742877, "grad_norm": 0.4976128935813904, "learning_rate": 2.872789770131379e-06, "loss": 0.3864, "step": 42302 }, { "epoch": 1.941306043779542, "grad_norm": 0.48775428533554077, "learning_rate": 2.872567881146372e-06, "loss": 0.3573, "step": 42303 }, { "epoch": 1.9413519342847965, "grad_norm": 0.48267868161201477, "learning_rate": 2.872345997277168e-06, "loss": 0.4549, "step": 42304 }, { "epoch": 1.941397824790051, "grad_norm": 0.4972201883792877, "learning_rate": 2.872124118524301e-06, "loss": 0.4429, "step": 42305 }, { "epoch": 1.9414437152953052, "grad_norm": 0.4835011065006256, "learning_rate": 2.8719022448883026e-06, "loss": 0.4098, "step": 42306 }, { "epoch": 1.9414896058005597, "grad_norm": 0.43651050329208374, "learning_rate": 2.8716803763697086e-06, "loss": 0.297, "step": 42307 }, { "epoch": 1.9415354963058142, "grad_norm": 0.4938250482082367, "learning_rate": 2.871458512969049e-06, "loss": 0.4041, "step": 42308 }, { "epoch": 1.9415813868110687, "grad_norm": 0.4971288740634918, "learning_rate": 2.8712366546868627e-06, "loss": 0.4305, "step": 42309 }, { "epoch": 1.9416272773163232, "grad_norm": 0.4850749969482422, "learning_rate": 2.8710148015236793e-06, "loss": 0.3926, "step": 42310 }, { "epoch": 1.9416731678215777, "grad_norm": 0.4991370439529419, "learning_rate": 2.870792953480032e-06, "loss": 0.3984, "step": 42311 }, { "epoch": 1.9417190583268322, "grad_norm": 0.5606815218925476, "learning_rate": 2.8705711105564584e-06, "loss": 0.477, "step": 42312 }, { "epoch": 1.9417649488320867, "grad_norm": 0.48161765933036804, "learning_rate": 2.8703492727534878e-06, "loss": 0.3902, "step": 42313 }, { "epoch": 1.9418108393373412, "grad_norm": 0.444158136844635, "learning_rate": 2.8701274400716538e-06, "loss": 0.3331, "step": 42314 }, { "epoch": 1.9418567298425957, "grad_norm": 0.42736807465553284, "learning_rate": 2.8699056125114934e-06, "loss": 0.2705, "step": 42315 }, { "epoch": 1.94190262034785, "grad_norm": 0.46261027455329895, "learning_rate": 2.869683790073537e-06, "loss": 0.3235, "step": 42316 }, { "epoch": 1.9419485108531045, "grad_norm": 0.4624198377132416, "learning_rate": 2.869461972758319e-06, "loss": 0.3406, "step": 42317 }, { "epoch": 1.941994401358359, "grad_norm": 0.44138920307159424, "learning_rate": 2.8692401605663724e-06, "loss": 0.2729, "step": 42318 }, { "epoch": 1.9420402918636133, "grad_norm": 0.4355107545852661, "learning_rate": 2.8690183534982307e-06, "loss": 0.3389, "step": 42319 }, { "epoch": 1.9420861823688678, "grad_norm": 0.4525952935218811, "learning_rate": 2.8687965515544257e-06, "loss": 0.3484, "step": 42320 }, { "epoch": 1.9421320728741223, "grad_norm": 0.4638443887233734, "learning_rate": 2.868574754735494e-06, "loss": 0.3044, "step": 42321 }, { "epoch": 1.9421779633793768, "grad_norm": 0.45758339762687683, "learning_rate": 2.8683529630419676e-06, "loss": 0.2869, "step": 42322 }, { "epoch": 1.9422238538846313, "grad_norm": 0.4716130197048187, "learning_rate": 2.8681311764743775e-06, "loss": 0.342, "step": 42323 }, { "epoch": 1.9422697443898858, "grad_norm": 0.5328834056854248, "learning_rate": 2.867909395033261e-06, "loss": 0.4426, "step": 42324 }, { "epoch": 1.9423156348951403, "grad_norm": 0.48337554931640625, "learning_rate": 2.8676876187191495e-06, "loss": 0.355, "step": 42325 }, { "epoch": 1.9423615254003948, "grad_norm": 0.4759368896484375, "learning_rate": 2.8674658475325745e-06, "loss": 0.3444, "step": 42326 }, { "epoch": 1.9424074159056492, "grad_norm": 0.46888524293899536, "learning_rate": 2.867244081474075e-06, "loss": 0.3444, "step": 42327 }, { "epoch": 1.9424533064109037, "grad_norm": 0.4886777997016907, "learning_rate": 2.8670223205441783e-06, "loss": 0.3675, "step": 42328 }, { "epoch": 1.942499196916158, "grad_norm": 0.44057416915893555, "learning_rate": 2.8668005647434173e-06, "loss": 0.3245, "step": 42329 }, { "epoch": 1.9425450874214125, "grad_norm": 0.4584789276123047, "learning_rate": 2.8665788140723305e-06, "loss": 0.3512, "step": 42330 }, { "epoch": 1.942590977926667, "grad_norm": 0.4837767481803894, "learning_rate": 2.866357068531448e-06, "loss": 0.3538, "step": 42331 }, { "epoch": 1.9426368684319213, "grad_norm": 0.4615592360496521, "learning_rate": 2.8661353281213013e-06, "loss": 0.3466, "step": 42332 }, { "epoch": 1.9426827589371758, "grad_norm": 0.4658503532409668, "learning_rate": 2.8659135928424273e-06, "loss": 0.3485, "step": 42333 }, { "epoch": 1.9427286494424303, "grad_norm": 0.4668123126029968, "learning_rate": 2.865691862695358e-06, "loss": 0.3507, "step": 42334 }, { "epoch": 1.9427745399476848, "grad_norm": 0.45850706100463867, "learning_rate": 2.865470137680626e-06, "loss": 0.3667, "step": 42335 }, { "epoch": 1.9428204304529393, "grad_norm": 0.48505502939224243, "learning_rate": 2.8652484177987625e-06, "loss": 0.334, "step": 42336 }, { "epoch": 1.9428663209581938, "grad_norm": 0.46346062421798706, "learning_rate": 2.8650267030503054e-06, "loss": 0.3583, "step": 42337 }, { "epoch": 1.9429122114634483, "grad_norm": 0.4359017312526703, "learning_rate": 2.8648049934357848e-06, "loss": 0.2702, "step": 42338 }, { "epoch": 1.9429581019687028, "grad_norm": 0.4613456428050995, "learning_rate": 2.8645832889557344e-06, "loss": 0.361, "step": 42339 }, { "epoch": 1.9430039924739573, "grad_norm": 0.514443039894104, "learning_rate": 2.864361589610687e-06, "loss": 0.4548, "step": 42340 }, { "epoch": 1.9430498829792116, "grad_norm": 0.461169570684433, "learning_rate": 2.864139895401176e-06, "loss": 0.3753, "step": 42341 }, { "epoch": 1.943095773484466, "grad_norm": 0.5346269607543945, "learning_rate": 2.8639182063277327e-06, "loss": 0.3142, "step": 42342 }, { "epoch": 1.9431416639897205, "grad_norm": 0.4384450614452362, "learning_rate": 2.863696522390894e-06, "loss": 0.2973, "step": 42343 }, { "epoch": 1.943187554494975, "grad_norm": 0.47538575530052185, "learning_rate": 2.8634748435911906e-06, "loss": 0.413, "step": 42344 }, { "epoch": 1.9432334450002293, "grad_norm": 0.45169275999069214, "learning_rate": 2.8632531699291545e-06, "loss": 0.3447, "step": 42345 }, { "epoch": 1.9432793355054838, "grad_norm": 0.40608957409858704, "learning_rate": 2.8630315014053215e-06, "loss": 0.2677, "step": 42346 }, { "epoch": 1.9433252260107383, "grad_norm": 0.45585164427757263, "learning_rate": 2.862809838020224e-06, "loss": 0.3561, "step": 42347 }, { "epoch": 1.9433711165159928, "grad_norm": 0.4664261043071747, "learning_rate": 2.8625881797743913e-06, "loss": 0.3474, "step": 42348 }, { "epoch": 1.9434170070212473, "grad_norm": 0.5196836590766907, "learning_rate": 2.8623665266683643e-06, "loss": 0.4372, "step": 42349 }, { "epoch": 1.9434628975265018, "grad_norm": 0.4719387888908386, "learning_rate": 2.862144878702669e-06, "loss": 0.3728, "step": 42350 }, { "epoch": 1.9435087880317563, "grad_norm": 0.44653886556625366, "learning_rate": 2.861923235877838e-06, "loss": 0.3161, "step": 42351 }, { "epoch": 1.9435546785370108, "grad_norm": 0.4639488160610199, "learning_rate": 2.8617015981944094e-06, "loss": 0.3667, "step": 42352 }, { "epoch": 1.9436005690422653, "grad_norm": 0.5011780261993408, "learning_rate": 2.8614799656529134e-06, "loss": 0.437, "step": 42353 }, { "epoch": 1.9436464595475196, "grad_norm": 0.450440913438797, "learning_rate": 2.861258338253881e-06, "loss": 0.3125, "step": 42354 }, { "epoch": 1.943692350052774, "grad_norm": 0.5109073519706726, "learning_rate": 2.86103671599785e-06, "loss": 0.4073, "step": 42355 }, { "epoch": 1.9437382405580286, "grad_norm": 0.4427860975265503, "learning_rate": 2.86081509888535e-06, "loss": 0.3189, "step": 42356 }, { "epoch": 1.9437841310632828, "grad_norm": 0.5144197940826416, "learning_rate": 2.8605934869169124e-06, "loss": 0.4971, "step": 42357 }, { "epoch": 1.9438300215685373, "grad_norm": 0.4542863070964813, "learning_rate": 2.8603718800930748e-06, "loss": 0.3344, "step": 42358 }, { "epoch": 1.9438759120737918, "grad_norm": 0.4256596565246582, "learning_rate": 2.8601502784143664e-06, "loss": 0.2804, "step": 42359 }, { "epoch": 1.9439218025790463, "grad_norm": 0.46273332834243774, "learning_rate": 2.859928681881322e-06, "loss": 0.3951, "step": 42360 }, { "epoch": 1.9439676930843008, "grad_norm": 0.4489917457103729, "learning_rate": 2.859707090494474e-06, "loss": 0.3265, "step": 42361 }, { "epoch": 1.9440135835895553, "grad_norm": 0.4978906512260437, "learning_rate": 2.8594855042543545e-06, "loss": 0.3482, "step": 42362 }, { "epoch": 1.9440594740948098, "grad_norm": 0.42999300360679626, "learning_rate": 2.859263923161495e-06, "loss": 0.2905, "step": 42363 }, { "epoch": 1.9441053646000643, "grad_norm": 0.4374128580093384, "learning_rate": 2.859042347216432e-06, "loss": 0.2781, "step": 42364 }, { "epoch": 1.9441512551053188, "grad_norm": 0.5311461091041565, "learning_rate": 2.858820776419697e-06, "loss": 0.4248, "step": 42365 }, { "epoch": 1.9441971456105733, "grad_norm": 0.4619891047477722, "learning_rate": 2.8585992107718197e-06, "loss": 0.3083, "step": 42366 }, { "epoch": 1.9442430361158276, "grad_norm": 0.4418337941169739, "learning_rate": 2.858377650273337e-06, "loss": 0.3057, "step": 42367 }, { "epoch": 1.944288926621082, "grad_norm": 0.44293010234832764, "learning_rate": 2.858156094924781e-06, "loss": 0.3252, "step": 42368 }, { "epoch": 1.9443348171263366, "grad_norm": 0.48752227425575256, "learning_rate": 2.8579345447266837e-06, "loss": 0.4254, "step": 42369 }, { "epoch": 1.9443807076315909, "grad_norm": 0.49328041076660156, "learning_rate": 2.857712999679575e-06, "loss": 0.3912, "step": 42370 }, { "epoch": 1.9444265981368454, "grad_norm": 0.4733926057815552, "learning_rate": 2.857491459783995e-06, "loss": 0.3974, "step": 42371 }, { "epoch": 1.9444724886420999, "grad_norm": 0.5088169574737549, "learning_rate": 2.8572699250404693e-06, "loss": 0.2726, "step": 42372 }, { "epoch": 1.9445183791473544, "grad_norm": 0.45664742588996887, "learning_rate": 2.857048395449532e-06, "loss": 0.3331, "step": 42373 }, { "epoch": 1.9445642696526089, "grad_norm": 0.47497960925102234, "learning_rate": 2.856826871011718e-06, "loss": 0.3564, "step": 42374 }, { "epoch": 1.9446101601578634, "grad_norm": 0.44423750042915344, "learning_rate": 2.8566053517275593e-06, "loss": 0.3015, "step": 42375 }, { "epoch": 1.9446560506631179, "grad_norm": 0.5260916352272034, "learning_rate": 2.856383837597587e-06, "loss": 0.5292, "step": 42376 }, { "epoch": 1.9447019411683724, "grad_norm": 0.4879947304725647, "learning_rate": 2.8561623286223363e-06, "loss": 0.3734, "step": 42377 }, { "epoch": 1.9447478316736269, "grad_norm": 0.4590960741043091, "learning_rate": 2.8559408248023383e-06, "loss": 0.3658, "step": 42378 }, { "epoch": 1.9447937221788811, "grad_norm": 0.47383373975753784, "learning_rate": 2.8557193261381244e-06, "loss": 0.3745, "step": 42379 }, { "epoch": 1.9448396126841356, "grad_norm": 0.47204065322875977, "learning_rate": 2.855497832630231e-06, "loss": 0.3735, "step": 42380 }, { "epoch": 1.9448855031893901, "grad_norm": 0.4993223547935486, "learning_rate": 2.8552763442791877e-06, "loss": 0.3953, "step": 42381 }, { "epoch": 1.9449313936946446, "grad_norm": 0.47566714882850647, "learning_rate": 2.8550548610855288e-06, "loss": 0.3448, "step": 42382 }, { "epoch": 1.944977284199899, "grad_norm": 0.4414668679237366, "learning_rate": 2.8548333830497855e-06, "loss": 0.3053, "step": 42383 }, { "epoch": 1.9450231747051534, "grad_norm": 0.4796183109283447, "learning_rate": 2.8546119101724906e-06, "loss": 0.375, "step": 42384 }, { "epoch": 1.945069065210408, "grad_norm": 0.500424861907959, "learning_rate": 2.8543904424541757e-06, "loss": 0.4537, "step": 42385 }, { "epoch": 1.9451149557156624, "grad_norm": 0.45035821199417114, "learning_rate": 2.854168979895376e-06, "loss": 0.3556, "step": 42386 }, { "epoch": 1.945160846220917, "grad_norm": 0.47117337584495544, "learning_rate": 2.8539475224966227e-06, "loss": 0.3817, "step": 42387 }, { "epoch": 1.9452067367261714, "grad_norm": 0.46737900376319885, "learning_rate": 2.853726070258447e-06, "loss": 0.3282, "step": 42388 }, { "epoch": 1.945252627231426, "grad_norm": 0.520505428314209, "learning_rate": 2.8535046231813834e-06, "loss": 0.4338, "step": 42389 }, { "epoch": 1.9452985177366804, "grad_norm": 0.4850632846355438, "learning_rate": 2.8532831812659647e-06, "loss": 0.3277, "step": 42390 }, { "epoch": 1.9453444082419349, "grad_norm": 0.4342845678329468, "learning_rate": 2.8530617445127205e-06, "loss": 0.3052, "step": 42391 }, { "epoch": 1.9453902987471892, "grad_norm": 0.49113643169403076, "learning_rate": 2.852840312922187e-06, "loss": 0.4142, "step": 42392 }, { "epoch": 1.9454361892524437, "grad_norm": 0.4937026798725128, "learning_rate": 2.852618886494897e-06, "loss": 0.3759, "step": 42393 }, { "epoch": 1.9454820797576982, "grad_norm": 0.5197744369506836, "learning_rate": 2.8523974652313764e-06, "loss": 0.4344, "step": 42394 }, { "epoch": 1.9455279702629524, "grad_norm": 0.505089521408081, "learning_rate": 2.8521760491321645e-06, "loss": 0.413, "step": 42395 }, { "epoch": 1.945573860768207, "grad_norm": 0.501385509967804, "learning_rate": 2.851954638197791e-06, "loss": 0.3664, "step": 42396 }, { "epoch": 1.9456197512734614, "grad_norm": 0.5000234842300415, "learning_rate": 2.851733232428787e-06, "loss": 0.4409, "step": 42397 }, { "epoch": 1.945665641778716, "grad_norm": 0.46717020869255066, "learning_rate": 2.8515118318256885e-06, "loss": 0.3585, "step": 42398 }, { "epoch": 1.9457115322839704, "grad_norm": 0.51054447889328, "learning_rate": 2.851290436389026e-06, "loss": 0.4745, "step": 42399 }, { "epoch": 1.945757422789225, "grad_norm": 0.4783273935317993, "learning_rate": 2.851069046119331e-06, "loss": 0.3521, "step": 42400 }, { "epoch": 1.9458033132944794, "grad_norm": 0.4688833951950073, "learning_rate": 2.8508476610171353e-06, "loss": 0.3685, "step": 42401 }, { "epoch": 1.945849203799734, "grad_norm": 0.4221022427082062, "learning_rate": 2.8506262810829756e-06, "loss": 0.2972, "step": 42402 }, { "epoch": 1.9458950943049884, "grad_norm": 0.454321950674057, "learning_rate": 2.85040490631738e-06, "loss": 0.3027, "step": 42403 }, { "epoch": 1.945940984810243, "grad_norm": 0.4578109383583069, "learning_rate": 2.850183536720883e-06, "loss": 0.3076, "step": 42404 }, { "epoch": 1.9459868753154972, "grad_norm": 0.4838213324546814, "learning_rate": 2.8499621722940155e-06, "loss": 0.417, "step": 42405 }, { "epoch": 1.9460327658207517, "grad_norm": 0.46016213297843933, "learning_rate": 2.849740813037311e-06, "loss": 0.3467, "step": 42406 }, { "epoch": 1.9460786563260062, "grad_norm": 0.4736923277378082, "learning_rate": 2.8495194589512985e-06, "loss": 0.3395, "step": 42407 }, { "epoch": 1.9461245468312605, "grad_norm": 0.528885543346405, "learning_rate": 2.8492981100365157e-06, "loss": 0.4364, "step": 42408 }, { "epoch": 1.946170437336515, "grad_norm": 0.4899018108844757, "learning_rate": 2.849076766293492e-06, "loss": 0.3689, "step": 42409 }, { "epoch": 1.9462163278417695, "grad_norm": 0.4459191560745239, "learning_rate": 2.8488554277227577e-06, "loss": 0.2972, "step": 42410 }, { "epoch": 1.946262218347024, "grad_norm": 0.5053806900978088, "learning_rate": 2.8486340943248494e-06, "loss": 0.4293, "step": 42411 }, { "epoch": 1.9463081088522785, "grad_norm": 0.4487350881099701, "learning_rate": 2.848412766100297e-06, "loss": 0.3316, "step": 42412 }, { "epoch": 1.946353999357533, "grad_norm": 0.4759421646595001, "learning_rate": 2.8481914430496303e-06, "loss": 0.3425, "step": 42413 }, { "epoch": 1.9463998898627874, "grad_norm": 0.4829477369785309, "learning_rate": 2.8479701251733864e-06, "loss": 0.3586, "step": 42414 }, { "epoch": 1.946445780368042, "grad_norm": 0.4899613857269287, "learning_rate": 2.8477488124720975e-06, "loss": 0.3578, "step": 42415 }, { "epoch": 1.9464916708732964, "grad_norm": 0.5239880084991455, "learning_rate": 2.847527504946289e-06, "loss": 0.4435, "step": 42416 }, { "epoch": 1.946537561378551, "grad_norm": 0.5071898698806763, "learning_rate": 2.8473062025964997e-06, "loss": 0.3325, "step": 42417 }, { "epoch": 1.9465834518838052, "grad_norm": 0.48911234736442566, "learning_rate": 2.84708490542326e-06, "loss": 0.3749, "step": 42418 }, { "epoch": 1.9466293423890597, "grad_norm": 0.4952391982078552, "learning_rate": 2.846863613427099e-06, "loss": 0.4127, "step": 42419 }, { "epoch": 1.9466752328943142, "grad_norm": 0.5020302534103394, "learning_rate": 2.8466423266085534e-06, "loss": 0.4434, "step": 42420 }, { "epoch": 1.9467211233995685, "grad_norm": 0.46724188327789307, "learning_rate": 2.846421044968154e-06, "loss": 0.3404, "step": 42421 }, { "epoch": 1.946767013904823, "grad_norm": 0.47430843114852905, "learning_rate": 2.8461997685064295e-06, "loss": 0.3413, "step": 42422 }, { "epoch": 1.9468129044100775, "grad_norm": 0.4442247748374939, "learning_rate": 2.8459784972239176e-06, "loss": 0.3207, "step": 42423 }, { "epoch": 1.946858794915332, "grad_norm": 0.49720704555511475, "learning_rate": 2.8457572311211468e-06, "loss": 0.3965, "step": 42424 }, { "epoch": 1.9469046854205865, "grad_norm": 0.483260840177536, "learning_rate": 2.8455359701986508e-06, "loss": 0.3772, "step": 42425 }, { "epoch": 1.946950575925841, "grad_norm": 0.4952370822429657, "learning_rate": 2.84531471445696e-06, "loss": 0.4414, "step": 42426 }, { "epoch": 1.9469964664310955, "grad_norm": 0.4601132869720459, "learning_rate": 2.845093463896608e-06, "loss": 0.3128, "step": 42427 }, { "epoch": 1.94704235693635, "grad_norm": 0.49074140191078186, "learning_rate": 2.844872218518123e-06, "loss": 0.3817, "step": 42428 }, { "epoch": 1.9470882474416045, "grad_norm": 0.491845965385437, "learning_rate": 2.844650978322043e-06, "loss": 0.4049, "step": 42429 }, { "epoch": 1.9471341379468587, "grad_norm": 0.4709020256996155, "learning_rate": 2.8444297433088975e-06, "loss": 0.3901, "step": 42430 }, { "epoch": 1.9471800284521132, "grad_norm": 0.4624183177947998, "learning_rate": 2.8442085134792153e-06, "loss": 0.3227, "step": 42431 }, { "epoch": 1.9472259189573677, "grad_norm": 0.4746169447898865, "learning_rate": 2.843987288833534e-06, "loss": 0.3905, "step": 42432 }, { "epoch": 1.9472718094626222, "grad_norm": 0.46739792823791504, "learning_rate": 2.843766069372382e-06, "loss": 0.3644, "step": 42433 }, { "epoch": 1.9473176999678765, "grad_norm": 0.5035632848739624, "learning_rate": 2.843544855096293e-06, "loss": 0.3695, "step": 42434 }, { "epoch": 1.947363590473131, "grad_norm": 0.508005678653717, "learning_rate": 2.8433236460057958e-06, "loss": 0.4164, "step": 42435 }, { "epoch": 1.9474094809783855, "grad_norm": 0.45017924904823303, "learning_rate": 2.8431024421014264e-06, "loss": 0.3457, "step": 42436 }, { "epoch": 1.94745537148364, "grad_norm": 0.4457870423793793, "learning_rate": 2.8428812433837163e-06, "loss": 0.3058, "step": 42437 }, { "epoch": 1.9475012619888945, "grad_norm": 0.4544684588909149, "learning_rate": 2.8426600498531927e-06, "loss": 0.3676, "step": 42438 }, { "epoch": 1.947547152494149, "grad_norm": 0.4508378505706787, "learning_rate": 2.8424388615103928e-06, "loss": 0.2974, "step": 42439 }, { "epoch": 1.9475930429994035, "grad_norm": 0.44784507155418396, "learning_rate": 2.842217678355846e-06, "loss": 0.3011, "step": 42440 }, { "epoch": 1.947638933504658, "grad_norm": 0.45314615964889526, "learning_rate": 2.841996500390083e-06, "loss": 0.3105, "step": 42441 }, { "epoch": 1.9476848240099125, "grad_norm": 0.4720838963985443, "learning_rate": 2.8417753276136394e-06, "loss": 0.3412, "step": 42442 }, { "epoch": 1.9477307145151668, "grad_norm": 0.46888601779937744, "learning_rate": 2.8415541600270447e-06, "loss": 0.3345, "step": 42443 }, { "epoch": 1.9477766050204213, "grad_norm": 0.4765230715274811, "learning_rate": 2.841332997630829e-06, "loss": 0.3134, "step": 42444 }, { "epoch": 1.9478224955256758, "grad_norm": 0.5001325011253357, "learning_rate": 2.841111840425529e-06, "loss": 0.3799, "step": 42445 }, { "epoch": 1.94786838603093, "grad_norm": 0.5129137635231018, "learning_rate": 2.8408906884116727e-06, "loss": 0.4493, "step": 42446 }, { "epoch": 1.9479142765361845, "grad_norm": 0.521508514881134, "learning_rate": 2.8406695415897934e-06, "loss": 0.4657, "step": 42447 }, { "epoch": 1.947960167041439, "grad_norm": 0.4937654733657837, "learning_rate": 2.8404483999604217e-06, "loss": 0.3902, "step": 42448 }, { "epoch": 1.9480060575466935, "grad_norm": 0.4775790572166443, "learning_rate": 2.840227263524091e-06, "loss": 0.3627, "step": 42449 }, { "epoch": 1.948051948051948, "grad_norm": 0.43531087040901184, "learning_rate": 2.840006132281329e-06, "loss": 0.3293, "step": 42450 }, { "epoch": 1.9480978385572025, "grad_norm": 0.46937450766563416, "learning_rate": 2.839785006232673e-06, "loss": 0.3652, "step": 42451 }, { "epoch": 1.948143729062457, "grad_norm": 0.47242382168769836, "learning_rate": 2.839563885378652e-06, "loss": 0.3479, "step": 42452 }, { "epoch": 1.9481896195677115, "grad_norm": 0.4657030999660492, "learning_rate": 2.8393427697197973e-06, "loss": 0.3583, "step": 42453 }, { "epoch": 1.948235510072966, "grad_norm": 0.439483106136322, "learning_rate": 2.8391216592566416e-06, "loss": 0.2991, "step": 42454 }, { "epoch": 1.9482814005782205, "grad_norm": 0.44933009147644043, "learning_rate": 2.8389005539897176e-06, "loss": 0.3354, "step": 42455 }, { "epoch": 1.9483272910834748, "grad_norm": 0.4766775071620941, "learning_rate": 2.838679453919553e-06, "loss": 0.3551, "step": 42456 }, { "epoch": 1.9483731815887293, "grad_norm": 0.45603442192077637, "learning_rate": 2.8384583590466845e-06, "loss": 0.304, "step": 42457 }, { "epoch": 1.9484190720939838, "grad_norm": 0.4871738851070404, "learning_rate": 2.8382372693716414e-06, "loss": 0.3833, "step": 42458 }, { "epoch": 1.948464962599238, "grad_norm": 0.4615531265735626, "learning_rate": 2.838016184894955e-06, "loss": 0.3202, "step": 42459 }, { "epoch": 1.9485108531044926, "grad_norm": 0.4755474328994751, "learning_rate": 2.837795105617158e-06, "loss": 0.4256, "step": 42460 }, { "epoch": 1.948556743609747, "grad_norm": 0.4822428822517395, "learning_rate": 2.8375740315387812e-06, "loss": 0.3565, "step": 42461 }, { "epoch": 1.9486026341150016, "grad_norm": 0.43045395612716675, "learning_rate": 2.8373529626603536e-06, "loss": 0.2924, "step": 42462 }, { "epoch": 1.948648524620256, "grad_norm": 0.47284722328186035, "learning_rate": 2.8371318989824127e-06, "loss": 0.3711, "step": 42463 }, { "epoch": 1.9486944151255106, "grad_norm": 0.4907500147819519, "learning_rate": 2.8369108405054862e-06, "loss": 0.3948, "step": 42464 }, { "epoch": 1.948740305630765, "grad_norm": 0.5306557416915894, "learning_rate": 2.836689787230107e-06, "loss": 0.4532, "step": 42465 }, { "epoch": 1.9487861961360196, "grad_norm": 0.4576500654220581, "learning_rate": 2.8364687391568037e-06, "loss": 0.3428, "step": 42466 }, { "epoch": 1.948832086641274, "grad_norm": 0.4818820059299469, "learning_rate": 2.836247696286112e-06, "loss": 0.4032, "step": 42467 }, { "epoch": 1.9488779771465283, "grad_norm": 0.4623189866542816, "learning_rate": 2.836026658618562e-06, "loss": 0.3406, "step": 42468 }, { "epoch": 1.9489238676517828, "grad_norm": 0.5096890330314636, "learning_rate": 2.835805626154685e-06, "loss": 0.4218, "step": 42469 }, { "epoch": 1.9489697581570373, "grad_norm": 0.4543008506298065, "learning_rate": 2.8355845988950125e-06, "loss": 0.3235, "step": 42470 }, { "epoch": 1.9490156486622918, "grad_norm": 0.4332923889160156, "learning_rate": 2.8353635768400754e-06, "loss": 0.2731, "step": 42471 }, { "epoch": 1.949061539167546, "grad_norm": 0.6159323453903198, "learning_rate": 2.8351425599904044e-06, "loss": 0.3947, "step": 42472 }, { "epoch": 1.9491074296728006, "grad_norm": 0.4629206359386444, "learning_rate": 2.834921548346534e-06, "loss": 0.3405, "step": 42473 }, { "epoch": 1.949153320178055, "grad_norm": 0.4813768267631531, "learning_rate": 2.8347005419089933e-06, "loss": 0.4072, "step": 42474 }, { "epoch": 1.9491992106833096, "grad_norm": 0.4381105303764343, "learning_rate": 2.8344795406783133e-06, "loss": 0.2965, "step": 42475 }, { "epoch": 1.949245101188564, "grad_norm": 0.4875320494174957, "learning_rate": 2.8342585446550285e-06, "loss": 0.3751, "step": 42476 }, { "epoch": 1.9492909916938186, "grad_norm": 0.5055011510848999, "learning_rate": 2.834037553839668e-06, "loss": 0.4668, "step": 42477 }, { "epoch": 1.949336882199073, "grad_norm": 0.48518139123916626, "learning_rate": 2.833816568232762e-06, "loss": 0.4158, "step": 42478 }, { "epoch": 1.9493827727043276, "grad_norm": 0.4666910171508789, "learning_rate": 2.8335955878348455e-06, "loss": 0.3554, "step": 42479 }, { "epoch": 1.949428663209582, "grad_norm": 1.4019774198532104, "learning_rate": 2.833374612646447e-06, "loss": 0.3494, "step": 42480 }, { "epoch": 1.9494745537148364, "grad_norm": 0.4731694161891937, "learning_rate": 2.8331536426680994e-06, "loss": 0.3393, "step": 42481 }, { "epoch": 1.9495204442200909, "grad_norm": 0.49350517988204956, "learning_rate": 2.8329326779003336e-06, "loss": 0.3877, "step": 42482 }, { "epoch": 1.9495663347253454, "grad_norm": 0.4587632417678833, "learning_rate": 2.83271171834368e-06, "loss": 0.3364, "step": 42483 }, { "epoch": 1.9496122252305996, "grad_norm": 0.4981546103954315, "learning_rate": 2.8324907639986697e-06, "loss": 0.4214, "step": 42484 }, { "epoch": 1.9496581157358541, "grad_norm": 0.48166605830192566, "learning_rate": 2.8322698148658366e-06, "loss": 0.3567, "step": 42485 }, { "epoch": 1.9497040062411086, "grad_norm": 0.49478209018707275, "learning_rate": 2.8320488709457104e-06, "loss": 0.3692, "step": 42486 }, { "epoch": 1.9497498967463631, "grad_norm": 0.43518316745758057, "learning_rate": 2.8318279322388203e-06, "loss": 0.2934, "step": 42487 }, { "epoch": 1.9497957872516176, "grad_norm": 0.44196969270706177, "learning_rate": 2.831606998745702e-06, "loss": 0.3045, "step": 42488 }, { "epoch": 1.9498416777568721, "grad_norm": 0.5177297592163086, "learning_rate": 2.8313860704668843e-06, "loss": 0.4622, "step": 42489 }, { "epoch": 1.9498875682621266, "grad_norm": 0.4825424253940582, "learning_rate": 2.831165147402899e-06, "loss": 0.3981, "step": 42490 }, { "epoch": 1.9499334587673811, "grad_norm": 0.5021687150001526, "learning_rate": 2.8309442295542764e-06, "loss": 0.3819, "step": 42491 }, { "epoch": 1.9499793492726356, "grad_norm": 0.5172713994979858, "learning_rate": 2.8307233169215487e-06, "loss": 0.3725, "step": 42492 }, { "epoch": 1.95002523977789, "grad_norm": 0.4370590150356293, "learning_rate": 2.8305024095052446e-06, "loss": 0.3105, "step": 42493 }, { "epoch": 1.9500711302831444, "grad_norm": 0.4698132574558258, "learning_rate": 2.8302815073058998e-06, "loss": 0.3446, "step": 42494 }, { "epoch": 1.9501170207883989, "grad_norm": 0.4961320757865906, "learning_rate": 2.8300606103240437e-06, "loss": 0.3915, "step": 42495 }, { "epoch": 1.9501629112936534, "grad_norm": 0.5245936512947083, "learning_rate": 2.8298397185602046e-06, "loss": 0.4078, "step": 42496 }, { "epoch": 1.9502088017989077, "grad_norm": 0.4557390511035919, "learning_rate": 2.8296188320149176e-06, "loss": 0.3221, "step": 42497 }, { "epoch": 1.9502546923041622, "grad_norm": 0.46944060921669006, "learning_rate": 2.8293979506887127e-06, "loss": 0.3528, "step": 42498 }, { "epoch": 1.9503005828094166, "grad_norm": 0.40785226225852966, "learning_rate": 2.82917707458212e-06, "loss": 0.2672, "step": 42499 }, { "epoch": 1.9503464733146711, "grad_norm": 0.45082634687423706, "learning_rate": 2.82895620369567e-06, "loss": 0.3222, "step": 42500 }, { "epoch": 1.9503923638199256, "grad_norm": 0.47749245166778564, "learning_rate": 2.8287353380298966e-06, "loss": 0.3402, "step": 42501 }, { "epoch": 1.9504382543251801, "grad_norm": 0.46454060077667236, "learning_rate": 2.8285144775853303e-06, "loss": 0.3184, "step": 42502 }, { "epoch": 1.9504841448304346, "grad_norm": 0.4924120604991913, "learning_rate": 2.8282936223625e-06, "loss": 0.3561, "step": 42503 }, { "epoch": 1.9505300353356891, "grad_norm": 0.4693794548511505, "learning_rate": 2.8280727723619393e-06, "loss": 0.3846, "step": 42504 }, { "epoch": 1.9505759258409436, "grad_norm": 0.47027331590652466, "learning_rate": 2.8278519275841777e-06, "loss": 0.3835, "step": 42505 }, { "epoch": 1.9506218163461981, "grad_norm": 0.4731113612651825, "learning_rate": 2.827631088029745e-06, "loss": 0.3744, "step": 42506 }, { "epoch": 1.9506677068514524, "grad_norm": 0.4881572723388672, "learning_rate": 2.8274102536991754e-06, "loss": 0.3701, "step": 42507 }, { "epoch": 1.950713597356707, "grad_norm": 0.43266406655311584, "learning_rate": 2.827189424592999e-06, "loss": 0.3083, "step": 42508 }, { "epoch": 1.9507594878619614, "grad_norm": 0.5126971006393433, "learning_rate": 2.826968600711744e-06, "loss": 0.4272, "step": 42509 }, { "epoch": 1.9508053783672157, "grad_norm": 0.46197235584259033, "learning_rate": 2.826747782055945e-06, "loss": 0.3321, "step": 42510 }, { "epoch": 1.9508512688724702, "grad_norm": 0.4891059100627899, "learning_rate": 2.8265269686261326e-06, "loss": 0.3618, "step": 42511 }, { "epoch": 1.9508971593777247, "grad_norm": 0.44532984495162964, "learning_rate": 2.826306160422837e-06, "loss": 0.2911, "step": 42512 }, { "epoch": 1.9509430498829792, "grad_norm": 0.4884195029735565, "learning_rate": 2.8260853574465884e-06, "loss": 0.4022, "step": 42513 }, { "epoch": 1.9509889403882337, "grad_norm": 0.49024680256843567, "learning_rate": 2.8258645596979185e-06, "loss": 0.4169, "step": 42514 }, { "epoch": 1.9510348308934882, "grad_norm": 0.4090471565723419, "learning_rate": 2.825643767177356e-06, "loss": 0.2565, "step": 42515 }, { "epoch": 1.9510807213987427, "grad_norm": 0.5053630471229553, "learning_rate": 2.825422979885437e-06, "loss": 0.3949, "step": 42516 }, { "epoch": 1.9511266119039972, "grad_norm": 0.4524213671684265, "learning_rate": 2.8252021978226883e-06, "loss": 0.2911, "step": 42517 }, { "epoch": 1.9511725024092517, "grad_norm": 0.4692447781562805, "learning_rate": 2.824981420989641e-06, "loss": 0.329, "step": 42518 }, { "epoch": 1.951218392914506, "grad_norm": 0.4875037968158722, "learning_rate": 2.824760649386828e-06, "loss": 0.339, "step": 42519 }, { "epoch": 1.9512642834197604, "grad_norm": 0.5145904421806335, "learning_rate": 2.824539883014779e-06, "loss": 0.4635, "step": 42520 }, { "epoch": 1.951310173925015, "grad_norm": 0.451589435338974, "learning_rate": 2.8243191218740237e-06, "loss": 0.3266, "step": 42521 }, { "epoch": 1.9513560644302694, "grad_norm": 0.4614278972148895, "learning_rate": 2.8240983659650955e-06, "loss": 0.3158, "step": 42522 }, { "epoch": 1.9514019549355237, "grad_norm": 0.480217307806015, "learning_rate": 2.823877615288525e-06, "loss": 0.3556, "step": 42523 }, { "epoch": 1.9514478454407782, "grad_norm": 0.43987399339675903, "learning_rate": 2.8236568698448407e-06, "loss": 0.3067, "step": 42524 }, { "epoch": 1.9514937359460327, "grad_norm": 0.4859858453273773, "learning_rate": 2.823436129634576e-06, "loss": 0.3376, "step": 42525 }, { "epoch": 1.9515396264512872, "grad_norm": 0.47993794083595276, "learning_rate": 2.82321539465826e-06, "loss": 0.3751, "step": 42526 }, { "epoch": 1.9515855169565417, "grad_norm": 0.4649876058101654, "learning_rate": 2.822994664916422e-06, "loss": 0.3209, "step": 42527 }, { "epoch": 1.9516314074617962, "grad_norm": 0.4646109640598297, "learning_rate": 2.8227739404095966e-06, "loss": 0.3221, "step": 42528 }, { "epoch": 1.9516772979670507, "grad_norm": 0.4545367658138275, "learning_rate": 2.8225532211383128e-06, "loss": 0.3629, "step": 42529 }, { "epoch": 1.9517231884723052, "grad_norm": 0.48687881231307983, "learning_rate": 2.8223325071030995e-06, "loss": 0.4337, "step": 42530 }, { "epoch": 1.9517690789775597, "grad_norm": 0.5047340393066406, "learning_rate": 2.822111798304491e-06, "loss": 0.4243, "step": 42531 }, { "epoch": 1.951814969482814, "grad_norm": 0.4757983684539795, "learning_rate": 2.8218910947430157e-06, "loss": 0.3305, "step": 42532 }, { "epoch": 1.9518608599880685, "grad_norm": 0.45608747005462646, "learning_rate": 2.821670396419206e-06, "loss": 0.302, "step": 42533 }, { "epoch": 1.951906750493323, "grad_norm": 0.47517067193984985, "learning_rate": 2.821449703333591e-06, "loss": 0.3433, "step": 42534 }, { "epoch": 1.9519526409985772, "grad_norm": 0.48527246713638306, "learning_rate": 2.821229015486702e-06, "loss": 0.3726, "step": 42535 }, { "epoch": 1.9519985315038317, "grad_norm": 0.4840909242630005, "learning_rate": 2.821008332879069e-06, "loss": 0.4219, "step": 42536 }, { "epoch": 1.9520444220090862, "grad_norm": 0.48162558674812317, "learning_rate": 2.820787655511221e-06, "loss": 0.3283, "step": 42537 }, { "epoch": 1.9520903125143407, "grad_norm": 0.4531991481781006, "learning_rate": 2.8205669833836944e-06, "loss": 0.3301, "step": 42538 }, { "epoch": 1.9521362030195952, "grad_norm": 0.4448402523994446, "learning_rate": 2.820346316497016e-06, "loss": 0.2906, "step": 42539 }, { "epoch": 1.9521820935248497, "grad_norm": 0.5169556140899658, "learning_rate": 2.8201256548517137e-06, "loss": 0.4126, "step": 42540 }, { "epoch": 1.9522279840301042, "grad_norm": 0.4678075313568115, "learning_rate": 2.819904998448324e-06, "loss": 0.3408, "step": 42541 }, { "epoch": 1.9522738745353587, "grad_norm": 0.4547345042228699, "learning_rate": 2.819684347287375e-06, "loss": 0.3387, "step": 42542 }, { "epoch": 1.9523197650406132, "grad_norm": 0.4803924262523651, "learning_rate": 2.819463701369394e-06, "loss": 0.3542, "step": 42543 }, { "epoch": 1.9523656555458677, "grad_norm": 0.47113198041915894, "learning_rate": 2.8192430606949172e-06, "loss": 0.387, "step": 42544 }, { "epoch": 1.952411546051122, "grad_norm": 0.47771966457366943, "learning_rate": 2.819022425264473e-06, "loss": 0.3581, "step": 42545 }, { "epoch": 1.9524574365563765, "grad_norm": 0.4476812481880188, "learning_rate": 2.8188017950785905e-06, "loss": 0.3195, "step": 42546 }, { "epoch": 1.952503327061631, "grad_norm": 0.46197864413261414, "learning_rate": 2.8185811701378018e-06, "loss": 0.3335, "step": 42547 }, { "epoch": 1.9525492175668853, "grad_norm": 0.4620197117328644, "learning_rate": 2.8183605504426372e-06, "loss": 0.3381, "step": 42548 }, { "epoch": 1.9525951080721398, "grad_norm": 0.4671401381492615, "learning_rate": 2.8181399359936246e-06, "loss": 0.362, "step": 42549 }, { "epoch": 1.9526409985773943, "grad_norm": 0.5330823063850403, "learning_rate": 2.817919326791299e-06, "loss": 0.4382, "step": 42550 }, { "epoch": 1.9526868890826488, "grad_norm": 0.46532678604125977, "learning_rate": 2.8176987228361883e-06, "loss": 0.3429, "step": 42551 }, { "epoch": 1.9527327795879033, "grad_norm": 0.40732866525650024, "learning_rate": 2.817478124128822e-06, "loss": 0.2593, "step": 42552 }, { "epoch": 1.9527786700931578, "grad_norm": 0.5604402422904968, "learning_rate": 2.8172575306697337e-06, "loss": 0.4952, "step": 42553 }, { "epoch": 1.9528245605984123, "grad_norm": 0.47648921608924866, "learning_rate": 2.8170369424594525e-06, "loss": 0.3899, "step": 42554 }, { "epoch": 1.9528704511036667, "grad_norm": 0.46424171328544617, "learning_rate": 2.816816359498508e-06, "loss": 0.3295, "step": 42555 }, { "epoch": 1.9529163416089212, "grad_norm": 0.4813767075538635, "learning_rate": 2.816595781787431e-06, "loss": 0.3436, "step": 42556 }, { "epoch": 1.9529622321141755, "grad_norm": 0.4569571614265442, "learning_rate": 2.8163752093267527e-06, "loss": 0.3446, "step": 42557 }, { "epoch": 1.95300812261943, "grad_norm": 0.48273661732673645, "learning_rate": 2.8161546421170005e-06, "loss": 0.4289, "step": 42558 }, { "epoch": 1.9530540131246845, "grad_norm": 0.46390098333358765, "learning_rate": 2.81593408015871e-06, "loss": 0.3394, "step": 42559 }, { "epoch": 1.953099903629939, "grad_norm": 0.5205157399177551, "learning_rate": 2.8157135234524075e-06, "loss": 0.3802, "step": 42560 }, { "epoch": 1.9531457941351933, "grad_norm": 0.48730960488319397, "learning_rate": 2.815492971998623e-06, "loss": 0.3552, "step": 42561 }, { "epoch": 1.9531916846404478, "grad_norm": 0.4250916540622711, "learning_rate": 2.8152724257978904e-06, "loss": 0.2488, "step": 42562 }, { "epoch": 1.9532375751457023, "grad_norm": 0.5010707974433899, "learning_rate": 2.815051884850738e-06, "loss": 0.4288, "step": 42563 }, { "epoch": 1.9532834656509568, "grad_norm": 0.49828991293907166, "learning_rate": 2.814831349157696e-06, "loss": 0.4441, "step": 42564 }, { "epoch": 1.9533293561562113, "grad_norm": 0.49227431416511536, "learning_rate": 2.814610818719294e-06, "loss": 0.3693, "step": 42565 }, { "epoch": 1.9533752466614658, "grad_norm": 0.48540937900543213, "learning_rate": 2.8143902935360646e-06, "loss": 0.3592, "step": 42566 }, { "epoch": 1.9534211371667203, "grad_norm": 0.4975406527519226, "learning_rate": 2.814169773608536e-06, "loss": 0.3813, "step": 42567 }, { "epoch": 1.9534670276719748, "grad_norm": 0.47891250252723694, "learning_rate": 2.8139492589372407e-06, "loss": 0.3517, "step": 42568 }, { "epoch": 1.9535129181772293, "grad_norm": 0.45402589440345764, "learning_rate": 2.813728749522706e-06, "loss": 0.3129, "step": 42569 }, { "epoch": 1.9535588086824835, "grad_norm": 0.48088952898979187, "learning_rate": 2.813508245365465e-06, "loss": 0.3743, "step": 42570 }, { "epoch": 1.953604699187738, "grad_norm": 0.48054125905036926, "learning_rate": 2.8132877464660435e-06, "loss": 0.3847, "step": 42571 }, { "epoch": 1.9536505896929925, "grad_norm": 0.4481973350048065, "learning_rate": 2.8130672528249775e-06, "loss": 0.3266, "step": 42572 }, { "epoch": 1.9536964801982468, "grad_norm": 0.5275359153747559, "learning_rate": 2.8128467644427947e-06, "loss": 0.3731, "step": 42573 }, { "epoch": 1.9537423707035013, "grad_norm": 0.48155292868614197, "learning_rate": 2.812626281320022e-06, "loss": 0.3689, "step": 42574 }, { "epoch": 1.9537882612087558, "grad_norm": 0.4975397288799286, "learning_rate": 2.812405803457196e-06, "loss": 0.3972, "step": 42575 }, { "epoch": 1.9538341517140103, "grad_norm": 0.609037458896637, "learning_rate": 2.812185330854843e-06, "loss": 0.383, "step": 42576 }, { "epoch": 1.9538800422192648, "grad_norm": 0.47047191858291626, "learning_rate": 2.8119648635134943e-06, "loss": 0.3378, "step": 42577 }, { "epoch": 1.9539259327245193, "grad_norm": 0.4643189311027527, "learning_rate": 2.8117444014336793e-06, "loss": 0.3277, "step": 42578 }, { "epoch": 1.9539718232297738, "grad_norm": 0.49498069286346436, "learning_rate": 2.811523944615928e-06, "loss": 0.3649, "step": 42579 }, { "epoch": 1.9540177137350283, "grad_norm": 0.5016837120056152, "learning_rate": 2.811303493060769e-06, "loss": 0.4672, "step": 42580 }, { "epoch": 1.9540636042402828, "grad_norm": 0.4253122806549072, "learning_rate": 2.811083046768737e-06, "loss": 0.2697, "step": 42581 }, { "epoch": 1.9541094947455373, "grad_norm": 0.4417501389980316, "learning_rate": 2.8108626057403587e-06, "loss": 0.2855, "step": 42582 }, { "epoch": 1.9541553852507916, "grad_norm": 0.4434540271759033, "learning_rate": 2.8106421699761633e-06, "loss": 0.3095, "step": 42583 }, { "epoch": 1.954201275756046, "grad_norm": 0.46882364153862, "learning_rate": 2.8104217394766843e-06, "loss": 0.3492, "step": 42584 }, { "epoch": 1.9542471662613006, "grad_norm": 0.5064638257026672, "learning_rate": 2.81020131424245e-06, "loss": 0.3942, "step": 42585 }, { "epoch": 1.9542930567665548, "grad_norm": 0.5324026942253113, "learning_rate": 2.809980894273988e-06, "loss": 0.4332, "step": 42586 }, { "epoch": 1.9543389472718093, "grad_norm": 0.4467504620552063, "learning_rate": 2.8097604795718337e-06, "loss": 0.3007, "step": 42587 }, { "epoch": 1.9543848377770638, "grad_norm": 0.48012593388557434, "learning_rate": 2.809540070136513e-06, "loss": 0.3334, "step": 42588 }, { "epoch": 1.9544307282823183, "grad_norm": 0.45491722226142883, "learning_rate": 2.8093196659685585e-06, "loss": 0.3636, "step": 42589 }, { "epoch": 1.9544766187875728, "grad_norm": 0.440604567527771, "learning_rate": 2.809099267068498e-06, "loss": 0.3178, "step": 42590 }, { "epoch": 1.9545225092928273, "grad_norm": 0.44432374835014343, "learning_rate": 2.808878873436862e-06, "loss": 0.3337, "step": 42591 }, { "epoch": 1.9545683997980818, "grad_norm": 0.4582633078098297, "learning_rate": 2.8086584850741793e-06, "loss": 0.3219, "step": 42592 }, { "epoch": 1.9546142903033363, "grad_norm": 0.46526646614074707, "learning_rate": 2.8084381019809833e-06, "loss": 0.377, "step": 42593 }, { "epoch": 1.9546601808085908, "grad_norm": 0.4804357886314392, "learning_rate": 2.808217724157802e-06, "loss": 0.3765, "step": 42594 }, { "epoch": 1.9547060713138453, "grad_norm": 0.46754559874534607, "learning_rate": 2.807997351605163e-06, "loss": 0.3681, "step": 42595 }, { "epoch": 1.9547519618190996, "grad_norm": 0.46250200271606445, "learning_rate": 2.807776984323601e-06, "loss": 0.3192, "step": 42596 }, { "epoch": 1.954797852324354, "grad_norm": 0.4233126938343048, "learning_rate": 2.807556622313643e-06, "loss": 0.3075, "step": 42597 }, { "epoch": 1.9548437428296086, "grad_norm": 0.4820030927658081, "learning_rate": 2.8073362655758197e-06, "loss": 0.3236, "step": 42598 }, { "epoch": 1.9548896333348629, "grad_norm": 0.4851837754249573, "learning_rate": 2.8071159141106607e-06, "loss": 0.3748, "step": 42599 }, { "epoch": 1.9549355238401174, "grad_norm": 0.4749301075935364, "learning_rate": 2.8068955679186956e-06, "loss": 0.3413, "step": 42600 }, { "epoch": 1.9549814143453719, "grad_norm": 0.4721699655056, "learning_rate": 2.806675227000454e-06, "loss": 0.3423, "step": 42601 }, { "epoch": 1.9550273048506264, "grad_norm": 0.4495364725589752, "learning_rate": 2.806454891356465e-06, "loss": 0.3039, "step": 42602 }, { "epoch": 1.9550731953558809, "grad_norm": 0.4818999767303467, "learning_rate": 2.8062345609872617e-06, "loss": 0.4053, "step": 42603 }, { "epoch": 1.9551190858611354, "grad_norm": 0.4635816514492035, "learning_rate": 2.8060142358933716e-06, "loss": 0.3362, "step": 42604 }, { "epoch": 1.9551649763663899, "grad_norm": 0.44836628437042236, "learning_rate": 2.8057939160753235e-06, "loss": 0.287, "step": 42605 }, { "epoch": 1.9552108668716444, "grad_norm": 0.438023179769516, "learning_rate": 2.80557360153365e-06, "loss": 0.2924, "step": 42606 }, { "epoch": 1.9552567573768989, "grad_norm": 0.47951453924179077, "learning_rate": 2.8053532922688786e-06, "loss": 0.3809, "step": 42607 }, { "epoch": 1.9553026478821531, "grad_norm": 0.46520310640335083, "learning_rate": 2.805132988281539e-06, "loss": 0.3626, "step": 42608 }, { "epoch": 1.9553485383874076, "grad_norm": 0.42953282594680786, "learning_rate": 2.8049126895721633e-06, "loss": 0.2727, "step": 42609 }, { "epoch": 1.9553944288926621, "grad_norm": 0.46154528856277466, "learning_rate": 2.8046923961412795e-06, "loss": 0.3742, "step": 42610 }, { "epoch": 1.9554403193979166, "grad_norm": 0.4611739218235016, "learning_rate": 2.804472107989419e-06, "loss": 0.3808, "step": 42611 }, { "epoch": 1.955486209903171, "grad_norm": 0.45480746030807495, "learning_rate": 2.804251825117109e-06, "loss": 0.3637, "step": 42612 }, { "epoch": 1.9555321004084254, "grad_norm": 0.47020718455314636, "learning_rate": 2.8040315475248804e-06, "loss": 0.2885, "step": 42613 }, { "epoch": 1.95557799091368, "grad_norm": 0.45602288842201233, "learning_rate": 2.8038112752132614e-06, "loss": 0.3294, "step": 42614 }, { "epoch": 1.9556238814189344, "grad_norm": 0.4646095335483551, "learning_rate": 2.8035910081827843e-06, "loss": 0.3617, "step": 42615 }, { "epoch": 1.955669771924189, "grad_norm": 0.5059548020362854, "learning_rate": 2.8033707464339787e-06, "loss": 0.3588, "step": 42616 }, { "epoch": 1.9557156624294434, "grad_norm": 0.5013076663017273, "learning_rate": 2.80315048996737e-06, "loss": 0.3961, "step": 42617 }, { "epoch": 1.9557615529346979, "grad_norm": 0.4865446090698242, "learning_rate": 2.8029302387834946e-06, "loss": 0.3725, "step": 42618 }, { "epoch": 1.9558074434399524, "grad_norm": 0.4943789839744568, "learning_rate": 2.802709992882877e-06, "loss": 0.4118, "step": 42619 }, { "epoch": 1.9558533339452069, "grad_norm": 0.5054281949996948, "learning_rate": 2.8024897522660488e-06, "loss": 0.4323, "step": 42620 }, { "epoch": 1.9558992244504612, "grad_norm": 0.5302573442459106, "learning_rate": 2.8022695169335394e-06, "loss": 0.4339, "step": 42621 }, { "epoch": 1.9559451149557157, "grad_norm": 0.5048846006393433, "learning_rate": 2.8020492868858786e-06, "loss": 0.4098, "step": 42622 }, { "epoch": 1.9559910054609702, "grad_norm": 0.4763071537017822, "learning_rate": 2.8018290621235933e-06, "loss": 0.3609, "step": 42623 }, { "epoch": 1.9560368959662244, "grad_norm": 0.4737274646759033, "learning_rate": 2.8016088426472167e-06, "loss": 0.3575, "step": 42624 }, { "epoch": 1.956082786471479, "grad_norm": 0.4714599549770355, "learning_rate": 2.8013886284572777e-06, "loss": 0.4007, "step": 42625 }, { "epoch": 1.9561286769767334, "grad_norm": 0.4745331406593323, "learning_rate": 2.8011684195543033e-06, "loss": 0.3354, "step": 42626 }, { "epoch": 1.956174567481988, "grad_norm": 0.5430697798728943, "learning_rate": 2.800948215938826e-06, "loss": 0.4104, "step": 42627 }, { "epoch": 1.9562204579872424, "grad_norm": 0.4837821424007416, "learning_rate": 2.800728017611375e-06, "loss": 0.3509, "step": 42628 }, { "epoch": 1.956266348492497, "grad_norm": 0.4735223352909088, "learning_rate": 2.8005078245724784e-06, "loss": 0.3433, "step": 42629 }, { "epoch": 1.9563122389977514, "grad_norm": 0.46920254826545715, "learning_rate": 2.8002876368226652e-06, "loss": 0.3515, "step": 42630 }, { "epoch": 1.956358129503006, "grad_norm": 0.43987610936164856, "learning_rate": 2.8000674543624674e-06, "loss": 0.3203, "step": 42631 }, { "epoch": 1.9564040200082604, "grad_norm": 0.4764060080051422, "learning_rate": 2.799847277192412e-06, "loss": 0.357, "step": 42632 }, { "epoch": 1.956449910513515, "grad_norm": 0.4895569384098053, "learning_rate": 2.799627105313031e-06, "loss": 0.3528, "step": 42633 }, { "epoch": 1.9564958010187692, "grad_norm": 0.4754211902618408, "learning_rate": 2.7994069387248525e-06, "loss": 0.3545, "step": 42634 }, { "epoch": 1.9565416915240237, "grad_norm": 0.4861854016780853, "learning_rate": 2.7991867774284047e-06, "loss": 0.385, "step": 42635 }, { "epoch": 1.9565875820292782, "grad_norm": 0.4404165744781494, "learning_rate": 2.7989666214242167e-06, "loss": 0.3197, "step": 42636 }, { "epoch": 1.9566334725345325, "grad_norm": 0.48736467957496643, "learning_rate": 2.7987464707128216e-06, "loss": 0.3828, "step": 42637 }, { "epoch": 1.956679363039787, "grad_norm": 0.4658512473106384, "learning_rate": 2.798526325294746e-06, "loss": 0.3477, "step": 42638 }, { "epoch": 1.9567252535450415, "grad_norm": 0.46470773220062256, "learning_rate": 2.7983061851705177e-06, "loss": 0.3516, "step": 42639 }, { "epoch": 1.956771144050296, "grad_norm": 0.45387718081474304, "learning_rate": 2.7980860503406702e-06, "loss": 0.3181, "step": 42640 }, { "epoch": 1.9568170345555504, "grad_norm": 0.4341164827346802, "learning_rate": 2.797865920805731e-06, "loss": 0.3402, "step": 42641 }, { "epoch": 1.956862925060805, "grad_norm": 0.43490374088287354, "learning_rate": 2.7976457965662295e-06, "loss": 0.3148, "step": 42642 }, { "epoch": 1.9569088155660594, "grad_norm": 0.4563387930393219, "learning_rate": 2.7974256776226945e-06, "loss": 0.3503, "step": 42643 }, { "epoch": 1.956954706071314, "grad_norm": 0.45153698325157166, "learning_rate": 2.797205563975656e-06, "loss": 0.3418, "step": 42644 }, { "epoch": 1.9570005965765684, "grad_norm": 0.4886835813522339, "learning_rate": 2.7969854556256396e-06, "loss": 0.3655, "step": 42645 }, { "epoch": 1.9570464870818227, "grad_norm": 0.5136573910713196, "learning_rate": 2.796765352573181e-06, "loss": 0.4141, "step": 42646 }, { "epoch": 1.9570923775870772, "grad_norm": 0.455084890127182, "learning_rate": 2.7965452548188067e-06, "loss": 0.3544, "step": 42647 }, { "epoch": 1.9571382680923317, "grad_norm": 0.4364270567893982, "learning_rate": 2.796325162363043e-06, "loss": 0.3198, "step": 42648 }, { "epoch": 1.9571841585975862, "grad_norm": 0.4567607641220093, "learning_rate": 2.7961050752064233e-06, "loss": 0.368, "step": 42649 }, { "epoch": 1.9572300491028405, "grad_norm": 0.4805290699005127, "learning_rate": 2.795884993349476e-06, "loss": 0.3848, "step": 42650 }, { "epoch": 1.957275939608095, "grad_norm": 0.4760979115962982, "learning_rate": 2.7956649167927275e-06, "loss": 0.3482, "step": 42651 }, { "epoch": 1.9573218301133495, "grad_norm": 0.4655333459377289, "learning_rate": 2.7954448455367113e-06, "loss": 0.32, "step": 42652 }, { "epoch": 1.957367720618604, "grad_norm": 0.4388354420661926, "learning_rate": 2.795224779581954e-06, "loss": 0.3027, "step": 42653 }, { "epoch": 1.9574136111238585, "grad_norm": 0.47450101375579834, "learning_rate": 2.795004718928986e-06, "loss": 0.3658, "step": 42654 }, { "epoch": 1.957459501629113, "grad_norm": 0.5150549411773682, "learning_rate": 2.7947846635783353e-06, "loss": 0.4081, "step": 42655 }, { "epoch": 1.9575053921343675, "grad_norm": 0.4700637757778168, "learning_rate": 2.7945646135305316e-06, "loss": 0.3223, "step": 42656 }, { "epoch": 1.957551282639622, "grad_norm": 0.47039172053337097, "learning_rate": 2.7943445687861016e-06, "loss": 0.3644, "step": 42657 }, { "epoch": 1.9575971731448765, "grad_norm": 0.4821496307849884, "learning_rate": 2.7941245293455793e-06, "loss": 0.4421, "step": 42658 }, { "epoch": 1.9576430636501307, "grad_norm": 0.4409633278846741, "learning_rate": 2.793904495209491e-06, "loss": 0.3098, "step": 42659 }, { "epoch": 1.9576889541553852, "grad_norm": 0.41578003764152527, "learning_rate": 2.7936844663783648e-06, "loss": 0.2717, "step": 42660 }, { "epoch": 1.9577348446606397, "grad_norm": 0.5190191268920898, "learning_rate": 2.7934644428527326e-06, "loss": 0.4288, "step": 42661 }, { "epoch": 1.957780735165894, "grad_norm": 0.488781213760376, "learning_rate": 2.7932444246331216e-06, "loss": 0.3919, "step": 42662 }, { "epoch": 1.9578266256711485, "grad_norm": 0.47790324687957764, "learning_rate": 2.7930244117200618e-06, "loss": 0.3739, "step": 42663 }, { "epoch": 1.957872516176403, "grad_norm": 0.46007439494132996, "learning_rate": 2.792804404114081e-06, "loss": 0.328, "step": 42664 }, { "epoch": 1.9579184066816575, "grad_norm": 0.5023325085639954, "learning_rate": 2.79258440181571e-06, "loss": 0.3666, "step": 42665 }, { "epoch": 1.957964297186912, "grad_norm": 0.4422638714313507, "learning_rate": 2.792364404825476e-06, "loss": 0.3325, "step": 42666 }, { "epoch": 1.9580101876921665, "grad_norm": 0.5093100666999817, "learning_rate": 2.792144413143908e-06, "loss": 0.3658, "step": 42667 }, { "epoch": 1.958056078197421, "grad_norm": 0.5203491449356079, "learning_rate": 2.7919244267715367e-06, "loss": 0.4367, "step": 42668 }, { "epoch": 1.9581019687026755, "grad_norm": 0.4647260904312134, "learning_rate": 2.7917044457088906e-06, "loss": 0.3356, "step": 42669 }, { "epoch": 1.95814785920793, "grad_norm": 0.4616556763648987, "learning_rate": 2.791484469956497e-06, "loss": 0.3289, "step": 42670 }, { "epoch": 1.9581937497131845, "grad_norm": 0.5037557482719421, "learning_rate": 2.7912644995148875e-06, "loss": 0.3849, "step": 42671 }, { "epoch": 1.9582396402184388, "grad_norm": 0.48734956979751587, "learning_rate": 2.79104453438459e-06, "loss": 0.3505, "step": 42672 }, { "epoch": 1.9582855307236933, "grad_norm": 0.4402637779712677, "learning_rate": 2.790824574566131e-06, "loss": 0.3008, "step": 42673 }, { "epoch": 1.9583314212289478, "grad_norm": 0.45045006275177, "learning_rate": 2.790604620060044e-06, "loss": 0.315, "step": 42674 }, { "epoch": 1.958377311734202, "grad_norm": 0.5239521265029907, "learning_rate": 2.7903846708668553e-06, "loss": 0.3867, "step": 42675 }, { "epoch": 1.9584232022394565, "grad_norm": 0.49133506417274475, "learning_rate": 2.7901647269870946e-06, "loss": 0.3725, "step": 42676 }, { "epoch": 1.958469092744711, "grad_norm": 0.4734187424182892, "learning_rate": 2.789944788421289e-06, "loss": 0.3462, "step": 42677 }, { "epoch": 1.9585149832499655, "grad_norm": 0.4839710295200348, "learning_rate": 2.78972485516997e-06, "loss": 0.3342, "step": 42678 }, { "epoch": 1.95856087375522, "grad_norm": 0.4628952145576477, "learning_rate": 2.789504927233663e-06, "loss": 0.3446, "step": 42679 }, { "epoch": 1.9586067642604745, "grad_norm": 0.4872036278247833, "learning_rate": 2.7892850046129007e-06, "loss": 0.3317, "step": 42680 }, { "epoch": 1.958652654765729, "grad_norm": 0.46467864513397217, "learning_rate": 2.7890650873082105e-06, "loss": 0.3637, "step": 42681 }, { "epoch": 1.9586985452709835, "grad_norm": 0.44176945090293884, "learning_rate": 2.7888451753201183e-06, "loss": 0.3215, "step": 42682 }, { "epoch": 1.958744435776238, "grad_norm": 0.4156891703605652, "learning_rate": 2.7886252686491585e-06, "loss": 0.306, "step": 42683 }, { "epoch": 1.9587903262814925, "grad_norm": 0.4656137228012085, "learning_rate": 2.7884053672958567e-06, "loss": 0.3298, "step": 42684 }, { "epoch": 1.9588362167867468, "grad_norm": 0.4568449556827545, "learning_rate": 2.7881854712607394e-06, "loss": 0.3013, "step": 42685 }, { "epoch": 1.9588821072920013, "grad_norm": 0.5044740438461304, "learning_rate": 2.7879655805443427e-06, "loss": 0.3948, "step": 42686 }, { "epoch": 1.9589279977972558, "grad_norm": 0.4669477343559265, "learning_rate": 2.7877456951471883e-06, "loss": 0.3705, "step": 42687 }, { "epoch": 1.95897388830251, "grad_norm": 0.5089010000228882, "learning_rate": 2.787525815069805e-06, "loss": 0.4724, "step": 42688 }, { "epoch": 1.9590197788077646, "grad_norm": 0.5099437832832336, "learning_rate": 2.7873059403127266e-06, "loss": 0.3948, "step": 42689 }, { "epoch": 1.959065669313019, "grad_norm": 0.4738597869873047, "learning_rate": 2.7870860708764784e-06, "loss": 0.3779, "step": 42690 }, { "epoch": 1.9591115598182736, "grad_norm": 0.4569297134876251, "learning_rate": 2.786866206761588e-06, "loss": 0.3093, "step": 42691 }, { "epoch": 1.959157450323528, "grad_norm": 0.489706814289093, "learning_rate": 2.786646347968588e-06, "loss": 0.4058, "step": 42692 }, { "epoch": 1.9592033408287826, "grad_norm": 0.455205500125885, "learning_rate": 2.7864264944980044e-06, "loss": 0.3178, "step": 42693 }, { "epoch": 1.959249231334037, "grad_norm": 0.49059292674064636, "learning_rate": 2.7862066463503674e-06, "loss": 0.3892, "step": 42694 }, { "epoch": 1.9592951218392916, "grad_norm": 0.49572381377220154, "learning_rate": 2.785986803526202e-06, "loss": 0.321, "step": 42695 }, { "epoch": 1.959341012344546, "grad_norm": 0.4644756019115448, "learning_rate": 2.7857669660260423e-06, "loss": 0.3564, "step": 42696 }, { "epoch": 1.9593869028498003, "grad_norm": 0.42119333148002625, "learning_rate": 2.7855471338504135e-06, "loss": 0.2977, "step": 42697 }, { "epoch": 1.9594327933550548, "grad_norm": 0.48818761110305786, "learning_rate": 2.7853273069998453e-06, "loss": 0.3316, "step": 42698 }, { "epoch": 1.9594786838603093, "grad_norm": 0.49452564120292664, "learning_rate": 2.785107485474866e-06, "loss": 0.4417, "step": 42699 }, { "epoch": 1.9595245743655636, "grad_norm": 0.5207871794700623, "learning_rate": 2.7848876692760043e-06, "loss": 0.4352, "step": 42700 }, { "epoch": 1.959570464870818, "grad_norm": 0.44345036149024963, "learning_rate": 2.7846678584037856e-06, "loss": 0.3264, "step": 42701 }, { "epoch": 1.9596163553760726, "grad_norm": 0.479976624250412, "learning_rate": 2.7844480528587447e-06, "loss": 0.4187, "step": 42702 }, { "epoch": 1.959662245881327, "grad_norm": 0.46240389347076416, "learning_rate": 2.7842282526414068e-06, "loss": 0.3258, "step": 42703 }, { "epoch": 1.9597081363865816, "grad_norm": 0.46940183639526367, "learning_rate": 2.7840084577522987e-06, "loss": 0.3376, "step": 42704 }, { "epoch": 1.959754026891836, "grad_norm": 0.45884016156196594, "learning_rate": 2.7837886681919524e-06, "loss": 0.3408, "step": 42705 }, { "epoch": 1.9597999173970906, "grad_norm": 0.4864921569824219, "learning_rate": 2.7835688839608943e-06, "loss": 0.3988, "step": 42706 }, { "epoch": 1.959845807902345, "grad_norm": 0.47822505235671997, "learning_rate": 2.783349105059653e-06, "loss": 0.3263, "step": 42707 }, { "epoch": 1.9598916984075996, "grad_norm": 0.5204706192016602, "learning_rate": 2.7831293314887607e-06, "loss": 0.4288, "step": 42708 }, { "epoch": 1.959937588912854, "grad_norm": 0.4732123911380768, "learning_rate": 2.7829095632487403e-06, "loss": 0.3607, "step": 42709 }, { "epoch": 1.9599834794181084, "grad_norm": 0.5083494186401367, "learning_rate": 2.7826898003401213e-06, "loss": 0.4462, "step": 42710 }, { "epoch": 1.9600293699233629, "grad_norm": 0.4558075964450836, "learning_rate": 2.7824700427634355e-06, "loss": 0.3451, "step": 42711 }, { "epoch": 1.9600752604286173, "grad_norm": 0.4846417307853699, "learning_rate": 2.7822502905192095e-06, "loss": 0.3654, "step": 42712 }, { "epoch": 1.9601211509338716, "grad_norm": 0.4569319486618042, "learning_rate": 2.782030543607969e-06, "loss": 0.3349, "step": 42713 }, { "epoch": 1.9601670414391261, "grad_norm": 0.5422413349151611, "learning_rate": 2.7818108020302478e-06, "loss": 0.3994, "step": 42714 }, { "epoch": 1.9602129319443806, "grad_norm": 0.4837099611759186, "learning_rate": 2.7815910657865707e-06, "loss": 0.3697, "step": 42715 }, { "epoch": 1.9602588224496351, "grad_norm": 0.48875996470451355, "learning_rate": 2.7813713348774653e-06, "loss": 0.3743, "step": 42716 }, { "epoch": 1.9603047129548896, "grad_norm": 0.48086073994636536, "learning_rate": 2.7811516093034642e-06, "loss": 0.3826, "step": 42717 }, { "epoch": 1.9603506034601441, "grad_norm": 0.43515247106552124, "learning_rate": 2.7809318890650927e-06, "loss": 0.3076, "step": 42718 }, { "epoch": 1.9603964939653986, "grad_norm": 0.46085551381111145, "learning_rate": 2.7807121741628796e-06, "loss": 0.3831, "step": 42719 }, { "epoch": 1.960442384470653, "grad_norm": 0.4880446493625641, "learning_rate": 2.780492464597353e-06, "loss": 0.3786, "step": 42720 }, { "epoch": 1.9604882749759076, "grad_norm": 0.4760649502277374, "learning_rate": 2.7802727603690414e-06, "loss": 0.3692, "step": 42721 }, { "epoch": 1.960534165481162, "grad_norm": 0.5025593638420105, "learning_rate": 2.780053061478472e-06, "loss": 0.3374, "step": 42722 }, { "epoch": 1.9605800559864164, "grad_norm": 0.4647161662578583, "learning_rate": 2.7798333679261764e-06, "loss": 0.3398, "step": 42723 }, { "epoch": 1.9606259464916709, "grad_norm": 0.4864025115966797, "learning_rate": 2.7796136797126806e-06, "loss": 0.3886, "step": 42724 }, { "epoch": 1.9606718369969254, "grad_norm": 0.4592362344264984, "learning_rate": 2.779393996838511e-06, "loss": 0.3408, "step": 42725 }, { "epoch": 1.9607177275021797, "grad_norm": 0.43354520201683044, "learning_rate": 2.7791743193041997e-06, "loss": 0.2939, "step": 42726 }, { "epoch": 1.9607636180074342, "grad_norm": 0.51848965883255, "learning_rate": 2.7789546471102734e-06, "loss": 0.3389, "step": 42727 }, { "epoch": 1.9608095085126886, "grad_norm": 0.5233510732650757, "learning_rate": 2.7787349802572604e-06, "loss": 0.4447, "step": 42728 }, { "epoch": 1.9608553990179431, "grad_norm": 0.47242671251296997, "learning_rate": 2.778515318745687e-06, "loss": 0.3763, "step": 42729 }, { "epoch": 1.9609012895231976, "grad_norm": 0.47130510210990906, "learning_rate": 2.778295662576087e-06, "loss": 0.3413, "step": 42730 }, { "epoch": 1.9609471800284521, "grad_norm": 0.4930626451969147, "learning_rate": 2.7780760117489826e-06, "loss": 0.3509, "step": 42731 }, { "epoch": 1.9609930705337066, "grad_norm": 0.4682520627975464, "learning_rate": 2.7778563662649015e-06, "loss": 0.3274, "step": 42732 }, { "epoch": 1.9610389610389611, "grad_norm": 0.46139806509017944, "learning_rate": 2.7776367261243774e-06, "loss": 0.3482, "step": 42733 }, { "epoch": 1.9610848515442156, "grad_norm": 0.4676688611507416, "learning_rate": 2.7774170913279353e-06, "loss": 0.3373, "step": 42734 }, { "epoch": 1.96113074204947, "grad_norm": 0.46672171354293823, "learning_rate": 2.7771974618761017e-06, "loss": 0.3485, "step": 42735 }, { "epoch": 1.9611766325547244, "grad_norm": 0.48873549699783325, "learning_rate": 2.7769778377694085e-06, "loss": 0.3589, "step": 42736 }, { "epoch": 1.961222523059979, "grad_norm": 0.47027504444122314, "learning_rate": 2.776758219008382e-06, "loss": 0.3353, "step": 42737 }, { "epoch": 1.9612684135652334, "grad_norm": 0.4936540722846985, "learning_rate": 2.7765386055935484e-06, "loss": 0.3893, "step": 42738 }, { "epoch": 1.9613143040704877, "grad_norm": 0.44667407870292664, "learning_rate": 2.7763189975254397e-06, "loss": 0.3103, "step": 42739 }, { "epoch": 1.9613601945757422, "grad_norm": 0.47585251927375793, "learning_rate": 2.7760993948045823e-06, "loss": 0.3634, "step": 42740 }, { "epoch": 1.9614060850809967, "grad_norm": 0.44235143065452576, "learning_rate": 2.7758797974315034e-06, "loss": 0.322, "step": 42741 }, { "epoch": 1.9614519755862512, "grad_norm": 0.4876372516155243, "learning_rate": 2.7756602054067316e-06, "loss": 0.3582, "step": 42742 }, { "epoch": 1.9614978660915057, "grad_norm": 0.4664761424064636, "learning_rate": 2.7754406187307954e-06, "loss": 0.3807, "step": 42743 }, { "epoch": 1.9615437565967602, "grad_norm": 0.4331066310405731, "learning_rate": 2.77522103740422e-06, "loss": 0.2868, "step": 42744 }, { "epoch": 1.9615896471020147, "grad_norm": 0.4685346484184265, "learning_rate": 2.7750014614275384e-06, "loss": 0.3584, "step": 42745 }, { "epoch": 1.9616355376072692, "grad_norm": 0.5147796869277954, "learning_rate": 2.7747818908012756e-06, "loss": 0.4228, "step": 42746 }, { "epoch": 1.9616814281125237, "grad_norm": 0.48538294434547424, "learning_rate": 2.7745623255259584e-06, "loss": 0.3985, "step": 42747 }, { "epoch": 1.961727318617778, "grad_norm": 0.4577445089817047, "learning_rate": 2.7743427656021183e-06, "loss": 0.3598, "step": 42748 }, { "epoch": 1.9617732091230324, "grad_norm": 0.41643044352531433, "learning_rate": 2.7741232110302813e-06, "loss": 0.2679, "step": 42749 }, { "epoch": 1.961819099628287, "grad_norm": 0.47026699781417847, "learning_rate": 2.7739036618109738e-06, "loss": 0.432, "step": 42750 }, { "epoch": 1.9618649901335412, "grad_norm": 0.44650447368621826, "learning_rate": 2.7736841179447273e-06, "loss": 0.3051, "step": 42751 }, { "epoch": 1.9619108806387957, "grad_norm": 0.5170765519142151, "learning_rate": 2.773464579432069e-06, "loss": 0.4114, "step": 42752 }, { "epoch": 1.9619567711440502, "grad_norm": 0.46876320242881775, "learning_rate": 2.7732450462735227e-06, "loss": 0.3551, "step": 42753 }, { "epoch": 1.9620026616493047, "grad_norm": 0.5149762630462646, "learning_rate": 2.773025518469621e-06, "loss": 0.4218, "step": 42754 }, { "epoch": 1.9620485521545592, "grad_norm": 0.5108720064163208, "learning_rate": 2.7728059960208905e-06, "loss": 0.4239, "step": 42755 }, { "epoch": 1.9620944426598137, "grad_norm": 0.4551020562648773, "learning_rate": 2.7725864789278558e-06, "loss": 0.3172, "step": 42756 }, { "epoch": 1.9621403331650682, "grad_norm": 0.46515828371047974, "learning_rate": 2.7723669671910505e-06, "loss": 0.3478, "step": 42757 }, { "epoch": 1.9621862236703227, "grad_norm": 0.48717600107192993, "learning_rate": 2.772147460810999e-06, "loss": 0.3562, "step": 42758 }, { "epoch": 1.9622321141755772, "grad_norm": 0.47209182381629944, "learning_rate": 2.7719279597882298e-06, "loss": 0.3743, "step": 42759 }, { "epoch": 1.9622780046808317, "grad_norm": 0.49100419878959656, "learning_rate": 2.771708464123268e-06, "loss": 0.4104, "step": 42760 }, { "epoch": 1.962323895186086, "grad_norm": 0.4669548273086548, "learning_rate": 2.771488973816647e-06, "loss": 0.3572, "step": 42761 }, { "epoch": 1.9623697856913405, "grad_norm": 0.48151662945747375, "learning_rate": 2.771269488868892e-06, "loss": 0.3872, "step": 42762 }, { "epoch": 1.962415676196595, "grad_norm": 0.522543728351593, "learning_rate": 2.77105000928053e-06, "loss": 0.4845, "step": 42763 }, { "epoch": 1.9624615667018492, "grad_norm": 0.4768964946269989, "learning_rate": 2.7708305350520893e-06, "loss": 0.3893, "step": 42764 }, { "epoch": 1.9625074572071037, "grad_norm": 0.47656336426734924, "learning_rate": 2.770611066184097e-06, "loss": 0.3584, "step": 42765 }, { "epoch": 1.9625533477123582, "grad_norm": 0.44240084290504456, "learning_rate": 2.77039160267708e-06, "loss": 0.3341, "step": 42766 }, { "epoch": 1.9625992382176127, "grad_norm": 0.4717763662338257, "learning_rate": 2.77017214453157e-06, "loss": 0.3481, "step": 42767 }, { "epoch": 1.9626451287228672, "grad_norm": 0.45515933632850647, "learning_rate": 2.769952691748091e-06, "loss": 0.361, "step": 42768 }, { "epoch": 1.9626910192281217, "grad_norm": 0.4609382450580597, "learning_rate": 2.7697332443271705e-06, "loss": 0.3469, "step": 42769 }, { "epoch": 1.9627369097333762, "grad_norm": 0.43735918402671814, "learning_rate": 2.76951380226934e-06, "loss": 0.3001, "step": 42770 }, { "epoch": 1.9627828002386307, "grad_norm": 0.48054012656211853, "learning_rate": 2.769294365575125e-06, "loss": 0.3722, "step": 42771 }, { "epoch": 1.9628286907438852, "grad_norm": 0.46344009041786194, "learning_rate": 2.7690749342450507e-06, "loss": 0.3657, "step": 42772 }, { "epoch": 1.9628745812491397, "grad_norm": 0.4297401010990143, "learning_rate": 2.7688555082796486e-06, "loss": 0.2913, "step": 42773 }, { "epoch": 1.962920471754394, "grad_norm": 0.4592282474040985, "learning_rate": 2.7686360876794477e-06, "loss": 0.3479, "step": 42774 }, { "epoch": 1.9629663622596485, "grad_norm": 0.4723133146762848, "learning_rate": 2.7684166724449684e-06, "loss": 0.3449, "step": 42775 }, { "epoch": 1.963012252764903, "grad_norm": 0.45876574516296387, "learning_rate": 2.768197262576745e-06, "loss": 0.3234, "step": 42776 }, { "epoch": 1.9630581432701573, "grad_norm": 0.45564863085746765, "learning_rate": 2.7679778580753024e-06, "loss": 0.3264, "step": 42777 }, { "epoch": 1.9631040337754118, "grad_norm": 0.480070024728775, "learning_rate": 2.7677584589411665e-06, "loss": 0.3329, "step": 42778 }, { "epoch": 1.9631499242806663, "grad_norm": 0.5161702036857605, "learning_rate": 2.767539065174869e-06, "loss": 0.476, "step": 42779 }, { "epoch": 1.9631958147859208, "grad_norm": 0.43776044249534607, "learning_rate": 2.7673196767769363e-06, "loss": 0.287, "step": 42780 }, { "epoch": 1.9632417052911753, "grad_norm": 0.5098643898963928, "learning_rate": 2.767100293747892e-06, "loss": 0.4032, "step": 42781 }, { "epoch": 1.9632875957964298, "grad_norm": 0.4734732210636139, "learning_rate": 2.7668809160882692e-06, "loss": 0.3197, "step": 42782 }, { "epoch": 1.9633334863016842, "grad_norm": 0.44803106784820557, "learning_rate": 2.7666615437985933e-06, "loss": 0.3356, "step": 42783 }, { "epoch": 1.9633793768069387, "grad_norm": 0.4644460082054138, "learning_rate": 2.766442176879391e-06, "loss": 0.2991, "step": 42784 }, { "epoch": 1.9634252673121932, "grad_norm": 0.464834600687027, "learning_rate": 2.7662228153311905e-06, "loss": 0.3409, "step": 42785 }, { "epoch": 1.9634711578174475, "grad_norm": 0.41341957449913025, "learning_rate": 2.7660034591545194e-06, "loss": 0.2791, "step": 42786 }, { "epoch": 1.963517048322702, "grad_norm": 0.5017837285995483, "learning_rate": 2.7657841083499028e-06, "loss": 0.3983, "step": 42787 }, { "epoch": 1.9635629388279565, "grad_norm": 0.47702300548553467, "learning_rate": 2.7655647629178717e-06, "loss": 0.3523, "step": 42788 }, { "epoch": 1.9636088293332108, "grad_norm": 0.5161416530609131, "learning_rate": 2.7653454228589526e-06, "loss": 0.411, "step": 42789 }, { "epoch": 1.9636547198384653, "grad_norm": 0.479185551404953, "learning_rate": 2.7651260881736707e-06, "loss": 0.4007, "step": 42790 }, { "epoch": 1.9637006103437198, "grad_norm": 0.5051237344741821, "learning_rate": 2.764906758862557e-06, "loss": 0.3846, "step": 42791 }, { "epoch": 1.9637465008489743, "grad_norm": 0.4717367887496948, "learning_rate": 2.7646874349261377e-06, "loss": 0.3315, "step": 42792 }, { "epoch": 1.9637923913542288, "grad_norm": 0.4566437900066376, "learning_rate": 2.764468116364939e-06, "loss": 0.3276, "step": 42793 }, { "epoch": 1.9638382818594833, "grad_norm": 0.462356835603714, "learning_rate": 2.7642488031794865e-06, "loss": 0.3492, "step": 42794 }, { "epoch": 1.9638841723647378, "grad_norm": 0.4651295244693756, "learning_rate": 2.7640294953703127e-06, "loss": 0.3461, "step": 42795 }, { "epoch": 1.9639300628699923, "grad_norm": 0.4714713990688324, "learning_rate": 2.763810192937945e-06, "loss": 0.3934, "step": 42796 }, { "epoch": 1.9639759533752468, "grad_norm": 0.42005255818367004, "learning_rate": 2.763590895882903e-06, "loss": 0.25, "step": 42797 }, { "epoch": 1.9640218438805013, "grad_norm": 0.48238804936408997, "learning_rate": 2.7633716042057206e-06, "loss": 0.3822, "step": 42798 }, { "epoch": 1.9640677343857555, "grad_norm": 0.47966626286506653, "learning_rate": 2.7631523179069243e-06, "loss": 0.3967, "step": 42799 }, { "epoch": 1.96411362489101, "grad_norm": 0.46954798698425293, "learning_rate": 2.762933036987039e-06, "loss": 0.3814, "step": 42800 }, { "epoch": 1.9641595153962645, "grad_norm": 0.48840081691741943, "learning_rate": 2.762713761446595e-06, "loss": 0.3617, "step": 42801 }, { "epoch": 1.9642054059015188, "grad_norm": 0.48907679319381714, "learning_rate": 2.7624944912861187e-06, "loss": 0.3848, "step": 42802 }, { "epoch": 1.9642512964067733, "grad_norm": 0.4763927459716797, "learning_rate": 2.7622752265061347e-06, "loss": 0.3917, "step": 42803 }, { "epoch": 1.9642971869120278, "grad_norm": 0.5006308555603027, "learning_rate": 2.7620559671071745e-06, "loss": 0.3599, "step": 42804 }, { "epoch": 1.9643430774172823, "grad_norm": 0.467944860458374, "learning_rate": 2.761836713089764e-06, "loss": 0.3595, "step": 42805 }, { "epoch": 1.9643889679225368, "grad_norm": 0.4432183504104614, "learning_rate": 2.761617464454429e-06, "loss": 0.3268, "step": 42806 }, { "epoch": 1.9644348584277913, "grad_norm": 0.49462372064590454, "learning_rate": 2.7613982212016978e-06, "loss": 0.3359, "step": 42807 }, { "epoch": 1.9644807489330458, "grad_norm": 0.5255132913589478, "learning_rate": 2.761178983332098e-06, "loss": 0.4229, "step": 42808 }, { "epoch": 1.9645266394383003, "grad_norm": 0.4943297207355499, "learning_rate": 2.760959750846153e-06, "loss": 0.4114, "step": 42809 }, { "epoch": 1.9645725299435548, "grad_norm": 0.4755267798900604, "learning_rate": 2.760740523744396e-06, "loss": 0.4052, "step": 42810 }, { "epoch": 1.9646184204488093, "grad_norm": 0.4742956757545471, "learning_rate": 2.760521302027351e-06, "loss": 0.3656, "step": 42811 }, { "epoch": 1.9646643109540636, "grad_norm": 0.46458613872528076, "learning_rate": 2.7603020856955444e-06, "loss": 0.3412, "step": 42812 }, { "epoch": 1.964710201459318, "grad_norm": 0.46573612093925476, "learning_rate": 2.760082874749505e-06, "loss": 0.3145, "step": 42813 }, { "epoch": 1.9647560919645726, "grad_norm": 0.5091037750244141, "learning_rate": 2.7598636691897594e-06, "loss": 0.3938, "step": 42814 }, { "epoch": 1.9648019824698268, "grad_norm": 0.48971569538116455, "learning_rate": 2.759644469016834e-06, "loss": 0.3754, "step": 42815 }, { "epoch": 1.9648478729750813, "grad_norm": 0.4829268753528595, "learning_rate": 2.7594252742312576e-06, "loss": 0.3905, "step": 42816 }, { "epoch": 1.9648937634803358, "grad_norm": 0.5252099633216858, "learning_rate": 2.7592060848335565e-06, "loss": 0.4342, "step": 42817 }, { "epoch": 1.9649396539855903, "grad_norm": 0.4242521822452545, "learning_rate": 2.7589869008242576e-06, "loss": 0.3029, "step": 42818 }, { "epoch": 1.9649855444908448, "grad_norm": 0.43491846323013306, "learning_rate": 2.7587677222038878e-06, "loss": 0.2985, "step": 42819 }, { "epoch": 1.9650314349960993, "grad_norm": 0.5011933445930481, "learning_rate": 2.758548548972974e-06, "loss": 0.4069, "step": 42820 }, { "epoch": 1.9650773255013538, "grad_norm": 0.5023173689842224, "learning_rate": 2.7583293811320424e-06, "loss": 0.4158, "step": 42821 }, { "epoch": 1.9651232160066083, "grad_norm": 0.43884092569351196, "learning_rate": 2.758110218681622e-06, "loss": 0.3175, "step": 42822 }, { "epoch": 1.9651691065118628, "grad_norm": 0.49482211470603943, "learning_rate": 2.7578910616222397e-06, "loss": 0.3685, "step": 42823 }, { "epoch": 1.965214997017117, "grad_norm": 0.4836224913597107, "learning_rate": 2.7576719099544193e-06, "loss": 0.4032, "step": 42824 }, { "epoch": 1.9652608875223716, "grad_norm": 0.6609395146369934, "learning_rate": 2.757452763678693e-06, "loss": 0.4138, "step": 42825 }, { "epoch": 1.965306778027626, "grad_norm": 0.45624494552612305, "learning_rate": 2.7572336227955844e-06, "loss": 0.3417, "step": 42826 }, { "epoch": 1.9653526685328806, "grad_norm": 0.4524540901184082, "learning_rate": 2.7570144873056215e-06, "loss": 0.3471, "step": 42827 }, { "epoch": 1.9653985590381349, "grad_norm": 0.4653138518333435, "learning_rate": 2.7567953572093302e-06, "loss": 0.3496, "step": 42828 }, { "epoch": 1.9654444495433894, "grad_norm": 0.4477055072784424, "learning_rate": 2.7565762325072387e-06, "loss": 0.3338, "step": 42829 }, { "epoch": 1.9654903400486439, "grad_norm": 0.4896380305290222, "learning_rate": 2.7563571131998733e-06, "loss": 0.3647, "step": 42830 }, { "epoch": 1.9655362305538984, "grad_norm": 0.45300671458244324, "learning_rate": 2.7561379992877583e-06, "loss": 0.3167, "step": 42831 }, { "epoch": 1.9655821210591529, "grad_norm": 0.46967750787734985, "learning_rate": 2.7559188907714262e-06, "loss": 0.364, "step": 42832 }, { "epoch": 1.9656280115644074, "grad_norm": 0.4484383761882782, "learning_rate": 2.7556997876514004e-06, "loss": 0.3341, "step": 42833 }, { "epoch": 1.9656739020696619, "grad_norm": 0.4530675411224365, "learning_rate": 2.755480689928206e-06, "loss": 0.3555, "step": 42834 }, { "epoch": 1.9657197925749164, "grad_norm": 0.5037322044372559, "learning_rate": 2.7552615976023743e-06, "loss": 0.3818, "step": 42835 }, { "epoch": 1.9657656830801709, "grad_norm": 0.4686565101146698, "learning_rate": 2.7550425106744306e-06, "loss": 0.3738, "step": 42836 }, { "epoch": 1.9658115735854251, "grad_norm": 0.4718894958496094, "learning_rate": 2.754823429144898e-06, "loss": 0.3495, "step": 42837 }, { "epoch": 1.9658574640906796, "grad_norm": 0.46795952320098877, "learning_rate": 2.7546043530143096e-06, "loss": 0.3598, "step": 42838 }, { "epoch": 1.9659033545959341, "grad_norm": 0.4499022364616394, "learning_rate": 2.7543852822831885e-06, "loss": 0.2695, "step": 42839 }, { "epoch": 1.9659492451011884, "grad_norm": 0.42825740575790405, "learning_rate": 2.754166216952062e-06, "loss": 0.2969, "step": 42840 }, { "epoch": 1.965995135606443, "grad_norm": 0.485538125038147, "learning_rate": 2.753947157021457e-06, "loss": 0.384, "step": 42841 }, { "epoch": 1.9660410261116974, "grad_norm": 0.5101372599601746, "learning_rate": 2.7537281024919005e-06, "loss": 0.3958, "step": 42842 }, { "epoch": 1.966086916616952, "grad_norm": 0.46135130524635315, "learning_rate": 2.753509053363917e-06, "loss": 0.3506, "step": 42843 }, { "epoch": 1.9661328071222064, "grad_norm": 0.49424290657043457, "learning_rate": 2.7532900096380373e-06, "loss": 0.3897, "step": 42844 }, { "epoch": 1.966178697627461, "grad_norm": 0.4105677306652069, "learning_rate": 2.753070971314785e-06, "loss": 0.271, "step": 42845 }, { "epoch": 1.9662245881327154, "grad_norm": 0.4613577127456665, "learning_rate": 2.752851938394687e-06, "loss": 0.3429, "step": 42846 }, { "epoch": 1.9662704786379699, "grad_norm": 0.45415017008781433, "learning_rate": 2.7526329108782722e-06, "loss": 0.3163, "step": 42847 }, { "epoch": 1.9663163691432244, "grad_norm": 0.47726237773895264, "learning_rate": 2.752413888766067e-06, "loss": 0.3894, "step": 42848 }, { "epoch": 1.9663622596484789, "grad_norm": 0.4523850381374359, "learning_rate": 2.7521948720585957e-06, "loss": 0.3084, "step": 42849 }, { "epoch": 1.9664081501537332, "grad_norm": 0.4916176497936249, "learning_rate": 2.7519758607563875e-06, "loss": 0.4355, "step": 42850 }, { "epoch": 1.9664540406589877, "grad_norm": 0.4534570276737213, "learning_rate": 2.751756854859967e-06, "loss": 0.3532, "step": 42851 }, { "epoch": 1.9664999311642422, "grad_norm": 0.4663563370704651, "learning_rate": 2.75153785436986e-06, "loss": 0.3324, "step": 42852 }, { "epoch": 1.9665458216694964, "grad_norm": 0.5541428923606873, "learning_rate": 2.7513188592865963e-06, "loss": 0.3516, "step": 42853 }, { "epoch": 1.966591712174751, "grad_norm": 0.4353538751602173, "learning_rate": 2.7510998696107015e-06, "loss": 0.303, "step": 42854 }, { "epoch": 1.9666376026800054, "grad_norm": 0.44753772020339966, "learning_rate": 2.7508808853427e-06, "loss": 0.3383, "step": 42855 }, { "epoch": 1.96668349318526, "grad_norm": 0.4872537851333618, "learning_rate": 2.750661906483122e-06, "loss": 0.3553, "step": 42856 }, { "epoch": 1.9667293836905144, "grad_norm": 0.4808294475078583, "learning_rate": 2.7504429330324923e-06, "loss": 0.3567, "step": 42857 }, { "epoch": 1.966775274195769, "grad_norm": 0.43983739614486694, "learning_rate": 2.7502239649913365e-06, "loss": 0.356, "step": 42858 }, { "epoch": 1.9668211647010234, "grad_norm": 0.4941234588623047, "learning_rate": 2.750005002360181e-06, "loss": 0.3812, "step": 42859 }, { "epoch": 1.966867055206278, "grad_norm": 0.5301010012626648, "learning_rate": 2.7497860451395555e-06, "loss": 0.4144, "step": 42860 }, { "epoch": 1.9669129457115324, "grad_norm": 0.4641619324684143, "learning_rate": 2.7495670933299836e-06, "loss": 0.3789, "step": 42861 }, { "epoch": 1.966958836216787, "grad_norm": 0.4496115446090698, "learning_rate": 2.7493481469319925e-06, "loss": 0.314, "step": 42862 }, { "epoch": 1.9670047267220412, "grad_norm": 0.5010966062545776, "learning_rate": 2.7491292059461096e-06, "loss": 0.3819, "step": 42863 }, { "epoch": 1.9670506172272957, "grad_norm": 0.5159322023391724, "learning_rate": 2.74891027037286e-06, "loss": 0.4192, "step": 42864 }, { "epoch": 1.9670965077325502, "grad_norm": 0.4328669607639313, "learning_rate": 2.7486913402127687e-06, "loss": 0.2913, "step": 42865 }, { "epoch": 1.9671423982378045, "grad_norm": 0.49574124813079834, "learning_rate": 2.748472415466366e-06, "loss": 0.3802, "step": 42866 }, { "epoch": 1.967188288743059, "grad_norm": 0.4546925127506256, "learning_rate": 2.7482534961341767e-06, "loss": 0.3374, "step": 42867 }, { "epoch": 1.9672341792483135, "grad_norm": 0.46523427963256836, "learning_rate": 2.7480345822167253e-06, "loss": 0.3511, "step": 42868 }, { "epoch": 1.967280069753568, "grad_norm": 0.45414188504219055, "learning_rate": 2.7478156737145424e-06, "loss": 0.3448, "step": 42869 }, { "epoch": 1.9673259602588224, "grad_norm": 0.5064060091972351, "learning_rate": 2.7475967706281513e-06, "loss": 0.4747, "step": 42870 }, { "epoch": 1.967371850764077, "grad_norm": 0.46521270275115967, "learning_rate": 2.7473778729580793e-06, "loss": 0.3202, "step": 42871 }, { "epoch": 1.9674177412693314, "grad_norm": 0.4995707869529724, "learning_rate": 2.7471589807048525e-06, "loss": 0.452, "step": 42872 }, { "epoch": 1.967463631774586, "grad_norm": 0.46405601501464844, "learning_rate": 2.7469400938689973e-06, "loss": 0.3924, "step": 42873 }, { "epoch": 1.9675095222798404, "grad_norm": 0.4776924252510071, "learning_rate": 2.7467212124510385e-06, "loss": 0.3277, "step": 42874 }, { "epoch": 1.9675554127850947, "grad_norm": 0.43833082914352417, "learning_rate": 2.7465023364515053e-06, "loss": 0.2984, "step": 42875 }, { "epoch": 1.9676013032903492, "grad_norm": 0.46614187955856323, "learning_rate": 2.746283465870924e-06, "loss": 0.3685, "step": 42876 }, { "epoch": 1.9676471937956037, "grad_norm": 0.46277499198913574, "learning_rate": 2.7460646007098163e-06, "loss": 0.3438, "step": 42877 }, { "epoch": 1.967693084300858, "grad_norm": 0.5671554207801819, "learning_rate": 2.7458457409687147e-06, "loss": 0.3679, "step": 42878 }, { "epoch": 1.9677389748061125, "grad_norm": 0.46522852778434753, "learning_rate": 2.7456268866481423e-06, "loss": 0.3298, "step": 42879 }, { "epoch": 1.967784865311367, "grad_norm": 0.470097154378891, "learning_rate": 2.7454080377486236e-06, "loss": 0.3321, "step": 42880 }, { "epoch": 1.9678307558166215, "grad_norm": 0.4826377034187317, "learning_rate": 2.7451891942706893e-06, "loss": 0.3966, "step": 42881 }, { "epoch": 1.967876646321876, "grad_norm": 0.4593963623046875, "learning_rate": 2.7449703562148635e-06, "loss": 0.3086, "step": 42882 }, { "epoch": 1.9679225368271305, "grad_norm": 0.46453648805618286, "learning_rate": 2.744751523581672e-06, "loss": 0.3349, "step": 42883 }, { "epoch": 1.967968427332385, "grad_norm": 0.6076543927192688, "learning_rate": 2.7445326963716414e-06, "loss": 0.3173, "step": 42884 }, { "epoch": 1.9680143178376395, "grad_norm": 0.44654417037963867, "learning_rate": 2.7443138745852975e-06, "loss": 0.309, "step": 42885 }, { "epoch": 1.968060208342894, "grad_norm": 0.4610488712787628, "learning_rate": 2.744095058223165e-06, "loss": 0.3278, "step": 42886 }, { "epoch": 1.9681060988481485, "grad_norm": 0.48547354340553284, "learning_rate": 2.7438762472857745e-06, "loss": 0.3664, "step": 42887 }, { "epoch": 1.9681519893534027, "grad_norm": 0.47164011001586914, "learning_rate": 2.7436574417736487e-06, "loss": 0.3358, "step": 42888 }, { "epoch": 1.9681978798586572, "grad_norm": 0.47939860820770264, "learning_rate": 2.743438641687313e-06, "loss": 0.3181, "step": 42889 }, { "epoch": 1.9682437703639117, "grad_norm": 0.47419288754463196, "learning_rate": 2.7432198470272977e-06, "loss": 0.3313, "step": 42890 }, { "epoch": 1.968289660869166, "grad_norm": 0.47287169098854065, "learning_rate": 2.7430010577941257e-06, "loss": 0.3234, "step": 42891 }, { "epoch": 1.9683355513744205, "grad_norm": 0.4942079484462738, "learning_rate": 2.7427822739883237e-06, "loss": 0.3512, "step": 42892 }, { "epoch": 1.968381441879675, "grad_norm": 0.43587690591812134, "learning_rate": 2.7425634956104186e-06, "loss": 0.3047, "step": 42893 }, { "epoch": 1.9684273323849295, "grad_norm": 0.45775818824768066, "learning_rate": 2.742344722660936e-06, "loss": 0.3442, "step": 42894 }, { "epoch": 1.968473222890184, "grad_norm": 0.4763652980327606, "learning_rate": 2.7421259551404006e-06, "loss": 0.3419, "step": 42895 }, { "epoch": 1.9685191133954385, "grad_norm": 0.47356292605400085, "learning_rate": 2.7419071930493385e-06, "loss": 0.3449, "step": 42896 }, { "epoch": 1.968565003900693, "grad_norm": 0.4498305022716522, "learning_rate": 2.741688436388279e-06, "loss": 0.3236, "step": 42897 }, { "epoch": 1.9686108944059475, "grad_norm": 0.4654752314090729, "learning_rate": 2.7414696851577455e-06, "loss": 0.3656, "step": 42898 }, { "epoch": 1.968656784911202, "grad_norm": 0.43499255180358887, "learning_rate": 2.741250939358263e-06, "loss": 0.3197, "step": 42899 }, { "epoch": 1.9687026754164565, "grad_norm": 0.438260942697525, "learning_rate": 2.741032198990361e-06, "loss": 0.2696, "step": 42900 }, { "epoch": 1.9687485659217108, "grad_norm": 0.4765600562095642, "learning_rate": 2.7408134640545635e-06, "loss": 0.3606, "step": 42901 }, { "epoch": 1.9687944564269653, "grad_norm": 0.5291645526885986, "learning_rate": 2.740594734551395e-06, "loss": 0.4426, "step": 42902 }, { "epoch": 1.9688403469322198, "grad_norm": 0.47119608521461487, "learning_rate": 2.740376010481385e-06, "loss": 0.3586, "step": 42903 }, { "epoch": 1.968886237437474, "grad_norm": 0.49991872906684875, "learning_rate": 2.740157291845057e-06, "loss": 0.4441, "step": 42904 }, { "epoch": 1.9689321279427285, "grad_norm": 0.4392249882221222, "learning_rate": 2.7399385786429376e-06, "loss": 0.3302, "step": 42905 }, { "epoch": 1.968978018447983, "grad_norm": 0.4530896544456482, "learning_rate": 2.7397198708755535e-06, "loss": 0.3295, "step": 42906 }, { "epoch": 1.9690239089532375, "grad_norm": 0.42862504720687866, "learning_rate": 2.739501168543429e-06, "loss": 0.2902, "step": 42907 }, { "epoch": 1.969069799458492, "grad_norm": 0.4913938045501709, "learning_rate": 2.739282471647089e-06, "loss": 0.3829, "step": 42908 }, { "epoch": 1.9691156899637465, "grad_norm": 0.45172184705734253, "learning_rate": 2.739063780187063e-06, "loss": 0.3301, "step": 42909 }, { "epoch": 1.969161580469001, "grad_norm": 0.4399804472923279, "learning_rate": 2.738845094163875e-06, "loss": 0.3239, "step": 42910 }, { "epoch": 1.9692074709742555, "grad_norm": 0.43593159317970276, "learning_rate": 2.7386264135780487e-06, "loss": 0.3192, "step": 42911 }, { "epoch": 1.96925336147951, "grad_norm": 0.47051000595092773, "learning_rate": 2.7384077384301142e-06, "loss": 0.3845, "step": 42912 }, { "epoch": 1.9692992519847643, "grad_norm": 0.5013925433158875, "learning_rate": 2.738189068720596e-06, "loss": 0.3423, "step": 42913 }, { "epoch": 1.9693451424900188, "grad_norm": 0.4304320514202118, "learning_rate": 2.737970404450019e-06, "loss": 0.274, "step": 42914 }, { "epoch": 1.9693910329952733, "grad_norm": 0.437936007976532, "learning_rate": 2.737751745618909e-06, "loss": 0.3231, "step": 42915 }, { "epoch": 1.9694369235005278, "grad_norm": 0.48506858944892883, "learning_rate": 2.737533092227792e-06, "loss": 0.3503, "step": 42916 }, { "epoch": 1.969482814005782, "grad_norm": 0.474704772233963, "learning_rate": 2.737314444277192e-06, "loss": 0.4048, "step": 42917 }, { "epoch": 1.9695287045110366, "grad_norm": 0.4491488039493561, "learning_rate": 2.7370958017676384e-06, "loss": 0.3358, "step": 42918 }, { "epoch": 1.969574595016291, "grad_norm": 0.4897958040237427, "learning_rate": 2.7368771646996557e-06, "loss": 0.3503, "step": 42919 }, { "epoch": 1.9696204855215456, "grad_norm": 0.49196600914001465, "learning_rate": 2.736658533073767e-06, "loss": 0.3963, "step": 42920 }, { "epoch": 1.9696663760268, "grad_norm": 0.4512460231781006, "learning_rate": 2.736439906890502e-06, "loss": 0.3083, "step": 42921 }, { "epoch": 1.9697122665320546, "grad_norm": 0.4670305848121643, "learning_rate": 2.7362212861503844e-06, "loss": 0.392, "step": 42922 }, { "epoch": 1.969758157037309, "grad_norm": 0.47963711619377136, "learning_rate": 2.7360026708539404e-06, "loss": 0.3647, "step": 42923 }, { "epoch": 1.9698040475425636, "grad_norm": 0.4632793068885803, "learning_rate": 2.735784061001694e-06, "loss": 0.2974, "step": 42924 }, { "epoch": 1.969849938047818, "grad_norm": 0.5326076745986938, "learning_rate": 2.7355654565941735e-06, "loss": 0.426, "step": 42925 }, { "epoch": 1.9698958285530723, "grad_norm": 0.48221635818481445, "learning_rate": 2.735346857631904e-06, "loss": 0.3606, "step": 42926 }, { "epoch": 1.9699417190583268, "grad_norm": 0.46529513597488403, "learning_rate": 2.73512826411541e-06, "loss": 0.3289, "step": 42927 }, { "epoch": 1.9699876095635813, "grad_norm": 0.5107858777046204, "learning_rate": 2.734909676045218e-06, "loss": 0.3612, "step": 42928 }, { "epoch": 1.9700335000688356, "grad_norm": 0.4577659070491791, "learning_rate": 2.734691093421853e-06, "loss": 0.3398, "step": 42929 }, { "epoch": 1.97007939057409, "grad_norm": 0.4828402101993561, "learning_rate": 2.7344725162458395e-06, "loss": 0.3761, "step": 42930 }, { "epoch": 1.9701252810793446, "grad_norm": 0.5176158547401428, "learning_rate": 2.734253944517706e-06, "loss": 0.4263, "step": 42931 }, { "epoch": 1.970171171584599, "grad_norm": 0.47431305050849915, "learning_rate": 2.7340353782379775e-06, "loss": 0.3715, "step": 42932 }, { "epoch": 1.9702170620898536, "grad_norm": 0.46877244114875793, "learning_rate": 2.7338168174071767e-06, "loss": 0.3507, "step": 42933 }, { "epoch": 1.970262952595108, "grad_norm": 0.46306392550468445, "learning_rate": 2.7335982620258326e-06, "loss": 0.3462, "step": 42934 }, { "epoch": 1.9703088431003626, "grad_norm": 0.47579166293144226, "learning_rate": 2.733379712094469e-06, "loss": 0.352, "step": 42935 }, { "epoch": 1.970354733605617, "grad_norm": 0.4748784601688385, "learning_rate": 2.733161167613613e-06, "loss": 0.3949, "step": 42936 }, { "epoch": 1.9704006241108716, "grad_norm": 0.45802444219589233, "learning_rate": 2.732942628583788e-06, "loss": 0.3198, "step": 42937 }, { "epoch": 1.970446514616126, "grad_norm": 0.4631478190422058, "learning_rate": 2.7327240950055213e-06, "loss": 0.3727, "step": 42938 }, { "epoch": 1.9704924051213804, "grad_norm": 0.48245882987976074, "learning_rate": 2.7325055668793347e-06, "loss": 0.3832, "step": 42939 }, { "epoch": 1.9705382956266349, "grad_norm": 0.46733686327934265, "learning_rate": 2.7322870442057592e-06, "loss": 0.3511, "step": 42940 }, { "epoch": 1.9705841861318893, "grad_norm": 0.45125386118888855, "learning_rate": 2.7320685269853175e-06, "loss": 0.3365, "step": 42941 }, { "epoch": 1.9706300766371436, "grad_norm": 0.44264450669288635, "learning_rate": 2.7318500152185333e-06, "loss": 0.3036, "step": 42942 }, { "epoch": 1.9706759671423981, "grad_norm": 0.4715102016925812, "learning_rate": 2.731631508905936e-06, "loss": 0.3491, "step": 42943 }, { "epoch": 1.9707218576476526, "grad_norm": 0.4583599865436554, "learning_rate": 2.7314130080480483e-06, "loss": 0.3461, "step": 42944 }, { "epoch": 1.9707677481529071, "grad_norm": 0.47679781913757324, "learning_rate": 2.7311945126453953e-06, "loss": 0.3365, "step": 42945 }, { "epoch": 1.9708136386581616, "grad_norm": 0.4098743796348572, "learning_rate": 2.730976022698505e-06, "loss": 0.2691, "step": 42946 }, { "epoch": 1.9708595291634161, "grad_norm": 0.4181537628173828, "learning_rate": 2.730757538207901e-06, "loss": 0.2778, "step": 42947 }, { "epoch": 1.9709054196686706, "grad_norm": 0.39751628041267395, "learning_rate": 2.730539059174109e-06, "loss": 0.2417, "step": 42948 }, { "epoch": 1.970951310173925, "grad_norm": 0.48492783308029175, "learning_rate": 2.7303205855976545e-06, "loss": 0.4413, "step": 42949 }, { "epoch": 1.9709972006791796, "grad_norm": 0.47156772017478943, "learning_rate": 2.730102117479063e-06, "loss": 0.3594, "step": 42950 }, { "epoch": 1.971043091184434, "grad_norm": 0.4805081784725189, "learning_rate": 2.729883654818857e-06, "loss": 0.3463, "step": 42951 }, { "epoch": 1.9710889816896884, "grad_norm": 0.4399474561214447, "learning_rate": 2.729665197617567e-06, "loss": 0.3554, "step": 42952 }, { "epoch": 1.9711348721949429, "grad_norm": 0.48627540469169617, "learning_rate": 2.7294467458757145e-06, "loss": 0.3887, "step": 42953 }, { "epoch": 1.9711807627001974, "grad_norm": 0.48422372341156006, "learning_rate": 2.7292282995938246e-06, "loss": 0.3976, "step": 42954 }, { "epoch": 1.9712266532054517, "grad_norm": 0.492933064699173, "learning_rate": 2.7290098587724263e-06, "loss": 0.3084, "step": 42955 }, { "epoch": 1.9712725437107061, "grad_norm": 0.48610132932662964, "learning_rate": 2.728791423412042e-06, "loss": 0.3933, "step": 42956 }, { "epoch": 1.9713184342159606, "grad_norm": 0.4766220152378082, "learning_rate": 2.7285729935131976e-06, "loss": 0.3733, "step": 42957 }, { "epoch": 1.9713643247212151, "grad_norm": 0.46368440985679626, "learning_rate": 2.728354569076419e-06, "loss": 0.3816, "step": 42958 }, { "epoch": 1.9714102152264696, "grad_norm": 0.4775179922580719, "learning_rate": 2.7281361501022295e-06, "loss": 0.37, "step": 42959 }, { "epoch": 1.9714561057317241, "grad_norm": 0.4159829616546631, "learning_rate": 2.7279177365911567e-06, "loss": 0.2667, "step": 42960 }, { "epoch": 1.9715019962369786, "grad_norm": 0.47893813252449036, "learning_rate": 2.727699328543722e-06, "loss": 0.3197, "step": 42961 }, { "epoch": 1.9715478867422331, "grad_norm": 0.43894073367118835, "learning_rate": 2.727480925960455e-06, "loss": 0.3245, "step": 42962 }, { "epoch": 1.9715937772474876, "grad_norm": 0.47644418478012085, "learning_rate": 2.7272625288418797e-06, "loss": 0.3621, "step": 42963 }, { "epoch": 1.971639667752742, "grad_norm": 0.4449101984500885, "learning_rate": 2.727044137188519e-06, "loss": 0.2853, "step": 42964 }, { "epoch": 1.9716855582579964, "grad_norm": 0.4864620566368103, "learning_rate": 2.726825751000901e-06, "loss": 0.3847, "step": 42965 }, { "epoch": 1.971731448763251, "grad_norm": 0.4494321942329407, "learning_rate": 2.72660737027955e-06, "loss": 0.283, "step": 42966 }, { "epoch": 1.9717773392685052, "grad_norm": 0.43498343229293823, "learning_rate": 2.726388995024989e-06, "loss": 0.2821, "step": 42967 }, { "epoch": 1.9718232297737597, "grad_norm": 0.48384878039360046, "learning_rate": 2.7261706252377464e-06, "loss": 0.3454, "step": 42968 }, { "epoch": 1.9718691202790142, "grad_norm": 0.479285329580307, "learning_rate": 2.7259522609183458e-06, "loss": 0.4084, "step": 42969 }, { "epoch": 1.9719150107842687, "grad_norm": 0.4740636944770813, "learning_rate": 2.7257339020673124e-06, "loss": 0.3324, "step": 42970 }, { "epoch": 1.9719609012895232, "grad_norm": 0.5093536972999573, "learning_rate": 2.7255155486851714e-06, "loss": 0.4067, "step": 42971 }, { "epoch": 1.9720067917947777, "grad_norm": 0.4903455376625061, "learning_rate": 2.7252972007724474e-06, "loss": 0.3337, "step": 42972 }, { "epoch": 1.9720526823000322, "grad_norm": 0.47710734605789185, "learning_rate": 2.725078858329664e-06, "loss": 0.37, "step": 42973 }, { "epoch": 1.9720985728052867, "grad_norm": 0.5705099701881409, "learning_rate": 2.72486052135735e-06, "loss": 0.3685, "step": 42974 }, { "epoch": 1.9721444633105412, "grad_norm": 0.4722226858139038, "learning_rate": 2.724642189856028e-06, "loss": 0.3603, "step": 42975 }, { "epoch": 1.9721903538157957, "grad_norm": 0.4722960889339447, "learning_rate": 2.724423863826222e-06, "loss": 0.3548, "step": 42976 }, { "epoch": 1.97223624432105, "grad_norm": 0.42023125290870667, "learning_rate": 2.72420554326846e-06, "loss": 0.3054, "step": 42977 }, { "epoch": 1.9722821348263044, "grad_norm": 0.4182262122631073, "learning_rate": 2.7239872281832657e-06, "loss": 0.2749, "step": 42978 }, { "epoch": 1.972328025331559, "grad_norm": 0.4726836085319519, "learning_rate": 2.7237689185711634e-06, "loss": 0.326, "step": 42979 }, { "epoch": 1.9723739158368132, "grad_norm": 0.4718392491340637, "learning_rate": 2.7235506144326783e-06, "loss": 0.3154, "step": 42980 }, { "epoch": 1.9724198063420677, "grad_norm": 0.4731011688709259, "learning_rate": 2.7233323157683362e-06, "loss": 0.3356, "step": 42981 }, { "epoch": 1.9724656968473222, "grad_norm": 0.44952255487442017, "learning_rate": 2.723114022578659e-06, "loss": 0.3298, "step": 42982 }, { "epoch": 1.9725115873525767, "grad_norm": 0.45575496554374695, "learning_rate": 2.722895734864176e-06, "loss": 0.3662, "step": 42983 }, { "epoch": 1.9725574778578312, "grad_norm": 0.48486408591270447, "learning_rate": 2.7226774526254095e-06, "loss": 0.3429, "step": 42984 }, { "epoch": 1.9726033683630857, "grad_norm": 0.474436491727829, "learning_rate": 2.722459175862884e-06, "loss": 0.367, "step": 42985 }, { "epoch": 1.9726492588683402, "grad_norm": 0.4444190561771393, "learning_rate": 2.7222409045771264e-06, "loss": 0.2982, "step": 42986 }, { "epoch": 1.9726951493735947, "grad_norm": 0.4446990191936493, "learning_rate": 2.722022638768661e-06, "loss": 0.3114, "step": 42987 }, { "epoch": 1.9727410398788492, "grad_norm": 0.5016521215438843, "learning_rate": 2.721804378438012e-06, "loss": 0.441, "step": 42988 }, { "epoch": 1.9727869303841037, "grad_norm": 0.5104778409004211, "learning_rate": 2.7215861235857035e-06, "loss": 0.4052, "step": 42989 }, { "epoch": 1.972832820889358, "grad_norm": 0.5073155164718628, "learning_rate": 2.721367874212262e-06, "loss": 0.3933, "step": 42990 }, { "epoch": 1.9728787113946125, "grad_norm": 0.49366697669029236, "learning_rate": 2.7211496303182116e-06, "loss": 0.4149, "step": 42991 }, { "epoch": 1.972924601899867, "grad_norm": 0.4453880488872528, "learning_rate": 2.720931391904078e-06, "loss": 0.3166, "step": 42992 }, { "epoch": 1.9729704924051212, "grad_norm": 0.4908974766731262, "learning_rate": 2.7207131589703844e-06, "loss": 0.3363, "step": 42993 }, { "epoch": 1.9730163829103757, "grad_norm": 0.4505852460861206, "learning_rate": 2.7204949315176565e-06, "loss": 0.3236, "step": 42994 }, { "epoch": 1.9730622734156302, "grad_norm": 0.49948233366012573, "learning_rate": 2.7202767095464173e-06, "loss": 0.3717, "step": 42995 }, { "epoch": 1.9731081639208847, "grad_norm": 0.5156455636024475, "learning_rate": 2.7200584930571947e-06, "loss": 0.4265, "step": 42996 }, { "epoch": 1.9731540544261392, "grad_norm": 0.4576736092567444, "learning_rate": 2.719840282050512e-06, "loss": 0.3472, "step": 42997 }, { "epoch": 1.9731999449313937, "grad_norm": 0.46018362045288086, "learning_rate": 2.719622076526891e-06, "loss": 0.3723, "step": 42998 }, { "epoch": 1.9732458354366482, "grad_norm": 0.463645339012146, "learning_rate": 2.7194038764868625e-06, "loss": 0.3167, "step": 42999 }, { "epoch": 1.9732917259419027, "grad_norm": 0.4825528562068939, "learning_rate": 2.719185681930947e-06, "loss": 0.3627, "step": 43000 }, { "epoch": 1.9733376164471572, "grad_norm": 0.48279833793640137, "learning_rate": 2.7189674928596706e-06, "loss": 0.3464, "step": 43001 }, { "epoch": 1.9733835069524115, "grad_norm": 0.4674633741378784, "learning_rate": 2.7187493092735573e-06, "loss": 0.3635, "step": 43002 }, { "epoch": 1.973429397457666, "grad_norm": 0.4723011553287506, "learning_rate": 2.7185311311731317e-06, "loss": 0.3763, "step": 43003 }, { "epoch": 1.9734752879629205, "grad_norm": 0.4689598083496094, "learning_rate": 2.7183129585589165e-06, "loss": 0.3596, "step": 43004 }, { "epoch": 1.973521178468175, "grad_norm": 0.47768914699554443, "learning_rate": 2.718094791431441e-06, "loss": 0.3544, "step": 43005 }, { "epoch": 1.9735670689734293, "grad_norm": 0.48436489701271057, "learning_rate": 2.7178766297912275e-06, "loss": 0.4014, "step": 43006 }, { "epoch": 1.9736129594786838, "grad_norm": 0.45127004384994507, "learning_rate": 2.717658473638798e-06, "loss": 0.3176, "step": 43007 }, { "epoch": 1.9736588499839383, "grad_norm": 0.4875739514827728, "learning_rate": 2.7174403229746816e-06, "loss": 0.3697, "step": 43008 }, { "epoch": 1.9737047404891928, "grad_norm": 0.4774787127971649, "learning_rate": 2.7172221777994008e-06, "loss": 0.3856, "step": 43009 }, { "epoch": 1.9737506309944473, "grad_norm": 0.5168051719665527, "learning_rate": 2.7170040381134783e-06, "loss": 0.4139, "step": 43010 }, { "epoch": 1.9737965214997018, "grad_norm": 0.4452592432498932, "learning_rate": 2.7167859039174428e-06, "loss": 0.3949, "step": 43011 }, { "epoch": 1.9738424120049562, "grad_norm": 0.48069578409194946, "learning_rate": 2.716567775211816e-06, "loss": 0.3517, "step": 43012 }, { "epoch": 1.9738883025102107, "grad_norm": 0.43767228722572327, "learning_rate": 2.7163496519971233e-06, "loss": 0.333, "step": 43013 }, { "epoch": 1.9739341930154652, "grad_norm": 0.4627382159233093, "learning_rate": 2.7161315342738893e-06, "loss": 0.3437, "step": 43014 }, { "epoch": 1.9739800835207195, "grad_norm": 0.47605419158935547, "learning_rate": 2.7159134220426374e-06, "loss": 0.3661, "step": 43015 }, { "epoch": 1.974025974025974, "grad_norm": 0.46920841932296753, "learning_rate": 2.715695315303891e-06, "loss": 0.3251, "step": 43016 }, { "epoch": 1.9740718645312285, "grad_norm": 0.4719802141189575, "learning_rate": 2.715477214058179e-06, "loss": 0.3757, "step": 43017 }, { "epoch": 1.9741177550364828, "grad_norm": 0.4033859968185425, "learning_rate": 2.7152591183060226e-06, "loss": 0.2626, "step": 43018 }, { "epoch": 1.9741636455417373, "grad_norm": 0.45449164509773254, "learning_rate": 2.7150410280479463e-06, "loss": 0.3578, "step": 43019 }, { "epoch": 1.9742095360469918, "grad_norm": 0.46683114767074585, "learning_rate": 2.7148229432844757e-06, "loss": 0.3584, "step": 43020 }, { "epoch": 1.9742554265522463, "grad_norm": 0.43749991059303284, "learning_rate": 2.7146048640161356e-06, "loss": 0.3088, "step": 43021 }, { "epoch": 1.9743013170575008, "grad_norm": 0.45934537053108215, "learning_rate": 2.714386790243449e-06, "loss": 0.3608, "step": 43022 }, { "epoch": 1.9743472075627553, "grad_norm": 0.44710272550582886, "learning_rate": 2.714168721966941e-06, "loss": 0.3077, "step": 43023 }, { "epoch": 1.9743930980680098, "grad_norm": 0.4379536509513855, "learning_rate": 2.7139506591871354e-06, "loss": 0.2865, "step": 43024 }, { "epoch": 1.9744389885732643, "grad_norm": 0.47337642312049866, "learning_rate": 2.7137326019045574e-06, "loss": 0.3406, "step": 43025 }, { "epoch": 1.9744848790785188, "grad_norm": 0.45020273327827454, "learning_rate": 2.713514550119729e-06, "loss": 0.3261, "step": 43026 }, { "epoch": 1.9745307695837733, "grad_norm": 0.523016631603241, "learning_rate": 2.713296503833178e-06, "loss": 0.4053, "step": 43027 }, { "epoch": 1.9745766600890275, "grad_norm": 0.49811258912086487, "learning_rate": 2.7130784630454275e-06, "loss": 0.3577, "step": 43028 }, { "epoch": 1.974622550594282, "grad_norm": 0.4756976366043091, "learning_rate": 2.7128604277569997e-06, "loss": 0.3617, "step": 43029 }, { "epoch": 1.9746684410995365, "grad_norm": 0.4516562223434448, "learning_rate": 2.7126423979684225e-06, "loss": 0.3506, "step": 43030 }, { "epoch": 1.9747143316047908, "grad_norm": 0.4506928324699402, "learning_rate": 2.7124243736802186e-06, "loss": 0.3374, "step": 43031 }, { "epoch": 1.9747602221100453, "grad_norm": 0.45910584926605225, "learning_rate": 2.71220635489291e-06, "loss": 0.3467, "step": 43032 }, { "epoch": 1.9748061126152998, "grad_norm": 0.455471932888031, "learning_rate": 2.711988341607025e-06, "loss": 0.3224, "step": 43033 }, { "epoch": 1.9748520031205543, "grad_norm": 0.5064855217933655, "learning_rate": 2.711770333823086e-06, "loss": 0.4672, "step": 43034 }, { "epoch": 1.9748978936258088, "grad_norm": 0.48381954431533813, "learning_rate": 2.711552331541617e-06, "loss": 0.3946, "step": 43035 }, { "epoch": 1.9749437841310633, "grad_norm": 0.4522872865200043, "learning_rate": 2.711334334763143e-06, "loss": 0.3506, "step": 43036 }, { "epoch": 1.9749896746363178, "grad_norm": 0.480379194021225, "learning_rate": 2.7111163434881864e-06, "loss": 0.3453, "step": 43037 }, { "epoch": 1.9750355651415723, "grad_norm": 0.4447477459907532, "learning_rate": 2.7108983577172713e-06, "loss": 0.3484, "step": 43038 }, { "epoch": 1.9750814556468268, "grad_norm": 0.4796711206436157, "learning_rate": 2.7106803774509256e-06, "loss": 0.4088, "step": 43039 }, { "epoch": 1.9751273461520813, "grad_norm": 0.4892890453338623, "learning_rate": 2.710462402689671e-06, "loss": 0.3471, "step": 43040 }, { "epoch": 1.9751732366573356, "grad_norm": 0.4526127576828003, "learning_rate": 2.710244433434029e-06, "loss": 0.3097, "step": 43041 }, { "epoch": 1.97521912716259, "grad_norm": 0.48445919156074524, "learning_rate": 2.710026469684529e-06, "loss": 0.3915, "step": 43042 }, { "epoch": 1.9752650176678446, "grad_norm": 0.4925227463245392, "learning_rate": 2.709808511441692e-06, "loss": 0.3689, "step": 43043 }, { "epoch": 1.9753109081730988, "grad_norm": 0.45780596137046814, "learning_rate": 2.709590558706043e-06, "loss": 0.3546, "step": 43044 }, { "epoch": 1.9753567986783533, "grad_norm": 0.44601908326148987, "learning_rate": 2.709372611478106e-06, "loss": 0.3202, "step": 43045 }, { "epoch": 1.9754026891836078, "grad_norm": 0.48589542508125305, "learning_rate": 2.7091546697584054e-06, "loss": 0.4023, "step": 43046 }, { "epoch": 1.9754485796888623, "grad_norm": 0.4720264673233032, "learning_rate": 2.7089367335474626e-06, "loss": 0.3725, "step": 43047 }, { "epoch": 1.9754944701941168, "grad_norm": 0.4534439444541931, "learning_rate": 2.7087188028458054e-06, "loss": 0.3412, "step": 43048 }, { "epoch": 1.9755403606993713, "grad_norm": 0.5026981234550476, "learning_rate": 2.7085008776539565e-06, "loss": 0.3938, "step": 43049 }, { "epoch": 1.9755862512046258, "grad_norm": 0.44207507371902466, "learning_rate": 2.7082829579724377e-06, "loss": 0.3202, "step": 43050 }, { "epoch": 1.9756321417098803, "grad_norm": 0.4496201276779175, "learning_rate": 2.7080650438017776e-06, "loss": 0.3181, "step": 43051 }, { "epoch": 1.9756780322151348, "grad_norm": 0.4767274260520935, "learning_rate": 2.7078471351424973e-06, "loss": 0.374, "step": 43052 }, { "epoch": 1.975723922720389, "grad_norm": 0.45770418643951416, "learning_rate": 2.7076292319951193e-06, "loss": 0.326, "step": 43053 }, { "epoch": 1.9757698132256436, "grad_norm": 0.4409182667732239, "learning_rate": 2.7074113343601715e-06, "loss": 0.304, "step": 43054 }, { "epoch": 1.975815703730898, "grad_norm": 0.5399067997932434, "learning_rate": 2.7071934422381753e-06, "loss": 0.4476, "step": 43055 }, { "epoch": 1.9758615942361524, "grad_norm": 0.43848690390586853, "learning_rate": 2.7069755556296564e-06, "loss": 0.2867, "step": 43056 }, { "epoch": 1.9759074847414069, "grad_norm": 0.474045991897583, "learning_rate": 2.7067576745351364e-06, "loss": 0.3528, "step": 43057 }, { "epoch": 1.9759533752466614, "grad_norm": 0.48616617918014526, "learning_rate": 2.706539798955141e-06, "loss": 0.3843, "step": 43058 }, { "epoch": 1.9759992657519159, "grad_norm": 0.4606216847896576, "learning_rate": 2.7063219288901937e-06, "loss": 0.3286, "step": 43059 }, { "epoch": 1.9760451562571704, "grad_norm": 0.46810126304626465, "learning_rate": 2.706104064340816e-06, "loss": 0.3659, "step": 43060 }, { "epoch": 1.9760910467624249, "grad_norm": 0.48562416434288025, "learning_rate": 2.705886205307536e-06, "loss": 0.3875, "step": 43061 }, { "epoch": 1.9761369372676794, "grad_norm": 0.545442521572113, "learning_rate": 2.705668351790876e-06, "loss": 0.4408, "step": 43062 }, { "epoch": 1.9761828277729339, "grad_norm": 0.5049591064453125, "learning_rate": 2.705450503791357e-06, "loss": 0.3809, "step": 43063 }, { "epoch": 1.9762287182781884, "grad_norm": 0.5137231945991516, "learning_rate": 2.705232661309508e-06, "loss": 0.4193, "step": 43064 }, { "epoch": 1.9762746087834429, "grad_norm": 0.46165013313293457, "learning_rate": 2.7050148243458494e-06, "loss": 0.3786, "step": 43065 }, { "epoch": 1.9763204992886971, "grad_norm": 0.482311874628067, "learning_rate": 2.7047969929009064e-06, "loss": 0.3431, "step": 43066 }, { "epoch": 1.9763663897939516, "grad_norm": 0.48097366094589233, "learning_rate": 2.704579166975202e-06, "loss": 0.3733, "step": 43067 }, { "epoch": 1.9764122802992061, "grad_norm": 0.4603255093097687, "learning_rate": 2.704361346569261e-06, "loss": 0.3277, "step": 43068 }, { "epoch": 1.9764581708044604, "grad_norm": 0.46200206875801086, "learning_rate": 2.7041435316836036e-06, "loss": 0.3442, "step": 43069 }, { "epoch": 1.976504061309715, "grad_norm": 0.46408936381340027, "learning_rate": 2.703925722318759e-06, "loss": 0.3385, "step": 43070 }, { "epoch": 1.9765499518149694, "grad_norm": 0.496676504611969, "learning_rate": 2.7037079184752475e-06, "loss": 0.3602, "step": 43071 }, { "epoch": 1.976595842320224, "grad_norm": 0.4971490204334259, "learning_rate": 2.7034901201535923e-06, "loss": 0.4083, "step": 43072 }, { "epoch": 1.9766417328254784, "grad_norm": 0.4566163718700409, "learning_rate": 2.7032723273543204e-06, "loss": 0.3546, "step": 43073 }, { "epoch": 1.976687623330733, "grad_norm": 0.47319772839546204, "learning_rate": 2.703054540077954e-06, "loss": 0.3579, "step": 43074 }, { "epoch": 1.9767335138359874, "grad_norm": 0.4660305380821228, "learning_rate": 2.7028367583250137e-06, "loss": 0.3598, "step": 43075 }, { "epoch": 1.9767794043412419, "grad_norm": 0.4800879657268524, "learning_rate": 2.702618982096029e-06, "loss": 0.3909, "step": 43076 }, { "epoch": 1.9768252948464964, "grad_norm": 0.4666239619255066, "learning_rate": 2.70240121139152e-06, "loss": 0.3835, "step": 43077 }, { "epoch": 1.9768711853517509, "grad_norm": 0.470390260219574, "learning_rate": 2.702183446212011e-06, "loss": 0.3607, "step": 43078 }, { "epoch": 1.9769170758570052, "grad_norm": 0.485868901014328, "learning_rate": 2.7019656865580257e-06, "loss": 0.3878, "step": 43079 }, { "epoch": 1.9769629663622597, "grad_norm": 0.45943811535835266, "learning_rate": 2.701747932430088e-06, "loss": 0.3867, "step": 43080 }, { "epoch": 1.9770088568675142, "grad_norm": 0.4532267153263092, "learning_rate": 2.701530183828718e-06, "loss": 0.3523, "step": 43081 }, { "epoch": 1.9770547473727684, "grad_norm": 0.47276532649993896, "learning_rate": 2.7013124407544456e-06, "loss": 0.3539, "step": 43082 }, { "epoch": 1.977100637878023, "grad_norm": 0.49072468280792236, "learning_rate": 2.701094703207791e-06, "loss": 0.3891, "step": 43083 }, { "epoch": 1.9771465283832774, "grad_norm": 0.47355905175209045, "learning_rate": 2.7008769711892757e-06, "loss": 0.3709, "step": 43084 }, { "epoch": 1.977192418888532, "grad_norm": 0.42839449644088745, "learning_rate": 2.7006592446994284e-06, "loss": 0.3004, "step": 43085 }, { "epoch": 1.9772383093937864, "grad_norm": 0.47325605154037476, "learning_rate": 2.7004415237387694e-06, "loss": 0.3378, "step": 43086 }, { "epoch": 1.977284199899041, "grad_norm": 0.44909465312957764, "learning_rate": 2.700223808307823e-06, "loss": 0.3098, "step": 43087 }, { "epoch": 1.9773300904042954, "grad_norm": 0.5121114253997803, "learning_rate": 2.7000060984071123e-06, "loss": 0.4155, "step": 43088 }, { "epoch": 1.97737598090955, "grad_norm": 0.49131855368614197, "learning_rate": 2.699788394037161e-06, "loss": 0.4021, "step": 43089 }, { "epoch": 1.9774218714148044, "grad_norm": 0.508849024772644, "learning_rate": 2.699570695198493e-06, "loss": 0.445, "step": 43090 }, { "epoch": 1.9774677619200587, "grad_norm": 0.47688090801239014, "learning_rate": 2.6993530018916293e-06, "loss": 0.3947, "step": 43091 }, { "epoch": 1.9775136524253132, "grad_norm": 0.5060880780220032, "learning_rate": 2.6991353141170973e-06, "loss": 0.3623, "step": 43092 }, { "epoch": 1.9775595429305677, "grad_norm": 0.4545553922653198, "learning_rate": 2.698917631875419e-06, "loss": 0.3358, "step": 43093 }, { "epoch": 1.9776054334358222, "grad_norm": 0.3883197009563446, "learning_rate": 2.6986999551671154e-06, "loss": 0.2521, "step": 43094 }, { "epoch": 1.9776513239410765, "grad_norm": 0.4842115044593811, "learning_rate": 2.698482283992714e-06, "loss": 0.4033, "step": 43095 }, { "epoch": 1.977697214446331, "grad_norm": 0.46912774443626404, "learning_rate": 2.698264618352736e-06, "loss": 0.3419, "step": 43096 }, { "epoch": 1.9777431049515855, "grad_norm": 0.5099304914474487, "learning_rate": 2.6980469582477043e-06, "loss": 0.4316, "step": 43097 }, { "epoch": 1.97778899545684, "grad_norm": 0.47993943095207214, "learning_rate": 2.6978293036781442e-06, "loss": 0.3801, "step": 43098 }, { "epoch": 1.9778348859620944, "grad_norm": 0.48221877217292786, "learning_rate": 2.6976116546445785e-06, "loss": 0.359, "step": 43099 }, { "epoch": 1.977880776467349, "grad_norm": 0.4791015386581421, "learning_rate": 2.6973940111475296e-06, "loss": 0.354, "step": 43100 }, { "epoch": 1.9779266669726034, "grad_norm": 0.44510969519615173, "learning_rate": 2.6971763731875216e-06, "loss": 0.3102, "step": 43101 }, { "epoch": 1.977972557477858, "grad_norm": 0.5093168616294861, "learning_rate": 2.696958740765078e-06, "loss": 0.4484, "step": 43102 }, { "epoch": 1.9780184479831124, "grad_norm": 0.5319295525550842, "learning_rate": 2.6967411138807193e-06, "loss": 0.4433, "step": 43103 }, { "epoch": 1.9780643384883667, "grad_norm": 0.5002061724662781, "learning_rate": 2.6965234925349738e-06, "loss": 0.4322, "step": 43104 }, { "epoch": 1.9781102289936212, "grad_norm": 0.4628775119781494, "learning_rate": 2.696305876728361e-06, "loss": 0.3644, "step": 43105 }, { "epoch": 1.9781561194988757, "grad_norm": 0.479069322347641, "learning_rate": 2.696088266461405e-06, "loss": 0.3566, "step": 43106 }, { "epoch": 1.97820201000413, "grad_norm": 0.5325402021408081, "learning_rate": 2.695870661734631e-06, "loss": 0.277, "step": 43107 }, { "epoch": 1.9782479005093845, "grad_norm": 0.5054181814193726, "learning_rate": 2.695653062548561e-06, "loss": 0.3988, "step": 43108 }, { "epoch": 1.978293791014639, "grad_norm": 0.48243290185928345, "learning_rate": 2.695435468903716e-06, "loss": 0.3419, "step": 43109 }, { "epoch": 1.9783396815198935, "grad_norm": 0.4701647460460663, "learning_rate": 2.6952178808006254e-06, "loss": 0.3804, "step": 43110 }, { "epoch": 1.978385572025148, "grad_norm": 0.49498775601387024, "learning_rate": 2.695000298239806e-06, "loss": 0.3896, "step": 43111 }, { "epoch": 1.9784314625304025, "grad_norm": 0.45390596985816956, "learning_rate": 2.694782721221782e-06, "loss": 0.3411, "step": 43112 }, { "epoch": 1.978477353035657, "grad_norm": 0.4760518968105316, "learning_rate": 2.6945651497470798e-06, "loss": 0.3457, "step": 43113 }, { "epoch": 1.9785232435409115, "grad_norm": 0.44144967198371887, "learning_rate": 2.69434758381622e-06, "loss": 0.2997, "step": 43114 }, { "epoch": 1.978569134046166, "grad_norm": 0.4646770656108856, "learning_rate": 2.6941300234297264e-06, "loss": 0.33, "step": 43115 }, { "epoch": 1.9786150245514205, "grad_norm": 0.43195241689682007, "learning_rate": 2.693912468588123e-06, "loss": 0.3174, "step": 43116 }, { "epoch": 1.9786609150566747, "grad_norm": 0.5053424835205078, "learning_rate": 2.6936949192919325e-06, "loss": 0.4097, "step": 43117 }, { "epoch": 1.9787068055619292, "grad_norm": 0.5600436925888062, "learning_rate": 2.6934773755416754e-06, "loss": 0.3597, "step": 43118 }, { "epoch": 1.9787526960671837, "grad_norm": 0.444132536649704, "learning_rate": 2.6932598373378806e-06, "loss": 0.3278, "step": 43119 }, { "epoch": 1.978798586572438, "grad_norm": 0.5015686750411987, "learning_rate": 2.6930423046810666e-06, "loss": 0.4451, "step": 43120 }, { "epoch": 1.9788444770776925, "grad_norm": 0.49007469415664673, "learning_rate": 2.692824777571758e-06, "loss": 0.3948, "step": 43121 }, { "epoch": 1.978890367582947, "grad_norm": 0.46476617455482483, "learning_rate": 2.6926072560104777e-06, "loss": 0.3532, "step": 43122 }, { "epoch": 1.9789362580882015, "grad_norm": 0.4301423728466034, "learning_rate": 2.6923897399977493e-06, "loss": 0.2713, "step": 43123 }, { "epoch": 1.978982148593456, "grad_norm": 0.46674251556396484, "learning_rate": 2.6921722295340947e-06, "loss": 0.3618, "step": 43124 }, { "epoch": 1.9790280390987105, "grad_norm": 0.43386775255203247, "learning_rate": 2.6919547246200356e-06, "loss": 0.3063, "step": 43125 }, { "epoch": 1.979073929603965, "grad_norm": 0.46225476264953613, "learning_rate": 2.691737225256099e-06, "loss": 0.3519, "step": 43126 }, { "epoch": 1.9791198201092195, "grad_norm": 0.4568866789340973, "learning_rate": 2.6915197314428056e-06, "loss": 0.3511, "step": 43127 }, { "epoch": 1.979165710614474, "grad_norm": 0.46512269973754883, "learning_rate": 2.691302243180677e-06, "loss": 0.3464, "step": 43128 }, { "epoch": 1.9792116011197285, "grad_norm": 0.5248969793319702, "learning_rate": 2.6910847604702394e-06, "loss": 0.3279, "step": 43129 }, { "epoch": 1.9792574916249828, "grad_norm": 0.486942857503891, "learning_rate": 2.6908672833120147e-06, "loss": 0.3774, "step": 43130 }, { "epoch": 1.9793033821302373, "grad_norm": 0.49245813488960266, "learning_rate": 2.690649811706524e-06, "loss": 0.4404, "step": 43131 }, { "epoch": 1.9793492726354918, "grad_norm": 0.47092217206954956, "learning_rate": 2.690432345654295e-06, "loss": 0.3289, "step": 43132 }, { "epoch": 1.979395163140746, "grad_norm": 0.4704309105873108, "learning_rate": 2.6902148851558445e-06, "loss": 0.3758, "step": 43133 }, { "epoch": 1.9794410536460005, "grad_norm": 0.44915661215782166, "learning_rate": 2.689997430211697e-06, "loss": 0.3137, "step": 43134 }, { "epoch": 1.979486944151255, "grad_norm": 0.5048395395278931, "learning_rate": 2.689779980822379e-06, "loss": 0.4015, "step": 43135 }, { "epoch": 1.9795328346565095, "grad_norm": 0.46840277314186096, "learning_rate": 2.6895625369884105e-06, "loss": 0.3318, "step": 43136 }, { "epoch": 1.979578725161764, "grad_norm": 0.47447413206100464, "learning_rate": 2.689345098710313e-06, "loss": 0.3639, "step": 43137 }, { "epoch": 1.9796246156670185, "grad_norm": 0.46133333444595337, "learning_rate": 2.689127665988613e-06, "loss": 0.3545, "step": 43138 }, { "epoch": 1.979670506172273, "grad_norm": 0.4870116412639618, "learning_rate": 2.688910238823832e-06, "loss": 0.3671, "step": 43139 }, { "epoch": 1.9797163966775275, "grad_norm": 0.5232671499252319, "learning_rate": 2.6886928172164905e-06, "loss": 0.3599, "step": 43140 }, { "epoch": 1.979762287182782, "grad_norm": 0.5012676119804382, "learning_rate": 2.6884754011671147e-06, "loss": 0.3696, "step": 43141 }, { "epoch": 1.9798081776880363, "grad_norm": 0.4681413471698761, "learning_rate": 2.6882579906762262e-06, "loss": 0.3335, "step": 43142 }, { "epoch": 1.9798540681932908, "grad_norm": 0.47126173973083496, "learning_rate": 2.688040585744348e-06, "loss": 0.3586, "step": 43143 }, { "epoch": 1.9798999586985453, "grad_norm": 0.4498625099658966, "learning_rate": 2.687823186372003e-06, "loss": 0.3178, "step": 43144 }, { "epoch": 1.9799458492037996, "grad_norm": 0.4618842303752899, "learning_rate": 2.6876057925597128e-06, "loss": 0.3471, "step": 43145 }, { "epoch": 1.979991739709054, "grad_norm": 0.47371867299079895, "learning_rate": 2.687388404307999e-06, "loss": 0.3455, "step": 43146 }, { "epoch": 1.9800376302143086, "grad_norm": 0.4555322825908661, "learning_rate": 2.6871710216173884e-06, "loss": 0.3203, "step": 43147 }, { "epoch": 1.980083520719563, "grad_norm": 0.4681788384914398, "learning_rate": 2.6869536444884016e-06, "loss": 0.3785, "step": 43148 }, { "epoch": 1.9801294112248176, "grad_norm": 0.4780575931072235, "learning_rate": 2.6867362729215586e-06, "loss": 0.3825, "step": 43149 }, { "epoch": 1.980175301730072, "grad_norm": 0.4842798113822937, "learning_rate": 2.686518906917387e-06, "loss": 0.3566, "step": 43150 }, { "epoch": 1.9802211922353266, "grad_norm": 0.4575755000114441, "learning_rate": 2.6863015464764077e-06, "loss": 0.3232, "step": 43151 }, { "epoch": 1.980267082740581, "grad_norm": 0.47339776158332825, "learning_rate": 2.6860841915991428e-06, "loss": 0.3426, "step": 43152 }, { "epoch": 1.9803129732458356, "grad_norm": 0.47130417823791504, "learning_rate": 2.6858668422861127e-06, "loss": 0.3102, "step": 43153 }, { "epoch": 1.98035886375109, "grad_norm": 0.5300039649009705, "learning_rate": 2.685649498537847e-06, "loss": 0.4439, "step": 43154 }, { "epoch": 1.9804047542563443, "grad_norm": 0.45438867807388306, "learning_rate": 2.685432160354862e-06, "loss": 0.335, "step": 43155 }, { "epoch": 1.9804506447615988, "grad_norm": 0.4509146213531494, "learning_rate": 2.6852148277376795e-06, "loss": 0.331, "step": 43156 }, { "epoch": 1.9804965352668533, "grad_norm": 0.4600050747394562, "learning_rate": 2.6849975006868268e-06, "loss": 0.3281, "step": 43157 }, { "epoch": 1.9805424257721076, "grad_norm": 0.47251150012016296, "learning_rate": 2.684780179202825e-06, "loss": 0.3536, "step": 43158 }, { "epoch": 1.980588316277362, "grad_norm": 0.4947102963924408, "learning_rate": 2.6845628632861935e-06, "loss": 0.3914, "step": 43159 }, { "epoch": 1.9806342067826166, "grad_norm": 0.4416678547859192, "learning_rate": 2.68434555293746e-06, "loss": 0.2755, "step": 43160 }, { "epoch": 1.980680097287871, "grad_norm": 0.4619218707084656, "learning_rate": 2.6841282481571448e-06, "loss": 0.3125, "step": 43161 }, { "epoch": 1.9807259877931256, "grad_norm": 0.5089234709739685, "learning_rate": 2.6839109489457683e-06, "loss": 0.414, "step": 43162 }, { "epoch": 1.98077187829838, "grad_norm": 0.4740343689918518, "learning_rate": 2.6836936553038563e-06, "loss": 0.3339, "step": 43163 }, { "epoch": 1.9808177688036346, "grad_norm": 0.5019904375076294, "learning_rate": 2.6834763672319305e-06, "loss": 0.4203, "step": 43164 }, { "epoch": 1.980863659308889, "grad_norm": 0.45936256647109985, "learning_rate": 2.683259084730513e-06, "loss": 0.3642, "step": 43165 }, { "epoch": 1.9809095498141436, "grad_norm": 0.4591171145439148, "learning_rate": 2.6830418078001257e-06, "loss": 0.3355, "step": 43166 }, { "epoch": 1.980955440319398, "grad_norm": 0.44852474331855774, "learning_rate": 2.682824536441292e-06, "loss": 0.3435, "step": 43167 }, { "epoch": 1.9810013308246524, "grad_norm": 0.49362272024154663, "learning_rate": 2.682607270654532e-06, "loss": 0.3959, "step": 43168 }, { "epoch": 1.9810472213299068, "grad_norm": 0.48774832487106323, "learning_rate": 2.6823900104403723e-06, "loss": 0.3518, "step": 43169 }, { "epoch": 1.9810931118351613, "grad_norm": 0.47221601009368896, "learning_rate": 2.6821727557993326e-06, "loss": 0.3266, "step": 43170 }, { "epoch": 1.9811390023404156, "grad_norm": 0.5298958420753479, "learning_rate": 2.6819555067319344e-06, "loss": 0.4232, "step": 43171 }, { "epoch": 1.9811848928456701, "grad_norm": 0.4496658146381378, "learning_rate": 2.6817382632387033e-06, "loss": 0.3283, "step": 43172 }, { "epoch": 1.9812307833509246, "grad_norm": 0.44550344347953796, "learning_rate": 2.6815210253201602e-06, "loss": 0.3136, "step": 43173 }, { "epoch": 1.9812766738561791, "grad_norm": 0.44526734948158264, "learning_rate": 2.681303792976825e-06, "loss": 0.2867, "step": 43174 }, { "epoch": 1.9813225643614336, "grad_norm": 0.4784291684627533, "learning_rate": 2.6810865662092246e-06, "loss": 0.3343, "step": 43175 }, { "epoch": 1.9813684548666881, "grad_norm": 0.46351248025894165, "learning_rate": 2.6808693450178814e-06, "loss": 0.3449, "step": 43176 }, { "epoch": 1.9814143453719426, "grad_norm": 0.48730435967445374, "learning_rate": 2.6806521294033112e-06, "loss": 0.3964, "step": 43177 }, { "epoch": 1.981460235877197, "grad_norm": 0.48592326045036316, "learning_rate": 2.6804349193660422e-06, "loss": 0.364, "step": 43178 }, { "epoch": 1.9815061263824516, "grad_norm": 0.502487063407898, "learning_rate": 2.6802177149065957e-06, "loss": 0.4822, "step": 43179 }, { "epoch": 1.9815520168877059, "grad_norm": 0.46852949261665344, "learning_rate": 2.680000516025492e-06, "loss": 0.3326, "step": 43180 }, { "epoch": 1.9815979073929604, "grad_norm": 0.4312870502471924, "learning_rate": 2.6797833227232556e-06, "loss": 0.3286, "step": 43181 }, { "epoch": 1.9816437978982149, "grad_norm": 0.46828654408454895, "learning_rate": 2.679566135000409e-06, "loss": 0.3938, "step": 43182 }, { "epoch": 1.9816896884034694, "grad_norm": 0.514013946056366, "learning_rate": 2.6793489528574705e-06, "loss": 0.4195, "step": 43183 }, { "epoch": 1.9817355789087236, "grad_norm": 0.48704972863197327, "learning_rate": 2.679131776294968e-06, "loss": 0.3821, "step": 43184 }, { "epoch": 1.9817814694139781, "grad_norm": 0.4998438060283661, "learning_rate": 2.6789146053134215e-06, "loss": 0.4217, "step": 43185 }, { "epoch": 1.9818273599192326, "grad_norm": 0.4347667396068573, "learning_rate": 2.678697439913352e-06, "loss": 0.3002, "step": 43186 }, { "epoch": 1.9818732504244871, "grad_norm": 0.468792587518692, "learning_rate": 2.678480280095283e-06, "loss": 0.3976, "step": 43187 }, { "epoch": 1.9819191409297416, "grad_norm": 0.4715765118598938, "learning_rate": 2.6782631258597357e-06, "loss": 0.3584, "step": 43188 }, { "epoch": 1.9819650314349961, "grad_norm": 0.5024660229682922, "learning_rate": 2.6780459772072336e-06, "loss": 0.4455, "step": 43189 }, { "epoch": 1.9820109219402506, "grad_norm": 0.49241316318511963, "learning_rate": 2.6778288341382957e-06, "loss": 0.3723, "step": 43190 }, { "epoch": 1.9820568124455051, "grad_norm": 0.4418708086013794, "learning_rate": 2.6776116966534487e-06, "loss": 0.3004, "step": 43191 }, { "epoch": 1.9821027029507596, "grad_norm": 0.5096988677978516, "learning_rate": 2.6773945647532128e-06, "loss": 0.4391, "step": 43192 }, { "epoch": 1.982148593456014, "grad_norm": 0.4441571533679962, "learning_rate": 2.6771774384381075e-06, "loss": 0.3242, "step": 43193 }, { "epoch": 1.9821944839612684, "grad_norm": 0.47249630093574524, "learning_rate": 2.6769603177086594e-06, "loss": 0.4025, "step": 43194 }, { "epoch": 1.982240374466523, "grad_norm": 0.4635104238986969, "learning_rate": 2.6767432025653885e-06, "loss": 0.3248, "step": 43195 }, { "epoch": 1.9822862649717772, "grad_norm": 0.4669688940048218, "learning_rate": 2.6765260930088153e-06, "loss": 0.3523, "step": 43196 }, { "epoch": 1.9823321554770317, "grad_norm": 0.49607664346694946, "learning_rate": 2.6763089890394654e-06, "loss": 0.4208, "step": 43197 }, { "epoch": 1.9823780459822862, "grad_norm": 0.4378975033760071, "learning_rate": 2.676091890657861e-06, "loss": 0.3336, "step": 43198 }, { "epoch": 1.9824239364875407, "grad_norm": 0.43017831444740295, "learning_rate": 2.6758747978645183e-06, "loss": 0.289, "step": 43199 }, { "epoch": 1.9824698269927952, "grad_norm": 0.4746286869049072, "learning_rate": 2.6756577106599646e-06, "loss": 0.3786, "step": 43200 }, { "epoch": 1.9825157174980497, "grad_norm": 0.5178020596504211, "learning_rate": 2.675440629044721e-06, "loss": 0.4205, "step": 43201 }, { "epoch": 1.9825616080033042, "grad_norm": 0.4440845847129822, "learning_rate": 2.6752235530193073e-06, "loss": 0.3186, "step": 43202 }, { "epoch": 1.9826074985085587, "grad_norm": 0.4626782536506653, "learning_rate": 2.675006482584249e-06, "loss": 0.3061, "step": 43203 }, { "epoch": 1.9826533890138132, "grad_norm": 0.4835585951805115, "learning_rate": 2.6747894177400656e-06, "loss": 0.3828, "step": 43204 }, { "epoch": 1.9826992795190677, "grad_norm": 0.43964555859565735, "learning_rate": 2.6745723584872787e-06, "loss": 0.2903, "step": 43205 }, { "epoch": 1.982745170024322, "grad_norm": 0.46803954243659973, "learning_rate": 2.674355304826413e-06, "loss": 0.3393, "step": 43206 }, { "epoch": 1.9827910605295764, "grad_norm": 0.45373210310935974, "learning_rate": 2.674138256757989e-06, "loss": 0.3414, "step": 43207 }, { "epoch": 1.982836951034831, "grad_norm": 0.44474294781684875, "learning_rate": 2.6739212142825283e-06, "loss": 0.2868, "step": 43208 }, { "epoch": 1.9828828415400852, "grad_norm": 0.4508303999900818, "learning_rate": 2.6737041774005527e-06, "loss": 0.3237, "step": 43209 }, { "epoch": 1.9829287320453397, "grad_norm": 0.4737163484096527, "learning_rate": 2.6734871461125843e-06, "loss": 0.3761, "step": 43210 }, { "epoch": 1.9829746225505942, "grad_norm": 0.5069913268089294, "learning_rate": 2.6732701204191436e-06, "loss": 0.4616, "step": 43211 }, { "epoch": 1.9830205130558487, "grad_norm": 0.49565333127975464, "learning_rate": 2.673053100320756e-06, "loss": 0.361, "step": 43212 }, { "epoch": 1.9830664035611032, "grad_norm": 0.4690817892551422, "learning_rate": 2.6728360858179407e-06, "loss": 0.3688, "step": 43213 }, { "epoch": 1.9831122940663577, "grad_norm": 0.4544459283351898, "learning_rate": 2.6726190769112183e-06, "loss": 0.3057, "step": 43214 }, { "epoch": 1.9831581845716122, "grad_norm": 0.4465959668159485, "learning_rate": 2.6724020736011145e-06, "loss": 0.3226, "step": 43215 }, { "epoch": 1.9832040750768667, "grad_norm": 0.47844526171684265, "learning_rate": 2.6721850758881494e-06, "loss": 0.4058, "step": 43216 }, { "epoch": 1.9832499655821212, "grad_norm": 0.4553939700126648, "learning_rate": 2.6719680837728447e-06, "loss": 0.3272, "step": 43217 }, { "epoch": 1.9832958560873757, "grad_norm": 0.4971645176410675, "learning_rate": 2.671751097255719e-06, "loss": 0.4424, "step": 43218 }, { "epoch": 1.98334174659263, "grad_norm": 0.44561585783958435, "learning_rate": 2.6715341163373e-06, "loss": 0.2855, "step": 43219 }, { "epoch": 1.9833876370978845, "grad_norm": 0.4681265354156494, "learning_rate": 2.6713171410181083e-06, "loss": 0.3969, "step": 43220 }, { "epoch": 1.983433527603139, "grad_norm": 0.4675227999687195, "learning_rate": 2.6711001712986596e-06, "loss": 0.3333, "step": 43221 }, { "epoch": 1.9834794181083932, "grad_norm": 0.4366927146911621, "learning_rate": 2.670883207179482e-06, "loss": 0.3134, "step": 43222 }, { "epoch": 1.9835253086136477, "grad_norm": 0.43500030040740967, "learning_rate": 2.6706662486610957e-06, "loss": 0.2955, "step": 43223 }, { "epoch": 1.9835711991189022, "grad_norm": 0.43693315982818604, "learning_rate": 2.67044929574402e-06, "loss": 0.3262, "step": 43224 }, { "epoch": 1.9836170896241567, "grad_norm": 0.4820905327796936, "learning_rate": 2.6702323484287794e-06, "loss": 0.3632, "step": 43225 }, { "epoch": 1.9836629801294112, "grad_norm": 0.4821542501449585, "learning_rate": 2.6700154067158958e-06, "loss": 0.3575, "step": 43226 }, { "epoch": 1.9837088706346657, "grad_norm": 0.45242324471473694, "learning_rate": 2.6697984706058877e-06, "loss": 0.3571, "step": 43227 }, { "epoch": 1.9837547611399202, "grad_norm": 0.48843076825141907, "learning_rate": 2.669581540099281e-06, "loss": 0.3719, "step": 43228 }, { "epoch": 1.9838006516451747, "grad_norm": 0.7229228019714355, "learning_rate": 2.6693646151965945e-06, "loss": 0.3719, "step": 43229 }, { "epoch": 1.9838465421504292, "grad_norm": 0.42431607842445374, "learning_rate": 2.6691476958983508e-06, "loss": 0.2807, "step": 43230 }, { "epoch": 1.9838924326556835, "grad_norm": 0.5001864433288574, "learning_rate": 2.6689307822050714e-06, "loss": 0.4092, "step": 43231 }, { "epoch": 1.983938323160938, "grad_norm": 0.46732696890830994, "learning_rate": 2.6687138741172773e-06, "loss": 0.3503, "step": 43232 }, { "epoch": 1.9839842136661925, "grad_norm": 0.48768356442451477, "learning_rate": 2.668496971635489e-06, "loss": 0.4142, "step": 43233 }, { "epoch": 1.9840301041714468, "grad_norm": 0.4879835844039917, "learning_rate": 2.668280074760232e-06, "loss": 0.3605, "step": 43234 }, { "epoch": 1.9840759946767013, "grad_norm": 0.486217200756073, "learning_rate": 2.6680631834920244e-06, "loss": 0.3798, "step": 43235 }, { "epoch": 1.9841218851819558, "grad_norm": 0.46995219588279724, "learning_rate": 2.667846297831388e-06, "loss": 0.3559, "step": 43236 }, { "epoch": 1.9841677756872103, "grad_norm": 0.4792799949645996, "learning_rate": 2.6676294177788463e-06, "loss": 0.3821, "step": 43237 }, { "epoch": 1.9842136661924648, "grad_norm": 0.4595497250556946, "learning_rate": 2.66741254333492e-06, "loss": 0.3517, "step": 43238 }, { "epoch": 1.9842595566977193, "grad_norm": 0.4479900598526001, "learning_rate": 2.6671956745001287e-06, "loss": 0.3023, "step": 43239 }, { "epoch": 1.9843054472029737, "grad_norm": 0.49852603673934937, "learning_rate": 2.6669788112749972e-06, "loss": 0.3499, "step": 43240 }, { "epoch": 1.9843513377082282, "grad_norm": 0.4633050262928009, "learning_rate": 2.6667619536600453e-06, "loss": 0.3347, "step": 43241 }, { "epoch": 1.9843972282134827, "grad_norm": 0.4546201527118683, "learning_rate": 2.666545101655795e-06, "loss": 0.2997, "step": 43242 }, { "epoch": 1.9844431187187372, "grad_norm": 0.4829615354537964, "learning_rate": 2.666328255262767e-06, "loss": 0.3491, "step": 43243 }, { "epoch": 1.9844890092239915, "grad_norm": 0.4754326343536377, "learning_rate": 2.666111414481483e-06, "loss": 0.3851, "step": 43244 }, { "epoch": 1.984534899729246, "grad_norm": 0.49072161316871643, "learning_rate": 2.6658945793124625e-06, "loss": 0.4123, "step": 43245 }, { "epoch": 1.9845807902345005, "grad_norm": 0.45561665296554565, "learning_rate": 2.665677749756231e-06, "loss": 0.3587, "step": 43246 }, { "epoch": 1.9846266807397548, "grad_norm": 0.4680860638618469, "learning_rate": 2.6654609258133072e-06, "loss": 0.316, "step": 43247 }, { "epoch": 1.9846725712450093, "grad_norm": 0.48525574803352356, "learning_rate": 2.665244107484212e-06, "loss": 0.4066, "step": 43248 }, { "epoch": 1.9847184617502638, "grad_norm": 0.4495392143726349, "learning_rate": 2.665027294769469e-06, "loss": 0.3504, "step": 43249 }, { "epoch": 1.9847643522555183, "grad_norm": 0.44399794936180115, "learning_rate": 2.664810487669599e-06, "loss": 0.3304, "step": 43250 }, { "epoch": 1.9848102427607728, "grad_norm": 0.7158249020576477, "learning_rate": 2.6645936861851216e-06, "loss": 0.3877, "step": 43251 }, { "epoch": 1.9848561332660273, "grad_norm": 0.4605919122695923, "learning_rate": 2.664376890316561e-06, "loss": 0.3327, "step": 43252 }, { "epoch": 1.9849020237712818, "grad_norm": 0.4624258279800415, "learning_rate": 2.664160100064436e-06, "loss": 0.3461, "step": 43253 }, { "epoch": 1.9849479142765363, "grad_norm": 0.45508649945259094, "learning_rate": 2.6639433154292684e-06, "loss": 0.3582, "step": 43254 }, { "epoch": 1.9849938047817908, "grad_norm": 0.4862181842327118, "learning_rate": 2.663726536411578e-06, "loss": 0.3536, "step": 43255 }, { "epoch": 1.9850396952870453, "grad_norm": 0.4658057689666748, "learning_rate": 2.6635097630118906e-06, "loss": 0.3482, "step": 43256 }, { "epoch": 1.9850855857922995, "grad_norm": 0.47662732005119324, "learning_rate": 2.6632929952307242e-06, "loss": 0.3682, "step": 43257 }, { "epoch": 1.985131476297554, "grad_norm": 0.4431224763393402, "learning_rate": 2.6630762330685987e-06, "loss": 0.2737, "step": 43258 }, { "epoch": 1.9851773668028085, "grad_norm": 0.420151025056839, "learning_rate": 2.6628594765260396e-06, "loss": 0.2965, "step": 43259 }, { "epoch": 1.9852232573080628, "grad_norm": 0.48970189690589905, "learning_rate": 2.662642725603566e-06, "loss": 0.4131, "step": 43260 }, { "epoch": 1.9852691478133173, "grad_norm": 0.44862979650497437, "learning_rate": 2.662425980301696e-06, "loss": 0.3202, "step": 43261 }, { "epoch": 1.9853150383185718, "grad_norm": 0.4888520836830139, "learning_rate": 2.6622092406209566e-06, "loss": 0.3494, "step": 43262 }, { "epoch": 1.9853609288238263, "grad_norm": 0.496141254901886, "learning_rate": 2.6619925065618656e-06, "loss": 0.376, "step": 43263 }, { "epoch": 1.9854068193290808, "grad_norm": 0.5399676561355591, "learning_rate": 2.6617757781249455e-06, "loss": 0.3829, "step": 43264 }, { "epoch": 1.9854527098343353, "grad_norm": 0.49451279640197754, "learning_rate": 2.661559055310716e-06, "loss": 0.3751, "step": 43265 }, { "epoch": 1.9854986003395898, "grad_norm": 0.43755173683166504, "learning_rate": 2.6613423381196996e-06, "loss": 0.2939, "step": 43266 }, { "epoch": 1.9855444908448443, "grad_norm": 0.4871475398540497, "learning_rate": 2.6611256265524144e-06, "loss": 0.4042, "step": 43267 }, { "epoch": 1.9855903813500988, "grad_norm": 0.4361470341682434, "learning_rate": 2.660908920609386e-06, "loss": 0.2837, "step": 43268 }, { "epoch": 1.985636271855353, "grad_norm": 0.45998796820640564, "learning_rate": 2.6606922202911328e-06, "loss": 0.3499, "step": 43269 }, { "epoch": 1.9856821623606076, "grad_norm": 0.4479124844074249, "learning_rate": 2.6604755255981753e-06, "loss": 0.3404, "step": 43270 }, { "epoch": 1.985728052865862, "grad_norm": 0.5127881765365601, "learning_rate": 2.660258836531037e-06, "loss": 0.4286, "step": 43271 }, { "epoch": 1.9857739433711166, "grad_norm": 0.47999855875968933, "learning_rate": 2.6600421530902385e-06, "loss": 0.3721, "step": 43272 }, { "epoch": 1.9858198338763708, "grad_norm": 0.45671913027763367, "learning_rate": 2.6598254752762997e-06, "loss": 0.3403, "step": 43273 }, { "epoch": 1.9858657243816253, "grad_norm": 0.47982463240623474, "learning_rate": 2.659608803089742e-06, "loss": 0.3846, "step": 43274 }, { "epoch": 1.9859116148868798, "grad_norm": 0.45638489723205566, "learning_rate": 2.6593921365310873e-06, "loss": 0.3313, "step": 43275 }, { "epoch": 1.9859575053921343, "grad_norm": 0.4985358715057373, "learning_rate": 2.6591754756008525e-06, "loss": 0.4098, "step": 43276 }, { "epoch": 1.9860033958973888, "grad_norm": 0.48731914162635803, "learning_rate": 2.6589588202995646e-06, "loss": 0.3824, "step": 43277 }, { "epoch": 1.9860492864026433, "grad_norm": 0.46081170439720154, "learning_rate": 2.6587421706277416e-06, "loss": 0.3413, "step": 43278 }, { "epoch": 1.9860951769078978, "grad_norm": 0.4757758677005768, "learning_rate": 2.6585255265859034e-06, "loss": 0.3507, "step": 43279 }, { "epoch": 1.9861410674131523, "grad_norm": 0.49008744955062866, "learning_rate": 2.658308888174574e-06, "loss": 0.4068, "step": 43280 }, { "epoch": 1.9861869579184068, "grad_norm": 0.48594027757644653, "learning_rate": 2.6580922553942722e-06, "loss": 0.3861, "step": 43281 }, { "epoch": 1.986232848423661, "grad_norm": 0.46818509697914124, "learning_rate": 2.657875628245518e-06, "loss": 0.3616, "step": 43282 }, { "epoch": 1.9862787389289156, "grad_norm": 0.4532407820224762, "learning_rate": 2.6576590067288356e-06, "loss": 0.3338, "step": 43283 }, { "epoch": 1.98632462943417, "grad_norm": 0.4521293342113495, "learning_rate": 2.657442390844744e-06, "loss": 0.3545, "step": 43284 }, { "epoch": 1.9863705199394244, "grad_norm": 0.4783223569393158, "learning_rate": 2.657225780593764e-06, "loss": 0.3816, "step": 43285 }, { "epoch": 1.9864164104446789, "grad_norm": 0.44485804438591003, "learning_rate": 2.657009175976416e-06, "loss": 0.318, "step": 43286 }, { "epoch": 1.9864623009499334, "grad_norm": 0.43985068798065186, "learning_rate": 2.6567925769932222e-06, "loss": 0.3115, "step": 43287 }, { "epoch": 1.9865081914551879, "grad_norm": 0.4451111853122711, "learning_rate": 2.6565759836447025e-06, "loss": 0.2978, "step": 43288 }, { "epoch": 1.9865540819604424, "grad_norm": 0.4707503914833069, "learning_rate": 2.656359395931376e-06, "loss": 0.4082, "step": 43289 }, { "epoch": 1.9865999724656969, "grad_norm": 0.4510821998119354, "learning_rate": 2.656142813853768e-06, "loss": 0.3402, "step": 43290 }, { "epoch": 1.9866458629709514, "grad_norm": 0.4516948461532593, "learning_rate": 2.6559262374123962e-06, "loss": 0.3123, "step": 43291 }, { "epoch": 1.9866917534762059, "grad_norm": 0.4725872278213501, "learning_rate": 2.6557096666077804e-06, "loss": 0.3472, "step": 43292 }, { "epoch": 1.9867376439814604, "grad_norm": 0.4570804834365845, "learning_rate": 2.6554931014404445e-06, "loss": 0.3058, "step": 43293 }, { "epoch": 1.9867835344867149, "grad_norm": 0.47417378425598145, "learning_rate": 2.655276541910908e-06, "loss": 0.3403, "step": 43294 }, { "epoch": 1.9868294249919691, "grad_norm": 0.5003190636634827, "learning_rate": 2.6550599880196913e-06, "loss": 0.4293, "step": 43295 }, { "epoch": 1.9868753154972236, "grad_norm": 0.4518091678619385, "learning_rate": 2.654843439767315e-06, "loss": 0.3411, "step": 43296 }, { "epoch": 1.9869212060024781, "grad_norm": 0.45399075746536255, "learning_rate": 2.6546268971543006e-06, "loss": 0.3612, "step": 43297 }, { "epoch": 1.9869670965077324, "grad_norm": 0.4527285099029541, "learning_rate": 2.6544103601811654e-06, "loss": 0.364, "step": 43298 }, { "epoch": 1.987012987012987, "grad_norm": 0.5471859574317932, "learning_rate": 2.654193828848436e-06, "loss": 0.4023, "step": 43299 }, { "epoch": 1.9870588775182414, "grad_norm": 0.47555187344551086, "learning_rate": 2.65397730315663e-06, "loss": 0.3597, "step": 43300 }, { "epoch": 1.987104768023496, "grad_norm": 0.48820748925209045, "learning_rate": 2.6537607831062654e-06, "loss": 0.3972, "step": 43301 }, { "epoch": 1.9871506585287504, "grad_norm": 0.45449841022491455, "learning_rate": 2.6535442686978685e-06, "loss": 0.2934, "step": 43302 }, { "epoch": 1.9871965490340049, "grad_norm": 0.4821808636188507, "learning_rate": 2.653327759931956e-06, "loss": 0.367, "step": 43303 }, { "epoch": 1.9872424395392594, "grad_norm": 0.49018624424934387, "learning_rate": 2.653111256809049e-06, "loss": 0.3568, "step": 43304 }, { "epoch": 1.9872883300445139, "grad_norm": 0.5007258057594299, "learning_rate": 2.6528947593296697e-06, "loss": 0.4489, "step": 43305 }, { "epoch": 1.9873342205497684, "grad_norm": 0.5081180334091187, "learning_rate": 2.6526782674943387e-06, "loss": 0.4719, "step": 43306 }, { "epoch": 1.9873801110550229, "grad_norm": 0.45810064673423767, "learning_rate": 2.6524617813035747e-06, "loss": 0.365, "step": 43307 }, { "epoch": 1.9874260015602772, "grad_norm": 0.512195348739624, "learning_rate": 2.652245300757899e-06, "loss": 0.3656, "step": 43308 }, { "epoch": 1.9874718920655317, "grad_norm": 0.4660709500312805, "learning_rate": 2.652028825857833e-06, "loss": 0.3128, "step": 43309 }, { "epoch": 1.9875177825707862, "grad_norm": 0.4849500060081482, "learning_rate": 2.6518123566038944e-06, "loss": 0.3956, "step": 43310 }, { "epoch": 1.9875636730760404, "grad_norm": 0.45419880747795105, "learning_rate": 2.651595892996608e-06, "loss": 0.3112, "step": 43311 }, { "epoch": 1.987609563581295, "grad_norm": 0.49454638361930847, "learning_rate": 2.6513794350364925e-06, "loss": 0.4176, "step": 43312 }, { "epoch": 1.9876554540865494, "grad_norm": 0.44968920946121216, "learning_rate": 2.651162982724066e-06, "loss": 0.3263, "step": 43313 }, { "epoch": 1.987701344591804, "grad_norm": 0.47197842597961426, "learning_rate": 2.6509465360598536e-06, "loss": 0.3456, "step": 43314 }, { "epoch": 1.9877472350970584, "grad_norm": 0.4636886417865753, "learning_rate": 2.6507300950443726e-06, "loss": 0.3183, "step": 43315 }, { "epoch": 1.987793125602313, "grad_norm": 0.4490591883659363, "learning_rate": 2.650513659678145e-06, "loss": 0.341, "step": 43316 }, { "epoch": 1.9878390161075674, "grad_norm": 0.4331201910972595, "learning_rate": 2.65029722996169e-06, "loss": 0.311, "step": 43317 }, { "epoch": 1.987884906612822, "grad_norm": 0.4280503988265991, "learning_rate": 2.650080805895529e-06, "loss": 0.2844, "step": 43318 }, { "epoch": 1.9879307971180764, "grad_norm": 0.48602843284606934, "learning_rate": 2.649864387480182e-06, "loss": 0.3565, "step": 43319 }, { "epoch": 1.9879766876233307, "grad_norm": 0.494600385427475, "learning_rate": 2.6496479747161674e-06, "loss": 0.4133, "step": 43320 }, { "epoch": 1.9880225781285852, "grad_norm": 0.41177231073379517, "learning_rate": 2.6494315676040093e-06, "loss": 0.2651, "step": 43321 }, { "epoch": 1.9880684686338397, "grad_norm": 0.4757979214191437, "learning_rate": 2.6492151661442266e-06, "loss": 0.4093, "step": 43322 }, { "epoch": 1.988114359139094, "grad_norm": 0.5252898335456848, "learning_rate": 2.6489987703373376e-06, "loss": 0.3677, "step": 43323 }, { "epoch": 1.9881602496443485, "grad_norm": 0.44714444875717163, "learning_rate": 2.6487823801838663e-06, "loss": 0.3283, "step": 43324 }, { "epoch": 1.988206140149603, "grad_norm": 0.48737475275993347, "learning_rate": 2.648565995684332e-06, "loss": 0.405, "step": 43325 }, { "epoch": 1.9882520306548574, "grad_norm": 0.4801470637321472, "learning_rate": 2.648349616839252e-06, "loss": 0.353, "step": 43326 }, { "epoch": 1.988297921160112, "grad_norm": 0.5045482516288757, "learning_rate": 2.64813324364915e-06, "loss": 0.4309, "step": 43327 }, { "epoch": 1.9883438116653664, "grad_norm": 0.45169174671173096, "learning_rate": 2.6479168761145467e-06, "loss": 0.3071, "step": 43328 }, { "epoch": 1.988389702170621, "grad_norm": 0.5058801174163818, "learning_rate": 2.64770051423596e-06, "loss": 0.4339, "step": 43329 }, { "epoch": 1.9884355926758754, "grad_norm": 0.505872905254364, "learning_rate": 2.647484158013912e-06, "loss": 0.3621, "step": 43330 }, { "epoch": 1.98848148318113, "grad_norm": 0.43194788694381714, "learning_rate": 2.647267807448921e-06, "loss": 0.3044, "step": 43331 }, { "epoch": 1.9885273736863844, "grad_norm": 0.4160498082637787, "learning_rate": 2.6470514625415077e-06, "loss": 0.2855, "step": 43332 }, { "epoch": 1.9885732641916387, "grad_norm": 0.48693475127220154, "learning_rate": 2.646835123292194e-06, "loss": 0.3922, "step": 43333 }, { "epoch": 1.9886191546968932, "grad_norm": 0.43357419967651367, "learning_rate": 2.6466187897014996e-06, "loss": 0.3389, "step": 43334 }, { "epoch": 1.9886650452021477, "grad_norm": 0.4509291648864746, "learning_rate": 2.6464024617699425e-06, "loss": 0.3421, "step": 43335 }, { "epoch": 1.988710935707402, "grad_norm": 0.4928635060787201, "learning_rate": 2.646186139498046e-06, "loss": 0.3688, "step": 43336 }, { "epoch": 1.9887568262126565, "grad_norm": 0.4841051399707794, "learning_rate": 2.6459698228863295e-06, "loss": 0.405, "step": 43337 }, { "epoch": 1.988802716717911, "grad_norm": 0.4398481249809265, "learning_rate": 2.6457535119353117e-06, "loss": 0.3065, "step": 43338 }, { "epoch": 1.9888486072231655, "grad_norm": 0.46194323897361755, "learning_rate": 2.645537206645515e-06, "loss": 0.3473, "step": 43339 }, { "epoch": 1.98889449772842, "grad_norm": 0.42861220240592957, "learning_rate": 2.6453209070174574e-06, "loss": 0.2902, "step": 43340 }, { "epoch": 1.9889403882336745, "grad_norm": 0.4792320728302002, "learning_rate": 2.6451046130516576e-06, "loss": 0.332, "step": 43341 }, { "epoch": 1.988986278738929, "grad_norm": 0.4614847004413605, "learning_rate": 2.644888324748641e-06, "loss": 0.3356, "step": 43342 }, { "epoch": 1.9890321692441835, "grad_norm": 0.5008379220962524, "learning_rate": 2.644672042108924e-06, "loss": 0.4241, "step": 43343 }, { "epoch": 1.989078059749438, "grad_norm": 0.4531208276748657, "learning_rate": 2.6444557651330255e-06, "loss": 0.319, "step": 43344 }, { "epoch": 1.9891239502546925, "grad_norm": 0.412923663854599, "learning_rate": 2.6442394938214695e-06, "loss": 0.2657, "step": 43345 }, { "epoch": 1.9891698407599467, "grad_norm": 0.4707610011100769, "learning_rate": 2.6440232281747742e-06, "loss": 0.3457, "step": 43346 }, { "epoch": 1.9892157312652012, "grad_norm": 0.470540851354599, "learning_rate": 2.643806968193457e-06, "loss": 0.3329, "step": 43347 }, { "epoch": 1.9892616217704557, "grad_norm": 0.4819805920124054, "learning_rate": 2.6435907138780425e-06, "loss": 0.3919, "step": 43348 }, { "epoch": 1.98930751227571, "grad_norm": 0.5174906849861145, "learning_rate": 2.6433744652290487e-06, "loss": 0.3277, "step": 43349 }, { "epoch": 1.9893534027809645, "grad_norm": 0.46744304895401, "learning_rate": 2.643158222246996e-06, "loss": 0.3159, "step": 43350 }, { "epoch": 1.989399293286219, "grad_norm": 0.4820377826690674, "learning_rate": 2.6429419849324033e-06, "loss": 0.3623, "step": 43351 }, { "epoch": 1.9894451837914735, "grad_norm": 0.4900696873664856, "learning_rate": 2.642725753285792e-06, "loss": 0.3673, "step": 43352 }, { "epoch": 1.989491074296728, "grad_norm": 0.4480346143245697, "learning_rate": 2.6425095273076805e-06, "loss": 0.3205, "step": 43353 }, { "epoch": 1.9895369648019825, "grad_norm": 0.5048576593399048, "learning_rate": 2.642293306998588e-06, "loss": 0.3733, "step": 43354 }, { "epoch": 1.989582855307237, "grad_norm": 0.4269384741783142, "learning_rate": 2.6420770923590378e-06, "loss": 0.2734, "step": 43355 }, { "epoch": 1.9896287458124915, "grad_norm": 0.46099716424942017, "learning_rate": 2.6418608833895476e-06, "loss": 0.35, "step": 43356 }, { "epoch": 1.989674636317746, "grad_norm": 0.4357855021953583, "learning_rate": 2.641644680090636e-06, "loss": 0.3227, "step": 43357 }, { "epoch": 1.9897205268230003, "grad_norm": 0.4243274927139282, "learning_rate": 2.641428482462827e-06, "loss": 0.281, "step": 43358 }, { "epoch": 1.9897664173282548, "grad_norm": 0.4360494017601013, "learning_rate": 2.6412122905066372e-06, "loss": 0.3265, "step": 43359 }, { "epoch": 1.9898123078335093, "grad_norm": 0.48708534240722656, "learning_rate": 2.6409961042225885e-06, "loss": 0.386, "step": 43360 }, { "epoch": 1.9898581983387638, "grad_norm": 0.4680542051792145, "learning_rate": 2.640779923611199e-06, "loss": 0.3539, "step": 43361 }, { "epoch": 1.989904088844018, "grad_norm": 0.44694122672080994, "learning_rate": 2.6405637486729897e-06, "loss": 0.3172, "step": 43362 }, { "epoch": 1.9899499793492725, "grad_norm": 0.45567813515663147, "learning_rate": 2.6403475794084777e-06, "loss": 0.3089, "step": 43363 }, { "epoch": 1.989995869854527, "grad_norm": 0.47259414196014404, "learning_rate": 2.640131415818187e-06, "loss": 0.3464, "step": 43364 }, { "epoch": 1.9900417603597815, "grad_norm": 0.4794715940952301, "learning_rate": 2.6399152579026357e-06, "loss": 0.3805, "step": 43365 }, { "epoch": 1.990087650865036, "grad_norm": 0.44594621658325195, "learning_rate": 2.6396991056623407e-06, "loss": 0.3223, "step": 43366 }, { "epoch": 1.9901335413702905, "grad_norm": 0.44707733392715454, "learning_rate": 2.6394829590978264e-06, "loss": 0.3145, "step": 43367 }, { "epoch": 1.990179431875545, "grad_norm": 0.4865078628063202, "learning_rate": 2.6392668182096114e-06, "loss": 0.3996, "step": 43368 }, { "epoch": 1.9902253223807995, "grad_norm": 0.6785011291503906, "learning_rate": 2.639050682998212e-06, "loss": 0.278, "step": 43369 }, { "epoch": 1.990271212886054, "grad_norm": 0.4647110104560852, "learning_rate": 2.6388345534641524e-06, "loss": 0.3207, "step": 43370 }, { "epoch": 1.9903171033913083, "grad_norm": 0.5090306401252747, "learning_rate": 2.638618429607951e-06, "loss": 0.4192, "step": 43371 }, { "epoch": 1.9903629938965628, "grad_norm": 0.44892221689224243, "learning_rate": 2.638402311430127e-06, "loss": 0.3004, "step": 43372 }, { "epoch": 1.9904088844018173, "grad_norm": 0.5025840401649475, "learning_rate": 2.6381861989311998e-06, "loss": 0.3977, "step": 43373 }, { "epoch": 1.9904547749070716, "grad_norm": 0.43405893445014954, "learning_rate": 2.6379700921116897e-06, "loss": 0.2953, "step": 43374 }, { "epoch": 1.990500665412326, "grad_norm": 0.492634117603302, "learning_rate": 2.637753990972114e-06, "loss": 0.3914, "step": 43375 }, { "epoch": 1.9905465559175806, "grad_norm": 0.48303088545799255, "learning_rate": 2.637537895512996e-06, "loss": 0.4275, "step": 43376 }, { "epoch": 1.990592446422835, "grad_norm": 0.5093284249305725, "learning_rate": 2.6373218057348535e-06, "loss": 0.3523, "step": 43377 }, { "epoch": 1.9906383369280896, "grad_norm": 0.4901745617389679, "learning_rate": 2.6371057216382055e-06, "loss": 0.3881, "step": 43378 }, { "epoch": 1.990684227433344, "grad_norm": 0.43879425525665283, "learning_rate": 2.6368896432235734e-06, "loss": 0.3027, "step": 43379 }, { "epoch": 1.9907301179385986, "grad_norm": 0.4827568531036377, "learning_rate": 2.636673570491477e-06, "loss": 0.3437, "step": 43380 }, { "epoch": 1.990776008443853, "grad_norm": 0.4673489034175873, "learning_rate": 2.636457503442434e-06, "loss": 0.3757, "step": 43381 }, { "epoch": 1.9908218989491075, "grad_norm": 0.4985295534133911, "learning_rate": 2.636241442076965e-06, "loss": 0.3589, "step": 43382 }, { "epoch": 1.990867789454362, "grad_norm": 0.4456523060798645, "learning_rate": 2.6360253863955888e-06, "loss": 0.3113, "step": 43383 }, { "epoch": 1.9909136799596163, "grad_norm": 0.46577149629592896, "learning_rate": 2.6358093363988257e-06, "loss": 0.3301, "step": 43384 }, { "epoch": 1.9909595704648708, "grad_norm": 0.4556271731853485, "learning_rate": 2.6355932920871936e-06, "loss": 0.3593, "step": 43385 }, { "epoch": 1.9910054609701253, "grad_norm": 0.44698846340179443, "learning_rate": 2.6353772534612147e-06, "loss": 0.3361, "step": 43386 }, { "epoch": 1.9910513514753796, "grad_norm": 0.4638999104499817, "learning_rate": 2.6351612205214078e-06, "loss": 0.277, "step": 43387 }, { "epoch": 1.991097241980634, "grad_norm": 0.41555649042129517, "learning_rate": 2.634945193268289e-06, "loss": 0.2616, "step": 43388 }, { "epoch": 1.9911431324858886, "grad_norm": 0.4532291889190674, "learning_rate": 2.634729171702384e-06, "loss": 0.2867, "step": 43389 }, { "epoch": 1.991189022991143, "grad_norm": 0.4801202416419983, "learning_rate": 2.6345131558242074e-06, "loss": 0.3755, "step": 43390 }, { "epoch": 1.9912349134963976, "grad_norm": 0.456535279750824, "learning_rate": 2.6342971456342782e-06, "loss": 0.375, "step": 43391 }, { "epoch": 1.991280804001652, "grad_norm": 0.4986515939235687, "learning_rate": 2.63408114113312e-06, "loss": 0.3966, "step": 43392 }, { "epoch": 1.9913266945069066, "grad_norm": 0.46540403366088867, "learning_rate": 2.6338651423212502e-06, "loss": 0.3744, "step": 43393 }, { "epoch": 1.991372585012161, "grad_norm": 0.46242424845695496, "learning_rate": 2.633649149199188e-06, "loss": 0.3524, "step": 43394 }, { "epoch": 1.9914184755174156, "grad_norm": 0.5062050819396973, "learning_rate": 2.6334331617674525e-06, "loss": 0.4207, "step": 43395 }, { "epoch": 1.99146436602267, "grad_norm": 0.4778480529785156, "learning_rate": 2.633217180026564e-06, "loss": 0.3561, "step": 43396 }, { "epoch": 1.9915102565279243, "grad_norm": 0.45920810103416443, "learning_rate": 2.633001203977038e-06, "loss": 0.2934, "step": 43397 }, { "epoch": 1.9915561470331788, "grad_norm": 0.4391583204269409, "learning_rate": 2.6327852336193997e-06, "loss": 0.304, "step": 43398 }, { "epoch": 1.9916020375384333, "grad_norm": 0.4553906321525574, "learning_rate": 2.632569268954166e-06, "loss": 0.3593, "step": 43399 }, { "epoch": 1.9916479280436876, "grad_norm": 0.6995207667350769, "learning_rate": 2.6323533099818534e-06, "loss": 0.3767, "step": 43400 }, { "epoch": 1.9916938185489421, "grad_norm": 0.4738048017024994, "learning_rate": 2.6321373567029863e-06, "loss": 0.3794, "step": 43401 }, { "epoch": 1.9917397090541966, "grad_norm": 0.4616584777832031, "learning_rate": 2.6319214091180817e-06, "loss": 0.2895, "step": 43402 }, { "epoch": 1.9917855995594511, "grad_norm": 0.47007960081100464, "learning_rate": 2.631705467227659e-06, "loss": 0.2875, "step": 43403 }, { "epoch": 1.9918314900647056, "grad_norm": 0.4791438579559326, "learning_rate": 2.6314895310322363e-06, "loss": 0.3611, "step": 43404 }, { "epoch": 1.99187738056996, "grad_norm": 0.5257573127746582, "learning_rate": 2.6312736005323336e-06, "loss": 0.489, "step": 43405 }, { "epoch": 1.9919232710752146, "grad_norm": 0.45210495591163635, "learning_rate": 2.631057675728469e-06, "loss": 0.3198, "step": 43406 }, { "epoch": 1.991969161580469, "grad_norm": 0.4715217053890228, "learning_rate": 2.6308417566211654e-06, "loss": 0.4221, "step": 43407 }, { "epoch": 1.9920150520857236, "grad_norm": 0.47869932651519775, "learning_rate": 2.630625843210939e-06, "loss": 0.4044, "step": 43408 }, { "epoch": 1.9920609425909779, "grad_norm": 0.48531776666641235, "learning_rate": 2.6304099354983078e-06, "loss": 0.399, "step": 43409 }, { "epoch": 1.9921068330962324, "grad_norm": 0.4626838266849518, "learning_rate": 2.6301940334837944e-06, "loss": 0.3719, "step": 43410 }, { "epoch": 1.9921527236014869, "grad_norm": 0.48710039258003235, "learning_rate": 2.6299781371679167e-06, "loss": 0.3819, "step": 43411 }, { "epoch": 1.9921986141067412, "grad_norm": 0.4521794617176056, "learning_rate": 2.6297622465511917e-06, "loss": 0.3386, "step": 43412 }, { "epoch": 1.9922445046119956, "grad_norm": 0.45772430300712585, "learning_rate": 2.6295463616341426e-06, "loss": 0.3391, "step": 43413 }, { "epoch": 1.9922903951172501, "grad_norm": 0.41179922223091125, "learning_rate": 2.629330482417286e-06, "loss": 0.2784, "step": 43414 }, { "epoch": 1.9923362856225046, "grad_norm": 0.4537340998649597, "learning_rate": 2.6291146089011423e-06, "loss": 0.3486, "step": 43415 }, { "epoch": 1.9923821761277591, "grad_norm": 0.4797387719154358, "learning_rate": 2.6288987410862287e-06, "loss": 0.4118, "step": 43416 }, { "epoch": 1.9924280666330136, "grad_norm": 0.541167140007019, "learning_rate": 2.628682878973066e-06, "loss": 0.4511, "step": 43417 }, { "epoch": 1.9924739571382681, "grad_norm": 0.4526997208595276, "learning_rate": 2.628467022562171e-06, "loss": 0.3294, "step": 43418 }, { "epoch": 1.9925198476435226, "grad_norm": 0.5251675248146057, "learning_rate": 2.6282511718540636e-06, "loss": 0.4303, "step": 43419 }, { "epoch": 1.9925657381487771, "grad_norm": 0.47006529569625854, "learning_rate": 2.6280353268492654e-06, "loss": 0.3575, "step": 43420 }, { "epoch": 1.9926116286540316, "grad_norm": 0.4677884876728058, "learning_rate": 2.6278194875482943e-06, "loss": 0.407, "step": 43421 }, { "epoch": 1.992657519159286, "grad_norm": 0.4622874855995178, "learning_rate": 2.6276036539516657e-06, "loss": 0.3725, "step": 43422 }, { "epoch": 1.9927034096645404, "grad_norm": 0.4669547975063324, "learning_rate": 2.627387826059904e-06, "loss": 0.3081, "step": 43423 }, { "epoch": 1.992749300169795, "grad_norm": 0.4693675637245178, "learning_rate": 2.6271720038735254e-06, "loss": 0.369, "step": 43424 }, { "epoch": 1.9927951906750492, "grad_norm": 0.4572746753692627, "learning_rate": 2.6269561873930495e-06, "loss": 0.2796, "step": 43425 }, { "epoch": 1.9928410811803037, "grad_norm": 0.510162353515625, "learning_rate": 2.626740376618995e-06, "loss": 0.4147, "step": 43426 }, { "epoch": 1.9928869716855582, "grad_norm": 0.4567500352859497, "learning_rate": 2.626524571551881e-06, "loss": 0.3479, "step": 43427 }, { "epoch": 1.9929328621908127, "grad_norm": 0.4552430510520935, "learning_rate": 2.626308772192224e-06, "loss": 0.361, "step": 43428 }, { "epoch": 1.9929787526960672, "grad_norm": 0.4946906268596649, "learning_rate": 2.6260929785405475e-06, "loss": 0.3679, "step": 43429 }, { "epoch": 1.9930246432013217, "grad_norm": 0.4993841350078583, "learning_rate": 2.625877190597368e-06, "loss": 0.3721, "step": 43430 }, { "epoch": 1.9930705337065762, "grad_norm": 0.46443378925323486, "learning_rate": 2.6256614083632025e-06, "loss": 0.3761, "step": 43431 }, { "epoch": 1.9931164242118307, "grad_norm": 0.4553373157978058, "learning_rate": 2.6254456318385744e-06, "loss": 0.3345, "step": 43432 }, { "epoch": 1.9931623147170852, "grad_norm": 0.4568401873111725, "learning_rate": 2.625229861024e-06, "loss": 0.3452, "step": 43433 }, { "epoch": 1.9932082052223397, "grad_norm": 0.4471507966518402, "learning_rate": 2.625014095919996e-06, "loss": 0.3248, "step": 43434 }, { "epoch": 1.993254095727594, "grad_norm": 0.42721685767173767, "learning_rate": 2.624798336527086e-06, "loss": 0.322, "step": 43435 }, { "epoch": 1.9932999862328484, "grad_norm": 0.4957304894924164, "learning_rate": 2.624582582845786e-06, "loss": 0.3853, "step": 43436 }, { "epoch": 1.993345876738103, "grad_norm": 0.4773694574832916, "learning_rate": 2.6243668348766147e-06, "loss": 0.4047, "step": 43437 }, { "epoch": 1.9933917672433572, "grad_norm": 0.44989436864852905, "learning_rate": 2.624151092620092e-06, "loss": 0.3398, "step": 43438 }, { "epoch": 1.9934376577486117, "grad_norm": 0.45981255173683167, "learning_rate": 2.623935356076736e-06, "loss": 0.3559, "step": 43439 }, { "epoch": 1.9934835482538662, "grad_norm": 0.47435519099235535, "learning_rate": 2.6237196252470637e-06, "loss": 0.3368, "step": 43440 }, { "epoch": 1.9935294387591207, "grad_norm": 0.49050796031951904, "learning_rate": 2.6235039001315977e-06, "loss": 0.4256, "step": 43441 }, { "epoch": 1.9935753292643752, "grad_norm": 0.4807266592979431, "learning_rate": 2.623288180730854e-06, "loss": 0.3511, "step": 43442 }, { "epoch": 1.9936212197696297, "grad_norm": 0.5053434371948242, "learning_rate": 2.623072467045351e-06, "loss": 0.3995, "step": 43443 }, { "epoch": 1.9936671102748842, "grad_norm": 0.43278032541275024, "learning_rate": 2.6228567590756103e-06, "loss": 0.3049, "step": 43444 }, { "epoch": 1.9937130007801387, "grad_norm": 0.49170348048210144, "learning_rate": 2.6226410568221488e-06, "loss": 0.3861, "step": 43445 }, { "epoch": 1.9937588912853932, "grad_norm": 0.43277403712272644, "learning_rate": 2.6224253602854854e-06, "loss": 0.3047, "step": 43446 }, { "epoch": 1.9938047817906475, "grad_norm": 0.5038232207298279, "learning_rate": 2.6222096694661386e-06, "loss": 0.3919, "step": 43447 }, { "epoch": 1.993850672295902, "grad_norm": 0.45835354924201965, "learning_rate": 2.6219939843646268e-06, "loss": 0.3982, "step": 43448 }, { "epoch": 1.9938965628011565, "grad_norm": 0.49463650584220886, "learning_rate": 2.6217783049814695e-06, "loss": 0.3655, "step": 43449 }, { "epoch": 1.993942453306411, "grad_norm": 0.4506600797176361, "learning_rate": 2.621562631317182e-06, "loss": 0.2983, "step": 43450 }, { "epoch": 1.9939883438116652, "grad_norm": 0.4609457850456238, "learning_rate": 2.6213469633722883e-06, "loss": 0.3143, "step": 43451 }, { "epoch": 1.9940342343169197, "grad_norm": 0.481280654668808, "learning_rate": 2.6211313011473042e-06, "loss": 0.31, "step": 43452 }, { "epoch": 1.9940801248221742, "grad_norm": 0.48059970140457153, "learning_rate": 2.6209156446427466e-06, "loss": 0.3431, "step": 43453 }, { "epoch": 1.9941260153274287, "grad_norm": 0.4950662851333618, "learning_rate": 2.6206999938591384e-06, "loss": 0.3938, "step": 43454 }, { "epoch": 1.9941719058326832, "grad_norm": 0.48106417059898376, "learning_rate": 2.620484348796995e-06, "loss": 0.3613, "step": 43455 }, { "epoch": 1.9942177963379377, "grad_norm": 0.45322123169898987, "learning_rate": 2.6202687094568345e-06, "loss": 0.3476, "step": 43456 }, { "epoch": 1.9942636868431922, "grad_norm": 0.445407897233963, "learning_rate": 2.6200530758391782e-06, "loss": 0.3112, "step": 43457 }, { "epoch": 1.9943095773484467, "grad_norm": 0.48476442694664, "learning_rate": 2.6198374479445433e-06, "loss": 0.3959, "step": 43458 }, { "epoch": 1.9943554678537012, "grad_norm": 0.4721941649913788, "learning_rate": 2.619621825773448e-06, "loss": 0.4006, "step": 43459 }, { "epoch": 1.9944013583589555, "grad_norm": 0.4715723395347595, "learning_rate": 2.6194062093264108e-06, "loss": 0.3599, "step": 43460 }, { "epoch": 1.99444724886421, "grad_norm": 0.48683395981788635, "learning_rate": 2.619190598603951e-06, "loss": 0.3987, "step": 43461 }, { "epoch": 1.9944931393694645, "grad_norm": 0.42631796002388, "learning_rate": 2.6189749936065832e-06, "loss": 0.2589, "step": 43462 }, { "epoch": 1.9945390298747188, "grad_norm": 0.47948041558265686, "learning_rate": 2.6187593943348322e-06, "loss": 0.4098, "step": 43463 }, { "epoch": 1.9945849203799733, "grad_norm": 0.48881423473358154, "learning_rate": 2.618543800789213e-06, "loss": 0.4405, "step": 43464 }, { "epoch": 1.9946308108852278, "grad_norm": 0.4477350115776062, "learning_rate": 2.6183282129702426e-06, "loss": 0.3176, "step": 43465 }, { "epoch": 1.9946767013904823, "grad_norm": 0.530788004398346, "learning_rate": 2.618112630878443e-06, "loss": 0.4829, "step": 43466 }, { "epoch": 1.9947225918957368, "grad_norm": 0.49737969040870667, "learning_rate": 2.61789705451433e-06, "loss": 0.349, "step": 43467 }, { "epoch": 1.9947684824009912, "grad_norm": 0.4586632251739502, "learning_rate": 2.6176814838784238e-06, "loss": 0.3066, "step": 43468 }, { "epoch": 1.9948143729062457, "grad_norm": 0.48012715578079224, "learning_rate": 2.6174659189712416e-06, "loss": 0.3803, "step": 43469 }, { "epoch": 1.9948602634115002, "grad_norm": 0.5049107670783997, "learning_rate": 2.6172503597933014e-06, "loss": 0.3554, "step": 43470 }, { "epoch": 1.9949061539167547, "grad_norm": 0.4434470534324646, "learning_rate": 2.61703480634512e-06, "loss": 0.2907, "step": 43471 }, { "epoch": 1.9949520444220092, "grad_norm": 0.4836183786392212, "learning_rate": 2.61681925862722e-06, "loss": 0.3841, "step": 43472 }, { "epoch": 1.9949979349272635, "grad_norm": 0.47668492794036865, "learning_rate": 2.616603716640118e-06, "loss": 0.3858, "step": 43473 }, { "epoch": 1.995043825432518, "grad_norm": 0.4343286156654358, "learning_rate": 2.6163881803843293e-06, "loss": 0.2976, "step": 43474 }, { "epoch": 1.9950897159377725, "grad_norm": 0.46433010697364807, "learning_rate": 2.6161726498603773e-06, "loss": 0.3755, "step": 43475 }, { "epoch": 1.9951356064430268, "grad_norm": 0.48212745785713196, "learning_rate": 2.6159571250687776e-06, "loss": 0.3658, "step": 43476 }, { "epoch": 1.9951814969482813, "grad_norm": 0.4511442482471466, "learning_rate": 2.615741606010046e-06, "loss": 0.3159, "step": 43477 }, { "epoch": 1.9952273874535358, "grad_norm": 0.4912593960762024, "learning_rate": 2.615526092684706e-06, "loss": 0.3708, "step": 43478 }, { "epoch": 1.9952732779587903, "grad_norm": 0.4846009910106659, "learning_rate": 2.6153105850932727e-06, "loss": 0.3444, "step": 43479 }, { "epoch": 1.9953191684640448, "grad_norm": 0.47422948479652405, "learning_rate": 2.6150950832362652e-06, "loss": 0.373, "step": 43480 }, { "epoch": 1.9953650589692993, "grad_norm": 0.46811941266059875, "learning_rate": 2.6148795871142007e-06, "loss": 0.3504, "step": 43481 }, { "epoch": 1.9954109494745538, "grad_norm": 0.4349003732204437, "learning_rate": 2.6146640967275987e-06, "loss": 0.3015, "step": 43482 }, { "epoch": 1.9954568399798083, "grad_norm": 0.45451727509498596, "learning_rate": 2.6144486120769763e-06, "loss": 0.3226, "step": 43483 }, { "epoch": 1.9955027304850628, "grad_norm": 0.4717479646205902, "learning_rate": 2.6142331331628503e-06, "loss": 0.4057, "step": 43484 }, { "epoch": 1.9955486209903173, "grad_norm": 0.49039867520332336, "learning_rate": 2.6140176599857427e-06, "loss": 0.3888, "step": 43485 }, { "epoch": 1.9955945114955715, "grad_norm": 0.4888279139995575, "learning_rate": 2.61380219254617e-06, "loss": 0.3511, "step": 43486 }, { "epoch": 1.995640402000826, "grad_norm": 0.48961466550827026, "learning_rate": 2.613586730844647e-06, "loss": 0.4077, "step": 43487 }, { "epoch": 1.9956862925060805, "grad_norm": 0.5120751261711121, "learning_rate": 2.6133712748816974e-06, "loss": 0.4465, "step": 43488 }, { "epoch": 1.9957321830113348, "grad_norm": 0.4331657588481903, "learning_rate": 2.613155824657836e-06, "loss": 0.308, "step": 43489 }, { "epoch": 1.9957780735165893, "grad_norm": 0.47769105434417725, "learning_rate": 2.612940380173582e-06, "loss": 0.3384, "step": 43490 }, { "epoch": 1.9958239640218438, "grad_norm": 0.48808202147483826, "learning_rate": 2.612724941429453e-06, "loss": 0.3737, "step": 43491 }, { "epoch": 1.9958698545270983, "grad_norm": 0.44576549530029297, "learning_rate": 2.6125095084259667e-06, "loss": 0.3172, "step": 43492 }, { "epoch": 1.9959157450323528, "grad_norm": 0.47165411710739136, "learning_rate": 2.61229408116364e-06, "loss": 0.356, "step": 43493 }, { "epoch": 1.9959616355376073, "grad_norm": 0.4942033588886261, "learning_rate": 2.6120786596429947e-06, "loss": 0.3752, "step": 43494 }, { "epoch": 1.9960075260428618, "grad_norm": 0.4608750641345978, "learning_rate": 2.611863243864546e-06, "loss": 0.3586, "step": 43495 }, { "epoch": 1.9960534165481163, "grad_norm": 0.4616643786430359, "learning_rate": 2.611647833828811e-06, "loss": 0.3629, "step": 43496 }, { "epoch": 1.9960993070533708, "grad_norm": 0.5115818977355957, "learning_rate": 2.6114324295363102e-06, "loss": 0.3879, "step": 43497 }, { "epoch": 1.996145197558625, "grad_norm": 0.4367506802082062, "learning_rate": 2.611217030987562e-06, "loss": 0.3051, "step": 43498 }, { "epoch": 1.9961910880638796, "grad_norm": 0.4718990921974182, "learning_rate": 2.6110016381830793e-06, "loss": 0.3518, "step": 43499 }, { "epoch": 1.996236978569134, "grad_norm": 0.44062480330467224, "learning_rate": 2.6107862511233867e-06, "loss": 0.2815, "step": 43500 }, { "epoch": 1.9962828690743883, "grad_norm": 0.4524686634540558, "learning_rate": 2.6105708698089988e-06, "loss": 0.3264, "step": 43501 }, { "epoch": 1.9963287595796428, "grad_norm": 0.4581872522830963, "learning_rate": 2.6103554942404343e-06, "loss": 0.3202, "step": 43502 }, { "epoch": 1.9963746500848973, "grad_norm": 0.48497748374938965, "learning_rate": 2.61014012441821e-06, "loss": 0.3565, "step": 43503 }, { "epoch": 1.9964205405901518, "grad_norm": 0.4639185965061188, "learning_rate": 2.6099247603428444e-06, "loss": 0.3632, "step": 43504 }, { "epoch": 1.9964664310954063, "grad_norm": 0.4519108831882477, "learning_rate": 2.6097094020148538e-06, "loss": 0.3049, "step": 43505 }, { "epoch": 1.9965123216006608, "grad_norm": 0.4374118447303772, "learning_rate": 2.6094940494347596e-06, "loss": 0.3298, "step": 43506 }, { "epoch": 1.9965582121059153, "grad_norm": 0.47617262601852417, "learning_rate": 2.6092787026030775e-06, "loss": 0.3561, "step": 43507 }, { "epoch": 1.9966041026111698, "grad_norm": 0.4675796627998352, "learning_rate": 2.6090633615203244e-06, "loss": 0.3632, "step": 43508 }, { "epoch": 1.9966499931164243, "grad_norm": 0.4834240674972534, "learning_rate": 2.60884802618702e-06, "loss": 0.3542, "step": 43509 }, { "epoch": 1.9966958836216788, "grad_norm": 0.49119311571121216, "learning_rate": 2.6086326966036824e-06, "loss": 0.322, "step": 43510 }, { "epoch": 1.996741774126933, "grad_norm": 0.465612530708313, "learning_rate": 2.608417372770826e-06, "loss": 0.3572, "step": 43511 }, { "epoch": 1.9967876646321876, "grad_norm": 0.48576590418815613, "learning_rate": 2.6082020546889752e-06, "loss": 0.4164, "step": 43512 }, { "epoch": 1.996833555137442, "grad_norm": 0.49535486102104187, "learning_rate": 2.607986742358642e-06, "loss": 0.4019, "step": 43513 }, { "epoch": 1.9968794456426964, "grad_norm": 0.4790431261062622, "learning_rate": 2.6077714357803453e-06, "loss": 0.3295, "step": 43514 }, { "epoch": 1.9969253361479509, "grad_norm": 0.5595104098320007, "learning_rate": 2.607556134954601e-06, "loss": 0.4117, "step": 43515 }, { "epoch": 1.9969712266532054, "grad_norm": 0.43628984689712524, "learning_rate": 2.6073408398819316e-06, "loss": 0.3606, "step": 43516 }, { "epoch": 1.9970171171584599, "grad_norm": 0.4653662145137787, "learning_rate": 2.6071255505628523e-06, "loss": 0.3184, "step": 43517 }, { "epoch": 1.9970630076637144, "grad_norm": 0.45600298047065735, "learning_rate": 2.6069102669978787e-06, "loss": 0.3538, "step": 43518 }, { "epoch": 1.9971088981689689, "grad_norm": 0.509855329990387, "learning_rate": 2.6066949891875326e-06, "loss": 0.4693, "step": 43519 }, { "epoch": 1.9971547886742234, "grad_norm": 0.4904026985168457, "learning_rate": 2.60647971713233e-06, "loss": 0.4098, "step": 43520 }, { "epoch": 1.9972006791794779, "grad_norm": 0.47173231840133667, "learning_rate": 2.6062644508327862e-06, "loss": 0.311, "step": 43521 }, { "epoch": 1.9972465696847324, "grad_norm": 0.4544871151447296, "learning_rate": 2.6060491902894223e-06, "loss": 0.3138, "step": 43522 }, { "epoch": 1.9972924601899869, "grad_norm": 0.49053242802619934, "learning_rate": 2.605833935502755e-06, "loss": 0.3402, "step": 43523 }, { "epoch": 1.9973383506952411, "grad_norm": 0.4452246427536011, "learning_rate": 2.605618686473301e-06, "loss": 0.3184, "step": 43524 }, { "epoch": 1.9973842412004956, "grad_norm": 0.47043755650520325, "learning_rate": 2.6054034432015794e-06, "loss": 0.3605, "step": 43525 }, { "epoch": 1.9974301317057501, "grad_norm": 0.4630054235458374, "learning_rate": 2.6051882056881062e-06, "loss": 0.3691, "step": 43526 }, { "epoch": 1.9974760222110044, "grad_norm": 0.4433959424495697, "learning_rate": 2.6049729739333974e-06, "loss": 0.3093, "step": 43527 }, { "epoch": 1.997521912716259, "grad_norm": 0.47992223501205444, "learning_rate": 2.604757747937975e-06, "loss": 0.3347, "step": 43528 }, { "epoch": 1.9975678032215134, "grad_norm": 0.4607185423374176, "learning_rate": 2.604542527702354e-06, "loss": 0.3418, "step": 43529 }, { "epoch": 1.997613693726768, "grad_norm": 0.449396014213562, "learning_rate": 2.60432731322705e-06, "loss": 0.3384, "step": 43530 }, { "epoch": 1.9976595842320224, "grad_norm": 0.45522579550743103, "learning_rate": 2.604112104512585e-06, "loss": 0.2932, "step": 43531 }, { "epoch": 1.9977054747372769, "grad_norm": 0.4637390077114105, "learning_rate": 2.603896901559474e-06, "loss": 0.3068, "step": 43532 }, { "epoch": 1.9977513652425314, "grad_norm": 0.4483066499233246, "learning_rate": 2.6036817043682326e-06, "loss": 0.3018, "step": 43533 }, { "epoch": 1.9977972557477859, "grad_norm": 0.45455294847488403, "learning_rate": 2.6034665129393856e-06, "loss": 0.3726, "step": 43534 }, { "epoch": 1.9978431462530404, "grad_norm": 0.47806546092033386, "learning_rate": 2.6032513272734416e-06, "loss": 0.3473, "step": 43535 }, { "epoch": 1.9978890367582947, "grad_norm": 0.49040573835372925, "learning_rate": 2.603036147370921e-06, "loss": 0.3532, "step": 43536 }, { "epoch": 1.9979349272635492, "grad_norm": 0.4696485996246338, "learning_rate": 2.6028209732323433e-06, "loss": 0.3334, "step": 43537 }, { "epoch": 1.9979808177688037, "grad_norm": 0.45687347650527954, "learning_rate": 2.602605804858225e-06, "loss": 0.3422, "step": 43538 }, { "epoch": 1.9980267082740581, "grad_norm": 0.4481918513774872, "learning_rate": 2.6023906422490806e-06, "loss": 0.3017, "step": 43539 }, { "epoch": 1.9980725987793124, "grad_norm": 0.5024559497833252, "learning_rate": 2.6021754854054325e-06, "loss": 0.3967, "step": 43540 }, { "epoch": 1.998118489284567, "grad_norm": 0.42068129777908325, "learning_rate": 2.6019603343277955e-06, "loss": 0.2592, "step": 43541 }, { "epoch": 1.9981643797898214, "grad_norm": 0.4506835639476776, "learning_rate": 2.6017451890166844e-06, "loss": 0.3025, "step": 43542 }, { "epoch": 1.998210270295076, "grad_norm": 0.49793699383735657, "learning_rate": 2.6015300494726216e-06, "loss": 0.379, "step": 43543 }, { "epoch": 1.9982561608003304, "grad_norm": 0.4432838559150696, "learning_rate": 2.6013149156961226e-06, "loss": 0.3096, "step": 43544 }, { "epoch": 1.998302051305585, "grad_norm": 0.4561251699924469, "learning_rate": 2.6010997876877032e-06, "loss": 0.3037, "step": 43545 }, { "epoch": 1.9983479418108394, "grad_norm": 0.4678066372871399, "learning_rate": 2.6008846654478823e-06, "loss": 0.3208, "step": 43546 }, { "epoch": 1.998393832316094, "grad_norm": 0.4909549653530121, "learning_rate": 2.6006695489771766e-06, "loss": 0.3553, "step": 43547 }, { "epoch": 1.9984397228213484, "grad_norm": 0.48084524273872375, "learning_rate": 2.600454438276103e-06, "loss": 0.3903, "step": 43548 }, { "epoch": 1.9984856133266027, "grad_norm": 0.4458681046962738, "learning_rate": 2.600239333345178e-06, "loss": 0.2814, "step": 43549 }, { "epoch": 1.9985315038318572, "grad_norm": 0.44019216299057007, "learning_rate": 2.6000242341849214e-06, "loss": 0.297, "step": 43550 }, { "epoch": 1.9985773943371117, "grad_norm": 0.4973321557044983, "learning_rate": 2.599809140795849e-06, "loss": 0.4253, "step": 43551 }, { "epoch": 1.998623284842366, "grad_norm": 0.4755401015281677, "learning_rate": 2.599594053178476e-06, "loss": 0.3982, "step": 43552 }, { "epoch": 1.9986691753476205, "grad_norm": 0.4840502142906189, "learning_rate": 2.5993789713333245e-06, "loss": 0.404, "step": 43553 }, { "epoch": 1.998715065852875, "grad_norm": 0.45206618309020996, "learning_rate": 2.599163895260908e-06, "loss": 0.3312, "step": 43554 }, { "epoch": 1.9987609563581294, "grad_norm": 0.5010907649993896, "learning_rate": 2.5989488249617435e-06, "loss": 0.3994, "step": 43555 }, { "epoch": 1.998806846863384, "grad_norm": 0.44600456953048706, "learning_rate": 2.5987337604363527e-06, "loss": 0.3006, "step": 43556 }, { "epoch": 1.9988527373686384, "grad_norm": 0.4460684359073639, "learning_rate": 2.598518701685247e-06, "loss": 0.3398, "step": 43557 }, { "epoch": 1.998898627873893, "grad_norm": 0.4644332826137543, "learning_rate": 2.598303648708944e-06, "loss": 0.3096, "step": 43558 }, { "epoch": 1.9989445183791474, "grad_norm": 0.4907805323600769, "learning_rate": 2.5980886015079654e-06, "loss": 0.4468, "step": 43559 }, { "epoch": 1.998990408884402, "grad_norm": 0.45399779081344604, "learning_rate": 2.5978735600828254e-06, "loss": 0.3651, "step": 43560 }, { "epoch": 1.9990362993896564, "grad_norm": 0.4339732825756073, "learning_rate": 2.597658524434039e-06, "loss": 0.3163, "step": 43561 }, { "epoch": 1.9990821898949107, "grad_norm": 0.43160516023635864, "learning_rate": 2.597443494562128e-06, "loss": 0.3009, "step": 43562 }, { "epoch": 1.9991280804001652, "grad_norm": 0.4887740910053253, "learning_rate": 2.597228470467607e-06, "loss": 0.3787, "step": 43563 }, { "epoch": 1.9991739709054197, "grad_norm": 0.4786919355392456, "learning_rate": 2.5970134521509916e-06, "loss": 0.3914, "step": 43564 }, { "epoch": 1.999219861410674, "grad_norm": 0.42965057492256165, "learning_rate": 2.596798439612802e-06, "loss": 0.2872, "step": 43565 }, { "epoch": 1.9992657519159285, "grad_norm": 0.4697248041629791, "learning_rate": 2.5965834328535533e-06, "loss": 0.3275, "step": 43566 }, { "epoch": 1.999311642421183, "grad_norm": 0.4421330392360687, "learning_rate": 2.596368431873764e-06, "loss": 0.3179, "step": 43567 }, { "epoch": 1.9993575329264375, "grad_norm": 0.4759458601474762, "learning_rate": 2.596153436673949e-06, "loss": 0.3525, "step": 43568 }, { "epoch": 1.999403423431692, "grad_norm": 0.45938190817832947, "learning_rate": 2.595938447254627e-06, "loss": 0.3649, "step": 43569 }, { "epoch": 1.9994493139369465, "grad_norm": 0.4524313509464264, "learning_rate": 2.5957234636163114e-06, "loss": 0.3086, "step": 43570 }, { "epoch": 1.999495204442201, "grad_norm": 0.43937116861343384, "learning_rate": 2.595508485759525e-06, "loss": 0.3271, "step": 43571 }, { "epoch": 1.9995410949474555, "grad_norm": 0.4756285846233368, "learning_rate": 2.5952935136847813e-06, "loss": 0.3623, "step": 43572 }, { "epoch": 1.99958698545271, "grad_norm": 0.5210612416267395, "learning_rate": 2.5950785473925953e-06, "loss": 0.4454, "step": 43573 }, { "epoch": 1.9996328759579645, "grad_norm": 0.4370582401752472, "learning_rate": 2.5948635868834894e-06, "loss": 0.2792, "step": 43574 }, { "epoch": 1.9996787664632187, "grad_norm": 0.4702579379081726, "learning_rate": 2.5946486321579766e-06, "loss": 0.3379, "step": 43575 }, { "epoch": 1.9997246569684732, "grad_norm": 0.4482766389846802, "learning_rate": 2.5944336832165736e-06, "loss": 0.3035, "step": 43576 }, { "epoch": 1.9997705474737277, "grad_norm": 0.5091465711593628, "learning_rate": 2.5942187400597996e-06, "loss": 0.4438, "step": 43577 }, { "epoch": 1.999816437978982, "grad_norm": 0.4840451776981354, "learning_rate": 2.5940038026881724e-06, "loss": 0.4279, "step": 43578 }, { "epoch": 1.9998623284842365, "grad_norm": 0.5300149321556091, "learning_rate": 2.593788871102205e-06, "loss": 0.302, "step": 43579 }, { "epoch": 1.999908218989491, "grad_norm": 0.4825488030910492, "learning_rate": 2.5935739453024135e-06, "loss": 0.3597, "step": 43580 }, { "epoch": 1.9999541094947455, "grad_norm": 0.4886365234851837, "learning_rate": 2.5933590252893193e-06, "loss": 0.3945, "step": 43581 }, { "epoch": 2.0, "grad_norm": 0.44774341583251953, "learning_rate": 2.5931441110634375e-06, "loss": 0.286, "step": 43582 }, { "epoch": 2.0000458905052545, "grad_norm": 0.4844712018966675, "learning_rate": 2.5929292026252823e-06, "loss": 0.3938, "step": 43583 }, { "epoch": 2.000091781010509, "grad_norm": 0.4449591040611267, "learning_rate": 2.5927142999753746e-06, "loss": 0.2723, "step": 43584 }, { "epoch": 2.0001376715157635, "grad_norm": 0.4971478283405304, "learning_rate": 2.5924994031142294e-06, "loss": 0.3517, "step": 43585 }, { "epoch": 2.000183562021018, "grad_norm": 0.4722493290901184, "learning_rate": 2.5922845120423613e-06, "loss": 0.3557, "step": 43586 }, { "epoch": 2.0002294525262725, "grad_norm": 0.4680596590042114, "learning_rate": 2.5920696267602907e-06, "loss": 0.2794, "step": 43587 }, { "epoch": 2.000275343031527, "grad_norm": 0.4398815631866455, "learning_rate": 2.591854747268533e-06, "loss": 0.3129, "step": 43588 }, { "epoch": 2.000321233536781, "grad_norm": 0.4684835970401764, "learning_rate": 2.591639873567604e-06, "loss": 0.3361, "step": 43589 }, { "epoch": 2.0003671240420355, "grad_norm": 0.44333454966545105, "learning_rate": 2.5914250056580216e-06, "loss": 0.2702, "step": 43590 }, { "epoch": 2.00041301454729, "grad_norm": 0.4783194661140442, "learning_rate": 2.591210143540302e-06, "loss": 0.3146, "step": 43591 }, { "epoch": 2.0004589050525445, "grad_norm": 0.4561416208744049, "learning_rate": 2.59099528721496e-06, "loss": 0.3159, "step": 43592 }, { "epoch": 2.000504795557799, "grad_norm": 0.4763440191745758, "learning_rate": 2.590780436682515e-06, "loss": 0.3386, "step": 43593 }, { "epoch": 2.0005506860630535, "grad_norm": 0.43661850690841675, "learning_rate": 2.5905655919434834e-06, "loss": 0.2594, "step": 43594 }, { "epoch": 2.000596576568308, "grad_norm": 0.6111595630645752, "learning_rate": 2.590350752998379e-06, "loss": 0.3965, "step": 43595 }, { "epoch": 2.0006424670735625, "grad_norm": 0.4455806612968445, "learning_rate": 2.590135919847723e-06, "loss": 0.2996, "step": 43596 }, { "epoch": 2.000688357578817, "grad_norm": 0.4861047565937042, "learning_rate": 2.589921092492028e-06, "loss": 0.3237, "step": 43597 }, { "epoch": 2.0007342480840715, "grad_norm": 0.4809759557247162, "learning_rate": 2.5897062709318117e-06, "loss": 0.3219, "step": 43598 }, { "epoch": 2.000780138589326, "grad_norm": 0.5701629519462585, "learning_rate": 2.5894914551675926e-06, "loss": 0.287, "step": 43599 }, { "epoch": 2.0008260290945805, "grad_norm": 0.4957137703895569, "learning_rate": 2.5892766451998873e-06, "loss": 0.3111, "step": 43600 }, { "epoch": 2.000871919599835, "grad_norm": 0.5069689750671387, "learning_rate": 2.5890618410292077e-06, "loss": 0.3482, "step": 43601 }, { "epoch": 2.000917810105089, "grad_norm": 0.5015645623207092, "learning_rate": 2.588847042656075e-06, "loss": 0.3287, "step": 43602 }, { "epoch": 2.0009637006103436, "grad_norm": 0.4182235598564148, "learning_rate": 2.588632250081004e-06, "loss": 0.2286, "step": 43603 }, { "epoch": 2.001009591115598, "grad_norm": 0.5139101147651672, "learning_rate": 2.58841746330451e-06, "loss": 0.3965, "step": 43604 }, { "epoch": 2.0010554816208526, "grad_norm": 0.547021746635437, "learning_rate": 2.588202682327112e-06, "loss": 0.4359, "step": 43605 }, { "epoch": 2.001101372126107, "grad_norm": 0.46635201573371887, "learning_rate": 2.5879879071493263e-06, "loss": 0.3207, "step": 43606 }, { "epoch": 2.0011472626313616, "grad_norm": 0.47318413853645325, "learning_rate": 2.587773137771666e-06, "loss": 0.3202, "step": 43607 }, { "epoch": 2.001193153136616, "grad_norm": 0.4387804865837097, "learning_rate": 2.587558374194652e-06, "loss": 0.2565, "step": 43608 }, { "epoch": 2.0012390436418706, "grad_norm": 0.48904040455818176, "learning_rate": 2.587343616418798e-06, "loss": 0.351, "step": 43609 }, { "epoch": 2.001284934147125, "grad_norm": 0.46633121371269226, "learning_rate": 2.5871288644446223e-06, "loss": 0.2942, "step": 43610 }, { "epoch": 2.0013308246523795, "grad_norm": 0.503555417060852, "learning_rate": 2.58691411827264e-06, "loss": 0.3625, "step": 43611 }, { "epoch": 2.001376715157634, "grad_norm": 0.4855799674987793, "learning_rate": 2.5866993779033665e-06, "loss": 0.3491, "step": 43612 }, { "epoch": 2.0014226056628885, "grad_norm": 0.46129336953163147, "learning_rate": 2.5864846433373203e-06, "loss": 0.3108, "step": 43613 }, { "epoch": 2.0014684961681426, "grad_norm": 0.4918213188648224, "learning_rate": 2.5862699145750147e-06, "loss": 0.3753, "step": 43614 }, { "epoch": 2.001514386673397, "grad_norm": 0.5084760785102844, "learning_rate": 2.5860551916169706e-06, "loss": 0.3494, "step": 43615 }, { "epoch": 2.0015602771786516, "grad_norm": 0.49756646156311035, "learning_rate": 2.585840474463701e-06, "loss": 0.3891, "step": 43616 }, { "epoch": 2.001606167683906, "grad_norm": 0.5025991201400757, "learning_rate": 2.5856257631157223e-06, "loss": 0.3863, "step": 43617 }, { "epoch": 2.0016520581891606, "grad_norm": 0.4629007577896118, "learning_rate": 2.5854110575735523e-06, "loss": 0.3063, "step": 43618 }, { "epoch": 2.001697948694415, "grad_norm": 0.4451930522918701, "learning_rate": 2.585196357837707e-06, "loss": 0.2831, "step": 43619 }, { "epoch": 2.0017438391996696, "grad_norm": 0.4724777936935425, "learning_rate": 2.584981663908701e-06, "loss": 0.2993, "step": 43620 }, { "epoch": 2.001789729704924, "grad_norm": 0.5392200946807861, "learning_rate": 2.5847669757870537e-06, "loss": 0.4277, "step": 43621 }, { "epoch": 2.0018356202101786, "grad_norm": 0.4594961702823639, "learning_rate": 2.584552293473281e-06, "loss": 0.2804, "step": 43622 }, { "epoch": 2.001881510715433, "grad_norm": 0.4628780782222748, "learning_rate": 2.584337616967894e-06, "loss": 0.3038, "step": 43623 }, { "epoch": 2.0019274012206876, "grad_norm": 0.4215259253978729, "learning_rate": 2.5841229462714147e-06, "loss": 0.2266, "step": 43624 }, { "epoch": 2.001973291725942, "grad_norm": 0.4617981016635895, "learning_rate": 2.5839082813843575e-06, "loss": 0.3087, "step": 43625 }, { "epoch": 2.0020191822311966, "grad_norm": 0.5508463382720947, "learning_rate": 2.583693622307236e-06, "loss": 0.4324, "step": 43626 }, { "epoch": 2.0020650727364506, "grad_norm": 0.46040359139442444, "learning_rate": 2.5834789690405705e-06, "loss": 0.3022, "step": 43627 }, { "epoch": 2.002110963241705, "grad_norm": 0.4900420308113098, "learning_rate": 2.583264321584876e-06, "loss": 0.3211, "step": 43628 }, { "epoch": 2.0021568537469596, "grad_norm": 0.49306097626686096, "learning_rate": 2.5830496799406656e-06, "loss": 0.407, "step": 43629 }, { "epoch": 2.002202744252214, "grad_norm": 0.4392761290073395, "learning_rate": 2.5828350441084597e-06, "loss": 0.3163, "step": 43630 }, { "epoch": 2.0022486347574686, "grad_norm": 0.4424388110637665, "learning_rate": 2.5826204140887735e-06, "loss": 0.2776, "step": 43631 }, { "epoch": 2.002294525262723, "grad_norm": 0.5118333697319031, "learning_rate": 2.5824057898821215e-06, "loss": 0.3852, "step": 43632 }, { "epoch": 2.0023404157679776, "grad_norm": 0.4602487087249756, "learning_rate": 2.582191171489021e-06, "loss": 0.299, "step": 43633 }, { "epoch": 2.002386306273232, "grad_norm": 0.43042048811912537, "learning_rate": 2.5819765589099873e-06, "loss": 0.2499, "step": 43634 }, { "epoch": 2.0024321967784866, "grad_norm": 0.49058640003204346, "learning_rate": 2.581761952145535e-06, "loss": 0.3622, "step": 43635 }, { "epoch": 2.002478087283741, "grad_norm": 0.47918495535850525, "learning_rate": 2.581547351196184e-06, "loss": 0.3376, "step": 43636 }, { "epoch": 2.0025239777889956, "grad_norm": 0.42966604232788086, "learning_rate": 2.581332756062448e-06, "loss": 0.242, "step": 43637 }, { "epoch": 2.00256986829425, "grad_norm": 0.4447372257709503, "learning_rate": 2.5811181667448416e-06, "loss": 0.3097, "step": 43638 }, { "epoch": 2.0026157587995046, "grad_norm": 0.4536151587963104, "learning_rate": 2.580903583243885e-06, "loss": 0.277, "step": 43639 }, { "epoch": 2.0026616493047587, "grad_norm": 0.4750029444694519, "learning_rate": 2.5806890055600916e-06, "loss": 0.2836, "step": 43640 }, { "epoch": 2.002707539810013, "grad_norm": 0.4889214336872101, "learning_rate": 2.5804744336939754e-06, "loss": 0.293, "step": 43641 }, { "epoch": 2.0027534303152676, "grad_norm": 0.4424329996109009, "learning_rate": 2.5802598676460565e-06, "loss": 0.2695, "step": 43642 }, { "epoch": 2.002799320820522, "grad_norm": 0.5081387758255005, "learning_rate": 2.5800453074168486e-06, "loss": 0.3827, "step": 43643 }, { "epoch": 2.0028452113257766, "grad_norm": 0.4818970263004303, "learning_rate": 2.5798307530068706e-06, "loss": 0.3247, "step": 43644 }, { "epoch": 2.002891101831031, "grad_norm": 0.45791763067245483, "learning_rate": 2.5796162044166315e-06, "loss": 0.3392, "step": 43645 }, { "epoch": 2.0029369923362856, "grad_norm": 0.5072755217552185, "learning_rate": 2.579401661646654e-06, "loss": 0.3315, "step": 43646 }, { "epoch": 2.00298288284154, "grad_norm": 0.5062328577041626, "learning_rate": 2.579187124697451e-06, "loss": 0.3721, "step": 43647 }, { "epoch": 2.0030287733467946, "grad_norm": 0.49300679564476013, "learning_rate": 2.578972593569537e-06, "loss": 0.3243, "step": 43648 }, { "epoch": 2.003074663852049, "grad_norm": 0.4685254991054535, "learning_rate": 2.5787580682634316e-06, "loss": 0.2989, "step": 43649 }, { "epoch": 2.0031205543573036, "grad_norm": 0.46845701336860657, "learning_rate": 2.5785435487796493e-06, "loss": 0.3075, "step": 43650 }, { "epoch": 2.003166444862558, "grad_norm": 0.48184698820114136, "learning_rate": 2.578329035118704e-06, "loss": 0.3277, "step": 43651 }, { "epoch": 2.003212335367812, "grad_norm": 0.48065608739852905, "learning_rate": 2.5781145272811144e-06, "loss": 0.3424, "step": 43652 }, { "epoch": 2.0032582258730667, "grad_norm": 0.49073874950408936, "learning_rate": 2.5779000252673946e-06, "loss": 0.3505, "step": 43653 }, { "epoch": 2.003304116378321, "grad_norm": 0.45567062497138977, "learning_rate": 2.5776855290780615e-06, "loss": 0.3347, "step": 43654 }, { "epoch": 2.0033500068835757, "grad_norm": 0.5064171552658081, "learning_rate": 2.5774710387136304e-06, "loss": 0.3519, "step": 43655 }, { "epoch": 2.00339589738883, "grad_norm": 0.5021344423294067, "learning_rate": 2.5772565541746163e-06, "loss": 0.3352, "step": 43656 }, { "epoch": 2.0034417878940847, "grad_norm": 0.5424817800521851, "learning_rate": 2.5770420754615338e-06, "loss": 0.3763, "step": 43657 }, { "epoch": 2.003487678399339, "grad_norm": 0.5128730535507202, "learning_rate": 2.5768276025749024e-06, "loss": 0.2587, "step": 43658 }, { "epoch": 2.0035335689045937, "grad_norm": 0.5240526795387268, "learning_rate": 2.5766131355152357e-06, "loss": 0.4059, "step": 43659 }, { "epoch": 2.003579459409848, "grad_norm": 0.49225014448165894, "learning_rate": 2.5763986742830476e-06, "loss": 0.3222, "step": 43660 }, { "epoch": 2.0036253499151027, "grad_norm": 0.4729706645011902, "learning_rate": 2.576184218878858e-06, "loss": 0.2936, "step": 43661 }, { "epoch": 2.003671240420357, "grad_norm": 0.4805581569671631, "learning_rate": 2.5759697693031797e-06, "loss": 0.319, "step": 43662 }, { "epoch": 2.0037171309256117, "grad_norm": 0.48805731534957886, "learning_rate": 2.575755325556528e-06, "loss": 0.3031, "step": 43663 }, { "epoch": 2.003763021430866, "grad_norm": 0.50248122215271, "learning_rate": 2.5755408876394213e-06, "loss": 0.3661, "step": 43664 }, { "epoch": 2.00380891193612, "grad_norm": 0.4822775721549988, "learning_rate": 2.575326455552373e-06, "loss": 0.3771, "step": 43665 }, { "epoch": 2.0038548024413747, "grad_norm": 0.5054476857185364, "learning_rate": 2.5751120292958996e-06, "loss": 0.3874, "step": 43666 }, { "epoch": 2.003900692946629, "grad_norm": 0.4908200800418854, "learning_rate": 2.574897608870517e-06, "loss": 0.3219, "step": 43667 }, { "epoch": 2.0039465834518837, "grad_norm": 0.4589777886867523, "learning_rate": 2.5746831942767396e-06, "loss": 0.2906, "step": 43668 }, { "epoch": 2.003992473957138, "grad_norm": 0.5172182321548462, "learning_rate": 2.5744687855150823e-06, "loss": 0.333, "step": 43669 }, { "epoch": 2.0040383644623927, "grad_norm": 0.4623895585536957, "learning_rate": 2.5742543825860633e-06, "loss": 0.2911, "step": 43670 }, { "epoch": 2.004084254967647, "grad_norm": 0.4870242476463318, "learning_rate": 2.574039985490198e-06, "loss": 0.3433, "step": 43671 }, { "epoch": 2.0041301454729017, "grad_norm": 0.4880375564098358, "learning_rate": 2.5738255942279977e-06, "loss": 0.3223, "step": 43672 }, { "epoch": 2.004176035978156, "grad_norm": 0.6126068830490112, "learning_rate": 2.5736112087999843e-06, "loss": 0.3578, "step": 43673 }, { "epoch": 2.0042219264834107, "grad_norm": 0.462011456489563, "learning_rate": 2.5733968292066693e-06, "loss": 0.3496, "step": 43674 }, { "epoch": 2.004267816988665, "grad_norm": 0.4952201843261719, "learning_rate": 2.573182455448569e-06, "loss": 0.397, "step": 43675 }, { "epoch": 2.0043137074939197, "grad_norm": 0.4475078284740448, "learning_rate": 2.572968087526199e-06, "loss": 0.2681, "step": 43676 }, { "epoch": 2.004359597999174, "grad_norm": 0.4546314775943756, "learning_rate": 2.5727537254400755e-06, "loss": 0.2892, "step": 43677 }, { "epoch": 2.0044054885044282, "grad_norm": 0.4949602782726288, "learning_rate": 2.5725393691907123e-06, "loss": 0.3274, "step": 43678 }, { "epoch": 2.0044513790096827, "grad_norm": 0.4624207317829132, "learning_rate": 2.5723250187786243e-06, "loss": 0.2685, "step": 43679 }, { "epoch": 2.0044972695149372, "grad_norm": 0.45165547728538513, "learning_rate": 2.5721106742043305e-06, "loss": 0.291, "step": 43680 }, { "epoch": 2.0045431600201917, "grad_norm": 0.4655570983886719, "learning_rate": 2.5718963354683443e-06, "loss": 0.2668, "step": 43681 }, { "epoch": 2.0045890505254462, "grad_norm": 0.44164353609085083, "learning_rate": 2.5716820025711785e-06, "loss": 0.2778, "step": 43682 }, { "epoch": 2.0046349410307007, "grad_norm": 0.4908074140548706, "learning_rate": 2.5714676755133537e-06, "loss": 0.4017, "step": 43683 }, { "epoch": 2.004680831535955, "grad_norm": 0.4459596276283264, "learning_rate": 2.5712533542953822e-06, "loss": 0.2518, "step": 43684 }, { "epoch": 2.0047267220412097, "grad_norm": 0.4482170641422272, "learning_rate": 2.571039038917779e-06, "loss": 0.2797, "step": 43685 }, { "epoch": 2.004772612546464, "grad_norm": 0.47261425852775574, "learning_rate": 2.5708247293810605e-06, "loss": 0.314, "step": 43686 }, { "epoch": 2.0048185030517187, "grad_norm": 0.4824172556400299, "learning_rate": 2.5706104256857435e-06, "loss": 0.3536, "step": 43687 }, { "epoch": 2.004864393556973, "grad_norm": 0.4346352517604828, "learning_rate": 2.5703961278323408e-06, "loss": 0.229, "step": 43688 }, { "epoch": 2.0049102840622277, "grad_norm": 0.48954832553863525, "learning_rate": 2.570181835821368e-06, "loss": 0.3503, "step": 43689 }, { "epoch": 2.004956174567482, "grad_norm": 0.5062331557273865, "learning_rate": 2.5699675496533422e-06, "loss": 0.3631, "step": 43690 }, { "epoch": 2.0050020650727363, "grad_norm": 0.44990062713623047, "learning_rate": 2.569753269328775e-06, "loss": 0.2617, "step": 43691 }, { "epoch": 2.0050479555779908, "grad_norm": 0.47855308651924133, "learning_rate": 2.5695389948481865e-06, "loss": 0.3125, "step": 43692 }, { "epoch": 2.0050938460832453, "grad_norm": 0.5123343467712402, "learning_rate": 2.5693247262120892e-06, "loss": 0.3704, "step": 43693 }, { "epoch": 2.0051397365884998, "grad_norm": 0.47852420806884766, "learning_rate": 2.5691104634209974e-06, "loss": 0.3341, "step": 43694 }, { "epoch": 2.0051856270937543, "grad_norm": 0.4693133533000946, "learning_rate": 2.5688962064754287e-06, "loss": 0.3347, "step": 43695 }, { "epoch": 2.0052315175990088, "grad_norm": 0.5039023160934448, "learning_rate": 2.568681955375898e-06, "loss": 0.3449, "step": 43696 }, { "epoch": 2.0052774081042632, "grad_norm": 0.46535900235176086, "learning_rate": 2.5684677101229198e-06, "loss": 0.2907, "step": 43697 }, { "epoch": 2.0053232986095177, "grad_norm": 0.5254426002502441, "learning_rate": 2.568253470717009e-06, "loss": 0.3968, "step": 43698 }, { "epoch": 2.0053691891147722, "grad_norm": 0.5210719108581543, "learning_rate": 2.568039237158682e-06, "loss": 0.3821, "step": 43699 }, { "epoch": 2.0054150796200267, "grad_norm": 0.42893901467323303, "learning_rate": 2.5678250094484504e-06, "loss": 0.2279, "step": 43700 }, { "epoch": 2.0054609701252812, "grad_norm": 0.4883652329444885, "learning_rate": 2.5676107875868346e-06, "loss": 0.3264, "step": 43701 }, { "epoch": 2.0055068606305357, "grad_norm": 0.4708270728588104, "learning_rate": 2.5673965715743465e-06, "loss": 0.292, "step": 43702 }, { "epoch": 2.00555275113579, "grad_norm": 0.5141642689704895, "learning_rate": 2.5671823614115e-06, "loss": 0.358, "step": 43703 }, { "epoch": 2.0055986416410443, "grad_norm": 0.4907875955104828, "learning_rate": 2.5669681570988146e-06, "loss": 0.3216, "step": 43704 }, { "epoch": 2.005644532146299, "grad_norm": 0.49331778287887573, "learning_rate": 2.566753958636802e-06, "loss": 0.392, "step": 43705 }, { "epoch": 2.0056904226515533, "grad_norm": 0.4538797438144684, "learning_rate": 2.566539766025977e-06, "loss": 0.2693, "step": 43706 }, { "epoch": 2.005736313156808, "grad_norm": 0.502259373664856, "learning_rate": 2.5663255792668578e-06, "loss": 0.3497, "step": 43707 }, { "epoch": 2.0057822036620623, "grad_norm": 0.4454059600830078, "learning_rate": 2.566111398359957e-06, "loss": 0.2641, "step": 43708 }, { "epoch": 2.0058280941673168, "grad_norm": 0.46292680501937866, "learning_rate": 2.5658972233057904e-06, "loss": 0.2933, "step": 43709 }, { "epoch": 2.0058739846725713, "grad_norm": 0.5193755626678467, "learning_rate": 2.565683054104873e-06, "loss": 0.3277, "step": 43710 }, { "epoch": 2.0059198751778258, "grad_norm": 0.4787512719631195, "learning_rate": 2.565468890757719e-06, "loss": 0.3289, "step": 43711 }, { "epoch": 2.0059657656830803, "grad_norm": 0.44748401641845703, "learning_rate": 2.5652547332648446e-06, "loss": 0.275, "step": 43712 }, { "epoch": 2.0060116561883348, "grad_norm": 0.4969683885574341, "learning_rate": 2.565040581626762e-06, "loss": 0.312, "step": 43713 }, { "epoch": 2.0060575466935893, "grad_norm": 0.46029752492904663, "learning_rate": 2.5648264358439903e-06, "loss": 0.2917, "step": 43714 }, { "epoch": 2.0061034371988438, "grad_norm": 0.4765859842300415, "learning_rate": 2.5646122959170417e-06, "loss": 0.3256, "step": 43715 }, { "epoch": 2.006149327704098, "grad_norm": 0.4484683573246002, "learning_rate": 2.5643981618464307e-06, "loss": 0.2923, "step": 43716 }, { "epoch": 2.0061952182093523, "grad_norm": 0.5032331943511963, "learning_rate": 2.564184033632675e-06, "loss": 0.3017, "step": 43717 }, { "epoch": 2.006241108714607, "grad_norm": 0.4672977030277252, "learning_rate": 2.5639699112762883e-06, "loss": 0.337, "step": 43718 }, { "epoch": 2.0062869992198613, "grad_norm": 0.47363749146461487, "learning_rate": 2.563755794777785e-06, "loss": 0.3519, "step": 43719 }, { "epoch": 2.006332889725116, "grad_norm": 0.5113139748573303, "learning_rate": 2.56354168413768e-06, "loss": 0.3747, "step": 43720 }, { "epoch": 2.0063787802303703, "grad_norm": 0.4829648733139038, "learning_rate": 2.563327579356488e-06, "loss": 0.3398, "step": 43721 }, { "epoch": 2.006424670735625, "grad_norm": 0.4470955431461334, "learning_rate": 2.5631134804347226e-06, "loss": 0.2696, "step": 43722 }, { "epoch": 2.0064705612408793, "grad_norm": 0.47903206944465637, "learning_rate": 2.5628993873729025e-06, "loss": 0.3206, "step": 43723 }, { "epoch": 2.006516451746134, "grad_norm": 0.49464279413223267, "learning_rate": 2.562685300171539e-06, "loss": 0.3506, "step": 43724 }, { "epoch": 2.0065623422513883, "grad_norm": 0.48042237758636475, "learning_rate": 2.5624712188311473e-06, "loss": 0.3053, "step": 43725 }, { "epoch": 2.006608232756643, "grad_norm": 0.5060098767280579, "learning_rate": 2.5622571433522447e-06, "loss": 0.3401, "step": 43726 }, { "epoch": 2.0066541232618973, "grad_norm": 0.46541112661361694, "learning_rate": 2.562043073735344e-06, "loss": 0.3133, "step": 43727 }, { "epoch": 2.006700013767152, "grad_norm": 0.4658122956752777, "learning_rate": 2.5618290099809585e-06, "loss": 0.3233, "step": 43728 }, { "epoch": 2.006745904272406, "grad_norm": 0.45811259746551514, "learning_rate": 2.5616149520896065e-06, "loss": 0.3122, "step": 43729 }, { "epoch": 2.0067917947776603, "grad_norm": 0.4856727421283722, "learning_rate": 2.5614009000618013e-06, "loss": 0.3499, "step": 43730 }, { "epoch": 2.006837685282915, "grad_norm": 0.5311927199363708, "learning_rate": 2.561186853898057e-06, "loss": 0.3399, "step": 43731 }, { "epoch": 2.0068835757881693, "grad_norm": 0.4651277959346771, "learning_rate": 2.560972813598889e-06, "loss": 0.3344, "step": 43732 }, { "epoch": 2.006929466293424, "grad_norm": 0.4807904362678528, "learning_rate": 2.560758779164811e-06, "loss": 0.3016, "step": 43733 }, { "epoch": 2.0069753567986783, "grad_norm": 0.5431494116783142, "learning_rate": 2.5605447505963364e-06, "loss": 0.4372, "step": 43734 }, { "epoch": 2.007021247303933, "grad_norm": 0.48266854882240295, "learning_rate": 2.560330727893985e-06, "loss": 0.2769, "step": 43735 }, { "epoch": 2.0070671378091873, "grad_norm": 0.4945589601993561, "learning_rate": 2.560116711058267e-06, "loss": 0.3028, "step": 43736 }, { "epoch": 2.007113028314442, "grad_norm": 0.47400084137916565, "learning_rate": 2.559902700089696e-06, "loss": 0.3185, "step": 43737 }, { "epoch": 2.0071589188196963, "grad_norm": 0.4820843040943146, "learning_rate": 2.5596886949887916e-06, "loss": 0.2952, "step": 43738 }, { "epoch": 2.007204809324951, "grad_norm": 0.43015116453170776, "learning_rate": 2.559474695756066e-06, "loss": 0.2309, "step": 43739 }, { "epoch": 2.0072506998302053, "grad_norm": 0.490279883146286, "learning_rate": 2.5592607023920324e-06, "loss": 0.3398, "step": 43740 }, { "epoch": 2.0072965903354594, "grad_norm": 0.4648468494415283, "learning_rate": 2.559046714897207e-06, "loss": 0.3128, "step": 43741 }, { "epoch": 2.007342480840714, "grad_norm": 0.48699691891670227, "learning_rate": 2.5588327332721043e-06, "loss": 0.3375, "step": 43742 }, { "epoch": 2.0073883713459684, "grad_norm": 0.4886507987976074, "learning_rate": 2.558618757517238e-06, "loss": 0.322, "step": 43743 }, { "epoch": 2.007434261851223, "grad_norm": 0.5255902409553528, "learning_rate": 2.5584047876331207e-06, "loss": 0.3953, "step": 43744 }, { "epoch": 2.0074801523564774, "grad_norm": 0.46543624997138977, "learning_rate": 2.558190823620272e-06, "loss": 0.2943, "step": 43745 }, { "epoch": 2.007526042861732, "grad_norm": 0.4834055006504059, "learning_rate": 2.557976865479203e-06, "loss": 0.3541, "step": 43746 }, { "epoch": 2.0075719333669864, "grad_norm": 0.4499962627887726, "learning_rate": 2.5577629132104275e-06, "loss": 0.3255, "step": 43747 }, { "epoch": 2.007617823872241, "grad_norm": 0.4805598855018616, "learning_rate": 2.5575489668144633e-06, "loss": 0.3478, "step": 43748 }, { "epoch": 2.0076637143774954, "grad_norm": 0.4580801725387573, "learning_rate": 2.557335026291823e-06, "loss": 0.3005, "step": 43749 }, { "epoch": 2.00770960488275, "grad_norm": 0.4909159541130066, "learning_rate": 2.5571210916430187e-06, "loss": 0.3548, "step": 43750 }, { "epoch": 2.0077554953880044, "grad_norm": 0.4659135341644287, "learning_rate": 2.556907162868569e-06, "loss": 0.2711, "step": 43751 }, { "epoch": 2.007801385893259, "grad_norm": 0.4905852675437927, "learning_rate": 2.5566932399689876e-06, "loss": 0.3449, "step": 43752 }, { "epoch": 2.0078472763985133, "grad_norm": 0.49415165185928345, "learning_rate": 2.556479322944787e-06, "loss": 0.3656, "step": 43753 }, { "epoch": 2.0078931669037674, "grad_norm": 0.4498327672481537, "learning_rate": 2.5562654117964824e-06, "loss": 0.2534, "step": 43754 }, { "epoch": 2.007939057409022, "grad_norm": 0.46590033173561096, "learning_rate": 2.5560515065245883e-06, "loss": 0.341, "step": 43755 }, { "epoch": 2.0079849479142764, "grad_norm": 0.4729540944099426, "learning_rate": 2.5558376071296175e-06, "loss": 0.3385, "step": 43756 }, { "epoch": 2.008030838419531, "grad_norm": 0.5121253728866577, "learning_rate": 2.5556237136120877e-06, "loss": 0.339, "step": 43757 }, { "epoch": 2.0080767289247854, "grad_norm": 0.4590664803981781, "learning_rate": 2.555409825972511e-06, "loss": 0.3299, "step": 43758 }, { "epoch": 2.00812261943004, "grad_norm": 0.4793297052383423, "learning_rate": 2.5551959442114006e-06, "loss": 0.326, "step": 43759 }, { "epoch": 2.0081685099352944, "grad_norm": 0.5160317420959473, "learning_rate": 2.5549820683292737e-06, "loss": 0.2978, "step": 43760 }, { "epoch": 2.008214400440549, "grad_norm": 0.520672082901001, "learning_rate": 2.5547681983266436e-06, "loss": 0.3882, "step": 43761 }, { "epoch": 2.0082602909458034, "grad_norm": 0.4532833397388458, "learning_rate": 2.554554334204024e-06, "loss": 0.2522, "step": 43762 }, { "epoch": 2.008306181451058, "grad_norm": 0.4701540768146515, "learning_rate": 2.5543404759619294e-06, "loss": 0.3156, "step": 43763 }, { "epoch": 2.0083520719563124, "grad_norm": 0.4838799238204956, "learning_rate": 2.5541266236008745e-06, "loss": 0.3093, "step": 43764 }, { "epoch": 2.008397962461567, "grad_norm": 0.48567995429039, "learning_rate": 2.5539127771213712e-06, "loss": 0.3696, "step": 43765 }, { "epoch": 2.0084438529668214, "grad_norm": 0.48994317650794983, "learning_rate": 2.5536989365239375e-06, "loss": 0.3378, "step": 43766 }, { "epoch": 2.0084897434720754, "grad_norm": 0.5031680464744568, "learning_rate": 2.5534851018090857e-06, "loss": 0.3772, "step": 43767 }, { "epoch": 2.00853563397733, "grad_norm": 0.4770662188529968, "learning_rate": 2.553271272977328e-06, "loss": 0.3502, "step": 43768 }, { "epoch": 2.0085815244825844, "grad_norm": 0.4693060517311096, "learning_rate": 2.5530574500291826e-06, "loss": 0.2741, "step": 43769 }, { "epoch": 2.008627414987839, "grad_norm": 0.5177751779556274, "learning_rate": 2.5528436329651625e-06, "loss": 0.428, "step": 43770 }, { "epoch": 2.0086733054930934, "grad_norm": 0.4525376856327057, "learning_rate": 2.552629821785778e-06, "loss": 0.2765, "step": 43771 }, { "epoch": 2.008719195998348, "grad_norm": 0.5076024532318115, "learning_rate": 2.552416016491549e-06, "loss": 0.334, "step": 43772 }, { "epoch": 2.0087650865036024, "grad_norm": 0.4827359914779663, "learning_rate": 2.5522022170829874e-06, "loss": 0.3518, "step": 43773 }, { "epoch": 2.008810977008857, "grad_norm": 0.4879688620567322, "learning_rate": 2.551988423560606e-06, "loss": 0.3178, "step": 43774 }, { "epoch": 2.0088568675141114, "grad_norm": 0.5137057304382324, "learning_rate": 2.551774635924921e-06, "loss": 0.3994, "step": 43775 }, { "epoch": 2.008902758019366, "grad_norm": 0.4939470589160919, "learning_rate": 2.551560854176444e-06, "loss": 0.314, "step": 43776 }, { "epoch": 2.0089486485246204, "grad_norm": 0.4707486629486084, "learning_rate": 2.5513470783156912e-06, "loss": 0.3004, "step": 43777 }, { "epoch": 2.008994539029875, "grad_norm": 0.48745715618133545, "learning_rate": 2.5511333083431734e-06, "loss": 0.3689, "step": 43778 }, { "epoch": 2.0090404295351294, "grad_norm": 0.45244458317756653, "learning_rate": 2.550919544259409e-06, "loss": 0.2646, "step": 43779 }, { "epoch": 2.0090863200403835, "grad_norm": 0.534142255783081, "learning_rate": 2.550705786064911e-06, "loss": 0.4078, "step": 43780 }, { "epoch": 2.009132210545638, "grad_norm": 0.4892018437385559, "learning_rate": 2.5504920337601902e-06, "loss": 0.3102, "step": 43781 }, { "epoch": 2.0091781010508925, "grad_norm": 0.49181580543518066, "learning_rate": 2.550278287345765e-06, "loss": 0.3335, "step": 43782 }, { "epoch": 2.009223991556147, "grad_norm": 0.5097191333770752, "learning_rate": 2.5500645468221474e-06, "loss": 0.353, "step": 43783 }, { "epoch": 2.0092698820614014, "grad_norm": 0.46487361192703247, "learning_rate": 2.549850812189851e-06, "loss": 0.2839, "step": 43784 }, { "epoch": 2.009315772566656, "grad_norm": 0.4459165334701538, "learning_rate": 2.54963708344939e-06, "loss": 0.2541, "step": 43785 }, { "epoch": 2.0093616630719104, "grad_norm": 0.4816977083683014, "learning_rate": 2.5494233606012788e-06, "loss": 0.3567, "step": 43786 }, { "epoch": 2.009407553577165, "grad_norm": 0.4885375201702118, "learning_rate": 2.549209643646029e-06, "loss": 0.3169, "step": 43787 }, { "epoch": 2.0094534440824194, "grad_norm": 0.5076816082000732, "learning_rate": 2.5489959325841585e-06, "loss": 0.3181, "step": 43788 }, { "epoch": 2.009499334587674, "grad_norm": 0.49200117588043213, "learning_rate": 2.5487822274161787e-06, "loss": 0.3491, "step": 43789 }, { "epoch": 2.0095452250929284, "grad_norm": 0.48720628023147583, "learning_rate": 2.5485685281426033e-06, "loss": 0.301, "step": 43790 }, { "epoch": 2.009591115598183, "grad_norm": 0.5126504898071289, "learning_rate": 2.5483548347639474e-06, "loss": 0.3331, "step": 43791 }, { "epoch": 2.009637006103437, "grad_norm": 0.43541908264160156, "learning_rate": 2.548141147280725e-06, "loss": 0.2505, "step": 43792 }, { "epoch": 2.0096828966086915, "grad_norm": 0.4436420798301697, "learning_rate": 2.547927465693447e-06, "loss": 0.2772, "step": 43793 }, { "epoch": 2.009728787113946, "grad_norm": 0.4911344349384308, "learning_rate": 2.5477137900026326e-06, "loss": 0.3056, "step": 43794 }, { "epoch": 2.0097746776192005, "grad_norm": 0.5293405055999756, "learning_rate": 2.5475001202087917e-06, "loss": 0.3492, "step": 43795 }, { "epoch": 2.009820568124455, "grad_norm": 0.5054666996002197, "learning_rate": 2.547286456312439e-06, "loss": 0.3635, "step": 43796 }, { "epoch": 2.0098664586297095, "grad_norm": 0.49643296003341675, "learning_rate": 2.5470727983140886e-06, "loss": 0.3043, "step": 43797 }, { "epoch": 2.009912349134964, "grad_norm": 0.4807972311973572, "learning_rate": 2.5468591462142535e-06, "loss": 0.3305, "step": 43798 }, { "epoch": 2.0099582396402185, "grad_norm": 0.4591931402683258, "learning_rate": 2.546645500013446e-06, "loss": 0.2779, "step": 43799 }, { "epoch": 2.010004130145473, "grad_norm": 0.44251781702041626, "learning_rate": 2.546431859712184e-06, "loss": 0.2743, "step": 43800 }, { "epoch": 2.0100500206507275, "grad_norm": 0.4940275251865387, "learning_rate": 2.546218225310979e-06, "loss": 0.3481, "step": 43801 }, { "epoch": 2.010095911155982, "grad_norm": 0.46646368503570557, "learning_rate": 2.546004596810343e-06, "loss": 0.3036, "step": 43802 }, { "epoch": 2.0101418016612365, "grad_norm": 0.45650115609169006, "learning_rate": 2.5457909742107934e-06, "loss": 0.2938, "step": 43803 }, { "epoch": 2.010187692166491, "grad_norm": 0.4809924364089966, "learning_rate": 2.5455773575128418e-06, "loss": 0.3659, "step": 43804 }, { "epoch": 2.010233582671745, "grad_norm": 0.42802879214286804, "learning_rate": 2.5453637467170013e-06, "loss": 0.2596, "step": 43805 }, { "epoch": 2.0102794731769995, "grad_norm": 0.5067252516746521, "learning_rate": 2.5451501418237866e-06, "loss": 0.3875, "step": 43806 }, { "epoch": 2.010325363682254, "grad_norm": 0.4658513367176056, "learning_rate": 2.5449365428337114e-06, "loss": 0.3121, "step": 43807 }, { "epoch": 2.0103712541875085, "grad_norm": 0.4886285662651062, "learning_rate": 2.544722949747289e-06, "loss": 0.3596, "step": 43808 }, { "epoch": 2.010417144692763, "grad_norm": 0.4542262554168701, "learning_rate": 2.544509362565031e-06, "loss": 0.2742, "step": 43809 }, { "epoch": 2.0104630351980175, "grad_norm": 0.4549892544746399, "learning_rate": 2.544295781287455e-06, "loss": 0.2798, "step": 43810 }, { "epoch": 2.010508925703272, "grad_norm": 0.48968493938446045, "learning_rate": 2.5440822059150726e-06, "loss": 0.3819, "step": 43811 }, { "epoch": 2.0105548162085265, "grad_norm": 0.513505220413208, "learning_rate": 2.5438686364483956e-06, "loss": 0.2875, "step": 43812 }, { "epoch": 2.010600706713781, "grad_norm": 0.4749179482460022, "learning_rate": 2.5436550728879404e-06, "loss": 0.3071, "step": 43813 }, { "epoch": 2.0106465972190355, "grad_norm": 0.5001460313796997, "learning_rate": 2.5434415152342207e-06, "loss": 0.3099, "step": 43814 }, { "epoch": 2.01069248772429, "grad_norm": 0.5225446820259094, "learning_rate": 2.543227963487746e-06, "loss": 0.4213, "step": 43815 }, { "epoch": 2.0107383782295445, "grad_norm": 0.431170254945755, "learning_rate": 2.5430144176490345e-06, "loss": 0.2767, "step": 43816 }, { "epoch": 2.010784268734799, "grad_norm": 0.4868049621582031, "learning_rate": 2.5428008777185988e-06, "loss": 0.3152, "step": 43817 }, { "epoch": 2.010830159240053, "grad_norm": 0.46672624349594116, "learning_rate": 2.5425873436969505e-06, "loss": 0.289, "step": 43818 }, { "epoch": 2.0108760497453075, "grad_norm": 0.47404828667640686, "learning_rate": 2.542373815584605e-06, "loss": 0.3301, "step": 43819 }, { "epoch": 2.010921940250562, "grad_norm": 0.4752424359321594, "learning_rate": 2.5421602933820744e-06, "loss": 0.3066, "step": 43820 }, { "epoch": 2.0109678307558165, "grad_norm": 0.4813607931137085, "learning_rate": 2.541946777089871e-06, "loss": 0.344, "step": 43821 }, { "epoch": 2.011013721261071, "grad_norm": 0.5086230635643005, "learning_rate": 2.541733266708511e-06, "loss": 0.3095, "step": 43822 }, { "epoch": 2.0110596117663255, "grad_norm": 0.5241252779960632, "learning_rate": 2.5415197622385067e-06, "loss": 0.3673, "step": 43823 }, { "epoch": 2.01110550227158, "grad_norm": 0.520601212978363, "learning_rate": 2.54130626368037e-06, "loss": 0.367, "step": 43824 }, { "epoch": 2.0111513927768345, "grad_norm": 0.5152016878128052, "learning_rate": 2.5410927710346176e-06, "loss": 0.3981, "step": 43825 }, { "epoch": 2.011197283282089, "grad_norm": 0.5219717025756836, "learning_rate": 2.5408792843017605e-06, "loss": 0.3522, "step": 43826 }, { "epoch": 2.0112431737873435, "grad_norm": 0.4979209005832672, "learning_rate": 2.5406658034823135e-06, "loss": 0.3288, "step": 43827 }, { "epoch": 2.011289064292598, "grad_norm": 0.5158754587173462, "learning_rate": 2.5404523285767883e-06, "loss": 0.3341, "step": 43828 }, { "epoch": 2.0113349547978525, "grad_norm": 0.5207123756408691, "learning_rate": 2.5402388595857e-06, "loss": 0.3377, "step": 43829 }, { "epoch": 2.0113808453031066, "grad_norm": 0.49175387620925903, "learning_rate": 2.5400253965095577e-06, "loss": 0.3231, "step": 43830 }, { "epoch": 2.011426735808361, "grad_norm": 0.5107274055480957, "learning_rate": 2.5398119393488806e-06, "loss": 0.3632, "step": 43831 }, { "epoch": 2.0114726263136156, "grad_norm": 0.4979594945907593, "learning_rate": 2.539598488104179e-06, "loss": 0.3428, "step": 43832 }, { "epoch": 2.01151851681887, "grad_norm": 0.48799246549606323, "learning_rate": 2.5393850427759647e-06, "loss": 0.3225, "step": 43833 }, { "epoch": 2.0115644073241246, "grad_norm": 0.48617082834243774, "learning_rate": 2.539171603364755e-06, "loss": 0.3265, "step": 43834 }, { "epoch": 2.011610297829379, "grad_norm": 0.502138614654541, "learning_rate": 2.5389581698710607e-06, "loss": 0.342, "step": 43835 }, { "epoch": 2.0116561883346336, "grad_norm": 0.508787989616394, "learning_rate": 2.5387447422953932e-06, "loss": 0.3159, "step": 43836 }, { "epoch": 2.011702078839888, "grad_norm": 0.4687052369117737, "learning_rate": 2.53853132063827e-06, "loss": 0.3078, "step": 43837 }, { "epoch": 2.0117479693451426, "grad_norm": 0.4818289577960968, "learning_rate": 2.5383179049002017e-06, "loss": 0.3036, "step": 43838 }, { "epoch": 2.011793859850397, "grad_norm": 0.48350799083709717, "learning_rate": 2.5381044950817025e-06, "loss": 0.3078, "step": 43839 }, { "epoch": 2.0118397503556515, "grad_norm": 0.4493600130081177, "learning_rate": 2.5378910911832842e-06, "loss": 0.2587, "step": 43840 }, { "epoch": 2.011885640860906, "grad_norm": 0.4948440492153168, "learning_rate": 2.5376776932054615e-06, "loss": 0.3782, "step": 43841 }, { "epoch": 2.0119315313661605, "grad_norm": 0.5043162703514099, "learning_rate": 2.5374643011487464e-06, "loss": 0.3751, "step": 43842 }, { "epoch": 2.0119774218714146, "grad_norm": 0.4890897274017334, "learning_rate": 2.537250915013651e-06, "loss": 0.3541, "step": 43843 }, { "epoch": 2.012023312376669, "grad_norm": 0.5071358680725098, "learning_rate": 2.5370375348006914e-06, "loss": 0.3131, "step": 43844 }, { "epoch": 2.0120692028819236, "grad_norm": 0.45573458075523376, "learning_rate": 2.5368241605103793e-06, "loss": 0.2908, "step": 43845 }, { "epoch": 2.012115093387178, "grad_norm": 0.4748174250125885, "learning_rate": 2.536610792143226e-06, "loss": 0.3263, "step": 43846 }, { "epoch": 2.0121609838924326, "grad_norm": 0.4881909191608429, "learning_rate": 2.5363974296997473e-06, "loss": 0.3159, "step": 43847 }, { "epoch": 2.012206874397687, "grad_norm": 0.4843316078186035, "learning_rate": 2.5361840731804567e-06, "loss": 0.3326, "step": 43848 }, { "epoch": 2.0122527649029416, "grad_norm": 0.4606104791164398, "learning_rate": 2.535970722585864e-06, "loss": 0.2609, "step": 43849 }, { "epoch": 2.012298655408196, "grad_norm": 0.468860000371933, "learning_rate": 2.5357573779164856e-06, "loss": 0.2854, "step": 43850 }, { "epoch": 2.0123445459134506, "grad_norm": 0.46169668436050415, "learning_rate": 2.5355440391728327e-06, "loss": 0.3035, "step": 43851 }, { "epoch": 2.012390436418705, "grad_norm": 0.4977363348007202, "learning_rate": 2.535330706355416e-06, "loss": 0.3351, "step": 43852 }, { "epoch": 2.0124363269239596, "grad_norm": 0.5179955363273621, "learning_rate": 2.535117379464753e-06, "loss": 0.3875, "step": 43853 }, { "epoch": 2.012482217429214, "grad_norm": 0.5369314551353455, "learning_rate": 2.5349040585013556e-06, "loss": 0.3844, "step": 43854 }, { "epoch": 2.0125281079344686, "grad_norm": 0.501923680305481, "learning_rate": 2.5346907434657335e-06, "loss": 0.3598, "step": 43855 }, { "epoch": 2.0125739984397226, "grad_norm": 0.44475021958351135, "learning_rate": 2.5344774343584043e-06, "loss": 0.2731, "step": 43856 }, { "epoch": 2.012619888944977, "grad_norm": 0.487800270318985, "learning_rate": 2.534264131179879e-06, "loss": 0.3749, "step": 43857 }, { "epoch": 2.0126657794502316, "grad_norm": 0.48556065559387207, "learning_rate": 2.5340508339306667e-06, "loss": 0.2866, "step": 43858 }, { "epoch": 2.012711669955486, "grad_norm": 0.5159242153167725, "learning_rate": 2.533837542611287e-06, "loss": 0.3447, "step": 43859 }, { "epoch": 2.0127575604607406, "grad_norm": 0.5006429553031921, "learning_rate": 2.5336242572222504e-06, "loss": 0.2717, "step": 43860 }, { "epoch": 2.012803450965995, "grad_norm": 0.4680998921394348, "learning_rate": 2.5334109777640683e-06, "loss": 0.3283, "step": 43861 }, { "epoch": 2.0128493414712496, "grad_norm": 0.5020669102668762, "learning_rate": 2.5331977042372534e-06, "loss": 0.2891, "step": 43862 }, { "epoch": 2.012895231976504, "grad_norm": 0.4863584339618683, "learning_rate": 2.5329844366423208e-06, "loss": 0.3145, "step": 43863 }, { "epoch": 2.0129411224817586, "grad_norm": 0.45392343401908875, "learning_rate": 2.5327711749797795e-06, "loss": 0.2524, "step": 43864 }, { "epoch": 2.012987012987013, "grad_norm": 0.47296059131622314, "learning_rate": 2.532557919250147e-06, "loss": 0.3104, "step": 43865 }, { "epoch": 2.0130329034922676, "grad_norm": 0.4889378845691681, "learning_rate": 2.532344669453934e-06, "loss": 0.2958, "step": 43866 }, { "epoch": 2.013078793997522, "grad_norm": 0.5107224583625793, "learning_rate": 2.5321314255916506e-06, "loss": 0.3204, "step": 43867 }, { "epoch": 2.0131246845027766, "grad_norm": 0.453745037317276, "learning_rate": 2.5319181876638153e-06, "loss": 0.2512, "step": 43868 }, { "epoch": 2.0131705750080306, "grad_norm": 0.4848005473613739, "learning_rate": 2.531704955670937e-06, "loss": 0.3286, "step": 43869 }, { "epoch": 2.013216465513285, "grad_norm": 0.4894874095916748, "learning_rate": 2.5314917296135278e-06, "loss": 0.327, "step": 43870 }, { "epoch": 2.0132623560185396, "grad_norm": 0.5080018043518066, "learning_rate": 2.5312785094921055e-06, "loss": 0.366, "step": 43871 }, { "epoch": 2.013308246523794, "grad_norm": 0.475763201713562, "learning_rate": 2.5310652953071776e-06, "loss": 0.343, "step": 43872 }, { "epoch": 2.0133541370290486, "grad_norm": 0.4893595278263092, "learning_rate": 2.5308520870592577e-06, "loss": 0.3423, "step": 43873 }, { "epoch": 2.013400027534303, "grad_norm": 0.4915807247161865, "learning_rate": 2.5306388847488584e-06, "loss": 0.3504, "step": 43874 }, { "epoch": 2.0134459180395576, "grad_norm": 0.5083504915237427, "learning_rate": 2.5304256883764942e-06, "loss": 0.3468, "step": 43875 }, { "epoch": 2.013491808544812, "grad_norm": 0.5098035335540771, "learning_rate": 2.530212497942677e-06, "loss": 0.3424, "step": 43876 }, { "epoch": 2.0135376990500666, "grad_norm": 0.49076545238494873, "learning_rate": 2.5299993134479177e-06, "loss": 0.313, "step": 43877 }, { "epoch": 2.013583589555321, "grad_norm": 0.4642910361289978, "learning_rate": 2.5297861348927323e-06, "loss": 0.3071, "step": 43878 }, { "epoch": 2.0136294800605756, "grad_norm": 0.4723716080188751, "learning_rate": 2.5295729622776314e-06, "loss": 0.2906, "step": 43879 }, { "epoch": 2.01367537056583, "grad_norm": 0.5080165266990662, "learning_rate": 2.5293597956031262e-06, "loss": 0.3599, "step": 43880 }, { "epoch": 2.013721261071084, "grad_norm": 0.49046531319618225, "learning_rate": 2.5291466348697326e-06, "loss": 0.3306, "step": 43881 }, { "epoch": 2.0137671515763387, "grad_norm": 0.49121373891830444, "learning_rate": 2.5289334800779617e-06, "loss": 0.333, "step": 43882 }, { "epoch": 2.013813042081593, "grad_norm": 0.48879274725914, "learning_rate": 2.528720331228326e-06, "loss": 0.3579, "step": 43883 }, { "epoch": 2.0138589325868477, "grad_norm": 0.4825191795825958, "learning_rate": 2.5285071883213375e-06, "loss": 0.321, "step": 43884 }, { "epoch": 2.013904823092102, "grad_norm": 0.44309690594673157, "learning_rate": 2.5282940513575094e-06, "loss": 0.2659, "step": 43885 }, { "epoch": 2.0139507135973567, "grad_norm": 0.4973062574863434, "learning_rate": 2.5280809203373523e-06, "loss": 0.3438, "step": 43886 }, { "epoch": 2.013996604102611, "grad_norm": 0.4765530526638031, "learning_rate": 2.5278677952613827e-06, "loss": 0.3004, "step": 43887 }, { "epoch": 2.0140424946078657, "grad_norm": 0.49563929438591003, "learning_rate": 2.52765467613011e-06, "loss": 0.3365, "step": 43888 }, { "epoch": 2.01408838511312, "grad_norm": 0.4780856966972351, "learning_rate": 2.527441562944046e-06, "loss": 0.3397, "step": 43889 }, { "epoch": 2.0141342756183747, "grad_norm": 0.49740245938301086, "learning_rate": 2.527228455703706e-06, "loss": 0.339, "step": 43890 }, { "epoch": 2.014180166123629, "grad_norm": 0.49825698137283325, "learning_rate": 2.527015354409602e-06, "loss": 0.317, "step": 43891 }, { "epoch": 2.0142260566288837, "grad_norm": 0.5217834115028381, "learning_rate": 2.5268022590622433e-06, "loss": 0.3928, "step": 43892 }, { "epoch": 2.014271947134138, "grad_norm": 0.44545963406562805, "learning_rate": 2.5265891696621486e-06, "loss": 0.2666, "step": 43893 }, { "epoch": 2.014317837639392, "grad_norm": 0.4696104824542999, "learning_rate": 2.526376086209824e-06, "loss": 0.3241, "step": 43894 }, { "epoch": 2.0143637281446467, "grad_norm": 0.4544749855995178, "learning_rate": 2.526163008705783e-06, "loss": 0.308, "step": 43895 }, { "epoch": 2.014409618649901, "grad_norm": 0.43423962593078613, "learning_rate": 2.5259499371505402e-06, "loss": 0.2677, "step": 43896 }, { "epoch": 2.0144555091551557, "grad_norm": 0.46652644872665405, "learning_rate": 2.5257368715446073e-06, "loss": 0.277, "step": 43897 }, { "epoch": 2.01450139966041, "grad_norm": 0.4934419095516205, "learning_rate": 2.525523811888495e-06, "loss": 0.3391, "step": 43898 }, { "epoch": 2.0145472901656647, "grad_norm": 0.47350746393203735, "learning_rate": 2.5253107581827186e-06, "loss": 0.2624, "step": 43899 }, { "epoch": 2.014593180670919, "grad_norm": 0.49032244086265564, "learning_rate": 2.5250977104277886e-06, "loss": 0.3738, "step": 43900 }, { "epoch": 2.0146390711761737, "grad_norm": 0.472483366727829, "learning_rate": 2.5248846686242155e-06, "loss": 0.2978, "step": 43901 }, { "epoch": 2.014684961681428, "grad_norm": 0.5161455273628235, "learning_rate": 2.524671632772515e-06, "loss": 0.3415, "step": 43902 }, { "epoch": 2.0147308521866827, "grad_norm": 0.4800645112991333, "learning_rate": 2.5244586028731987e-06, "loss": 0.3612, "step": 43903 }, { "epoch": 2.014776742691937, "grad_norm": 0.5013259053230286, "learning_rate": 2.5242455789267784e-06, "loss": 0.3477, "step": 43904 }, { "epoch": 2.0148226331971917, "grad_norm": 0.46954140067100525, "learning_rate": 2.5240325609337656e-06, "loss": 0.3325, "step": 43905 }, { "epoch": 2.014868523702446, "grad_norm": 0.4729161560535431, "learning_rate": 2.5238195488946735e-06, "loss": 0.3248, "step": 43906 }, { "epoch": 2.0149144142077002, "grad_norm": 0.500304639339447, "learning_rate": 2.523606542810014e-06, "loss": 0.4027, "step": 43907 }, { "epoch": 2.0149603047129547, "grad_norm": 0.4824456572532654, "learning_rate": 2.523393542680297e-06, "loss": 0.3178, "step": 43908 }, { "epoch": 2.0150061952182092, "grad_norm": 0.4339597821235657, "learning_rate": 2.523180548506038e-06, "loss": 0.2756, "step": 43909 }, { "epoch": 2.0150520857234637, "grad_norm": 0.47673264145851135, "learning_rate": 2.522967560287749e-06, "loss": 0.2945, "step": 43910 }, { "epoch": 2.0150979762287182, "grad_norm": 0.5147387385368347, "learning_rate": 2.5227545780259387e-06, "loss": 0.3727, "step": 43911 }, { "epoch": 2.0151438667339727, "grad_norm": 0.4680587649345398, "learning_rate": 2.5225416017211245e-06, "loss": 0.2979, "step": 43912 }, { "epoch": 2.015189757239227, "grad_norm": 0.5131319165229797, "learning_rate": 2.5223286313738153e-06, "loss": 0.3345, "step": 43913 }, { "epoch": 2.0152356477444817, "grad_norm": 0.49784141778945923, "learning_rate": 2.522115666984522e-06, "loss": 0.3742, "step": 43914 }, { "epoch": 2.015281538249736, "grad_norm": 0.48749786615371704, "learning_rate": 2.5219027085537626e-06, "loss": 0.3239, "step": 43915 }, { "epoch": 2.0153274287549907, "grad_norm": 0.42005592584609985, "learning_rate": 2.5216897560820432e-06, "loss": 0.2312, "step": 43916 }, { "epoch": 2.015373319260245, "grad_norm": 0.48870399594306946, "learning_rate": 2.521476809569875e-06, "loss": 0.3251, "step": 43917 }, { "epoch": 2.0154192097654997, "grad_norm": 0.5034509301185608, "learning_rate": 2.5212638690177754e-06, "loss": 0.4244, "step": 43918 }, { "epoch": 2.0154651002707538, "grad_norm": 0.4646150469779968, "learning_rate": 2.5210509344262536e-06, "loss": 0.3287, "step": 43919 }, { "epoch": 2.0155109907760083, "grad_norm": 0.45048967003822327, "learning_rate": 2.5208380057958203e-06, "loss": 0.3104, "step": 43920 }, { "epoch": 2.0155568812812628, "grad_norm": 0.46446090936660767, "learning_rate": 2.5206250831269907e-06, "loss": 0.305, "step": 43921 }, { "epoch": 2.0156027717865173, "grad_norm": 0.49919232726097107, "learning_rate": 2.5204121664202752e-06, "loss": 0.3233, "step": 43922 }, { "epoch": 2.0156486622917718, "grad_norm": 0.4784594476222992, "learning_rate": 2.5201992556761847e-06, "loss": 0.3186, "step": 43923 }, { "epoch": 2.0156945527970263, "grad_norm": 0.45333245396614075, "learning_rate": 2.5199863508952336e-06, "loss": 0.2998, "step": 43924 }, { "epoch": 2.0157404433022807, "grad_norm": 0.483097106218338, "learning_rate": 2.519773452077933e-06, "loss": 0.3086, "step": 43925 }, { "epoch": 2.0157863338075352, "grad_norm": 0.49029210209846497, "learning_rate": 2.5195605592247942e-06, "loss": 0.3292, "step": 43926 }, { "epoch": 2.0158322243127897, "grad_norm": 0.45336493849754333, "learning_rate": 2.5193476723363296e-06, "loss": 0.3407, "step": 43927 }, { "epoch": 2.0158781148180442, "grad_norm": 0.4453563392162323, "learning_rate": 2.5191347914130504e-06, "loss": 0.2821, "step": 43928 }, { "epoch": 2.0159240053232987, "grad_norm": 0.48070886731147766, "learning_rate": 2.518921916455468e-06, "loss": 0.2868, "step": 43929 }, { "epoch": 2.0159698958285532, "grad_norm": 0.4838741719722748, "learning_rate": 2.518709047464096e-06, "loss": 0.3588, "step": 43930 }, { "epoch": 2.0160157863338077, "grad_norm": 0.4852774739265442, "learning_rate": 2.518496184439447e-06, "loss": 0.3467, "step": 43931 }, { "epoch": 2.016061676839062, "grad_norm": 0.49208828806877136, "learning_rate": 2.518283327382029e-06, "loss": 0.3444, "step": 43932 }, { "epoch": 2.0161075673443163, "grad_norm": 0.507270097732544, "learning_rate": 2.5180704762923582e-06, "loss": 0.3131, "step": 43933 }, { "epoch": 2.016153457849571, "grad_norm": 0.4793279767036438, "learning_rate": 2.5178576311709446e-06, "loss": 0.3614, "step": 43934 }, { "epoch": 2.0161993483548253, "grad_norm": 0.47131043672561646, "learning_rate": 2.5176447920182977e-06, "loss": 0.3251, "step": 43935 }, { "epoch": 2.01624523886008, "grad_norm": 0.473725289106369, "learning_rate": 2.5174319588349343e-06, "loss": 0.3014, "step": 43936 }, { "epoch": 2.0162911293653343, "grad_norm": 0.4771834909915924, "learning_rate": 2.517219131621365e-06, "loss": 0.3364, "step": 43937 }, { "epoch": 2.0163370198705888, "grad_norm": 0.4983333945274353, "learning_rate": 2.5170063103780986e-06, "loss": 0.3333, "step": 43938 }, { "epoch": 2.0163829103758433, "grad_norm": 0.4583309590816498, "learning_rate": 2.516793495105646e-06, "loss": 0.2918, "step": 43939 }, { "epoch": 2.0164288008810978, "grad_norm": 0.4522255063056946, "learning_rate": 2.5165806858045232e-06, "loss": 0.2851, "step": 43940 }, { "epoch": 2.0164746913863523, "grad_norm": 0.5120725631713867, "learning_rate": 2.51636788247524e-06, "loss": 0.3627, "step": 43941 }, { "epoch": 2.0165205818916068, "grad_norm": 0.476701945066452, "learning_rate": 2.516155085118307e-06, "loss": 0.2979, "step": 43942 }, { "epoch": 2.0165664723968613, "grad_norm": 0.4793146848678589, "learning_rate": 2.515942293734238e-06, "loss": 0.3165, "step": 43943 }, { "epoch": 2.0166123629021158, "grad_norm": 0.47328636050224304, "learning_rate": 2.5157295083235444e-06, "loss": 0.3147, "step": 43944 }, { "epoch": 2.01665825340737, "grad_norm": 0.4885639548301697, "learning_rate": 2.5155167288867355e-06, "loss": 0.3611, "step": 43945 }, { "epoch": 2.0167041439126243, "grad_norm": 0.5268436670303345, "learning_rate": 2.515303955424326e-06, "loss": 0.4061, "step": 43946 }, { "epoch": 2.016750034417879, "grad_norm": 0.4444359838962555, "learning_rate": 2.5150911879368267e-06, "loss": 0.2938, "step": 43947 }, { "epoch": 2.0167959249231333, "grad_norm": 0.4631958305835724, "learning_rate": 2.5148784264247495e-06, "loss": 0.2942, "step": 43948 }, { "epoch": 2.016841815428388, "grad_norm": 0.47893357276916504, "learning_rate": 2.514665670888604e-06, "loss": 0.2898, "step": 43949 }, { "epoch": 2.0168877059336423, "grad_norm": 0.5468403100967407, "learning_rate": 2.514452921328904e-06, "loss": 0.386, "step": 43950 }, { "epoch": 2.016933596438897, "grad_norm": 0.48551762104034424, "learning_rate": 2.5142401777461585e-06, "loss": 0.3431, "step": 43951 }, { "epoch": 2.0169794869441513, "grad_norm": 0.46854326128959656, "learning_rate": 2.5140274401408828e-06, "loss": 0.3349, "step": 43952 }, { "epoch": 2.017025377449406, "grad_norm": 0.43519696593284607, "learning_rate": 2.5138147085135857e-06, "loss": 0.2232, "step": 43953 }, { "epoch": 2.0170712679546603, "grad_norm": 0.5051290392875671, "learning_rate": 2.5136019828647784e-06, "loss": 0.3881, "step": 43954 }, { "epoch": 2.017117158459915, "grad_norm": 0.5379789471626282, "learning_rate": 2.513389263194975e-06, "loss": 0.383, "step": 43955 }, { "epoch": 2.0171630489651693, "grad_norm": 0.49520957469940186, "learning_rate": 2.5131765495046854e-06, "loss": 0.3462, "step": 43956 }, { "epoch": 2.0172089394704233, "grad_norm": 0.46914055943489075, "learning_rate": 2.5129638417944203e-06, "loss": 0.3065, "step": 43957 }, { "epoch": 2.017254829975678, "grad_norm": 0.462139755487442, "learning_rate": 2.5127511400646944e-06, "loss": 0.2956, "step": 43958 }, { "epoch": 2.0173007204809323, "grad_norm": 0.4541197121143341, "learning_rate": 2.5125384443160176e-06, "loss": 0.2667, "step": 43959 }, { "epoch": 2.017346610986187, "grad_norm": 0.467947781085968, "learning_rate": 2.5123257545488978e-06, "loss": 0.3083, "step": 43960 }, { "epoch": 2.0173925014914413, "grad_norm": 0.45543619990348816, "learning_rate": 2.5121130707638507e-06, "loss": 0.2882, "step": 43961 }, { "epoch": 2.017438391996696, "grad_norm": 0.4957212209701538, "learning_rate": 2.5119003929613872e-06, "loss": 0.3748, "step": 43962 }, { "epoch": 2.0174842825019503, "grad_norm": 0.4610357880592346, "learning_rate": 2.511687721142016e-06, "loss": 0.276, "step": 43963 }, { "epoch": 2.017530173007205, "grad_norm": 0.4969644844532013, "learning_rate": 2.511475055306252e-06, "loss": 0.3201, "step": 43964 }, { "epoch": 2.0175760635124593, "grad_norm": 0.48030999302864075, "learning_rate": 2.5112623954546056e-06, "loss": 0.3221, "step": 43965 }, { "epoch": 2.017621954017714, "grad_norm": 0.47609302401542664, "learning_rate": 2.5110497415875856e-06, "loss": 0.3159, "step": 43966 }, { "epoch": 2.0176678445229683, "grad_norm": 0.4846997559070587, "learning_rate": 2.510837093705707e-06, "loss": 0.292, "step": 43967 }, { "epoch": 2.017713735028223, "grad_norm": 0.5531144738197327, "learning_rate": 2.5106244518094798e-06, "loss": 0.3116, "step": 43968 }, { "epoch": 2.0177596255334773, "grad_norm": 0.5079323053359985, "learning_rate": 2.510411815899415e-06, "loss": 0.3403, "step": 43969 }, { "epoch": 2.0178055160387314, "grad_norm": 0.5111728310585022, "learning_rate": 2.5101991859760242e-06, "loss": 0.383, "step": 43970 }, { "epoch": 2.017851406543986, "grad_norm": 0.43835774064064026, "learning_rate": 2.5099865620398185e-06, "loss": 0.2787, "step": 43971 }, { "epoch": 2.0178972970492404, "grad_norm": 0.452069491147995, "learning_rate": 2.5097739440913093e-06, "loss": 0.2785, "step": 43972 }, { "epoch": 2.017943187554495, "grad_norm": 0.4641483426094055, "learning_rate": 2.509561332131006e-06, "loss": 0.2748, "step": 43973 }, { "epoch": 2.0179890780597494, "grad_norm": 0.5283618569374084, "learning_rate": 2.509348726159423e-06, "loss": 0.384, "step": 43974 }, { "epoch": 2.018034968565004, "grad_norm": 0.4725908935070038, "learning_rate": 2.5091361261770708e-06, "loss": 0.3303, "step": 43975 }, { "epoch": 2.0180808590702584, "grad_norm": 0.4774918556213379, "learning_rate": 2.5089235321844584e-06, "loss": 0.3443, "step": 43976 }, { "epoch": 2.018126749575513, "grad_norm": 0.4773925244808197, "learning_rate": 2.5087109441821e-06, "loss": 0.285, "step": 43977 }, { "epoch": 2.0181726400807674, "grad_norm": 0.4938560128211975, "learning_rate": 2.508498362170506e-06, "loss": 0.3341, "step": 43978 }, { "epoch": 2.018218530586022, "grad_norm": 0.436746746301651, "learning_rate": 2.5082857861501854e-06, "loss": 0.2467, "step": 43979 }, { "epoch": 2.0182644210912764, "grad_norm": 0.48668918013572693, "learning_rate": 2.508073216121652e-06, "loss": 0.3313, "step": 43980 }, { "epoch": 2.018310311596531, "grad_norm": 0.45892852544784546, "learning_rate": 2.5078606520854188e-06, "loss": 0.2598, "step": 43981 }, { "epoch": 2.0183562021017853, "grad_norm": 0.4988388419151306, "learning_rate": 2.50764809404199e-06, "loss": 0.3381, "step": 43982 }, { "epoch": 2.0184020926070394, "grad_norm": 0.5160019993782043, "learning_rate": 2.5074355419918828e-06, "loss": 0.3862, "step": 43983 }, { "epoch": 2.018447983112294, "grad_norm": 0.4716640114784241, "learning_rate": 2.5072229959356063e-06, "loss": 0.2799, "step": 43984 }, { "epoch": 2.0184938736175484, "grad_norm": 0.4866861402988434, "learning_rate": 2.5070104558736707e-06, "loss": 0.3226, "step": 43985 }, { "epoch": 2.018539764122803, "grad_norm": 0.4635947048664093, "learning_rate": 2.506797921806589e-06, "loss": 0.3643, "step": 43986 }, { "epoch": 2.0185856546280574, "grad_norm": 0.47598734498023987, "learning_rate": 2.5065853937348715e-06, "loss": 0.33, "step": 43987 }, { "epoch": 2.018631545133312, "grad_norm": 0.4753451943397522, "learning_rate": 2.5063728716590287e-06, "loss": 0.3326, "step": 43988 }, { "epoch": 2.0186774356385664, "grad_norm": 0.48025956749916077, "learning_rate": 2.5061603555795722e-06, "loss": 0.3394, "step": 43989 }, { "epoch": 2.018723326143821, "grad_norm": 0.5563918948173523, "learning_rate": 2.505947845497014e-06, "loss": 0.2202, "step": 43990 }, { "epoch": 2.0187692166490754, "grad_norm": 0.49601122736930847, "learning_rate": 2.505735341411864e-06, "loss": 0.355, "step": 43991 }, { "epoch": 2.01881510715433, "grad_norm": 0.4918726980686188, "learning_rate": 2.505522843324633e-06, "loss": 0.3245, "step": 43992 }, { "epoch": 2.0188609976595844, "grad_norm": 0.4753451943397522, "learning_rate": 2.5053103512358325e-06, "loss": 0.3531, "step": 43993 }, { "epoch": 2.018906888164839, "grad_norm": 0.4845462739467621, "learning_rate": 2.505097865145971e-06, "loss": 0.3318, "step": 43994 }, { "epoch": 2.0189527786700934, "grad_norm": 0.49072739481925964, "learning_rate": 2.5048853850555643e-06, "loss": 0.3097, "step": 43995 }, { "epoch": 2.0189986691753474, "grad_norm": 0.5049818158149719, "learning_rate": 2.50467291096512e-06, "loss": 0.332, "step": 43996 }, { "epoch": 2.019044559680602, "grad_norm": 0.49033215641975403, "learning_rate": 2.5044604428751483e-06, "loss": 0.3206, "step": 43997 }, { "epoch": 2.0190904501858564, "grad_norm": 0.45598530769348145, "learning_rate": 2.5042479807861635e-06, "loss": 0.2627, "step": 43998 }, { "epoch": 2.019136340691111, "grad_norm": 0.45843300223350525, "learning_rate": 2.504035524698675e-06, "loss": 0.2819, "step": 43999 }, { "epoch": 2.0191822311963654, "grad_norm": 0.5183503031730652, "learning_rate": 2.503823074613191e-06, "loss": 0.3487, "step": 44000 } ], "logging_steps": 1, "max_steps": 65373, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.601551861547008e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }