{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 3902, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005125576627370579, "grad_norm": 15.510491304586509, "learning_rate": 1.6949152542372883e-07, "loss": 1.2357, "step": 1 }, { "epoch": 0.0010251153254741158, "grad_norm": 16.985531822734757, "learning_rate": 3.3898305084745766e-07, "loss": 1.3093, "step": 2 }, { "epoch": 0.0015376729882111738, "grad_norm": 16.205803399484438, "learning_rate": 5.084745762711865e-07, "loss": 1.275, "step": 3 }, { "epoch": 0.0020502306509482316, "grad_norm": 15.14727158101863, "learning_rate": 6.779661016949153e-07, "loss": 1.2217, "step": 4 }, { "epoch": 0.0025627883136852894, "grad_norm": 15.037107004822138, "learning_rate": 8.474576271186441e-07, "loss": 1.191, "step": 5 }, { "epoch": 0.0030753459764223477, "grad_norm": 14.404056066224703, "learning_rate": 1.016949152542373e-06, "loss": 1.2763, "step": 6 }, { "epoch": 0.0035879036391594054, "grad_norm": 12.187790304507987, "learning_rate": 1.186440677966102e-06, "loss": 1.2054, "step": 7 }, { "epoch": 0.004100461301896463, "grad_norm": 10.162755309554079, "learning_rate": 1.3559322033898307e-06, "loss": 1.1404, "step": 8 }, { "epoch": 0.0046130189646335215, "grad_norm": 6.442458247348861, "learning_rate": 1.5254237288135596e-06, "loss": 1.0538, "step": 9 }, { "epoch": 0.005125576627370579, "grad_norm": 5.703800961591361, "learning_rate": 1.6949152542372882e-06, "loss": 0.988, "step": 10 }, { "epoch": 0.005638134290107637, "grad_norm": 6.263909923010601, "learning_rate": 1.8644067796610171e-06, "loss": 1.1174, "step": 11 }, { "epoch": 0.006150691952844695, "grad_norm": 6.425919140959223, "learning_rate": 2.033898305084746e-06, "loss": 0.9216, "step": 12 }, { "epoch": 0.006663249615581753, "grad_norm": 6.585815859689743, "learning_rate": 2.203389830508475e-06, "loss": 0.9418, "step": 13 }, { "epoch": 0.007175807278318811, "grad_norm": 5.229745746149112, "learning_rate": 2.372881355932204e-06, "loss": 0.9048, "step": 14 }, { "epoch": 0.007688364941055869, "grad_norm": 3.326488881097891, "learning_rate": 2.5423728813559323e-06, "loss": 0.9511, "step": 15 }, { "epoch": 0.008200922603792926, "grad_norm": 4.386298580200502, "learning_rate": 2.7118644067796613e-06, "loss": 0.8987, "step": 16 }, { "epoch": 0.008713480266529985, "grad_norm": 3.739045365492467, "learning_rate": 2.8813559322033903e-06, "loss": 0.9082, "step": 17 }, { "epoch": 0.009226037929267043, "grad_norm": 3.161255571622015, "learning_rate": 3.0508474576271192e-06, "loss": 0.9074, "step": 18 }, { "epoch": 0.009738595592004101, "grad_norm": 2.7541683747120635, "learning_rate": 3.2203389830508473e-06, "loss": 0.8155, "step": 19 }, { "epoch": 0.010251153254741158, "grad_norm": 2.380143591219213, "learning_rate": 3.3898305084745763e-06, "loss": 0.7869, "step": 20 }, { "epoch": 0.010763710917478216, "grad_norm": 2.4030672341099035, "learning_rate": 3.5593220338983053e-06, "loss": 0.8165, "step": 21 }, { "epoch": 0.011276268580215274, "grad_norm": 2.259798535609679, "learning_rate": 3.7288135593220342e-06, "loss": 0.7499, "step": 22 }, { "epoch": 0.011788826242952332, "grad_norm": 2.3582000927028455, "learning_rate": 3.898305084745763e-06, "loss": 0.7992, "step": 23 }, { "epoch": 0.01230138390568939, "grad_norm": 2.4581580125570675, "learning_rate": 4.067796610169492e-06, "loss": 0.7858, "step": 24 }, { "epoch": 0.012813941568426449, "grad_norm": 2.2111423066383153, "learning_rate": 4.23728813559322e-06, "loss": 0.7781, "step": 25 }, { "epoch": 0.013326499231163505, "grad_norm": 2.1405180163170816, "learning_rate": 4.40677966101695e-06, "loss": 0.7817, "step": 26 }, { "epoch": 0.013839056893900564, "grad_norm": 1.7948087088864622, "learning_rate": 4.576271186440678e-06, "loss": 0.7357, "step": 27 }, { "epoch": 0.014351614556637622, "grad_norm": 2.112134187601825, "learning_rate": 4.745762711864408e-06, "loss": 0.7589, "step": 28 }, { "epoch": 0.01486417221937468, "grad_norm": 1.8791542495233833, "learning_rate": 4.915254237288136e-06, "loss": 0.7217, "step": 29 }, { "epoch": 0.015376729882111738, "grad_norm": 1.901949720141708, "learning_rate": 5.084745762711865e-06, "loss": 0.6903, "step": 30 }, { "epoch": 0.015889287544848796, "grad_norm": 1.8680705207511652, "learning_rate": 5.254237288135594e-06, "loss": 0.7305, "step": 31 }, { "epoch": 0.016401845207585853, "grad_norm": 1.8953682242364107, "learning_rate": 5.423728813559323e-06, "loss": 0.7049, "step": 32 }, { "epoch": 0.016914402870322913, "grad_norm": 1.9490000338595392, "learning_rate": 5.593220338983051e-06, "loss": 0.7497, "step": 33 }, { "epoch": 0.01742696053305997, "grad_norm": 1.9378667810760846, "learning_rate": 5.7627118644067805e-06, "loss": 0.7426, "step": 34 }, { "epoch": 0.017939518195797026, "grad_norm": 1.841152105084804, "learning_rate": 5.932203389830509e-06, "loss": 0.6954, "step": 35 }, { "epoch": 0.018452075858534086, "grad_norm": 1.6562606015718868, "learning_rate": 6.1016949152542385e-06, "loss": 0.668, "step": 36 }, { "epoch": 0.018964633521271142, "grad_norm": 1.8874616203948182, "learning_rate": 6.271186440677966e-06, "loss": 0.7525, "step": 37 }, { "epoch": 0.019477191184008202, "grad_norm": 1.8065137985414856, "learning_rate": 6.440677966101695e-06, "loss": 0.7246, "step": 38 }, { "epoch": 0.01998974884674526, "grad_norm": 1.8181473283043865, "learning_rate": 6.610169491525424e-06, "loss": 0.6926, "step": 39 }, { "epoch": 0.020502306509482315, "grad_norm": 1.8093186944503992, "learning_rate": 6.779661016949153e-06, "loss": 0.6494, "step": 40 }, { "epoch": 0.021014864172219375, "grad_norm": 1.8417609463026514, "learning_rate": 6.949152542372882e-06, "loss": 0.7267, "step": 41 }, { "epoch": 0.021527421834956432, "grad_norm": 1.799135244210391, "learning_rate": 7.1186440677966106e-06, "loss": 0.6879, "step": 42 }, { "epoch": 0.022039979497693492, "grad_norm": 1.7278394393028809, "learning_rate": 7.288135593220339e-06, "loss": 0.7057, "step": 43 }, { "epoch": 0.02255253716043055, "grad_norm": 1.662616203805043, "learning_rate": 7.4576271186440685e-06, "loss": 0.685, "step": 44 }, { "epoch": 0.023065094823167605, "grad_norm": 1.7735047714164933, "learning_rate": 7.627118644067797e-06, "loss": 0.6994, "step": 45 }, { "epoch": 0.023577652485904665, "grad_norm": 1.6426481706072484, "learning_rate": 7.796610169491526e-06, "loss": 0.6443, "step": 46 }, { "epoch": 0.02409021014864172, "grad_norm": 1.858716106813801, "learning_rate": 7.966101694915255e-06, "loss": 0.7149, "step": 47 }, { "epoch": 0.02460276781137878, "grad_norm": 1.656358492911123, "learning_rate": 8.135593220338983e-06, "loss": 0.6502, "step": 48 }, { "epoch": 0.025115325474115838, "grad_norm": 1.68282341584063, "learning_rate": 8.305084745762712e-06, "loss": 0.6767, "step": 49 }, { "epoch": 0.025627883136852898, "grad_norm": 1.8164087593473721, "learning_rate": 8.47457627118644e-06, "loss": 0.6683, "step": 50 }, { "epoch": 0.026140440799589954, "grad_norm": 1.8972312838460823, "learning_rate": 8.64406779661017e-06, "loss": 0.6553, "step": 51 }, { "epoch": 0.02665299846232701, "grad_norm": 1.7835572790424346, "learning_rate": 8.8135593220339e-06, "loss": 0.7437, "step": 52 }, { "epoch": 0.02716555612506407, "grad_norm": 1.6414334689131926, "learning_rate": 8.983050847457628e-06, "loss": 0.6484, "step": 53 }, { "epoch": 0.027678113787801127, "grad_norm": 1.8440499849805279, "learning_rate": 9.152542372881356e-06, "loss": 0.6512, "step": 54 }, { "epoch": 0.028190671450538187, "grad_norm": 1.8384213747118674, "learning_rate": 9.322033898305085e-06, "loss": 0.6923, "step": 55 }, { "epoch": 0.028703229113275244, "grad_norm": 1.719742713684079, "learning_rate": 9.491525423728815e-06, "loss": 0.6692, "step": 56 }, { "epoch": 0.0292157867760123, "grad_norm": 1.7922275339710732, "learning_rate": 9.661016949152544e-06, "loss": 0.7138, "step": 57 }, { "epoch": 0.02972834443874936, "grad_norm": 1.8587654559735667, "learning_rate": 9.830508474576272e-06, "loss": 0.6555, "step": 58 }, { "epoch": 0.030240902101486417, "grad_norm": 1.7101441410157283, "learning_rate": 1e-05, "loss": 0.6597, "step": 59 }, { "epoch": 0.030753459764223477, "grad_norm": 1.7227967999889007, "learning_rate": 1.016949152542373e-05, "loss": 0.6655, "step": 60 }, { "epoch": 0.031266017426960536, "grad_norm": 1.7161154450111564, "learning_rate": 1.0338983050847458e-05, "loss": 0.6798, "step": 61 }, { "epoch": 0.03177857508969759, "grad_norm": 1.6486223488243923, "learning_rate": 1.0508474576271188e-05, "loss": 0.6486, "step": 62 }, { "epoch": 0.03229113275243465, "grad_norm": 1.8505970552096938, "learning_rate": 1.0677966101694917e-05, "loss": 0.6894, "step": 63 }, { "epoch": 0.032803690415171706, "grad_norm": 1.778818819321164, "learning_rate": 1.0847457627118645e-05, "loss": 0.6869, "step": 64 }, { "epoch": 0.03331624807790876, "grad_norm": 1.6436963727925962, "learning_rate": 1.1016949152542374e-05, "loss": 0.6553, "step": 65 }, { "epoch": 0.033828805740645826, "grad_norm": 1.7904607329018232, "learning_rate": 1.1186440677966102e-05, "loss": 0.6327, "step": 66 }, { "epoch": 0.03434136340338288, "grad_norm": 1.7019554084229158, "learning_rate": 1.1355932203389833e-05, "loss": 0.6445, "step": 67 }, { "epoch": 0.03485392106611994, "grad_norm": 1.7439249245206419, "learning_rate": 1.1525423728813561e-05, "loss": 0.6433, "step": 68 }, { "epoch": 0.035366478728856995, "grad_norm": 1.7232302638999377, "learning_rate": 1.169491525423729e-05, "loss": 0.6709, "step": 69 }, { "epoch": 0.03587903639159405, "grad_norm": 1.9430685318709107, "learning_rate": 1.1864406779661018e-05, "loss": 0.7504, "step": 70 }, { "epoch": 0.036391594054331115, "grad_norm": 1.7106330730956882, "learning_rate": 1.2033898305084745e-05, "loss": 0.6545, "step": 71 }, { "epoch": 0.03690415171706817, "grad_norm": 1.891608422523634, "learning_rate": 1.2203389830508477e-05, "loss": 0.6219, "step": 72 }, { "epoch": 0.03741670937980523, "grad_norm": 1.6568197661553454, "learning_rate": 1.2372881355932205e-05, "loss": 0.6007, "step": 73 }, { "epoch": 0.037929267042542285, "grad_norm": 1.613449328457924, "learning_rate": 1.2542372881355932e-05, "loss": 0.6475, "step": 74 }, { "epoch": 0.03844182470527934, "grad_norm": 1.6780532009281695, "learning_rate": 1.2711864406779661e-05, "loss": 0.6503, "step": 75 }, { "epoch": 0.038954382368016405, "grad_norm": 1.813282455092354, "learning_rate": 1.288135593220339e-05, "loss": 0.663, "step": 76 }, { "epoch": 0.03946694003075346, "grad_norm": 1.7263197599174094, "learning_rate": 1.305084745762712e-05, "loss": 0.6876, "step": 77 }, { "epoch": 0.03997949769349052, "grad_norm": 1.6765017098206776, "learning_rate": 1.3220338983050848e-05, "loss": 0.6744, "step": 78 }, { "epoch": 0.040492055356227574, "grad_norm": 1.5951158535446524, "learning_rate": 1.3389830508474577e-05, "loss": 0.6858, "step": 79 }, { "epoch": 0.04100461301896463, "grad_norm": 2.6603877413716868, "learning_rate": 1.3559322033898305e-05, "loss": 0.6963, "step": 80 }, { "epoch": 0.041517170681701694, "grad_norm": 1.785618118347696, "learning_rate": 1.3728813559322034e-05, "loss": 0.657, "step": 81 }, { "epoch": 0.04202972834443875, "grad_norm": 1.6127249337040477, "learning_rate": 1.3898305084745764e-05, "loss": 0.6742, "step": 82 }, { "epoch": 0.04254228600717581, "grad_norm": 1.5766500907756194, "learning_rate": 1.4067796610169493e-05, "loss": 0.6581, "step": 83 }, { "epoch": 0.043054843669912864, "grad_norm": 1.7772879341514807, "learning_rate": 1.4237288135593221e-05, "loss": 0.627, "step": 84 }, { "epoch": 0.04356740133264992, "grad_norm": 1.7132692754648, "learning_rate": 1.440677966101695e-05, "loss": 0.6282, "step": 85 }, { "epoch": 0.044079958995386984, "grad_norm": 1.6901266643945119, "learning_rate": 1.4576271186440678e-05, "loss": 0.6033, "step": 86 }, { "epoch": 0.04459251665812404, "grad_norm": 1.8136641554226098, "learning_rate": 1.4745762711864408e-05, "loss": 0.6892, "step": 87 }, { "epoch": 0.0451050743208611, "grad_norm": 1.7576444811167677, "learning_rate": 1.4915254237288137e-05, "loss": 0.6208, "step": 88 }, { "epoch": 0.04561763198359815, "grad_norm": 1.5964036250266795, "learning_rate": 1.5084745762711865e-05, "loss": 0.6198, "step": 89 }, { "epoch": 0.04613018964633521, "grad_norm": 1.7583762186329783, "learning_rate": 1.5254237288135594e-05, "loss": 0.621, "step": 90 }, { "epoch": 0.04664274730907227, "grad_norm": 1.7468723254146872, "learning_rate": 1.5423728813559326e-05, "loss": 0.6923, "step": 91 }, { "epoch": 0.04715530497180933, "grad_norm": 1.6638859909829853, "learning_rate": 1.5593220338983053e-05, "loss": 0.6211, "step": 92 }, { "epoch": 0.047667862634546386, "grad_norm": 1.845270039031081, "learning_rate": 1.576271186440678e-05, "loss": 0.6504, "step": 93 }, { "epoch": 0.04818042029728344, "grad_norm": 1.614695973238351, "learning_rate": 1.593220338983051e-05, "loss": 0.6489, "step": 94 }, { "epoch": 0.0486929779600205, "grad_norm": 1.5393945591998532, "learning_rate": 1.6101694915254237e-05, "loss": 0.6428, "step": 95 }, { "epoch": 0.04920553562275756, "grad_norm": 1.5717971234356334, "learning_rate": 1.6271186440677967e-05, "loss": 0.6301, "step": 96 }, { "epoch": 0.04971809328549462, "grad_norm": 1.5907960587510528, "learning_rate": 1.6440677966101697e-05, "loss": 0.633, "step": 97 }, { "epoch": 0.050230650948231675, "grad_norm": 1.611393844607098, "learning_rate": 1.6610169491525424e-05, "loss": 0.6219, "step": 98 }, { "epoch": 0.05074320861096873, "grad_norm": 1.6861853576553227, "learning_rate": 1.6779661016949154e-05, "loss": 0.6867, "step": 99 }, { "epoch": 0.051255766273705795, "grad_norm": 1.638088264989108, "learning_rate": 1.694915254237288e-05, "loss": 0.6265, "step": 100 }, { "epoch": 0.05176832393644285, "grad_norm": 1.4897546556398478, "learning_rate": 1.711864406779661e-05, "loss": 0.6225, "step": 101 }, { "epoch": 0.05228088159917991, "grad_norm": 1.6401798195912418, "learning_rate": 1.728813559322034e-05, "loss": 0.6499, "step": 102 }, { "epoch": 0.052793439261916965, "grad_norm": 1.5795535053414669, "learning_rate": 1.745762711864407e-05, "loss": 0.6368, "step": 103 }, { "epoch": 0.05330599692465402, "grad_norm": 1.543812691646017, "learning_rate": 1.76271186440678e-05, "loss": 0.5992, "step": 104 }, { "epoch": 0.053818554587391085, "grad_norm": 1.611694149464319, "learning_rate": 1.7796610169491526e-05, "loss": 0.6664, "step": 105 }, { "epoch": 0.05433111225012814, "grad_norm": 1.669027321820169, "learning_rate": 1.7966101694915256e-05, "loss": 0.6675, "step": 106 }, { "epoch": 0.0548436699128652, "grad_norm": 1.6716374191477528, "learning_rate": 1.8135593220338986e-05, "loss": 0.6567, "step": 107 }, { "epoch": 0.055356227575602254, "grad_norm": 1.5592071937201843, "learning_rate": 1.8305084745762713e-05, "loss": 0.6112, "step": 108 }, { "epoch": 0.05586878523833931, "grad_norm": 1.5321143532277295, "learning_rate": 1.8474576271186443e-05, "loss": 0.6322, "step": 109 }, { "epoch": 0.056381342901076374, "grad_norm": 1.6657905288785042, "learning_rate": 1.864406779661017e-05, "loss": 0.6315, "step": 110 }, { "epoch": 0.05689390056381343, "grad_norm": 1.671284266938336, "learning_rate": 1.88135593220339e-05, "loss": 0.6676, "step": 111 }, { "epoch": 0.05740645822655049, "grad_norm": 1.8080143679424183, "learning_rate": 1.898305084745763e-05, "loss": 0.6919, "step": 112 }, { "epoch": 0.057919015889287544, "grad_norm": 1.509727343070678, "learning_rate": 1.9152542372881357e-05, "loss": 0.6295, "step": 113 }, { "epoch": 0.0584315735520246, "grad_norm": 1.616383614678075, "learning_rate": 1.9322033898305087e-05, "loss": 0.5961, "step": 114 }, { "epoch": 0.058944131214761664, "grad_norm": 1.678998543558273, "learning_rate": 1.9491525423728814e-05, "loss": 0.7019, "step": 115 }, { "epoch": 0.05945668887749872, "grad_norm": 1.5918738755414439, "learning_rate": 1.9661016949152545e-05, "loss": 0.663, "step": 116 }, { "epoch": 0.05996924654023578, "grad_norm": 1.7977382229346006, "learning_rate": 1.9830508474576275e-05, "loss": 0.6516, "step": 117 }, { "epoch": 0.06048180420297283, "grad_norm": 1.6673994351374344, "learning_rate": 2e-05, "loss": 0.687, "step": 118 }, { "epoch": 0.06099436186570989, "grad_norm": 1.495754189740338, "learning_rate": 1.9999996553585817e-05, "loss": 0.621, "step": 119 }, { "epoch": 0.06150691952844695, "grad_norm": 1.616804021723521, "learning_rate": 1.999998621434563e-05, "loss": 0.6509, "step": 120 }, { "epoch": 0.06201947719118401, "grad_norm": 1.5479096758455746, "learning_rate": 1.9999968982286576e-05, "loss": 0.653, "step": 121 }, { "epoch": 0.06253203485392107, "grad_norm": 1.5891237398622962, "learning_rate": 1.9999944857420527e-05, "loss": 0.6659, "step": 122 }, { "epoch": 0.06304459251665813, "grad_norm": 1.748680173979723, "learning_rate": 1.9999913839764117e-05, "loss": 0.6999, "step": 123 }, { "epoch": 0.06355715017939519, "grad_norm": 1.680320447198205, "learning_rate": 1.9999875929338718e-05, "loss": 0.6363, "step": 124 }, { "epoch": 0.06406970784213224, "grad_norm": 1.7023265979562057, "learning_rate": 1.999983112617047e-05, "loss": 0.6304, "step": 125 }, { "epoch": 0.0645822655048693, "grad_norm": 1.5514635690412786, "learning_rate": 1.9999779430290247e-05, "loss": 0.6418, "step": 126 }, { "epoch": 0.06509482316760636, "grad_norm": 1.6829968219196538, "learning_rate": 1.9999720841733688e-05, "loss": 0.6446, "step": 127 }, { "epoch": 0.06560738083034341, "grad_norm": 1.6362738685088842, "learning_rate": 1.999965536054117e-05, "loss": 0.6386, "step": 128 }, { "epoch": 0.06611993849308047, "grad_norm": 1.5938771688393008, "learning_rate": 1.999958298675784e-05, "loss": 0.5888, "step": 129 }, { "epoch": 0.06663249615581752, "grad_norm": 1.4643367772205251, "learning_rate": 1.9999503720433575e-05, "loss": 0.5919, "step": 130 }, { "epoch": 0.06714505381855458, "grad_norm": 1.4922796615061455, "learning_rate": 1.999941756162301e-05, "loss": 0.6506, "step": 131 }, { "epoch": 0.06765761148129165, "grad_norm": 1.902006839527552, "learning_rate": 1.9999324510385543e-05, "loss": 0.6519, "step": 132 }, { "epoch": 0.06817016914402871, "grad_norm": 1.5233357439224147, "learning_rate": 1.9999224566785302e-05, "loss": 0.6448, "step": 133 }, { "epoch": 0.06868272680676576, "grad_norm": 1.6716853649083223, "learning_rate": 1.999911773089118e-05, "loss": 0.66, "step": 134 }, { "epoch": 0.06919528446950282, "grad_norm": 1.5213331155963077, "learning_rate": 1.9999004002776822e-05, "loss": 0.6135, "step": 135 }, { "epoch": 0.06970784213223988, "grad_norm": 1.4429896714847446, "learning_rate": 1.999888338252061e-05, "loss": 0.665, "step": 136 }, { "epoch": 0.07022039979497693, "grad_norm": 1.484756733194533, "learning_rate": 1.9998755870205694e-05, "loss": 0.6675, "step": 137 }, { "epoch": 0.07073295745771399, "grad_norm": 1.5017620767000548, "learning_rate": 1.999862146591996e-05, "loss": 0.6186, "step": 138 }, { "epoch": 0.07124551512045105, "grad_norm": 1.4718737462244087, "learning_rate": 1.9998480169756052e-05, "loss": 0.6375, "step": 139 }, { "epoch": 0.0717580727831881, "grad_norm": 1.4637403142007392, "learning_rate": 1.999833198181137e-05, "loss": 0.7091, "step": 140 }, { "epoch": 0.07227063044592516, "grad_norm": 1.5137493229080259, "learning_rate": 1.9998176902188042e-05, "loss": 0.6242, "step": 141 }, { "epoch": 0.07278318810866223, "grad_norm": 1.5202328993917977, "learning_rate": 1.9998014930992976e-05, "loss": 0.6029, "step": 142 }, { "epoch": 0.07329574577139929, "grad_norm": 1.3409124794676677, "learning_rate": 1.999784606833781e-05, "loss": 0.6355, "step": 143 }, { "epoch": 0.07380830343413634, "grad_norm": 1.6168467134246405, "learning_rate": 1.9997670314338936e-05, "loss": 0.6443, "step": 144 }, { "epoch": 0.0743208610968734, "grad_norm": 1.4167226060581926, "learning_rate": 1.9997487669117506e-05, "loss": 0.6706, "step": 145 }, { "epoch": 0.07483341875961046, "grad_norm": 1.53361249358829, "learning_rate": 1.9997298132799408e-05, "loss": 0.6751, "step": 146 }, { "epoch": 0.07534597642234751, "grad_norm": 1.5700066252420992, "learning_rate": 1.999710170551529e-05, "loss": 0.674, "step": 147 }, { "epoch": 0.07585853408508457, "grad_norm": 1.3936211221702701, "learning_rate": 1.999689838740054e-05, "loss": 0.6243, "step": 148 }, { "epoch": 0.07637109174782163, "grad_norm": 1.4603278996521531, "learning_rate": 1.9996688178595303e-05, "loss": 0.6633, "step": 149 }, { "epoch": 0.07688364941055868, "grad_norm": 1.6234963041842376, "learning_rate": 1.9996471079244477e-05, "loss": 0.6223, "step": 150 }, { "epoch": 0.07739620707329574, "grad_norm": 1.5663766635608147, "learning_rate": 1.9996247089497703e-05, "loss": 0.6377, "step": 151 }, { "epoch": 0.07790876473603281, "grad_norm": 1.353577305652699, "learning_rate": 1.999601620950937e-05, "loss": 0.6178, "step": 152 }, { "epoch": 0.07842132239876987, "grad_norm": 1.5873668539123265, "learning_rate": 1.999577843943863e-05, "loss": 0.6578, "step": 153 }, { "epoch": 0.07893388006150692, "grad_norm": 1.52157823309216, "learning_rate": 1.999553377944936e-05, "loss": 0.6082, "step": 154 }, { "epoch": 0.07944643772424398, "grad_norm": 1.6444003119206199, "learning_rate": 1.9995282229710208e-05, "loss": 0.7125, "step": 155 }, { "epoch": 0.07995899538698104, "grad_norm": 1.4793453690141878, "learning_rate": 1.9995023790394558e-05, "loss": 0.6515, "step": 156 }, { "epoch": 0.08047155304971809, "grad_norm": 1.3206250069475216, "learning_rate": 1.999475846168055e-05, "loss": 0.5999, "step": 157 }, { "epoch": 0.08098411071245515, "grad_norm": 1.5648340996999337, "learning_rate": 1.9994486243751076e-05, "loss": 0.6443, "step": 158 }, { "epoch": 0.0814966683751922, "grad_norm": 1.5511972351488015, "learning_rate": 1.9994207136793763e-05, "loss": 0.6776, "step": 159 }, { "epoch": 0.08200922603792926, "grad_norm": 1.4340281279690061, "learning_rate": 1.9993921141001003e-05, "loss": 0.6284, "step": 160 }, { "epoch": 0.08252178370066633, "grad_norm": 1.4236562889572075, "learning_rate": 1.999362825656992e-05, "loss": 0.6356, "step": 161 }, { "epoch": 0.08303434136340339, "grad_norm": 1.4059303561001426, "learning_rate": 1.9993328483702393e-05, "loss": 0.5784, "step": 162 }, { "epoch": 0.08354689902614044, "grad_norm": 1.4734244158531686, "learning_rate": 1.999302182260506e-05, "loss": 0.6548, "step": 163 }, { "epoch": 0.0840594566888775, "grad_norm": 1.4958562767612291, "learning_rate": 1.999270827348929e-05, "loss": 0.588, "step": 164 }, { "epoch": 0.08457201435161456, "grad_norm": 1.3961253177403927, "learning_rate": 1.999238783657121e-05, "loss": 0.6167, "step": 165 }, { "epoch": 0.08508457201435161, "grad_norm": 1.4276271675254508, "learning_rate": 1.999206051207169e-05, "loss": 0.6244, "step": 166 }, { "epoch": 0.08559712967708867, "grad_norm": 1.3117861396146993, "learning_rate": 1.999172630021635e-05, "loss": 0.6246, "step": 167 }, { "epoch": 0.08610968733982573, "grad_norm": 1.383986024743241, "learning_rate": 1.9991385201235552e-05, "loss": 0.6494, "step": 168 }, { "epoch": 0.08662224500256278, "grad_norm": 1.456880512367855, "learning_rate": 1.999103721536442e-05, "loss": 0.6372, "step": 169 }, { "epoch": 0.08713480266529984, "grad_norm": 1.429354848157193, "learning_rate": 1.9990682342842805e-05, "loss": 0.6539, "step": 170 }, { "epoch": 0.08764736032803691, "grad_norm": 1.3883147789599242, "learning_rate": 1.999032058391532e-05, "loss": 0.6413, "step": 171 }, { "epoch": 0.08815991799077397, "grad_norm": 1.482835247201447, "learning_rate": 1.9989951938831315e-05, "loss": 0.7078, "step": 172 }, { "epoch": 0.08867247565351102, "grad_norm": 1.3053975610574333, "learning_rate": 1.9989576407844894e-05, "loss": 0.6377, "step": 173 }, { "epoch": 0.08918503331624808, "grad_norm": 1.3202584947895588, "learning_rate": 1.99891939912149e-05, "loss": 0.6384, "step": 174 }, { "epoch": 0.08969759097898514, "grad_norm": 1.3590936065678239, "learning_rate": 1.9988804689204934e-05, "loss": 0.6525, "step": 175 }, { "epoch": 0.0902101486417222, "grad_norm": 1.4433479335031323, "learning_rate": 1.9988408502083328e-05, "loss": 0.6446, "step": 176 }, { "epoch": 0.09072270630445925, "grad_norm": 1.5305964443843239, "learning_rate": 1.998800543012317e-05, "loss": 0.6555, "step": 177 }, { "epoch": 0.0912352639671963, "grad_norm": 1.412502449411404, "learning_rate": 1.9987595473602292e-05, "loss": 0.6173, "step": 178 }, { "epoch": 0.09174782162993336, "grad_norm": 1.4823520658735219, "learning_rate": 1.9987178632803265e-05, "loss": 0.6003, "step": 179 }, { "epoch": 0.09226037929267042, "grad_norm": 1.4706508098687003, "learning_rate": 1.9986754908013415e-05, "loss": 0.6806, "step": 180 }, { "epoch": 0.09277293695540749, "grad_norm": 1.2813837407733693, "learning_rate": 1.9986324299524807e-05, "loss": 0.6114, "step": 181 }, { "epoch": 0.09328549461814455, "grad_norm": 1.3571207056438488, "learning_rate": 1.9985886807634246e-05, "loss": 0.6026, "step": 182 }, { "epoch": 0.0937980522808816, "grad_norm": 1.4383389660704353, "learning_rate": 1.99854424326433e-05, "loss": 0.646, "step": 183 }, { "epoch": 0.09431060994361866, "grad_norm": 1.4753891530991736, "learning_rate": 1.998499117485826e-05, "loss": 0.6089, "step": 184 }, { "epoch": 0.09482316760635572, "grad_norm": 1.457100061964455, "learning_rate": 1.998453303459017e-05, "loss": 0.5894, "step": 185 }, { "epoch": 0.09533572526909277, "grad_norm": 1.3351752933440146, "learning_rate": 1.9984068012154824e-05, "loss": 0.6273, "step": 186 }, { "epoch": 0.09584828293182983, "grad_norm": 1.3738149399030364, "learning_rate": 1.9983596107872748e-05, "loss": 0.5901, "step": 187 }, { "epoch": 0.09636084059456688, "grad_norm": 1.337521569877749, "learning_rate": 1.9983117322069226e-05, "loss": 0.6561, "step": 188 }, { "epoch": 0.09687339825730394, "grad_norm": 1.342048888053643, "learning_rate": 1.9982631655074264e-05, "loss": 0.5579, "step": 189 }, { "epoch": 0.097385955920041, "grad_norm": 1.3528795507729003, "learning_rate": 1.9982139107222634e-05, "loss": 0.5976, "step": 190 }, { "epoch": 0.09789851358277807, "grad_norm": 1.4619823167002932, "learning_rate": 1.998163967885384e-05, "loss": 0.6473, "step": 191 }, { "epoch": 0.09841107124551512, "grad_norm": 1.3801581164818364, "learning_rate": 1.9981133370312123e-05, "loss": 0.6293, "step": 192 }, { "epoch": 0.09892362890825218, "grad_norm": 1.4158208428131143, "learning_rate": 1.9980620181946476e-05, "loss": 0.6402, "step": 193 }, { "epoch": 0.09943618657098924, "grad_norm": 1.2995696767606282, "learning_rate": 1.9980100114110637e-05, "loss": 0.5793, "step": 194 }, { "epoch": 0.0999487442337263, "grad_norm": 1.4559803520649093, "learning_rate": 1.997957316716307e-05, "loss": 0.6788, "step": 195 }, { "epoch": 0.10046130189646335, "grad_norm": 1.342973522913356, "learning_rate": 1.9979039341466997e-05, "loss": 0.5977, "step": 196 }, { "epoch": 0.10097385955920041, "grad_norm": 1.338576424679714, "learning_rate": 1.9978498637390375e-05, "loss": 0.6426, "step": 197 }, { "epoch": 0.10148641722193746, "grad_norm": 1.2685053077105197, "learning_rate": 1.99779510553059e-05, "loss": 0.5633, "step": 198 }, { "epoch": 0.10199897488467452, "grad_norm": 1.3265306403933568, "learning_rate": 1.997739659559101e-05, "loss": 0.6226, "step": 199 }, { "epoch": 0.10251153254741159, "grad_norm": 1.452580928678974, "learning_rate": 1.9976835258627884e-05, "loss": 0.6262, "step": 200 }, { "epoch": 0.10302409021014865, "grad_norm": 1.3294647437412166, "learning_rate": 1.9976267044803442e-05, "loss": 0.5989, "step": 201 }, { "epoch": 0.1035366478728857, "grad_norm": 1.415152709746015, "learning_rate": 1.9975691954509347e-05, "loss": 0.6124, "step": 202 }, { "epoch": 0.10404920553562276, "grad_norm": 1.4224128296445235, "learning_rate": 1.9975109988142e-05, "loss": 0.5784, "step": 203 }, { "epoch": 0.10456176319835982, "grad_norm": 1.3740166062118624, "learning_rate": 1.9974521146102535e-05, "loss": 0.5549, "step": 204 }, { "epoch": 0.10507432086109687, "grad_norm": 1.4078536901833714, "learning_rate": 1.9973925428796837e-05, "loss": 0.6343, "step": 205 }, { "epoch": 0.10558687852383393, "grad_norm": 1.5145121731784046, "learning_rate": 1.9973322836635517e-05, "loss": 0.6567, "step": 206 }, { "epoch": 0.10609943618657099, "grad_norm": 1.412212290868194, "learning_rate": 1.9972713370033937e-05, "loss": 0.6132, "step": 207 }, { "epoch": 0.10661199384930804, "grad_norm": 1.2544810139192875, "learning_rate": 1.997209702941219e-05, "loss": 0.5676, "step": 208 }, { "epoch": 0.1071245515120451, "grad_norm": 2.127393874828922, "learning_rate": 1.9971473815195106e-05, "loss": 0.5892, "step": 209 }, { "epoch": 0.10763710917478217, "grad_norm": 1.48906047519196, "learning_rate": 1.997084372781226e-05, "loss": 0.6596, "step": 210 }, { "epoch": 0.10814966683751923, "grad_norm": 1.422564975362822, "learning_rate": 1.9970206767697958e-05, "loss": 0.6238, "step": 211 }, { "epoch": 0.10866222450025628, "grad_norm": 1.3578459883833842, "learning_rate": 1.9969562935291248e-05, "loss": 0.6125, "step": 212 }, { "epoch": 0.10917478216299334, "grad_norm": 1.394528734780358, "learning_rate": 1.996891223103591e-05, "loss": 0.6488, "step": 213 }, { "epoch": 0.1096873398257304, "grad_norm": 1.5136671639108694, "learning_rate": 1.9968254655380465e-05, "loss": 0.6416, "step": 214 }, { "epoch": 0.11019989748846745, "grad_norm": 1.2317961744659476, "learning_rate": 1.996759020877817e-05, "loss": 0.5968, "step": 215 }, { "epoch": 0.11071245515120451, "grad_norm": 1.3922354761654023, "learning_rate": 1.996691889168701e-05, "loss": 0.6017, "step": 216 }, { "epoch": 0.11122501281394157, "grad_norm": 1.3811859832440434, "learning_rate": 1.9966240704569722e-05, "loss": 0.6113, "step": 217 }, { "epoch": 0.11173757047667862, "grad_norm": 1.2911749837865623, "learning_rate": 1.996555564789376e-05, "loss": 0.5957, "step": 218 }, { "epoch": 0.11225012813941568, "grad_norm": 1.3488957849173755, "learning_rate": 1.996486372213133e-05, "loss": 0.5926, "step": 219 }, { "epoch": 0.11276268580215275, "grad_norm": 1.2568423807229023, "learning_rate": 1.9964164927759354e-05, "loss": 0.6249, "step": 220 }, { "epoch": 0.1132752434648898, "grad_norm": 1.3184349543505904, "learning_rate": 1.9963459265259512e-05, "loss": 0.5878, "step": 221 }, { "epoch": 0.11378780112762686, "grad_norm": 1.3650553050729568, "learning_rate": 1.996274673511819e-05, "loss": 0.585, "step": 222 }, { "epoch": 0.11430035879036392, "grad_norm": 1.231393811747069, "learning_rate": 1.9962027337826538e-05, "loss": 0.5628, "step": 223 }, { "epoch": 0.11481291645310097, "grad_norm": 1.3980522789339116, "learning_rate": 1.9961301073880413e-05, "loss": 0.6249, "step": 224 }, { "epoch": 0.11532547411583803, "grad_norm": 1.5780195969910644, "learning_rate": 1.9960567943780423e-05, "loss": 0.6451, "step": 225 }, { "epoch": 0.11583803177857509, "grad_norm": 1.3762688119647166, "learning_rate": 1.99598279480319e-05, "loss": 0.5802, "step": 226 }, { "epoch": 0.11635058944131214, "grad_norm": 1.318040523151673, "learning_rate": 1.995908108714491e-05, "loss": 0.596, "step": 227 }, { "epoch": 0.1168631471040492, "grad_norm": 1.375097393060397, "learning_rate": 1.9958327361634248e-05, "loss": 0.6083, "step": 228 }, { "epoch": 0.11737570476678626, "grad_norm": 1.2686208272823367, "learning_rate": 1.995756677201945e-05, "loss": 0.5956, "step": 229 }, { "epoch": 0.11788826242952333, "grad_norm": 1.398161778232072, "learning_rate": 1.9956799318824776e-05, "loss": 0.6539, "step": 230 }, { "epoch": 0.11840082009226038, "grad_norm": 1.4505833395303078, "learning_rate": 1.9956025002579214e-05, "loss": 0.622, "step": 231 }, { "epoch": 0.11891337775499744, "grad_norm": 1.2298818681398136, "learning_rate": 1.995524382381649e-05, "loss": 0.5886, "step": 232 }, { "epoch": 0.1194259354177345, "grad_norm": 1.3335693148248855, "learning_rate": 1.995445578307506e-05, "loss": 0.5992, "step": 233 }, { "epoch": 0.11993849308047155, "grad_norm": 1.5238628614200085, "learning_rate": 1.99536608808981e-05, "loss": 0.6272, "step": 234 }, { "epoch": 0.12045105074320861, "grad_norm": 1.3135344705170855, "learning_rate": 1.9952859117833526e-05, "loss": 0.5891, "step": 235 }, { "epoch": 0.12096360840594567, "grad_norm": 1.328073184252166, "learning_rate": 1.9952050494433983e-05, "loss": 0.5541, "step": 236 }, { "epoch": 0.12147616606868272, "grad_norm": 1.350510360892268, "learning_rate": 1.9951235011256836e-05, "loss": 0.6465, "step": 237 }, { "epoch": 0.12198872373141978, "grad_norm": 1.3849798602171273, "learning_rate": 1.995041266886419e-05, "loss": 0.6551, "step": 238 }, { "epoch": 0.12250128139415685, "grad_norm": 1.4224156409287558, "learning_rate": 1.9949583467822863e-05, "loss": 0.6423, "step": 239 }, { "epoch": 0.1230138390568939, "grad_norm": 1.2864604937629316, "learning_rate": 1.9948747408704415e-05, "loss": 0.5894, "step": 240 }, { "epoch": 0.12352639671963096, "grad_norm": 1.3177695180302218, "learning_rate": 1.9947904492085122e-05, "loss": 0.6185, "step": 241 }, { "epoch": 0.12403895438236802, "grad_norm": 1.3996986327067802, "learning_rate": 1.9947054718545996e-05, "loss": 0.644, "step": 242 }, { "epoch": 0.12455151204510508, "grad_norm": 1.2770764849894127, "learning_rate": 1.9946198088672776e-05, "loss": 0.5974, "step": 243 }, { "epoch": 0.12506406970784215, "grad_norm": 1.302721693609211, "learning_rate": 1.994533460305591e-05, "loss": 0.5978, "step": 244 }, { "epoch": 0.1255766273705792, "grad_norm": 1.3308728194263169, "learning_rate": 1.9944464262290596e-05, "loss": 0.5962, "step": 245 }, { "epoch": 0.12608918503331626, "grad_norm": 1.2969797727666803, "learning_rate": 1.994358706697674e-05, "loss": 0.6025, "step": 246 }, { "epoch": 0.12660174269605332, "grad_norm": 1.2921387476097426, "learning_rate": 1.9942703017718977e-05, "loss": 0.6454, "step": 247 }, { "epoch": 0.12711430035879037, "grad_norm": 1.2205908086404658, "learning_rate": 1.9941812115126664e-05, "loss": 0.5659, "step": 248 }, { "epoch": 0.12762685802152743, "grad_norm": 1.3478386904156001, "learning_rate": 1.994091435981389e-05, "loss": 0.5864, "step": 249 }, { "epoch": 0.12813941568426448, "grad_norm": 1.2709241245204406, "learning_rate": 1.9940009752399462e-05, "loss": 0.6034, "step": 250 }, { "epoch": 0.12865197334700154, "grad_norm": 1.3115638970659091, "learning_rate": 1.9939098293506906e-05, "loss": 0.5507, "step": 251 }, { "epoch": 0.1291645310097386, "grad_norm": 1.1937769536438703, "learning_rate": 1.993817998376448e-05, "loss": 0.5819, "step": 252 }, { "epoch": 0.12967708867247565, "grad_norm": 1.296320691184083, "learning_rate": 1.9937254823805156e-05, "loss": 0.6089, "step": 253 }, { "epoch": 0.1301896463352127, "grad_norm": 1.438254918253199, "learning_rate": 1.9936322814266634e-05, "loss": 0.5559, "step": 254 }, { "epoch": 0.13070220399794977, "grad_norm": 1.3890534236230696, "learning_rate": 1.9935383955791326e-05, "loss": 0.6086, "step": 255 }, { "epoch": 0.13121476166068682, "grad_norm": 1.2507513520926092, "learning_rate": 1.993443824902638e-05, "loss": 0.5897, "step": 256 }, { "epoch": 0.13172731932342388, "grad_norm": 1.43255902297897, "learning_rate": 1.993348569462365e-05, "loss": 0.6192, "step": 257 }, { "epoch": 0.13223987698616094, "grad_norm": 1.2976212388888364, "learning_rate": 1.9932526293239713e-05, "loss": 0.541, "step": 258 }, { "epoch": 0.132752434648898, "grad_norm": 1.479937137654119, "learning_rate": 1.9931560045535873e-05, "loss": 0.6247, "step": 259 }, { "epoch": 0.13326499231163505, "grad_norm": 1.2640630167790585, "learning_rate": 1.9930586952178146e-05, "loss": 0.5915, "step": 260 }, { "epoch": 0.1337775499743721, "grad_norm": 1.2295357908898887, "learning_rate": 1.9929607013837268e-05, "loss": 0.6105, "step": 261 }, { "epoch": 0.13429010763710916, "grad_norm": 1.2971218663888922, "learning_rate": 1.9928620231188694e-05, "loss": 0.5478, "step": 262 }, { "epoch": 0.13480266529984622, "grad_norm": 1.3867719659355033, "learning_rate": 1.9927626604912594e-05, "loss": 0.6152, "step": 263 }, { "epoch": 0.1353152229625833, "grad_norm": 1.2666524153529515, "learning_rate": 1.9926626135693866e-05, "loss": 0.599, "step": 264 }, { "epoch": 0.13582778062532036, "grad_norm": 1.5781287185922233, "learning_rate": 1.9925618824222103e-05, "loss": 0.5795, "step": 265 }, { "epoch": 0.13634033828805742, "grad_norm": 1.3513706593645862, "learning_rate": 1.992460467119164e-05, "loss": 0.576, "step": 266 }, { "epoch": 0.13685289595079447, "grad_norm": 1.234426744746147, "learning_rate": 1.9923583677301507e-05, "loss": 0.6036, "step": 267 }, { "epoch": 0.13736545361353153, "grad_norm": 1.2863970022093731, "learning_rate": 1.9922555843255463e-05, "loss": 0.6409, "step": 268 }, { "epoch": 0.13787801127626859, "grad_norm": 1.3728494804984566, "learning_rate": 1.9921521169761974e-05, "loss": 0.6101, "step": 269 }, { "epoch": 0.13839056893900564, "grad_norm": 1.2559629378374657, "learning_rate": 1.992047965753422e-05, "loss": 0.6166, "step": 270 }, { "epoch": 0.1389031266017427, "grad_norm": 1.1967351427378665, "learning_rate": 1.9919431307290105e-05, "loss": 0.5667, "step": 271 }, { "epoch": 0.13941568426447976, "grad_norm": 1.2065200632926398, "learning_rate": 1.991837611975223e-05, "loss": 0.5746, "step": 272 }, { "epoch": 0.1399282419272168, "grad_norm": 1.2290165093547978, "learning_rate": 1.991731409564792e-05, "loss": 0.549, "step": 273 }, { "epoch": 0.14044079958995387, "grad_norm": 1.280935180774135, "learning_rate": 1.991624523570922e-05, "loss": 0.621, "step": 274 }, { "epoch": 0.14095335725269093, "grad_norm": 1.2533300864016401, "learning_rate": 1.9915169540672857e-05, "loss": 0.5881, "step": 275 }, { "epoch": 0.14146591491542798, "grad_norm": 1.3652372310923393, "learning_rate": 1.9914087011280308e-05, "loss": 0.6192, "step": 276 }, { "epoch": 0.14197847257816504, "grad_norm": 1.3484048588323074, "learning_rate": 1.9912997648277736e-05, "loss": 0.6134, "step": 277 }, { "epoch": 0.1424910302409021, "grad_norm": 1.400567956686028, "learning_rate": 1.9911901452416012e-05, "loss": 0.5881, "step": 278 }, { "epoch": 0.14300358790363915, "grad_norm": 1.4985111110250653, "learning_rate": 1.9910798424450734e-05, "loss": 0.6073, "step": 279 }, { "epoch": 0.1435161455663762, "grad_norm": 1.304180802629035, "learning_rate": 1.99096885651422e-05, "loss": 0.5682, "step": 280 }, { "epoch": 0.14402870322911326, "grad_norm": 1.2290551665817706, "learning_rate": 1.9908571875255416e-05, "loss": 0.536, "step": 281 }, { "epoch": 0.14454126089185032, "grad_norm": 1.3049884836633907, "learning_rate": 1.9907448355560094e-05, "loss": 0.5649, "step": 282 }, { "epoch": 0.1450538185545874, "grad_norm": 1.2295512845820566, "learning_rate": 1.990631800683066e-05, "loss": 0.5451, "step": 283 }, { "epoch": 0.14556637621732446, "grad_norm": 1.2563032414392865, "learning_rate": 1.990518082984624e-05, "loss": 0.5979, "step": 284 }, { "epoch": 0.14607893388006152, "grad_norm": 1.2285054050918074, "learning_rate": 1.9904036825390672e-05, "loss": 0.6256, "step": 285 }, { "epoch": 0.14659149154279857, "grad_norm": 1.3004324835180967, "learning_rate": 1.9902885994252506e-05, "loss": 0.5795, "step": 286 }, { "epoch": 0.14710404920553563, "grad_norm": 1.2430341943888197, "learning_rate": 1.990172833722498e-05, "loss": 0.5905, "step": 287 }, { "epoch": 0.1476166068682727, "grad_norm": 1.2528182115706297, "learning_rate": 1.9900563855106055e-05, "loss": 0.5808, "step": 288 }, { "epoch": 0.14812916453100974, "grad_norm": 1.251912274794903, "learning_rate": 1.9899392548698385e-05, "loss": 0.5574, "step": 289 }, { "epoch": 0.1486417221937468, "grad_norm": 1.2623279357055315, "learning_rate": 1.989821441880933e-05, "loss": 0.5673, "step": 290 }, { "epoch": 0.14915427985648386, "grad_norm": 1.3008656505938996, "learning_rate": 1.9897029466250955e-05, "loss": 0.5532, "step": 291 }, { "epoch": 0.1496668375192209, "grad_norm": 1.3252634880169218, "learning_rate": 1.989583769184003e-05, "loss": 0.578, "step": 292 }, { "epoch": 0.15017939518195797, "grad_norm": 1.3233702885024665, "learning_rate": 1.9894639096398022e-05, "loss": 0.5825, "step": 293 }, { "epoch": 0.15069195284469503, "grad_norm": 1.3088508682554771, "learning_rate": 1.9893433680751105e-05, "loss": 0.5168, "step": 294 }, { "epoch": 0.15120451050743208, "grad_norm": 1.3099601927439466, "learning_rate": 1.989222144573015e-05, "loss": 0.5953, "step": 295 }, { "epoch": 0.15171706817016914, "grad_norm": 1.0478160552145592, "learning_rate": 1.9891002392170727e-05, "loss": 0.5203, "step": 296 }, { "epoch": 0.1522296258329062, "grad_norm": 1.28044877000418, "learning_rate": 1.988977652091311e-05, "loss": 0.573, "step": 297 }, { "epoch": 0.15274218349564325, "grad_norm": 1.3071271299945704, "learning_rate": 1.9888543832802277e-05, "loss": 0.6165, "step": 298 }, { "epoch": 0.1532547411583803, "grad_norm": 1.158080896184805, "learning_rate": 1.9887304328687892e-05, "loss": 0.5801, "step": 299 }, { "epoch": 0.15376729882111737, "grad_norm": 1.2689416185743845, "learning_rate": 1.9886058009424323e-05, "loss": 0.6097, "step": 300 }, { "epoch": 0.15427985648385442, "grad_norm": 1.1704422006564141, "learning_rate": 1.9884804875870645e-05, "loss": 0.5299, "step": 301 }, { "epoch": 0.15479241414659148, "grad_norm": 1.1313740553192562, "learning_rate": 1.9883544928890612e-05, "loss": 0.5468, "step": 302 }, { "epoch": 0.15530497180932856, "grad_norm": 1.2203114470334908, "learning_rate": 1.988227816935269e-05, "loss": 0.5911, "step": 303 }, { "epoch": 0.15581752947206562, "grad_norm": 1.1863647760130518, "learning_rate": 1.988100459813003e-05, "loss": 0.5397, "step": 304 }, { "epoch": 0.15633008713480268, "grad_norm": 1.227833935721664, "learning_rate": 1.9879724216100488e-05, "loss": 0.5833, "step": 305 }, { "epoch": 0.15684264479753973, "grad_norm": 1.2420489131067822, "learning_rate": 1.9878437024146603e-05, "loss": 0.5274, "step": 306 }, { "epoch": 0.1573552024602768, "grad_norm": 1.284872821269742, "learning_rate": 1.987714302315562e-05, "loss": 0.592, "step": 307 }, { "epoch": 0.15786776012301384, "grad_norm": 1.2599983663941625, "learning_rate": 1.987584221401947e-05, "loss": 0.5397, "step": 308 }, { "epoch": 0.1583803177857509, "grad_norm": 1.2067791354614643, "learning_rate": 1.9874534597634776e-05, "loss": 0.568, "step": 309 }, { "epoch": 0.15889287544848796, "grad_norm": 1.2631810747897205, "learning_rate": 1.9873220174902857e-05, "loss": 0.65, "step": 310 }, { "epoch": 0.15940543311122501, "grad_norm": 1.2185204071087328, "learning_rate": 1.9871898946729728e-05, "loss": 0.5458, "step": 311 }, { "epoch": 0.15991799077396207, "grad_norm": 1.1647740192932121, "learning_rate": 1.9870570914026074e-05, "loss": 0.5716, "step": 312 }, { "epoch": 0.16043054843669913, "grad_norm": 1.326356607092497, "learning_rate": 1.9869236077707302e-05, "loss": 0.6129, "step": 313 }, { "epoch": 0.16094310609943618, "grad_norm": 1.1608375697385338, "learning_rate": 1.986789443869348e-05, "loss": 0.5579, "step": 314 }, { "epoch": 0.16145566376217324, "grad_norm": 1.3014445372929209, "learning_rate": 1.9866545997909387e-05, "loss": 0.5581, "step": 315 }, { "epoch": 0.1619682214249103, "grad_norm": 1.1612081614022527, "learning_rate": 1.9865190756284467e-05, "loss": 0.5544, "step": 316 }, { "epoch": 0.16248077908764735, "grad_norm": 1.2540831124276475, "learning_rate": 1.9863828714752877e-05, "loss": 0.5962, "step": 317 }, { "epoch": 0.1629933367503844, "grad_norm": 1.2360968361267566, "learning_rate": 1.9862459874253438e-05, "loss": 0.5892, "step": 318 }, { "epoch": 0.16350589441312147, "grad_norm": 1.0476597046283944, "learning_rate": 1.986108423572968e-05, "loss": 0.547, "step": 319 }, { "epoch": 0.16401845207585852, "grad_norm": 1.1751500387195264, "learning_rate": 1.9859701800129797e-05, "loss": 0.5841, "step": 320 }, { "epoch": 0.16453100973859558, "grad_norm": 1.1837184112075243, "learning_rate": 1.9858312568406686e-05, "loss": 0.5615, "step": 321 }, { "epoch": 0.16504356740133266, "grad_norm": 1.24008643654851, "learning_rate": 1.985691654151791e-05, "loss": 0.5244, "step": 322 }, { "epoch": 0.16555612506406972, "grad_norm": 1.2454943051633556, "learning_rate": 1.9855513720425742e-05, "loss": 0.5902, "step": 323 }, { "epoch": 0.16606868272680678, "grad_norm": 1.195854466704046, "learning_rate": 1.985410410609711e-05, "loss": 0.5956, "step": 324 }, { "epoch": 0.16658124038954383, "grad_norm": 1.217183296829736, "learning_rate": 1.9852687699503638e-05, "loss": 0.5395, "step": 325 }, { "epoch": 0.1670937980522809, "grad_norm": 1.3367311031328217, "learning_rate": 1.9851264501621635e-05, "loss": 0.6008, "step": 326 }, { "epoch": 0.16760635571501795, "grad_norm": 1.2911308277068838, "learning_rate": 1.9849834513432084e-05, "loss": 0.6046, "step": 327 }, { "epoch": 0.168118913377755, "grad_norm": 1.3275589535567887, "learning_rate": 1.9848397735920656e-05, "loss": 0.5786, "step": 328 }, { "epoch": 0.16863147104049206, "grad_norm": 1.0869629639879108, "learning_rate": 1.984695417007769e-05, "loss": 0.5374, "step": 329 }, { "epoch": 0.16914402870322912, "grad_norm": 1.2256601215789797, "learning_rate": 1.984550381689822e-05, "loss": 0.5727, "step": 330 }, { "epoch": 0.16965658636596617, "grad_norm": 1.2295539440053498, "learning_rate": 1.9844046677381936e-05, "loss": 0.5669, "step": 331 }, { "epoch": 0.17016914402870323, "grad_norm": 1.3663436895519694, "learning_rate": 1.984258275253323e-05, "loss": 0.6184, "step": 332 }, { "epoch": 0.17068170169144029, "grad_norm": 1.2563874839033824, "learning_rate": 1.984111204336116e-05, "loss": 0.6188, "step": 333 }, { "epoch": 0.17119425935417734, "grad_norm": 1.2330577383464432, "learning_rate": 1.983963455087946e-05, "loss": 0.5575, "step": 334 }, { "epoch": 0.1717068170169144, "grad_norm": 1.2575401870759362, "learning_rate": 1.983815027610654e-05, "loss": 0.5787, "step": 335 }, { "epoch": 0.17221937467965145, "grad_norm": 1.326614487873389, "learning_rate": 1.983665922006548e-05, "loss": 0.5724, "step": 336 }, { "epoch": 0.1727319323423885, "grad_norm": 1.1987072375194416, "learning_rate": 1.983516138378404e-05, "loss": 0.5923, "step": 337 }, { "epoch": 0.17324449000512557, "grad_norm": 1.1757978987105537, "learning_rate": 1.983365676829466e-05, "loss": 0.5862, "step": 338 }, { "epoch": 0.17375704766786262, "grad_norm": 1.261835867044061, "learning_rate": 1.9832145374634445e-05, "loss": 0.6003, "step": 339 }, { "epoch": 0.17426960533059968, "grad_norm": 1.2224709451223996, "learning_rate": 1.9830627203845164e-05, "loss": 0.5863, "step": 340 }, { "epoch": 0.17478216299333674, "grad_norm": 1.149198225344373, "learning_rate": 1.9829102256973274e-05, "loss": 0.549, "step": 341 }, { "epoch": 0.17529472065607382, "grad_norm": 1.156989768935487, "learning_rate": 1.982757053506989e-05, "loss": 0.5671, "step": 342 }, { "epoch": 0.17580727831881088, "grad_norm": 1.1952177606006076, "learning_rate": 1.9826032039190805e-05, "loss": 0.5814, "step": 343 }, { "epoch": 0.17631983598154793, "grad_norm": 1.1909257926016694, "learning_rate": 1.9824486770396477e-05, "loss": 0.5938, "step": 344 }, { "epoch": 0.176832393644285, "grad_norm": 1.2182745353393944, "learning_rate": 1.9822934729752036e-05, "loss": 0.5788, "step": 345 }, { "epoch": 0.17734495130702205, "grad_norm": 1.2273704475266052, "learning_rate": 1.9821375918327268e-05, "loss": 0.5658, "step": 346 }, { "epoch": 0.1778575089697591, "grad_norm": 1.2723896726373722, "learning_rate": 1.9819810337196644e-05, "loss": 0.6, "step": 347 }, { "epoch": 0.17837006663249616, "grad_norm": 1.3012892789453305, "learning_rate": 1.9818237987439287e-05, "loss": 0.5911, "step": 348 }, { "epoch": 0.17888262429523322, "grad_norm": 1.204449831003173, "learning_rate": 1.981665887013899e-05, "loss": 0.5205, "step": 349 }, { "epoch": 0.17939518195797027, "grad_norm": 1.2668332514633358, "learning_rate": 1.981507298638422e-05, "loss": 0.6141, "step": 350 }, { "epoch": 0.17990773962070733, "grad_norm": 1.2063286174217942, "learning_rate": 1.981348033726809e-05, "loss": 0.5423, "step": 351 }, { "epoch": 0.1804202972834444, "grad_norm": 1.3128519955348328, "learning_rate": 1.9811880923888388e-05, "loss": 0.5711, "step": 352 }, { "epoch": 0.18093285494618144, "grad_norm": 1.1895518890894756, "learning_rate": 1.9810274747347565e-05, "loss": 0.6083, "step": 353 }, { "epoch": 0.1814454126089185, "grad_norm": 1.1558018500527432, "learning_rate": 1.9808661808752735e-05, "loss": 0.5585, "step": 354 }, { "epoch": 0.18195797027165556, "grad_norm": 1.1603520178934397, "learning_rate": 1.9807042109215656e-05, "loss": 0.5603, "step": 355 }, { "epoch": 0.1824705279343926, "grad_norm": 1.1216125058238011, "learning_rate": 1.9805415649852767e-05, "loss": 0.5324, "step": 356 }, { "epoch": 0.18298308559712967, "grad_norm": 1.060916436105574, "learning_rate": 1.980378243178516e-05, "loss": 0.5082, "step": 357 }, { "epoch": 0.18349564325986673, "grad_norm": 1.254514563130757, "learning_rate": 1.980214245613858e-05, "loss": 0.5228, "step": 358 }, { "epoch": 0.18400820092260378, "grad_norm": 1.4381507929062303, "learning_rate": 1.980049572404344e-05, "loss": 0.5978, "step": 359 }, { "epoch": 0.18452075858534084, "grad_norm": 1.4471855989859128, "learning_rate": 1.9798842236634797e-05, "loss": 0.5097, "step": 360 }, { "epoch": 0.18503331624807792, "grad_norm": 1.238706814742061, "learning_rate": 1.9797181995052374e-05, "loss": 0.5465, "step": 361 }, { "epoch": 0.18554587391081498, "grad_norm": 1.351373231034825, "learning_rate": 1.979551500044055e-05, "loss": 0.5804, "step": 362 }, { "epoch": 0.18605843157355204, "grad_norm": 1.30884551142424, "learning_rate": 1.979384125394835e-05, "loss": 0.5609, "step": 363 }, { "epoch": 0.1865709892362891, "grad_norm": 1.2599508433563729, "learning_rate": 1.9792160756729468e-05, "loss": 0.5783, "step": 364 }, { "epoch": 0.18708354689902615, "grad_norm": 1.1175545855625628, "learning_rate": 1.9790473509942233e-05, "loss": 0.5136, "step": 365 }, { "epoch": 0.1875961045617632, "grad_norm": 1.2370148921536126, "learning_rate": 1.9788779514749635e-05, "loss": 0.5258, "step": 366 }, { "epoch": 0.18810866222450026, "grad_norm": 1.1941751378519956, "learning_rate": 1.9787078772319328e-05, "loss": 0.5621, "step": 367 }, { "epoch": 0.18862121988723732, "grad_norm": 1.2020478527019194, "learning_rate": 1.978537128382359e-05, "loss": 0.5509, "step": 368 }, { "epoch": 0.18913377754997437, "grad_norm": 1.263409193217691, "learning_rate": 1.978365705043937e-05, "loss": 0.5414, "step": 369 }, { "epoch": 0.18964633521271143, "grad_norm": 1.2859287363627423, "learning_rate": 1.978193607334826e-05, "loss": 0.5772, "step": 370 }, { "epoch": 0.1901588928754485, "grad_norm": 1.18664798266001, "learning_rate": 1.9780208353736493e-05, "loss": 0.5741, "step": 371 }, { "epoch": 0.19067145053818554, "grad_norm": 1.1456415237217885, "learning_rate": 1.977847389279497e-05, "loss": 0.5668, "step": 372 }, { "epoch": 0.1911840082009226, "grad_norm": 1.2784071043803313, "learning_rate": 1.9776732691719215e-05, "loss": 0.5453, "step": 373 }, { "epoch": 0.19169656586365966, "grad_norm": 1.230648477465709, "learning_rate": 1.977498475170941e-05, "loss": 0.5498, "step": 374 }, { "epoch": 0.1922091235263967, "grad_norm": 1.1907090155716664, "learning_rate": 1.977323007397038e-05, "loss": 0.5622, "step": 375 }, { "epoch": 0.19272168118913377, "grad_norm": 1.1467911013543617, "learning_rate": 1.9771468659711595e-05, "loss": 0.5062, "step": 376 }, { "epoch": 0.19323423885187083, "grad_norm": 1.1310832277821452, "learning_rate": 1.9769700510147173e-05, "loss": 0.5531, "step": 377 }, { "epoch": 0.19374679651460788, "grad_norm": 1.219891779388756, "learning_rate": 1.9767925626495857e-05, "loss": 0.5708, "step": 378 }, { "epoch": 0.19425935417734494, "grad_norm": 1.1839212567306883, "learning_rate": 1.976614400998105e-05, "loss": 0.5733, "step": 379 }, { "epoch": 0.194771911840082, "grad_norm": 1.1505418159642606, "learning_rate": 1.9764355661830796e-05, "loss": 0.5295, "step": 380 }, { "epoch": 0.19528446950281908, "grad_norm": 1.2576793534243933, "learning_rate": 1.9762560583277763e-05, "loss": 0.5743, "step": 381 }, { "epoch": 0.19579702716555614, "grad_norm": 1.2962299210017605, "learning_rate": 1.9760758775559275e-05, "loss": 0.5992, "step": 382 }, { "epoch": 0.1963095848282932, "grad_norm": 1.098899032785433, "learning_rate": 1.975895023991728e-05, "loss": 0.543, "step": 383 }, { "epoch": 0.19682214249103025, "grad_norm": 1.1596472391778136, "learning_rate": 1.9757134977598374e-05, "loss": 0.5598, "step": 384 }, { "epoch": 0.1973347001537673, "grad_norm": 1.176040504830086, "learning_rate": 1.975531298985379e-05, "loss": 0.5669, "step": 385 }, { "epoch": 0.19784725781650436, "grad_norm": 1.216729958849192, "learning_rate": 1.975348427793939e-05, "loss": 0.6031, "step": 386 }, { "epoch": 0.19835981547924142, "grad_norm": 1.229394387712876, "learning_rate": 1.975164884311567e-05, "loss": 0.5458, "step": 387 }, { "epoch": 0.19887237314197848, "grad_norm": 1.2669518600528746, "learning_rate": 1.974980668664777e-05, "loss": 0.572, "step": 388 }, { "epoch": 0.19938493080471553, "grad_norm": 1.2071295055774647, "learning_rate": 1.974795780980545e-05, "loss": 0.5647, "step": 389 }, { "epoch": 0.1998974884674526, "grad_norm": 1.1867443677959604, "learning_rate": 1.9746102213863113e-05, "loss": 0.5611, "step": 390 }, { "epoch": 0.20041004613018965, "grad_norm": 1.1277902866655807, "learning_rate": 1.974423990009979e-05, "loss": 0.5338, "step": 391 }, { "epoch": 0.2009226037929267, "grad_norm": 1.3113730462972488, "learning_rate": 1.9742370869799147e-05, "loss": 0.5438, "step": 392 }, { "epoch": 0.20143516145566376, "grad_norm": 1.1438389184149194, "learning_rate": 1.9740495124249462e-05, "loss": 0.5373, "step": 393 }, { "epoch": 0.20194771911840081, "grad_norm": 1.1872326105296502, "learning_rate": 1.973861266474366e-05, "loss": 0.5808, "step": 394 }, { "epoch": 0.20246027678113787, "grad_norm": 1.1621336353480112, "learning_rate": 1.9736723492579295e-05, "loss": 0.5922, "step": 395 }, { "epoch": 0.20297283444387493, "grad_norm": 1.2186343818207723, "learning_rate": 1.9734827609058533e-05, "loss": 0.5535, "step": 396 }, { "epoch": 0.20348539210661198, "grad_norm": 1.0966395904496529, "learning_rate": 1.9732925015488174e-05, "loss": 0.493, "step": 397 }, { "epoch": 0.20399794976934904, "grad_norm": 1.1889732834388305, "learning_rate": 1.9731015713179643e-05, "loss": 0.5515, "step": 398 }, { "epoch": 0.2045105074320861, "grad_norm": 1.412112372568811, "learning_rate": 1.9729099703449e-05, "loss": 0.5412, "step": 399 }, { "epoch": 0.20502306509482318, "grad_norm": 1.1469695403859153, "learning_rate": 1.9727176987616897e-05, "loss": 0.5332, "step": 400 }, { "epoch": 0.20553562275756024, "grad_norm": 1.1818709591022563, "learning_rate": 1.972524756700865e-05, "loss": 0.5422, "step": 401 }, { "epoch": 0.2060481804202973, "grad_norm": 1.1765102157737684, "learning_rate": 1.9723311442954163e-05, "loss": 0.5846, "step": 402 }, { "epoch": 0.20656073808303435, "grad_norm": 1.1700313566392921, "learning_rate": 1.9721368616787976e-05, "loss": 0.5618, "step": 403 }, { "epoch": 0.2070732957457714, "grad_norm": 1.2012993619528227, "learning_rate": 1.971941908984925e-05, "loss": 0.5551, "step": 404 }, { "epoch": 0.20758585340850846, "grad_norm": 1.256144837758276, "learning_rate": 1.9717462863481753e-05, "loss": 0.5461, "step": 405 }, { "epoch": 0.20809841107124552, "grad_norm": 1.0642219191441524, "learning_rate": 1.9715499939033883e-05, "loss": 0.5385, "step": 406 }, { "epoch": 0.20861096873398258, "grad_norm": 1.109363421017917, "learning_rate": 1.9713530317858647e-05, "loss": 0.5411, "step": 407 }, { "epoch": 0.20912352639671963, "grad_norm": 1.0995680595734107, "learning_rate": 1.9711554001313674e-05, "loss": 0.5263, "step": 408 }, { "epoch": 0.2096360840594567, "grad_norm": 1.1041524432925358, "learning_rate": 1.9709570990761207e-05, "loss": 0.5066, "step": 409 }, { "epoch": 0.21014864172219375, "grad_norm": 1.1646160926617648, "learning_rate": 1.9707581287568094e-05, "loss": 0.5637, "step": 410 }, { "epoch": 0.2106611993849308, "grad_norm": 1.2098977284180066, "learning_rate": 1.970558489310581e-05, "loss": 0.6445, "step": 411 }, { "epoch": 0.21117375704766786, "grad_norm": 1.3673382914978423, "learning_rate": 1.9703581808750436e-05, "loss": 0.6411, "step": 412 }, { "epoch": 0.21168631471040492, "grad_norm": 1.2003295290409546, "learning_rate": 1.9701572035882658e-05, "loss": 0.5412, "step": 413 }, { "epoch": 0.21219887237314197, "grad_norm": 1.1498375907226983, "learning_rate": 1.969955557588778e-05, "loss": 0.5568, "step": 414 }, { "epoch": 0.21271143003587903, "grad_norm": 1.1024164772668055, "learning_rate": 1.9697532430155718e-05, "loss": 0.5301, "step": 415 }, { "epoch": 0.21322398769861609, "grad_norm": 1.138592882237268, "learning_rate": 1.9695502600080983e-05, "loss": 0.5512, "step": 416 }, { "epoch": 0.21373654536135314, "grad_norm": 1.0833069362460297, "learning_rate": 1.969346608706271e-05, "loss": 0.4905, "step": 417 }, { "epoch": 0.2142491030240902, "grad_norm": 1.1161787469309827, "learning_rate": 1.9691422892504626e-05, "loss": 0.5212, "step": 418 }, { "epoch": 0.21476166068682725, "grad_norm": 1.3376263832702324, "learning_rate": 1.9689373017815076e-05, "loss": 0.553, "step": 419 }, { "epoch": 0.21527421834956434, "grad_norm": 1.258029897632316, "learning_rate": 1.9687316464406997e-05, "loss": 0.5889, "step": 420 }, { "epoch": 0.2157867760123014, "grad_norm": 1.1858539577752873, "learning_rate": 1.9685253233697943e-05, "loss": 0.5111, "step": 421 }, { "epoch": 0.21629933367503845, "grad_norm": 1.073311407517639, "learning_rate": 1.968318332711006e-05, "loss": 0.5015, "step": 422 }, { "epoch": 0.2168118913377755, "grad_norm": 1.2196276204294527, "learning_rate": 1.9681106746070096e-05, "loss": 0.5974, "step": 423 }, { "epoch": 0.21732444900051257, "grad_norm": 1.1186880180233258, "learning_rate": 1.967902349200941e-05, "loss": 0.5358, "step": 424 }, { "epoch": 0.21783700666324962, "grad_norm": 1.1520638547360842, "learning_rate": 1.9676933566363943e-05, "loss": 0.5813, "step": 425 }, { "epoch": 0.21834956432598668, "grad_norm": 1.1755133244297582, "learning_rate": 1.9674836970574253e-05, "loss": 0.4951, "step": 426 }, { "epoch": 0.21886212198872373, "grad_norm": 1.219531101720796, "learning_rate": 1.9672733706085488e-05, "loss": 0.5494, "step": 427 }, { "epoch": 0.2193746796514608, "grad_norm": 1.321875508776449, "learning_rate": 1.9670623774347387e-05, "loss": 0.5517, "step": 428 }, { "epoch": 0.21988723731419785, "grad_norm": 1.2036686994212094, "learning_rate": 1.9668507176814295e-05, "loss": 0.5479, "step": 429 }, { "epoch": 0.2203997949769349, "grad_norm": 1.1763204925378836, "learning_rate": 1.966638391494514e-05, "loss": 0.5302, "step": 430 }, { "epoch": 0.22091235263967196, "grad_norm": 1.0851885499194291, "learning_rate": 1.9664253990203456e-05, "loss": 0.5182, "step": 431 }, { "epoch": 0.22142491030240902, "grad_norm": 1.148210591732324, "learning_rate": 1.966211740405736e-05, "loss": 0.5177, "step": 432 }, { "epoch": 0.22193746796514607, "grad_norm": 1.2326386402139002, "learning_rate": 1.9659974157979566e-05, "loss": 0.5638, "step": 433 }, { "epoch": 0.22245002562788313, "grad_norm": 1.2187047411920229, "learning_rate": 1.9657824253447378e-05, "loss": 0.5465, "step": 434 }, { "epoch": 0.2229625832906202, "grad_norm": 1.4999089759087854, "learning_rate": 1.9655667691942684e-05, "loss": 0.6152, "step": 435 }, { "epoch": 0.22347514095335724, "grad_norm": 1.2124296163374242, "learning_rate": 1.9653504474951965e-05, "loss": 0.5616, "step": 436 }, { "epoch": 0.2239876986160943, "grad_norm": 1.2572805723207015, "learning_rate": 1.9651334603966298e-05, "loss": 0.563, "step": 437 }, { "epoch": 0.22450025627883136, "grad_norm": 1.1629832003662703, "learning_rate": 1.9649158080481327e-05, "loss": 0.5588, "step": 438 }, { "epoch": 0.22501281394156844, "grad_norm": 1.287169077484538, "learning_rate": 1.9646974905997295e-05, "loss": 0.5781, "step": 439 }, { "epoch": 0.2255253716043055, "grad_norm": 1.2164441696784356, "learning_rate": 1.964478508201903e-05, "loss": 0.5927, "step": 440 }, { "epoch": 0.22603792926704255, "grad_norm": 1.1116308187718889, "learning_rate": 1.964258861005594e-05, "loss": 0.491, "step": 441 }, { "epoch": 0.2265504869297796, "grad_norm": 1.1165493366767587, "learning_rate": 1.964038549162201e-05, "loss": 0.52, "step": 442 }, { "epoch": 0.22706304459251667, "grad_norm": 1.2244580532060843, "learning_rate": 1.9638175728235817e-05, "loss": 0.5454, "step": 443 }, { "epoch": 0.22757560225525372, "grad_norm": 1.2196700749448781, "learning_rate": 1.9635959321420514e-05, "loss": 0.5456, "step": 444 }, { "epoch": 0.22808815991799078, "grad_norm": 1.1155532210641013, "learning_rate": 1.9633736272703826e-05, "loss": 0.5663, "step": 445 }, { "epoch": 0.22860071758072784, "grad_norm": 1.2318539060234655, "learning_rate": 1.963150658361807e-05, "loss": 0.5854, "step": 446 }, { "epoch": 0.2291132752434649, "grad_norm": 1.2318138070596536, "learning_rate": 1.9629270255700127e-05, "loss": 0.5793, "step": 447 }, { "epoch": 0.22962583290620195, "grad_norm": 1.0931779599567186, "learning_rate": 1.962702729049146e-05, "loss": 0.5628, "step": 448 }, { "epoch": 0.230138390568939, "grad_norm": 1.223014894117777, "learning_rate": 1.9624777689538107e-05, "loss": 0.5465, "step": 449 }, { "epoch": 0.23065094823167606, "grad_norm": 1.1256433785840116, "learning_rate": 1.962252145439068e-05, "loss": 0.4734, "step": 450 }, { "epoch": 0.23116350589441312, "grad_norm": 1.074221530463085, "learning_rate": 1.9620258586604364e-05, "loss": 0.4872, "step": 451 }, { "epoch": 0.23167606355715017, "grad_norm": 1.1130740741118808, "learning_rate": 1.9617989087738913e-05, "loss": 0.5503, "step": 452 }, { "epoch": 0.23218862121988723, "grad_norm": 1.0639593124223303, "learning_rate": 1.9615712959358656e-05, "loss": 0.5106, "step": 453 }, { "epoch": 0.2327011788826243, "grad_norm": 1.2414508669477484, "learning_rate": 1.9613430203032486e-05, "loss": 0.5723, "step": 454 }, { "epoch": 0.23321373654536134, "grad_norm": 1.1578088523779477, "learning_rate": 1.9611140820333868e-05, "loss": 0.534, "step": 455 }, { "epoch": 0.2337262942080984, "grad_norm": 1.1577352725517185, "learning_rate": 1.960884481284084e-05, "loss": 0.5742, "step": 456 }, { "epoch": 0.23423885187083546, "grad_norm": 1.145038547329853, "learning_rate": 1.960654218213599e-05, "loss": 0.5436, "step": 457 }, { "epoch": 0.2347514095335725, "grad_norm": 1.1070094694415127, "learning_rate": 1.9604232929806493e-05, "loss": 0.5398, "step": 458 }, { "epoch": 0.2352639671963096, "grad_norm": 1.1115851280364755, "learning_rate": 1.960191705744407e-05, "loss": 0.5545, "step": 459 }, { "epoch": 0.23577652485904665, "grad_norm": 1.112002807267201, "learning_rate": 1.9599594566645016e-05, "loss": 0.5562, "step": 460 }, { "epoch": 0.2362890825217837, "grad_norm": 1.1268829189686638, "learning_rate": 1.9597265459010177e-05, "loss": 0.526, "step": 461 }, { "epoch": 0.23680164018452077, "grad_norm": 1.0784264125522323, "learning_rate": 1.9594929736144978e-05, "loss": 0.5564, "step": 462 }, { "epoch": 0.23731419784725782, "grad_norm": 1.1742340132739653, "learning_rate": 1.959258739965938e-05, "loss": 0.5846, "step": 463 }, { "epoch": 0.23782675550999488, "grad_norm": 1.1684818007511597, "learning_rate": 1.9590238451167927e-05, "loss": 0.5752, "step": 464 }, { "epoch": 0.23833931317273194, "grad_norm": 1.1296723554246146, "learning_rate": 1.95878828922897e-05, "loss": 0.5345, "step": 465 }, { "epoch": 0.238851870835469, "grad_norm": 1.1344244159696433, "learning_rate": 1.9585520724648354e-05, "loss": 0.4985, "step": 466 }, { "epoch": 0.23936442849820605, "grad_norm": 1.2341008597031773, "learning_rate": 1.9583151949872083e-05, "loss": 0.5924, "step": 467 }, { "epoch": 0.2398769861609431, "grad_norm": 1.0546055163265697, "learning_rate": 1.9580776569593646e-05, "loss": 0.4977, "step": 468 }, { "epoch": 0.24038954382368016, "grad_norm": 1.0901048260159891, "learning_rate": 1.9578394585450354e-05, "loss": 0.5908, "step": 469 }, { "epoch": 0.24090210148641722, "grad_norm": 1.080487417820968, "learning_rate": 1.957600599908406e-05, "loss": 0.54, "step": 470 }, { "epoch": 0.24141465914915428, "grad_norm": 1.0701738474250415, "learning_rate": 1.9573610812141183e-05, "loss": 0.5054, "step": 471 }, { "epoch": 0.24192721681189133, "grad_norm": 1.168253258535015, "learning_rate": 1.9571209026272683e-05, "loss": 0.5108, "step": 472 }, { "epoch": 0.2424397744746284, "grad_norm": 1.1325729805758706, "learning_rate": 1.9568800643134073e-05, "loss": 0.5474, "step": 473 }, { "epoch": 0.24295233213736545, "grad_norm": 1.1390376675315066, "learning_rate": 1.95663856643854e-05, "loss": 0.5391, "step": 474 }, { "epoch": 0.2434648898001025, "grad_norm": 1.1561504962596842, "learning_rate": 1.9563964091691275e-05, "loss": 0.6127, "step": 475 }, { "epoch": 0.24397744746283956, "grad_norm": 1.0160732725708592, "learning_rate": 1.9561535926720846e-05, "loss": 0.5336, "step": 476 }, { "epoch": 0.24449000512557661, "grad_norm": 1.0724825700509442, "learning_rate": 1.95591011711478e-05, "loss": 0.5542, "step": 477 }, { "epoch": 0.2450025627883137, "grad_norm": 1.1867681243869146, "learning_rate": 1.955665982665038e-05, "loss": 0.5341, "step": 478 }, { "epoch": 0.24551512045105076, "grad_norm": 1.0026805716874054, "learning_rate": 1.9554211894911363e-05, "loss": 0.5094, "step": 479 }, { "epoch": 0.2460276781137878, "grad_norm": 1.094441888220424, "learning_rate": 1.9551757377618056e-05, "loss": 0.5428, "step": 480 }, { "epoch": 0.24654023577652487, "grad_norm": 1.0950896428889896, "learning_rate": 1.9549296276462326e-05, "loss": 0.5173, "step": 481 }, { "epoch": 0.24705279343926193, "grad_norm": 1.1418046614094364, "learning_rate": 1.9546828593140565e-05, "loss": 0.5083, "step": 482 }, { "epoch": 0.24756535110199898, "grad_norm": 1.1942177782038323, "learning_rate": 1.95443543293537e-05, "loss": 0.5695, "step": 483 }, { "epoch": 0.24807790876473604, "grad_norm": 1.2000420038685193, "learning_rate": 1.954187348680721e-05, "loss": 0.5645, "step": 484 }, { "epoch": 0.2485904664274731, "grad_norm": 1.212561026280578, "learning_rate": 1.953938606721108e-05, "loss": 0.5465, "step": 485 }, { "epoch": 0.24910302409021015, "grad_norm": 1.1505381307557414, "learning_rate": 1.9536892072279863e-05, "loss": 0.5533, "step": 486 }, { "epoch": 0.2496155817529472, "grad_norm": 1.183983905950558, "learning_rate": 1.9534391503732617e-05, "loss": 0.5558, "step": 487 }, { "epoch": 0.2501281394156843, "grad_norm": 1.226991051735672, "learning_rate": 1.9531884363292944e-05, "loss": 0.5283, "step": 488 }, { "epoch": 0.25064069707842135, "grad_norm": 1.061586137409775, "learning_rate": 1.952937065268897e-05, "loss": 0.5669, "step": 489 }, { "epoch": 0.2511532547411584, "grad_norm": 1.0515699006234196, "learning_rate": 1.9526850373653356e-05, "loss": 0.5269, "step": 490 }, { "epoch": 0.25166581240389546, "grad_norm": 1.211944517047087, "learning_rate": 1.9524323527923284e-05, "loss": 0.5761, "step": 491 }, { "epoch": 0.2521783700666325, "grad_norm": 1.081902476135833, "learning_rate": 1.9521790117240472e-05, "loss": 0.5343, "step": 492 }, { "epoch": 0.2526909277293696, "grad_norm": 1.097826915801388, "learning_rate": 1.9519250143351146e-05, "loss": 0.5113, "step": 493 }, { "epoch": 0.25320348539210663, "grad_norm": 1.2266769104813666, "learning_rate": 1.9516703608006074e-05, "loss": 0.5207, "step": 494 }, { "epoch": 0.2537160430548437, "grad_norm": 1.1240930243054124, "learning_rate": 1.951415051296054e-05, "loss": 0.5504, "step": 495 }, { "epoch": 0.25422860071758074, "grad_norm": 1.1638330159304926, "learning_rate": 1.9511590859974344e-05, "loss": 0.5978, "step": 496 }, { "epoch": 0.2547411583803178, "grad_norm": 1.2622706912471613, "learning_rate": 1.9509024650811813e-05, "loss": 0.5367, "step": 497 }, { "epoch": 0.25525371604305486, "grad_norm": 1.2507217274538711, "learning_rate": 1.9506451887241787e-05, "loss": 0.6093, "step": 498 }, { "epoch": 0.2557662737057919, "grad_norm": 1.2131345295634686, "learning_rate": 1.9503872571037637e-05, "loss": 0.5641, "step": 499 }, { "epoch": 0.25627883136852897, "grad_norm": 1.183408466459945, "learning_rate": 1.9501286703977232e-05, "loss": 0.5082, "step": 500 }, { "epoch": 0.256791389031266, "grad_norm": 1.1858587009490882, "learning_rate": 1.9498694287842972e-05, "loss": 0.5947, "step": 501 }, { "epoch": 0.2573039466940031, "grad_norm": 1.181984281737634, "learning_rate": 1.949609532442176e-05, "loss": 0.5613, "step": 502 }, { "epoch": 0.25781650435674014, "grad_norm": 1.1779491533694588, "learning_rate": 1.949348981550502e-05, "loss": 0.5697, "step": 503 }, { "epoch": 0.2583290620194772, "grad_norm": 1.1555434390803587, "learning_rate": 1.9490877762888685e-05, "loss": 0.5035, "step": 504 }, { "epoch": 0.25884161968221425, "grad_norm": 1.2260284315113272, "learning_rate": 1.9488259168373198e-05, "loss": 0.5692, "step": 505 }, { "epoch": 0.2593541773449513, "grad_norm": 1.1559522285064046, "learning_rate": 1.9485634033763507e-05, "loss": 0.5554, "step": 506 }, { "epoch": 0.25986673500768837, "grad_norm": 1.1541934596221746, "learning_rate": 1.9483002360869082e-05, "loss": 0.5133, "step": 507 }, { "epoch": 0.2603792926704254, "grad_norm": 1.2289695708616932, "learning_rate": 1.9480364151503876e-05, "loss": 0.5601, "step": 508 }, { "epoch": 0.2608918503331625, "grad_norm": 1.1179529795251246, "learning_rate": 1.9477719407486372e-05, "loss": 0.5238, "step": 509 }, { "epoch": 0.26140440799589953, "grad_norm": 1.0447065609986352, "learning_rate": 1.9475068130639543e-05, "loss": 0.5194, "step": 510 }, { "epoch": 0.2619169656586366, "grad_norm": 1.1261048998730523, "learning_rate": 1.947241032279087e-05, "loss": 0.467, "step": 511 }, { "epoch": 0.26242952332137365, "grad_norm": 1.155989563504781, "learning_rate": 1.9469745985772333e-05, "loss": 0.5386, "step": 512 }, { "epoch": 0.2629420809841107, "grad_norm": 1.1145595623914561, "learning_rate": 1.9467075121420415e-05, "loss": 0.496, "step": 513 }, { "epoch": 0.26345463864684776, "grad_norm": 1.1856290979092603, "learning_rate": 1.9464397731576093e-05, "loss": 0.54, "step": 514 }, { "epoch": 0.2639671963095848, "grad_norm": 1.1840900421405183, "learning_rate": 1.9461713818084857e-05, "loss": 0.5745, "step": 515 }, { "epoch": 0.2644797539723219, "grad_norm": 1.1181966594665493, "learning_rate": 1.9459023382796667e-05, "loss": 0.5448, "step": 516 }, { "epoch": 0.26499231163505893, "grad_norm": 1.0664527330566065, "learning_rate": 1.9456326427566002e-05, "loss": 0.5199, "step": 517 }, { "epoch": 0.265504869297796, "grad_norm": 1.0695468789848697, "learning_rate": 1.945362295425183e-05, "loss": 0.5112, "step": 518 }, { "epoch": 0.26601742696053304, "grad_norm": 1.0810032743564895, "learning_rate": 1.9450912964717604e-05, "loss": 0.5289, "step": 519 }, { "epoch": 0.2665299846232701, "grad_norm": 1.1979884011922166, "learning_rate": 1.9448196460831274e-05, "loss": 0.5628, "step": 520 }, { "epoch": 0.26704254228600716, "grad_norm": 1.1566111707182078, "learning_rate": 1.944547344446528e-05, "loss": 0.4838, "step": 521 }, { "epoch": 0.2675550999487442, "grad_norm": 1.1981155927655909, "learning_rate": 1.944274391749655e-05, "loss": 0.5301, "step": 522 }, { "epoch": 0.26806765761148127, "grad_norm": 1.4403251473393759, "learning_rate": 1.9440007881806502e-05, "loss": 0.5261, "step": 523 }, { "epoch": 0.2685802152742183, "grad_norm": 1.07389288346436, "learning_rate": 1.9437265339281034e-05, "loss": 0.5161, "step": 524 }, { "epoch": 0.2690927729369554, "grad_norm": 1.2443769648686411, "learning_rate": 1.943451629181054e-05, "loss": 0.5776, "step": 525 }, { "epoch": 0.26960533059969244, "grad_norm": 1.169365237698263, "learning_rate": 1.9431760741289886e-05, "loss": 0.5362, "step": 526 }, { "epoch": 0.27011788826242955, "grad_norm": 1.1696210619218275, "learning_rate": 1.9428998689618428e-05, "loss": 0.4927, "step": 527 }, { "epoch": 0.2706304459251666, "grad_norm": 1.0432401987053435, "learning_rate": 1.9426230138699998e-05, "loss": 0.5031, "step": 528 }, { "epoch": 0.27114300358790366, "grad_norm": 1.0778497982640578, "learning_rate": 1.9423455090442917e-05, "loss": 0.4951, "step": 529 }, { "epoch": 0.2716555612506407, "grad_norm": 1.0186185257086446, "learning_rate": 1.942067354675997e-05, "loss": 0.5122, "step": 530 }, { "epoch": 0.2721681189133778, "grad_norm": 1.1145756624578966, "learning_rate": 1.9417885509568433e-05, "loss": 0.5201, "step": 531 }, { "epoch": 0.27268067657611483, "grad_norm": 1.1985413875539563, "learning_rate": 1.941509098079005e-05, "loss": 0.542, "step": 532 }, { "epoch": 0.2731932342388519, "grad_norm": 1.175931068807576, "learning_rate": 1.9412289962351042e-05, "loss": 0.5494, "step": 533 }, { "epoch": 0.27370579190158895, "grad_norm": 1.2138359513053114, "learning_rate": 1.9409482456182105e-05, "loss": 0.5111, "step": 534 }, { "epoch": 0.274218349564326, "grad_norm": 1.2586699665157597, "learning_rate": 1.94066684642184e-05, "loss": 0.594, "step": 535 }, { "epoch": 0.27473090722706306, "grad_norm": 1.1768326367135526, "learning_rate": 1.940384798839957e-05, "loss": 0.5157, "step": 536 }, { "epoch": 0.2752434648898001, "grad_norm": 1.0619968478868693, "learning_rate": 1.9401021030669715e-05, "loss": 0.5251, "step": 537 }, { "epoch": 0.27575602255253717, "grad_norm": 1.1890282625274873, "learning_rate": 1.939818759297741e-05, "loss": 0.5892, "step": 538 }, { "epoch": 0.27626858021527423, "grad_norm": 1.1321305777920188, "learning_rate": 1.9395347677275695e-05, "loss": 0.5141, "step": 539 }, { "epoch": 0.2767811378780113, "grad_norm": 1.1902255088681357, "learning_rate": 1.9392501285522078e-05, "loss": 0.5268, "step": 540 }, { "epoch": 0.27729369554074834, "grad_norm": 1.0250895662922652, "learning_rate": 1.9389648419678523e-05, "loss": 0.5191, "step": 541 }, { "epoch": 0.2778062532034854, "grad_norm": 1.055578323183336, "learning_rate": 1.9386789081711465e-05, "loss": 0.5428, "step": 542 }, { "epoch": 0.27831881086622245, "grad_norm": 1.1212586334711832, "learning_rate": 1.9383923273591793e-05, "loss": 0.5043, "step": 543 }, { "epoch": 0.2788313685289595, "grad_norm": 1.0520920275912868, "learning_rate": 1.9381050997294864e-05, "loss": 0.5325, "step": 544 }, { "epoch": 0.27934392619169657, "grad_norm": 1.1062710620087992, "learning_rate": 1.9378172254800487e-05, "loss": 0.5521, "step": 545 }, { "epoch": 0.2798564838544336, "grad_norm": 1.1232828317118813, "learning_rate": 1.9375287048092927e-05, "loss": 0.5454, "step": 546 }, { "epoch": 0.2803690415171707, "grad_norm": 1.084308507876073, "learning_rate": 1.9372395379160913e-05, "loss": 0.5009, "step": 547 }, { "epoch": 0.28088159917990774, "grad_norm": 1.120613672992097, "learning_rate": 1.936949724999762e-05, "loss": 0.554, "step": 548 }, { "epoch": 0.2813941568426448, "grad_norm": 1.1507376573398016, "learning_rate": 1.9366592662600678e-05, "loss": 0.5683, "step": 549 }, { "epoch": 0.28190671450538185, "grad_norm": 1.1457334389828122, "learning_rate": 1.9363681618972166e-05, "loss": 0.5395, "step": 550 }, { "epoch": 0.2824192721681189, "grad_norm": 1.1515836754531243, "learning_rate": 1.936076412111862e-05, "loss": 0.5625, "step": 551 }, { "epoch": 0.28293182983085596, "grad_norm": 1.1103302296339266, "learning_rate": 1.9357840171051025e-05, "loss": 0.5392, "step": 552 }, { "epoch": 0.283444387493593, "grad_norm": 1.0567089657955384, "learning_rate": 1.9354909770784808e-05, "loss": 0.468, "step": 553 }, { "epoch": 0.2839569451563301, "grad_norm": 1.0364253512855526, "learning_rate": 1.9351972922339835e-05, "loss": 0.5466, "step": 554 }, { "epoch": 0.28446950281906713, "grad_norm": 1.2379328300176182, "learning_rate": 1.9349029627740434e-05, "loss": 0.4922, "step": 555 }, { "epoch": 0.2849820604818042, "grad_norm": 1.0320083337984278, "learning_rate": 1.9346079889015366e-05, "loss": 0.503, "step": 556 }, { "epoch": 0.28549461814454125, "grad_norm": 1.0214248852480112, "learning_rate": 1.9343123708197834e-05, "loss": 0.5335, "step": 557 }, { "epoch": 0.2860071758072783, "grad_norm": 1.1185452955672566, "learning_rate": 1.9340161087325483e-05, "loss": 0.5669, "step": 558 }, { "epoch": 0.28651973347001536, "grad_norm": 1.0607362588091298, "learning_rate": 1.9337192028440395e-05, "loss": 0.5362, "step": 559 }, { "epoch": 0.2870322911327524, "grad_norm": 1.0857700440798947, "learning_rate": 1.9334216533589095e-05, "loss": 0.4758, "step": 560 }, { "epoch": 0.28754484879548947, "grad_norm": 1.033736830989112, "learning_rate": 1.933123460482254e-05, "loss": 0.5684, "step": 561 }, { "epoch": 0.28805740645822653, "grad_norm": 1.113491533339925, "learning_rate": 1.9328246244196117e-05, "loss": 0.5705, "step": 562 }, { "epoch": 0.2885699641209636, "grad_norm": 1.0971404641493596, "learning_rate": 1.9325251453769657e-05, "loss": 0.5035, "step": 563 }, { "epoch": 0.28908252178370064, "grad_norm": 1.152087899654101, "learning_rate": 1.9322250235607418e-05, "loss": 0.5068, "step": 564 }, { "epoch": 0.2895950794464377, "grad_norm": 1.1581550028487606, "learning_rate": 1.9319242591778086e-05, "loss": 0.5196, "step": 565 }, { "epoch": 0.2901076371091748, "grad_norm": 1.1209694146376472, "learning_rate": 1.931622852435478e-05, "loss": 0.5139, "step": 566 }, { "epoch": 0.29062019477191187, "grad_norm": 1.0356485169747496, "learning_rate": 1.9313208035415042e-05, "loss": 0.4663, "step": 567 }, { "epoch": 0.2911327524346489, "grad_norm": 1.2569393022240154, "learning_rate": 1.9310181127040842e-05, "loss": 0.5982, "step": 568 }, { "epoch": 0.291645310097386, "grad_norm": 1.140771210293889, "learning_rate": 1.9307147801318585e-05, "loss": 0.5395, "step": 569 }, { "epoch": 0.29215786776012304, "grad_norm": 1.1849172154348406, "learning_rate": 1.930410806033908e-05, "loss": 0.5171, "step": 570 }, { "epoch": 0.2926704254228601, "grad_norm": 1.0726495430866159, "learning_rate": 1.9301061906197572e-05, "loss": 0.5335, "step": 571 }, { "epoch": 0.29318298308559715, "grad_norm": 1.121667283182861, "learning_rate": 1.9298009340993725e-05, "loss": 0.4745, "step": 572 }, { "epoch": 0.2936955407483342, "grad_norm": 1.1294903021557652, "learning_rate": 1.9294950366831617e-05, "loss": 0.5241, "step": 573 }, { "epoch": 0.29420809841107126, "grad_norm": 1.077558310694887, "learning_rate": 1.929188498581975e-05, "loss": 0.5389, "step": 574 }, { "epoch": 0.2947206560738083, "grad_norm": 1.1043879916376704, "learning_rate": 1.9288813200071035e-05, "loss": 0.5637, "step": 575 }, { "epoch": 0.2952332137365454, "grad_norm": 1.1759079236255165, "learning_rate": 1.9285735011702803e-05, "loss": 0.5227, "step": 576 }, { "epoch": 0.29574577139928243, "grad_norm": 1.120676422206138, "learning_rate": 1.928265042283679e-05, "loss": 0.5001, "step": 577 }, { "epoch": 0.2962583290620195, "grad_norm": 1.0440188551174812, "learning_rate": 1.9279559435599164e-05, "loss": 0.5489, "step": 578 }, { "epoch": 0.29677088672475654, "grad_norm": 1.103251284850606, "learning_rate": 1.9276462052120475e-05, "loss": 0.5497, "step": 579 }, { "epoch": 0.2972834443874936, "grad_norm": 1.0791642293990567, "learning_rate": 1.9273358274535703e-05, "loss": 0.5182, "step": 580 }, { "epoch": 0.29779600205023066, "grad_norm": 1.06370442788145, "learning_rate": 1.927024810498423e-05, "loss": 0.5436, "step": 581 }, { "epoch": 0.2983085597129677, "grad_norm": 1.0555353456327805, "learning_rate": 1.926713154560984e-05, "loss": 0.5626, "step": 582 }, { "epoch": 0.29882111737570477, "grad_norm": 1.0470918481354339, "learning_rate": 1.9264008598560727e-05, "loss": 0.5486, "step": 583 }, { "epoch": 0.2993336750384418, "grad_norm": 1.0114658381928365, "learning_rate": 1.9260879265989477e-05, "loss": 0.5329, "step": 584 }, { "epoch": 0.2998462327011789, "grad_norm": 1.0337780230101323, "learning_rate": 1.9257743550053097e-05, "loss": 0.4602, "step": 585 }, { "epoch": 0.30035879036391594, "grad_norm": 1.1627672916369023, "learning_rate": 1.9254601452912972e-05, "loss": 0.5123, "step": 586 }, { "epoch": 0.300871348026653, "grad_norm": 1.1475074671401868, "learning_rate": 1.92514529767349e-05, "loss": 0.4631, "step": 587 }, { "epoch": 0.30138390568939005, "grad_norm": 1.0827707228821781, "learning_rate": 1.924829812368907e-05, "loss": 0.5031, "step": 588 }, { "epoch": 0.3018964633521271, "grad_norm": 1.0708102640255373, "learning_rate": 1.924513689595007e-05, "loss": 0.5505, "step": 589 }, { "epoch": 0.30240902101486417, "grad_norm": 1.1382440222469152, "learning_rate": 1.924196929569688e-05, "loss": 0.4997, "step": 590 }, { "epoch": 0.3029215786776012, "grad_norm": 1.1082883767450014, "learning_rate": 1.9238795325112867e-05, "loss": 0.5673, "step": 591 }, { "epoch": 0.3034341363403383, "grad_norm": 1.0304503319064087, "learning_rate": 1.9235614986385806e-05, "loss": 0.5378, "step": 592 }, { "epoch": 0.30394669400307534, "grad_norm": 1.0569463450543617, "learning_rate": 1.923242828170784e-05, "loss": 0.5266, "step": 593 }, { "epoch": 0.3044592516658124, "grad_norm": 1.1374124915877144, "learning_rate": 1.922923521327551e-05, "loss": 0.5657, "step": 594 }, { "epoch": 0.30497180932854945, "grad_norm": 1.0985337523255934, "learning_rate": 1.9226035783289748e-05, "loss": 0.5338, "step": 595 }, { "epoch": 0.3054843669912865, "grad_norm": 1.0394499307151455, "learning_rate": 1.922282999395586e-05, "loss": 0.5342, "step": 596 }, { "epoch": 0.30599692465402356, "grad_norm": 1.041354533457582, "learning_rate": 1.9219617847483553e-05, "loss": 0.5103, "step": 597 }, { "epoch": 0.3065094823167606, "grad_norm": 1.0207438033096412, "learning_rate": 1.9216399346086893e-05, "loss": 0.5264, "step": 598 }, { "epoch": 0.3070220399794977, "grad_norm": 1.015690375643476, "learning_rate": 1.9213174491984342e-05, "loss": 0.4812, "step": 599 }, { "epoch": 0.30753459764223473, "grad_norm": 1.1575833677577305, "learning_rate": 1.9209943287398738e-05, "loss": 0.5015, "step": 600 }, { "epoch": 0.3080471553049718, "grad_norm": 1.1330304076135735, "learning_rate": 1.9206705734557293e-05, "loss": 0.458, "step": 601 }, { "epoch": 0.30855971296770884, "grad_norm": 1.0127170354185022, "learning_rate": 1.9203461835691596e-05, "loss": 0.4414, "step": 602 }, { "epoch": 0.3090722706304459, "grad_norm": 1.1108008483013356, "learning_rate": 1.9200211593037612e-05, "loss": 0.5746, "step": 603 }, { "epoch": 0.30958482829318296, "grad_norm": 1.1558633224709884, "learning_rate": 1.9196955008835678e-05, "loss": 0.5066, "step": 604 }, { "epoch": 0.31009738595592007, "grad_norm": 1.0770464166501112, "learning_rate": 1.9193692085330503e-05, "loss": 0.5174, "step": 605 }, { "epoch": 0.3106099436186571, "grad_norm": 1.1229244092884925, "learning_rate": 1.9190422824771158e-05, "loss": 0.5579, "step": 606 }, { "epoch": 0.3111225012813942, "grad_norm": 0.9917360690348311, "learning_rate": 1.9187147229411094e-05, "loss": 0.4823, "step": 607 }, { "epoch": 0.31163505894413124, "grad_norm": 1.1564606081617377, "learning_rate": 1.918386530150812e-05, "loss": 0.5424, "step": 608 }, { "epoch": 0.3121476166068683, "grad_norm": 1.0032506555102012, "learning_rate": 1.9180577043324415e-05, "loss": 0.5129, "step": 609 }, { "epoch": 0.31266017426960535, "grad_norm": 1.0179640910376728, "learning_rate": 1.9177282457126515e-05, "loss": 0.4688, "step": 610 }, { "epoch": 0.3131727319323424, "grad_norm": 1.0602281959192805, "learning_rate": 1.9173981545185327e-05, "loss": 0.5045, "step": 611 }, { "epoch": 0.31368528959507946, "grad_norm": 0.9721680602115661, "learning_rate": 1.917067430977611e-05, "loss": 0.497, "step": 612 }, { "epoch": 0.3141978472578165, "grad_norm": 1.0390191323121498, "learning_rate": 1.916736075317848e-05, "loss": 0.4891, "step": 613 }, { "epoch": 0.3147104049205536, "grad_norm": 1.1046315953348116, "learning_rate": 1.9164040877676425e-05, "loss": 0.5169, "step": 614 }, { "epoch": 0.31522296258329063, "grad_norm": 1.159879610940931, "learning_rate": 1.9160714685558272e-05, "loss": 0.5079, "step": 615 }, { "epoch": 0.3157355202460277, "grad_norm": 0.9587018318439663, "learning_rate": 1.9157382179116705e-05, "loss": 0.5659, "step": 616 }, { "epoch": 0.31624807790876475, "grad_norm": 1.1327914880767143, "learning_rate": 1.915404336064877e-05, "loss": 0.5538, "step": 617 }, { "epoch": 0.3167606355715018, "grad_norm": 1.1537161858674352, "learning_rate": 1.9150698232455853e-05, "loss": 0.5318, "step": 618 }, { "epoch": 0.31727319323423886, "grad_norm": 1.074807354142965, "learning_rate": 1.9147346796843695e-05, "loss": 0.5157, "step": 619 }, { "epoch": 0.3177857508969759, "grad_norm": 1.1351515201731763, "learning_rate": 1.9143989056122385e-05, "loss": 0.4925, "step": 620 }, { "epoch": 0.318298308559713, "grad_norm": 1.2707835104703582, "learning_rate": 1.9140625012606353e-05, "loss": 0.4946, "step": 621 }, { "epoch": 0.31881086622245003, "grad_norm": 1.1715315877115489, "learning_rate": 1.913725466861438e-05, "loss": 0.5292, "step": 622 }, { "epoch": 0.3193234238851871, "grad_norm": 1.0995772577358354, "learning_rate": 1.913387802646958e-05, "loss": 0.5198, "step": 623 }, { "epoch": 0.31983598154792414, "grad_norm": 1.1555187146009274, "learning_rate": 1.9130495088499417e-05, "loss": 0.5429, "step": 624 }, { "epoch": 0.3203485392106612, "grad_norm": 1.1315046293538165, "learning_rate": 1.9127105857035697e-05, "loss": 0.498, "step": 625 }, { "epoch": 0.32086109687339825, "grad_norm": 1.0796557461121545, "learning_rate": 1.9123710334414552e-05, "loss": 0.5418, "step": 626 }, { "epoch": 0.3213736545361353, "grad_norm": 1.0501156509531986, "learning_rate": 1.9120308522976464e-05, "loss": 0.4736, "step": 627 }, { "epoch": 0.32188621219887237, "grad_norm": 1.1033989596828984, "learning_rate": 1.911690042506624e-05, "loss": 0.5299, "step": 628 }, { "epoch": 0.3223987698616094, "grad_norm": 1.0992005039730814, "learning_rate": 1.9113486043033025e-05, "loss": 0.5321, "step": 629 }, { "epoch": 0.3229113275243465, "grad_norm": 1.1428381635814882, "learning_rate": 1.911006537923029e-05, "loss": 0.5526, "step": 630 }, { "epoch": 0.32342388518708354, "grad_norm": 1.1920894817927552, "learning_rate": 1.9106638436015842e-05, "loss": 0.5345, "step": 631 }, { "epoch": 0.3239364428498206, "grad_norm": 1.062649733393007, "learning_rate": 1.9103205215751815e-05, "loss": 0.4975, "step": 632 }, { "epoch": 0.32444900051255765, "grad_norm": 1.0147022365307181, "learning_rate": 1.9099765720804666e-05, "loss": 0.5059, "step": 633 }, { "epoch": 0.3249615581752947, "grad_norm": 1.079419844254249, "learning_rate": 1.9096319953545186e-05, "loss": 0.5139, "step": 634 }, { "epoch": 0.32547411583803176, "grad_norm": 1.0381435291962855, "learning_rate": 1.909286791634848e-05, "loss": 0.479, "step": 635 }, { "epoch": 0.3259866735007688, "grad_norm": 1.0189674017715302, "learning_rate": 1.908940961159397e-05, "loss": 0.5063, "step": 636 }, { "epoch": 0.3264992311635059, "grad_norm": 1.0326777589715368, "learning_rate": 1.9085945041665418e-05, "loss": 0.5566, "step": 637 }, { "epoch": 0.32701178882624293, "grad_norm": 1.040302153772088, "learning_rate": 1.908247420895089e-05, "loss": 0.5308, "step": 638 }, { "epoch": 0.32752434648898, "grad_norm": 1.07067349680758, "learning_rate": 1.9078997115842767e-05, "loss": 0.5426, "step": 639 }, { "epoch": 0.32803690415171705, "grad_norm": 0.9475340290372944, "learning_rate": 1.9075513764737753e-05, "loss": 0.4787, "step": 640 }, { "epoch": 0.3285494618144541, "grad_norm": 1.1274841109239113, "learning_rate": 1.907202415803686e-05, "loss": 0.5071, "step": 641 }, { "epoch": 0.32906201947719116, "grad_norm": 1.0766159399112707, "learning_rate": 1.9068528298145418e-05, "loss": 0.4964, "step": 642 }, { "epoch": 0.3295745771399282, "grad_norm": 1.0509371553952391, "learning_rate": 1.906502618747306e-05, "loss": 0.5114, "step": 643 }, { "epoch": 0.3300871348026653, "grad_norm": 1.0567827075030225, "learning_rate": 1.906151782843373e-05, "loss": 0.5295, "step": 644 }, { "epoch": 0.3305996924654024, "grad_norm": 0.99838439622406, "learning_rate": 1.9058003223445676e-05, "loss": 0.4889, "step": 645 }, { "epoch": 0.33111225012813944, "grad_norm": 1.0250712625114502, "learning_rate": 1.905448237493147e-05, "loss": 0.5516, "step": 646 }, { "epoch": 0.3316248077908765, "grad_norm": 0.9585215136875024, "learning_rate": 1.9050955285317957e-05, "loss": 0.5237, "step": 647 }, { "epoch": 0.33213736545361355, "grad_norm": 1.0306311870972393, "learning_rate": 1.90474219570363e-05, "loss": 0.513, "step": 648 }, { "epoch": 0.3326499231163506, "grad_norm": 1.0950129723038742, "learning_rate": 1.9043882392521973e-05, "loss": 0.5762, "step": 649 }, { "epoch": 0.33316248077908767, "grad_norm": 1.0223872548113304, "learning_rate": 1.9040336594214727e-05, "loss": 0.5256, "step": 650 }, { "epoch": 0.3336750384418247, "grad_norm": 0.9768317197467952, "learning_rate": 1.9036784564558623e-05, "loss": 0.4941, "step": 651 }, { "epoch": 0.3341875961045618, "grad_norm": 1.1545579902315841, "learning_rate": 1.9033226306002016e-05, "loss": 0.5085, "step": 652 }, { "epoch": 0.33470015376729884, "grad_norm": 1.0929217668801863, "learning_rate": 1.9029661820997548e-05, "loss": 0.4816, "step": 653 }, { "epoch": 0.3352127114300359, "grad_norm": 1.0624443432966526, "learning_rate": 1.9026091112002163e-05, "loss": 0.5677, "step": 654 }, { "epoch": 0.33572526909277295, "grad_norm": 1.1303761041268783, "learning_rate": 1.9022514181477085e-05, "loss": 0.4804, "step": 655 }, { "epoch": 0.33623782675551, "grad_norm": 1.0847748452014154, "learning_rate": 1.901893103188783e-05, "loss": 0.5468, "step": 656 }, { "epoch": 0.33675038441824706, "grad_norm": 1.0965254532652902, "learning_rate": 1.9015341665704206e-05, "loss": 0.5682, "step": 657 }, { "epoch": 0.3372629420809841, "grad_norm": 1.12724082474055, "learning_rate": 1.90117460854003e-05, "loss": 0.5334, "step": 658 }, { "epoch": 0.3377754997437212, "grad_norm": 1.0944017817140248, "learning_rate": 1.9008144293454482e-05, "loss": 0.4793, "step": 659 }, { "epoch": 0.33828805740645823, "grad_norm": 1.114927453179354, "learning_rate": 1.9004536292349406e-05, "loss": 0.5541, "step": 660 }, { "epoch": 0.3388006150691953, "grad_norm": 1.04233333344734, "learning_rate": 1.9000922084572008e-05, "loss": 0.5388, "step": 661 }, { "epoch": 0.33931317273193234, "grad_norm": 1.0431717140885637, "learning_rate": 1.8997301672613496e-05, "loss": 0.5487, "step": 662 }, { "epoch": 0.3398257303946694, "grad_norm": 1.0360055239030284, "learning_rate": 1.899367505896936e-05, "loss": 0.5336, "step": 663 }, { "epoch": 0.34033828805740646, "grad_norm": 1.0861707177227584, "learning_rate": 1.899004224613936e-05, "loss": 0.5055, "step": 664 }, { "epoch": 0.3408508457201435, "grad_norm": 1.0844202525412714, "learning_rate": 1.898640323662753e-05, "loss": 0.4716, "step": 665 }, { "epoch": 0.34136340338288057, "grad_norm": 0.9861380815619857, "learning_rate": 1.8982758032942184e-05, "loss": 0.4856, "step": 666 }, { "epoch": 0.3418759610456176, "grad_norm": 1.0329176647148497, "learning_rate": 1.8979106637595895e-05, "loss": 0.4962, "step": 667 }, { "epoch": 0.3423885187083547, "grad_norm": 1.1717356593721517, "learning_rate": 1.8975449053105505e-05, "loss": 0.5389, "step": 668 }, { "epoch": 0.34290107637109174, "grad_norm": 1.0448831893708501, "learning_rate": 1.8971785281992124e-05, "loss": 0.4914, "step": 669 }, { "epoch": 0.3434136340338288, "grad_norm": 1.10646153836898, "learning_rate": 1.896811532678113e-05, "loss": 0.4699, "step": 670 }, { "epoch": 0.34392619169656585, "grad_norm": 1.0592486253046525, "learning_rate": 1.8964439190002156e-05, "loss": 0.5183, "step": 671 }, { "epoch": 0.3444387493593029, "grad_norm": 1.0619409197412322, "learning_rate": 1.8960756874189104e-05, "loss": 0.4797, "step": 672 }, { "epoch": 0.34495130702203997, "grad_norm": 1.0115586854729632, "learning_rate": 1.895706838188013e-05, "loss": 0.5011, "step": 673 }, { "epoch": 0.345463864684777, "grad_norm": 1.0174626810960206, "learning_rate": 1.8953373715617646e-05, "loss": 0.4962, "step": 674 }, { "epoch": 0.3459764223475141, "grad_norm": 1.032705514272255, "learning_rate": 1.8949672877948324e-05, "loss": 0.5559, "step": 675 }, { "epoch": 0.34648898001025114, "grad_norm": 1.0549161842688208, "learning_rate": 1.894596587142309e-05, "loss": 0.5285, "step": 676 }, { "epoch": 0.3470015376729882, "grad_norm": 0.9423548873539662, "learning_rate": 1.8942252698597113e-05, "loss": 0.4712, "step": 677 }, { "epoch": 0.34751409533572525, "grad_norm": 1.0211784301296702, "learning_rate": 1.893853336202983e-05, "loss": 0.4882, "step": 678 }, { "epoch": 0.3480266529984623, "grad_norm": 1.0691485888302403, "learning_rate": 1.8934807864284904e-05, "loss": 0.499, "step": 679 }, { "epoch": 0.34853921066119936, "grad_norm": 1.0712121196102362, "learning_rate": 1.8931076207930265e-05, "loss": 0.4994, "step": 680 }, { "epoch": 0.3490517683239364, "grad_norm": 1.1067189116389058, "learning_rate": 1.892733839553808e-05, "loss": 0.5565, "step": 681 }, { "epoch": 0.3495643259866735, "grad_norm": 1.0357513290336395, "learning_rate": 1.892359442968475e-05, "loss": 0.4691, "step": 682 }, { "epoch": 0.3500768836494106, "grad_norm": 1.0965372778169156, "learning_rate": 1.8919844312950937e-05, "loss": 0.5231, "step": 683 }, { "epoch": 0.35058944131214764, "grad_norm": 1.0501925032663466, "learning_rate": 1.8916088047921527e-05, "loss": 0.5122, "step": 684 }, { "epoch": 0.3511019989748847, "grad_norm": 0.9537457789834791, "learning_rate": 1.8912325637185647e-05, "loss": 0.4937, "step": 685 }, { "epoch": 0.35161455663762176, "grad_norm": 0.9785285745124769, "learning_rate": 1.8908557083336668e-05, "loss": 0.4805, "step": 686 }, { "epoch": 0.3521271143003588, "grad_norm": 1.0867696070915396, "learning_rate": 1.8904782388972186e-05, "loss": 0.529, "step": 687 }, { "epoch": 0.35263967196309587, "grad_norm": 1.0266400556112427, "learning_rate": 1.890100155669403e-05, "loss": 0.4895, "step": 688 }, { "epoch": 0.3531522296258329, "grad_norm": 1.0616683571731438, "learning_rate": 1.889721458910827e-05, "loss": 0.5511, "step": 689 }, { "epoch": 0.35366478728857, "grad_norm": 1.153697620128154, "learning_rate": 1.889342148882519e-05, "loss": 0.5462, "step": 690 }, { "epoch": 0.35417734495130704, "grad_norm": 0.9584966762633598, "learning_rate": 1.8889622258459317e-05, "loss": 0.5093, "step": 691 }, { "epoch": 0.3546899026140441, "grad_norm": 1.0503808796814404, "learning_rate": 1.888581690062939e-05, "loss": 0.5108, "step": 692 }, { "epoch": 0.35520246027678115, "grad_norm": 1.0530220854908392, "learning_rate": 1.8882005417958376e-05, "loss": 0.4792, "step": 693 }, { "epoch": 0.3557150179395182, "grad_norm": 0.9711985378166852, "learning_rate": 1.8878187813073465e-05, "loss": 0.5288, "step": 694 }, { "epoch": 0.35622757560225526, "grad_norm": 1.0003328636434972, "learning_rate": 1.887436408860607e-05, "loss": 0.4948, "step": 695 }, { "epoch": 0.3567401332649923, "grad_norm": 1.0426247169116232, "learning_rate": 1.8870534247191815e-05, "loss": 0.448, "step": 696 }, { "epoch": 0.3572526909277294, "grad_norm": 1.0544250676388576, "learning_rate": 1.886669829147055e-05, "loss": 0.4802, "step": 697 }, { "epoch": 0.35776524859046643, "grad_norm": 1.050361558824311, "learning_rate": 1.886285622408633e-05, "loss": 0.5473, "step": 698 }, { "epoch": 0.3582778062532035, "grad_norm": 1.0616450595657436, "learning_rate": 1.885900804768742e-05, "loss": 0.4898, "step": 699 }, { "epoch": 0.35879036391594055, "grad_norm": 1.1018012916236688, "learning_rate": 1.8855153764926307e-05, "loss": 0.4993, "step": 700 }, { "epoch": 0.3593029215786776, "grad_norm": 1.046873014797236, "learning_rate": 1.8851293378459685e-05, "loss": 0.4777, "step": 701 }, { "epoch": 0.35981547924141466, "grad_norm": 1.094110974185452, "learning_rate": 1.8847426890948447e-05, "loss": 0.5193, "step": 702 }, { "epoch": 0.3603280369041517, "grad_norm": 1.0186931348043178, "learning_rate": 1.88435543050577e-05, "loss": 0.488, "step": 703 }, { "epoch": 0.3608405945668888, "grad_norm": 1.0125773025350762, "learning_rate": 1.883967562345675e-05, "loss": 0.5204, "step": 704 }, { "epoch": 0.36135315222962583, "grad_norm": 0.9854415746708586, "learning_rate": 1.8835790848819105e-05, "loss": 0.5021, "step": 705 }, { "epoch": 0.3618657098923629, "grad_norm": 1.1149317208983422, "learning_rate": 1.8831899983822475e-05, "loss": 0.5416, "step": 706 }, { "epoch": 0.36237826755509994, "grad_norm": 1.037506692707458, "learning_rate": 1.8828003031148764e-05, "loss": 0.4945, "step": 707 }, { "epoch": 0.362890825217837, "grad_norm": 0.99285808588638, "learning_rate": 1.8824099993484075e-05, "loss": 0.5115, "step": 708 }, { "epoch": 0.36340338288057406, "grad_norm": 1.0287285753140598, "learning_rate": 1.8820190873518708e-05, "loss": 0.5471, "step": 709 }, { "epoch": 0.3639159405433111, "grad_norm": 1.0005825332745355, "learning_rate": 1.8816275673947148e-05, "loss": 0.5347, "step": 710 }, { "epoch": 0.36442849820604817, "grad_norm": 1.0063087780030846, "learning_rate": 1.8812354397468076e-05, "loss": 0.4887, "step": 711 }, { "epoch": 0.3649410558687852, "grad_norm": 1.1759990038805945, "learning_rate": 1.8808427046784365e-05, "loss": 0.5224, "step": 712 }, { "epoch": 0.3654536135315223, "grad_norm": 1.0490636506419821, "learning_rate": 1.8804493624603064e-05, "loss": 0.4823, "step": 713 }, { "epoch": 0.36596617119425934, "grad_norm": 1.012799856844532, "learning_rate": 1.8800554133635417e-05, "loss": 0.4909, "step": 714 }, { "epoch": 0.3664787288569964, "grad_norm": 1.0873597560398034, "learning_rate": 1.879660857659685e-05, "loss": 0.5369, "step": 715 }, { "epoch": 0.36699128651973345, "grad_norm": 1.0492001075464552, "learning_rate": 1.879265695620696e-05, "loss": 0.5181, "step": 716 }, { "epoch": 0.3675038441824705, "grad_norm": 0.9693205309376531, "learning_rate": 1.8788699275189538e-05, "loss": 0.5492, "step": 717 }, { "epoch": 0.36801640184520756, "grad_norm": 1.035722696714738, "learning_rate": 1.8784735536272543e-05, "loss": 0.5205, "step": 718 }, { "epoch": 0.3685289595079446, "grad_norm": 1.050603921178112, "learning_rate": 1.8780765742188113e-05, "loss": 0.5265, "step": 719 }, { "epoch": 0.3690415171706817, "grad_norm": 1.0572748601753397, "learning_rate": 1.8776789895672557e-05, "loss": 0.5343, "step": 720 }, { "epoch": 0.36955407483341873, "grad_norm": 1.0008644627131598, "learning_rate": 1.8772807999466362e-05, "loss": 0.5095, "step": 721 }, { "epoch": 0.37006663249615585, "grad_norm": 1.0596474486221652, "learning_rate": 1.8768820056314173e-05, "loss": 0.4722, "step": 722 }, { "epoch": 0.3705791901588929, "grad_norm": 1.0445099286449142, "learning_rate": 1.876482606896482e-05, "loss": 0.4966, "step": 723 }, { "epoch": 0.37109174782162996, "grad_norm": 1.076645003084554, "learning_rate": 1.8760826040171286e-05, "loss": 0.4785, "step": 724 }, { "epoch": 0.371604305484367, "grad_norm": 1.6012285118280867, "learning_rate": 1.875681997269072e-05, "loss": 0.5138, "step": 725 }, { "epoch": 0.37211686314710407, "grad_norm": 1.0606463976893479, "learning_rate": 1.875280786928444e-05, "loss": 0.4872, "step": 726 }, { "epoch": 0.3726294208098411, "grad_norm": 1.0416166466753027, "learning_rate": 1.874878973271791e-05, "loss": 0.4973, "step": 727 }, { "epoch": 0.3731419784725782, "grad_norm": 1.064757482183335, "learning_rate": 1.8744765565760778e-05, "loss": 0.5085, "step": 728 }, { "epoch": 0.37365453613531524, "grad_norm": 0.9498409800833598, "learning_rate": 1.8740735371186823e-05, "loss": 0.4249, "step": 729 }, { "epoch": 0.3741670937980523, "grad_norm": 0.9890525777932639, "learning_rate": 1.873669915177399e-05, "loss": 0.5258, "step": 730 }, { "epoch": 0.37467965146078935, "grad_norm": 1.1218798209951657, "learning_rate": 1.8732656910304378e-05, "loss": 0.5139, "step": 731 }, { "epoch": 0.3751922091235264, "grad_norm": 1.287739613148134, "learning_rate": 1.872860864956423e-05, "loss": 0.5513, "step": 732 }, { "epoch": 0.37570476678626347, "grad_norm": 1.151500519514076, "learning_rate": 1.872455437234395e-05, "loss": 0.5527, "step": 733 }, { "epoch": 0.3762173244490005, "grad_norm": 1.0757057720781815, "learning_rate": 1.872049408143808e-05, "loss": 0.4749, "step": 734 }, { "epoch": 0.3767298821117376, "grad_norm": 1.1008996754438607, "learning_rate": 1.8716427779645303e-05, "loss": 0.4557, "step": 735 }, { "epoch": 0.37724243977447464, "grad_norm": 1.1687769806757684, "learning_rate": 1.8712355469768454e-05, "loss": 0.5691, "step": 736 }, { "epoch": 0.3777549974372117, "grad_norm": 1.1140998205684405, "learning_rate": 1.870827715461451e-05, "loss": 0.5268, "step": 737 }, { "epoch": 0.37826755509994875, "grad_norm": 0.974279753280252, "learning_rate": 1.8704192836994578e-05, "loss": 0.479, "step": 738 }, { "epoch": 0.3787801127626858, "grad_norm": 0.9964547785315673, "learning_rate": 1.8700102519723912e-05, "loss": 0.4809, "step": 739 }, { "epoch": 0.37929267042542286, "grad_norm": 1.013569554458929, "learning_rate": 1.8696006205621898e-05, "loss": 0.4908, "step": 740 }, { "epoch": 0.3798052280881599, "grad_norm": 1.0205976486529602, "learning_rate": 1.869190389751205e-05, "loss": 0.4889, "step": 741 }, { "epoch": 0.380317785750897, "grad_norm": 1.0947046158164004, "learning_rate": 1.8687795598222024e-05, "loss": 0.5215, "step": 742 }, { "epoch": 0.38083034341363403, "grad_norm": 1.1200329678109828, "learning_rate": 1.86836813105836e-05, "loss": 0.5244, "step": 743 }, { "epoch": 0.3813429010763711, "grad_norm": 1.003325972419454, "learning_rate": 1.867956103743268e-05, "loss": 0.5167, "step": 744 }, { "epoch": 0.38185545873910814, "grad_norm": 0.9740211006622709, "learning_rate": 1.8675434781609305e-05, "loss": 0.458, "step": 745 }, { "epoch": 0.3823680164018452, "grad_norm": 0.8833236267204146, "learning_rate": 1.8671302545957628e-05, "loss": 0.4404, "step": 746 }, { "epoch": 0.38288057406458226, "grad_norm": 0.9811591998403126, "learning_rate": 1.8667164333325928e-05, "loss": 0.5026, "step": 747 }, { "epoch": 0.3833931317273193, "grad_norm": 0.9517988080683352, "learning_rate": 1.8663020146566606e-05, "loss": 0.4892, "step": 748 }, { "epoch": 0.38390568939005637, "grad_norm": 0.9248747344265037, "learning_rate": 1.8658869988536175e-05, "loss": 0.5007, "step": 749 }, { "epoch": 0.3844182470527934, "grad_norm": 1.0402601852518205, "learning_rate": 1.8654713862095272e-05, "loss": 0.4994, "step": 750 }, { "epoch": 0.3849308047155305, "grad_norm": 1.035738282528718, "learning_rate": 1.8650551770108644e-05, "loss": 0.474, "step": 751 }, { "epoch": 0.38544336237826754, "grad_norm": 0.9431185899940349, "learning_rate": 1.8646383715445144e-05, "loss": 0.5019, "step": 752 }, { "epoch": 0.3859559200410046, "grad_norm": 0.9708284000331531, "learning_rate": 1.8642209700977745e-05, "loss": 0.439, "step": 753 }, { "epoch": 0.38646847770374165, "grad_norm": 1.019439472929643, "learning_rate": 1.8638029729583524e-05, "loss": 0.5156, "step": 754 }, { "epoch": 0.3869810353664787, "grad_norm": 1.0200954101428676, "learning_rate": 1.8633843804143663e-05, "loss": 0.51, "step": 755 }, { "epoch": 0.38749359302921577, "grad_norm": 1.0511404111826328, "learning_rate": 1.8629651927543447e-05, "loss": 0.5321, "step": 756 }, { "epoch": 0.3880061506919528, "grad_norm": 1.1469036811703943, "learning_rate": 1.8625454102672266e-05, "loss": 0.5545, "step": 757 }, { "epoch": 0.3885187083546899, "grad_norm": 1.073276319184835, "learning_rate": 1.8621250332423603e-05, "loss": 0.5522, "step": 758 }, { "epoch": 0.38903126601742694, "grad_norm": 0.9366771184856985, "learning_rate": 1.8617040619695055e-05, "loss": 0.489, "step": 759 }, { "epoch": 0.389543823680164, "grad_norm": 1.0043401667990193, "learning_rate": 1.8612824967388297e-05, "loss": 0.4791, "step": 760 }, { "epoch": 0.3900563813429011, "grad_norm": 1.0448526676961036, "learning_rate": 1.8608603378409108e-05, "loss": 0.5006, "step": 761 }, { "epoch": 0.39056893900563816, "grad_norm": 1.0013514172414033, "learning_rate": 1.860437585566736e-05, "loss": 0.4512, "step": 762 }, { "epoch": 0.3910814966683752, "grad_norm": 0.9047681049199497, "learning_rate": 1.8600142402077006e-05, "loss": 0.4395, "step": 763 }, { "epoch": 0.3915940543311123, "grad_norm": 1.1207092057195374, "learning_rate": 1.8595903020556098e-05, "loss": 0.5441, "step": 764 }, { "epoch": 0.39210661199384933, "grad_norm": 1.0754514696177702, "learning_rate": 1.8591657714026764e-05, "loss": 0.4877, "step": 765 }, { "epoch": 0.3926191696565864, "grad_norm": 1.1174706577991937, "learning_rate": 1.8587406485415226e-05, "loss": 0.5337, "step": 766 }, { "epoch": 0.39313172731932344, "grad_norm": 1.0452378921908907, "learning_rate": 1.858314933765178e-05, "loss": 0.5154, "step": 767 }, { "epoch": 0.3936442849820605, "grad_norm": 0.9539771487593244, "learning_rate": 1.8578886273670807e-05, "loss": 0.4924, "step": 768 }, { "epoch": 0.39415684264479756, "grad_norm": 1.0133965760691774, "learning_rate": 1.857461729641076e-05, "loss": 0.5266, "step": 769 }, { "epoch": 0.3946694003075346, "grad_norm": 1.0038330716284773, "learning_rate": 1.8570342408814173e-05, "loss": 0.4858, "step": 770 }, { "epoch": 0.39518195797027167, "grad_norm": 0.9518262898540215, "learning_rate": 1.856606161382766e-05, "loss": 0.4651, "step": 771 }, { "epoch": 0.3956945156330087, "grad_norm": 1.0522536180081599, "learning_rate": 1.8561774914401883e-05, "loss": 0.5503, "step": 772 }, { "epoch": 0.3962070732957458, "grad_norm": 1.0257398523347523, "learning_rate": 1.8557482313491607e-05, "loss": 0.5188, "step": 773 }, { "epoch": 0.39671963095848284, "grad_norm": 1.034422687327834, "learning_rate": 1.855318381405564e-05, "loss": 0.473, "step": 774 }, { "epoch": 0.3972321886212199, "grad_norm": 1.10581981015209, "learning_rate": 1.854887941905687e-05, "loss": 0.5519, "step": 775 }, { "epoch": 0.39774474628395695, "grad_norm": 1.0059364506032633, "learning_rate": 1.8544569131462235e-05, "loss": 0.4762, "step": 776 }, { "epoch": 0.398257303946694, "grad_norm": 0.9211992002501899, "learning_rate": 1.8540252954242742e-05, "loss": 0.4655, "step": 777 }, { "epoch": 0.39876986160943106, "grad_norm": 1.0737610320784077, "learning_rate": 1.8535930890373467e-05, "loss": 0.4602, "step": 778 }, { "epoch": 0.3992824192721681, "grad_norm": 1.046469444428949, "learning_rate": 1.8531602942833525e-05, "loss": 0.5385, "step": 779 }, { "epoch": 0.3997949769349052, "grad_norm": 1.054592465978946, "learning_rate": 1.8527269114606102e-05, "loss": 0.4879, "step": 780 }, { "epoch": 0.40030753459764223, "grad_norm": 1.0281536695144582, "learning_rate": 1.8522929408678426e-05, "loss": 0.4761, "step": 781 }, { "epoch": 0.4008200922603793, "grad_norm": 1.0970410087886981, "learning_rate": 1.8518583828041787e-05, "loss": 0.5132, "step": 782 }, { "epoch": 0.40133264992311635, "grad_norm": 1.0262459168135398, "learning_rate": 1.851423237569152e-05, "loss": 0.5091, "step": 783 }, { "epoch": 0.4018452075858534, "grad_norm": 0.9750555307963595, "learning_rate": 1.8509875054627e-05, "loss": 0.5206, "step": 784 }, { "epoch": 0.40235776524859046, "grad_norm": 0.9911066513205726, "learning_rate": 1.8505511867851656e-05, "loss": 0.4437, "step": 785 }, { "epoch": 0.4028703229113275, "grad_norm": 1.0698458584603956, "learning_rate": 1.8501142818372964e-05, "loss": 0.5299, "step": 786 }, { "epoch": 0.4033828805740646, "grad_norm": 1.0286845076027027, "learning_rate": 1.8496767909202423e-05, "loss": 0.5379, "step": 787 }, { "epoch": 0.40389543823680163, "grad_norm": 0.9881333001481235, "learning_rate": 1.8492387143355594e-05, "loss": 0.493, "step": 788 }, { "epoch": 0.4044079958995387, "grad_norm": 0.9482350793041963, "learning_rate": 1.848800052385206e-05, "loss": 0.4374, "step": 789 }, { "epoch": 0.40492055356227574, "grad_norm": 0.9634474033056637, "learning_rate": 1.848360805371544e-05, "loss": 0.5336, "step": 790 }, { "epoch": 0.4054331112250128, "grad_norm": 0.9959148945125744, "learning_rate": 1.847920973597339e-05, "loss": 0.5468, "step": 791 }, { "epoch": 0.40594566888774986, "grad_norm": 1.0863714731462886, "learning_rate": 1.84748055736576e-05, "loss": 0.4703, "step": 792 }, { "epoch": 0.4064582265504869, "grad_norm": 1.0273272832658422, "learning_rate": 1.847039556980377e-05, "loss": 0.5503, "step": 793 }, { "epoch": 0.40697078421322397, "grad_norm": 0.9648437727597583, "learning_rate": 1.8465979727451653e-05, "loss": 0.4923, "step": 794 }, { "epoch": 0.407483341875961, "grad_norm": 1.0813465517873209, "learning_rate": 1.846155804964501e-05, "loss": 0.51, "step": 795 }, { "epoch": 0.4079958995386981, "grad_norm": 0.9925510962199819, "learning_rate": 1.8457130539431623e-05, "loss": 0.5204, "step": 796 }, { "epoch": 0.40850845720143514, "grad_norm": 1.024150679489607, "learning_rate": 1.8452697199863305e-05, "loss": 0.4579, "step": 797 }, { "epoch": 0.4090210148641722, "grad_norm": 0.9685454540425159, "learning_rate": 1.8448258033995877e-05, "loss": 0.4613, "step": 798 }, { "epoch": 0.40953357252690925, "grad_norm": 1.0698293690777516, "learning_rate": 1.8443813044889182e-05, "loss": 0.5434, "step": 799 }, { "epoch": 0.41004613018964636, "grad_norm": 1.0568548444494394, "learning_rate": 1.8439362235607074e-05, "loss": 0.4894, "step": 800 }, { "epoch": 0.4105586878523834, "grad_norm": 0.942219449654292, "learning_rate": 1.8434905609217415e-05, "loss": 0.4963, "step": 801 }, { "epoch": 0.4110712455151205, "grad_norm": 1.0318567186813048, "learning_rate": 1.8430443168792087e-05, "loss": 0.4932, "step": 802 }, { "epoch": 0.41158380317785753, "grad_norm": 1.0260732314161032, "learning_rate": 1.8425974917406974e-05, "loss": 0.5373, "step": 803 }, { "epoch": 0.4120963608405946, "grad_norm": 0.9671932726711632, "learning_rate": 1.842150085814196e-05, "loss": 0.4476, "step": 804 }, { "epoch": 0.41260891850333165, "grad_norm": 1.0763884960006165, "learning_rate": 1.8417020994080945e-05, "loss": 0.4973, "step": 805 }, { "epoch": 0.4131214761660687, "grad_norm": 1.0077006904307293, "learning_rate": 1.8412535328311813e-05, "loss": 0.5713, "step": 806 }, { "epoch": 0.41363403382880576, "grad_norm": 0.9730631806050897, "learning_rate": 1.8408043863926462e-05, "loss": 0.4047, "step": 807 }, { "epoch": 0.4141465914915428, "grad_norm": 0.9758301766342544, "learning_rate": 1.840354660402078e-05, "loss": 0.4628, "step": 808 }, { "epoch": 0.41465914915427987, "grad_norm": 0.8937790060043873, "learning_rate": 1.8399043551694655e-05, "loss": 0.4584, "step": 809 }, { "epoch": 0.41517170681701693, "grad_norm": 1.004961896699036, "learning_rate": 1.8394534710051956e-05, "loss": 0.4614, "step": 810 }, { "epoch": 0.415684264479754, "grad_norm": 1.021472732779167, "learning_rate": 1.8390020082200553e-05, "loss": 0.5026, "step": 811 }, { "epoch": 0.41619682214249104, "grad_norm": 1.0081321978692623, "learning_rate": 1.8385499671252305e-05, "loss": 0.5092, "step": 812 }, { "epoch": 0.4167093798052281, "grad_norm": 1.0278402677771266, "learning_rate": 1.8380973480323052e-05, "loss": 0.5001, "step": 813 }, { "epoch": 0.41722193746796515, "grad_norm": 1.0633585592922488, "learning_rate": 1.8376441512532617e-05, "loss": 0.5031, "step": 814 }, { "epoch": 0.4177344951307022, "grad_norm": 0.9049996361798515, "learning_rate": 1.8371903771004812e-05, "loss": 0.466, "step": 815 }, { "epoch": 0.41824705279343927, "grad_norm": 1.0577721617788742, "learning_rate": 1.8367360258867418e-05, "loss": 0.5192, "step": 816 }, { "epoch": 0.4187596104561763, "grad_norm": 1.050511626019893, "learning_rate": 1.8362810979252206e-05, "loss": 0.509, "step": 817 }, { "epoch": 0.4192721681189134, "grad_norm": 1.0181229756883432, "learning_rate": 1.835825593529492e-05, "loss": 0.5222, "step": 818 }, { "epoch": 0.41978472578165044, "grad_norm": 1.0209441955629153, "learning_rate": 1.8353695130135262e-05, "loss": 0.4633, "step": 819 }, { "epoch": 0.4202972834443875, "grad_norm": 1.0071163154948826, "learning_rate": 1.834912856691692e-05, "loss": 0.4817, "step": 820 }, { "epoch": 0.42080984110712455, "grad_norm": 1.0119404175380649, "learning_rate": 1.8344556248787556e-05, "loss": 0.4689, "step": 821 }, { "epoch": 0.4213223987698616, "grad_norm": 0.9780285613073807, "learning_rate": 1.833997817889878e-05, "loss": 0.5181, "step": 822 }, { "epoch": 0.42183495643259866, "grad_norm": 0.9613923050383183, "learning_rate": 1.8335394360406184e-05, "loss": 0.484, "step": 823 }, { "epoch": 0.4223475140953357, "grad_norm": 1.00477983100337, "learning_rate": 1.8330804796469314e-05, "loss": 0.4763, "step": 824 }, { "epoch": 0.4228600717580728, "grad_norm": 1.0341585960837154, "learning_rate": 1.8326209490251678e-05, "loss": 0.5548, "step": 825 }, { "epoch": 0.42337262942080983, "grad_norm": 1.090763719382996, "learning_rate": 1.8321608444920738e-05, "loss": 0.5223, "step": 826 }, { "epoch": 0.4238851870835469, "grad_norm": 0.9873656636712701, "learning_rate": 1.831700166364792e-05, "loss": 0.4435, "step": 827 }, { "epoch": 0.42439774474628394, "grad_norm": 0.9796280621572016, "learning_rate": 1.8312389149608595e-05, "loss": 0.4771, "step": 828 }, { "epoch": 0.424910302409021, "grad_norm": 1.0784633122388043, "learning_rate": 1.8307770905982092e-05, "loss": 0.5302, "step": 829 }, { "epoch": 0.42542286007175806, "grad_norm": 0.940232133933406, "learning_rate": 1.830314693595169e-05, "loss": 0.4733, "step": 830 }, { "epoch": 0.4259354177344951, "grad_norm": 0.9933563261901087, "learning_rate": 1.8298517242704607e-05, "loss": 0.5011, "step": 831 }, { "epoch": 0.42644797539723217, "grad_norm": 1.072488210148489, "learning_rate": 1.8293881829432015e-05, "loss": 0.516, "step": 832 }, { "epoch": 0.4269605330599692, "grad_norm": 0.94831570204977, "learning_rate": 1.828924069932902e-05, "loss": 0.4965, "step": 833 }, { "epoch": 0.4274730907227063, "grad_norm": 0.9803284695572905, "learning_rate": 1.828459385559468e-05, "loss": 0.4925, "step": 834 }, { "epoch": 0.42798564838544334, "grad_norm": 0.9601547495886623, "learning_rate": 1.827994130143198e-05, "loss": 0.4813, "step": 835 }, { "epoch": 0.4284982060481804, "grad_norm": 1.0571682283223487, "learning_rate": 1.8275283040047848e-05, "loss": 0.4969, "step": 836 }, { "epoch": 0.42901076371091745, "grad_norm": 0.9930603614660051, "learning_rate": 1.8270619074653143e-05, "loss": 0.5058, "step": 837 }, { "epoch": 0.4295233213736545, "grad_norm": 0.8891309091387775, "learning_rate": 1.8265949408462657e-05, "loss": 0.4859, "step": 838 }, { "epoch": 0.4300358790363916, "grad_norm": 1.2340795032058045, "learning_rate": 1.8261274044695106e-05, "loss": 0.4824, "step": 839 }, { "epoch": 0.4305484366991287, "grad_norm": 1.0639167372099618, "learning_rate": 1.825659298657314e-05, "loss": 0.5127, "step": 840 }, { "epoch": 0.43106099436186573, "grad_norm": 0.9780553189796425, "learning_rate": 1.8251906237323338e-05, "loss": 0.4862, "step": 841 }, { "epoch": 0.4315735520246028, "grad_norm": 1.002547968514964, "learning_rate": 1.8247213800176192e-05, "loss": 0.4708, "step": 842 }, { "epoch": 0.43208610968733985, "grad_norm": 1.0004643626134613, "learning_rate": 1.8242515678366114e-05, "loss": 0.4805, "step": 843 }, { "epoch": 0.4325986673500769, "grad_norm": 1.1398043820196355, "learning_rate": 1.8237811875131446e-05, "loss": 0.5014, "step": 844 }, { "epoch": 0.43311122501281396, "grad_norm": 0.9692930739370783, "learning_rate": 1.8233102393714435e-05, "loss": 0.5271, "step": 845 }, { "epoch": 0.433623782675551, "grad_norm": 1.0587021382933062, "learning_rate": 1.8228387237361245e-05, "loss": 0.5223, "step": 846 }, { "epoch": 0.4341363403382881, "grad_norm": 0.9793830835137696, "learning_rate": 1.8223666409321953e-05, "loss": 0.5143, "step": 847 }, { "epoch": 0.43464889800102513, "grad_norm": 0.9934744737744082, "learning_rate": 1.8218939912850547e-05, "loss": 0.4768, "step": 848 }, { "epoch": 0.4351614556637622, "grad_norm": 1.0633604440204873, "learning_rate": 1.8214207751204917e-05, "loss": 0.5342, "step": 849 }, { "epoch": 0.43567401332649924, "grad_norm": 1.0143145417404889, "learning_rate": 1.8209469927646863e-05, "loss": 0.5215, "step": 850 }, { "epoch": 0.4361865709892363, "grad_norm": 0.9803407035621101, "learning_rate": 1.8204726445442084e-05, "loss": 0.4824, "step": 851 }, { "epoch": 0.43669912865197336, "grad_norm": 0.9584058616147907, "learning_rate": 1.8199977307860178e-05, "loss": 0.4488, "step": 852 }, { "epoch": 0.4372116863147104, "grad_norm": 1.0509594376801523, "learning_rate": 1.8195222518174654e-05, "loss": 0.4766, "step": 853 }, { "epoch": 0.43772424397744747, "grad_norm": 1.0310709609166686, "learning_rate": 1.8190462079662897e-05, "loss": 0.491, "step": 854 }, { "epoch": 0.4382368016401845, "grad_norm": 1.0441073707743727, "learning_rate": 1.8185695995606196e-05, "loss": 0.4744, "step": 855 }, { "epoch": 0.4387493593029216, "grad_norm": 0.9820481869415253, "learning_rate": 1.8180924269289737e-05, "loss": 0.5, "step": 856 }, { "epoch": 0.43926191696565864, "grad_norm": 0.9347500431422612, "learning_rate": 1.8176146904002587e-05, "loss": 0.4652, "step": 857 }, { "epoch": 0.4397744746283957, "grad_norm": 1.0122923567543507, "learning_rate": 1.81713639030377e-05, "loss": 0.559, "step": 858 }, { "epoch": 0.44028703229113275, "grad_norm": 0.9607464264706981, "learning_rate": 1.8166575269691914e-05, "loss": 0.4923, "step": 859 }, { "epoch": 0.4407995899538698, "grad_norm": 1.0668932603014118, "learning_rate": 1.8161781007265958e-05, "loss": 0.4968, "step": 860 }, { "epoch": 0.44131214761660686, "grad_norm": 0.8585889758964204, "learning_rate": 1.815698111906443e-05, "loss": 0.4559, "step": 861 }, { "epoch": 0.4418247052793439, "grad_norm": 1.0538232216470436, "learning_rate": 1.8152175608395814e-05, "loss": 0.5256, "step": 862 }, { "epoch": 0.442337262942081, "grad_norm": 0.9878064792566773, "learning_rate": 1.8147364478572463e-05, "loss": 0.5312, "step": 863 }, { "epoch": 0.44284982060481803, "grad_norm": 0.9841243837430224, "learning_rate": 1.8142547732910606e-05, "loss": 0.4679, "step": 864 }, { "epoch": 0.4433623782675551, "grad_norm": 1.0364342114141933, "learning_rate": 1.813772537473035e-05, "loss": 0.4712, "step": 865 }, { "epoch": 0.44387493593029215, "grad_norm": 0.9619079721034988, "learning_rate": 1.8132897407355657e-05, "loss": 0.5131, "step": 866 }, { "epoch": 0.4443874935930292, "grad_norm": 1.0152572326200915, "learning_rate": 1.812806383411436e-05, "loss": 0.5171, "step": 867 }, { "epoch": 0.44490005125576626, "grad_norm": 0.9348230666297718, "learning_rate": 1.8123224658338166e-05, "loss": 0.5056, "step": 868 }, { "epoch": 0.4454126089185033, "grad_norm": 1.0686589835579827, "learning_rate": 1.8118379883362626e-05, "loss": 0.5168, "step": 869 }, { "epoch": 0.4459251665812404, "grad_norm": 0.89913423270372, "learning_rate": 1.811352951252717e-05, "loss": 0.458, "step": 870 }, { "epoch": 0.44643772424397743, "grad_norm": 0.999182093854464, "learning_rate": 1.810867354917507e-05, "loss": 0.4669, "step": 871 }, { "epoch": 0.4469502819067145, "grad_norm": 0.8961432873847147, "learning_rate": 1.8103811996653458e-05, "loss": 0.4695, "step": 872 }, { "epoch": 0.44746283956945154, "grad_norm": 1.0282517419828558, "learning_rate": 1.809894485831332e-05, "loss": 0.4663, "step": 873 }, { "epoch": 0.4479753972321886, "grad_norm": 0.9825475290924991, "learning_rate": 1.809407213750949e-05, "loss": 0.4759, "step": 874 }, { "epoch": 0.44848795489492566, "grad_norm": 1.0082787428186504, "learning_rate": 1.8089193837600653e-05, "loss": 0.4775, "step": 875 }, { "epoch": 0.4490005125576627, "grad_norm": 0.9336430618780969, "learning_rate": 1.8084309961949338e-05, "loss": 0.4498, "step": 876 }, { "epoch": 0.44951307022039977, "grad_norm": 0.9837851830715897, "learning_rate": 1.8079420513921913e-05, "loss": 0.4829, "step": 877 }, { "epoch": 0.4500256278831369, "grad_norm": 0.9649273088826487, "learning_rate": 1.807452549688859e-05, "loss": 0.4846, "step": 878 }, { "epoch": 0.45053818554587394, "grad_norm": 1.0510485173884194, "learning_rate": 1.8069624914223425e-05, "loss": 0.4425, "step": 879 }, { "epoch": 0.451050743208611, "grad_norm": 0.9344152082804352, "learning_rate": 1.8064718769304304e-05, "loss": 0.4869, "step": 880 }, { "epoch": 0.45156330087134805, "grad_norm": 0.9485763127709901, "learning_rate": 1.8059807065512945e-05, "loss": 0.4838, "step": 881 }, { "epoch": 0.4520758585340851, "grad_norm": 0.9562764656612806, "learning_rate": 1.8054889806234906e-05, "loss": 0.4256, "step": 882 }, { "epoch": 0.45258841619682216, "grad_norm": 0.9654651599926986, "learning_rate": 1.8049966994859565e-05, "loss": 0.5145, "step": 883 }, { "epoch": 0.4531009738595592, "grad_norm": 0.9308335848041603, "learning_rate": 1.8045038634780136e-05, "loss": 0.4923, "step": 884 }, { "epoch": 0.4536135315222963, "grad_norm": 0.9338911671717026, "learning_rate": 1.8040104729393652e-05, "loss": 0.4544, "step": 885 }, { "epoch": 0.45412608918503333, "grad_norm": 1.052314580880992, "learning_rate": 1.8035165282100963e-05, "loss": 0.528, "step": 886 }, { "epoch": 0.4546386468477704, "grad_norm": 0.9834859456773417, "learning_rate": 1.8030220296306752e-05, "loss": 0.4695, "step": 887 }, { "epoch": 0.45515120451050745, "grad_norm": 0.9982868132074525, "learning_rate": 1.802526977541951e-05, "loss": 0.5036, "step": 888 }, { "epoch": 0.4556637621732445, "grad_norm": 1.0055213647828518, "learning_rate": 1.8020313722851547e-05, "loss": 0.4773, "step": 889 }, { "epoch": 0.45617631983598156, "grad_norm": 1.039467873406386, "learning_rate": 1.8015352142018984e-05, "loss": 0.4845, "step": 890 }, { "epoch": 0.4566888774987186, "grad_norm": 1.0155039425246997, "learning_rate": 1.801038503634176e-05, "loss": 0.4654, "step": 891 }, { "epoch": 0.45720143516145567, "grad_norm": 0.9912815206756069, "learning_rate": 1.8005412409243604e-05, "loss": 0.5304, "step": 892 }, { "epoch": 0.45771399282419273, "grad_norm": 0.9411351038695086, "learning_rate": 1.8000434264152074e-05, "loss": 0.4417, "step": 893 }, { "epoch": 0.4582265504869298, "grad_norm": 0.9889296303267797, "learning_rate": 1.799545060449851e-05, "loss": 0.5014, "step": 894 }, { "epoch": 0.45873910814966684, "grad_norm": 0.8797145239288495, "learning_rate": 1.799046143371807e-05, "loss": 0.4855, "step": 895 }, { "epoch": 0.4592516658124039, "grad_norm": 0.9327666288729689, "learning_rate": 1.7985466755249703e-05, "loss": 0.4636, "step": 896 }, { "epoch": 0.45976422347514095, "grad_norm": 0.9013977376418929, "learning_rate": 1.798046657253615e-05, "loss": 0.4658, "step": 897 }, { "epoch": 0.460276781137878, "grad_norm": 1.0368875260131305, "learning_rate": 1.797546088902396e-05, "loss": 0.4564, "step": 898 }, { "epoch": 0.46078933880061507, "grad_norm": 1.0440613780923795, "learning_rate": 1.7970449708163455e-05, "loss": 0.5014, "step": 899 }, { "epoch": 0.4613018964633521, "grad_norm": 1.0052243386793094, "learning_rate": 1.796543303340876e-05, "loss": 0.5045, "step": 900 }, { "epoch": 0.4618144541260892, "grad_norm": 1.0715720182201955, "learning_rate": 1.7960410868217786e-05, "loss": 0.4732, "step": 901 }, { "epoch": 0.46232701178882624, "grad_norm": 1.013552743000888, "learning_rate": 1.7955383216052224e-05, "loss": 0.4936, "step": 902 }, { "epoch": 0.4628395694515633, "grad_norm": 0.9663579078032932, "learning_rate": 1.7950350080377545e-05, "loss": 0.429, "step": 903 }, { "epoch": 0.46335212711430035, "grad_norm": 1.0495657629356943, "learning_rate": 1.7945311464663002e-05, "loss": 0.4726, "step": 904 }, { "epoch": 0.4638646847770374, "grad_norm": 1.043533341212574, "learning_rate": 1.794026737238163e-05, "loss": 0.4755, "step": 905 }, { "epoch": 0.46437724243977446, "grad_norm": 1.0065730166227453, "learning_rate": 1.7935217807010238e-05, "loss": 0.4672, "step": 906 }, { "epoch": 0.4648898001025115, "grad_norm": 1.034536809307758, "learning_rate": 1.79301627720294e-05, "loss": 0.5062, "step": 907 }, { "epoch": 0.4654023577652486, "grad_norm": 1.0297855392993027, "learning_rate": 1.7925102270923465e-05, "loss": 0.4686, "step": 908 }, { "epoch": 0.46591491542798563, "grad_norm": 0.8885603551332084, "learning_rate": 1.7920036307180554e-05, "loss": 0.4755, "step": 909 }, { "epoch": 0.4664274730907227, "grad_norm": 0.9657985768442701, "learning_rate": 1.7914964884292543e-05, "loss": 0.4435, "step": 910 }, { "epoch": 0.46694003075345974, "grad_norm": 0.9724662130284587, "learning_rate": 1.7909888005755077e-05, "loss": 0.4875, "step": 911 }, { "epoch": 0.4674525884161968, "grad_norm": 1.0390932186302893, "learning_rate": 1.7904805675067568e-05, "loss": 0.5274, "step": 912 }, { "epoch": 0.46796514607893386, "grad_norm": 1.0504834624634254, "learning_rate": 1.7899717895733175e-05, "loss": 0.5154, "step": 913 }, { "epoch": 0.4684777037416709, "grad_norm": 0.9892455316533739, "learning_rate": 1.7894624671258813e-05, "loss": 0.4842, "step": 914 }, { "epoch": 0.46899026140440797, "grad_norm": 0.9867430837369722, "learning_rate": 1.788952600515516e-05, "loss": 0.4716, "step": 915 }, { "epoch": 0.469502819067145, "grad_norm": 1.0009120556658704, "learning_rate": 1.788442190093664e-05, "loss": 0.4751, "step": 916 }, { "epoch": 0.47001537672988214, "grad_norm": 1.003558669126573, "learning_rate": 1.7879312362121414e-05, "loss": 0.4977, "step": 917 }, { "epoch": 0.4705279343926192, "grad_norm": 1.0079045940108498, "learning_rate": 1.7874197392231414e-05, "loss": 0.4712, "step": 918 }, { "epoch": 0.47104049205535625, "grad_norm": 1.0331312169358753, "learning_rate": 1.786907699479229e-05, "loss": 0.4895, "step": 919 }, { "epoch": 0.4715530497180933, "grad_norm": 1.0185466088180053, "learning_rate": 1.786395117333345e-05, "loss": 0.4978, "step": 920 }, { "epoch": 0.47206560738083037, "grad_norm": 0.9537840410253057, "learning_rate": 1.785881993138803e-05, "loss": 0.4909, "step": 921 }, { "epoch": 0.4725781650435674, "grad_norm": 0.9923335282438484, "learning_rate": 1.7853683272492913e-05, "loss": 0.5217, "step": 922 }, { "epoch": 0.4730907227063045, "grad_norm": 1.0230841377854936, "learning_rate": 1.7848541200188708e-05, "loss": 0.4678, "step": 923 }, { "epoch": 0.47360328036904153, "grad_norm": 1.0968767536676733, "learning_rate": 1.7843393718019753e-05, "loss": 0.4617, "step": 924 }, { "epoch": 0.4741158380317786, "grad_norm": 0.9331608139883709, "learning_rate": 1.7838240829534125e-05, "loss": 0.4209, "step": 925 }, { "epoch": 0.47462839569451565, "grad_norm": 0.9177971044462895, "learning_rate": 1.7833082538283615e-05, "loss": 0.4586, "step": 926 }, { "epoch": 0.4751409533572527, "grad_norm": 1.0716857345262796, "learning_rate": 1.7827918847823746e-05, "loss": 0.4771, "step": 927 }, { "epoch": 0.47565351101998976, "grad_norm": 1.0010274186546249, "learning_rate": 1.7822749761713767e-05, "loss": 0.4478, "step": 928 }, { "epoch": 0.4761660686827268, "grad_norm": 1.0221759005169475, "learning_rate": 1.7817575283516637e-05, "loss": 0.4849, "step": 929 }, { "epoch": 0.4766786263454639, "grad_norm": 0.8618073350390436, "learning_rate": 1.7812395416799034e-05, "loss": 0.5112, "step": 930 }, { "epoch": 0.47719118400820093, "grad_norm": 1.0294757068353309, "learning_rate": 1.7807210165131352e-05, "loss": 0.4867, "step": 931 }, { "epoch": 0.477703741670938, "grad_norm": 0.975149172059086, "learning_rate": 1.7802019532087692e-05, "loss": 0.4887, "step": 932 }, { "epoch": 0.47821629933367504, "grad_norm": 1.1058012798919765, "learning_rate": 1.7796823521245876e-05, "loss": 0.5554, "step": 933 }, { "epoch": 0.4787288569964121, "grad_norm": 0.8859060605110313, "learning_rate": 1.7791622136187422e-05, "loss": 0.4401, "step": 934 }, { "epoch": 0.47924141465914916, "grad_norm": 1.0024235567915807, "learning_rate": 1.778641538049755e-05, "loss": 0.4672, "step": 935 }, { "epoch": 0.4797539723218862, "grad_norm": 0.979583595111768, "learning_rate": 1.7781203257765197e-05, "loss": 0.4776, "step": 936 }, { "epoch": 0.48026652998462327, "grad_norm": 0.888028982256771, "learning_rate": 1.7775985771582986e-05, "loss": 0.4424, "step": 937 }, { "epoch": 0.4807790876473603, "grad_norm": 1.53939371456451, "learning_rate": 1.7770762925547235e-05, "loss": 0.4438, "step": 938 }, { "epoch": 0.4812916453100974, "grad_norm": 0.9912649067554078, "learning_rate": 1.7765534723257972e-05, "loss": 0.4822, "step": 939 }, { "epoch": 0.48180420297283444, "grad_norm": 0.9722168266571894, "learning_rate": 1.7760301168318897e-05, "loss": 0.4979, "step": 940 }, { "epoch": 0.4823167606355715, "grad_norm": 1.016091154780826, "learning_rate": 1.7755062264337412e-05, "loss": 0.4988, "step": 941 }, { "epoch": 0.48282931829830855, "grad_norm": 0.9771516945118998, "learning_rate": 1.7749818014924612e-05, "loss": 0.4801, "step": 942 }, { "epoch": 0.4833418759610456, "grad_norm": 0.9693545082227248, "learning_rate": 1.7744568423695257e-05, "loss": 0.4642, "step": 943 }, { "epoch": 0.48385443362378266, "grad_norm": 0.9616954719288078, "learning_rate": 1.7739313494267808e-05, "loss": 0.4488, "step": 944 }, { "epoch": 0.4843669912865197, "grad_norm": 0.9860479372601967, "learning_rate": 1.7734053230264394e-05, "loss": 0.4793, "step": 945 }, { "epoch": 0.4848795489492568, "grad_norm": 0.9794518221397364, "learning_rate": 1.7728787635310828e-05, "loss": 0.4721, "step": 946 }, { "epoch": 0.48539210661199383, "grad_norm": 0.936560148914802, "learning_rate": 1.7723516713036588e-05, "loss": 0.4778, "step": 947 }, { "epoch": 0.4859046642747309, "grad_norm": 0.9798035261280924, "learning_rate": 1.7718240467074833e-05, "loss": 0.5103, "step": 948 }, { "epoch": 0.48641722193746795, "grad_norm": 1.043742714295928, "learning_rate": 1.7712958901062397e-05, "loss": 0.4719, "step": 949 }, { "epoch": 0.486929779600205, "grad_norm": 0.9099458010667935, "learning_rate": 1.770767201863976e-05, "loss": 0.5039, "step": 950 }, { "epoch": 0.48744233726294206, "grad_norm": 0.9522989142482802, "learning_rate": 1.7702379823451087e-05, "loss": 0.5093, "step": 951 }, { "epoch": 0.4879548949256791, "grad_norm": 0.9543228513231458, "learning_rate": 1.769708231914419e-05, "loss": 0.4565, "step": 952 }, { "epoch": 0.4884674525884162, "grad_norm": 0.9816346333059051, "learning_rate": 1.769177950937056e-05, "loss": 0.496, "step": 953 }, { "epoch": 0.48898001025115323, "grad_norm": 1.088575309028142, "learning_rate": 1.7686471397785322e-05, "loss": 0.4623, "step": 954 }, { "epoch": 0.4894925679138903, "grad_norm": 1.0427878956991616, "learning_rate": 1.7681157988047272e-05, "loss": 0.5126, "step": 955 }, { "epoch": 0.4900051255766274, "grad_norm": 0.9565802910617897, "learning_rate": 1.7675839283818848e-05, "loss": 0.4362, "step": 956 }, { "epoch": 0.49051768323936445, "grad_norm": 1.602786034949694, "learning_rate": 1.7670515288766146e-05, "loss": 0.4584, "step": 957 }, { "epoch": 0.4910302409021015, "grad_norm": 0.983815374792308, "learning_rate": 1.76651860065589e-05, "loss": 0.4872, "step": 958 }, { "epoch": 0.49154279856483857, "grad_norm": 1.0001450348041372, "learning_rate": 1.7659851440870498e-05, "loss": 0.5122, "step": 959 }, { "epoch": 0.4920553562275756, "grad_norm": 0.9044471194312304, "learning_rate": 1.7654511595377956e-05, "loss": 0.5142, "step": 960 }, { "epoch": 0.4925679138903127, "grad_norm": 0.9472186599593325, "learning_rate": 1.7649166473761947e-05, "loss": 0.4669, "step": 961 }, { "epoch": 0.49308047155304974, "grad_norm": 1.039414540176816, "learning_rate": 1.764381607970677e-05, "loss": 0.4752, "step": 962 }, { "epoch": 0.4935930292157868, "grad_norm": 0.9382193008716851, "learning_rate": 1.7638460416900355e-05, "loss": 0.4741, "step": 963 }, { "epoch": 0.49410558687852385, "grad_norm": 0.9532663354075596, "learning_rate": 1.7633099489034272e-05, "loss": 0.4496, "step": 964 }, { "epoch": 0.4946181445412609, "grad_norm": 0.9467477385665892, "learning_rate": 1.7627733299803714e-05, "loss": 0.4994, "step": 965 }, { "epoch": 0.49513070220399796, "grad_norm": 1.0175745138232652, "learning_rate": 1.7622361852907506e-05, "loss": 0.547, "step": 966 }, { "epoch": 0.495643259866735, "grad_norm": 1.017114071908224, "learning_rate": 1.7616985152048092e-05, "loss": 0.4592, "step": 967 }, { "epoch": 0.4961558175294721, "grad_norm": 1.0154624925539493, "learning_rate": 1.761160320093154e-05, "loss": 0.4604, "step": 968 }, { "epoch": 0.49666837519220913, "grad_norm": 0.9040955651908826, "learning_rate": 1.760621600326754e-05, "loss": 0.4742, "step": 969 }, { "epoch": 0.4971809328549462, "grad_norm": 0.9543617189388324, "learning_rate": 1.760082356276939e-05, "loss": 0.5383, "step": 970 }, { "epoch": 0.49769349051768325, "grad_norm": 0.9570836731486122, "learning_rate": 1.759542588315401e-05, "loss": 0.4502, "step": 971 }, { "epoch": 0.4982060481804203, "grad_norm": 0.9264101208525907, "learning_rate": 1.7590022968141928e-05, "loss": 0.4919, "step": 972 }, { "epoch": 0.49871860584315736, "grad_norm": 0.8516644115152403, "learning_rate": 1.7584614821457275e-05, "loss": 0.4404, "step": 973 }, { "epoch": 0.4992311635058944, "grad_norm": 0.9365425786573582, "learning_rate": 1.75792014468278e-05, "loss": 0.4816, "step": 974 }, { "epoch": 0.49974372116863147, "grad_norm": 0.9037911955350518, "learning_rate": 1.7573782847984848e-05, "loss": 0.4905, "step": 975 }, { "epoch": 0.5002562788313686, "grad_norm": 0.8989212188338548, "learning_rate": 1.7568359028663365e-05, "loss": 0.4667, "step": 976 }, { "epoch": 0.5007688364941056, "grad_norm": 0.9721486841499547, "learning_rate": 1.7562929992601895e-05, "loss": 0.5199, "step": 977 }, { "epoch": 0.5012813941568427, "grad_norm": 0.9556306533798753, "learning_rate": 1.7557495743542586e-05, "loss": 0.4586, "step": 978 }, { "epoch": 0.5017939518195798, "grad_norm": 1.009813645576601, "learning_rate": 1.7552056285231166e-05, "loss": 0.4876, "step": 979 }, { "epoch": 0.5023065094823168, "grad_norm": 0.9393822147928, "learning_rate": 1.754661162141696e-05, "loss": 0.5558, "step": 980 }, { "epoch": 0.5028190671450539, "grad_norm": 0.9219839549133577, "learning_rate": 1.7541161755852884e-05, "loss": 0.4979, "step": 981 }, { "epoch": 0.5033316248077909, "grad_norm": 0.9746063191045615, "learning_rate": 1.7535706692295436e-05, "loss": 0.4809, "step": 982 }, { "epoch": 0.503844182470528, "grad_norm": 0.971426244337499, "learning_rate": 1.75302464345047e-05, "loss": 0.4512, "step": 983 }, { "epoch": 0.504356740133265, "grad_norm": 0.9405764924816004, "learning_rate": 1.7524780986244334e-05, "loss": 0.499, "step": 984 }, { "epoch": 0.5048692977960021, "grad_norm": 1.0587493037813598, "learning_rate": 1.751931035128158e-05, "loss": 0.5216, "step": 985 }, { "epoch": 0.5053818554587391, "grad_norm": 0.9277049051220009, "learning_rate": 1.7513834533387256e-05, "loss": 0.4311, "step": 986 }, { "epoch": 0.5058944131214762, "grad_norm": 0.9829049758338124, "learning_rate": 1.750835353633574e-05, "loss": 0.5166, "step": 987 }, { "epoch": 0.5064069707842133, "grad_norm": 0.9134138343289402, "learning_rate": 1.7502867363905e-05, "loss": 0.4661, "step": 988 }, { "epoch": 0.5069195284469503, "grad_norm": 1.0777983824886426, "learning_rate": 1.749737601987655e-05, "loss": 0.5403, "step": 989 }, { "epoch": 0.5074320861096874, "grad_norm": 0.9535695686486358, "learning_rate": 1.7491879508035488e-05, "loss": 0.4981, "step": 990 }, { "epoch": 0.5079446437724244, "grad_norm": 0.9023702915134375, "learning_rate": 1.7486377832170463e-05, "loss": 0.4774, "step": 991 }, { "epoch": 0.5084572014351615, "grad_norm": 1.0640261429944509, "learning_rate": 1.7480870996073682e-05, "loss": 0.4642, "step": 992 }, { "epoch": 0.5089697590978985, "grad_norm": 1.0233006027499911, "learning_rate": 1.7475359003540916e-05, "loss": 0.4896, "step": 993 }, { "epoch": 0.5094823167606356, "grad_norm": 0.9486435715269923, "learning_rate": 1.746984185837149e-05, "loss": 0.4634, "step": 994 }, { "epoch": 0.5099948744233727, "grad_norm": 0.9687018546377133, "learning_rate": 1.7464319564368268e-05, "loss": 0.4976, "step": 995 }, { "epoch": 0.5105074320861097, "grad_norm": 0.9131499736684533, "learning_rate": 1.745879212533768e-05, "loss": 0.4718, "step": 996 }, { "epoch": 0.5110199897488468, "grad_norm": 0.9402242173594835, "learning_rate": 1.7453259545089695e-05, "loss": 0.4735, "step": 997 }, { "epoch": 0.5115325474115838, "grad_norm": 0.8865421879142469, "learning_rate": 1.744772182743782e-05, "loss": 0.4861, "step": 998 }, { "epoch": 0.5120451050743209, "grad_norm": 0.926879034334786, "learning_rate": 1.7442178976199112e-05, "loss": 0.4522, "step": 999 }, { "epoch": 0.5125576627370579, "grad_norm": 0.9486803490413301, "learning_rate": 1.743663099519417e-05, "loss": 0.5188, "step": 1000 }, { "epoch": 0.513070220399795, "grad_norm": 0.983464043235736, "learning_rate": 1.7431077888247108e-05, "loss": 0.4533, "step": 1001 }, { "epoch": 0.513582778062532, "grad_norm": 0.9782003261058241, "learning_rate": 1.7425519659185596e-05, "loss": 0.5277, "step": 1002 }, { "epoch": 0.5140953357252691, "grad_norm": 0.9242936321049796, "learning_rate": 1.741995631184083e-05, "loss": 0.432, "step": 1003 }, { "epoch": 0.5146078933880062, "grad_norm": 0.9447947141576224, "learning_rate": 1.7414387850047522e-05, "loss": 0.5, "step": 1004 }, { "epoch": 0.5151204510507432, "grad_norm": 0.9965239408190869, "learning_rate": 1.740881427764392e-05, "loss": 0.5227, "step": 1005 }, { "epoch": 0.5156330087134803, "grad_norm": 0.9104592870270182, "learning_rate": 1.740323559847179e-05, "loss": 0.4467, "step": 1006 }, { "epoch": 0.5161455663762173, "grad_norm": 0.9888735497519946, "learning_rate": 1.7397651816376423e-05, "loss": 0.4757, "step": 1007 }, { "epoch": 0.5166581240389544, "grad_norm": 0.9297399346454546, "learning_rate": 1.739206293520662e-05, "loss": 0.4154, "step": 1008 }, { "epoch": 0.5171706817016914, "grad_norm": 0.9263013241458795, "learning_rate": 1.7386468958814706e-05, "loss": 0.5061, "step": 1009 }, { "epoch": 0.5176832393644285, "grad_norm": 0.9834899486873998, "learning_rate": 1.738086989105651e-05, "loss": 0.5074, "step": 1010 }, { "epoch": 0.5181957970271656, "grad_norm": 0.9329611688717948, "learning_rate": 1.7375265735791372e-05, "loss": 0.4621, "step": 1011 }, { "epoch": 0.5187083546899026, "grad_norm": 0.9632982573268568, "learning_rate": 1.736965649688214e-05, "loss": 0.4688, "step": 1012 }, { "epoch": 0.5192209123526397, "grad_norm": 0.9446847970148688, "learning_rate": 1.7364042178195176e-05, "loss": 0.4776, "step": 1013 }, { "epoch": 0.5197334700153767, "grad_norm": 1.066921391427055, "learning_rate": 1.735842278360032e-05, "loss": 0.5256, "step": 1014 }, { "epoch": 0.5202460276781138, "grad_norm": 0.983104798811306, "learning_rate": 1.735279831697093e-05, "loss": 0.4753, "step": 1015 }, { "epoch": 0.5207585853408508, "grad_norm": 1.0710688593698714, "learning_rate": 1.7347168782183852e-05, "loss": 0.5136, "step": 1016 }, { "epoch": 0.5212711430035879, "grad_norm": 0.9484781248165758, "learning_rate": 1.734153418311943e-05, "loss": 0.5052, "step": 1017 }, { "epoch": 0.521783700666325, "grad_norm": 0.9909580675115565, "learning_rate": 1.73358945236615e-05, "loss": 0.438, "step": 1018 }, { "epoch": 0.522296258329062, "grad_norm": 1.031362157612371, "learning_rate": 1.7330249807697374e-05, "loss": 0.452, "step": 1019 }, { "epoch": 0.5228088159917991, "grad_norm": 1.0254264505780357, "learning_rate": 1.7324600039117862e-05, "loss": 0.506, "step": 1020 }, { "epoch": 0.5233213736545361, "grad_norm": 1.0425577679831783, "learning_rate": 1.7318945221817255e-05, "loss": 0.5003, "step": 1021 }, { "epoch": 0.5238339313172732, "grad_norm": 1.0141202255881032, "learning_rate": 1.7313285359693322e-05, "loss": 0.4659, "step": 1022 }, { "epoch": 0.5243464889800102, "grad_norm": 1.0189427136990297, "learning_rate": 1.73076204566473e-05, "loss": 0.4705, "step": 1023 }, { "epoch": 0.5248590466427473, "grad_norm": 0.9867246300493809, "learning_rate": 1.7301950516583922e-05, "loss": 0.4556, "step": 1024 }, { "epoch": 0.5253716043054844, "grad_norm": 0.915289860406558, "learning_rate": 1.729627554341137e-05, "loss": 0.4862, "step": 1025 }, { "epoch": 0.5258841619682214, "grad_norm": 0.9617248856486244, "learning_rate": 1.7290595541041312e-05, "loss": 0.5051, "step": 1026 }, { "epoch": 0.5263967196309585, "grad_norm": 0.971358219233939, "learning_rate": 1.728491051338887e-05, "loss": 0.5002, "step": 1027 }, { "epoch": 0.5269092772936955, "grad_norm": 0.9722958520804039, "learning_rate": 1.7279220464372644e-05, "loss": 0.4881, "step": 1028 }, { "epoch": 0.5274218349564326, "grad_norm": 1.0146453593611635, "learning_rate": 1.7273525397914677e-05, "loss": 0.4838, "step": 1029 }, { "epoch": 0.5279343926191696, "grad_norm": 1.0272345915874814, "learning_rate": 1.7267825317940494e-05, "loss": 0.4891, "step": 1030 }, { "epoch": 0.5284469502819067, "grad_norm": 1.0749421256759344, "learning_rate": 1.7262120228379053e-05, "loss": 0.498, "step": 1031 }, { "epoch": 0.5289595079446437, "grad_norm": 0.9260059770662636, "learning_rate": 1.725641013316277e-05, "loss": 0.4627, "step": 1032 }, { "epoch": 0.5294720656073808, "grad_norm": 1.0164872139139913, "learning_rate": 1.725069503622753e-05, "loss": 0.5047, "step": 1033 }, { "epoch": 0.5299846232701179, "grad_norm": 0.8955815026304308, "learning_rate": 1.724497494151264e-05, "loss": 0.4569, "step": 1034 }, { "epoch": 0.5304971809328549, "grad_norm": 0.9813209331022792, "learning_rate": 1.7239249852960863e-05, "loss": 0.5064, "step": 1035 }, { "epoch": 0.531009738595592, "grad_norm": 0.9197285772314917, "learning_rate": 1.723351977451841e-05, "loss": 0.4195, "step": 1036 }, { "epoch": 0.531522296258329, "grad_norm": 0.9194729330059891, "learning_rate": 1.7227784710134924e-05, "loss": 0.465, "step": 1037 }, { "epoch": 0.5320348539210661, "grad_norm": 0.9178642783470934, "learning_rate": 1.7222044663763484e-05, "loss": 0.4594, "step": 1038 }, { "epoch": 0.5325474115838031, "grad_norm": 0.8762879107223209, "learning_rate": 1.7216299639360605e-05, "loss": 0.4961, "step": 1039 }, { "epoch": 0.5330599692465402, "grad_norm": 0.9266357353205461, "learning_rate": 1.7210549640886237e-05, "loss": 0.4428, "step": 1040 }, { "epoch": 0.5335725269092773, "grad_norm": 0.9558330064267343, "learning_rate": 1.7204794672303757e-05, "loss": 0.5238, "step": 1041 }, { "epoch": 0.5340850845720143, "grad_norm": 0.9605278895284876, "learning_rate": 1.7199034737579962e-05, "loss": 0.4864, "step": 1042 }, { "epoch": 0.5345976422347514, "grad_norm": 0.9610989836239622, "learning_rate": 1.7193269840685074e-05, "loss": 0.4816, "step": 1043 }, { "epoch": 0.5351101998974884, "grad_norm": 0.9026345141880597, "learning_rate": 1.7187499985592743e-05, "loss": 0.4387, "step": 1044 }, { "epoch": 0.5356227575602255, "grad_norm": 1.0627263150214654, "learning_rate": 1.7181725176280028e-05, "loss": 0.5403, "step": 1045 }, { "epoch": 0.5361353152229625, "grad_norm": 0.9243886426962229, "learning_rate": 1.7175945416727405e-05, "loss": 0.4129, "step": 1046 }, { "epoch": 0.5366478728856996, "grad_norm": 1.0165294477591988, "learning_rate": 1.717016071091877e-05, "loss": 0.5467, "step": 1047 }, { "epoch": 0.5371604305484367, "grad_norm": 0.8897717263677098, "learning_rate": 1.716437106284141e-05, "loss": 0.4994, "step": 1048 }, { "epoch": 0.5376729882111737, "grad_norm": 0.9225552289078721, "learning_rate": 1.715857647648604e-05, "loss": 0.4814, "step": 1049 }, { "epoch": 0.5381855458739108, "grad_norm": 0.955228107916411, "learning_rate": 1.7152776955846768e-05, "loss": 0.4075, "step": 1050 }, { "epoch": 0.5386981035366478, "grad_norm": 0.9848504541617781, "learning_rate": 1.7146972504921097e-05, "loss": 0.4928, "step": 1051 }, { "epoch": 0.5392106611993849, "grad_norm": 0.9935566660970794, "learning_rate": 1.714116312770994e-05, "loss": 0.526, "step": 1052 }, { "epoch": 0.5397232188621219, "grad_norm": 0.9819087572728213, "learning_rate": 1.71353488282176e-05, "loss": 0.5198, "step": 1053 }, { "epoch": 0.5402357765248591, "grad_norm": 0.8991644594096967, "learning_rate": 1.7129529610451775e-05, "loss": 0.4297, "step": 1054 }, { "epoch": 0.5407483341875962, "grad_norm": 1.0732985713517813, "learning_rate": 1.7123705478423552e-05, "loss": 0.4702, "step": 1055 }, { "epoch": 0.5412608918503332, "grad_norm": 0.9128189493658941, "learning_rate": 1.7117876436147404e-05, "loss": 0.453, "step": 1056 }, { "epoch": 0.5417734495130703, "grad_norm": 0.9530680282230302, "learning_rate": 1.711204248764119e-05, "loss": 0.5066, "step": 1057 }, { "epoch": 0.5422860071758073, "grad_norm": 0.9590158854031241, "learning_rate": 1.7106203636926154e-05, "loss": 0.5045, "step": 1058 }, { "epoch": 0.5427985648385444, "grad_norm": 0.9551959291245826, "learning_rate": 1.710035988802691e-05, "loss": 0.4325, "step": 1059 }, { "epoch": 0.5433111225012814, "grad_norm": 0.9428238645138008, "learning_rate": 1.7094511244971456e-05, "loss": 0.4678, "step": 1060 }, { "epoch": 0.5438236801640185, "grad_norm": 1.0191928809926853, "learning_rate": 1.708865771179116e-05, "loss": 0.4562, "step": 1061 }, { "epoch": 0.5443362378267556, "grad_norm": 1.0469368994939439, "learning_rate": 1.7082799292520767e-05, "loss": 0.4868, "step": 1062 }, { "epoch": 0.5448487954894926, "grad_norm": 0.9180245256782021, "learning_rate": 1.7076935991198383e-05, "loss": 0.5044, "step": 1063 }, { "epoch": 0.5453613531522297, "grad_norm": 1.0776312324593205, "learning_rate": 1.7071067811865477e-05, "loss": 0.5135, "step": 1064 }, { "epoch": 0.5458739108149667, "grad_norm": 0.9199862941607657, "learning_rate": 1.706519475856689e-05, "loss": 0.475, "step": 1065 }, { "epoch": 0.5463864684777038, "grad_norm": 0.9100377445962513, "learning_rate": 1.7059316835350806e-05, "loss": 0.514, "step": 1066 }, { "epoch": 0.5468990261404408, "grad_norm": 0.9233804487031358, "learning_rate": 1.7053434046268793e-05, "loss": 0.4744, "step": 1067 }, { "epoch": 0.5474115838031779, "grad_norm": 0.921372338530967, "learning_rate": 1.704754639537574e-05, "loss": 0.4345, "step": 1068 }, { "epoch": 0.547924141465915, "grad_norm": 0.9514878038721113, "learning_rate": 1.704165388672992e-05, "loss": 0.4292, "step": 1069 }, { "epoch": 0.548436699128652, "grad_norm": 0.9773662743269591, "learning_rate": 1.7035756524392924e-05, "loss": 0.5042, "step": 1070 }, { "epoch": 0.5489492567913891, "grad_norm": 0.8987258752776303, "learning_rate": 1.7029854312429707e-05, "loss": 0.4763, "step": 1071 }, { "epoch": 0.5494618144541261, "grad_norm": 0.9236669760255222, "learning_rate": 1.7023947254908565e-05, "loss": 0.4265, "step": 1072 }, { "epoch": 0.5499743721168632, "grad_norm": 0.9643883161214273, "learning_rate": 1.701803535590113e-05, "loss": 0.4732, "step": 1073 }, { "epoch": 0.5504869297796002, "grad_norm": 1.0186967143159846, "learning_rate": 1.7012118619482376e-05, "loss": 0.4903, "step": 1074 }, { "epoch": 0.5509994874423373, "grad_norm": 0.9603040463268696, "learning_rate": 1.70061970497306e-05, "loss": 0.4704, "step": 1075 }, { "epoch": 0.5515120451050743, "grad_norm": 0.9385263230376574, "learning_rate": 1.7000270650727446e-05, "loss": 0.4299, "step": 1076 }, { "epoch": 0.5520246027678114, "grad_norm": 0.9780192904343243, "learning_rate": 1.6994339426557872e-05, "loss": 0.4432, "step": 1077 }, { "epoch": 0.5525371604305485, "grad_norm": 0.9791742447855529, "learning_rate": 1.6988403381310177e-05, "loss": 0.4917, "step": 1078 }, { "epoch": 0.5530497180932855, "grad_norm": 0.9074742086278611, "learning_rate": 1.6982462519075968e-05, "loss": 0.4515, "step": 1079 }, { "epoch": 0.5535622757560226, "grad_norm": 0.9419910084308467, "learning_rate": 1.6976516843950187e-05, "loss": 0.4999, "step": 1080 }, { "epoch": 0.5540748334187596, "grad_norm": 0.9503609680213448, "learning_rate": 1.6970566360031077e-05, "loss": 0.4698, "step": 1081 }, { "epoch": 0.5545873910814967, "grad_norm": 0.9772347715131129, "learning_rate": 1.696461107142021e-05, "loss": 0.4496, "step": 1082 }, { "epoch": 0.5550999487442337, "grad_norm": 0.9319643289651813, "learning_rate": 1.6958650982222466e-05, "loss": 0.4938, "step": 1083 }, { "epoch": 0.5556125064069708, "grad_norm": 0.9382179709893154, "learning_rate": 1.695268609654602e-05, "loss": 0.471, "step": 1084 }, { "epoch": 0.5561250640697079, "grad_norm": 0.9700315758480339, "learning_rate": 1.6946716418502386e-05, "loss": 0.511, "step": 1085 }, { "epoch": 0.5566376217324449, "grad_norm": 0.9384889152935996, "learning_rate": 1.6940741952206342e-05, "loss": 0.5002, "step": 1086 }, { "epoch": 0.557150179395182, "grad_norm": 1.0109528613462329, "learning_rate": 1.693476270177599e-05, "loss": 0.5001, "step": 1087 }, { "epoch": 0.557662737057919, "grad_norm": 0.979990440491301, "learning_rate": 1.692877867133273e-05, "loss": 0.451, "step": 1088 }, { "epoch": 0.5581752947206561, "grad_norm": 0.964535987344546, "learning_rate": 1.6922789865001254e-05, "loss": 0.5141, "step": 1089 }, { "epoch": 0.5586878523833931, "grad_norm": 0.9789674551244508, "learning_rate": 1.691679628690953e-05, "loss": 0.4581, "step": 1090 }, { "epoch": 0.5592004100461302, "grad_norm": 0.9566614858435173, "learning_rate": 1.6910797941188838e-05, "loss": 0.4667, "step": 1091 }, { "epoch": 0.5597129677088672, "grad_norm": 0.9043004745053183, "learning_rate": 1.6904794831973733e-05, "loss": 0.419, "step": 1092 }, { "epoch": 0.5602255253716043, "grad_norm": 0.9673097995148002, "learning_rate": 1.6898786963402057e-05, "loss": 0.4546, "step": 1093 }, { "epoch": 0.5607380830343414, "grad_norm": 0.9452157545477776, "learning_rate": 1.6892774339614927e-05, "loss": 0.4329, "step": 1094 }, { "epoch": 0.5612506406970784, "grad_norm": 1.0240945076243781, "learning_rate": 1.6886756964756743e-05, "loss": 0.4979, "step": 1095 }, { "epoch": 0.5617631983598155, "grad_norm": 0.9801256233862036, "learning_rate": 1.688073484297518e-05, "loss": 0.4787, "step": 1096 }, { "epoch": 0.5622757560225525, "grad_norm": 1.0134133474443678, "learning_rate": 1.687470797842118e-05, "loss": 0.4537, "step": 1097 }, { "epoch": 0.5627883136852896, "grad_norm": 1.051713245892747, "learning_rate": 1.686867637524896e-05, "loss": 0.5173, "step": 1098 }, { "epoch": 0.5633008713480266, "grad_norm": 0.8833666971704238, "learning_rate": 1.6862640037616e-05, "loss": 0.4332, "step": 1099 }, { "epoch": 0.5638134290107637, "grad_norm": 0.9985827330275138, "learning_rate": 1.6856598969683045e-05, "loss": 0.5079, "step": 1100 }, { "epoch": 0.5643259866735008, "grad_norm": 0.8984167510777299, "learning_rate": 1.6850553175614093e-05, "loss": 0.4716, "step": 1101 }, { "epoch": 0.5648385443362378, "grad_norm": 1.0681612252911943, "learning_rate": 1.6844502659576414e-05, "loss": 0.4532, "step": 1102 }, { "epoch": 0.5653511019989749, "grad_norm": 1.042146927329915, "learning_rate": 1.6838447425740524e-05, "loss": 0.5306, "step": 1103 }, { "epoch": 0.5658636596617119, "grad_norm": 0.9794514127642638, "learning_rate": 1.6832387478280185e-05, "loss": 0.4705, "step": 1104 }, { "epoch": 0.566376217324449, "grad_norm": 0.9328827380995135, "learning_rate": 1.682632282137242e-05, "loss": 0.4565, "step": 1105 }, { "epoch": 0.566888774987186, "grad_norm": 1.0186011598531426, "learning_rate": 1.6820253459197493e-05, "loss": 0.5283, "step": 1106 }, { "epoch": 0.5674013326499231, "grad_norm": 0.905670537182813, "learning_rate": 1.6814179395938915e-05, "loss": 0.4691, "step": 1107 }, { "epoch": 0.5679138903126602, "grad_norm": 1.0403323774683968, "learning_rate": 1.680810063578342e-05, "loss": 0.4263, "step": 1108 }, { "epoch": 0.5684264479753972, "grad_norm": 0.8655712819843638, "learning_rate": 1.680201718292101e-05, "loss": 0.3971, "step": 1109 }, { "epoch": 0.5689390056381343, "grad_norm": 0.9660635614159143, "learning_rate": 1.679592904154489e-05, "loss": 0.4361, "step": 1110 }, { "epoch": 0.5694515633008713, "grad_norm": 0.9801486327500545, "learning_rate": 1.6789836215851525e-05, "loss": 0.4848, "step": 1111 }, { "epoch": 0.5699641209636084, "grad_norm": 0.921659928979543, "learning_rate": 1.6783738710040588e-05, "loss": 0.4813, "step": 1112 }, { "epoch": 0.5704766786263454, "grad_norm": 0.9025512263050715, "learning_rate": 1.677763652831498e-05, "loss": 0.4644, "step": 1113 }, { "epoch": 0.5709892362890825, "grad_norm": 0.8675505934715653, "learning_rate": 1.677152967488084e-05, "loss": 0.4565, "step": 1114 }, { "epoch": 0.5715017939518195, "grad_norm": 0.9096291893083325, "learning_rate": 1.676541815394751e-05, "loss": 0.3875, "step": 1115 }, { "epoch": 0.5720143516145566, "grad_norm": 0.8962173734930268, "learning_rate": 1.675930196972756e-05, "loss": 0.4283, "step": 1116 }, { "epoch": 0.5725269092772937, "grad_norm": 0.8485754144209376, "learning_rate": 1.675318112643677e-05, "loss": 0.4618, "step": 1117 }, { "epoch": 0.5730394669400307, "grad_norm": 0.8927635648531628, "learning_rate": 1.6747055628294134e-05, "loss": 0.4082, "step": 1118 }, { "epoch": 0.5735520246027678, "grad_norm": 0.9007312589759627, "learning_rate": 1.6740925479521844e-05, "loss": 0.4599, "step": 1119 }, { "epoch": 0.5740645822655048, "grad_norm": 0.8382062932491168, "learning_rate": 1.6734790684345322e-05, "loss": 0.4631, "step": 1120 }, { "epoch": 0.5745771399282419, "grad_norm": 0.9076444074607072, "learning_rate": 1.6728651246993165e-05, "loss": 0.4187, "step": 1121 }, { "epoch": 0.5750896975909789, "grad_norm": 0.962652149502678, "learning_rate": 1.6722507171697184e-05, "loss": 0.4854, "step": 1122 }, { "epoch": 0.575602255253716, "grad_norm": 0.9327522717594626, "learning_rate": 1.671635846269239e-05, "loss": 0.454, "step": 1123 }, { "epoch": 0.5761148129164531, "grad_norm": 0.8946772005924849, "learning_rate": 1.6710205124216973e-05, "loss": 0.4882, "step": 1124 }, { "epoch": 0.5766273705791901, "grad_norm": 0.9178960461867215, "learning_rate": 1.6704047160512333e-05, "loss": 0.4695, "step": 1125 }, { "epoch": 0.5771399282419272, "grad_norm": 0.9090685388820532, "learning_rate": 1.669788457582304e-05, "loss": 0.4911, "step": 1126 }, { "epoch": 0.5776524859046642, "grad_norm": 0.9715779530096776, "learning_rate": 1.669171737439687e-05, "loss": 0.4737, "step": 1127 }, { "epoch": 0.5781650435674013, "grad_norm": 0.9517271104443367, "learning_rate": 1.668554556048476e-05, "loss": 0.5014, "step": 1128 }, { "epoch": 0.5786776012301383, "grad_norm": 0.8906644238597621, "learning_rate": 1.6679369138340833e-05, "loss": 0.468, "step": 1129 }, { "epoch": 0.5791901588928754, "grad_norm": 0.8738803094639239, "learning_rate": 1.6673188112222394e-05, "loss": 0.4962, "step": 1130 }, { "epoch": 0.5797027165556125, "grad_norm": 0.8979118842199485, "learning_rate": 1.6667002486389924e-05, "loss": 0.4364, "step": 1131 }, { "epoch": 0.5802152742183496, "grad_norm": 0.9438151445653253, "learning_rate": 1.666081226510706e-05, "loss": 0.4086, "step": 1132 }, { "epoch": 0.5807278318810867, "grad_norm": 0.8997143331403848, "learning_rate": 1.6654617452640622e-05, "loss": 0.4626, "step": 1133 }, { "epoch": 0.5812403895438237, "grad_norm": 0.9154364355330821, "learning_rate": 1.6648418053260585e-05, "loss": 0.4604, "step": 1134 }, { "epoch": 0.5817529472065608, "grad_norm": 0.9252903348062834, "learning_rate": 1.6642214071240085e-05, "loss": 0.4391, "step": 1135 }, { "epoch": 0.5822655048692978, "grad_norm": 0.8984448409922403, "learning_rate": 1.6636005510855425e-05, "loss": 0.4234, "step": 1136 }, { "epoch": 0.5827780625320349, "grad_norm": 0.8467722459544195, "learning_rate": 1.662979237638606e-05, "loss": 0.4224, "step": 1137 }, { "epoch": 0.583290620194772, "grad_norm": 0.9489995522771373, "learning_rate": 1.6623574672114596e-05, "loss": 0.459, "step": 1138 }, { "epoch": 0.583803177857509, "grad_norm": 1.0051392109263895, "learning_rate": 1.661735240232679e-05, "loss": 0.5044, "step": 1139 }, { "epoch": 0.5843157355202461, "grad_norm": 0.9152196693370034, "learning_rate": 1.661112557131154e-05, "loss": 0.443, "step": 1140 }, { "epoch": 0.5848282931829831, "grad_norm": 0.8962870070568475, "learning_rate": 1.66048941833609e-05, "loss": 0.4443, "step": 1141 }, { "epoch": 0.5853408508457202, "grad_norm": 0.921893240625102, "learning_rate": 1.6598658242770054e-05, "loss": 0.439, "step": 1142 }, { "epoch": 0.5858534085084572, "grad_norm": 0.847946959307802, "learning_rate": 1.6592417753837338e-05, "loss": 0.4941, "step": 1143 }, { "epoch": 0.5863659661711943, "grad_norm": 0.8245601541147747, "learning_rate": 1.6586172720864206e-05, "loss": 0.4393, "step": 1144 }, { "epoch": 0.5868785238339314, "grad_norm": 1.0114181285969446, "learning_rate": 1.657992314815525e-05, "loss": 0.4688, "step": 1145 }, { "epoch": 0.5873910814966684, "grad_norm": 0.9613866172605672, "learning_rate": 1.6573669040018202e-05, "loss": 0.4581, "step": 1146 }, { "epoch": 0.5879036391594055, "grad_norm": 0.8698439606873462, "learning_rate": 1.6567410400763902e-05, "loss": 0.429, "step": 1147 }, { "epoch": 0.5884161968221425, "grad_norm": 0.8577769476543113, "learning_rate": 1.6561147234706333e-05, "loss": 0.4269, "step": 1148 }, { "epoch": 0.5889287544848796, "grad_norm": 0.9567867322738912, "learning_rate": 1.6554879546162578e-05, "loss": 0.4665, "step": 1149 }, { "epoch": 0.5894413121476166, "grad_norm": 0.9573678961354939, "learning_rate": 1.6548607339452853e-05, "loss": 0.4342, "step": 1150 }, { "epoch": 0.5899538698103537, "grad_norm": 0.978826477401949, "learning_rate": 1.654233061890048e-05, "loss": 0.4719, "step": 1151 }, { "epoch": 0.5904664274730907, "grad_norm": 1.0515943065582956, "learning_rate": 1.6536049388831897e-05, "loss": 0.4344, "step": 1152 }, { "epoch": 0.5909789851358278, "grad_norm": 0.9298684928099703, "learning_rate": 1.652976365357664e-05, "loss": 0.4574, "step": 1153 }, { "epoch": 0.5914915427985649, "grad_norm": 0.924577102558725, "learning_rate": 1.652347341746737e-05, "loss": 0.484, "step": 1154 }, { "epoch": 0.5920041004613019, "grad_norm": 0.8704871837307129, "learning_rate": 1.6517178684839834e-05, "loss": 0.4346, "step": 1155 }, { "epoch": 0.592516658124039, "grad_norm": 0.8779210118712382, "learning_rate": 1.651087946003288e-05, "loss": 0.4836, "step": 1156 }, { "epoch": 0.593029215786776, "grad_norm": 0.909575886769677, "learning_rate": 1.6504575747388462e-05, "loss": 0.4615, "step": 1157 }, { "epoch": 0.5935417734495131, "grad_norm": 0.9065946017567068, "learning_rate": 1.6498267551251618e-05, "loss": 0.4674, "step": 1158 }, { "epoch": 0.5940543311122501, "grad_norm": 0.9291709252554413, "learning_rate": 1.6491954875970474e-05, "loss": 0.4373, "step": 1159 }, { "epoch": 0.5945668887749872, "grad_norm": 0.9022069106152883, "learning_rate": 1.6485637725896256e-05, "loss": 0.4695, "step": 1160 }, { "epoch": 0.5950794464377243, "grad_norm": 0.9283995142859905, "learning_rate": 1.6479316105383266e-05, "loss": 0.4723, "step": 1161 }, { "epoch": 0.5955920041004613, "grad_norm": 0.8945284780364021, "learning_rate": 1.6472990018788884e-05, "loss": 0.4693, "step": 1162 }, { "epoch": 0.5961045617631984, "grad_norm": 0.9204483689090491, "learning_rate": 1.646665947047358e-05, "loss": 0.4896, "step": 1163 }, { "epoch": 0.5966171194259354, "grad_norm": 0.8959717625454096, "learning_rate": 1.6460324464800884e-05, "loss": 0.4381, "step": 1164 }, { "epoch": 0.5971296770886725, "grad_norm": 0.8743659973623036, "learning_rate": 1.6453985006137412e-05, "loss": 0.4471, "step": 1165 }, { "epoch": 0.5976422347514095, "grad_norm": 0.908641094339923, "learning_rate": 1.644764109885284e-05, "loss": 0.4822, "step": 1166 }, { "epoch": 0.5981547924141466, "grad_norm": 0.9734324982534694, "learning_rate": 1.6441292747319923e-05, "loss": 0.5126, "step": 1167 }, { "epoch": 0.5986673500768837, "grad_norm": 0.860687022861412, "learning_rate": 1.6434939955914466e-05, "loss": 0.4757, "step": 1168 }, { "epoch": 0.5991799077396207, "grad_norm": 0.8864543531901822, "learning_rate": 1.6428582729015333e-05, "loss": 0.4769, "step": 1169 }, { "epoch": 0.5996924654023578, "grad_norm": 0.9617649832770582, "learning_rate": 1.642222107100446e-05, "loss": 0.4652, "step": 1170 }, { "epoch": 0.6002050230650948, "grad_norm": 0.9402948878708643, "learning_rate": 1.6415854986266824e-05, "loss": 0.5095, "step": 1171 }, { "epoch": 0.6007175807278319, "grad_norm": 0.9571934966181341, "learning_rate": 1.6409484479190457e-05, "loss": 0.497, "step": 1172 }, { "epoch": 0.6012301383905689, "grad_norm": 0.9206089974099105, "learning_rate": 1.6403109554166447e-05, "loss": 0.4434, "step": 1173 }, { "epoch": 0.601742696053306, "grad_norm": 0.9162808502700993, "learning_rate": 1.6396730215588913e-05, "loss": 0.5011, "step": 1174 }, { "epoch": 0.602255253716043, "grad_norm": 0.9113627277474995, "learning_rate": 1.6390346467855027e-05, "loss": 0.4474, "step": 1175 }, { "epoch": 0.6027678113787801, "grad_norm": 0.9950991155662605, "learning_rate": 1.6383958315365e-05, "loss": 0.465, "step": 1176 }, { "epoch": 0.6032803690415172, "grad_norm": 0.9908937715825319, "learning_rate": 1.6377565762522066e-05, "loss": 0.4736, "step": 1177 }, { "epoch": 0.6037929267042542, "grad_norm": 0.9146986945871793, "learning_rate": 1.6371168813732514e-05, "loss": 0.478, "step": 1178 }, { "epoch": 0.6043054843669913, "grad_norm": 0.8940475429575373, "learning_rate": 1.636476747340564e-05, "loss": 0.4569, "step": 1179 }, { "epoch": 0.6048180420297283, "grad_norm": 0.8913558465821522, "learning_rate": 1.6358361745953787e-05, "loss": 0.4394, "step": 1180 }, { "epoch": 0.6053305996924654, "grad_norm": 0.938460290457955, "learning_rate": 1.6351951635792306e-05, "loss": 0.4759, "step": 1181 }, { "epoch": 0.6058431573552024, "grad_norm": 1.0133844914638865, "learning_rate": 1.6345537147339578e-05, "loss": 0.5046, "step": 1182 }, { "epoch": 0.6063557150179395, "grad_norm": 0.8855799873597683, "learning_rate": 1.6339118285017005e-05, "loss": 0.4249, "step": 1183 }, { "epoch": 0.6068682726806766, "grad_norm": 0.9143880959226786, "learning_rate": 1.633269505324899e-05, "loss": 0.4833, "step": 1184 }, { "epoch": 0.6073808303434136, "grad_norm": 0.9662254236803384, "learning_rate": 1.6326267456462965e-05, "loss": 0.4333, "step": 1185 }, { "epoch": 0.6078933880061507, "grad_norm": 0.9537548478839504, "learning_rate": 1.6319835499089358e-05, "loss": 0.4867, "step": 1186 }, { "epoch": 0.6084059456688877, "grad_norm": 0.9038844072819208, "learning_rate": 1.63133991855616e-05, "loss": 0.4506, "step": 1187 }, { "epoch": 0.6089185033316248, "grad_norm": 1.0298688582764988, "learning_rate": 1.6306958520316144e-05, "loss": 0.5203, "step": 1188 }, { "epoch": 0.6094310609943618, "grad_norm": 0.857576759113156, "learning_rate": 1.6300513507792426e-05, "loss": 0.4552, "step": 1189 }, { "epoch": 0.6099436186570989, "grad_norm": 0.8677988084655195, "learning_rate": 1.6294064152432878e-05, "loss": 0.4474, "step": 1190 }, { "epoch": 0.610456176319836, "grad_norm": 0.9220562872436886, "learning_rate": 1.628761045868293e-05, "loss": 0.4468, "step": 1191 }, { "epoch": 0.610968733982573, "grad_norm": 0.9660477441258849, "learning_rate": 1.6281152430991012e-05, "loss": 0.4882, "step": 1192 }, { "epoch": 0.6114812916453101, "grad_norm": 0.9490254268564983, "learning_rate": 1.627469007380852e-05, "loss": 0.4537, "step": 1193 }, { "epoch": 0.6119938493080471, "grad_norm": 0.9598269578995223, "learning_rate": 1.626822339158985e-05, "loss": 0.4919, "step": 1194 }, { "epoch": 0.6125064069707842, "grad_norm": 0.9291787216426663, "learning_rate": 1.626175238879238e-05, "loss": 0.4639, "step": 1195 }, { "epoch": 0.6130189646335212, "grad_norm": 0.9483491301688327, "learning_rate": 1.6255277069876454e-05, "loss": 0.4897, "step": 1196 }, { "epoch": 0.6135315222962583, "grad_norm": 0.9882921395054811, "learning_rate": 1.6248797439305403e-05, "loss": 0.4889, "step": 1197 }, { "epoch": 0.6140440799589953, "grad_norm": 0.9921889613454642, "learning_rate": 1.6242313501545522e-05, "loss": 0.4263, "step": 1198 }, { "epoch": 0.6145566376217324, "grad_norm": 0.8383635862551237, "learning_rate": 1.6235825261066078e-05, "loss": 0.4135, "step": 1199 }, { "epoch": 0.6150691952844695, "grad_norm": 0.9150292411516748, "learning_rate": 1.6229332722339308e-05, "loss": 0.4513, "step": 1200 }, { "epoch": 0.6155817529472065, "grad_norm": 0.9363986850783318, "learning_rate": 1.6222835889840402e-05, "loss": 0.4522, "step": 1201 }, { "epoch": 0.6160943106099436, "grad_norm": 0.9520142280701889, "learning_rate": 1.621633476804752e-05, "loss": 0.4346, "step": 1202 }, { "epoch": 0.6166068682726806, "grad_norm": 0.8412208438041192, "learning_rate": 1.6209829361441767e-05, "loss": 0.4933, "step": 1203 }, { "epoch": 0.6171194259354177, "grad_norm": 0.8407534708102508, "learning_rate": 1.6203319674507216e-05, "loss": 0.4635, "step": 1204 }, { "epoch": 0.6176319835981547, "grad_norm": 0.8917869487136492, "learning_rate": 1.619680571173088e-05, "loss": 0.4807, "step": 1205 }, { "epoch": 0.6181445412608918, "grad_norm": 0.8654066167258412, "learning_rate": 1.6190287477602716e-05, "loss": 0.4134, "step": 1206 }, { "epoch": 0.6186570989236289, "grad_norm": 0.9175902908475241, "learning_rate": 1.618376497661564e-05, "loss": 0.4836, "step": 1207 }, { "epoch": 0.6191696565863659, "grad_norm": 0.9151929181073556, "learning_rate": 1.6177238213265497e-05, "loss": 0.465, "step": 1208 }, { "epoch": 0.619682214249103, "grad_norm": 0.9472008287086185, "learning_rate": 1.6170707192051067e-05, "loss": 0.4622, "step": 1209 }, { "epoch": 0.6201947719118401, "grad_norm": 0.8718106990453806, "learning_rate": 1.6164171917474078e-05, "loss": 0.5076, "step": 1210 }, { "epoch": 0.6207073295745772, "grad_norm": 0.9506825233012693, "learning_rate": 1.615763239403918e-05, "loss": 0.4316, "step": 1211 }, { "epoch": 0.6212198872373142, "grad_norm": 0.8555318339695687, "learning_rate": 1.6151088626253954e-05, "loss": 0.4536, "step": 1212 }, { "epoch": 0.6217324449000513, "grad_norm": 0.8455737917933079, "learning_rate": 1.6144540618628907e-05, "loss": 0.4004, "step": 1213 }, { "epoch": 0.6222450025627884, "grad_norm": 0.8978474943623675, "learning_rate": 1.6137988375677466e-05, "loss": 0.4339, "step": 1214 }, { "epoch": 0.6227575602255254, "grad_norm": 0.937910556835394, "learning_rate": 1.6131431901915987e-05, "loss": 0.4695, "step": 1215 }, { "epoch": 0.6232701178882625, "grad_norm": 0.9109897213711855, "learning_rate": 1.612487120186373e-05, "loss": 0.4447, "step": 1216 }, { "epoch": 0.6237826755509995, "grad_norm": 0.9688643173804197, "learning_rate": 1.611830628004287e-05, "loss": 0.492, "step": 1217 }, { "epoch": 0.6242952332137366, "grad_norm": 0.9114808628448079, "learning_rate": 1.6111737140978495e-05, "loss": 0.4385, "step": 1218 }, { "epoch": 0.6248077908764736, "grad_norm": 0.906254916211885, "learning_rate": 1.6105163789198607e-05, "loss": 0.4874, "step": 1219 }, { "epoch": 0.6253203485392107, "grad_norm": 0.9721538470000437, "learning_rate": 1.6098586229234096e-05, "loss": 0.5209, "step": 1220 }, { "epoch": 0.6258329062019478, "grad_norm": 0.9682068868523448, "learning_rate": 1.609200446561877e-05, "loss": 0.4809, "step": 1221 }, { "epoch": 0.6263454638646848, "grad_norm": 1.015943274360847, "learning_rate": 1.6085418502889315e-05, "loss": 0.482, "step": 1222 }, { "epoch": 0.6268580215274219, "grad_norm": 0.906440736041673, "learning_rate": 1.6078828345585332e-05, "loss": 0.4494, "step": 1223 }, { "epoch": 0.6273705791901589, "grad_norm": 1.0812427580042376, "learning_rate": 1.6072233998249303e-05, "loss": 0.4845, "step": 1224 }, { "epoch": 0.627883136852896, "grad_norm": 0.904495943599072, "learning_rate": 1.606563546542659e-05, "loss": 0.4268, "step": 1225 }, { "epoch": 0.628395694515633, "grad_norm": 0.9878428832519979, "learning_rate": 1.6059032751665454e-05, "loss": 0.5413, "step": 1226 }, { "epoch": 0.6289082521783701, "grad_norm": 0.9615792127208889, "learning_rate": 1.6052425861517035e-05, "loss": 0.5126, "step": 1227 }, { "epoch": 0.6294208098411072, "grad_norm": 0.9442669365562686, "learning_rate": 1.604581479953534e-05, "loss": 0.424, "step": 1228 }, { "epoch": 0.6299333675038442, "grad_norm": 0.94830654426329, "learning_rate": 1.603919957027727e-05, "loss": 0.4219, "step": 1229 }, { "epoch": 0.6304459251665813, "grad_norm": 0.8023480747730226, "learning_rate": 1.6032580178302585e-05, "loss": 0.3893, "step": 1230 }, { "epoch": 0.6309584828293183, "grad_norm": 0.902705751166374, "learning_rate": 1.6025956628173916e-05, "loss": 0.5588, "step": 1231 }, { "epoch": 0.6314710404920554, "grad_norm": 0.8410317959002195, "learning_rate": 1.6019328924456764e-05, "loss": 0.457, "step": 1232 }, { "epoch": 0.6319835981547924, "grad_norm": 0.9045898812037849, "learning_rate": 1.6012697071719498e-05, "loss": 0.4359, "step": 1233 }, { "epoch": 0.6324961558175295, "grad_norm": 0.9018529996170991, "learning_rate": 1.600606107453333e-05, "loss": 0.5012, "step": 1234 }, { "epoch": 0.6330087134802665, "grad_norm": 0.9166403144733308, "learning_rate": 1.599942093747234e-05, "loss": 0.3942, "step": 1235 }, { "epoch": 0.6335212711430036, "grad_norm": 0.889691453208496, "learning_rate": 1.599277666511347e-05, "loss": 0.4663, "step": 1236 }, { "epoch": 0.6340338288057407, "grad_norm": 0.8965824770930376, "learning_rate": 1.59861282620365e-05, "loss": 0.486, "step": 1237 }, { "epoch": 0.6345463864684777, "grad_norm": 0.9022131222328663, "learning_rate": 1.597947573282405e-05, "loss": 0.4604, "step": 1238 }, { "epoch": 0.6350589441312148, "grad_norm": 0.9302882161731634, "learning_rate": 1.5972819082061605e-05, "loss": 0.4755, "step": 1239 }, { "epoch": 0.6355715017939518, "grad_norm": 0.899427466882247, "learning_rate": 1.5966158314337472e-05, "loss": 0.4257, "step": 1240 }, { "epoch": 0.6360840594566889, "grad_norm": 0.9380082690058458, "learning_rate": 1.595949343424281e-05, "loss": 0.4293, "step": 1241 }, { "epoch": 0.636596617119426, "grad_norm": 0.8612464763651455, "learning_rate": 1.5952824446371608e-05, "loss": 0.456, "step": 1242 }, { "epoch": 0.637109174782163, "grad_norm": 0.8679755483141685, "learning_rate": 1.5946151355320675e-05, "loss": 0.4473, "step": 1243 }, { "epoch": 0.6376217324449001, "grad_norm": 0.9581370420553723, "learning_rate": 1.593947416568967e-05, "loss": 0.4865, "step": 1244 }, { "epoch": 0.6381342901076371, "grad_norm": 0.937166514494376, "learning_rate": 1.5932792882081062e-05, "loss": 0.504, "step": 1245 }, { "epoch": 0.6386468477703742, "grad_norm": 0.9259991790885805, "learning_rate": 1.592610750910014e-05, "loss": 0.4524, "step": 1246 }, { "epoch": 0.6391594054331112, "grad_norm": 0.909542176000842, "learning_rate": 1.591941805135502e-05, "loss": 0.4837, "step": 1247 }, { "epoch": 0.6396719630958483, "grad_norm": 0.8640872467046399, "learning_rate": 1.591272451345663e-05, "loss": 0.3953, "step": 1248 }, { "epoch": 0.6401845207585853, "grad_norm": 0.8897197782749182, "learning_rate": 1.5906026900018708e-05, "loss": 0.4813, "step": 1249 }, { "epoch": 0.6406970784213224, "grad_norm": 0.9497728426243374, "learning_rate": 1.589932521565781e-05, "loss": 0.4825, "step": 1250 }, { "epoch": 0.6412096360840595, "grad_norm": 0.973823801817427, "learning_rate": 1.589261946499329e-05, "loss": 0.4542, "step": 1251 }, { "epoch": 0.6417221937467965, "grad_norm": 0.9205131625653175, "learning_rate": 1.588590965264731e-05, "loss": 0.4543, "step": 1252 }, { "epoch": 0.6422347514095336, "grad_norm": 0.853762826577436, "learning_rate": 1.587919578324482e-05, "loss": 0.4884, "step": 1253 }, { "epoch": 0.6427473090722706, "grad_norm": 0.9679228699174188, "learning_rate": 1.587247786141358e-05, "loss": 0.4592, "step": 1254 }, { "epoch": 0.6432598667350077, "grad_norm": 0.8865719469755909, "learning_rate": 1.5865755891784137e-05, "loss": 0.4605, "step": 1255 }, { "epoch": 0.6437724243977447, "grad_norm": 0.8766423526238362, "learning_rate": 1.5859029878989828e-05, "loss": 0.3938, "step": 1256 }, { "epoch": 0.6442849820604818, "grad_norm": 0.8597420516404074, "learning_rate": 1.585229982766678e-05, "loss": 0.4143, "step": 1257 }, { "epoch": 0.6447975397232188, "grad_norm": 1.0009195837776894, "learning_rate": 1.5845565742453906e-05, "loss": 0.484, "step": 1258 }, { "epoch": 0.6453100973859559, "grad_norm": 0.8477049880422104, "learning_rate": 1.583882762799289e-05, "loss": 0.4527, "step": 1259 }, { "epoch": 0.645822655048693, "grad_norm": 1.0058365070923694, "learning_rate": 1.58320854889282e-05, "loss": 0.4899, "step": 1260 }, { "epoch": 0.64633521271143, "grad_norm": 0.9380768759193489, "learning_rate": 1.5825339329907074e-05, "loss": 0.484, "step": 1261 }, { "epoch": 0.6468477703741671, "grad_norm": 0.9045170708891165, "learning_rate": 1.581858915557953e-05, "loss": 0.4687, "step": 1262 }, { "epoch": 0.6473603280369041, "grad_norm": 0.8293998652924258, "learning_rate": 1.581183497059834e-05, "loss": 0.4405, "step": 1263 }, { "epoch": 0.6478728856996412, "grad_norm": 0.8467009293859311, "learning_rate": 1.580507677961906e-05, "loss": 0.4731, "step": 1264 }, { "epoch": 0.6483854433623782, "grad_norm": 0.9979126487988191, "learning_rate": 1.5798314587299976e-05, "loss": 0.4618, "step": 1265 }, { "epoch": 0.6488980010251153, "grad_norm": 0.8667696443979837, "learning_rate": 1.5791548398302167e-05, "loss": 0.43, "step": 1266 }, { "epoch": 0.6494105586878524, "grad_norm": 0.8812210236420472, "learning_rate": 1.578477821728944e-05, "loss": 0.475, "step": 1267 }, { "epoch": 0.6499231163505894, "grad_norm": 0.8709846718182546, "learning_rate": 1.577800404892838e-05, "loss": 0.4564, "step": 1268 }, { "epoch": 0.6504356740133265, "grad_norm": 0.8754491808705402, "learning_rate": 1.5771225897888288e-05, "loss": 0.4363, "step": 1269 }, { "epoch": 0.6509482316760635, "grad_norm": 0.8663328798530866, "learning_rate": 1.5764443768841234e-05, "loss": 0.484, "step": 1270 }, { "epoch": 0.6514607893388006, "grad_norm": 0.9288855057110712, "learning_rate": 1.575765766646203e-05, "loss": 0.4755, "step": 1271 }, { "epoch": 0.6519733470015376, "grad_norm": 0.868848308679757, "learning_rate": 1.5750867595428205e-05, "loss": 0.3951, "step": 1272 }, { "epoch": 0.6524859046642747, "grad_norm": 0.923647719334437, "learning_rate": 1.574407356042005e-05, "loss": 0.484, "step": 1273 }, { "epoch": 0.6529984623270118, "grad_norm": 0.8639691016796397, "learning_rate": 1.5737275566120577e-05, "loss": 0.4879, "step": 1274 }, { "epoch": 0.6535110199897488, "grad_norm": 0.9457490234044517, "learning_rate": 1.573047361721552e-05, "loss": 0.466, "step": 1275 }, { "epoch": 0.6540235776524859, "grad_norm": 0.8942986352191705, "learning_rate": 1.572366771839335e-05, "loss": 0.4269, "step": 1276 }, { "epoch": 0.6545361353152229, "grad_norm": 0.8342148554912603, "learning_rate": 1.5716857874345258e-05, "loss": 0.3972, "step": 1277 }, { "epoch": 0.65504869297796, "grad_norm": 0.8887511443986879, "learning_rate": 1.5710044089765144e-05, "loss": 0.4337, "step": 1278 }, { "epoch": 0.655561250640697, "grad_norm": 0.9647300907070918, "learning_rate": 1.5703226369349642e-05, "loss": 0.5312, "step": 1279 }, { "epoch": 0.6560738083034341, "grad_norm": 0.9537602136969644, "learning_rate": 1.5696404717798087e-05, "loss": 0.4447, "step": 1280 }, { "epoch": 0.6565863659661711, "grad_norm": 0.8741646709964525, "learning_rate": 1.5689579139812524e-05, "loss": 0.5137, "step": 1281 }, { "epoch": 0.6570989236289082, "grad_norm": 0.8882761525717575, "learning_rate": 1.5682749640097708e-05, "loss": 0.4561, "step": 1282 }, { "epoch": 0.6576114812916453, "grad_norm": 0.9902188779984976, "learning_rate": 1.5675916223361095e-05, "loss": 0.4404, "step": 1283 }, { "epoch": 0.6581240389543823, "grad_norm": 0.8988817470027421, "learning_rate": 1.5669078894312848e-05, "loss": 0.4692, "step": 1284 }, { "epoch": 0.6586365966171194, "grad_norm": 0.8773960946233608, "learning_rate": 1.566223765766581e-05, "loss": 0.4283, "step": 1285 }, { "epoch": 0.6591491542798564, "grad_norm": 0.9212265939896053, "learning_rate": 1.565539251813554e-05, "loss": 0.5033, "step": 1286 }, { "epoch": 0.6596617119425935, "grad_norm": 0.9080084798647355, "learning_rate": 1.5648543480440264e-05, "loss": 0.523, "step": 1287 }, { "epoch": 0.6601742696053307, "grad_norm": 0.8202946720423246, "learning_rate": 1.5641690549300913e-05, "loss": 0.4555, "step": 1288 }, { "epoch": 0.6606868272680677, "grad_norm": 0.9205506514298872, "learning_rate": 1.5634833729441093e-05, "loss": 0.4675, "step": 1289 }, { "epoch": 0.6611993849308048, "grad_norm": 0.9095114229603197, "learning_rate": 1.5627973025587093e-05, "loss": 0.4623, "step": 1290 }, { "epoch": 0.6617119425935418, "grad_norm": 0.8513585376449007, "learning_rate": 1.5621108442467877e-05, "loss": 0.4518, "step": 1291 }, { "epoch": 0.6622245002562789, "grad_norm": 1.0051334057630161, "learning_rate": 1.5614239984815084e-05, "loss": 0.4161, "step": 1292 }, { "epoch": 0.6627370579190159, "grad_norm": 0.930697959814759, "learning_rate": 1.5607367657363028e-05, "loss": 0.4562, "step": 1293 }, { "epoch": 0.663249615581753, "grad_norm": 0.8558979364125657, "learning_rate": 1.560049146484868e-05, "loss": 0.4512, "step": 1294 }, { "epoch": 0.66376217324449, "grad_norm": 0.9668412125806919, "learning_rate": 1.5593611412011685e-05, "loss": 0.4387, "step": 1295 }, { "epoch": 0.6642747309072271, "grad_norm": 0.890770585633996, "learning_rate": 1.5586727503594347e-05, "loss": 0.4592, "step": 1296 }, { "epoch": 0.6647872885699642, "grad_norm": 0.8206619848320351, "learning_rate": 1.557983974434162e-05, "loss": 0.4278, "step": 1297 }, { "epoch": 0.6652998462327012, "grad_norm": 0.8430868092459644, "learning_rate": 1.5572948139001128e-05, "loss": 0.4131, "step": 1298 }, { "epoch": 0.6658124038954383, "grad_norm": 0.9638014176120561, "learning_rate": 1.5566052692323127e-05, "loss": 0.4451, "step": 1299 }, { "epoch": 0.6663249615581753, "grad_norm": 0.8910095136702263, "learning_rate": 1.5559153409060535e-05, "loss": 0.4751, "step": 1300 }, { "epoch": 0.6668375192209124, "grad_norm": 0.9108219482239749, "learning_rate": 1.5552250293968903e-05, "loss": 0.4723, "step": 1301 }, { "epoch": 0.6673500768836494, "grad_norm": 0.8858603902406628, "learning_rate": 1.5545343351806443e-05, "loss": 0.4432, "step": 1302 }, { "epoch": 0.6678626345463865, "grad_norm": 0.9122133701247528, "learning_rate": 1.553843258733398e-05, "loss": 0.4738, "step": 1303 }, { "epoch": 0.6683751922091236, "grad_norm": 0.9413808761719642, "learning_rate": 1.5531518005314987e-05, "loss": 0.4297, "step": 1304 }, { "epoch": 0.6688877498718606, "grad_norm": 0.9036798821365156, "learning_rate": 1.552459961051557e-05, "loss": 0.4277, "step": 1305 }, { "epoch": 0.6694003075345977, "grad_norm": 0.9095499620162153, "learning_rate": 1.551767740770446e-05, "loss": 0.4962, "step": 1306 }, { "epoch": 0.6699128651973347, "grad_norm": 0.909604573714484, "learning_rate": 1.551075140165301e-05, "loss": 0.419, "step": 1307 }, { "epoch": 0.6704254228600718, "grad_norm": 0.8853964239849762, "learning_rate": 1.55038215971352e-05, "loss": 0.4449, "step": 1308 }, { "epoch": 0.6709379805228088, "grad_norm": 0.891717272088988, "learning_rate": 1.549688799892762e-05, "loss": 0.4135, "step": 1309 }, { "epoch": 0.6714505381855459, "grad_norm": 0.8872402347516295, "learning_rate": 1.5489950611809484e-05, "loss": 0.4388, "step": 1310 }, { "epoch": 0.671963095848283, "grad_norm": 0.8664803230770733, "learning_rate": 1.5483009440562617e-05, "loss": 0.439, "step": 1311 }, { "epoch": 0.67247565351102, "grad_norm": 0.894623439895757, "learning_rate": 1.5476064489971444e-05, "loss": 0.4551, "step": 1312 }, { "epoch": 0.6729882111737571, "grad_norm": 0.9431770662180295, "learning_rate": 1.5469115764823e-05, "loss": 0.5224, "step": 1313 }, { "epoch": 0.6735007688364941, "grad_norm": 0.8366922542594898, "learning_rate": 1.5462163269906928e-05, "loss": 0.415, "step": 1314 }, { "epoch": 0.6740133264992312, "grad_norm": 0.9077384781325305, "learning_rate": 1.5455207010015456e-05, "loss": 0.4779, "step": 1315 }, { "epoch": 0.6745258841619682, "grad_norm": 0.9560405628890077, "learning_rate": 1.544824698994342e-05, "loss": 0.4606, "step": 1316 }, { "epoch": 0.6750384418247053, "grad_norm": 0.9274469441982769, "learning_rate": 1.544128321448824e-05, "loss": 0.4466, "step": 1317 }, { "epoch": 0.6755509994874423, "grad_norm": 1.0002953001301065, "learning_rate": 1.5434315688449924e-05, "loss": 0.4812, "step": 1318 }, { "epoch": 0.6760635571501794, "grad_norm": 0.9116350073310184, "learning_rate": 1.5427344416631076e-05, "loss": 0.4413, "step": 1319 }, { "epoch": 0.6765761148129165, "grad_norm": 0.834355504166229, "learning_rate": 1.5420369403836867e-05, "loss": 0.407, "step": 1320 }, { "epoch": 0.6770886724756535, "grad_norm": 0.9452237195014861, "learning_rate": 1.541339065487506e-05, "loss": 0.4569, "step": 1321 }, { "epoch": 0.6776012301383906, "grad_norm": 0.9350194706284697, "learning_rate": 1.5406408174555978e-05, "loss": 0.4061, "step": 1322 }, { "epoch": 0.6781137878011276, "grad_norm": 0.9375268507339789, "learning_rate": 1.539942196769253e-05, "loss": 0.4777, "step": 1323 }, { "epoch": 0.6786263454638647, "grad_norm": 0.8622641482014707, "learning_rate": 1.5392432039100192e-05, "loss": 0.4065, "step": 1324 }, { "epoch": 0.6791389031266017, "grad_norm": 0.8143801150494977, "learning_rate": 1.5385438393596994e-05, "loss": 0.459, "step": 1325 }, { "epoch": 0.6796514607893388, "grad_norm": 0.860174349970997, "learning_rate": 1.5378441036003543e-05, "loss": 0.4292, "step": 1326 }, { "epoch": 0.6801640184520759, "grad_norm": 0.9088335195069308, "learning_rate": 1.5371439971142993e-05, "loss": 0.492, "step": 1327 }, { "epoch": 0.6806765761148129, "grad_norm": 0.9369188423394075, "learning_rate": 1.5364435203841058e-05, "loss": 0.417, "step": 1328 }, { "epoch": 0.68118913377755, "grad_norm": 0.9176922574106111, "learning_rate": 1.5357426738926008e-05, "loss": 0.4342, "step": 1329 }, { "epoch": 0.681701691440287, "grad_norm": 0.9132737074119333, "learning_rate": 1.535041458122865e-05, "loss": 0.4969, "step": 1330 }, { "epoch": 0.6822142491030241, "grad_norm": 0.9055029157154578, "learning_rate": 1.5343398735582352e-05, "loss": 0.4441, "step": 1331 }, { "epoch": 0.6827268067657611, "grad_norm": 0.898591792122176, "learning_rate": 1.5336379206823013e-05, "loss": 0.4668, "step": 1332 }, { "epoch": 0.6832393644284982, "grad_norm": 0.8375799413106341, "learning_rate": 1.5329355999789074e-05, "loss": 0.4322, "step": 1333 }, { "epoch": 0.6837519220912353, "grad_norm": 0.9658014235003266, "learning_rate": 1.5322329119321508e-05, "loss": 0.4837, "step": 1334 }, { "epoch": 0.6842644797539723, "grad_norm": 0.9403701682041498, "learning_rate": 1.5315298570263827e-05, "loss": 0.4719, "step": 1335 }, { "epoch": 0.6847770374167094, "grad_norm": 0.9274473681493046, "learning_rate": 1.5308264357462068e-05, "loss": 0.4564, "step": 1336 }, { "epoch": 0.6852895950794464, "grad_norm": 0.8478711621027354, "learning_rate": 1.530122648576479e-05, "loss": 0.4449, "step": 1337 }, { "epoch": 0.6858021527421835, "grad_norm": 1.0111630638303881, "learning_rate": 1.529418496002308e-05, "loss": 0.5225, "step": 1338 }, { "epoch": 0.6863147104049205, "grad_norm": 0.9476800882992282, "learning_rate": 1.5287139785090534e-05, "loss": 0.5032, "step": 1339 }, { "epoch": 0.6868272680676576, "grad_norm": 0.9186952620059393, "learning_rate": 1.528009096582328e-05, "loss": 0.4826, "step": 1340 }, { "epoch": 0.6873398257303946, "grad_norm": 0.9037769602972556, "learning_rate": 1.527303850707994e-05, "loss": 0.516, "step": 1341 }, { "epoch": 0.6878523833931317, "grad_norm": 0.833221622071728, "learning_rate": 1.5265982413721662e-05, "loss": 0.4505, "step": 1342 }, { "epoch": 0.6883649410558688, "grad_norm": 0.8739220258012129, "learning_rate": 1.5258922690612085e-05, "loss": 0.4407, "step": 1343 }, { "epoch": 0.6888774987186058, "grad_norm": 0.9022592144860808, "learning_rate": 1.5251859342617353e-05, "loss": 0.4541, "step": 1344 }, { "epoch": 0.6893900563813429, "grad_norm": 0.8853601521644321, "learning_rate": 1.524479237460611e-05, "loss": 0.4142, "step": 1345 }, { "epoch": 0.6899026140440799, "grad_norm": 0.8958811462376671, "learning_rate": 1.5237721791449497e-05, "loss": 0.4643, "step": 1346 }, { "epoch": 0.690415171706817, "grad_norm": 0.797915124929151, "learning_rate": 1.5230647598021153e-05, "loss": 0.352, "step": 1347 }, { "epoch": 0.690927729369554, "grad_norm": 0.8707137769412089, "learning_rate": 1.5223569799197185e-05, "loss": 0.4015, "step": 1348 }, { "epoch": 0.6914402870322911, "grad_norm": 0.8893229860650371, "learning_rate": 1.5216488399856207e-05, "loss": 0.4776, "step": 1349 }, { "epoch": 0.6919528446950282, "grad_norm": 0.899710894940662, "learning_rate": 1.5209403404879305e-05, "loss": 0.4452, "step": 1350 }, { "epoch": 0.6924654023577652, "grad_norm": 0.905985472222968, "learning_rate": 1.5202314819150044e-05, "loss": 0.4987, "step": 1351 }, { "epoch": 0.6929779600205023, "grad_norm": 0.9392776083976845, "learning_rate": 1.5195222647554459e-05, "loss": 0.4484, "step": 1352 }, { "epoch": 0.6934905176832393, "grad_norm": 0.8492688905968816, "learning_rate": 1.5188126894981072e-05, "loss": 0.4112, "step": 1353 }, { "epoch": 0.6940030753459764, "grad_norm": 0.9485547939632318, "learning_rate": 1.5181027566320858e-05, "loss": 0.4525, "step": 1354 }, { "epoch": 0.6945156330087134, "grad_norm": 0.89176562369431, "learning_rate": 1.5173924666467259e-05, "loss": 0.4172, "step": 1355 }, { "epoch": 0.6950281906714505, "grad_norm": 0.8838167991540855, "learning_rate": 1.5166818200316192e-05, "loss": 0.4746, "step": 1356 }, { "epoch": 0.6955407483341876, "grad_norm": 0.87130204609265, "learning_rate": 1.515970817276601e-05, "loss": 0.4396, "step": 1357 }, { "epoch": 0.6960533059969246, "grad_norm": 0.9336564147341987, "learning_rate": 1.5152594588717544e-05, "loss": 0.4899, "step": 1358 }, { "epoch": 0.6965658636596617, "grad_norm": 0.9025977690279944, "learning_rate": 1.5145477453074056e-05, "loss": 0.4654, "step": 1359 }, { "epoch": 0.6970784213223987, "grad_norm": 0.9077612839470858, "learning_rate": 1.5138356770741274e-05, "loss": 0.4522, "step": 1360 }, { "epoch": 0.6975909789851358, "grad_norm": 0.9303530226786585, "learning_rate": 1.5131232546627355e-05, "loss": 0.5022, "step": 1361 }, { "epoch": 0.6981035366478728, "grad_norm": 0.9424639721009297, "learning_rate": 1.5124104785642909e-05, "loss": 0.4706, "step": 1362 }, { "epoch": 0.6986160943106099, "grad_norm": 0.9382142176871471, "learning_rate": 1.5116973492700977e-05, "loss": 0.4562, "step": 1363 }, { "epoch": 0.699128651973347, "grad_norm": 0.884706361461491, "learning_rate": 1.5109838672717034e-05, "loss": 0.4599, "step": 1364 }, { "epoch": 0.699641209636084, "grad_norm": 0.9452720412879557, "learning_rate": 1.5102700330609e-05, "loss": 0.486, "step": 1365 }, { "epoch": 0.7001537672988212, "grad_norm": 0.8298763781976533, "learning_rate": 1.5095558471297196e-05, "loss": 0.4122, "step": 1366 }, { "epoch": 0.7006663249615582, "grad_norm": 0.8857162658053951, "learning_rate": 1.5088413099704396e-05, "loss": 0.4792, "step": 1367 }, { "epoch": 0.7011788826242953, "grad_norm": 0.8300573442159485, "learning_rate": 1.5081264220755775e-05, "loss": 0.4303, "step": 1368 }, { "epoch": 0.7016914402870323, "grad_norm": 0.940034299366193, "learning_rate": 1.5074111839378938e-05, "loss": 0.4543, "step": 1369 }, { "epoch": 0.7022039979497694, "grad_norm": 0.8436537075389458, "learning_rate": 1.5066955960503893e-05, "loss": 0.466, "step": 1370 }, { "epoch": 0.7027165556125065, "grad_norm": 0.9822940787463411, "learning_rate": 1.505979658906307e-05, "loss": 0.478, "step": 1371 }, { "epoch": 0.7032291132752435, "grad_norm": 0.9295263467968957, "learning_rate": 1.5052633729991296e-05, "loss": 0.4955, "step": 1372 }, { "epoch": 0.7037416709379806, "grad_norm": 0.8801213739747926, "learning_rate": 1.5045467388225805e-05, "loss": 0.42, "step": 1373 }, { "epoch": 0.7042542286007176, "grad_norm": 0.9283432531669684, "learning_rate": 1.5038297568706244e-05, "loss": 0.4419, "step": 1374 }, { "epoch": 0.7047667862634547, "grad_norm": 0.9484942579879825, "learning_rate": 1.5031124276374633e-05, "loss": 0.5046, "step": 1375 }, { "epoch": 0.7052793439261917, "grad_norm": 0.8547487403286304, "learning_rate": 1.502394751617541e-05, "loss": 0.4204, "step": 1376 }, { "epoch": 0.7057919015889288, "grad_norm": 0.9524995558304029, "learning_rate": 1.5016767293055388e-05, "loss": 0.4395, "step": 1377 }, { "epoch": 0.7063044592516659, "grad_norm": 0.8987616121377578, "learning_rate": 1.5009583611963772e-05, "loss": 0.4274, "step": 1378 }, { "epoch": 0.7068170169144029, "grad_norm": 0.9120976442639426, "learning_rate": 1.5002396477852146e-05, "loss": 0.4678, "step": 1379 }, { "epoch": 0.70732957457714, "grad_norm": 0.916957304936867, "learning_rate": 1.4995205895674489e-05, "loss": 0.4665, "step": 1380 }, { "epoch": 0.707842132239877, "grad_norm": 0.8564325096713848, "learning_rate": 1.4988011870387138e-05, "loss": 0.451, "step": 1381 }, { "epoch": 0.7083546899026141, "grad_norm": 0.844957015746498, "learning_rate": 1.4980814406948806e-05, "loss": 0.4134, "step": 1382 }, { "epoch": 0.7088672475653511, "grad_norm": 0.903427110946416, "learning_rate": 1.4973613510320595e-05, "loss": 0.5212, "step": 1383 }, { "epoch": 0.7093798052280882, "grad_norm": 0.8708047735834151, "learning_rate": 1.4966409185465949e-05, "loss": 0.464, "step": 1384 }, { "epoch": 0.7098923628908252, "grad_norm": 0.8296953131665147, "learning_rate": 1.4959201437350688e-05, "loss": 0.4593, "step": 1385 }, { "epoch": 0.7104049205535623, "grad_norm": 0.8179822517865223, "learning_rate": 1.4951990270942991e-05, "loss": 0.4595, "step": 1386 }, { "epoch": 0.7109174782162994, "grad_norm": 0.8392061698627697, "learning_rate": 1.494477569121339e-05, "loss": 0.4174, "step": 1387 }, { "epoch": 0.7114300358790364, "grad_norm": 1.0732720216058516, "learning_rate": 1.4937557703134767e-05, "loss": 0.5136, "step": 1388 }, { "epoch": 0.7119425935417735, "grad_norm": 0.9584604581757673, "learning_rate": 1.4930336311682365e-05, "loss": 0.4835, "step": 1389 }, { "epoch": 0.7124551512045105, "grad_norm": 0.8420319643315609, "learning_rate": 1.492311152183376e-05, "loss": 0.4349, "step": 1390 }, { "epoch": 0.7129677088672476, "grad_norm": 0.812030384579919, "learning_rate": 1.4915883338568875e-05, "loss": 0.4291, "step": 1391 }, { "epoch": 0.7134802665299846, "grad_norm": 0.9454476513210072, "learning_rate": 1.4908651766869975e-05, "loss": 0.4411, "step": 1392 }, { "epoch": 0.7139928241927217, "grad_norm": 0.877901682283762, "learning_rate": 1.4901416811721658e-05, "loss": 0.3739, "step": 1393 }, { "epoch": 0.7145053818554588, "grad_norm": 0.8887619265728504, "learning_rate": 1.4894178478110856e-05, "loss": 0.4406, "step": 1394 }, { "epoch": 0.7150179395181958, "grad_norm": 0.885226011234759, "learning_rate": 1.4886936771026826e-05, "loss": 0.4264, "step": 1395 }, { "epoch": 0.7155304971809329, "grad_norm": 0.8760121562775207, "learning_rate": 1.4879691695461151e-05, "loss": 0.4572, "step": 1396 }, { "epoch": 0.7160430548436699, "grad_norm": 0.8524996177760635, "learning_rate": 1.487244325640774e-05, "loss": 0.4571, "step": 1397 }, { "epoch": 0.716555612506407, "grad_norm": 0.865679403201186, "learning_rate": 1.4865191458862816e-05, "loss": 0.4522, "step": 1398 }, { "epoch": 0.717068170169144, "grad_norm": 0.8824846923870371, "learning_rate": 1.4857936307824923e-05, "loss": 0.4227, "step": 1399 }, { "epoch": 0.7175807278318811, "grad_norm": 0.9178876729679788, "learning_rate": 1.4850677808294902e-05, "loss": 0.405, "step": 1400 }, { "epoch": 0.7180932854946181, "grad_norm": 0.830778865231899, "learning_rate": 1.484341596527592e-05, "loss": 0.4647, "step": 1401 }, { "epoch": 0.7186058431573552, "grad_norm": 0.8633327371619072, "learning_rate": 1.4836150783773442e-05, "loss": 0.4194, "step": 1402 }, { "epoch": 0.7191184008200923, "grad_norm": 0.8138530215774639, "learning_rate": 1.4828882268795228e-05, "loss": 0.4205, "step": 1403 }, { "epoch": 0.7196309584828293, "grad_norm": 0.8995191659846842, "learning_rate": 1.4821610425351343e-05, "loss": 0.4801, "step": 1404 }, { "epoch": 0.7201435161455664, "grad_norm": 0.7907148706196974, "learning_rate": 1.4814335258454144e-05, "loss": 0.3693, "step": 1405 }, { "epoch": 0.7206560738083034, "grad_norm": 0.8415837275475192, "learning_rate": 1.4807056773118276e-05, "loss": 0.4134, "step": 1406 }, { "epoch": 0.7211686314710405, "grad_norm": 0.8399881206988197, "learning_rate": 1.4799774974360679e-05, "loss": 0.426, "step": 1407 }, { "epoch": 0.7216811891337775, "grad_norm": 0.9597058487583235, "learning_rate": 1.479248986720057e-05, "loss": 0.4411, "step": 1408 }, { "epoch": 0.7221937467965146, "grad_norm": 0.9457443986898078, "learning_rate": 1.4785201456659446e-05, "loss": 0.4259, "step": 1409 }, { "epoch": 0.7227063044592517, "grad_norm": 0.9144176420050559, "learning_rate": 1.4777909747761085e-05, "loss": 0.4391, "step": 1410 }, { "epoch": 0.7232188621219887, "grad_norm": 0.8627824877517322, "learning_rate": 1.4770614745531538e-05, "loss": 0.4248, "step": 1411 }, { "epoch": 0.7237314197847258, "grad_norm": 0.899900755989583, "learning_rate": 1.4763316454999121e-05, "loss": 0.45, "step": 1412 }, { "epoch": 0.7242439774474628, "grad_norm": 0.8875357379774268, "learning_rate": 1.4756014881194422e-05, "loss": 0.4596, "step": 1413 }, { "epoch": 0.7247565351101999, "grad_norm": 0.9192772816956195, "learning_rate": 1.4748710029150296e-05, "loss": 0.4358, "step": 1414 }, { "epoch": 0.7252690927729369, "grad_norm": 0.8362069861551671, "learning_rate": 1.4741401903901843e-05, "loss": 0.4474, "step": 1415 }, { "epoch": 0.725781650435674, "grad_norm": 0.8120942598092469, "learning_rate": 1.4734090510486435e-05, "loss": 0.4154, "step": 1416 }, { "epoch": 0.726294208098411, "grad_norm": 0.9083704219748274, "learning_rate": 1.4726775853943688e-05, "loss": 0.4559, "step": 1417 }, { "epoch": 0.7268067657611481, "grad_norm": 0.9336425668942007, "learning_rate": 1.4719457939315468e-05, "loss": 0.4336, "step": 1418 }, { "epoch": 0.7273193234238852, "grad_norm": 0.8845112652495256, "learning_rate": 1.4712136771645891e-05, "loss": 0.4243, "step": 1419 }, { "epoch": 0.7278318810866222, "grad_norm": 0.9431083885904118, "learning_rate": 1.4704812355981308e-05, "loss": 0.4989, "step": 1420 }, { "epoch": 0.7283444387493593, "grad_norm": 0.8219753636239955, "learning_rate": 1.4697484697370317e-05, "loss": 0.4563, "step": 1421 }, { "epoch": 0.7288569964120963, "grad_norm": 0.8833243128389598, "learning_rate": 1.4690153800863743e-05, "loss": 0.4706, "step": 1422 }, { "epoch": 0.7293695540748334, "grad_norm": 0.8620857130420202, "learning_rate": 1.4682819671514654e-05, "loss": 0.404, "step": 1423 }, { "epoch": 0.7298821117375704, "grad_norm": 0.8499175987971481, "learning_rate": 1.4675482314378331e-05, "loss": 0.4448, "step": 1424 }, { "epoch": 0.7303946694003075, "grad_norm": 0.8574570660662237, "learning_rate": 1.4668141734512294e-05, "loss": 0.4266, "step": 1425 }, { "epoch": 0.7309072270630446, "grad_norm": 0.8190878236597356, "learning_rate": 1.4660797936976278e-05, "loss": 0.3998, "step": 1426 }, { "epoch": 0.7314197847257816, "grad_norm": 0.8031714886300098, "learning_rate": 1.4653450926832236e-05, "loss": 0.4348, "step": 1427 }, { "epoch": 0.7319323423885187, "grad_norm": 0.9553215791088997, "learning_rate": 1.4646100709144335e-05, "loss": 0.4754, "step": 1428 }, { "epoch": 0.7324449000512557, "grad_norm": 0.8596999962259559, "learning_rate": 1.4638747288978954e-05, "loss": 0.4609, "step": 1429 }, { "epoch": 0.7329574577139928, "grad_norm": 0.8626212997657781, "learning_rate": 1.4631390671404682e-05, "loss": 0.4719, "step": 1430 }, { "epoch": 0.7334700153767298, "grad_norm": 0.8448644155096082, "learning_rate": 1.4624030861492304e-05, "loss": 0.4436, "step": 1431 }, { "epoch": 0.7339825730394669, "grad_norm": 0.8713138861492717, "learning_rate": 1.461666786431482e-05, "loss": 0.4233, "step": 1432 }, { "epoch": 0.734495130702204, "grad_norm": 0.8326295433749018, "learning_rate": 1.4609301684947403e-05, "loss": 0.391, "step": 1433 }, { "epoch": 0.735007688364941, "grad_norm": 0.9952773215893286, "learning_rate": 1.460193232846745e-05, "loss": 0.4477, "step": 1434 }, { "epoch": 0.7355202460276781, "grad_norm": 0.8773958078784411, "learning_rate": 1.4594559799954523e-05, "loss": 0.4725, "step": 1435 }, { "epoch": 0.7360328036904151, "grad_norm": 0.868656068124141, "learning_rate": 1.4587184104490377e-05, "loss": 0.4477, "step": 1436 }, { "epoch": 0.7365453613531522, "grad_norm": 0.917651920213826, "learning_rate": 1.457980524715896e-05, "loss": 0.4353, "step": 1437 }, { "epoch": 0.7370579190158892, "grad_norm": 0.9104914302526631, "learning_rate": 1.4572423233046386e-05, "loss": 0.4413, "step": 1438 }, { "epoch": 0.7375704766786263, "grad_norm": 0.9097483381246133, "learning_rate": 1.4565038067240956e-05, "loss": 0.4548, "step": 1439 }, { "epoch": 0.7380830343413634, "grad_norm": 0.876943040085998, "learning_rate": 1.455764975483313e-05, "loss": 0.4363, "step": 1440 }, { "epoch": 0.7385955920041004, "grad_norm": 0.9313048587164554, "learning_rate": 1.4550258300915551e-05, "loss": 0.4404, "step": 1441 }, { "epoch": 0.7391081496668375, "grad_norm": 0.8782582729668442, "learning_rate": 1.4542863710583022e-05, "loss": 0.4307, "step": 1442 }, { "epoch": 0.7396207073295745, "grad_norm": 0.8255218063019648, "learning_rate": 1.4535465988932502e-05, "loss": 0.3984, "step": 1443 }, { "epoch": 0.7401332649923117, "grad_norm": 0.8745343822744817, "learning_rate": 1.4528065141063119e-05, "loss": 0.4918, "step": 1444 }, { "epoch": 0.7406458226550487, "grad_norm": 0.9488135765792123, "learning_rate": 1.4520661172076147e-05, "loss": 0.4934, "step": 1445 }, { "epoch": 0.7411583803177858, "grad_norm": 0.920094545379883, "learning_rate": 1.4513254087075015e-05, "loss": 0.4604, "step": 1446 }, { "epoch": 0.7416709379805229, "grad_norm": 0.8561162128101851, "learning_rate": 1.4505843891165304e-05, "loss": 0.4752, "step": 1447 }, { "epoch": 0.7421834956432599, "grad_norm": 0.8242966797516035, "learning_rate": 1.4498430589454726e-05, "loss": 0.449, "step": 1448 }, { "epoch": 0.742696053305997, "grad_norm": 0.8583781747291154, "learning_rate": 1.449101418705315e-05, "loss": 0.4766, "step": 1449 }, { "epoch": 0.743208610968734, "grad_norm": 0.8738915456566949, "learning_rate": 1.4483594689072571e-05, "loss": 0.4519, "step": 1450 }, { "epoch": 0.7437211686314711, "grad_norm": 0.8970650831718874, "learning_rate": 1.4476172100627127e-05, "loss": 0.4387, "step": 1451 }, { "epoch": 0.7442337262942081, "grad_norm": 0.8301612386765629, "learning_rate": 1.4468746426833075e-05, "loss": 0.4191, "step": 1452 }, { "epoch": 0.7447462839569452, "grad_norm": 0.8915818355114107, "learning_rate": 1.4461317672808809e-05, "loss": 0.4756, "step": 1453 }, { "epoch": 0.7452588416196823, "grad_norm": 0.8840314404301436, "learning_rate": 1.4453885843674837e-05, "loss": 0.4668, "step": 1454 }, { "epoch": 0.7457713992824193, "grad_norm": 0.8346541885213423, "learning_rate": 1.4446450944553793e-05, "loss": 0.4137, "step": 1455 }, { "epoch": 0.7462839569451564, "grad_norm": 0.8698994671004548, "learning_rate": 1.4439012980570425e-05, "loss": 0.4325, "step": 1456 }, { "epoch": 0.7467965146078934, "grad_norm": 0.8285766516502818, "learning_rate": 1.44315719568516e-05, "loss": 0.4225, "step": 1457 }, { "epoch": 0.7473090722706305, "grad_norm": 0.9178752576508891, "learning_rate": 1.4424127878526278e-05, "loss": 0.456, "step": 1458 }, { "epoch": 0.7478216299333675, "grad_norm": 0.9491643029914382, "learning_rate": 1.4416680750725542e-05, "loss": 0.4425, "step": 1459 }, { "epoch": 0.7483341875961046, "grad_norm": 1.019725200304288, "learning_rate": 1.4409230578582566e-05, "loss": 0.4882, "step": 1460 }, { "epoch": 0.7488467452588417, "grad_norm": 0.8955742721481773, "learning_rate": 1.4401777367232625e-05, "loss": 0.4387, "step": 1461 }, { "epoch": 0.7493593029215787, "grad_norm": 0.8578624614642641, "learning_rate": 1.4394321121813093e-05, "loss": 0.4274, "step": 1462 }, { "epoch": 0.7498718605843158, "grad_norm": 0.9296278523156789, "learning_rate": 1.4386861847463429e-05, "loss": 0.456, "step": 1463 }, { "epoch": 0.7503844182470528, "grad_norm": 0.9112582855214602, "learning_rate": 1.4379399549325185e-05, "loss": 0.4425, "step": 1464 }, { "epoch": 0.7508969759097899, "grad_norm": 0.8624339135391266, "learning_rate": 1.4371934232541991e-05, "loss": 0.4138, "step": 1465 }, { "epoch": 0.7514095335725269, "grad_norm": 0.8871895093062859, "learning_rate": 1.436446590225957e-05, "loss": 0.4845, "step": 1466 }, { "epoch": 0.751922091235264, "grad_norm": 0.7897772916992967, "learning_rate": 1.4356994563625703e-05, "loss": 0.3738, "step": 1467 }, { "epoch": 0.752434648898001, "grad_norm": 0.7974179460051084, "learning_rate": 1.4349520221790263e-05, "loss": 0.3934, "step": 1468 }, { "epoch": 0.7529472065607381, "grad_norm": 0.8821318489754267, "learning_rate": 1.4342042881905184e-05, "loss": 0.4614, "step": 1469 }, { "epoch": 0.7534597642234752, "grad_norm": 0.8869240975547475, "learning_rate": 1.433456254912447e-05, "loss": 0.4356, "step": 1470 }, { "epoch": 0.7539723218862122, "grad_norm": 0.8784402002458527, "learning_rate": 1.4327079228604177e-05, "loss": 0.398, "step": 1471 }, { "epoch": 0.7544848795489493, "grad_norm": 0.8161284900663118, "learning_rate": 1.4319592925502443e-05, "loss": 0.4199, "step": 1472 }, { "epoch": 0.7549974372116863, "grad_norm": 0.8574061867052907, "learning_rate": 1.4312103644979441e-05, "loss": 0.3939, "step": 1473 }, { "epoch": 0.7555099948744234, "grad_norm": 0.8491379553669641, "learning_rate": 1.4304611392197399e-05, "loss": 0.4172, "step": 1474 }, { "epoch": 0.7560225525371604, "grad_norm": 0.8644132623693059, "learning_rate": 1.4297116172320606e-05, "loss": 0.4071, "step": 1475 }, { "epoch": 0.7565351101998975, "grad_norm": 0.8710820690695655, "learning_rate": 1.4289617990515383e-05, "loss": 0.4363, "step": 1476 }, { "epoch": 0.7570476678626346, "grad_norm": 0.8137618627634217, "learning_rate": 1.4282116851950103e-05, "loss": 0.4132, "step": 1477 }, { "epoch": 0.7575602255253716, "grad_norm": 0.8518492948745379, "learning_rate": 1.427461276179517e-05, "loss": 0.4287, "step": 1478 }, { "epoch": 0.7580727831881087, "grad_norm": 0.9126290641096139, "learning_rate": 1.4267105725223023e-05, "loss": 0.4648, "step": 1479 }, { "epoch": 0.7585853408508457, "grad_norm": 0.8849333831430537, "learning_rate": 1.4259595747408133e-05, "loss": 0.4339, "step": 1480 }, { "epoch": 0.7590978985135828, "grad_norm": 0.844855888960061, "learning_rate": 1.4252082833527003e-05, "loss": 0.4534, "step": 1481 }, { "epoch": 0.7596104561763198, "grad_norm": 0.9593236143903552, "learning_rate": 1.4244566988758152e-05, "loss": 0.5051, "step": 1482 }, { "epoch": 0.7601230138390569, "grad_norm": 0.844672077383435, "learning_rate": 1.4237048218282124e-05, "loss": 0.477, "step": 1483 }, { "epoch": 0.760635571501794, "grad_norm": 0.8873422920899185, "learning_rate": 1.422952652728148e-05, "loss": 0.4803, "step": 1484 }, { "epoch": 0.761148129164531, "grad_norm": 0.8400529628485378, "learning_rate": 1.4222001920940787e-05, "loss": 0.4337, "step": 1485 }, { "epoch": 0.7616606868272681, "grad_norm": 0.8857570257253755, "learning_rate": 1.4214474404446633e-05, "loss": 0.4819, "step": 1486 }, { "epoch": 0.7621732444900051, "grad_norm": 0.7679512211625595, "learning_rate": 1.4206943982987604e-05, "loss": 0.3935, "step": 1487 }, { "epoch": 0.7626858021527422, "grad_norm": 0.9165028767933114, "learning_rate": 1.419941066175429e-05, "loss": 0.4762, "step": 1488 }, { "epoch": 0.7631983598154792, "grad_norm": 0.8648141728000703, "learning_rate": 1.4191874445939278e-05, "loss": 0.4398, "step": 1489 }, { "epoch": 0.7637109174782163, "grad_norm": 0.8573355077882813, "learning_rate": 1.4184335340737158e-05, "loss": 0.4706, "step": 1490 }, { "epoch": 0.7642234751409533, "grad_norm": 0.8415173490601486, "learning_rate": 1.41767933513445e-05, "loss": 0.4406, "step": 1491 }, { "epoch": 0.7647360328036904, "grad_norm": 0.8990367364997683, "learning_rate": 1.4169248482959868e-05, "loss": 0.4017, "step": 1492 }, { "epoch": 0.7652485904664275, "grad_norm": 0.9252078630757885, "learning_rate": 1.4161700740783815e-05, "loss": 0.4434, "step": 1493 }, { "epoch": 0.7657611481291645, "grad_norm": 0.8196837627067952, "learning_rate": 1.4154150130018867e-05, "loss": 0.4066, "step": 1494 }, { "epoch": 0.7662737057919016, "grad_norm": 0.9139655360589451, "learning_rate": 1.4146596655869529e-05, "loss": 0.4592, "step": 1495 }, { "epoch": 0.7667862634546386, "grad_norm": 0.8123107788050005, "learning_rate": 1.4139040323542284e-05, "loss": 0.4322, "step": 1496 }, { "epoch": 0.7672988211173757, "grad_norm": 0.8452280724607676, "learning_rate": 1.4131481138245582e-05, "loss": 0.477, "step": 1497 }, { "epoch": 0.7678113787801127, "grad_norm": 0.9204656500857091, "learning_rate": 1.4123919105189836e-05, "loss": 0.4337, "step": 1498 }, { "epoch": 0.7683239364428498, "grad_norm": 0.901292771650625, "learning_rate": 1.4116354229587432e-05, "loss": 0.4608, "step": 1499 }, { "epoch": 0.7688364941055869, "grad_norm": 0.8791552177300336, "learning_rate": 1.4108786516652704e-05, "loss": 0.4552, "step": 1500 }, { "epoch": 0.7693490517683239, "grad_norm": 0.9168254417149796, "learning_rate": 1.4101215971601945e-05, "loss": 0.4024, "step": 1501 }, { "epoch": 0.769861609431061, "grad_norm": 0.8596636595420604, "learning_rate": 1.4093642599653406e-05, "loss": 0.389, "step": 1502 }, { "epoch": 0.770374167093798, "grad_norm": 0.8189312669796711, "learning_rate": 1.4086066406027284e-05, "loss": 0.361, "step": 1503 }, { "epoch": 0.7708867247565351, "grad_norm": 0.8815073350047495, "learning_rate": 1.4078487395945712e-05, "loss": 0.4398, "step": 1504 }, { "epoch": 0.7713992824192721, "grad_norm": 0.919251938062037, "learning_rate": 1.4070905574632779e-05, "loss": 0.4822, "step": 1505 }, { "epoch": 0.7719118400820092, "grad_norm": 0.9173736599338392, "learning_rate": 1.40633209473145e-05, "loss": 0.4613, "step": 1506 }, { "epoch": 0.7724243977447462, "grad_norm": 0.8569917713999854, "learning_rate": 1.4055733519218829e-05, "loss": 0.4036, "step": 1507 }, { "epoch": 0.7729369554074833, "grad_norm": 0.8900446329803635, "learning_rate": 1.404814329557565e-05, "loss": 0.433, "step": 1508 }, { "epoch": 0.7734495130702204, "grad_norm": 0.900533303907843, "learning_rate": 1.4040550281616777e-05, "loss": 0.4111, "step": 1509 }, { "epoch": 0.7739620707329574, "grad_norm": 0.8185664793954978, "learning_rate": 1.4032954482575938e-05, "loss": 0.4124, "step": 1510 }, { "epoch": 0.7744746283956945, "grad_norm": 0.8525210160098752, "learning_rate": 1.4025355903688794e-05, "loss": 0.4555, "step": 1511 }, { "epoch": 0.7749871860584315, "grad_norm": 0.8341275362239134, "learning_rate": 1.401775455019291e-05, "loss": 0.4558, "step": 1512 }, { "epoch": 0.7754997437211686, "grad_norm": 0.8981457916914287, "learning_rate": 1.401015042732777e-05, "loss": 0.4049, "step": 1513 }, { "epoch": 0.7760123013839056, "grad_norm": 0.8759746589163069, "learning_rate": 1.4002543540334766e-05, "loss": 0.4358, "step": 1514 }, { "epoch": 0.7765248590466427, "grad_norm": 0.8442373464301353, "learning_rate": 1.3994933894457193e-05, "loss": 0.4129, "step": 1515 }, { "epoch": 0.7770374167093798, "grad_norm": 0.9215796685161776, "learning_rate": 1.398732149494025e-05, "loss": 0.432, "step": 1516 }, { "epoch": 0.7775499743721168, "grad_norm": 0.8902185062481768, "learning_rate": 1.3979706347031034e-05, "loss": 0.4317, "step": 1517 }, { "epoch": 0.7780625320348539, "grad_norm": 0.8791667679606392, "learning_rate": 1.3972088455978537e-05, "loss": 0.4339, "step": 1518 }, { "epoch": 0.7785750896975909, "grad_norm": 0.9180677040822302, "learning_rate": 1.3964467827033637e-05, "loss": 0.4797, "step": 1519 }, { "epoch": 0.779087647360328, "grad_norm": 0.9124977855188895, "learning_rate": 1.3956844465449106e-05, "loss": 0.4723, "step": 1520 }, { "epoch": 0.779600205023065, "grad_norm": 0.8635520725160787, "learning_rate": 1.3949218376479593e-05, "loss": 0.4372, "step": 1521 }, { "epoch": 0.7801127626858022, "grad_norm": 0.8699107876185879, "learning_rate": 1.3941589565381635e-05, "loss": 0.4828, "step": 1522 }, { "epoch": 0.7806253203485393, "grad_norm": 0.8422153984571211, "learning_rate": 1.3933958037413636e-05, "loss": 0.452, "step": 1523 }, { "epoch": 0.7811378780112763, "grad_norm": 0.8341677426296992, "learning_rate": 1.3926323797835882e-05, "loss": 0.4649, "step": 1524 }, { "epoch": 0.7816504356740134, "grad_norm": 0.870619176717251, "learning_rate": 1.3918686851910516e-05, "loss": 0.4352, "step": 1525 }, { "epoch": 0.7821629933367504, "grad_norm": 0.9160636637837206, "learning_rate": 1.391104720490156e-05, "loss": 0.4317, "step": 1526 }, { "epoch": 0.7826755509994875, "grad_norm": 0.7959208313731447, "learning_rate": 1.3903404862074891e-05, "loss": 0.4322, "step": 1527 }, { "epoch": 0.7831881086622245, "grad_norm": 0.84478583526662, "learning_rate": 1.3895759828698243e-05, "loss": 0.4636, "step": 1528 }, { "epoch": 0.7837006663249616, "grad_norm": 0.8795853769172673, "learning_rate": 1.3888112110041207e-05, "loss": 0.3911, "step": 1529 }, { "epoch": 0.7842132239876987, "grad_norm": 0.8410588975085725, "learning_rate": 1.3880461711375224e-05, "loss": 0.3957, "step": 1530 }, { "epoch": 0.7847257816504357, "grad_norm": 0.8463088535446083, "learning_rate": 1.3872808637973583e-05, "loss": 0.4333, "step": 1531 }, { "epoch": 0.7852383393131728, "grad_norm": 0.8828002167831431, "learning_rate": 1.386515289511141e-05, "loss": 0.4991, "step": 1532 }, { "epoch": 0.7857508969759098, "grad_norm": 0.9671190688210252, "learning_rate": 1.3857494488065693e-05, "loss": 0.4336, "step": 1533 }, { "epoch": 0.7862634546386469, "grad_norm": 0.9098090687297001, "learning_rate": 1.3849833422115221e-05, "loss": 0.4258, "step": 1534 }, { "epoch": 0.7867760123013839, "grad_norm": 0.9337953272575743, "learning_rate": 1.384216970254065e-05, "loss": 0.4788, "step": 1535 }, { "epoch": 0.787288569964121, "grad_norm": 0.9154979021643403, "learning_rate": 1.3834503334624443e-05, "loss": 0.4178, "step": 1536 }, { "epoch": 0.787801127626858, "grad_norm": 0.9707681844149721, "learning_rate": 1.3826834323650899e-05, "loss": 0.4528, "step": 1537 }, { "epoch": 0.7883136852895951, "grad_norm": 0.8852301058832076, "learning_rate": 1.3819162674906134e-05, "loss": 0.4501, "step": 1538 }, { "epoch": 0.7888262429523322, "grad_norm": 0.863904077490955, "learning_rate": 1.3811488393678085e-05, "loss": 0.4072, "step": 1539 }, { "epoch": 0.7893388006150692, "grad_norm": 0.8024014183292975, "learning_rate": 1.38038114852565e-05, "loss": 0.4255, "step": 1540 }, { "epoch": 0.7898513582778063, "grad_norm": 0.850911871604447, "learning_rate": 1.379613195493294e-05, "loss": 0.3893, "step": 1541 }, { "epoch": 0.7903639159405433, "grad_norm": 0.8869657555007124, "learning_rate": 1.378844980800078e-05, "loss": 0.4194, "step": 1542 }, { "epoch": 0.7908764736032804, "grad_norm": 0.9433272542529463, "learning_rate": 1.3780765049755184e-05, "loss": 0.4533, "step": 1543 }, { "epoch": 0.7913890312660175, "grad_norm": 0.8950721611377841, "learning_rate": 1.3773077685493128e-05, "loss": 0.4013, "step": 1544 }, { "epoch": 0.7919015889287545, "grad_norm": 0.8567578066112991, "learning_rate": 1.376538772051338e-05, "loss": 0.4468, "step": 1545 }, { "epoch": 0.7924141465914916, "grad_norm": 0.8928581801065161, "learning_rate": 1.3757695160116502e-05, "loss": 0.4281, "step": 1546 }, { "epoch": 0.7929267042542286, "grad_norm": 0.8480168095694268, "learning_rate": 1.3750000009604843e-05, "loss": 0.4605, "step": 1547 }, { "epoch": 0.7934392619169657, "grad_norm": 0.9064541011651398, "learning_rate": 1.3742302274282532e-05, "loss": 0.4079, "step": 1548 }, { "epoch": 0.7939518195797027, "grad_norm": 0.8320757391267325, "learning_rate": 1.3734601959455495e-05, "loss": 0.4254, "step": 1549 }, { "epoch": 0.7944643772424398, "grad_norm": 0.8962721651647615, "learning_rate": 1.3726899070431423e-05, "loss": 0.4393, "step": 1550 }, { "epoch": 0.7949769349051768, "grad_norm": 0.8523578304039547, "learning_rate": 1.3719193612519789e-05, "loss": 0.4571, "step": 1551 }, { "epoch": 0.7954894925679139, "grad_norm": 0.7899789843869212, "learning_rate": 1.3711485591031821e-05, "loss": 0.3811, "step": 1552 }, { "epoch": 0.796002050230651, "grad_norm": 0.8626499363718604, "learning_rate": 1.3703775011280538e-05, "loss": 0.4146, "step": 1553 }, { "epoch": 0.796514607893388, "grad_norm": 0.9299692047220735, "learning_rate": 1.3696061878580707e-05, "loss": 0.4672, "step": 1554 }, { "epoch": 0.7970271655561251, "grad_norm": 0.8918657366673032, "learning_rate": 1.3688346198248859e-05, "loss": 0.4677, "step": 1555 }, { "epoch": 0.7975397232188621, "grad_norm": 0.9360299187517043, "learning_rate": 1.3680627975603275e-05, "loss": 0.4593, "step": 1556 }, { "epoch": 0.7980522808815992, "grad_norm": 0.9078102329877857, "learning_rate": 1.3672907215964e-05, "loss": 0.4102, "step": 1557 }, { "epoch": 0.7985648385443362, "grad_norm": 0.8185886240042258, "learning_rate": 1.3665183924652817e-05, "loss": 0.4053, "step": 1558 }, { "epoch": 0.7990773962070733, "grad_norm": 0.8611760262712116, "learning_rate": 1.3657458106993257e-05, "loss": 0.4316, "step": 1559 }, { "epoch": 0.7995899538698104, "grad_norm": 0.8968145357503907, "learning_rate": 1.3649729768310598e-05, "loss": 0.4292, "step": 1560 }, { "epoch": 0.8001025115325474, "grad_norm": 0.914094958851734, "learning_rate": 1.3641998913931849e-05, "loss": 0.4629, "step": 1561 }, { "epoch": 0.8006150691952845, "grad_norm": 0.8595410089665203, "learning_rate": 1.3634265549185755e-05, "loss": 0.4199, "step": 1562 }, { "epoch": 0.8011276268580215, "grad_norm": 0.917955342429061, "learning_rate": 1.362652967940279e-05, "loss": 0.4208, "step": 1563 }, { "epoch": 0.8016401845207586, "grad_norm": 0.9129863383096661, "learning_rate": 1.361879130991516e-05, "loss": 0.4711, "step": 1564 }, { "epoch": 0.8021527421834956, "grad_norm": 0.8474696229940991, "learning_rate": 1.3611050446056787e-05, "loss": 0.4641, "step": 1565 }, { "epoch": 0.8026652998462327, "grad_norm": 0.8830154931659809, "learning_rate": 1.3603307093163319e-05, "loss": 0.5111, "step": 1566 }, { "epoch": 0.8031778575089698, "grad_norm": 0.9035007850505294, "learning_rate": 1.3595561256572111e-05, "loss": 0.4503, "step": 1567 }, { "epoch": 0.8036904151717068, "grad_norm": 0.8489871083894263, "learning_rate": 1.3587812941622238e-05, "loss": 0.4481, "step": 1568 }, { "epoch": 0.8042029728344439, "grad_norm": 1.0293753197722415, "learning_rate": 1.3580062153654484e-05, "loss": 0.494, "step": 1569 }, { "epoch": 0.8047155304971809, "grad_norm": 0.8634571083853316, "learning_rate": 1.3572308898011328e-05, "loss": 0.4572, "step": 1570 }, { "epoch": 0.805228088159918, "grad_norm": 0.8621484969376705, "learning_rate": 1.3564553180036962e-05, "loss": 0.4696, "step": 1571 }, { "epoch": 0.805740645822655, "grad_norm": 0.9219431309956306, "learning_rate": 1.3556795005077266e-05, "loss": 0.4673, "step": 1572 }, { "epoch": 0.8062532034853921, "grad_norm": 0.8647964713895752, "learning_rate": 1.3549034378479815e-05, "loss": 0.4604, "step": 1573 }, { "epoch": 0.8067657611481291, "grad_norm": 0.7853454046707604, "learning_rate": 1.3541271305593878e-05, "loss": 0.3891, "step": 1574 }, { "epoch": 0.8072783188108662, "grad_norm": 0.8679988458402141, "learning_rate": 1.3533505791770409e-05, "loss": 0.425, "step": 1575 }, { "epoch": 0.8077908764736033, "grad_norm": 0.9523654395098063, "learning_rate": 1.3525737842362043e-05, "loss": 0.468, "step": 1576 }, { "epoch": 0.8083034341363403, "grad_norm": 0.898143949394579, "learning_rate": 1.3517967462723092e-05, "loss": 0.4469, "step": 1577 }, { "epoch": 0.8088159917990774, "grad_norm": 0.7683707168504053, "learning_rate": 1.3510194658209547e-05, "loss": 0.4191, "step": 1578 }, { "epoch": 0.8093285494618144, "grad_norm": 0.8983707803155124, "learning_rate": 1.3502419434179068e-05, "loss": 0.4393, "step": 1579 }, { "epoch": 0.8098411071245515, "grad_norm": 0.8321234072624192, "learning_rate": 1.3494641795990986e-05, "loss": 0.4363, "step": 1580 }, { "epoch": 0.8103536647872885, "grad_norm": 0.8554827206801837, "learning_rate": 1.3486861749006286e-05, "loss": 0.4145, "step": 1581 }, { "epoch": 0.8108662224500256, "grad_norm": 0.8994548338929111, "learning_rate": 1.3479079298587634e-05, "loss": 0.4258, "step": 1582 }, { "epoch": 0.8113787801127627, "grad_norm": 0.8549303006432097, "learning_rate": 1.3471294450099327e-05, "loss": 0.4294, "step": 1583 }, { "epoch": 0.8118913377754997, "grad_norm": 0.9122895914800688, "learning_rate": 1.3463507208907332e-05, "loss": 0.4481, "step": 1584 }, { "epoch": 0.8124038954382368, "grad_norm": 0.8769690699280012, "learning_rate": 1.3455717580379266e-05, "loss": 0.4572, "step": 1585 }, { "epoch": 0.8129164531009738, "grad_norm": 0.8631424074139098, "learning_rate": 1.3447925569884374e-05, "loss": 0.3974, "step": 1586 }, { "epoch": 0.8134290107637109, "grad_norm": 0.8127440162473082, "learning_rate": 1.3440131182793567e-05, "loss": 0.3858, "step": 1587 }, { "epoch": 0.8139415684264479, "grad_norm": 0.7933119417979968, "learning_rate": 1.3432334424479377e-05, "loss": 0.4335, "step": 1588 }, { "epoch": 0.814454126089185, "grad_norm": 0.7700380763950958, "learning_rate": 1.3424535300315978e-05, "loss": 0.3929, "step": 1589 }, { "epoch": 0.814966683751922, "grad_norm": 0.8736821172486485, "learning_rate": 1.3416733815679166e-05, "loss": 0.4769, "step": 1590 }, { "epoch": 0.8154792414146591, "grad_norm": 0.9278609433976925, "learning_rate": 1.3408929975946379e-05, "loss": 0.4674, "step": 1591 }, { "epoch": 0.8159917990773962, "grad_norm": 0.8148238028564986, "learning_rate": 1.3401123786496664e-05, "loss": 0.4195, "step": 1592 }, { "epoch": 0.8165043567401332, "grad_norm": 0.9319989073744472, "learning_rate": 1.3393315252710695e-05, "loss": 0.4306, "step": 1593 }, { "epoch": 0.8170169144028703, "grad_norm": 0.8703031044358913, "learning_rate": 1.3385504379970764e-05, "loss": 0.3839, "step": 1594 }, { "epoch": 0.8175294720656073, "grad_norm": 0.9497474026612924, "learning_rate": 1.3377691173660764e-05, "loss": 0.4154, "step": 1595 }, { "epoch": 0.8180420297283444, "grad_norm": 0.8388319868224867, "learning_rate": 1.3369875639166212e-05, "loss": 0.4481, "step": 1596 }, { "epoch": 0.8185545873910814, "grad_norm": 0.8696633813911802, "learning_rate": 1.3362057781874215e-05, "loss": 0.4868, "step": 1597 }, { "epoch": 0.8190671450538185, "grad_norm": 0.9591285394445416, "learning_rate": 1.3354237607173494e-05, "loss": 0.4104, "step": 1598 }, { "epoch": 0.8195797027165556, "grad_norm": 0.7713181749496916, "learning_rate": 1.3346415120454354e-05, "loss": 0.3933, "step": 1599 }, { "epoch": 0.8200922603792927, "grad_norm": 0.888093329702593, "learning_rate": 1.3338590327108709e-05, "loss": 0.4633, "step": 1600 }, { "epoch": 0.8206048180420298, "grad_norm": 0.9018099167883835, "learning_rate": 1.3330763232530048e-05, "loss": 0.4449, "step": 1601 }, { "epoch": 0.8211173757047668, "grad_norm": 0.9266252043321587, "learning_rate": 1.3322933842113457e-05, "loss": 0.4879, "step": 1602 }, { "epoch": 0.8216299333675039, "grad_norm": 0.8792083914050685, "learning_rate": 1.3315102161255603e-05, "loss": 0.4101, "step": 1603 }, { "epoch": 0.822142491030241, "grad_norm": 0.8653916288514847, "learning_rate": 1.3307268195354718e-05, "loss": 0.4637, "step": 1604 }, { "epoch": 0.822655048692978, "grad_norm": 0.8212924221514558, "learning_rate": 1.329943194981063e-05, "loss": 0.4177, "step": 1605 }, { "epoch": 0.8231676063557151, "grad_norm": 0.8856299882533567, "learning_rate": 1.3291593430024727e-05, "loss": 0.4908, "step": 1606 }, { "epoch": 0.8236801640184521, "grad_norm": 0.8428449485775634, "learning_rate": 1.3283752641399965e-05, "loss": 0.4233, "step": 1607 }, { "epoch": 0.8241927216811892, "grad_norm": 0.8276562468625088, "learning_rate": 1.3275909589340865e-05, "loss": 0.4175, "step": 1608 }, { "epoch": 0.8247052793439262, "grad_norm": 0.8459446195251248, "learning_rate": 1.3268064279253512e-05, "loss": 0.408, "step": 1609 }, { "epoch": 0.8252178370066633, "grad_norm": 0.7989568432596288, "learning_rate": 1.3260216716545534e-05, "loss": 0.4078, "step": 1610 }, { "epoch": 0.8257303946694003, "grad_norm": 0.9298696549503288, "learning_rate": 1.325236690662613e-05, "loss": 0.4634, "step": 1611 }, { "epoch": 0.8262429523321374, "grad_norm": 0.8844523495583078, "learning_rate": 1.3244514854906039e-05, "loss": 0.3618, "step": 1612 }, { "epoch": 0.8267555099948745, "grad_norm": 0.8989041359320697, "learning_rate": 1.3236660566797542e-05, "loss": 0.4195, "step": 1613 }, { "epoch": 0.8272680676576115, "grad_norm": 0.9254250859207924, "learning_rate": 1.3228804047714462e-05, "loss": 0.5033, "step": 1614 }, { "epoch": 0.8277806253203486, "grad_norm": 0.9302226096692195, "learning_rate": 1.3220945303072172e-05, "loss": 0.4682, "step": 1615 }, { "epoch": 0.8282931829830856, "grad_norm": 0.7866247341378326, "learning_rate": 1.3213084338287563e-05, "loss": 0.4201, "step": 1616 }, { "epoch": 0.8288057406458227, "grad_norm": 0.8493038721879194, "learning_rate": 1.3205221158779063e-05, "loss": 0.4551, "step": 1617 }, { "epoch": 0.8293182983085597, "grad_norm": 0.8562847933676802, "learning_rate": 1.319735576996663e-05, "loss": 0.4766, "step": 1618 }, { "epoch": 0.8298308559712968, "grad_norm": 0.8091352908034071, "learning_rate": 1.3189488177271741e-05, "loss": 0.4232, "step": 1619 }, { "epoch": 0.8303434136340339, "grad_norm": 0.8276196502853937, "learning_rate": 1.318161838611739e-05, "loss": 0.4226, "step": 1620 }, { "epoch": 0.8308559712967709, "grad_norm": 0.8167033888437744, "learning_rate": 1.3173746401928092e-05, "loss": 0.404, "step": 1621 }, { "epoch": 0.831368528959508, "grad_norm": 0.8107318908111949, "learning_rate": 1.3165872230129869e-05, "loss": 0.4217, "step": 1622 }, { "epoch": 0.831881086622245, "grad_norm": 0.8729926432825751, "learning_rate": 1.3157995876150252e-05, "loss": 0.4883, "step": 1623 }, { "epoch": 0.8323936442849821, "grad_norm": 0.8187227976862803, "learning_rate": 1.3150117345418277e-05, "loss": 0.409, "step": 1624 }, { "epoch": 0.8329062019477191, "grad_norm": 0.7927300110228338, "learning_rate": 1.3142236643364481e-05, "loss": 0.458, "step": 1625 }, { "epoch": 0.8334187596104562, "grad_norm": 0.8447931053302804, "learning_rate": 1.3134353775420895e-05, "loss": 0.4535, "step": 1626 }, { "epoch": 0.8339313172731933, "grad_norm": 0.9749095865599114, "learning_rate": 1.3126468747021044e-05, "loss": 0.4822, "step": 1627 }, { "epoch": 0.8344438749359303, "grad_norm": 0.8380351543077752, "learning_rate": 1.3118581563599946e-05, "loss": 0.4339, "step": 1628 }, { "epoch": 0.8349564325986674, "grad_norm": 0.8102460743871209, "learning_rate": 1.31106922305941e-05, "loss": 0.4049, "step": 1629 }, { "epoch": 0.8354689902614044, "grad_norm": 0.7826251374200279, "learning_rate": 1.3102800753441488e-05, "loss": 0.4326, "step": 1630 }, { "epoch": 0.8359815479241415, "grad_norm": 0.7955716685883515, "learning_rate": 1.3094907137581567e-05, "loss": 0.3828, "step": 1631 }, { "epoch": 0.8364941055868785, "grad_norm": 0.8381181005742723, "learning_rate": 1.3087011388455275e-05, "loss": 0.4445, "step": 1632 }, { "epoch": 0.8370066632496156, "grad_norm": 0.9037982888038599, "learning_rate": 1.307911351150501e-05, "loss": 0.4132, "step": 1633 }, { "epoch": 0.8375192209123526, "grad_norm": 0.816698615799825, "learning_rate": 1.3071213512174655e-05, "loss": 0.4391, "step": 1634 }, { "epoch": 0.8380317785750897, "grad_norm": 0.8590976402954839, "learning_rate": 1.3063311395909531e-05, "loss": 0.4095, "step": 1635 }, { "epoch": 0.8385443362378268, "grad_norm": 0.8538503855939815, "learning_rate": 1.3055407168156438e-05, "loss": 0.4506, "step": 1636 }, { "epoch": 0.8390568939005638, "grad_norm": 0.9130986737781017, "learning_rate": 1.3047500834363623e-05, "loss": 0.4466, "step": 1637 }, { "epoch": 0.8395694515633009, "grad_norm": 0.8222923406917801, "learning_rate": 1.3039592399980785e-05, "loss": 0.4356, "step": 1638 }, { "epoch": 0.8400820092260379, "grad_norm": 0.9214924252059564, "learning_rate": 1.3031681870459074e-05, "loss": 0.4647, "step": 1639 }, { "epoch": 0.840594566888775, "grad_norm": 0.909371901071924, "learning_rate": 1.3023769251251081e-05, "loss": 0.4303, "step": 1640 }, { "epoch": 0.841107124551512, "grad_norm": 0.8196646043874675, "learning_rate": 1.301585454781084e-05, "loss": 0.4425, "step": 1641 }, { "epoch": 0.8416196822142491, "grad_norm": 0.8501683448174635, "learning_rate": 1.3007937765593818e-05, "loss": 0.4912, "step": 1642 }, { "epoch": 0.8421322398769862, "grad_norm": 0.9125589709314257, "learning_rate": 1.3000018910056922e-05, "loss": 0.4637, "step": 1643 }, { "epoch": 0.8426447975397232, "grad_norm": 0.8341247781105049, "learning_rate": 1.2992097986658476e-05, "loss": 0.3802, "step": 1644 }, { "epoch": 0.8431573552024603, "grad_norm": 0.8602490158576276, "learning_rate": 1.2984175000858241e-05, "loss": 0.4892, "step": 1645 }, { "epoch": 0.8436699128651973, "grad_norm": 0.9172549211054213, "learning_rate": 1.2976249958117395e-05, "loss": 0.4515, "step": 1646 }, { "epoch": 0.8441824705279344, "grad_norm": 0.8702187338894324, "learning_rate": 1.2968322863898533e-05, "loss": 0.4379, "step": 1647 }, { "epoch": 0.8446950281906714, "grad_norm": 0.9197907277138254, "learning_rate": 1.2960393723665663e-05, "loss": 0.3966, "step": 1648 }, { "epoch": 0.8452075858534085, "grad_norm": 0.9128260541846802, "learning_rate": 1.2952462542884212e-05, "loss": 0.4597, "step": 1649 }, { "epoch": 0.8457201435161456, "grad_norm": 0.9244614041150558, "learning_rate": 1.2944529327021002e-05, "loss": 0.485, "step": 1650 }, { "epoch": 0.8462327011788826, "grad_norm": 0.8710832051496349, "learning_rate": 1.2936594081544258e-05, "loss": 0.4461, "step": 1651 }, { "epoch": 0.8467452588416197, "grad_norm": 0.8045577009852314, "learning_rate": 1.2928656811923621e-05, "loss": 0.4593, "step": 1652 }, { "epoch": 0.8472578165043567, "grad_norm": 0.8257925430603908, "learning_rate": 1.29207175236301e-05, "loss": 0.4258, "step": 1653 }, { "epoch": 0.8477703741670938, "grad_norm": 0.9549274659143313, "learning_rate": 1.291277622213612e-05, "loss": 0.4847, "step": 1654 }, { "epoch": 0.8482829318298308, "grad_norm": 0.9358894405825868, "learning_rate": 1.2904832912915481e-05, "loss": 0.4912, "step": 1655 }, { "epoch": 0.8487954894925679, "grad_norm": 0.8403786034017552, "learning_rate": 1.289688760144337e-05, "loss": 0.446, "step": 1656 }, { "epoch": 0.849308047155305, "grad_norm": 0.8122936002233164, "learning_rate": 1.2888940293196352e-05, "loss": 0.4034, "step": 1657 }, { "epoch": 0.849820604818042, "grad_norm": 0.8471035038185775, "learning_rate": 1.2880990993652379e-05, "loss": 0.4384, "step": 1658 }, { "epoch": 0.8503331624807791, "grad_norm": 0.9201367069315612, "learning_rate": 1.2873039708290753e-05, "loss": 0.4256, "step": 1659 }, { "epoch": 0.8508457201435161, "grad_norm": 0.835426172294736, "learning_rate": 1.2865086442592165e-05, "loss": 0.3573, "step": 1660 }, { "epoch": 0.8513582778062532, "grad_norm": 0.8504060212453896, "learning_rate": 1.2857131202038668e-05, "loss": 0.452, "step": 1661 }, { "epoch": 0.8518708354689902, "grad_norm": 0.9223572101266442, "learning_rate": 1.2849173992113669e-05, "loss": 0.4936, "step": 1662 }, { "epoch": 0.8523833931317273, "grad_norm": 0.7961591082382949, "learning_rate": 1.2841214818301935e-05, "loss": 0.4506, "step": 1663 }, { "epoch": 0.8528959507944643, "grad_norm": 0.8424599789631397, "learning_rate": 1.2833253686089588e-05, "loss": 0.3904, "step": 1664 }, { "epoch": 0.8534085084572014, "grad_norm": 0.8755104672528953, "learning_rate": 1.2825290600964104e-05, "loss": 0.4181, "step": 1665 }, { "epoch": 0.8539210661199385, "grad_norm": 0.8575075847844327, "learning_rate": 1.2817325568414299e-05, "loss": 0.3847, "step": 1666 }, { "epoch": 0.8544336237826755, "grad_norm": 0.8172659215734056, "learning_rate": 1.2809358593930332e-05, "loss": 0.4302, "step": 1667 }, { "epoch": 0.8549461814454126, "grad_norm": 0.8454246654331143, "learning_rate": 1.2801389683003701e-05, "loss": 0.4145, "step": 1668 }, { "epoch": 0.8554587391081496, "grad_norm": 0.835898978378263, "learning_rate": 1.2793418841127242e-05, "loss": 0.4362, "step": 1669 }, { "epoch": 0.8559712967708867, "grad_norm": 0.8148630019213848, "learning_rate": 1.2785446073795118e-05, "loss": 0.3755, "step": 1670 }, { "epoch": 0.8564838544336237, "grad_norm": 0.864332847718402, "learning_rate": 1.277747138650282e-05, "loss": 0.4303, "step": 1671 }, { "epoch": 0.8569964120963608, "grad_norm": 0.7780609280198221, "learning_rate": 1.2769494784747164e-05, "loss": 0.4089, "step": 1672 }, { "epoch": 0.8575089697590978, "grad_norm": 0.9931589437628301, "learning_rate": 1.2761516274026289e-05, "loss": 0.4546, "step": 1673 }, { "epoch": 0.8580215274218349, "grad_norm": 0.9794094357469031, "learning_rate": 1.2753535859839638e-05, "loss": 0.5027, "step": 1674 }, { "epoch": 0.858534085084572, "grad_norm": 0.8129727896506654, "learning_rate": 1.2745553547687977e-05, "loss": 0.4309, "step": 1675 }, { "epoch": 0.859046642747309, "grad_norm": 0.8764997173732881, "learning_rate": 1.2737569343073379e-05, "loss": 0.4829, "step": 1676 }, { "epoch": 0.8595592004100461, "grad_norm": 0.8531985845794535, "learning_rate": 1.2729583251499213e-05, "loss": 0.4151, "step": 1677 }, { "epoch": 0.8600717580727832, "grad_norm": 0.8791093419734225, "learning_rate": 1.272159527847016e-05, "loss": 0.4691, "step": 1678 }, { "epoch": 0.8605843157355203, "grad_norm": 0.8379871537929214, "learning_rate": 1.2713605429492193e-05, "loss": 0.4471, "step": 1679 }, { "epoch": 0.8610968733982574, "grad_norm": 0.7995846846854007, "learning_rate": 1.2705613710072575e-05, "loss": 0.4167, "step": 1680 }, { "epoch": 0.8616094310609944, "grad_norm": 0.8262080772367477, "learning_rate": 1.2697620125719862e-05, "loss": 0.4467, "step": 1681 }, { "epoch": 0.8621219887237315, "grad_norm": 0.881581670013693, "learning_rate": 1.2689624681943897e-05, "loss": 0.4093, "step": 1682 }, { "epoch": 0.8626345463864685, "grad_norm": 0.8473945068435518, "learning_rate": 1.2681627384255796e-05, "loss": 0.4719, "step": 1683 }, { "epoch": 0.8631471040492056, "grad_norm": 0.8824006861709429, "learning_rate": 1.2673628238167965e-05, "loss": 0.4341, "step": 1684 }, { "epoch": 0.8636596617119426, "grad_norm": 0.8759397549287918, "learning_rate": 1.2665627249194078e-05, "loss": 0.454, "step": 1685 }, { "epoch": 0.8641722193746797, "grad_norm": 0.857284874802578, "learning_rate": 1.2657624422849077e-05, "loss": 0.4441, "step": 1686 }, { "epoch": 0.8646847770374168, "grad_norm": 0.8680211231302375, "learning_rate": 1.264961976464917e-05, "loss": 0.4831, "step": 1687 }, { "epoch": 0.8651973347001538, "grad_norm": 0.860699578673245, "learning_rate": 1.2641613280111837e-05, "loss": 0.4224, "step": 1688 }, { "epoch": 0.8657098923628909, "grad_norm": 0.8815311636475641, "learning_rate": 1.2633604974755807e-05, "loss": 0.4978, "step": 1689 }, { "epoch": 0.8662224500256279, "grad_norm": 0.8572739778610614, "learning_rate": 1.2625594854101066e-05, "loss": 0.4703, "step": 1690 }, { "epoch": 0.866735007688365, "grad_norm": 0.8099201502308758, "learning_rate": 1.2617582923668855e-05, "loss": 0.4187, "step": 1691 }, { "epoch": 0.867247565351102, "grad_norm": 0.9181284301853802, "learning_rate": 1.260956918898166e-05, "loss": 0.4897, "step": 1692 }, { "epoch": 0.8677601230138391, "grad_norm": 0.8472681327892709, "learning_rate": 1.2601553655563207e-05, "loss": 0.4289, "step": 1693 }, { "epoch": 0.8682726806765761, "grad_norm": 0.8601677972016213, "learning_rate": 1.2593536328938471e-05, "loss": 0.4376, "step": 1694 }, { "epoch": 0.8687852383393132, "grad_norm": 0.8106372732268099, "learning_rate": 1.258551721463366e-05, "loss": 0.4555, "step": 1695 }, { "epoch": 0.8692977960020503, "grad_norm": 0.9265074632321348, "learning_rate": 1.25774963181762e-05, "loss": 0.4605, "step": 1696 }, { "epoch": 0.8698103536647873, "grad_norm": 0.8174449865727204, "learning_rate": 1.2569473645094772e-05, "loss": 0.37, "step": 1697 }, { "epoch": 0.8703229113275244, "grad_norm": 0.8901146481647438, "learning_rate": 1.2561449200919253e-05, "loss": 0.485, "step": 1698 }, { "epoch": 0.8708354689902614, "grad_norm": 0.8355458059902033, "learning_rate": 1.2553422991180766e-05, "loss": 0.4563, "step": 1699 }, { "epoch": 0.8713480266529985, "grad_norm": 0.8926602046560899, "learning_rate": 1.254539502141163e-05, "loss": 0.4655, "step": 1700 }, { "epoch": 0.8718605843157355, "grad_norm": 0.8460306163584626, "learning_rate": 1.2537365297145396e-05, "loss": 0.463, "step": 1701 }, { "epoch": 0.8723731419784726, "grad_norm": 0.8643102192545284, "learning_rate": 1.2529333823916807e-05, "loss": 0.4249, "step": 1702 }, { "epoch": 0.8728856996412097, "grad_norm": 0.8282953343378859, "learning_rate": 1.2521300607261826e-05, "loss": 0.4606, "step": 1703 }, { "epoch": 0.8733982573039467, "grad_norm": 0.7938926341782654, "learning_rate": 1.2513265652717613e-05, "loss": 0.4224, "step": 1704 }, { "epoch": 0.8739108149666838, "grad_norm": 0.849521993900066, "learning_rate": 1.2505228965822511e-05, "loss": 0.4385, "step": 1705 }, { "epoch": 0.8744233726294208, "grad_norm": 0.8285940729092418, "learning_rate": 1.2497190552116082e-05, "loss": 0.3984, "step": 1706 }, { "epoch": 0.8749359302921579, "grad_norm": 0.8015212408345783, "learning_rate": 1.2489150417139067e-05, "loss": 0.4289, "step": 1707 }, { "epoch": 0.8754484879548949, "grad_norm": 0.8734226315126777, "learning_rate": 1.2481108566433388e-05, "loss": 0.498, "step": 1708 }, { "epoch": 0.875961045617632, "grad_norm": 0.8544635831467131, "learning_rate": 1.2473065005542155e-05, "loss": 0.4272, "step": 1709 }, { "epoch": 0.876473603280369, "grad_norm": 0.8022099166271073, "learning_rate": 1.2465019740009662e-05, "loss": 0.4079, "step": 1710 }, { "epoch": 0.8769861609431061, "grad_norm": 0.7962249765888499, "learning_rate": 1.2456972775381364e-05, "loss": 0.4119, "step": 1711 }, { "epoch": 0.8774987186058432, "grad_norm": 0.7902969031771789, "learning_rate": 1.2448924117203904e-05, "loss": 0.4186, "step": 1712 }, { "epoch": 0.8780112762685802, "grad_norm": 0.8653238817972542, "learning_rate": 1.2440873771025079e-05, "loss": 0.378, "step": 1713 }, { "epoch": 0.8785238339313173, "grad_norm": 0.855774774310222, "learning_rate": 1.2432821742393854e-05, "loss": 0.4535, "step": 1714 }, { "epoch": 0.8790363915940543, "grad_norm": 0.8428407422472419, "learning_rate": 1.2424768036860359e-05, "loss": 0.4309, "step": 1715 }, { "epoch": 0.8795489492567914, "grad_norm": 0.8507440423630015, "learning_rate": 1.2416712659975869e-05, "loss": 0.4064, "step": 1716 }, { "epoch": 0.8800615069195284, "grad_norm": 0.8786946474192718, "learning_rate": 1.2408655617292822e-05, "loss": 0.4544, "step": 1717 }, { "epoch": 0.8805740645822655, "grad_norm": 0.8617555347721287, "learning_rate": 1.2400596914364792e-05, "loss": 0.4469, "step": 1718 }, { "epoch": 0.8810866222450026, "grad_norm": 0.8539224770419029, "learning_rate": 1.2392536556746517e-05, "loss": 0.4993, "step": 1719 }, { "epoch": 0.8815991799077396, "grad_norm": 0.8019349116814536, "learning_rate": 1.238447454999385e-05, "loss": 0.4079, "step": 1720 }, { "epoch": 0.8821117375704767, "grad_norm": 0.8408612997067609, "learning_rate": 1.2376410899663802e-05, "loss": 0.4883, "step": 1721 }, { "epoch": 0.8826242952332137, "grad_norm": 0.8963696078921735, "learning_rate": 1.2368345611314508e-05, "loss": 0.415, "step": 1722 }, { "epoch": 0.8831368528959508, "grad_norm": 0.8609157032605267, "learning_rate": 1.236027869050523e-05, "loss": 0.4835, "step": 1723 }, { "epoch": 0.8836494105586878, "grad_norm": 0.8631663641191174, "learning_rate": 1.2352210142796359e-05, "loss": 0.4633, "step": 1724 }, { "epoch": 0.8841619682214249, "grad_norm": 0.7896475915433931, "learning_rate": 1.2344139973749406e-05, "loss": 0.419, "step": 1725 }, { "epoch": 0.884674525884162, "grad_norm": 0.8368338975290552, "learning_rate": 1.2336068188927002e-05, "loss": 0.3703, "step": 1726 }, { "epoch": 0.885187083546899, "grad_norm": 0.8584070926487531, "learning_rate": 1.2327994793892885e-05, "loss": 0.3885, "step": 1727 }, { "epoch": 0.8856996412096361, "grad_norm": 0.8776464984061019, "learning_rate": 1.231991979421192e-05, "loss": 0.4331, "step": 1728 }, { "epoch": 0.8862121988723731, "grad_norm": 0.9804096259583337, "learning_rate": 1.2311843195450046e-05, "loss": 0.4713, "step": 1729 }, { "epoch": 0.8867247565351102, "grad_norm": 0.8059270818484149, "learning_rate": 1.2303765003174342e-05, "loss": 0.3503, "step": 1730 }, { "epoch": 0.8872373141978472, "grad_norm": 0.8612196702909195, "learning_rate": 1.2295685222952958e-05, "loss": 0.4474, "step": 1731 }, { "epoch": 0.8877498718605843, "grad_norm": 0.8511204805923158, "learning_rate": 1.228760386035515e-05, "loss": 0.4057, "step": 1732 }, { "epoch": 0.8882624295233214, "grad_norm": 0.8363224299739174, "learning_rate": 1.2279520920951262e-05, "loss": 0.4511, "step": 1733 }, { "epoch": 0.8887749871860584, "grad_norm": 0.8488839235963103, "learning_rate": 1.2271436410312727e-05, "loss": 0.3906, "step": 1734 }, { "epoch": 0.8892875448487955, "grad_norm": 0.9030046922375038, "learning_rate": 1.2263350334012059e-05, "loss": 0.4044, "step": 1735 }, { "epoch": 0.8898001025115325, "grad_norm": 0.8085985910791655, "learning_rate": 1.2255262697622848e-05, "loss": 0.3792, "step": 1736 }, { "epoch": 0.8903126601742696, "grad_norm": 0.9019806045207133, "learning_rate": 1.2247173506719768e-05, "loss": 0.4664, "step": 1737 }, { "epoch": 0.8908252178370066, "grad_norm": 0.893657703169445, "learning_rate": 1.2239082766878557e-05, "loss": 0.4321, "step": 1738 }, { "epoch": 0.8913377754997437, "grad_norm": 0.8564051130919693, "learning_rate": 1.2230990483676024e-05, "loss": 0.4072, "step": 1739 }, { "epoch": 0.8918503331624807, "grad_norm": 0.8543391717120811, "learning_rate": 1.222289666269004e-05, "loss": 0.4148, "step": 1740 }, { "epoch": 0.8923628908252178, "grad_norm": 0.8266361758069405, "learning_rate": 1.2214801309499538e-05, "loss": 0.4054, "step": 1741 }, { "epoch": 0.8928754484879549, "grad_norm": 0.8400139822309258, "learning_rate": 1.2206704429684504e-05, "loss": 0.4358, "step": 1742 }, { "epoch": 0.8933880061506919, "grad_norm": 0.8345292324633462, "learning_rate": 1.2198606028825978e-05, "loss": 0.429, "step": 1743 }, { "epoch": 0.893900563813429, "grad_norm": 0.7762167502593357, "learning_rate": 1.2190506112506054e-05, "loss": 0.3844, "step": 1744 }, { "epoch": 0.894413121476166, "grad_norm": 0.864520651970873, "learning_rate": 1.2182404686307857e-05, "loss": 0.4545, "step": 1745 }, { "epoch": 0.8949256791389031, "grad_norm": 0.7543371798538782, "learning_rate": 1.2174301755815572e-05, "loss": 0.3683, "step": 1746 }, { "epoch": 0.8954382368016401, "grad_norm": 0.8447791121412842, "learning_rate": 1.2166197326614399e-05, "loss": 0.4329, "step": 1747 }, { "epoch": 0.8959507944643772, "grad_norm": 0.8602183663235251, "learning_rate": 1.2158091404290588e-05, "loss": 0.415, "step": 1748 }, { "epoch": 0.8964633521271143, "grad_norm": 0.8675310720696252, "learning_rate": 1.214998399443141e-05, "loss": 0.4372, "step": 1749 }, { "epoch": 0.8969759097898513, "grad_norm": 0.8824059813425976, "learning_rate": 1.2141875102625166e-05, "loss": 0.4406, "step": 1750 }, { "epoch": 0.8974884674525884, "grad_norm": 0.8826820803650862, "learning_rate": 1.2133764734461173e-05, "loss": 0.3822, "step": 1751 }, { "epoch": 0.8980010251153254, "grad_norm": 0.8521153917765873, "learning_rate": 1.2125652895529766e-05, "loss": 0.4583, "step": 1752 }, { "epoch": 0.8985135827780625, "grad_norm": 0.8012686569360443, "learning_rate": 1.2117539591422308e-05, "loss": 0.4496, "step": 1753 }, { "epoch": 0.8990261404407995, "grad_norm": 0.8526082385583373, "learning_rate": 1.2109424827731144e-05, "loss": 0.4251, "step": 1754 }, { "epoch": 0.8995386981035366, "grad_norm": 0.7937603838614906, "learning_rate": 1.2101308610049653e-05, "loss": 0.4112, "step": 1755 }, { "epoch": 0.9000512557662738, "grad_norm": 0.8724405600601192, "learning_rate": 1.2093190943972204e-05, "loss": 0.4678, "step": 1756 }, { "epoch": 0.9005638134290108, "grad_norm": 0.8764842326681084, "learning_rate": 1.208507183509416e-05, "loss": 0.4044, "step": 1757 }, { "epoch": 0.9010763710917479, "grad_norm": 0.8196960668920319, "learning_rate": 1.2076951289011884e-05, "loss": 0.4453, "step": 1758 }, { "epoch": 0.9015889287544849, "grad_norm": 0.9224099040758895, "learning_rate": 1.2068829311322731e-05, "loss": 0.4503, "step": 1759 }, { "epoch": 0.902101486417222, "grad_norm": 0.8993949271981215, "learning_rate": 1.2060705907625042e-05, "loss": 0.5009, "step": 1760 }, { "epoch": 0.902614044079959, "grad_norm": 0.8376552245765504, "learning_rate": 1.2052581083518135e-05, "loss": 0.4079, "step": 1761 }, { "epoch": 0.9031266017426961, "grad_norm": 0.7897488591378442, "learning_rate": 1.204445484460232e-05, "loss": 0.4118, "step": 1762 }, { "epoch": 0.9036391594054332, "grad_norm": 0.7951491578073282, "learning_rate": 1.2036327196478866e-05, "loss": 0.4379, "step": 1763 }, { "epoch": 0.9041517170681702, "grad_norm": 0.8377403632148113, "learning_rate": 1.2028198144750023e-05, "loss": 0.441, "step": 1764 }, { "epoch": 0.9046642747309073, "grad_norm": 0.8859995055815243, "learning_rate": 1.202006769501901e-05, "loss": 0.4447, "step": 1765 }, { "epoch": 0.9051768323936443, "grad_norm": 0.7796552701881257, "learning_rate": 1.2011935852890004e-05, "loss": 0.4275, "step": 1766 }, { "epoch": 0.9056893900563814, "grad_norm": 0.898232597403603, "learning_rate": 1.2003802623968144e-05, "loss": 0.4667, "step": 1767 }, { "epoch": 0.9062019477191184, "grad_norm": 0.8441095501553603, "learning_rate": 1.199566801385953e-05, "loss": 0.3964, "step": 1768 }, { "epoch": 0.9067145053818555, "grad_norm": 0.8821243640022735, "learning_rate": 1.19875320281712e-05, "loss": 0.4555, "step": 1769 }, { "epoch": 0.9072270630445926, "grad_norm": 0.7961656493183072, "learning_rate": 1.1979394672511156e-05, "loss": 0.3954, "step": 1770 }, { "epoch": 0.9077396207073296, "grad_norm": 0.8224470054639943, "learning_rate": 1.197125595248834e-05, "loss": 0.452, "step": 1771 }, { "epoch": 0.9082521783700667, "grad_norm": 0.8024868377718372, "learning_rate": 1.196311587371262e-05, "loss": 0.429, "step": 1772 }, { "epoch": 0.9087647360328037, "grad_norm": 0.8775391030057075, "learning_rate": 1.1954974441794827e-05, "loss": 0.4377, "step": 1773 }, { "epoch": 0.9092772936955408, "grad_norm": 0.9148850117036678, "learning_rate": 1.19468316623467e-05, "loss": 0.4264, "step": 1774 }, { "epoch": 0.9097898513582778, "grad_norm": 0.890063253402447, "learning_rate": 1.1938687540980925e-05, "loss": 0.4334, "step": 1775 }, { "epoch": 0.9103024090210149, "grad_norm": 0.8187174668813096, "learning_rate": 1.1930542083311096e-05, "loss": 0.4303, "step": 1776 }, { "epoch": 0.910814966683752, "grad_norm": 0.9668095191038883, "learning_rate": 1.1922395294951747e-05, "loss": 0.4163, "step": 1777 }, { "epoch": 0.911327524346489, "grad_norm": 0.9008072918567142, "learning_rate": 1.1914247181518312e-05, "loss": 0.4591, "step": 1778 }, { "epoch": 0.9118400820092261, "grad_norm": 0.8809828672670066, "learning_rate": 1.190609774862715e-05, "loss": 0.4221, "step": 1779 }, { "epoch": 0.9123526396719631, "grad_norm": 0.905971604066088, "learning_rate": 1.1897947001895524e-05, "loss": 0.4464, "step": 1780 }, { "epoch": 0.9128651973347002, "grad_norm": 0.8626110303869292, "learning_rate": 1.1889794946941603e-05, "loss": 0.4283, "step": 1781 }, { "epoch": 0.9133777549974372, "grad_norm": 0.8335658416212552, "learning_rate": 1.1881641589384456e-05, "loss": 0.4432, "step": 1782 }, { "epoch": 0.9138903126601743, "grad_norm": 0.8688357220518379, "learning_rate": 1.187348693484406e-05, "loss": 0.4272, "step": 1783 }, { "epoch": 0.9144028703229113, "grad_norm": 0.8612806238162041, "learning_rate": 1.1865330988941271e-05, "loss": 0.4516, "step": 1784 }, { "epoch": 0.9149154279856484, "grad_norm": 0.8127506388140139, "learning_rate": 1.1857173757297843e-05, "loss": 0.4474, "step": 1785 }, { "epoch": 0.9154279856483855, "grad_norm": 0.7674779311104735, "learning_rate": 1.1849015245536424e-05, "loss": 0.3579, "step": 1786 }, { "epoch": 0.9159405433111225, "grad_norm": 0.8611715347901373, "learning_rate": 1.1840855459280526e-05, "loss": 0.4392, "step": 1787 }, { "epoch": 0.9164531009738596, "grad_norm": 0.8685156949994624, "learning_rate": 1.1832694404154557e-05, "loss": 0.4271, "step": 1788 }, { "epoch": 0.9169656586365966, "grad_norm": 0.7841203358488771, "learning_rate": 1.1824532085783788e-05, "loss": 0.4162, "step": 1789 }, { "epoch": 0.9174782162993337, "grad_norm": 0.900733927545379, "learning_rate": 1.1816368509794365e-05, "loss": 0.4409, "step": 1790 }, { "epoch": 0.9179907739620707, "grad_norm": 0.8643374609072756, "learning_rate": 1.1808203681813304e-05, "loss": 0.4101, "step": 1791 }, { "epoch": 0.9185033316248078, "grad_norm": 0.8949104480666291, "learning_rate": 1.1800037607468477e-05, "loss": 0.4439, "step": 1792 }, { "epoch": 0.9190158892875449, "grad_norm": 0.8899940182205293, "learning_rate": 1.1791870292388624e-05, "loss": 0.454, "step": 1793 }, { "epoch": 0.9195284469502819, "grad_norm": 0.8155681609359954, "learning_rate": 1.1783701742203326e-05, "loss": 0.4102, "step": 1794 }, { "epoch": 0.920041004613019, "grad_norm": 0.8619720415600124, "learning_rate": 1.1775531962543036e-05, "loss": 0.4247, "step": 1795 }, { "epoch": 0.920553562275756, "grad_norm": 0.784395857092634, "learning_rate": 1.1767360959039032e-05, "loss": 0.3994, "step": 1796 }, { "epoch": 0.9210661199384931, "grad_norm": 0.8521520027100721, "learning_rate": 1.1759188737323454e-05, "loss": 0.4283, "step": 1797 }, { "epoch": 0.9215786776012301, "grad_norm": 0.7709639572404675, "learning_rate": 1.1751015303029272e-05, "loss": 0.3677, "step": 1798 }, { "epoch": 0.9220912352639672, "grad_norm": 0.8515371112884398, "learning_rate": 1.1742840661790294e-05, "loss": 0.4832, "step": 1799 }, { "epoch": 0.9226037929267042, "grad_norm": 0.7751834975518671, "learning_rate": 1.1734664819241159e-05, "loss": 0.3942, "step": 1800 }, { "epoch": 0.9231163505894413, "grad_norm": 0.8496551512235657, "learning_rate": 1.1726487781017337e-05, "loss": 0.4452, "step": 1801 }, { "epoch": 0.9236289082521784, "grad_norm": 0.7970139988093204, "learning_rate": 1.1718309552755118e-05, "loss": 0.3995, "step": 1802 }, { "epoch": 0.9241414659149154, "grad_norm": 0.8490851066074099, "learning_rate": 1.1710130140091611e-05, "loss": 0.4498, "step": 1803 }, { "epoch": 0.9246540235776525, "grad_norm": 0.8229944764040152, "learning_rate": 1.1701949548664753e-05, "loss": 0.4553, "step": 1804 }, { "epoch": 0.9251665812403895, "grad_norm": 0.8550049042426225, "learning_rate": 1.1693767784113281e-05, "loss": 0.4564, "step": 1805 }, { "epoch": 0.9256791389031266, "grad_norm": 0.8406954977666296, "learning_rate": 1.1685584852076746e-05, "loss": 0.4142, "step": 1806 }, { "epoch": 0.9261916965658636, "grad_norm": 0.861392793634285, "learning_rate": 1.1677400758195501e-05, "loss": 0.3838, "step": 1807 }, { "epoch": 0.9267042542286007, "grad_norm": 0.8895250832529711, "learning_rate": 1.16692155081107e-05, "loss": 0.4188, "step": 1808 }, { "epoch": 0.9272168118913378, "grad_norm": 0.900554030849946, "learning_rate": 1.1661029107464298e-05, "loss": 0.3987, "step": 1809 }, { "epoch": 0.9277293695540748, "grad_norm": 0.8498273390811708, "learning_rate": 1.1652841561899042e-05, "loss": 0.3976, "step": 1810 }, { "epoch": 0.9282419272168119, "grad_norm": 0.8538289279708916, "learning_rate": 1.1644652877058464e-05, "loss": 0.4114, "step": 1811 }, { "epoch": 0.9287544848795489, "grad_norm": 0.8259263287678887, "learning_rate": 1.1636463058586882e-05, "loss": 0.4517, "step": 1812 }, { "epoch": 0.929267042542286, "grad_norm": 0.8765394667552775, "learning_rate": 1.1628272112129402e-05, "loss": 0.4199, "step": 1813 }, { "epoch": 0.929779600205023, "grad_norm": 0.8422307139179595, "learning_rate": 1.1620080043331901e-05, "loss": 0.4196, "step": 1814 }, { "epoch": 0.9302921578677601, "grad_norm": 0.8654333039480989, "learning_rate": 1.161188685784103e-05, "loss": 0.4759, "step": 1815 }, { "epoch": 0.9308047155304972, "grad_norm": 0.8232341734154708, "learning_rate": 1.1603692561304214e-05, "loss": 0.4274, "step": 1816 }, { "epoch": 0.9313172731932342, "grad_norm": 0.8403196255172275, "learning_rate": 1.1595497159369638e-05, "loss": 0.4721, "step": 1817 }, { "epoch": 0.9318298308559713, "grad_norm": 0.8581480242178959, "learning_rate": 1.1587300657686254e-05, "loss": 0.4449, "step": 1818 }, { "epoch": 0.9323423885187083, "grad_norm": 0.7754171019008719, "learning_rate": 1.1579103061903765e-05, "loss": 0.3439, "step": 1819 }, { "epoch": 0.9328549461814454, "grad_norm": 0.8520780375622419, "learning_rate": 1.1570904377672642e-05, "loss": 0.4122, "step": 1820 }, { "epoch": 0.9333675038441824, "grad_norm": 0.8200353612188372, "learning_rate": 1.156270461064409e-05, "loss": 0.3957, "step": 1821 }, { "epoch": 0.9338800615069195, "grad_norm": 0.8594871155169308, "learning_rate": 1.1554503766470069e-05, "loss": 0.4525, "step": 1822 }, { "epoch": 0.9343926191696565, "grad_norm": 0.9037683909911096, "learning_rate": 1.1546301850803283e-05, "loss": 0.4028, "step": 1823 }, { "epoch": 0.9349051768323936, "grad_norm": 0.8148128494454222, "learning_rate": 1.153809886929717e-05, "loss": 0.4355, "step": 1824 }, { "epoch": 0.9354177344951307, "grad_norm": 0.8137043651799475, "learning_rate": 1.1529894827605902e-05, "loss": 0.4152, "step": 1825 }, { "epoch": 0.9359302921578677, "grad_norm": 0.8136278454587975, "learning_rate": 1.1521689731384391e-05, "loss": 0.4362, "step": 1826 }, { "epoch": 0.9364428498206048, "grad_norm": 0.8103412298782822, "learning_rate": 1.1513483586288262e-05, "loss": 0.3995, "step": 1827 }, { "epoch": 0.9369554074833418, "grad_norm": 0.8767509763298038, "learning_rate": 1.150527639797387e-05, "loss": 0.4499, "step": 1828 }, { "epoch": 0.9374679651460789, "grad_norm": 0.8481626642005178, "learning_rate": 1.1497068172098295e-05, "loss": 0.4502, "step": 1829 }, { "epoch": 0.9379805228088159, "grad_norm": 0.8319590237133414, "learning_rate": 1.1488858914319321e-05, "loss": 0.426, "step": 1830 }, { "epoch": 0.938493080471553, "grad_norm": 0.7991472897105955, "learning_rate": 1.148064863029545e-05, "loss": 0.3755, "step": 1831 }, { "epoch": 0.93900563813429, "grad_norm": 0.7819395884214041, "learning_rate": 1.147243732568589e-05, "loss": 0.4167, "step": 1832 }, { "epoch": 0.9395181957970271, "grad_norm": 0.8098185076310401, "learning_rate": 1.1464225006150553e-05, "loss": 0.4257, "step": 1833 }, { "epoch": 0.9400307534597643, "grad_norm": 0.8150735824384497, "learning_rate": 1.1456011677350049e-05, "loss": 0.4332, "step": 1834 }, { "epoch": 0.9405433111225013, "grad_norm": 0.8097699044778442, "learning_rate": 1.1447797344945686e-05, "loss": 0.4239, "step": 1835 }, { "epoch": 0.9410558687852384, "grad_norm": 0.7832435306007919, "learning_rate": 1.1439582014599462e-05, "loss": 0.4217, "step": 1836 }, { "epoch": 0.9415684264479754, "grad_norm": 0.873677003747474, "learning_rate": 1.1431365691974059e-05, "loss": 0.4677, "step": 1837 }, { "epoch": 0.9420809841107125, "grad_norm": 0.8350391511431079, "learning_rate": 1.1423148382732854e-05, "loss": 0.422, "step": 1838 }, { "epoch": 0.9425935417734496, "grad_norm": 0.8456270824712017, "learning_rate": 1.1414930092539891e-05, "loss": 0.424, "step": 1839 }, { "epoch": 0.9431060994361866, "grad_norm": 0.8629262514744809, "learning_rate": 1.14067108270599e-05, "loss": 0.4498, "step": 1840 }, { "epoch": 0.9436186570989237, "grad_norm": 0.828595772995757, "learning_rate": 1.139849059195828e-05, "loss": 0.4485, "step": 1841 }, { "epoch": 0.9441312147616607, "grad_norm": 0.8875653865558722, "learning_rate": 1.1390269392901096e-05, "loss": 0.4447, "step": 1842 }, { "epoch": 0.9446437724243978, "grad_norm": 0.8697859478466298, "learning_rate": 1.1382047235555078e-05, "loss": 0.3345, "step": 1843 }, { "epoch": 0.9451563300871348, "grad_norm": 0.7765292992056264, "learning_rate": 1.1373824125587625e-05, "loss": 0.3944, "step": 1844 }, { "epoch": 0.9456688877498719, "grad_norm": 0.8174312550919675, "learning_rate": 1.1365600068666781e-05, "loss": 0.4163, "step": 1845 }, { "epoch": 0.946181445412609, "grad_norm": 0.7775241205854895, "learning_rate": 1.1357375070461241e-05, "loss": 0.3925, "step": 1846 }, { "epoch": 0.946694003075346, "grad_norm": 0.8813010438489302, "learning_rate": 1.1349149136640367e-05, "loss": 0.4771, "step": 1847 }, { "epoch": 0.9472065607380831, "grad_norm": 0.8117331022870904, "learning_rate": 1.1340922272874143e-05, "loss": 0.4289, "step": 1848 }, { "epoch": 0.9477191184008201, "grad_norm": 0.7967367575952142, "learning_rate": 1.1332694484833213e-05, "loss": 0.4305, "step": 1849 }, { "epoch": 0.9482316760635572, "grad_norm": 0.7841626399180632, "learning_rate": 1.1324465778188846e-05, "loss": 0.3984, "step": 1850 }, { "epoch": 0.9487442337262942, "grad_norm": 0.798317737736297, "learning_rate": 1.1316236158612952e-05, "loss": 0.4577, "step": 1851 }, { "epoch": 0.9492567913890313, "grad_norm": 0.8609149617904167, "learning_rate": 1.1308005631778061e-05, "loss": 0.4056, "step": 1852 }, { "epoch": 0.9497693490517684, "grad_norm": 0.8134496278213427, "learning_rate": 1.129977420335734e-05, "loss": 0.4072, "step": 1853 }, { "epoch": 0.9502819067145054, "grad_norm": 0.8023298231021769, "learning_rate": 1.1291541879024568e-05, "loss": 0.4006, "step": 1854 }, { "epoch": 0.9507944643772425, "grad_norm": 0.8439534200465144, "learning_rate": 1.128330866445414e-05, "loss": 0.4245, "step": 1855 }, { "epoch": 0.9513070220399795, "grad_norm": 0.8402470639893247, "learning_rate": 1.127507456532108e-05, "loss": 0.435, "step": 1856 }, { "epoch": 0.9518195797027166, "grad_norm": 0.8804171030015626, "learning_rate": 1.1266839587301004e-05, "loss": 0.4463, "step": 1857 }, { "epoch": 0.9523321373654536, "grad_norm": 0.8844726956986251, "learning_rate": 1.1258603736070145e-05, "loss": 0.4339, "step": 1858 }, { "epoch": 0.9528446950281907, "grad_norm": 0.8734715276750415, "learning_rate": 1.1250367017305328e-05, "loss": 0.4388, "step": 1859 }, { "epoch": 0.9533572526909277, "grad_norm": 0.7901061307056583, "learning_rate": 1.1242129436683988e-05, "loss": 0.4391, "step": 1860 }, { "epoch": 0.9538698103536648, "grad_norm": 0.8221792031828494, "learning_rate": 1.1233890999884143e-05, "loss": 0.4291, "step": 1861 }, { "epoch": 0.9543823680164019, "grad_norm": 0.7673111547702504, "learning_rate": 1.1225651712584413e-05, "loss": 0.3649, "step": 1862 }, { "epoch": 0.9548949256791389, "grad_norm": 0.8048968285650417, "learning_rate": 1.121741158046399e-05, "loss": 0.4518, "step": 1863 }, { "epoch": 0.955407483341876, "grad_norm": 0.84940179355387, "learning_rate": 1.1209170609202657e-05, "loss": 0.4632, "step": 1864 }, { "epoch": 0.955920041004613, "grad_norm": 0.7582877513626296, "learning_rate": 1.120092880448078e-05, "loss": 0.388, "step": 1865 }, { "epoch": 0.9564325986673501, "grad_norm": 0.8727728181344677, "learning_rate": 1.1192686171979288e-05, "loss": 0.4651, "step": 1866 }, { "epoch": 0.9569451563300871, "grad_norm": 0.8016961764806985, "learning_rate": 1.1184442717379687e-05, "loss": 0.4192, "step": 1867 }, { "epoch": 0.9574577139928242, "grad_norm": 0.809614309136127, "learning_rate": 1.1176198446364048e-05, "loss": 0.47, "step": 1868 }, { "epoch": 0.9579702716555613, "grad_norm": 0.8732928946945825, "learning_rate": 1.1167953364615007e-05, "loss": 0.4227, "step": 1869 }, { "epoch": 0.9584828293182983, "grad_norm": 0.793339923853348, "learning_rate": 1.1159707477815756e-05, "loss": 0.3826, "step": 1870 }, { "epoch": 0.9589953869810354, "grad_norm": 0.8322030222422873, "learning_rate": 1.1151460791650046e-05, "loss": 0.3975, "step": 1871 }, { "epoch": 0.9595079446437724, "grad_norm": 0.8770577210671593, "learning_rate": 1.1143213311802177e-05, "loss": 0.4533, "step": 1872 }, { "epoch": 0.9600205023065095, "grad_norm": 0.8414404914102358, "learning_rate": 1.1134965043956987e-05, "loss": 0.4235, "step": 1873 }, { "epoch": 0.9605330599692465, "grad_norm": 0.8603629775602003, "learning_rate": 1.1126715993799875e-05, "loss": 0.4548, "step": 1874 }, { "epoch": 0.9610456176319836, "grad_norm": 0.8233181999320521, "learning_rate": 1.1118466167016763e-05, "loss": 0.41, "step": 1875 }, { "epoch": 0.9615581752947207, "grad_norm": 0.827439199778928, "learning_rate": 1.1110215569294122e-05, "loss": 0.4129, "step": 1876 }, { "epoch": 0.9620707329574577, "grad_norm": 0.7705801117617203, "learning_rate": 1.1101964206318938e-05, "loss": 0.3898, "step": 1877 }, { "epoch": 0.9625832906201948, "grad_norm": 0.8546660832048901, "learning_rate": 1.1093712083778748e-05, "loss": 0.4146, "step": 1878 }, { "epoch": 0.9630958482829318, "grad_norm": 0.8653815887584269, "learning_rate": 1.1085459207361579e-05, "loss": 0.4263, "step": 1879 }, { "epoch": 0.9636084059456689, "grad_norm": 0.7654380590573688, "learning_rate": 1.1077205582756015e-05, "loss": 0.3696, "step": 1880 }, { "epoch": 0.9641209636084059, "grad_norm": 0.7984249113860288, "learning_rate": 1.1068951215651132e-05, "loss": 0.4166, "step": 1881 }, { "epoch": 0.964633521271143, "grad_norm": 0.8080513628514453, "learning_rate": 1.1060696111736515e-05, "loss": 0.4173, "step": 1882 }, { "epoch": 0.96514607893388, "grad_norm": 0.8746188372664002, "learning_rate": 1.1052440276702276e-05, "loss": 0.4562, "step": 1883 }, { "epoch": 0.9656586365966171, "grad_norm": 0.8074646228422262, "learning_rate": 1.1044183716239019e-05, "loss": 0.4104, "step": 1884 }, { "epoch": 0.9661711942593542, "grad_norm": 0.8066364254932795, "learning_rate": 1.1035926436037845e-05, "loss": 0.3932, "step": 1885 }, { "epoch": 0.9666837519220912, "grad_norm": 0.893438085619033, "learning_rate": 1.1027668441790358e-05, "loss": 0.4276, "step": 1886 }, { "epoch": 0.9671963095848283, "grad_norm": 0.8972796874996496, "learning_rate": 1.1019409739188653e-05, "loss": 0.4255, "step": 1887 }, { "epoch": 0.9677088672475653, "grad_norm": 0.8352059404572317, "learning_rate": 1.101115033392531e-05, "loss": 0.4403, "step": 1888 }, { "epoch": 0.9682214249103024, "grad_norm": 0.9274531873835046, "learning_rate": 1.1002890231693395e-05, "loss": 0.4725, "step": 1889 }, { "epoch": 0.9687339825730394, "grad_norm": 0.8899073625159752, "learning_rate": 1.099462943818646e-05, "loss": 0.4441, "step": 1890 }, { "epoch": 0.9692465402357765, "grad_norm": 0.8740857015603026, "learning_rate": 1.0986367959098517e-05, "loss": 0.464, "step": 1891 }, { "epoch": 0.9697590978985136, "grad_norm": 0.8130559060699794, "learning_rate": 1.0978105800124072e-05, "loss": 0.4386, "step": 1892 }, { "epoch": 0.9702716555612506, "grad_norm": 0.8517345000784949, "learning_rate": 1.0969842966958087e-05, "loss": 0.4604, "step": 1893 }, { "epoch": 0.9707842132239877, "grad_norm": 0.7664031056053108, "learning_rate": 1.0961579465295987e-05, "loss": 0.3613, "step": 1894 }, { "epoch": 0.9712967708867247, "grad_norm": 0.8302128570793503, "learning_rate": 1.0953315300833665e-05, "loss": 0.4631, "step": 1895 }, { "epoch": 0.9718093285494618, "grad_norm": 0.7854543197915559, "learning_rate": 1.0945050479267472e-05, "loss": 0.4562, "step": 1896 }, { "epoch": 0.9723218862121988, "grad_norm": 0.8316239543810343, "learning_rate": 1.0936785006294197e-05, "loss": 0.3901, "step": 1897 }, { "epoch": 0.9728344438749359, "grad_norm": 0.8015135104482558, "learning_rate": 1.0928518887611099e-05, "loss": 0.3736, "step": 1898 }, { "epoch": 0.973347001537673, "grad_norm": 0.8421292480025815, "learning_rate": 1.092025212891587e-05, "loss": 0.4232, "step": 1899 }, { "epoch": 0.97385955920041, "grad_norm": 0.8362430459193327, "learning_rate": 1.0911984735906635e-05, "loss": 0.4295, "step": 1900 }, { "epoch": 0.9743721168631471, "grad_norm": 0.7724105034219764, "learning_rate": 1.090371671428198e-05, "loss": 0.3943, "step": 1901 }, { "epoch": 0.9748846745258841, "grad_norm": 0.8681403879034132, "learning_rate": 1.0895448069740902e-05, "loss": 0.4378, "step": 1902 }, { "epoch": 0.9753972321886212, "grad_norm": 0.8902875951208327, "learning_rate": 1.0887178807982837e-05, "loss": 0.4656, "step": 1903 }, { "epoch": 0.9759097898513582, "grad_norm": 0.8488422096230968, "learning_rate": 1.0878908934707646e-05, "loss": 0.4219, "step": 1904 }, { "epoch": 0.9764223475140953, "grad_norm": 0.9340226595043509, "learning_rate": 1.0870638455615613e-05, "loss": 0.4423, "step": 1905 }, { "epoch": 0.9769349051768323, "grad_norm": 0.8350871031115393, "learning_rate": 1.0862367376407433e-05, "loss": 0.4501, "step": 1906 }, { "epoch": 0.9774474628395694, "grad_norm": 0.8303743635179981, "learning_rate": 1.0854095702784222e-05, "loss": 0.4199, "step": 1907 }, { "epoch": 0.9779600205023065, "grad_norm": 0.7860409370731393, "learning_rate": 1.08458234404475e-05, "loss": 0.4091, "step": 1908 }, { "epoch": 0.9784725781650435, "grad_norm": 0.8512278224345876, "learning_rate": 1.08375505950992e-05, "loss": 0.403, "step": 1909 }, { "epoch": 0.9789851358277806, "grad_norm": 0.8224234930932388, "learning_rate": 1.0829277172441648e-05, "loss": 0.4357, "step": 1910 }, { "epoch": 0.9794976934905176, "grad_norm": 0.8707713509747792, "learning_rate": 1.0821003178177572e-05, "loss": 0.4302, "step": 1911 }, { "epoch": 0.9800102511532548, "grad_norm": 0.7908970618475607, "learning_rate": 1.0812728618010098e-05, "loss": 0.4564, "step": 1912 }, { "epoch": 0.9805228088159919, "grad_norm": 0.7716734654464676, "learning_rate": 1.0804453497642734e-05, "loss": 0.4294, "step": 1913 }, { "epoch": 0.9810353664787289, "grad_norm": 0.8400743538553918, "learning_rate": 1.0796177822779384e-05, "loss": 0.385, "step": 1914 }, { "epoch": 0.981547924141466, "grad_norm": 0.8080037643228745, "learning_rate": 1.0787901599124322e-05, "loss": 0.4296, "step": 1915 }, { "epoch": 0.982060481804203, "grad_norm": 0.8472210533233895, "learning_rate": 1.0779624832382211e-05, "loss": 0.4747, "step": 1916 }, { "epoch": 0.9825730394669401, "grad_norm": 0.8121441727454356, "learning_rate": 1.0771347528258084e-05, "loss": 0.3838, "step": 1917 }, { "epoch": 0.9830855971296771, "grad_norm": 0.8174949957635564, "learning_rate": 1.0763069692457346e-05, "loss": 0.3721, "step": 1918 }, { "epoch": 0.9835981547924142, "grad_norm": 0.8496507784832368, "learning_rate": 1.0754791330685763e-05, "loss": 0.4508, "step": 1919 }, { "epoch": 0.9841107124551512, "grad_norm": 0.846965163567813, "learning_rate": 1.0746512448649473e-05, "loss": 0.435, "step": 1920 }, { "epoch": 0.9846232701178883, "grad_norm": 0.798065755877446, "learning_rate": 1.073823305205496e-05, "loss": 0.4225, "step": 1921 }, { "epoch": 0.9851358277806254, "grad_norm": 0.8410327585939353, "learning_rate": 1.0729953146609076e-05, "loss": 0.4428, "step": 1922 }, { "epoch": 0.9856483854433624, "grad_norm": 0.8710683222214801, "learning_rate": 1.0721672738019015e-05, "loss": 0.4377, "step": 1923 }, { "epoch": 0.9861609431060995, "grad_norm": 0.8160565020311591, "learning_rate": 1.0713391831992324e-05, "loss": 0.4057, "step": 1924 }, { "epoch": 0.9866735007688365, "grad_norm": 0.8563406531333622, "learning_rate": 1.0705110434236888e-05, "loss": 0.4115, "step": 1925 }, { "epoch": 0.9871860584315736, "grad_norm": 0.8825249148675487, "learning_rate": 1.0696828550460928e-05, "loss": 0.4169, "step": 1926 }, { "epoch": 0.9876986160943106, "grad_norm": 0.8225781172951415, "learning_rate": 1.0688546186373009e-05, "loss": 0.4161, "step": 1927 }, { "epoch": 0.9882111737570477, "grad_norm": 0.9184779354475355, "learning_rate": 1.0680263347682021e-05, "loss": 0.4148, "step": 1928 }, { "epoch": 0.9887237314197848, "grad_norm": 0.9034558408166614, "learning_rate": 1.067198004009718e-05, "loss": 0.4594, "step": 1929 }, { "epoch": 0.9892362890825218, "grad_norm": 0.8763916629708924, "learning_rate": 1.0663696269328034e-05, "loss": 0.4544, "step": 1930 }, { "epoch": 0.9897488467452589, "grad_norm": 0.8602618077554751, "learning_rate": 1.0655412041084438e-05, "loss": 0.4562, "step": 1931 }, { "epoch": 0.9902614044079959, "grad_norm": 0.8527321312754723, "learning_rate": 1.0647127361076573e-05, "loss": 0.4219, "step": 1932 }, { "epoch": 0.990773962070733, "grad_norm": 0.7780180027189804, "learning_rate": 1.0638842235014923e-05, "loss": 0.3639, "step": 1933 }, { "epoch": 0.99128651973347, "grad_norm": 0.8313228688519184, "learning_rate": 1.0630556668610286e-05, "loss": 0.4347, "step": 1934 }, { "epoch": 0.9917990773962071, "grad_norm": 0.8251843736004358, "learning_rate": 1.0622270667573757e-05, "loss": 0.4148, "step": 1935 }, { "epoch": 0.9923116350589442, "grad_norm": 0.8140060238805296, "learning_rate": 1.061398423761674e-05, "loss": 0.4754, "step": 1936 }, { "epoch": 0.9928241927216812, "grad_norm": 0.8005209442841978, "learning_rate": 1.0605697384450922e-05, "loss": 0.3754, "step": 1937 }, { "epoch": 0.9933367503844183, "grad_norm": 0.8080793103083053, "learning_rate": 1.059741011378829e-05, "loss": 0.4226, "step": 1938 }, { "epoch": 0.9938493080471553, "grad_norm": 0.8610779462594266, "learning_rate": 1.0589122431341129e-05, "loss": 0.4, "step": 1939 }, { "epoch": 0.9943618657098924, "grad_norm": 0.8731870600760537, "learning_rate": 1.0580834342821978e-05, "loss": 0.4386, "step": 1940 }, { "epoch": 0.9948744233726294, "grad_norm": 0.8240207207389799, "learning_rate": 1.0572545853943688e-05, "loss": 0.4676, "step": 1941 }, { "epoch": 0.9953869810353665, "grad_norm": 0.7938600564164977, "learning_rate": 1.0564256970419367e-05, "loss": 0.4128, "step": 1942 }, { "epoch": 0.9958995386981035, "grad_norm": 0.7861141381516156, "learning_rate": 1.0555967697962399e-05, "loss": 0.4264, "step": 1943 }, { "epoch": 0.9964120963608406, "grad_norm": 0.8575094033529737, "learning_rate": 1.0547678042286435e-05, "loss": 0.4692, "step": 1944 }, { "epoch": 0.9969246540235777, "grad_norm": 0.931490653760892, "learning_rate": 1.0539388009105404e-05, "loss": 0.4543, "step": 1945 }, { "epoch": 0.9974372116863147, "grad_norm": 0.8759651156042181, "learning_rate": 1.0531097604133473e-05, "loss": 0.4194, "step": 1946 }, { "epoch": 0.9979497693490518, "grad_norm": 0.7720346629895635, "learning_rate": 1.0522806833085076e-05, "loss": 0.4325, "step": 1947 }, { "epoch": 0.9984623270117888, "grad_norm": 0.7772821026309992, "learning_rate": 1.0514515701674904e-05, "loss": 0.4037, "step": 1948 }, { "epoch": 0.9989748846745259, "grad_norm": 0.9264207824054832, "learning_rate": 1.0506224215617887e-05, "loss": 0.4343, "step": 1949 }, { "epoch": 0.9994874423372629, "grad_norm": 0.7570648501117637, "learning_rate": 1.0497932380629207e-05, "loss": 0.4063, "step": 1950 }, { "epoch": 1.0, "grad_norm": 0.8195801855508342, "learning_rate": 1.0489640202424284e-05, "loss": 0.4549, "step": 1951 }, { "epoch": 1.0005125576627372, "grad_norm": 0.8635272151702572, "learning_rate": 1.0481347686718775e-05, "loss": 0.3025, "step": 1952 }, { "epoch": 1.0010251153254741, "grad_norm": 0.9217948504898674, "learning_rate": 1.0473054839228562e-05, "loss": 0.3339, "step": 1953 }, { "epoch": 1.0015376729882113, "grad_norm": 0.8627968301321738, "learning_rate": 1.0464761665669771e-05, "loss": 0.3282, "step": 1954 }, { "epoch": 1.0020502306509482, "grad_norm": 0.8105044223785929, "learning_rate": 1.045646817175874e-05, "loss": 0.2775, "step": 1955 }, { "epoch": 1.0025627883136854, "grad_norm": 0.8503085322849622, "learning_rate": 1.0448174363212029e-05, "loss": 0.3352, "step": 1956 }, { "epoch": 1.0030753459764223, "grad_norm": 0.8193774254613065, "learning_rate": 1.0439880245746428e-05, "loss": 0.3153, "step": 1957 }, { "epoch": 1.0035879036391595, "grad_norm": 0.7742483355120273, "learning_rate": 1.0431585825078916e-05, "loss": 0.2942, "step": 1958 }, { "epoch": 1.0041004613018965, "grad_norm": 0.8454855460303531, "learning_rate": 1.0423291106926704e-05, "loss": 0.2901, "step": 1959 }, { "epoch": 1.0046130189646336, "grad_norm": 0.9188100740859731, "learning_rate": 1.0414996097007199e-05, "loss": 0.2728, "step": 1960 }, { "epoch": 1.0051255766273706, "grad_norm": 1.0230002431029164, "learning_rate": 1.0406700801038006e-05, "loss": 0.281, "step": 1961 }, { "epoch": 1.0056381342901077, "grad_norm": 0.9957888990981277, "learning_rate": 1.0398405224736927e-05, "loss": 0.277, "step": 1962 }, { "epoch": 1.0061506919528447, "grad_norm": 0.8384557069816917, "learning_rate": 1.039010937382197e-05, "loss": 0.272, "step": 1963 }, { "epoch": 1.0066632496155818, "grad_norm": 1.0218920722551783, "learning_rate": 1.0381813254011317e-05, "loss": 0.3119, "step": 1964 }, { "epoch": 1.0071758072783188, "grad_norm": 0.9070216192733525, "learning_rate": 1.0373516871023336e-05, "loss": 0.3174, "step": 1965 }, { "epoch": 1.007688364941056, "grad_norm": 0.9191952365717672, "learning_rate": 1.0365220230576592e-05, "loss": 0.3088, "step": 1966 }, { "epoch": 1.008200922603793, "grad_norm": 0.8971407538747141, "learning_rate": 1.0356923338389807e-05, "loss": 0.2794, "step": 1967 }, { "epoch": 1.00871348026653, "grad_norm": 0.934532965035784, "learning_rate": 1.034862620018189e-05, "loss": 0.3244, "step": 1968 }, { "epoch": 1.009226037929267, "grad_norm": 0.8083131965423259, "learning_rate": 1.0340328821671918e-05, "loss": 0.2616, "step": 1969 }, { "epoch": 1.0097385955920042, "grad_norm": 0.8556856105201534, "learning_rate": 1.0332031208579133e-05, "loss": 0.2794, "step": 1970 }, { "epoch": 1.0102511532547411, "grad_norm": 0.9252955272045508, "learning_rate": 1.032373336662293e-05, "loss": 0.3214, "step": 1971 }, { "epoch": 1.0107637109174783, "grad_norm": 0.8547923258210367, "learning_rate": 1.0315435301522877e-05, "loss": 0.2846, "step": 1972 }, { "epoch": 1.0112762685802152, "grad_norm": 0.8665400953829908, "learning_rate": 1.0307137018998683e-05, "loss": 0.2607, "step": 1973 }, { "epoch": 1.0117888262429524, "grad_norm": 0.892038909570943, "learning_rate": 1.0298838524770212e-05, "loss": 0.2975, "step": 1974 }, { "epoch": 1.0123013839056894, "grad_norm": 0.8660450685068343, "learning_rate": 1.0290539824557474e-05, "loss": 0.2716, "step": 1975 }, { "epoch": 1.0128139415684265, "grad_norm": 0.8950838217485932, "learning_rate": 1.0282240924080625e-05, "loss": 0.2849, "step": 1976 }, { "epoch": 1.0133264992311635, "grad_norm": 1.056843545514466, "learning_rate": 1.027394182905995e-05, "loss": 0.316, "step": 1977 }, { "epoch": 1.0138390568939006, "grad_norm": 0.9480216487575772, "learning_rate": 1.0265642545215872e-05, "loss": 0.3023, "step": 1978 }, { "epoch": 1.0143516145566376, "grad_norm": 1.014449601599579, "learning_rate": 1.0257343078268948e-05, "loss": 0.3053, "step": 1979 }, { "epoch": 1.0148641722193747, "grad_norm": 0.8991715950239025, "learning_rate": 1.0249043433939857e-05, "loss": 0.2795, "step": 1980 }, { "epoch": 1.0153767298821117, "grad_norm": 0.9869276243951892, "learning_rate": 1.02407436179494e-05, "loss": 0.312, "step": 1981 }, { "epoch": 1.0158892875448489, "grad_norm": 0.8716657363656691, "learning_rate": 1.0232443636018502e-05, "loss": 0.2663, "step": 1982 }, { "epoch": 1.0164018452075858, "grad_norm": 0.8096411993889745, "learning_rate": 1.0224143493868191e-05, "loss": 0.2594, "step": 1983 }, { "epoch": 1.016914402870323, "grad_norm": 0.9345369941183842, "learning_rate": 1.021584319721962e-05, "loss": 0.322, "step": 1984 }, { "epoch": 1.01742696053306, "grad_norm": 0.8808837649266028, "learning_rate": 1.0207542751794035e-05, "loss": 0.2606, "step": 1985 }, { "epoch": 1.017939518195797, "grad_norm": 0.9074295244055929, "learning_rate": 1.0199242163312794e-05, "loss": 0.2705, "step": 1986 }, { "epoch": 1.018452075858534, "grad_norm": 0.9369180166978806, "learning_rate": 1.019094143749735e-05, "loss": 0.3251, "step": 1987 }, { "epoch": 1.0189646335212712, "grad_norm": 0.8950376117288052, "learning_rate": 1.0182640580069249e-05, "loss": 0.3191, "step": 1988 }, { "epoch": 1.0194771911840081, "grad_norm": 0.8680714038472632, "learning_rate": 1.0174339596750131e-05, "loss": 0.2556, "step": 1989 }, { "epoch": 1.0199897488467453, "grad_norm": 0.8802270788792617, "learning_rate": 1.0166038493261723e-05, "loss": 0.276, "step": 1990 }, { "epoch": 1.0205023065094823, "grad_norm": 0.8659486726363811, "learning_rate": 1.0157737275325829e-05, "loss": 0.3001, "step": 1991 }, { "epoch": 1.0210148641722194, "grad_norm": 0.8958726501970735, "learning_rate": 1.0149435948664338e-05, "loss": 0.291, "step": 1992 }, { "epoch": 1.0215274218349564, "grad_norm": 0.832224303754553, "learning_rate": 1.0141134518999216e-05, "loss": 0.2597, "step": 1993 }, { "epoch": 1.0220399794976935, "grad_norm": 0.8477968295968795, "learning_rate": 1.013283299205249e-05, "loss": 0.264, "step": 1994 }, { "epoch": 1.0225525371604305, "grad_norm": 0.9573872441034933, "learning_rate": 1.0124531373546262e-05, "loss": 0.2846, "step": 1995 }, { "epoch": 1.0230650948231677, "grad_norm": 0.965931981770065, "learning_rate": 1.0116229669202692e-05, "loss": 0.313, "step": 1996 }, { "epoch": 1.0235776524859046, "grad_norm": 0.9321457060590127, "learning_rate": 1.0107927884744014e-05, "loss": 0.3006, "step": 1997 }, { "epoch": 1.0240902101486418, "grad_norm": 0.8725587669126668, "learning_rate": 1.0099626025892491e-05, "loss": 0.2763, "step": 1998 }, { "epoch": 1.0246027678113787, "grad_norm": 0.9769573022567122, "learning_rate": 1.009132409837046e-05, "loss": 0.2672, "step": 1999 }, { "epoch": 1.0251153254741159, "grad_norm": 0.9519728956582841, "learning_rate": 1.0083022107900295e-05, "loss": 0.3154, "step": 2000 }, { "epoch": 1.0256278831368528, "grad_norm": 0.8846634439170433, "learning_rate": 1.0074720060204417e-05, "loss": 0.2922, "step": 2001 }, { "epoch": 1.02614044079959, "grad_norm": 1.0374273485220535, "learning_rate": 1.0066417961005283e-05, "loss": 0.2932, "step": 2002 }, { "epoch": 1.026652998462327, "grad_norm": 0.8318043253398597, "learning_rate": 1.0058115816025389e-05, "loss": 0.2679, "step": 2003 }, { "epoch": 1.027165556125064, "grad_norm": 0.9835339792247377, "learning_rate": 1.0049813630987262e-05, "loss": 0.3112, "step": 2004 }, { "epoch": 1.027678113787801, "grad_norm": 0.9817114227196949, "learning_rate": 1.0041511411613449e-05, "loss": 0.3219, "step": 2005 }, { "epoch": 1.0281906714505382, "grad_norm": 0.8665638258167286, "learning_rate": 1.0033209163626539e-05, "loss": 0.2421, "step": 2006 }, { "epoch": 1.0287032291132752, "grad_norm": 0.9061659096086612, "learning_rate": 1.0024906892749115e-05, "loss": 0.2777, "step": 2007 }, { "epoch": 1.0292157867760123, "grad_norm": 0.8640742793816705, "learning_rate": 1.0016604604703803e-05, "loss": 0.2977, "step": 2008 }, { "epoch": 1.0297283444387493, "grad_norm": 0.896976795400341, "learning_rate": 1.000830230521322e-05, "loss": 0.2985, "step": 2009 }, { "epoch": 1.0302409021014864, "grad_norm": 0.95969172479675, "learning_rate": 1e-05, "loss": 0.3113, "step": 2010 }, { "epoch": 1.0307534597642234, "grad_norm": 0.9352243965421809, "learning_rate": 9.99169769478678e-06, "loss": 0.3048, "step": 2011 }, { "epoch": 1.0312660174269606, "grad_norm": 0.9497795420629928, "learning_rate": 9.9833953952962e-06, "loss": 0.2998, "step": 2012 }, { "epoch": 1.0317785750896975, "grad_norm": 0.8773659970008918, "learning_rate": 9.975093107250888e-06, "loss": 0.2748, "step": 2013 }, { "epoch": 1.0322911327524347, "grad_norm": 0.92574412376663, "learning_rate": 9.966790836373465e-06, "loss": 0.3043, "step": 2014 }, { "epoch": 1.0328036904151716, "grad_norm": 0.9691865640707849, "learning_rate": 9.958488588386551e-06, "loss": 0.3195, "step": 2015 }, { "epoch": 1.0333162480779088, "grad_norm": 0.8607241836658983, "learning_rate": 9.950186369012744e-06, "loss": 0.2931, "step": 2016 }, { "epoch": 1.0338288057406457, "grad_norm": 0.8700062021983668, "learning_rate": 9.941884183974614e-06, "loss": 0.2861, "step": 2017 }, { "epoch": 1.034341363403383, "grad_norm": 0.8739968432749078, "learning_rate": 9.933582038994719e-06, "loss": 0.2947, "step": 2018 }, { "epoch": 1.0348539210661198, "grad_norm": 0.8689059919179706, "learning_rate": 9.925279939795583e-06, "loss": 0.2694, "step": 2019 }, { "epoch": 1.035366478728857, "grad_norm": 0.9329854057881297, "learning_rate": 9.916977892099707e-06, "loss": 0.2919, "step": 2020 }, { "epoch": 1.035879036391594, "grad_norm": 0.9593601320454763, "learning_rate": 9.908675901629542e-06, "loss": 0.2745, "step": 2021 }, { "epoch": 1.0363915940543311, "grad_norm": 0.839894695298487, "learning_rate": 9.90037397410751e-06, "loss": 0.2798, "step": 2022 }, { "epoch": 1.036904151717068, "grad_norm": 0.9639055205505437, "learning_rate": 9.892072115255991e-06, "loss": 0.3136, "step": 2023 }, { "epoch": 1.0374167093798052, "grad_norm": 0.920913848568969, "learning_rate": 9.88377033079731e-06, "loss": 0.2777, "step": 2024 }, { "epoch": 1.0379292670425422, "grad_norm": 0.9273189008231338, "learning_rate": 9.875468626453741e-06, "loss": 0.3121, "step": 2025 }, { "epoch": 1.0384418247052793, "grad_norm": 0.8794009200717141, "learning_rate": 9.867167007947511e-06, "loss": 0.255, "step": 2026 }, { "epoch": 1.0389543823680163, "grad_norm": 0.9637318127754265, "learning_rate": 9.858865481000787e-06, "loss": 0.2917, "step": 2027 }, { "epoch": 1.0394669400307535, "grad_norm": 0.8359174701766804, "learning_rate": 9.850564051335663e-06, "loss": 0.2805, "step": 2028 }, { "epoch": 1.0399794976934906, "grad_norm": 0.9546077476085436, "learning_rate": 9.842262724674173e-06, "loss": 0.2928, "step": 2029 }, { "epoch": 1.0404920553562276, "grad_norm": 0.94514903331378, "learning_rate": 9.833961506738282e-06, "loss": 0.2792, "step": 2030 }, { "epoch": 1.0410046130189645, "grad_norm": 0.8783539918321192, "learning_rate": 9.82566040324987e-06, "loss": 0.2785, "step": 2031 }, { "epoch": 1.0415171706817017, "grad_norm": 0.9053315666268472, "learning_rate": 9.817359419930753e-06, "loss": 0.2755, "step": 2032 }, { "epoch": 1.0420297283444389, "grad_norm": 0.8577052960411036, "learning_rate": 9.809058562502653e-06, "loss": 0.2617, "step": 2033 }, { "epoch": 1.0425422860071758, "grad_norm": 0.9061132793702658, "learning_rate": 9.80075783668721e-06, "loss": 0.2903, "step": 2034 }, { "epoch": 1.043054843669913, "grad_norm": 0.9237980365177615, "learning_rate": 9.792457248205967e-06, "loss": 0.2764, "step": 2035 }, { "epoch": 1.04356740133265, "grad_norm": 1.0199709696098949, "learning_rate": 9.784156802780385e-06, "loss": 0.3097, "step": 2036 }, { "epoch": 1.044079958995387, "grad_norm": 0.9661883986262496, "learning_rate": 9.77585650613181e-06, "loss": 0.32, "step": 2037 }, { "epoch": 1.044592516658124, "grad_norm": 0.8815062572531234, "learning_rate": 9.767556363981503e-06, "loss": 0.2632, "step": 2038 }, { "epoch": 1.0451050743208612, "grad_norm": 0.917998378309329, "learning_rate": 9.759256382050602e-06, "loss": 0.3233, "step": 2039 }, { "epoch": 1.0456176319835981, "grad_norm": 0.9179146558295358, "learning_rate": 9.750956566060145e-06, "loss": 0.3019, "step": 2040 }, { "epoch": 1.0461301896463353, "grad_norm": 0.9198563159940644, "learning_rate": 9.742656921731057e-06, "loss": 0.2917, "step": 2041 }, { "epoch": 1.0466427473090723, "grad_norm": 0.8819454573121396, "learning_rate": 9.734357454784131e-06, "loss": 0.2918, "step": 2042 }, { "epoch": 1.0471553049718094, "grad_norm": 0.9354160259668082, "learning_rate": 9.726058170940053e-06, "loss": 0.3147, "step": 2043 }, { "epoch": 1.0476678626345464, "grad_norm": 0.8569645318830078, "learning_rate": 9.717759075919375e-06, "loss": 0.2708, "step": 2044 }, { "epoch": 1.0481804202972835, "grad_norm": 0.9674030182326837, "learning_rate": 9.709460175442527e-06, "loss": 0.3102, "step": 2045 }, { "epoch": 1.0486929779600205, "grad_norm": 1.0087662226888825, "learning_rate": 9.701161475229791e-06, "loss": 0.3529, "step": 2046 }, { "epoch": 1.0492055356227576, "grad_norm": 0.8985795563599253, "learning_rate": 9.692862981001319e-06, "loss": 0.2755, "step": 2047 }, { "epoch": 1.0497180932854946, "grad_norm": 0.9062373796286358, "learning_rate": 9.684564698477128e-06, "loss": 0.2901, "step": 2048 }, { "epoch": 1.0502306509482318, "grad_norm": 0.9317243736532417, "learning_rate": 9.676266633377073e-06, "loss": 0.3063, "step": 2049 }, { "epoch": 1.0507432086109687, "grad_norm": 0.8764695729141755, "learning_rate": 9.66796879142087e-06, "loss": 0.2796, "step": 2050 }, { "epoch": 1.0512557662737059, "grad_norm": 0.8996376871068118, "learning_rate": 9.659671178328083e-06, "loss": 0.264, "step": 2051 }, { "epoch": 1.0517683239364428, "grad_norm": 0.9194650372524013, "learning_rate": 9.651373799818114e-06, "loss": 0.2732, "step": 2052 }, { "epoch": 1.05228088159918, "grad_norm": 0.9920913499365727, "learning_rate": 9.643076661610197e-06, "loss": 0.2563, "step": 2053 }, { "epoch": 1.052793439261917, "grad_norm": 0.9402422317505063, "learning_rate": 9.634779769423412e-06, "loss": 0.3277, "step": 2054 }, { "epoch": 1.053305996924654, "grad_norm": 0.9444289746256145, "learning_rate": 9.626483128976665e-06, "loss": 0.2901, "step": 2055 }, { "epoch": 1.053818554587391, "grad_norm": 0.9139483540499164, "learning_rate": 9.618186745988688e-06, "loss": 0.2948, "step": 2056 }, { "epoch": 1.0543311122501282, "grad_norm": 0.91092257281968, "learning_rate": 9.60989062617803e-06, "loss": 0.2922, "step": 2057 }, { "epoch": 1.0548436699128652, "grad_norm": 0.9249720726977202, "learning_rate": 9.601594775263073e-06, "loss": 0.2811, "step": 2058 }, { "epoch": 1.0553562275756023, "grad_norm": 0.8663587593388189, "learning_rate": 9.593299198961999e-06, "loss": 0.2474, "step": 2059 }, { "epoch": 1.0558687852383393, "grad_norm": 0.9680616514485998, "learning_rate": 9.585003902992804e-06, "loss": 0.2925, "step": 2060 }, { "epoch": 1.0563813429010764, "grad_norm": 0.8772623238519406, "learning_rate": 9.576708893073297e-06, "loss": 0.2698, "step": 2061 }, { "epoch": 1.0568939005638134, "grad_norm": 0.8823314638154363, "learning_rate": 9.568414174921085e-06, "loss": 0.2342, "step": 2062 }, { "epoch": 1.0574064582265505, "grad_norm": 0.9098706374868295, "learning_rate": 9.560119754253578e-06, "loss": 0.26, "step": 2063 }, { "epoch": 1.0579190158892875, "grad_norm": 0.9303901834028865, "learning_rate": 9.551825636787974e-06, "loss": 0.3061, "step": 2064 }, { "epoch": 1.0584315735520247, "grad_norm": 0.9027159880116923, "learning_rate": 9.543531828241263e-06, "loss": 0.2889, "step": 2065 }, { "epoch": 1.0589441312147616, "grad_norm": 0.9386046628430162, "learning_rate": 9.535238334330234e-06, "loss": 0.3009, "step": 2066 }, { "epoch": 1.0594566888774988, "grad_norm": 0.8303371898278099, "learning_rate": 9.526945160771441e-06, "loss": 0.276, "step": 2067 }, { "epoch": 1.0599692465402357, "grad_norm": 0.8101795585530794, "learning_rate": 9.518652313281229e-06, "loss": 0.254, "step": 2068 }, { "epoch": 1.0604818042029729, "grad_norm": 0.9228114114065432, "learning_rate": 9.510359797575716e-06, "loss": 0.2977, "step": 2069 }, { "epoch": 1.0609943618657098, "grad_norm": 0.9165467532957181, "learning_rate": 9.502067619370794e-06, "loss": 0.282, "step": 2070 }, { "epoch": 1.061506919528447, "grad_norm": 0.9872260777206037, "learning_rate": 9.493775784382115e-06, "loss": 0.2941, "step": 2071 }, { "epoch": 1.062019477191184, "grad_norm": 0.8385007997754627, "learning_rate": 9.485484298325098e-06, "loss": 0.2512, "step": 2072 }, { "epoch": 1.0625320348539211, "grad_norm": 0.8819995708255198, "learning_rate": 9.477193166914926e-06, "loss": 0.2706, "step": 2073 }, { "epoch": 1.063044592516658, "grad_norm": 0.8726140173621176, "learning_rate": 9.468902395866532e-06, "loss": 0.2812, "step": 2074 }, { "epoch": 1.0635571501793952, "grad_norm": 0.9278484420668996, "learning_rate": 9.460611990894599e-06, "loss": 0.2594, "step": 2075 }, { "epoch": 1.0640697078421322, "grad_norm": 0.9346227015494154, "learning_rate": 9.452321957713563e-06, "loss": 0.2996, "step": 2076 }, { "epoch": 1.0645822655048693, "grad_norm": 0.935870829367005, "learning_rate": 9.444032302037608e-06, "loss": 0.2794, "step": 2077 }, { "epoch": 1.0650948231676063, "grad_norm": 0.9347478112008265, "learning_rate": 9.435743029580638e-06, "loss": 0.2997, "step": 2078 }, { "epoch": 1.0656073808303435, "grad_norm": 0.9729367700431437, "learning_rate": 9.427454146056315e-06, "loss": 0.3116, "step": 2079 }, { "epoch": 1.0661199384930804, "grad_norm": 0.9522457968569659, "learning_rate": 9.419165657178022e-06, "loss": 0.2909, "step": 2080 }, { "epoch": 1.0666324961558176, "grad_norm": 0.9271402802094446, "learning_rate": 9.410877568658876e-06, "loss": 0.2772, "step": 2081 }, { "epoch": 1.0671450538185545, "grad_norm": 0.9510270599076441, "learning_rate": 9.402589886211711e-06, "loss": 0.283, "step": 2082 }, { "epoch": 1.0676576114812917, "grad_norm": 0.9167508749566629, "learning_rate": 9.394302615549081e-06, "loss": 0.2677, "step": 2083 }, { "epoch": 1.0681701691440286, "grad_norm": 0.8087404647799386, "learning_rate": 9.386015762383262e-06, "loss": 0.2455, "step": 2084 }, { "epoch": 1.0686827268067658, "grad_norm": 0.9675517772718017, "learning_rate": 9.377729332426247e-06, "loss": 0.3332, "step": 2085 }, { "epoch": 1.0691952844695027, "grad_norm": 0.8487392487815847, "learning_rate": 9.369443331389718e-06, "loss": 0.2799, "step": 2086 }, { "epoch": 1.06970784213224, "grad_norm": 0.8543932848850185, "learning_rate": 9.361157764985079e-06, "loss": 0.2735, "step": 2087 }, { "epoch": 1.0702203997949769, "grad_norm": 0.8363786545600811, "learning_rate": 9.35287263892343e-06, "loss": 0.2685, "step": 2088 }, { "epoch": 1.070732957457714, "grad_norm": 0.8897259698752692, "learning_rate": 9.344587958915566e-06, "loss": 0.2744, "step": 2089 }, { "epoch": 1.071245515120451, "grad_norm": 0.9116955786609977, "learning_rate": 9.336303730671968e-06, "loss": 0.2839, "step": 2090 }, { "epoch": 1.0717580727831881, "grad_norm": 0.9237072667340813, "learning_rate": 9.328019959902821e-06, "loss": 0.286, "step": 2091 }, { "epoch": 1.072270630445925, "grad_norm": 0.9121180224744477, "learning_rate": 9.319736652317984e-06, "loss": 0.2641, "step": 2092 }, { "epoch": 1.0727831881086622, "grad_norm": 0.9439674651433881, "learning_rate": 9.311453813626993e-06, "loss": 0.3046, "step": 2093 }, { "epoch": 1.0732957457713992, "grad_norm": 0.8790265559642066, "learning_rate": 9.303171449539074e-06, "loss": 0.2718, "step": 2094 }, { "epoch": 1.0738083034341364, "grad_norm": 0.9236002903254996, "learning_rate": 9.294889565763119e-06, "loss": 0.2474, "step": 2095 }, { "epoch": 1.0743208610968733, "grad_norm": 1.0639538708143301, "learning_rate": 9.286608168007678e-06, "loss": 0.2901, "step": 2096 }, { "epoch": 1.0748334187596105, "grad_norm": 0.8862105629720758, "learning_rate": 9.278327261980986e-06, "loss": 0.2696, "step": 2097 }, { "epoch": 1.0753459764223474, "grad_norm": 0.8478058229649589, "learning_rate": 9.270046853390924e-06, "loss": 0.2535, "step": 2098 }, { "epoch": 1.0758585340850846, "grad_norm": 0.899555529920792, "learning_rate": 9.261766947945043e-06, "loss": 0.2872, "step": 2099 }, { "epoch": 1.0763710917478215, "grad_norm": 1.0092133645494958, "learning_rate": 9.25348755135053e-06, "loss": 0.2894, "step": 2100 }, { "epoch": 1.0768836494105587, "grad_norm": 1.0511872209624065, "learning_rate": 9.245208669314238e-06, "loss": 0.2961, "step": 2101 }, { "epoch": 1.0773962070732956, "grad_norm": 0.8568070229199723, "learning_rate": 9.236930307542654e-06, "loss": 0.259, "step": 2102 }, { "epoch": 1.0779087647360328, "grad_norm": 0.8451132017345234, "learning_rate": 9.228652471741917e-06, "loss": 0.2559, "step": 2103 }, { "epoch": 1.07842132239877, "grad_norm": 0.8834063487331374, "learning_rate": 9.22037516761779e-06, "loss": 0.259, "step": 2104 }, { "epoch": 1.078933880061507, "grad_norm": 0.8438207359961598, "learning_rate": 9.212098400875678e-06, "loss": 0.2498, "step": 2105 }, { "epoch": 1.0794464377242439, "grad_norm": 1.0327317252290256, "learning_rate": 9.203822177220621e-06, "loss": 0.3259, "step": 2106 }, { "epoch": 1.079958995386981, "grad_norm": 0.9141265294783335, "learning_rate": 9.195546502357268e-06, "loss": 0.2586, "step": 2107 }, { "epoch": 1.0804715530497182, "grad_norm": 1.002766453079845, "learning_rate": 9.187271381989906e-06, "loss": 0.3429, "step": 2108 }, { "epoch": 1.0809841107124551, "grad_norm": 0.8886403388370882, "learning_rate": 9.17899682182243e-06, "loss": 0.2595, "step": 2109 }, { "epoch": 1.081496668375192, "grad_norm": 0.9037538690531727, "learning_rate": 9.170722827558357e-06, "loss": 0.2676, "step": 2110 }, { "epoch": 1.0820092260379293, "grad_norm": 0.8922145055977269, "learning_rate": 9.162449404900803e-06, "loss": 0.2621, "step": 2111 }, { "epoch": 1.0825217837006664, "grad_norm": 0.8987010453452, "learning_rate": 9.154176559552502e-06, "loss": 0.2769, "step": 2112 }, { "epoch": 1.0830343413634034, "grad_norm": 0.9174516961993994, "learning_rate": 9.145904297215785e-06, "loss": 0.2894, "step": 2113 }, { "epoch": 1.0835468990261405, "grad_norm": 1.0454119100957016, "learning_rate": 9.13763262359257e-06, "loss": 0.3232, "step": 2114 }, { "epoch": 1.0840594566888775, "grad_norm": 0.9869257163014802, "learning_rate": 9.12936154438439e-06, "loss": 0.3197, "step": 2115 }, { "epoch": 1.0845720143516147, "grad_norm": 0.9047485858730863, "learning_rate": 9.121091065292356e-06, "loss": 0.3126, "step": 2116 }, { "epoch": 1.0850845720143516, "grad_norm": 0.9281220408254875, "learning_rate": 9.112821192017168e-06, "loss": 0.2569, "step": 2117 }, { "epoch": 1.0855971296770888, "grad_norm": 0.9281748195242557, "learning_rate": 9.104551930259101e-06, "loss": 0.2821, "step": 2118 }, { "epoch": 1.0861096873398257, "grad_norm": 0.9936396032926439, "learning_rate": 9.096283285718025e-06, "loss": 0.2903, "step": 2119 }, { "epoch": 1.0866222450025629, "grad_norm": 0.9170989784856948, "learning_rate": 9.088015264093365e-06, "loss": 0.2983, "step": 2120 }, { "epoch": 1.0871348026652998, "grad_norm": 0.8796764571885175, "learning_rate": 9.079747871084137e-06, "loss": 0.2501, "step": 2121 }, { "epoch": 1.087647360328037, "grad_norm": 0.8768398302062347, "learning_rate": 9.071481112388905e-06, "loss": 0.2832, "step": 2122 }, { "epoch": 1.088159917990774, "grad_norm": 0.9540462162649889, "learning_rate": 9.063214993705804e-06, "loss": 0.3108, "step": 2123 }, { "epoch": 1.088672475653511, "grad_norm": 0.9325157975385396, "learning_rate": 9.054949520732535e-06, "loss": 0.2947, "step": 2124 }, { "epoch": 1.089185033316248, "grad_norm": 0.9197729659770694, "learning_rate": 9.046684699166338e-06, "loss": 0.3026, "step": 2125 }, { "epoch": 1.0896975909789852, "grad_norm": 0.9311646957729861, "learning_rate": 9.038420534704015e-06, "loss": 0.3015, "step": 2126 }, { "epoch": 1.0902101486417222, "grad_norm": 0.8824326173562199, "learning_rate": 9.030157033041918e-06, "loss": 0.264, "step": 2127 }, { "epoch": 1.0907227063044593, "grad_norm": 0.9373263997812631, "learning_rate": 9.021894199875934e-06, "loss": 0.3089, "step": 2128 }, { "epoch": 1.0912352639671963, "grad_norm": 0.8788051806587647, "learning_rate": 9.013632040901485e-06, "loss": 0.3007, "step": 2129 }, { "epoch": 1.0917478216299334, "grad_norm": 0.9577628599205064, "learning_rate": 9.005370561813545e-06, "loss": 0.3262, "step": 2130 }, { "epoch": 1.0922603792926704, "grad_norm": 0.9113338817022621, "learning_rate": 8.997109768306607e-06, "loss": 0.2776, "step": 2131 }, { "epoch": 1.0927729369554076, "grad_norm": 0.9149846055879653, "learning_rate": 8.988849666074694e-06, "loss": 0.292, "step": 2132 }, { "epoch": 1.0932854946181445, "grad_norm": 0.9607068295337495, "learning_rate": 8.980590260811349e-06, "loss": 0.3054, "step": 2133 }, { "epoch": 1.0937980522808817, "grad_norm": 0.909565047683156, "learning_rate": 8.972331558209644e-06, "loss": 0.2684, "step": 2134 }, { "epoch": 1.0943106099436186, "grad_norm": 0.8775992215410722, "learning_rate": 8.964073563962158e-06, "loss": 0.2748, "step": 2135 }, { "epoch": 1.0948231676063558, "grad_norm": 0.9252593339301961, "learning_rate": 8.955816283760984e-06, "loss": 0.2919, "step": 2136 }, { "epoch": 1.0953357252690927, "grad_norm": 0.9910760767746192, "learning_rate": 8.947559723297726e-06, "loss": 0.2818, "step": 2137 }, { "epoch": 1.09584828293183, "grad_norm": 0.9387338119340496, "learning_rate": 8.939303888263485e-06, "loss": 0.304, "step": 2138 }, { "epoch": 1.0963608405945668, "grad_norm": 0.8452592469258451, "learning_rate": 8.931048784348875e-06, "loss": 0.2426, "step": 2139 }, { "epoch": 1.096873398257304, "grad_norm": 0.930413516711538, "learning_rate": 8.922794417243989e-06, "loss": 0.2568, "step": 2140 }, { "epoch": 1.097385955920041, "grad_norm": 0.8874653084433751, "learning_rate": 8.91454079263842e-06, "loss": 0.2878, "step": 2141 }, { "epoch": 1.0978985135827781, "grad_norm": 0.8461962213385626, "learning_rate": 8.906287916221259e-06, "loss": 0.2615, "step": 2142 }, { "epoch": 1.098411071245515, "grad_norm": 0.89762946336651, "learning_rate": 8.898035793681064e-06, "loss": 0.2718, "step": 2143 }, { "epoch": 1.0989236289082522, "grad_norm": 0.9086089533158436, "learning_rate": 8.889784430705881e-06, "loss": 0.2708, "step": 2144 }, { "epoch": 1.0994361865709892, "grad_norm": 0.8811585824358119, "learning_rate": 8.881533832983235e-06, "loss": 0.2842, "step": 2145 }, { "epoch": 1.0999487442337263, "grad_norm": 0.9381672653560216, "learning_rate": 8.873284006200129e-06, "loss": 0.2947, "step": 2146 }, { "epoch": 1.1004613018964633, "grad_norm": 0.8713115347226755, "learning_rate": 8.865034956043017e-06, "loss": 0.2675, "step": 2147 }, { "epoch": 1.1009738595592005, "grad_norm": 0.9705445838069379, "learning_rate": 8.856786688197827e-06, "loss": 0.3194, "step": 2148 }, { "epoch": 1.1014864172219374, "grad_norm": 0.904407964579503, "learning_rate": 8.848539208349956e-06, "loss": 0.2927, "step": 2149 }, { "epoch": 1.1019989748846746, "grad_norm": 0.953642147122254, "learning_rate": 8.840292522184247e-06, "loss": 0.3055, "step": 2150 }, { "epoch": 1.1025115325474115, "grad_norm": 0.8911973610216203, "learning_rate": 8.832046635384996e-06, "loss": 0.2824, "step": 2151 }, { "epoch": 1.1030240902101487, "grad_norm": 0.9134344733635279, "learning_rate": 8.823801553635955e-06, "loss": 0.2955, "step": 2152 }, { "epoch": 1.1035366478728856, "grad_norm": 0.8793778496523196, "learning_rate": 8.81555728262032e-06, "loss": 0.2769, "step": 2153 }, { "epoch": 1.1040492055356228, "grad_norm": 0.9056179530274282, "learning_rate": 8.807313828020715e-06, "loss": 0.2969, "step": 2154 }, { "epoch": 1.1045617631983597, "grad_norm": 0.9413425278252061, "learning_rate": 8.799071195519224e-06, "loss": 0.3271, "step": 2155 }, { "epoch": 1.105074320861097, "grad_norm": 1.0031727481789867, "learning_rate": 8.790829390797342e-06, "loss": 0.3007, "step": 2156 }, { "epoch": 1.1055868785238339, "grad_norm": 0.8782484088950964, "learning_rate": 8.782588419536015e-06, "loss": 0.2756, "step": 2157 }, { "epoch": 1.106099436186571, "grad_norm": 0.9547798043869148, "learning_rate": 8.774348287415589e-06, "loss": 0.2596, "step": 2158 }, { "epoch": 1.106611993849308, "grad_norm": 1.0025776773101784, "learning_rate": 8.766109000115857e-06, "loss": 0.3432, "step": 2159 }, { "epoch": 1.1071245515120451, "grad_norm": 0.8574500267586509, "learning_rate": 8.757870563316018e-06, "loss": 0.2484, "step": 2160 }, { "epoch": 1.107637109174782, "grad_norm": 0.9448419928270674, "learning_rate": 8.749632982694675e-06, "loss": 0.271, "step": 2161 }, { "epoch": 1.1081496668375193, "grad_norm": 0.9964283805575198, "learning_rate": 8.74139626392986e-06, "loss": 0.313, "step": 2162 }, { "epoch": 1.1086622245002562, "grad_norm": 0.8915978821611265, "learning_rate": 8.733160412698996e-06, "loss": 0.2888, "step": 2163 }, { "epoch": 1.1091747821629934, "grad_norm": 0.8892260636441724, "learning_rate": 8.724925434678923e-06, "loss": 0.2685, "step": 2164 }, { "epoch": 1.1096873398257303, "grad_norm": 0.9942857383105913, "learning_rate": 8.716691335545862e-06, "loss": 0.2943, "step": 2165 }, { "epoch": 1.1101998974884675, "grad_norm": 0.8978569026669411, "learning_rate": 8.708458120975436e-06, "loss": 0.2962, "step": 2166 }, { "epoch": 1.1107124551512044, "grad_norm": 0.9121160552707436, "learning_rate": 8.700225796642663e-06, "loss": 0.2716, "step": 2167 }, { "epoch": 1.1112250128139416, "grad_norm": 0.9417346049459188, "learning_rate": 8.691994368221942e-06, "loss": 0.2832, "step": 2168 }, { "epoch": 1.1117375704766785, "grad_norm": 0.8949232779657615, "learning_rate": 8.683763841387051e-06, "loss": 0.2811, "step": 2169 }, { "epoch": 1.1122501281394157, "grad_norm": 0.9237134733532485, "learning_rate": 8.675534221811156e-06, "loss": 0.2863, "step": 2170 }, { "epoch": 1.1127626858021527, "grad_norm": 0.9183045477320622, "learning_rate": 8.667305515166792e-06, "loss": 0.3069, "step": 2171 }, { "epoch": 1.1132752434648898, "grad_norm": 1.0577765080415702, "learning_rate": 8.659077727125859e-06, "loss": 0.291, "step": 2172 }, { "epoch": 1.1137878011276268, "grad_norm": 0.9420871424054871, "learning_rate": 8.650850863359636e-06, "loss": 0.279, "step": 2173 }, { "epoch": 1.114300358790364, "grad_norm": 0.96132200934483, "learning_rate": 8.64262492953876e-06, "loss": 0.3055, "step": 2174 }, { "epoch": 1.1148129164531009, "grad_norm": 0.9671299294420014, "learning_rate": 8.634399931333226e-06, "loss": 0.3431, "step": 2175 }, { "epoch": 1.115325474115838, "grad_norm": 1.0219229891941297, "learning_rate": 8.626175874412376e-06, "loss": 0.3035, "step": 2176 }, { "epoch": 1.115838031778575, "grad_norm": 0.8802034269046172, "learning_rate": 8.617952764444922e-06, "loss": 0.2609, "step": 2177 }, { "epoch": 1.1163505894413122, "grad_norm": 0.9823515051466274, "learning_rate": 8.60973060709891e-06, "loss": 0.3005, "step": 2178 }, { "epoch": 1.116863147104049, "grad_norm": 0.8827483851828075, "learning_rate": 8.601509408041722e-06, "loss": 0.2828, "step": 2179 }, { "epoch": 1.1173757047667863, "grad_norm": 0.8915871525938975, "learning_rate": 8.593289172940102e-06, "loss": 0.2922, "step": 2180 }, { "epoch": 1.1178882624295232, "grad_norm": 0.897235212885709, "learning_rate": 8.58506990746011e-06, "loss": 0.2889, "step": 2181 }, { "epoch": 1.1184008200922604, "grad_norm": 0.9249664453549916, "learning_rate": 8.576851617267151e-06, "loss": 0.2835, "step": 2182 }, { "epoch": 1.1189133777549976, "grad_norm": 0.876114712589286, "learning_rate": 8.568634308025945e-06, "loss": 0.2608, "step": 2183 }, { "epoch": 1.1194259354177345, "grad_norm": 0.9364071719468672, "learning_rate": 8.560417985400541e-06, "loss": 0.2861, "step": 2184 }, { "epoch": 1.1199384930804714, "grad_norm": 0.9044581122210542, "learning_rate": 8.552202655054315e-06, "loss": 0.2767, "step": 2185 }, { "epoch": 1.1204510507432086, "grad_norm": 0.9297887005390414, "learning_rate": 8.543988322649954e-06, "loss": 0.293, "step": 2186 }, { "epoch": 1.1209636084059458, "grad_norm": 0.8819013977382089, "learning_rate": 8.535774993849449e-06, "loss": 0.2764, "step": 2187 }, { "epoch": 1.1214761660686827, "grad_norm": 0.9490307681674058, "learning_rate": 8.527562674314109e-06, "loss": 0.2937, "step": 2188 }, { "epoch": 1.1219887237314197, "grad_norm": 1.0233132276606984, "learning_rate": 8.519351369704553e-06, "loss": 0.3066, "step": 2189 }, { "epoch": 1.1225012813941568, "grad_norm": 0.9366623776869242, "learning_rate": 8.511141085680684e-06, "loss": 0.3015, "step": 2190 }, { "epoch": 1.123013839056894, "grad_norm": 0.8535550182434688, "learning_rate": 8.502931827901706e-06, "loss": 0.2534, "step": 2191 }, { "epoch": 1.123526396719631, "grad_norm": 0.852945623310304, "learning_rate": 8.494723602026131e-06, "loss": 0.2665, "step": 2192 }, { "epoch": 1.1240389543823681, "grad_norm": 0.870481131514147, "learning_rate": 8.486516413711743e-06, "loss": 0.3127, "step": 2193 }, { "epoch": 1.124551512045105, "grad_norm": 0.8921521637668857, "learning_rate": 8.478310268615612e-06, "loss": 0.2329, "step": 2194 }, { "epoch": 1.1250640697078422, "grad_norm": 0.9235839813425829, "learning_rate": 8.4701051723941e-06, "loss": 0.2741, "step": 2195 }, { "epoch": 1.1255766273705792, "grad_norm": 0.904039678261809, "learning_rate": 8.461901130702836e-06, "loss": 0.2937, "step": 2196 }, { "epoch": 1.1260891850333163, "grad_norm": 0.9248647968383108, "learning_rate": 8.45369814919672e-06, "loss": 0.2556, "step": 2197 }, { "epoch": 1.1266017426960533, "grad_norm": 0.9523960753886664, "learning_rate": 8.445496233529934e-06, "loss": 0.2601, "step": 2198 }, { "epoch": 1.1271143003587905, "grad_norm": 0.9615873451330212, "learning_rate": 8.437295389355912e-06, "loss": 0.3029, "step": 2199 }, { "epoch": 1.1276268580215274, "grad_norm": 0.8904558717556472, "learning_rate": 8.429095622327363e-06, "loss": 0.2486, "step": 2200 }, { "epoch": 1.1281394156842646, "grad_norm": 0.9843794478341545, "learning_rate": 8.42089693809624e-06, "loss": 0.2869, "step": 2201 }, { "epoch": 1.1286519733470015, "grad_norm": 0.9392802452571288, "learning_rate": 8.41269934231375e-06, "loss": 0.2796, "step": 2202 }, { "epoch": 1.1291645310097387, "grad_norm": 0.9120613280856169, "learning_rate": 8.404502840630363e-06, "loss": 0.286, "step": 2203 }, { "epoch": 1.1296770886724756, "grad_norm": 0.9502140066660282, "learning_rate": 8.39630743869579e-06, "loss": 0.3162, "step": 2204 }, { "epoch": 1.1301896463352128, "grad_norm": 0.9723316280949851, "learning_rate": 8.388113142158972e-06, "loss": 0.2835, "step": 2205 }, { "epoch": 1.1307022039979497, "grad_norm": 0.9598091421299012, "learning_rate": 8.3799199566681e-06, "loss": 0.286, "step": 2206 }, { "epoch": 1.131214761660687, "grad_norm": 0.9353049841342723, "learning_rate": 8.3717278878706e-06, "loss": 0.2681, "step": 2207 }, { "epoch": 1.1317273193234239, "grad_norm": 0.9121148750809184, "learning_rate": 8.363536941413121e-06, "loss": 0.2805, "step": 2208 }, { "epoch": 1.132239876986161, "grad_norm": 0.9262579162842267, "learning_rate": 8.355347122941538e-06, "loss": 0.2811, "step": 2209 }, { "epoch": 1.132752434648898, "grad_norm": 0.9858791375790169, "learning_rate": 8.34715843810096e-06, "loss": 0.3112, "step": 2210 }, { "epoch": 1.1332649923116351, "grad_norm": 0.9724511473433478, "learning_rate": 8.338970892535705e-06, "loss": 0.3186, "step": 2211 }, { "epoch": 1.133777549974372, "grad_norm": 0.8839788596392338, "learning_rate": 8.330784491889303e-06, "loss": 0.275, "step": 2212 }, { "epoch": 1.1342901076371092, "grad_norm": 0.9223353823287292, "learning_rate": 8.322599241804502e-06, "loss": 0.2896, "step": 2213 }, { "epoch": 1.1348026652998462, "grad_norm": 0.9243465505717552, "learning_rate": 8.314415147923254e-06, "loss": 0.2981, "step": 2214 }, { "epoch": 1.1353152229625834, "grad_norm": 0.8861128489634387, "learning_rate": 8.306232215886722e-06, "loss": 0.2473, "step": 2215 }, { "epoch": 1.1358277806253203, "grad_norm": 0.9482044742245072, "learning_rate": 8.298050451335249e-06, "loss": 0.2875, "step": 2216 }, { "epoch": 1.1363403382880575, "grad_norm": 0.8988646136972082, "learning_rate": 8.289869859908389e-06, "loss": 0.2597, "step": 2217 }, { "epoch": 1.1368528959507944, "grad_norm": 0.8594712970085232, "learning_rate": 8.281690447244887e-06, "loss": 0.2657, "step": 2218 }, { "epoch": 1.1373654536135316, "grad_norm": 0.9505351279079294, "learning_rate": 8.273512218982666e-06, "loss": 0.328, "step": 2219 }, { "epoch": 1.1378780112762685, "grad_norm": 0.8797885180803677, "learning_rate": 8.265335180758843e-06, "loss": 0.2892, "step": 2220 }, { "epoch": 1.1383905689390057, "grad_norm": 0.8994142517172271, "learning_rate": 8.257159338209708e-06, "loss": 0.2586, "step": 2221 }, { "epoch": 1.1389031266017426, "grad_norm": 0.9163135819868455, "learning_rate": 8.248984696970732e-06, "loss": 0.2929, "step": 2222 }, { "epoch": 1.1394156842644798, "grad_norm": 0.9731809579642713, "learning_rate": 8.24081126267655e-06, "loss": 0.3441, "step": 2223 }, { "epoch": 1.1399282419272168, "grad_norm": 0.9106811076057736, "learning_rate": 8.23263904096097e-06, "loss": 0.2562, "step": 2224 }, { "epoch": 1.140440799589954, "grad_norm": 0.9691709770852149, "learning_rate": 8.224468037456969e-06, "loss": 0.2778, "step": 2225 }, { "epoch": 1.1409533572526909, "grad_norm": 0.9421386836566774, "learning_rate": 8.216298257796677e-06, "loss": 0.2725, "step": 2226 }, { "epoch": 1.141465914915428, "grad_norm": 0.9343245832318164, "learning_rate": 8.208129707611381e-06, "loss": 0.2895, "step": 2227 }, { "epoch": 1.141978472578165, "grad_norm": 0.898934688341248, "learning_rate": 8.199962392531526e-06, "loss": 0.2859, "step": 2228 }, { "epoch": 1.1424910302409022, "grad_norm": 1.0070004839886761, "learning_rate": 8.191796318186701e-06, "loss": 0.2804, "step": 2229 }, { "epoch": 1.143003587903639, "grad_norm": 0.9270873921680957, "learning_rate": 8.183631490205636e-06, "loss": 0.2758, "step": 2230 }, { "epoch": 1.1435161455663763, "grad_norm": 0.9513279847886001, "learning_rate": 8.175467914216216e-06, "loss": 0.299, "step": 2231 }, { "epoch": 1.1440287032291132, "grad_norm": 0.9002999230472991, "learning_rate": 8.167305595845445e-06, "loss": 0.2698, "step": 2232 }, { "epoch": 1.1445412608918504, "grad_norm": 0.9367017079839578, "learning_rate": 8.159144540719475e-06, "loss": 0.2707, "step": 2233 }, { "epoch": 1.1450538185545873, "grad_norm": 0.9407465861259949, "learning_rate": 8.150984754463578e-06, "loss": 0.2493, "step": 2234 }, { "epoch": 1.1455663762173245, "grad_norm": 0.9605995714874327, "learning_rate": 8.142826242702157e-06, "loss": 0.3049, "step": 2235 }, { "epoch": 1.1460789338800614, "grad_norm": 0.8729152211642764, "learning_rate": 8.134669011058734e-06, "loss": 0.2823, "step": 2236 }, { "epoch": 1.1465914915427986, "grad_norm": 0.8591041677972868, "learning_rate": 8.126513065155944e-06, "loss": 0.2653, "step": 2237 }, { "epoch": 1.1471040492055355, "grad_norm": 0.9603551220319296, "learning_rate": 8.118358410615545e-06, "loss": 0.2994, "step": 2238 }, { "epoch": 1.1476166068682727, "grad_norm": 0.9508546356425773, "learning_rate": 8.110205053058399e-06, "loss": 0.2895, "step": 2239 }, { "epoch": 1.1481291645310097, "grad_norm": 0.9746565846306455, "learning_rate": 8.10205299810448e-06, "loss": 0.3017, "step": 2240 }, { "epoch": 1.1486417221937468, "grad_norm": 0.8955728126473811, "learning_rate": 8.093902251372854e-06, "loss": 0.2704, "step": 2241 }, { "epoch": 1.1491542798564838, "grad_norm": 0.8950697078740593, "learning_rate": 8.08575281848169e-06, "loss": 0.2939, "step": 2242 }, { "epoch": 1.149666837519221, "grad_norm": 0.882498274903209, "learning_rate": 8.077604705048258e-06, "loss": 0.2462, "step": 2243 }, { "epoch": 1.1501793951819579, "grad_norm": 0.7810595040271033, "learning_rate": 8.069457916688907e-06, "loss": 0.2221, "step": 2244 }, { "epoch": 1.150691952844695, "grad_norm": 0.9494000849990473, "learning_rate": 8.061312459019078e-06, "loss": 0.2724, "step": 2245 }, { "epoch": 1.151204510507432, "grad_norm": 0.9464876515113334, "learning_rate": 8.0531683376533e-06, "loss": 0.2934, "step": 2246 }, { "epoch": 1.1517170681701692, "grad_norm": 0.9264872333720654, "learning_rate": 8.04502555820518e-06, "loss": 0.2933, "step": 2247 }, { "epoch": 1.1522296258329061, "grad_norm": 0.9945361829927937, "learning_rate": 8.036884126287382e-06, "loss": 0.3317, "step": 2248 }, { "epoch": 1.1527421834956433, "grad_norm": 0.928667334932546, "learning_rate": 8.028744047511666e-06, "loss": 0.301, "step": 2249 }, { "epoch": 1.1532547411583802, "grad_norm": 0.9101555247201826, "learning_rate": 8.020605327488846e-06, "loss": 0.2817, "step": 2250 }, { "epoch": 1.1537672988211174, "grad_norm": 0.8609025367243007, "learning_rate": 8.012467971828803e-06, "loss": 0.2589, "step": 2251 }, { "epoch": 1.1542798564838543, "grad_norm": 0.9049517457266107, "learning_rate": 8.004331986140474e-06, "loss": 0.2924, "step": 2252 }, { "epoch": 1.1547924141465915, "grad_norm": 0.9038635308736868, "learning_rate": 7.996197376031857e-06, "loss": 0.2577, "step": 2253 }, { "epoch": 1.1553049718093287, "grad_norm": 0.9243803023908179, "learning_rate": 7.988064147110001e-06, "loss": 0.294, "step": 2254 }, { "epoch": 1.1558175294720656, "grad_norm": 0.8396830191384562, "learning_rate": 7.979932304980993e-06, "loss": 0.2477, "step": 2255 }, { "epoch": 1.1563300871348026, "grad_norm": 0.911338937499959, "learning_rate": 7.97180185524998e-06, "loss": 0.2711, "step": 2256 }, { "epoch": 1.1568426447975397, "grad_norm": 0.9197598769920532, "learning_rate": 7.963672803521136e-06, "loss": 0.2895, "step": 2257 }, { "epoch": 1.157355202460277, "grad_norm": 0.9333421310558668, "learning_rate": 7.955545155397684e-06, "loss": 0.2808, "step": 2258 }, { "epoch": 1.1578677601230138, "grad_norm": 0.9782307360730219, "learning_rate": 7.947418916481867e-06, "loss": 0.3001, "step": 2259 }, { "epoch": 1.1583803177857508, "grad_norm": 0.9098430616357251, "learning_rate": 7.939294092374961e-06, "loss": 0.278, "step": 2260 }, { "epoch": 1.158892875448488, "grad_norm": 0.923027643869934, "learning_rate": 7.931170688677272e-06, "loss": 0.2747, "step": 2261 }, { "epoch": 1.1594054331112251, "grad_norm": 0.866967432335247, "learning_rate": 7.923048710988119e-06, "loss": 0.256, "step": 2262 }, { "epoch": 1.159917990773962, "grad_norm": 1.0097601362043624, "learning_rate": 7.914928164905844e-06, "loss": 0.3003, "step": 2263 }, { "epoch": 1.160430548436699, "grad_norm": 0.8995534340830513, "learning_rate": 7.906809056027797e-06, "loss": 0.2868, "step": 2264 }, { "epoch": 1.1609431060994362, "grad_norm": 0.9867705895426101, "learning_rate": 7.898691389950348e-06, "loss": 0.2368, "step": 2265 }, { "epoch": 1.1614556637621734, "grad_norm": 0.9001544399875757, "learning_rate": 7.890575172268858e-06, "loss": 0.271, "step": 2266 }, { "epoch": 1.1619682214249103, "grad_norm": 0.8731625956144542, "learning_rate": 7.882460408577696e-06, "loss": 0.2629, "step": 2267 }, { "epoch": 1.1624807790876472, "grad_norm": 0.9407623925712038, "learning_rate": 7.874347104470234e-06, "loss": 0.2819, "step": 2268 }, { "epoch": 1.1629933367503844, "grad_norm": 0.9100588928835495, "learning_rate": 7.866235265538834e-06, "loss": 0.2857, "step": 2269 }, { "epoch": 1.1635058944131216, "grad_norm": 0.9757803882311141, "learning_rate": 7.858124897374837e-06, "loss": 0.3115, "step": 2270 }, { "epoch": 1.1640184520758585, "grad_norm": 0.915096016308577, "learning_rate": 7.850016005568593e-06, "loss": 0.328, "step": 2271 }, { "epoch": 1.1645310097385955, "grad_norm": 0.9046663573545187, "learning_rate": 7.841908595709416e-06, "loss": 0.2747, "step": 2272 }, { "epoch": 1.1650435674013326, "grad_norm": 0.9622386662949126, "learning_rate": 7.833802673385604e-06, "loss": 0.2898, "step": 2273 }, { "epoch": 1.1655561250640698, "grad_norm": 0.9857576018808558, "learning_rate": 7.825698244184432e-06, "loss": 0.2892, "step": 2274 }, { "epoch": 1.1660686827268067, "grad_norm": 0.9109850349504102, "learning_rate": 7.817595313692143e-06, "loss": 0.2893, "step": 2275 }, { "epoch": 1.166581240389544, "grad_norm": 0.9318139223908165, "learning_rate": 7.80949388749395e-06, "loss": 0.2755, "step": 2276 }, { "epoch": 1.1670937980522809, "grad_norm": 0.8444259583080727, "learning_rate": 7.801393971174024e-06, "loss": 0.2689, "step": 2277 }, { "epoch": 1.167606355715018, "grad_norm": 0.907688832014304, "learning_rate": 7.7932955703155e-06, "loss": 0.2765, "step": 2278 }, { "epoch": 1.168118913377755, "grad_norm": 0.9349096144353314, "learning_rate": 7.785198690500468e-06, "loss": 0.2805, "step": 2279 }, { "epoch": 1.1686314710404921, "grad_norm": 0.9008075485039322, "learning_rate": 7.777103337309963e-06, "loss": 0.2925, "step": 2280 }, { "epoch": 1.169144028703229, "grad_norm": 0.9412800223439903, "learning_rate": 7.76900951632398e-06, "loss": 0.2854, "step": 2281 }, { "epoch": 1.1696565863659663, "grad_norm": 0.9914998920655345, "learning_rate": 7.760917233121443e-06, "loss": 0.3217, "step": 2282 }, { "epoch": 1.1701691440287032, "grad_norm": 0.9313699948942339, "learning_rate": 7.752826493280236e-06, "loss": 0.305, "step": 2283 }, { "epoch": 1.1706817016914404, "grad_norm": 1.0576235845097304, "learning_rate": 7.744737302377156e-06, "loss": 0.3184, "step": 2284 }, { "epoch": 1.1711942593541773, "grad_norm": 0.9550711904166771, "learning_rate": 7.736649665987944e-06, "loss": 0.2969, "step": 2285 }, { "epoch": 1.1717068170169145, "grad_norm": 0.940026182557641, "learning_rate": 7.728563589687275e-06, "loss": 0.3242, "step": 2286 }, { "epoch": 1.1722193746796514, "grad_norm": 0.9405038723354966, "learning_rate": 7.720479079048741e-06, "loss": 0.2585, "step": 2287 }, { "epoch": 1.1727319323423886, "grad_norm": 1.0450627984374419, "learning_rate": 7.712396139644854e-06, "loss": 0.3229, "step": 2288 }, { "epoch": 1.1732444900051255, "grad_norm": 0.9608962467097991, "learning_rate": 7.704314777047045e-06, "loss": 0.2634, "step": 2289 }, { "epoch": 1.1737570476678627, "grad_norm": 0.8869444110723754, "learning_rate": 7.696234996825663e-06, "loss": 0.2792, "step": 2290 }, { "epoch": 1.1742696053305997, "grad_norm": 0.9410190334775955, "learning_rate": 7.688156804549956e-06, "loss": 0.2802, "step": 2291 }, { "epoch": 1.1747821629933368, "grad_norm": 0.8631799217900249, "learning_rate": 7.680080205788086e-06, "loss": 0.2613, "step": 2292 }, { "epoch": 1.1752947206560738, "grad_norm": 0.9216322162332062, "learning_rate": 7.672005206107115e-06, "loss": 0.2736, "step": 2293 }, { "epoch": 1.175807278318811, "grad_norm": 0.880009728955256, "learning_rate": 7.663931811073003e-06, "loss": 0.2647, "step": 2294 }, { "epoch": 1.1763198359815479, "grad_norm": 0.9788096560899798, "learning_rate": 7.655860026250596e-06, "loss": 0.2863, "step": 2295 }, { "epoch": 1.176832393644285, "grad_norm": 1.0148601190924598, "learning_rate": 7.647789857203644e-06, "loss": 0.3144, "step": 2296 }, { "epoch": 1.177344951307022, "grad_norm": 0.9454046444474234, "learning_rate": 7.639721309494771e-06, "loss": 0.2911, "step": 2297 }, { "epoch": 1.1778575089697592, "grad_norm": 0.9243059264143443, "learning_rate": 7.631654388685496e-06, "loss": 0.2541, "step": 2298 }, { "epoch": 1.178370066632496, "grad_norm": 0.9071638258331933, "learning_rate": 7.623589100336199e-06, "loss": 0.2664, "step": 2299 }, { "epoch": 1.1788826242952333, "grad_norm": 0.9258558198980522, "learning_rate": 7.6155254500061494e-06, "loss": 0.2783, "step": 2300 }, { "epoch": 1.1793951819579702, "grad_norm": 0.8957277414412633, "learning_rate": 7.607463443253487e-06, "loss": 0.2888, "step": 2301 }, { "epoch": 1.1799077396207074, "grad_norm": 0.9475242709023864, "learning_rate": 7.599403085635208e-06, "loss": 0.298, "step": 2302 }, { "epoch": 1.1804202972834443, "grad_norm": 0.9575396147232292, "learning_rate": 7.591344382707181e-06, "loss": 0.3173, "step": 2303 }, { "epoch": 1.1809328549461815, "grad_norm": 1.0178303495310894, "learning_rate": 7.583287340024132e-06, "loss": 0.3197, "step": 2304 }, { "epoch": 1.1814454126089184, "grad_norm": 1.0057111948838302, "learning_rate": 7.575231963139645e-06, "loss": 0.3029, "step": 2305 }, { "epoch": 1.1819579702716556, "grad_norm": 0.9765242896991104, "learning_rate": 7.567178257606147e-06, "loss": 0.2833, "step": 2306 }, { "epoch": 1.1824705279343926, "grad_norm": 0.9161584082717522, "learning_rate": 7.559126228974921e-06, "loss": 0.3005, "step": 2307 }, { "epoch": 1.1829830855971297, "grad_norm": 0.8911552151248171, "learning_rate": 7.551075882796098e-06, "loss": 0.2654, "step": 2308 }, { "epoch": 1.1834956432598667, "grad_norm": 0.9130292320378288, "learning_rate": 7.543027224618637e-06, "loss": 0.2628, "step": 2309 }, { "epoch": 1.1840082009226038, "grad_norm": 0.8855048441131406, "learning_rate": 7.534980259990341e-06, "loss": 0.2914, "step": 2310 }, { "epoch": 1.1845207585853408, "grad_norm": 0.9709003395179511, "learning_rate": 7.5269349944578454e-06, "loss": 0.3007, "step": 2311 }, { "epoch": 1.185033316248078, "grad_norm": 0.9303161471647536, "learning_rate": 7.518891433566617e-06, "loss": 0.2903, "step": 2312 }, { "epoch": 1.185545873910815, "grad_norm": 0.8997316101766031, "learning_rate": 7.510849582860936e-06, "loss": 0.2971, "step": 2313 }, { "epoch": 1.186058431573552, "grad_norm": 0.9875429215431901, "learning_rate": 7.50280944788392e-06, "loss": 0.3142, "step": 2314 }, { "epoch": 1.186570989236289, "grad_norm": 0.9625455677645237, "learning_rate": 7.49477103417749e-06, "loss": 0.2825, "step": 2315 }, { "epoch": 1.1870835468990262, "grad_norm": 0.8493340663789113, "learning_rate": 7.486734347282394e-06, "loss": 0.2651, "step": 2316 }, { "epoch": 1.1875961045617631, "grad_norm": 0.88401128486317, "learning_rate": 7.478699392738176e-06, "loss": 0.2852, "step": 2317 }, { "epoch": 1.1881086622245003, "grad_norm": 0.9347444650223842, "learning_rate": 7.470666176083193e-06, "loss": 0.3014, "step": 2318 }, { "epoch": 1.1886212198872372, "grad_norm": 0.9659525868867649, "learning_rate": 7.462634702854607e-06, "loss": 0.2803, "step": 2319 }, { "epoch": 1.1891337775499744, "grad_norm": 0.9598103044237883, "learning_rate": 7.454604978588373e-06, "loss": 0.2866, "step": 2320 }, { "epoch": 1.1896463352127113, "grad_norm": 0.9232753368792568, "learning_rate": 7.446577008819238e-06, "loss": 0.2894, "step": 2321 }, { "epoch": 1.1901588928754485, "grad_norm": 0.9292948906025112, "learning_rate": 7.438550799080746e-06, "loss": 0.2802, "step": 2322 }, { "epoch": 1.1906714505381855, "grad_norm": 0.9473931289882497, "learning_rate": 7.430526354905232e-06, "loss": 0.2798, "step": 2323 }, { "epoch": 1.1911840082009226, "grad_norm": 0.9229592353399434, "learning_rate": 7.4225036818238e-06, "loss": 0.2976, "step": 2324 }, { "epoch": 1.1916965658636596, "grad_norm": 0.8791052560283328, "learning_rate": 7.414482785366342e-06, "loss": 0.2923, "step": 2325 }, { "epoch": 1.1922091235263967, "grad_norm": 0.8765159244877604, "learning_rate": 7.40646367106153e-06, "loss": 0.2482, "step": 2326 }, { "epoch": 1.1927216811891337, "grad_norm": 0.8585702904638437, "learning_rate": 7.398446344436795e-06, "loss": 0.2781, "step": 2327 }, { "epoch": 1.1932342388518709, "grad_norm": 0.9390317570950343, "learning_rate": 7.390430811018343e-06, "loss": 0.291, "step": 2328 }, { "epoch": 1.1937467965146078, "grad_norm": 0.9637682101599292, "learning_rate": 7.382417076331148e-06, "loss": 0.2574, "step": 2329 }, { "epoch": 1.194259354177345, "grad_norm": 1.041670524564976, "learning_rate": 7.3744051458989395e-06, "loss": 0.3319, "step": 2330 }, { "epoch": 1.194771911840082, "grad_norm": 0.9854240682879544, "learning_rate": 7.3663950252441974e-06, "loss": 0.2938, "step": 2331 }, { "epoch": 1.195284469502819, "grad_norm": 0.9368486453128779, "learning_rate": 7.358386719888166e-06, "loss": 0.2862, "step": 2332 }, { "epoch": 1.1957970271655562, "grad_norm": 0.9264999047589468, "learning_rate": 7.35038023535083e-06, "loss": 0.2726, "step": 2333 }, { "epoch": 1.1963095848282932, "grad_norm": 0.8959401896937965, "learning_rate": 7.342375577150928e-06, "loss": 0.2568, "step": 2334 }, { "epoch": 1.1968221424910301, "grad_norm": 0.9529508121664079, "learning_rate": 7.334372750805926e-06, "loss": 0.2761, "step": 2335 }, { "epoch": 1.1973347001537673, "grad_norm": 0.9077755078262069, "learning_rate": 7.326371761832035e-06, "loss": 0.2928, "step": 2336 }, { "epoch": 1.1978472578165045, "grad_norm": 0.9733482163355537, "learning_rate": 7.318372615744207e-06, "loss": 0.2838, "step": 2337 }, { "epoch": 1.1983598154792414, "grad_norm": 0.9025174607423373, "learning_rate": 7.310375318056107e-06, "loss": 0.2634, "step": 2338 }, { "epoch": 1.1988723731419784, "grad_norm": 0.8382823060694212, "learning_rate": 7.30237987428014e-06, "loss": 0.2479, "step": 2339 }, { "epoch": 1.1993849308047155, "grad_norm": 0.8580222227240415, "learning_rate": 7.294386289927425e-06, "loss": 0.2758, "step": 2340 }, { "epoch": 1.1998974884674527, "grad_norm": 0.9551790603652737, "learning_rate": 7.286394570507811e-06, "loss": 0.2928, "step": 2341 }, { "epoch": 1.2004100461301896, "grad_norm": 0.9297649490993279, "learning_rate": 7.278404721529843e-06, "loss": 0.2837, "step": 2342 }, { "epoch": 1.2009226037929266, "grad_norm": 0.9690241221829934, "learning_rate": 7.270416748500788e-06, "loss": 0.2829, "step": 2343 }, { "epoch": 1.2014351614556638, "grad_norm": 0.914727591911585, "learning_rate": 7.2624306569266265e-06, "loss": 0.2905, "step": 2344 }, { "epoch": 1.201947719118401, "grad_norm": 0.8644069862566369, "learning_rate": 7.254446452312027e-06, "loss": 0.2809, "step": 2345 }, { "epoch": 1.2024602767811379, "grad_norm": 0.9383561846309376, "learning_rate": 7.246464140160365e-06, "loss": 0.2846, "step": 2346 }, { "epoch": 1.2029728344438748, "grad_norm": 0.8906045909616, "learning_rate": 7.238483725973715e-06, "loss": 0.258, "step": 2347 }, { "epoch": 1.203485392106612, "grad_norm": 1.0224385112685455, "learning_rate": 7.230505215252839e-06, "loss": 0.312, "step": 2348 }, { "epoch": 1.2039979497693492, "grad_norm": 0.8401486845888705, "learning_rate": 7.2225286134971815e-06, "loss": 0.2501, "step": 2349 }, { "epoch": 1.204510507432086, "grad_norm": 0.9748016329967593, "learning_rate": 7.214553926204884e-06, "loss": 0.301, "step": 2350 }, { "epoch": 1.2050230650948233, "grad_norm": 0.9478976903687556, "learning_rate": 7.206581158872761e-06, "loss": 0.2947, "step": 2351 }, { "epoch": 1.2055356227575602, "grad_norm": 0.9162340310514702, "learning_rate": 7.198610316996303e-06, "loss": 0.2818, "step": 2352 }, { "epoch": 1.2060481804202974, "grad_norm": 0.9965584654038793, "learning_rate": 7.190641406069672e-06, "loss": 0.2761, "step": 2353 }, { "epoch": 1.2065607380830343, "grad_norm": 0.8764801418705863, "learning_rate": 7.182674431585703e-06, "loss": 0.2574, "step": 2354 }, { "epoch": 1.2070732957457715, "grad_norm": 0.9117285562552626, "learning_rate": 7.1747093990359e-06, "loss": 0.3072, "step": 2355 }, { "epoch": 1.2075858534085084, "grad_norm": 0.9279817834894647, "learning_rate": 7.166746313910413e-06, "loss": 0.2644, "step": 2356 }, { "epoch": 1.2080984110712456, "grad_norm": 0.9451924180476, "learning_rate": 7.158785181698069e-06, "loss": 0.2881, "step": 2357 }, { "epoch": 1.2086109687339825, "grad_norm": 0.8419206729236725, "learning_rate": 7.150826007886334e-06, "loss": 0.259, "step": 2358 }, { "epoch": 1.2091235263967197, "grad_norm": 0.9659898641174127, "learning_rate": 7.142868797961335e-06, "loss": 0.2992, "step": 2359 }, { "epoch": 1.2096360840594567, "grad_norm": 0.9018313125517623, "learning_rate": 7.134913557407838e-06, "loss": 0.2704, "step": 2360 }, { "epoch": 1.2101486417221938, "grad_norm": 0.9167972961228283, "learning_rate": 7.126960291709248e-06, "loss": 0.2503, "step": 2361 }, { "epoch": 1.2106611993849308, "grad_norm": 1.0267389614618654, "learning_rate": 7.119009006347625e-06, "loss": 0.3036, "step": 2362 }, { "epoch": 1.211173757047668, "grad_norm": 1.0796452311346223, "learning_rate": 7.111059706803649e-06, "loss": 0.3273, "step": 2363 }, { "epoch": 1.2116863147104049, "grad_norm": 0.9180751019618696, "learning_rate": 7.103112398556631e-06, "loss": 0.2941, "step": 2364 }, { "epoch": 1.212198872373142, "grad_norm": 0.9386396534196924, "learning_rate": 7.095167087084518e-06, "loss": 0.2882, "step": 2365 }, { "epoch": 1.212711430035879, "grad_norm": 0.9451320631071083, "learning_rate": 7.087223777863883e-06, "loss": 0.2874, "step": 2366 }, { "epoch": 1.2132239876986162, "grad_norm": 0.9771913116050941, "learning_rate": 7.079282476369902e-06, "loss": 0.3083, "step": 2367 }, { "epoch": 1.2137365453613531, "grad_norm": 0.8828861742177233, "learning_rate": 7.071343188076383e-06, "loss": 0.2609, "step": 2368 }, { "epoch": 1.2142491030240903, "grad_norm": 0.9158967685353845, "learning_rate": 7.0634059184557415e-06, "loss": 0.2502, "step": 2369 }, { "epoch": 1.2147616606868272, "grad_norm": 0.9471118349000949, "learning_rate": 7.055470672979003e-06, "loss": 0.3021, "step": 2370 }, { "epoch": 1.2152742183495644, "grad_norm": 0.9670858828894591, "learning_rate": 7.04753745711579e-06, "loss": 0.2643, "step": 2371 }, { "epoch": 1.2157867760123013, "grad_norm": 0.9496438855655908, "learning_rate": 7.039606276334336e-06, "loss": 0.2896, "step": 2372 }, { "epoch": 1.2162993336750385, "grad_norm": 0.9274948699379517, "learning_rate": 7.031677136101471e-06, "loss": 0.2765, "step": 2373 }, { "epoch": 1.2168118913377755, "grad_norm": 0.9345466631309864, "learning_rate": 7.023750041882609e-06, "loss": 0.2978, "step": 2374 }, { "epoch": 1.2173244490005126, "grad_norm": 0.8815460565577756, "learning_rate": 7.015824999141762e-06, "loss": 0.2848, "step": 2375 }, { "epoch": 1.2178370066632496, "grad_norm": 0.9943961746349081, "learning_rate": 7.007902013341525e-06, "loss": 0.2896, "step": 2376 }, { "epoch": 1.2183495643259867, "grad_norm": 0.8991386089189617, "learning_rate": 6.999981089943083e-06, "loss": 0.2735, "step": 2377 }, { "epoch": 1.2188621219887237, "grad_norm": 0.8705571686001923, "learning_rate": 6.992062234406185e-06, "loss": 0.2875, "step": 2378 }, { "epoch": 1.2193746796514608, "grad_norm": 0.9699334824160015, "learning_rate": 6.984145452189162e-06, "loss": 0.3152, "step": 2379 }, { "epoch": 1.2198872373141978, "grad_norm": 0.9695104482370266, "learning_rate": 6.976230748748919e-06, "loss": 0.3021, "step": 2380 }, { "epoch": 1.220399794976935, "grad_norm": 0.9733759001432738, "learning_rate": 6.968318129540929e-06, "loss": 0.2957, "step": 2381 }, { "epoch": 1.220912352639672, "grad_norm": 0.911568765535775, "learning_rate": 6.960407600019217e-06, "loss": 0.2871, "step": 2382 }, { "epoch": 1.221424910302409, "grad_norm": 0.9430869325466388, "learning_rate": 6.952499165636379e-06, "loss": 0.2786, "step": 2383 }, { "epoch": 1.221937467965146, "grad_norm": 0.9443018679766895, "learning_rate": 6.944592831843566e-06, "loss": 0.3222, "step": 2384 }, { "epoch": 1.2224500256278832, "grad_norm": 0.8695258342199861, "learning_rate": 6.936688604090472e-06, "loss": 0.2625, "step": 2385 }, { "epoch": 1.2229625832906201, "grad_norm": 0.8837453093024448, "learning_rate": 6.9287864878253475e-06, "loss": 0.2715, "step": 2386 }, { "epoch": 1.2234751409533573, "grad_norm": 0.9392301436077408, "learning_rate": 6.920886488494989e-06, "loss": 0.2825, "step": 2387 }, { "epoch": 1.2239876986160942, "grad_norm": 0.9448857562981949, "learning_rate": 6.9129886115447294e-06, "loss": 0.2828, "step": 2388 }, { "epoch": 1.2245002562788314, "grad_norm": 1.0352340625225915, "learning_rate": 6.905092862418434e-06, "loss": 0.3418, "step": 2389 }, { "epoch": 1.2250128139415684, "grad_norm": 0.9208887684882318, "learning_rate": 6.897199246558515e-06, "loss": 0.2693, "step": 2390 }, { "epoch": 1.2255253716043055, "grad_norm": 1.0556753501049867, "learning_rate": 6.889307769405904e-06, "loss": 0.3407, "step": 2391 }, { "epoch": 1.2260379292670425, "grad_norm": 0.9016519087992757, "learning_rate": 6.881418436400056e-06, "loss": 0.3203, "step": 2392 }, { "epoch": 1.2265504869297796, "grad_norm": 0.9784523716458988, "learning_rate": 6.873531252978959e-06, "loss": 0.3109, "step": 2393 }, { "epoch": 1.2270630445925166, "grad_norm": 1.1189543812496525, "learning_rate": 6.865646224579108e-06, "loss": 0.3315, "step": 2394 }, { "epoch": 1.2275756022552538, "grad_norm": 0.9681766192058673, "learning_rate": 6.857763356635525e-06, "loss": 0.3038, "step": 2395 }, { "epoch": 1.2280881599179907, "grad_norm": 0.9152739595519004, "learning_rate": 6.849882654581726e-06, "loss": 0.2596, "step": 2396 }, { "epoch": 1.2286007175807279, "grad_norm": 0.9642571181742866, "learning_rate": 6.8420041238497525e-06, "loss": 0.2696, "step": 2397 }, { "epoch": 1.2291132752434648, "grad_norm": 0.9121179360178152, "learning_rate": 6.834127769870134e-06, "loss": 0.2851, "step": 2398 }, { "epoch": 1.229625832906202, "grad_norm": 0.8756574856459417, "learning_rate": 6.826253598071911e-06, "loss": 0.2683, "step": 2399 }, { "epoch": 1.230138390568939, "grad_norm": 0.8809769567495143, "learning_rate": 6.818381613882612e-06, "loss": 0.2505, "step": 2400 }, { "epoch": 1.230650948231676, "grad_norm": 0.8825721656871158, "learning_rate": 6.8105118227282605e-06, "loss": 0.2688, "step": 2401 }, { "epoch": 1.231163505894413, "grad_norm": 1.0156091002742205, "learning_rate": 6.802644230033373e-06, "loss": 0.2857, "step": 2402 }, { "epoch": 1.2316760635571502, "grad_norm": 0.9352149884337064, "learning_rate": 6.79477884122094e-06, "loss": 0.3016, "step": 2403 }, { "epoch": 1.2321886212198871, "grad_norm": 0.9773231224157306, "learning_rate": 6.786915661712439e-06, "loss": 0.2714, "step": 2404 }, { "epoch": 1.2327011788826243, "grad_norm": 0.9267469217091987, "learning_rate": 6.779054696927831e-06, "loss": 0.2635, "step": 2405 }, { "epoch": 1.2332137365453613, "grad_norm": 0.8781829384093026, "learning_rate": 6.771195952285541e-06, "loss": 0.2523, "step": 2406 }, { "epoch": 1.2337262942080984, "grad_norm": 0.9229661274375351, "learning_rate": 6.763339433202462e-06, "loss": 0.2763, "step": 2407 }, { "epoch": 1.2342388518708354, "grad_norm": 0.9459562110176495, "learning_rate": 6.755485145093965e-06, "loss": 0.2836, "step": 2408 }, { "epoch": 1.2347514095335725, "grad_norm": 0.9118206039732918, "learning_rate": 6.747633093373873e-06, "loss": 0.2642, "step": 2409 }, { "epoch": 1.2352639671963095, "grad_norm": 0.9590908421566003, "learning_rate": 6.739783283454469e-06, "loss": 0.305, "step": 2410 }, { "epoch": 1.2357765248590467, "grad_norm": 0.9734184712796082, "learning_rate": 6.731935720746492e-06, "loss": 0.2809, "step": 2411 }, { "epoch": 1.2362890825217838, "grad_norm": 0.9508666836299452, "learning_rate": 6.724090410659137e-06, "loss": 0.2754, "step": 2412 }, { "epoch": 1.2368016401845208, "grad_norm": 0.8824049659290515, "learning_rate": 6.716247358600038e-06, "loss": 0.2872, "step": 2413 }, { "epoch": 1.2373141978472577, "grad_norm": 0.9281574693258311, "learning_rate": 6.708406569975274e-06, "loss": 0.3042, "step": 2414 }, { "epoch": 1.2378267555099949, "grad_norm": 0.9554920969003285, "learning_rate": 6.700568050189371e-06, "loss": 0.2918, "step": 2415 }, { "epoch": 1.238339313172732, "grad_norm": 0.9741737764136285, "learning_rate": 6.6927318046452825e-06, "loss": 0.2855, "step": 2416 }, { "epoch": 1.238851870835469, "grad_norm": 0.9108447455314622, "learning_rate": 6.684897838744403e-06, "loss": 0.2667, "step": 2417 }, { "epoch": 1.239364428498206, "grad_norm": 0.9275140514396953, "learning_rate": 6.6770661578865444e-06, "loss": 0.2532, "step": 2418 }, { "epoch": 1.239876986160943, "grad_norm": 0.9214131525021539, "learning_rate": 6.669236767469952e-06, "loss": 0.2931, "step": 2419 }, { "epoch": 1.2403895438236803, "grad_norm": 0.8893814815879154, "learning_rate": 6.6614096728912945e-06, "loss": 0.2969, "step": 2420 }, { "epoch": 1.2409021014864172, "grad_norm": 0.8891489426219981, "learning_rate": 6.6535848795456485e-06, "loss": 0.2922, "step": 2421 }, { "epoch": 1.2414146591491542, "grad_norm": 0.9300056538578293, "learning_rate": 6.645762392826509e-06, "loss": 0.3385, "step": 2422 }, { "epoch": 1.2419272168118913, "grad_norm": 0.900745187924251, "learning_rate": 6.637942218125786e-06, "loss": 0.2877, "step": 2423 }, { "epoch": 1.2424397744746285, "grad_norm": 0.9300075105895553, "learning_rate": 6.630124360833794e-06, "loss": 0.2814, "step": 2424 }, { "epoch": 1.2429523321373654, "grad_norm": 0.9278897298666837, "learning_rate": 6.622308826339237e-06, "loss": 0.2892, "step": 2425 }, { "epoch": 1.2434648898001024, "grad_norm": 0.8725218119314037, "learning_rate": 6.614495620029238e-06, "loss": 0.2689, "step": 2426 }, { "epoch": 1.2439774474628396, "grad_norm": 0.9612307564035077, "learning_rate": 6.6066847472893046e-06, "loss": 0.2815, "step": 2427 }, { "epoch": 1.2444900051255767, "grad_norm": 0.988830087484506, "learning_rate": 6.5988762135033405e-06, "loss": 0.2848, "step": 2428 }, { "epoch": 1.2450025627883137, "grad_norm": 0.90870312983941, "learning_rate": 6.591070024053623e-06, "loss": 0.2602, "step": 2429 }, { "epoch": 1.2455151204510508, "grad_norm": 0.8255048218512195, "learning_rate": 6.583266184320836e-06, "loss": 0.242, "step": 2430 }, { "epoch": 1.2460276781137878, "grad_norm": 0.9123411993959547, "learning_rate": 6.575464699684029e-06, "loss": 0.2862, "step": 2431 }, { "epoch": 1.246540235776525, "grad_norm": 0.9759822537008966, "learning_rate": 6.567665575520625e-06, "loss": 0.2598, "step": 2432 }, { "epoch": 1.247052793439262, "grad_norm": 0.8902503672639903, "learning_rate": 6.559868817206435e-06, "loss": 0.2429, "step": 2433 }, { "epoch": 1.247565351101999, "grad_norm": 0.9246508803750534, "learning_rate": 6.552074430115624e-06, "loss": 0.2572, "step": 2434 }, { "epoch": 1.248077908764736, "grad_norm": 0.9562871270783307, "learning_rate": 6.54428241962074e-06, "loss": 0.2894, "step": 2435 }, { "epoch": 1.2485904664274732, "grad_norm": 1.0410100467046937, "learning_rate": 6.53649279109267e-06, "loss": 0.2828, "step": 2436 }, { "epoch": 1.2491030240902101, "grad_norm": 0.9300320179845659, "learning_rate": 6.528705549900674e-06, "loss": 0.3, "step": 2437 }, { "epoch": 1.2496155817529473, "grad_norm": 0.9089669818554289, "learning_rate": 6.520920701412371e-06, "loss": 0.2514, "step": 2438 }, { "epoch": 1.2501281394156842, "grad_norm": 0.885923330975491, "learning_rate": 6.513138250993716e-06, "loss": 0.2695, "step": 2439 }, { "epoch": 1.2506406970784214, "grad_norm": 0.8923300923607211, "learning_rate": 6.505358204009018e-06, "loss": 0.2639, "step": 2440 }, { "epoch": 1.2511532547411583, "grad_norm": 0.8890315959885036, "learning_rate": 6.4975805658209324e-06, "loss": 0.2752, "step": 2441 }, { "epoch": 1.2516658124038955, "grad_norm": 0.918774948612781, "learning_rate": 6.489805341790456e-06, "loss": 0.2807, "step": 2442 }, { "epoch": 1.2521783700666325, "grad_norm": 0.8946376313523732, "learning_rate": 6.4820325372769126e-06, "loss": 0.2483, "step": 2443 }, { "epoch": 1.2526909277293696, "grad_norm": 0.8990159612778142, "learning_rate": 6.474262157637958e-06, "loss": 0.2636, "step": 2444 }, { "epoch": 1.2532034853921066, "grad_norm": 0.9445872596846987, "learning_rate": 6.4664942082295925e-06, "loss": 0.2981, "step": 2445 }, { "epoch": 1.2537160430548437, "grad_norm": 1.025974890996452, "learning_rate": 6.458728694406124e-06, "loss": 0.3158, "step": 2446 }, { "epoch": 1.2542286007175807, "grad_norm": 0.9445532224384073, "learning_rate": 6.450965621520187e-06, "loss": 0.273, "step": 2447 }, { "epoch": 1.2547411583803179, "grad_norm": 0.9051348287414857, "learning_rate": 6.443204994922738e-06, "loss": 0.2404, "step": 2448 }, { "epoch": 1.2552537160430548, "grad_norm": 0.9946019076075764, "learning_rate": 6.4354468199630425e-06, "loss": 0.2937, "step": 2449 }, { "epoch": 1.255766273705792, "grad_norm": 0.9924755705304664, "learning_rate": 6.427691101988673e-06, "loss": 0.2976, "step": 2450 }, { "epoch": 1.256278831368529, "grad_norm": 0.8592830528112826, "learning_rate": 6.419937846345519e-06, "loss": 0.2558, "step": 2451 }, { "epoch": 1.256791389031266, "grad_norm": 0.9170750021632489, "learning_rate": 6.412187058377762e-06, "loss": 0.2853, "step": 2452 }, { "epoch": 1.257303946694003, "grad_norm": 0.8891519624527056, "learning_rate": 6.404438743427894e-06, "loss": 0.2884, "step": 2453 }, { "epoch": 1.2578165043567402, "grad_norm": 0.8972043707004012, "learning_rate": 6.396692906836686e-06, "loss": 0.2563, "step": 2454 }, { "epoch": 1.2583290620194771, "grad_norm": 0.8573988878930028, "learning_rate": 6.388949553943216e-06, "loss": 0.2541, "step": 2455 }, { "epoch": 1.2588416196822143, "grad_norm": 0.9044219119191146, "learning_rate": 6.3812086900848456e-06, "loss": 0.2609, "step": 2456 }, { "epoch": 1.2593541773449513, "grad_norm": 0.9420601972214973, "learning_rate": 6.373470320597212e-06, "loss": 0.2889, "step": 2457 }, { "epoch": 1.2598667350076884, "grad_norm": 0.8781218164919959, "learning_rate": 6.3657344508142495e-06, "loss": 0.2703, "step": 2458 }, { "epoch": 1.2603792926704254, "grad_norm": 0.8972348640000619, "learning_rate": 6.358001086068153e-06, "loss": 0.2795, "step": 2459 }, { "epoch": 1.2608918503331625, "grad_norm": 0.9566955661322033, "learning_rate": 6.350270231689405e-06, "loss": 0.2636, "step": 2460 }, { "epoch": 1.2614044079958995, "grad_norm": 0.8799812746124863, "learning_rate": 6.342541893006746e-06, "loss": 0.2685, "step": 2461 }, { "epoch": 1.2619169656586366, "grad_norm": 0.9594353494445046, "learning_rate": 6.334816075347185e-06, "loss": 0.2893, "step": 2462 }, { "epoch": 1.2624295233213736, "grad_norm": 1.0040299522955982, "learning_rate": 6.327092784036002e-06, "loss": 0.2922, "step": 2463 }, { "epoch": 1.2629420809841108, "grad_norm": 0.8793386695199601, "learning_rate": 6.319372024396728e-06, "loss": 0.2584, "step": 2464 }, { "epoch": 1.2634546386468477, "grad_norm": 0.9149470223666678, "learning_rate": 6.311653801751145e-06, "loss": 0.2668, "step": 2465 }, { "epoch": 1.2639671963095849, "grad_norm": 0.9170291827886723, "learning_rate": 6.303938121419295e-06, "loss": 0.2676, "step": 2466 }, { "epoch": 1.2644797539723218, "grad_norm": 0.8794637613995153, "learning_rate": 6.296224988719466e-06, "loss": 0.2713, "step": 2467 }, { "epoch": 1.264992311635059, "grad_norm": 0.9128345120993007, "learning_rate": 6.288514408968183e-06, "loss": 0.2734, "step": 2468 }, { "epoch": 1.265504869297796, "grad_norm": 0.9773594245320931, "learning_rate": 6.2808063874802165e-06, "loss": 0.3121, "step": 2469 }, { "epoch": 1.266017426960533, "grad_norm": 1.0239627849482338, "learning_rate": 6.273100929568579e-06, "loss": 0.285, "step": 2470 }, { "epoch": 1.26652998462327, "grad_norm": 0.9219073801112275, "learning_rate": 6.265398040544508e-06, "loss": 0.2868, "step": 2471 }, { "epoch": 1.2670425422860072, "grad_norm": 0.9534778974987728, "learning_rate": 6.257697725717469e-06, "loss": 0.3118, "step": 2472 }, { "epoch": 1.2675550999487442, "grad_norm": 0.8960543403556862, "learning_rate": 6.249999990395162e-06, "loss": 0.2755, "step": 2473 }, { "epoch": 1.2680676576114813, "grad_norm": 0.8905378864751365, "learning_rate": 6.242304839883502e-06, "loss": 0.2816, "step": 2474 }, { "epoch": 1.2685802152742183, "grad_norm": 0.9604189418736244, "learning_rate": 6.23461227948662e-06, "loss": 0.2851, "step": 2475 }, { "epoch": 1.2690927729369554, "grad_norm": 1.0035127383336944, "learning_rate": 6.226922314506874e-06, "loss": 0.2901, "step": 2476 }, { "epoch": 1.2696053305996924, "grad_norm": 0.8678574263621285, "learning_rate": 6.219234950244816e-06, "loss": 0.2403, "step": 2477 }, { "epoch": 1.2701178882624296, "grad_norm": 0.9771719042522052, "learning_rate": 6.211550191999223e-06, "loss": 0.3078, "step": 2478 }, { "epoch": 1.2706304459251667, "grad_norm": 0.9576402064450437, "learning_rate": 6.2038680450670605e-06, "loss": 0.3012, "step": 2479 }, { "epoch": 1.2711430035879037, "grad_norm": 0.8945799003893532, "learning_rate": 6.1961885147435015e-06, "loss": 0.2697, "step": 2480 }, { "epoch": 1.2716555612506406, "grad_norm": 0.8927238072315871, "learning_rate": 6.188511606321918e-06, "loss": 0.2423, "step": 2481 }, { "epoch": 1.2721681189133778, "grad_norm": 0.8599268594601992, "learning_rate": 6.18083732509387e-06, "loss": 0.2619, "step": 2482 }, { "epoch": 1.272680676576115, "grad_norm": 0.9120279917068138, "learning_rate": 6.173165676349103e-06, "loss": 0.2513, "step": 2483 }, { "epoch": 1.273193234238852, "grad_norm": 0.9194141745460563, "learning_rate": 6.165496665375556e-06, "loss": 0.2894, "step": 2484 }, { "epoch": 1.2737057919015888, "grad_norm": 0.9297781335985346, "learning_rate": 6.1578302974593526e-06, "loss": 0.2725, "step": 2485 }, { "epoch": 1.274218349564326, "grad_norm": 0.8587768685651778, "learning_rate": 6.150166577884781e-06, "loss": 0.2565, "step": 2486 }, { "epoch": 1.2747309072270632, "grad_norm": 0.916382994940318, "learning_rate": 6.1425055119343115e-06, "loss": 0.247, "step": 2487 }, { "epoch": 1.2752434648898001, "grad_norm": 0.899027392449331, "learning_rate": 6.134847104888588e-06, "loss": 0.2866, "step": 2488 }, { "epoch": 1.275756022552537, "grad_norm": 0.9109663764901355, "learning_rate": 6.1271913620264225e-06, "loss": 0.294, "step": 2489 }, { "epoch": 1.2762685802152742, "grad_norm": 0.9732580435579081, "learning_rate": 6.119538288624778e-06, "loss": 0.2886, "step": 2490 }, { "epoch": 1.2767811378780114, "grad_norm": 0.8841671803239627, "learning_rate": 6.111887889958795e-06, "loss": 0.2903, "step": 2491 }, { "epoch": 1.2772936955407483, "grad_norm": 0.9530821099792854, "learning_rate": 6.104240171301756e-06, "loss": 0.3028, "step": 2492 }, { "epoch": 1.2778062532034853, "grad_norm": 0.9549412338300635, "learning_rate": 6.096595137925111e-06, "loss": 0.2993, "step": 2493 }, { "epoch": 1.2783188108662225, "grad_norm": 0.8951292695824293, "learning_rate": 6.088952795098442e-06, "loss": 0.267, "step": 2494 }, { "epoch": 1.2788313685289596, "grad_norm": 0.8836048850810204, "learning_rate": 6.081313148089485e-06, "loss": 0.2444, "step": 2495 }, { "epoch": 1.2793439261916966, "grad_norm": 0.9477480124761491, "learning_rate": 6.073676202164123e-06, "loss": 0.2707, "step": 2496 }, { "epoch": 1.2798564838544335, "grad_norm": 0.8935587453562054, "learning_rate": 6.066041962586367e-06, "loss": 0.2736, "step": 2497 }, { "epoch": 1.2803690415171707, "grad_norm": 0.9960760465991988, "learning_rate": 6.058410434618367e-06, "loss": 0.2933, "step": 2498 }, { "epoch": 1.2808815991799078, "grad_norm": 0.9072848150603265, "learning_rate": 6.0507816235204065e-06, "loss": 0.2585, "step": 2499 }, { "epoch": 1.2813941568426448, "grad_norm": 0.9136502825158898, "learning_rate": 6.043155534550897e-06, "loss": 0.263, "step": 2500 }, { "epoch": 1.2819067145053817, "grad_norm": 0.918600948545386, "learning_rate": 6.035532172966367e-06, "loss": 0.2881, "step": 2501 }, { "epoch": 1.282419272168119, "grad_norm": 0.940730734475594, "learning_rate": 6.027911544021465e-06, "loss": 0.28, "step": 2502 }, { "epoch": 1.282931829830856, "grad_norm": 0.869149871428628, "learning_rate": 6.020293652968969e-06, "loss": 0.2599, "step": 2503 }, { "epoch": 1.283444387493593, "grad_norm": 0.9101062360853046, "learning_rate": 6.012678505059755e-06, "loss": 0.2661, "step": 2504 }, { "epoch": 1.28395694515633, "grad_norm": 0.9242533959695692, "learning_rate": 6.00506610554281e-06, "loss": 0.2876, "step": 2505 }, { "epoch": 1.2844695028190671, "grad_norm": 0.9074574743992099, "learning_rate": 5.997456459665237e-06, "loss": 0.2808, "step": 2506 }, { "epoch": 1.2849820604818043, "grad_norm": 1.0309132518045758, "learning_rate": 5.989849572672235e-06, "loss": 0.2889, "step": 2507 }, { "epoch": 1.2854946181445412, "grad_norm": 0.8739305145195682, "learning_rate": 5.982245449807094e-06, "loss": 0.2512, "step": 2508 }, { "epoch": 1.2860071758072782, "grad_norm": 0.8285123451807908, "learning_rate": 5.9746440963112095e-06, "loss": 0.2248, "step": 2509 }, { "epoch": 1.2865197334700154, "grad_norm": 0.974211146990977, "learning_rate": 5.967045517424062e-06, "loss": 0.307, "step": 2510 }, { "epoch": 1.2870322911327525, "grad_norm": 0.8873840119239542, "learning_rate": 5.9594497183832276e-06, "loss": 0.2889, "step": 2511 }, { "epoch": 1.2875448487954895, "grad_norm": 0.9732275309199009, "learning_rate": 5.951856704424353e-06, "loss": 0.2855, "step": 2512 }, { "epoch": 1.2880574064582264, "grad_norm": 0.8381279456162986, "learning_rate": 5.944266480781173e-06, "loss": 0.245, "step": 2513 }, { "epoch": 1.2885699641209636, "grad_norm": 0.8956901300242837, "learning_rate": 5.936679052685505e-06, "loss": 0.253, "step": 2514 }, { "epoch": 1.2890825217837008, "grad_norm": 0.924469714922687, "learning_rate": 5.929094425367224e-06, "loss": 0.2776, "step": 2515 }, { "epoch": 1.2895950794464377, "grad_norm": 0.9562905509479959, "learning_rate": 5.921512604054289e-06, "loss": 0.2955, "step": 2516 }, { "epoch": 1.2901076371091749, "grad_norm": 0.8488491922948401, "learning_rate": 5.913933593972719e-06, "loss": 0.2513, "step": 2517 }, { "epoch": 1.2906201947719118, "grad_norm": 0.9524594968260232, "learning_rate": 5.906357400346596e-06, "loss": 0.2806, "step": 2518 }, { "epoch": 1.291132752434649, "grad_norm": 0.9482231270404913, "learning_rate": 5.898784028398058e-06, "loss": 0.2728, "step": 2519 }, { "epoch": 1.291645310097386, "grad_norm": 0.9835381027868075, "learning_rate": 5.8912134833473e-06, "loss": 0.2975, "step": 2520 }, { "epoch": 1.292157867760123, "grad_norm": 0.8928589943298624, "learning_rate": 5.883645770412574e-06, "loss": 0.2748, "step": 2521 }, { "epoch": 1.29267042542286, "grad_norm": 0.8875273691698113, "learning_rate": 5.876080894810167e-06, "loss": 0.2245, "step": 2522 }, { "epoch": 1.2931829830855972, "grad_norm": 0.8850799501878545, "learning_rate": 5.868518861754422e-06, "loss": 0.2722, "step": 2523 }, { "epoch": 1.2936955407483341, "grad_norm": 0.9211279492527054, "learning_rate": 5.860959676457718e-06, "loss": 0.2616, "step": 2524 }, { "epoch": 1.2942080984110713, "grad_norm": 0.8298607285422246, "learning_rate": 5.853403344130476e-06, "loss": 0.2483, "step": 2525 }, { "epoch": 1.2947206560738083, "grad_norm": 0.856124856032245, "learning_rate": 5.845849869981137e-06, "loss": 0.2245, "step": 2526 }, { "epoch": 1.2952332137365454, "grad_norm": 0.9668456593350154, "learning_rate": 5.838299259216187e-06, "loss": 0.2909, "step": 2527 }, { "epoch": 1.2957457713992824, "grad_norm": 0.9173778544193257, "learning_rate": 5.830751517040133e-06, "loss": 0.2491, "step": 2528 }, { "epoch": 1.2962583290620195, "grad_norm": 0.9748274333313001, "learning_rate": 5.823206648655503e-06, "loss": 0.2817, "step": 2529 }, { "epoch": 1.2967708867247565, "grad_norm": 0.9038705979490758, "learning_rate": 5.815664659262845e-06, "loss": 0.2562, "step": 2530 }, { "epoch": 1.2972834443874937, "grad_norm": 0.9865860252585198, "learning_rate": 5.808125554060723e-06, "loss": 0.3066, "step": 2531 }, { "epoch": 1.2977960020502306, "grad_norm": 0.9650031793175783, "learning_rate": 5.8005893382457125e-06, "loss": 0.2829, "step": 2532 }, { "epoch": 1.2983085597129678, "grad_norm": 0.9665843959253662, "learning_rate": 5.7930560170123985e-06, "loss": 0.2719, "step": 2533 }, { "epoch": 1.2988211173757047, "grad_norm": 0.8273252070982792, "learning_rate": 5.78552559555337e-06, "loss": 0.2416, "step": 2534 }, { "epoch": 1.2993336750384419, "grad_norm": 0.9150173065701611, "learning_rate": 5.777998079059215e-06, "loss": 0.3035, "step": 2535 }, { "epoch": 1.2998462327011788, "grad_norm": 0.869754268987871, "learning_rate": 5.770473472718525e-06, "loss": 0.2597, "step": 2536 }, { "epoch": 1.300358790363916, "grad_norm": 0.9662112175020203, "learning_rate": 5.762951781717879e-06, "loss": 0.2655, "step": 2537 }, { "epoch": 1.300871348026653, "grad_norm": 0.8952943218336786, "learning_rate": 5.755433011241851e-06, "loss": 0.2527, "step": 2538 }, { "epoch": 1.30138390568939, "grad_norm": 0.9317880798921929, "learning_rate": 5.747917166473e-06, "loss": 0.2635, "step": 2539 }, { "epoch": 1.301896463352127, "grad_norm": 0.8847922032199713, "learning_rate": 5.74040425259187e-06, "loss": 0.2701, "step": 2540 }, { "epoch": 1.3024090210148642, "grad_norm": 0.9724633326487746, "learning_rate": 5.7328942747769824e-06, "loss": 0.2741, "step": 2541 }, { "epoch": 1.3029215786776012, "grad_norm": 0.9506637786910939, "learning_rate": 5.725387238204831e-06, "loss": 0.2648, "step": 2542 }, { "epoch": 1.3034341363403383, "grad_norm": 1.00499968796117, "learning_rate": 5.717883148049901e-06, "loss": 0.2533, "step": 2543 }, { "epoch": 1.3039466940030753, "grad_norm": 0.9656021374600666, "learning_rate": 5.710382009484616e-06, "loss": 0.2984, "step": 2544 }, { "epoch": 1.3044592516658124, "grad_norm": 0.9055500893503854, "learning_rate": 5.702883827679397e-06, "loss": 0.2776, "step": 2545 }, { "epoch": 1.3049718093285494, "grad_norm": 0.9397091349997535, "learning_rate": 5.695388607802603e-06, "loss": 0.2644, "step": 2546 }, { "epoch": 1.3054843669912866, "grad_norm": 1.0065375022606036, "learning_rate": 5.687896355020563e-06, "loss": 0.278, "step": 2547 }, { "epoch": 1.3059969246540235, "grad_norm": 0.8965943539739205, "learning_rate": 5.680407074497558e-06, "loss": 0.2748, "step": 2548 }, { "epoch": 1.3065094823167607, "grad_norm": 0.8814188355507948, "learning_rate": 5.672920771395822e-06, "loss": 0.2663, "step": 2549 }, { "epoch": 1.3070220399794976, "grad_norm": 0.956178608690546, "learning_rate": 5.665437450875534e-06, "loss": 0.3065, "step": 2550 }, { "epoch": 1.3075345976422348, "grad_norm": 0.9327055202547829, "learning_rate": 5.657957118094818e-06, "loss": 0.2988, "step": 2551 }, { "epoch": 1.3080471553049717, "grad_norm": 0.7983864831524798, "learning_rate": 5.6504797782097386e-06, "loss": 0.2683, "step": 2552 }, { "epoch": 1.308559712967709, "grad_norm": 0.8454627215215483, "learning_rate": 5.6430054363742995e-06, "loss": 0.2622, "step": 2553 }, { "epoch": 1.3090722706304458, "grad_norm": 1.019257485815489, "learning_rate": 5.635534097740435e-06, "loss": 0.3453, "step": 2554 }, { "epoch": 1.309584828293183, "grad_norm": 0.9307590979563852, "learning_rate": 5.628065767458011e-06, "loss": 0.25, "step": 2555 }, { "epoch": 1.31009738595592, "grad_norm": 0.9177326827616586, "learning_rate": 5.620600450674819e-06, "loss": 0.2667, "step": 2556 }, { "epoch": 1.3106099436186571, "grad_norm": 0.959293284716458, "learning_rate": 5.61313815253657e-06, "loss": 0.2827, "step": 2557 }, { "epoch": 1.3111225012813943, "grad_norm": 0.9225519847699206, "learning_rate": 5.605678878186911e-06, "loss": 0.2786, "step": 2558 }, { "epoch": 1.3116350589441312, "grad_norm": 0.9379608715896298, "learning_rate": 5.598222632767374e-06, "loss": 0.2617, "step": 2559 }, { "epoch": 1.3121476166068682, "grad_norm": 0.9909257125306633, "learning_rate": 5.590769421417435e-06, "loss": 0.2674, "step": 2560 }, { "epoch": 1.3126601742696054, "grad_norm": 0.9948438452511342, "learning_rate": 5.5833192492744635e-06, "loss": 0.2634, "step": 2561 }, { "epoch": 1.3131727319323425, "grad_norm": 0.7744693547021987, "learning_rate": 5.575872121473722e-06, "loss": 0.2056, "step": 2562 }, { "epoch": 1.3136852895950795, "grad_norm": 0.8569914904738599, "learning_rate": 5.568428043148402e-06, "loss": 0.2555, "step": 2563 }, { "epoch": 1.3141978472578164, "grad_norm": 0.9386844598957251, "learning_rate": 5.560987019429575e-06, "loss": 0.2708, "step": 2564 }, { "epoch": 1.3147104049205536, "grad_norm": 0.8413445051346279, "learning_rate": 5.553549055446209e-06, "loss": 0.2594, "step": 2565 }, { "epoch": 1.3152229625832907, "grad_norm": 0.8788293166654598, "learning_rate": 5.546114156325166e-06, "loss": 0.2567, "step": 2566 }, { "epoch": 1.3157355202460277, "grad_norm": 0.8782960348257428, "learning_rate": 5.538682327191195e-06, "loss": 0.2537, "step": 2567 }, { "epoch": 1.3162480779087646, "grad_norm": 0.9420893421204548, "learning_rate": 5.531253573166928e-06, "loss": 0.2568, "step": 2568 }, { "epoch": 1.3167606355715018, "grad_norm": 0.9532911120974161, "learning_rate": 5.523827899372876e-06, "loss": 0.2602, "step": 2569 }, { "epoch": 1.317273193234239, "grad_norm": 0.9332033044717503, "learning_rate": 5.516405310927431e-06, "loss": 0.2783, "step": 2570 }, { "epoch": 1.317785750896976, "grad_norm": 0.8484990574563623, "learning_rate": 5.50898581294685e-06, "loss": 0.2485, "step": 2571 }, { "epoch": 1.3182983085597129, "grad_norm": 0.9060018646560157, "learning_rate": 5.501569410545278e-06, "loss": 0.296, "step": 2572 }, { "epoch": 1.31881086622245, "grad_norm": 0.8066472315177277, "learning_rate": 5.494156108834703e-06, "loss": 0.2351, "step": 2573 }, { "epoch": 1.3193234238851872, "grad_norm": 0.8753691145135836, "learning_rate": 5.4867459129249846e-06, "loss": 0.2437, "step": 2574 }, { "epoch": 1.3198359815479241, "grad_norm": 0.9342963880478088, "learning_rate": 5.479338827923854e-06, "loss": 0.2603, "step": 2575 }, { "epoch": 1.320348539210661, "grad_norm": 1.035633209693727, "learning_rate": 5.471934858936887e-06, "loss": 0.3128, "step": 2576 }, { "epoch": 1.3208610968733983, "grad_norm": 0.9980955728271259, "learning_rate": 5.464534011067499e-06, "loss": 0.2957, "step": 2577 }, { "epoch": 1.3213736545361354, "grad_norm": 0.9412661782320179, "learning_rate": 5.4571362894169795e-06, "loss": 0.2802, "step": 2578 }, { "epoch": 1.3218862121988724, "grad_norm": 0.9424465246998045, "learning_rate": 5.4497416990844545e-06, "loss": 0.2957, "step": 2579 }, { "epoch": 1.3223987698616093, "grad_norm": 0.9367522192996093, "learning_rate": 5.442350245166872e-06, "loss": 0.292, "step": 2580 }, { "epoch": 1.3229113275243465, "grad_norm": 0.9476417389156139, "learning_rate": 5.4349619327590474e-06, "loss": 0.297, "step": 2581 }, { "epoch": 1.3234238851870836, "grad_norm": 0.8684107916692322, "learning_rate": 5.427576766953615e-06, "loss": 0.2873, "step": 2582 }, { "epoch": 1.3239364428498206, "grad_norm": 0.8886024725914325, "learning_rate": 5.4201947528410425e-06, "loss": 0.2537, "step": 2583 }, { "epoch": 1.3244490005125575, "grad_norm": 0.9282231939005816, "learning_rate": 5.412815895509624e-06, "loss": 0.2974, "step": 2584 }, { "epoch": 1.3249615581752947, "grad_norm": 1.0107367955493758, "learning_rate": 5.405440200045482e-06, "loss": 0.2777, "step": 2585 }, { "epoch": 1.3254741158380319, "grad_norm": 0.9706716697120116, "learning_rate": 5.398067671532554e-06, "loss": 0.2849, "step": 2586 }, { "epoch": 1.3259866735007688, "grad_norm": 0.8693321041856987, "learning_rate": 5.390698315052598e-06, "loss": 0.2655, "step": 2587 }, { "epoch": 1.3264992311635058, "grad_norm": 0.8537529800940489, "learning_rate": 5.383332135685186e-06, "loss": 0.26, "step": 2588 }, { "epoch": 1.327011788826243, "grad_norm": 0.898021638676663, "learning_rate": 5.375969138507695e-06, "loss": 0.2567, "step": 2589 }, { "epoch": 1.32752434648898, "grad_norm": 0.9391746720618828, "learning_rate": 5.368609328595323e-06, "loss": 0.25, "step": 2590 }, { "epoch": 1.328036904151717, "grad_norm": 1.0044985436221368, "learning_rate": 5.3612527110210495e-06, "loss": 0.2771, "step": 2591 }, { "epoch": 1.328549461814454, "grad_norm": 0.886254924437907, "learning_rate": 5.353899290855666e-06, "loss": 0.2484, "step": 2592 }, { "epoch": 1.3290620194771912, "grad_norm": 0.9534598535885622, "learning_rate": 5.346549073167766e-06, "loss": 0.2569, "step": 2593 }, { "epoch": 1.3295745771399283, "grad_norm": 1.008789408588378, "learning_rate": 5.339202063023727e-06, "loss": 0.2975, "step": 2594 }, { "epoch": 1.3300871348026653, "grad_norm": 0.9666909010521586, "learning_rate": 5.3318582654877074e-06, "loss": 0.2863, "step": 2595 }, { "epoch": 1.3305996924654024, "grad_norm": 0.8760194234260813, "learning_rate": 5.324517685621671e-06, "loss": 0.2777, "step": 2596 }, { "epoch": 1.3311122501281394, "grad_norm": 0.8950692278503917, "learning_rate": 5.3171803284853544e-06, "loss": 0.2493, "step": 2597 }, { "epoch": 1.3316248077908766, "grad_norm": 0.9094836580246898, "learning_rate": 5.309846199136258e-06, "loss": 0.2594, "step": 2598 }, { "epoch": 1.3321373654536135, "grad_norm": 0.900512692150404, "learning_rate": 5.302515302629686e-06, "loss": 0.2516, "step": 2599 }, { "epoch": 1.3326499231163507, "grad_norm": 0.883194405498302, "learning_rate": 5.295187644018694e-06, "loss": 0.2765, "step": 2600 }, { "epoch": 1.3331624807790876, "grad_norm": 0.9344228119817731, "learning_rate": 5.287863228354113e-06, "loss": 0.311, "step": 2601 }, { "epoch": 1.3336750384418248, "grad_norm": 0.9081975164759682, "learning_rate": 5.280542060684535e-06, "loss": 0.2602, "step": 2602 }, { "epoch": 1.3341875961045617, "grad_norm": 1.017267548504276, "learning_rate": 5.273224146056315e-06, "loss": 0.3325, "step": 2603 }, { "epoch": 1.334700153767299, "grad_norm": 0.9216811388941136, "learning_rate": 5.265909489513568e-06, "loss": 0.2693, "step": 2604 }, { "epoch": 1.3352127114300358, "grad_norm": 0.926937780082936, "learning_rate": 5.25859809609816e-06, "loss": 0.2711, "step": 2605 }, { "epoch": 1.335725269092773, "grad_norm": 0.8884404623358778, "learning_rate": 5.2512899708497086e-06, "loss": 0.2837, "step": 2606 }, { "epoch": 1.33623782675551, "grad_norm": 1.0153482368510969, "learning_rate": 5.243985118805577e-06, "loss": 0.3207, "step": 2607 }, { "epoch": 1.3367503844182471, "grad_norm": 0.9621955742946706, "learning_rate": 5.2366835450008836e-06, "loss": 0.2691, "step": 2608 }, { "epoch": 1.337262942080984, "grad_norm": 0.9269819820437702, "learning_rate": 5.229385254468469e-06, "loss": 0.2714, "step": 2609 }, { "epoch": 1.3377754997437212, "grad_norm": 0.9379323461128128, "learning_rate": 5.222090252238916e-06, "loss": 0.2652, "step": 2610 }, { "epoch": 1.3382880574064582, "grad_norm": 0.9664788186132428, "learning_rate": 5.2147985433405555e-06, "loss": 0.3053, "step": 2611 }, { "epoch": 1.3388006150691953, "grad_norm": 0.8872884534418467, "learning_rate": 5.207510132799436e-06, "loss": 0.2803, "step": 2612 }, { "epoch": 1.3393131727319323, "grad_norm": 0.9046886309281978, "learning_rate": 5.20022502563932e-06, "loss": 0.2814, "step": 2613 }, { "epoch": 1.3398257303946695, "grad_norm": 0.8909318596462554, "learning_rate": 5.192943226881724e-06, "loss": 0.2835, "step": 2614 }, { "epoch": 1.3403382880574064, "grad_norm": 0.8690453219859466, "learning_rate": 5.185664741545862e-06, "loss": 0.2576, "step": 2615 }, { "epoch": 1.3408508457201436, "grad_norm": 0.8750862903283015, "learning_rate": 5.178389574648659e-06, "loss": 0.2665, "step": 2616 }, { "epoch": 1.3413634033828805, "grad_norm": 0.9383468673220686, "learning_rate": 5.171117731204775e-06, "loss": 0.2844, "step": 2617 }, { "epoch": 1.3418759610456177, "grad_norm": 0.9209080124248721, "learning_rate": 5.163849216226562e-06, "loss": 0.274, "step": 2618 }, { "epoch": 1.3423885187083546, "grad_norm": 1.011419736644076, "learning_rate": 5.156584034724081e-06, "loss": 0.3212, "step": 2619 }, { "epoch": 1.3429010763710918, "grad_norm": 0.8484081876394428, "learning_rate": 5.149322191705102e-06, "loss": 0.2517, "step": 2620 }, { "epoch": 1.3434136340338287, "grad_norm": 0.9186976378160783, "learning_rate": 5.142063692175084e-06, "loss": 0.2916, "step": 2621 }, { "epoch": 1.343926191696566, "grad_norm": 0.9701910999647313, "learning_rate": 5.134808541137183e-06, "loss": 0.293, "step": 2622 }, { "epoch": 1.3444387493593029, "grad_norm": 0.9534544052294363, "learning_rate": 5.127556743592263e-06, "loss": 0.2722, "step": 2623 }, { "epoch": 1.34495130702204, "grad_norm": 0.9992430262348463, "learning_rate": 5.120308304538852e-06, "loss": 0.3138, "step": 2624 }, { "epoch": 1.345463864684777, "grad_norm": 0.9473726486039104, "learning_rate": 5.113063228973174e-06, "loss": 0.2778, "step": 2625 }, { "epoch": 1.3459764223475141, "grad_norm": 0.9422125413554454, "learning_rate": 5.105821521889147e-06, "loss": 0.2979, "step": 2626 }, { "epoch": 1.346488980010251, "grad_norm": 1.040212678954863, "learning_rate": 5.098583188278345e-06, "loss": 0.2672, "step": 2627 }, { "epoch": 1.3470015376729882, "grad_norm": 0.9405224181334206, "learning_rate": 5.0913482331300244e-06, "loss": 0.3127, "step": 2628 }, { "epoch": 1.3475140953357252, "grad_norm": 0.9265845266512379, "learning_rate": 5.0841166614311266e-06, "loss": 0.2799, "step": 2629 }, { "epoch": 1.3480266529984624, "grad_norm": 0.9237923995664639, "learning_rate": 5.076888478166247e-06, "loss": 0.2753, "step": 2630 }, { "epoch": 1.3485392106611993, "grad_norm": 0.8909497433234325, "learning_rate": 5.069663688317638e-06, "loss": 0.2606, "step": 2631 }, { "epoch": 1.3490517683239365, "grad_norm": 0.840006052640889, "learning_rate": 5.062442296865234e-06, "loss": 0.2425, "step": 2632 }, { "epoch": 1.3495643259866734, "grad_norm": 0.9480529562615431, "learning_rate": 5.055224308786614e-06, "loss": 0.266, "step": 2633 }, { "epoch": 1.3500768836494106, "grad_norm": 0.8851992377571962, "learning_rate": 5.048009729057012e-06, "loss": 0.27, "step": 2634 }, { "epoch": 1.3505894413121475, "grad_norm": 0.9475529992448526, "learning_rate": 5.040798562649314e-06, "loss": 0.314, "step": 2635 }, { "epoch": 1.3511019989748847, "grad_norm": 0.9458004270494987, "learning_rate": 5.033590814534054e-06, "loss": 0.3104, "step": 2636 }, { "epoch": 1.3516145566376219, "grad_norm": 0.8838219706541867, "learning_rate": 5.026386489679408e-06, "loss": 0.2691, "step": 2637 }, { "epoch": 1.3521271143003588, "grad_norm": 0.9408655052062084, "learning_rate": 5.0191855930511946e-06, "loss": 0.2813, "step": 2638 }, { "epoch": 1.3526396719630958, "grad_norm": 0.944775631112022, "learning_rate": 5.011988129612868e-06, "loss": 0.2756, "step": 2639 }, { "epoch": 1.353152229625833, "grad_norm": 0.9056068277498013, "learning_rate": 5.004794104325512e-06, "loss": 0.2698, "step": 2640 }, { "epoch": 1.35366478728857, "grad_norm": 0.9691530890273554, "learning_rate": 4.997603522147856e-06, "loss": 0.2771, "step": 2641 }, { "epoch": 1.354177344951307, "grad_norm": 0.9842127707489681, "learning_rate": 4.990416388036233e-06, "loss": 0.2758, "step": 2642 }, { "epoch": 1.354689902614044, "grad_norm": 0.9053395801178422, "learning_rate": 4.983232706944613e-06, "loss": 0.2596, "step": 2643 }, { "epoch": 1.3552024602767812, "grad_norm": 0.8781843352833111, "learning_rate": 4.976052483824595e-06, "loss": 0.2431, "step": 2644 }, { "epoch": 1.3557150179395183, "grad_norm": 0.9515548683667909, "learning_rate": 4.968875723625371e-06, "loss": 0.2619, "step": 2645 }, { "epoch": 1.3562275756022553, "grad_norm": 0.9416525817675276, "learning_rate": 4.961702431293759e-06, "loss": 0.2919, "step": 2646 }, { "epoch": 1.3567401332649922, "grad_norm": 0.9096712657688694, "learning_rate": 4.954532611774195e-06, "loss": 0.2778, "step": 2647 }, { "epoch": 1.3572526909277294, "grad_norm": 1.0733796653565661, "learning_rate": 4.947366270008708e-06, "loss": 0.3177, "step": 2648 }, { "epoch": 1.3577652485904665, "grad_norm": 0.9140761301115523, "learning_rate": 4.940203410936934e-06, "loss": 0.2816, "step": 2649 }, { "epoch": 1.3582778062532035, "grad_norm": 0.8773934484479935, "learning_rate": 4.933044039496107e-06, "loss": 0.2959, "step": 2650 }, { "epoch": 1.3587903639159404, "grad_norm": 0.9246579153270607, "learning_rate": 4.925888160621064e-06, "loss": 0.2764, "step": 2651 }, { "epoch": 1.3593029215786776, "grad_norm": 0.9626422847415822, "learning_rate": 4.918735779244226e-06, "loss": 0.2975, "step": 2652 }, { "epoch": 1.3598154792414148, "grad_norm": 0.8940359962943731, "learning_rate": 4.911586900295606e-06, "loss": 0.2565, "step": 2653 }, { "epoch": 1.3603280369041517, "grad_norm": 0.9777195094070155, "learning_rate": 4.904441528702806e-06, "loss": 0.2649, "step": 2654 }, { "epoch": 1.3608405945668887, "grad_norm": 0.979486136315578, "learning_rate": 4.897299669391006e-06, "loss": 0.2953, "step": 2655 }, { "epoch": 1.3613531522296258, "grad_norm": 0.9916914719232122, "learning_rate": 4.890161327282968e-06, "loss": 0.3026, "step": 2656 }, { "epoch": 1.361865709892363, "grad_norm": 0.9037338731929311, "learning_rate": 4.883026507299028e-06, "loss": 0.2717, "step": 2657 }, { "epoch": 1.3623782675551, "grad_norm": 0.998374193044893, "learning_rate": 4.875895214357093e-06, "loss": 0.3201, "step": 2658 }, { "epoch": 1.3628908252178369, "grad_norm": 0.9279735912577414, "learning_rate": 4.868767453372649e-06, "loss": 0.269, "step": 2659 }, { "epoch": 1.363403382880574, "grad_norm": 0.9140403822039465, "learning_rate": 4.8616432292587315e-06, "loss": 0.2688, "step": 2660 }, { "epoch": 1.3639159405433112, "grad_norm": 0.8853240744341128, "learning_rate": 4.8545225469259435e-06, "loss": 0.2749, "step": 2661 }, { "epoch": 1.3644284982060482, "grad_norm": 0.8852755556561179, "learning_rate": 4.847405411282462e-06, "loss": 0.2395, "step": 2662 }, { "epoch": 1.3649410558687851, "grad_norm": 0.9135108383210703, "learning_rate": 4.8402918272339905e-06, "loss": 0.3066, "step": 2663 }, { "epoch": 1.3654536135315223, "grad_norm": 0.9266327042109095, "learning_rate": 4.833181799683812e-06, "loss": 0.2662, "step": 2664 }, { "epoch": 1.3659661711942594, "grad_norm": 0.8362141711462339, "learning_rate": 4.826075333532742e-06, "loss": 0.2351, "step": 2665 }, { "epoch": 1.3664787288569964, "grad_norm": 0.9361854041115062, "learning_rate": 4.818972433679145e-06, "loss": 0.2916, "step": 2666 }, { "epoch": 1.3669912865197333, "grad_norm": 0.8325491188909199, "learning_rate": 4.81187310501893e-06, "loss": 0.2726, "step": 2667 }, { "epoch": 1.3675038441824705, "grad_norm": 0.9949264574748581, "learning_rate": 4.804777352445542e-06, "loss": 0.3033, "step": 2668 }, { "epoch": 1.3680164018452077, "grad_norm": 1.0075559344925955, "learning_rate": 4.79768518084996e-06, "loss": 0.2826, "step": 2669 }, { "epoch": 1.3685289595079446, "grad_norm": 0.9017545434238061, "learning_rate": 4.790596595120699e-06, "loss": 0.2874, "step": 2670 }, { "epoch": 1.3690415171706816, "grad_norm": 0.9195264541814956, "learning_rate": 4.783511600143796e-06, "loss": 0.2836, "step": 2671 }, { "epoch": 1.3695540748334187, "grad_norm": 0.8684789243079722, "learning_rate": 4.776430200802819e-06, "loss": 0.2699, "step": 2672 }, { "epoch": 1.370066632496156, "grad_norm": 0.8783554470441273, "learning_rate": 4.7693524019788525e-06, "loss": 0.283, "step": 2673 }, { "epoch": 1.3705791901588928, "grad_norm": 0.914533763081523, "learning_rate": 4.762278208550505e-06, "loss": 0.2671, "step": 2674 }, { "epoch": 1.37109174782163, "grad_norm": 0.9676942993746757, "learning_rate": 4.7552076253938905e-06, "loss": 0.266, "step": 2675 }, { "epoch": 1.371604305484367, "grad_norm": 0.8622126046897903, "learning_rate": 4.7481406573826496e-06, "loss": 0.2358, "step": 2676 }, { "epoch": 1.3721168631471041, "grad_norm": 0.8719358887529197, "learning_rate": 4.741077309387921e-06, "loss": 0.2636, "step": 2677 }, { "epoch": 1.372629420809841, "grad_norm": 0.9449211290786131, "learning_rate": 4.734017586278337e-06, "loss": 0.2717, "step": 2678 }, { "epoch": 1.3731419784725782, "grad_norm": 0.972557823531287, "learning_rate": 4.726961492920057e-06, "loss": 0.2897, "step": 2679 }, { "epoch": 1.3736545361353152, "grad_norm": 0.9714922639130691, "learning_rate": 4.719909034176724e-06, "loss": 0.289, "step": 2680 }, { "epoch": 1.3741670937980524, "grad_norm": 0.9874153925216427, "learning_rate": 4.712860214909466e-06, "loss": 0.2976, "step": 2681 }, { "epoch": 1.3746796514607893, "grad_norm": 0.8652188800608398, "learning_rate": 4.7058150399769245e-06, "loss": 0.2492, "step": 2682 }, { "epoch": 1.3751922091235265, "grad_norm": 0.900873844791219, "learning_rate": 4.698773514235212e-06, "loss": 0.3006, "step": 2683 }, { "epoch": 1.3757047667862634, "grad_norm": 0.9366926727490122, "learning_rate": 4.691735642537934e-06, "loss": 0.2614, "step": 2684 }, { "epoch": 1.3762173244490006, "grad_norm": 0.8558879189120606, "learning_rate": 4.6847014297361735e-06, "loss": 0.2569, "step": 2685 }, { "epoch": 1.3767298821117375, "grad_norm": 1.0522068774811186, "learning_rate": 4.677670880678493e-06, "loss": 0.3179, "step": 2686 }, { "epoch": 1.3772424397744747, "grad_norm": 0.8891734665314979, "learning_rate": 4.6706440002109275e-06, "loss": 0.272, "step": 2687 }, { "epoch": 1.3777549974372116, "grad_norm": 0.9923390295866922, "learning_rate": 4.6636207931769884e-06, "loss": 0.2648, "step": 2688 }, { "epoch": 1.3782675550999488, "grad_norm": 0.9145640773037181, "learning_rate": 4.65660126441765e-06, "loss": 0.2718, "step": 2689 }, { "epoch": 1.3787801127626858, "grad_norm": 0.9693998598200018, "learning_rate": 4.649585418771348e-06, "loss": 0.302, "step": 2690 }, { "epoch": 1.379292670425423, "grad_norm": 0.9108695065870641, "learning_rate": 4.642573261073996e-06, "loss": 0.2848, "step": 2691 }, { "epoch": 1.3798052280881599, "grad_norm": 0.8327334428801858, "learning_rate": 4.635564796158946e-06, "loss": 0.234, "step": 2692 }, { "epoch": 1.380317785750897, "grad_norm": 0.9397112316311186, "learning_rate": 4.628560028857008e-06, "loss": 0.2615, "step": 2693 }, { "epoch": 1.380830343413634, "grad_norm": 0.917748641440137, "learning_rate": 4.621558963996458e-06, "loss": 0.2649, "step": 2694 }, { "epoch": 1.3813429010763711, "grad_norm": 0.8940800654106756, "learning_rate": 4.6145616064030095e-06, "loss": 0.2444, "step": 2695 }, { "epoch": 1.381855458739108, "grad_norm": 0.920270999619889, "learning_rate": 4.60756796089981e-06, "loss": 0.2701, "step": 2696 }, { "epoch": 1.3823680164018453, "grad_norm": 0.9361166551199841, "learning_rate": 4.60057803230747e-06, "loss": 0.2564, "step": 2697 }, { "epoch": 1.3828805740645822, "grad_norm": 0.9803254009504079, "learning_rate": 4.593591825444028e-06, "loss": 0.2618, "step": 2698 }, { "epoch": 1.3833931317273194, "grad_norm": 0.9778655207034463, "learning_rate": 4.586609345124944e-06, "loss": 0.2916, "step": 2699 }, { "epoch": 1.3839056893900563, "grad_norm": 0.9416198906144132, "learning_rate": 4.579630596163134e-06, "loss": 0.2801, "step": 2700 }, { "epoch": 1.3844182470527935, "grad_norm": 0.871386799999236, "learning_rate": 4.572655583368927e-06, "loss": 0.2373, "step": 2701 }, { "epoch": 1.3849308047155304, "grad_norm": 0.9012840021162581, "learning_rate": 4.565684311550077e-06, "loss": 0.2949, "step": 2702 }, { "epoch": 1.3854433623782676, "grad_norm": 1.007271460544208, "learning_rate": 4.558716785511764e-06, "loss": 0.3348, "step": 2703 }, { "epoch": 1.3859559200410045, "grad_norm": 0.847348074906095, "learning_rate": 4.551753010056584e-06, "loss": 0.2442, "step": 2704 }, { "epoch": 1.3864684777037417, "grad_norm": 0.9724543223308466, "learning_rate": 4.544792989984544e-06, "loss": 0.2618, "step": 2705 }, { "epoch": 1.3869810353664787, "grad_norm": 0.9578879834240923, "learning_rate": 4.537836730093077e-06, "loss": 0.2746, "step": 2706 }, { "epoch": 1.3874935930292158, "grad_norm": 0.8812987395697128, "learning_rate": 4.5308842351770026e-06, "loss": 0.253, "step": 2707 }, { "epoch": 1.3880061506919528, "grad_norm": 0.970471956003926, "learning_rate": 4.523935510028556e-06, "loss": 0.2916, "step": 2708 }, { "epoch": 1.38851870835469, "grad_norm": 0.982261019140247, "learning_rate": 4.516990559437388e-06, "loss": 0.2938, "step": 2709 }, { "epoch": 1.3890312660174269, "grad_norm": 0.9471604134272096, "learning_rate": 4.510049388190518e-06, "loss": 0.2855, "step": 2710 }, { "epoch": 1.389543823680164, "grad_norm": 0.943662509921527, "learning_rate": 4.503112001072381e-06, "loss": 0.2995, "step": 2711 }, { "epoch": 1.3900563813429012, "grad_norm": 1.0037453621425056, "learning_rate": 4.4961784028648025e-06, "loss": 0.2957, "step": 2712 }, { "epoch": 1.3905689390056382, "grad_norm": 0.9456086986655368, "learning_rate": 4.489248598346994e-06, "loss": 0.2982, "step": 2713 }, { "epoch": 1.391081496668375, "grad_norm": 0.9098717559017965, "learning_rate": 4.482322592295541e-06, "loss": 0.2571, "step": 2714 }, { "epoch": 1.3915940543311123, "grad_norm": 0.9490806419518295, "learning_rate": 4.47540038948443e-06, "loss": 0.2954, "step": 2715 }, { "epoch": 1.3921066119938494, "grad_norm": 1.013178106050524, "learning_rate": 4.468481994685018e-06, "loss": 0.3055, "step": 2716 }, { "epoch": 1.3926191696565864, "grad_norm": 0.8547768858255539, "learning_rate": 4.4615674126660226e-06, "loss": 0.2371, "step": 2717 }, { "epoch": 1.3931317273193233, "grad_norm": 0.959348545013339, "learning_rate": 4.454656648193559e-06, "loss": 0.2653, "step": 2718 }, { "epoch": 1.3936442849820605, "grad_norm": 0.9393633433523125, "learning_rate": 4.447749706031096e-06, "loss": 0.2923, "step": 2719 }, { "epoch": 1.3941568426447977, "grad_norm": 0.8667078871783541, "learning_rate": 4.440846590939469e-06, "loss": 0.2534, "step": 2720 }, { "epoch": 1.3946694003075346, "grad_norm": 0.9497545128837918, "learning_rate": 4.4339473076768765e-06, "loss": 0.299, "step": 2721 }, { "epoch": 1.3951819579702716, "grad_norm": 0.8725159753835214, "learning_rate": 4.427051860998877e-06, "loss": 0.2538, "step": 2722 }, { "epoch": 1.3956945156330087, "grad_norm": 0.9281579495188815, "learning_rate": 4.4201602556583775e-06, "loss": 0.2768, "step": 2723 }, { "epoch": 1.396207073295746, "grad_norm": 0.9301803680341099, "learning_rate": 4.4132724964056576e-06, "loss": 0.285, "step": 2724 }, { "epoch": 1.3967196309584828, "grad_norm": 0.9179222745899833, "learning_rate": 4.406388587988318e-06, "loss": 0.2559, "step": 2725 }, { "epoch": 1.3972321886212198, "grad_norm": 1.001130116235552, "learning_rate": 4.399508535151321e-06, "loss": 0.2876, "step": 2726 }, { "epoch": 1.397744746283957, "grad_norm": 0.9454659341235523, "learning_rate": 4.392632342636978e-06, "loss": 0.2613, "step": 2727 }, { "epoch": 1.3982573039466941, "grad_norm": 1.0078648145278968, "learning_rate": 4.385760015184919e-06, "loss": 0.3175, "step": 2728 }, { "epoch": 1.398769861609431, "grad_norm": 0.9143977498522445, "learning_rate": 4.378891557532123e-06, "loss": 0.262, "step": 2729 }, { "epoch": 1.399282419272168, "grad_norm": 0.9551612052484688, "learning_rate": 4.372026974412907e-06, "loss": 0.2511, "step": 2730 }, { "epoch": 1.3997949769349052, "grad_norm": 0.9050540294512316, "learning_rate": 4.3651662705589104e-06, "loss": 0.2712, "step": 2731 }, { "epoch": 1.4003075345976423, "grad_norm": 0.9394595145369848, "learning_rate": 4.358309450699087e-06, "loss": 0.2487, "step": 2732 }, { "epoch": 1.4008200922603793, "grad_norm": 1.107520707661685, "learning_rate": 4.3514565195597356e-06, "loss": 0.3169, "step": 2733 }, { "epoch": 1.4013326499231162, "grad_norm": 0.8703411984269354, "learning_rate": 4.344607481864466e-06, "loss": 0.2664, "step": 2734 }, { "epoch": 1.4018452075858534, "grad_norm": 0.9804900651108778, "learning_rate": 4.337762342334189e-06, "loss": 0.2607, "step": 2735 }, { "epoch": 1.4023577652485906, "grad_norm": 0.897206439269132, "learning_rate": 4.330921105687155e-06, "loss": 0.2668, "step": 2736 }, { "epoch": 1.4028703229113275, "grad_norm": 1.0126221944504665, "learning_rate": 4.324083776638905e-06, "loss": 0.2942, "step": 2737 }, { "epoch": 1.4033828805740645, "grad_norm": 0.9750299587464445, "learning_rate": 4.317250359902295e-06, "loss": 0.2902, "step": 2738 }, { "epoch": 1.4038954382368016, "grad_norm": 0.9176473134857934, "learning_rate": 4.31042086018748e-06, "loss": 0.2614, "step": 2739 }, { "epoch": 1.4044079958995388, "grad_norm": 0.8920927350042086, "learning_rate": 4.303595282201918e-06, "loss": 0.2685, "step": 2740 }, { "epoch": 1.4049205535622757, "grad_norm": 0.9225496241897282, "learning_rate": 4.296773630650358e-06, "loss": 0.2893, "step": 2741 }, { "epoch": 1.4054331112250127, "grad_norm": 0.8866808284438866, "learning_rate": 4.2899559102348585e-06, "loss": 0.2601, "step": 2742 }, { "epoch": 1.4059456688877499, "grad_norm": 0.9834888962197602, "learning_rate": 4.283142125654748e-06, "loss": 0.2635, "step": 2743 }, { "epoch": 1.406458226550487, "grad_norm": 0.9414916524585557, "learning_rate": 4.27633228160665e-06, "loss": 0.2636, "step": 2744 }, { "epoch": 1.406970784213224, "grad_norm": 0.9596112595663084, "learning_rate": 4.2695263827844835e-06, "loss": 0.2564, "step": 2745 }, { "epoch": 1.407483341875961, "grad_norm": 0.9171923121364974, "learning_rate": 4.262724433879427e-06, "loss": 0.2563, "step": 2746 }, { "epoch": 1.407995899538698, "grad_norm": 0.8681043774079612, "learning_rate": 4.255926439579948e-06, "loss": 0.2623, "step": 2747 }, { "epoch": 1.4085084572014352, "grad_norm": 1.031513259973893, "learning_rate": 4.249132404571794e-06, "loss": 0.3149, "step": 2748 }, { "epoch": 1.4090210148641722, "grad_norm": 0.909506530999037, "learning_rate": 4.242342333537978e-06, "loss": 0.2786, "step": 2749 }, { "epoch": 1.4095335725269091, "grad_norm": 0.9731982939975946, "learning_rate": 4.235556231158765e-06, "loss": 0.2705, "step": 2750 }, { "epoch": 1.4100461301896463, "grad_norm": 0.8818368027289715, "learning_rate": 4.228774102111714e-06, "loss": 0.2615, "step": 2751 }, { "epoch": 1.4105586878523835, "grad_norm": 0.9172885536493149, "learning_rate": 4.221995951071623e-06, "loss": 0.2667, "step": 2752 }, { "epoch": 1.4110712455151204, "grad_norm": 0.869664437368146, "learning_rate": 4.215221782710559e-06, "loss": 0.2747, "step": 2753 }, { "epoch": 1.4115838031778576, "grad_norm": 0.9418171634188247, "learning_rate": 4.208451601697836e-06, "loss": 0.2741, "step": 2754 }, { "epoch": 1.4120963608405945, "grad_norm": 0.9718634223287486, "learning_rate": 4.201685412700026e-06, "loss": 0.2785, "step": 2755 }, { "epoch": 1.4126089185033317, "grad_norm": 0.9079715073903435, "learning_rate": 4.194923220380947e-06, "loss": 0.2469, "step": 2756 }, { "epoch": 1.4131214761660686, "grad_norm": 0.8574940591638303, "learning_rate": 4.1881650294016606e-06, "loss": 0.256, "step": 2757 }, { "epoch": 1.4136340338288058, "grad_norm": 0.8230256052818363, "learning_rate": 4.181410844420473e-06, "loss": 0.2433, "step": 2758 }, { "epoch": 1.4141465914915428, "grad_norm": 0.950905013127273, "learning_rate": 4.1746606700929245e-06, "loss": 0.2676, "step": 2759 }, { "epoch": 1.41465914915428, "grad_norm": 0.9023698698177759, "learning_rate": 4.167914511071805e-06, "loss": 0.2794, "step": 2760 }, { "epoch": 1.4151717068170169, "grad_norm": 0.9813615900223553, "learning_rate": 4.161172372007115e-06, "loss": 0.291, "step": 2761 }, { "epoch": 1.415684264479754, "grad_norm": 0.9454384110803475, "learning_rate": 4.154434257546095e-06, "loss": 0.2987, "step": 2762 }, { "epoch": 1.416196822142491, "grad_norm": 0.9173451265110238, "learning_rate": 4.147700172333222e-06, "loss": 0.2574, "step": 2763 }, { "epoch": 1.4167093798052282, "grad_norm": 0.9082124323468608, "learning_rate": 4.140970121010177e-06, "loss": 0.281, "step": 2764 }, { "epoch": 1.417221937467965, "grad_norm": 0.9456662802970452, "learning_rate": 4.134244108215867e-06, "loss": 0.2601, "step": 2765 }, { "epoch": 1.4177344951307023, "grad_norm": 0.8866862936114692, "learning_rate": 4.127522138586424e-06, "loss": 0.2397, "step": 2766 }, { "epoch": 1.4182470527934392, "grad_norm": 1.0404311388484513, "learning_rate": 4.120804216755183e-06, "loss": 0.257, "step": 2767 }, { "epoch": 1.4187596104561764, "grad_norm": 1.0804609023404994, "learning_rate": 4.1140903473526935e-06, "loss": 0.3052, "step": 2768 }, { "epoch": 1.4192721681189133, "grad_norm": 0.9149039881771329, "learning_rate": 4.107380535006709e-06, "loss": 0.2565, "step": 2769 }, { "epoch": 1.4197847257816505, "grad_norm": 0.9049839737781582, "learning_rate": 4.10067478434219e-06, "loss": 0.2835, "step": 2770 }, { "epoch": 1.4202972834443874, "grad_norm": 0.9192138744128829, "learning_rate": 4.093973099981292e-06, "loss": 0.293, "step": 2771 }, { "epoch": 1.4208098411071246, "grad_norm": 0.9005443089794514, "learning_rate": 4.087275486543374e-06, "loss": 0.2764, "step": 2772 }, { "epoch": 1.4213223987698616, "grad_norm": 1.0323033384265536, "learning_rate": 4.080581948644985e-06, "loss": 0.2984, "step": 2773 }, { "epoch": 1.4218349564325987, "grad_norm": 0.9680974041165413, "learning_rate": 4.073892490899865e-06, "loss": 0.2907, "step": 2774 }, { "epoch": 1.4223475140953357, "grad_norm": 0.9846378904052738, "learning_rate": 4.067207117918944e-06, "loss": 0.3008, "step": 2775 }, { "epoch": 1.4228600717580728, "grad_norm": 0.8897945960671019, "learning_rate": 4.0605258343103315e-06, "loss": 0.2568, "step": 2776 }, { "epoch": 1.4233726294208098, "grad_norm": 0.8447160326133206, "learning_rate": 4.053848644679322e-06, "loss": 0.2463, "step": 2777 }, { "epoch": 1.423885187083547, "grad_norm": 0.893757486603962, "learning_rate": 4.047175553628397e-06, "loss": 0.2668, "step": 2778 }, { "epoch": 1.424397744746284, "grad_norm": 1.0076811160054848, "learning_rate": 4.04050656575719e-06, "loss": 0.2696, "step": 2779 }, { "epoch": 1.424910302409021, "grad_norm": 0.9204914117645239, "learning_rate": 4.033841685662529e-06, "loss": 0.2653, "step": 2780 }, { "epoch": 1.425422860071758, "grad_norm": 0.9132916120478372, "learning_rate": 4.0271809179384035e-06, "loss": 0.2689, "step": 2781 }, { "epoch": 1.4259354177344952, "grad_norm": 0.9386279364804802, "learning_rate": 4.020524267175954e-06, "loss": 0.2659, "step": 2782 }, { "epoch": 1.4264479753972321, "grad_norm": 0.8718058966217771, "learning_rate": 4.013871737963506e-06, "loss": 0.241, "step": 2783 }, { "epoch": 1.4269605330599693, "grad_norm": 0.9702308192187556, "learning_rate": 4.007223334886531e-06, "loss": 0.2871, "step": 2784 }, { "epoch": 1.4274730907227062, "grad_norm": 1.0219793661006358, "learning_rate": 4.00057906252766e-06, "loss": 0.2867, "step": 2785 }, { "epoch": 1.4279856483854434, "grad_norm": 0.9255488621022538, "learning_rate": 3.993938925466674e-06, "loss": 0.2825, "step": 2786 }, { "epoch": 1.4284982060481803, "grad_norm": 0.901383749276628, "learning_rate": 3.987302928280507e-06, "loss": 0.2527, "step": 2787 }, { "epoch": 1.4290107637109175, "grad_norm": 0.9686548514076639, "learning_rate": 3.980671075543238e-06, "loss": 0.2718, "step": 2788 }, { "epoch": 1.4295233213736545, "grad_norm": 0.9953903431031023, "learning_rate": 3.974043371826087e-06, "loss": 0.2722, "step": 2789 }, { "epoch": 1.4300358790363916, "grad_norm": 0.9725516891398517, "learning_rate": 3.96741982169742e-06, "loss": 0.2684, "step": 2790 }, { "epoch": 1.4305484366991288, "grad_norm": 0.9729264059822056, "learning_rate": 3.960800429722734e-06, "loss": 0.2818, "step": 2791 }, { "epoch": 1.4310609943618657, "grad_norm": 0.9248251757445058, "learning_rate": 3.954185200464663e-06, "loss": 0.274, "step": 2792 }, { "epoch": 1.4315735520246027, "grad_norm": 1.0543529494402177, "learning_rate": 3.9475741384829715e-06, "loss": 0.3067, "step": 2793 }, { "epoch": 1.4320861096873398, "grad_norm": 0.9319950946101182, "learning_rate": 3.9409672483345465e-06, "loss": 0.2817, "step": 2794 }, { "epoch": 1.432598667350077, "grad_norm": 0.9678862760651867, "learning_rate": 3.93436453457341e-06, "loss": 0.2888, "step": 2795 }, { "epoch": 1.433111225012814, "grad_norm": 0.9029540660657129, "learning_rate": 3.927766001750702e-06, "loss": 0.2404, "step": 2796 }, { "epoch": 1.433623782675551, "grad_norm": 0.9181017306478458, "learning_rate": 3.921171654414667e-06, "loss": 0.2755, "step": 2797 }, { "epoch": 1.434136340338288, "grad_norm": 1.0101720435852093, "learning_rate": 3.914581497110684e-06, "loss": 0.2956, "step": 2798 }, { "epoch": 1.4346488980010252, "grad_norm": 0.9228355892797405, "learning_rate": 3.907995534381237e-06, "loss": 0.2609, "step": 2799 }, { "epoch": 1.4351614556637622, "grad_norm": 0.9543046106889791, "learning_rate": 3.901413770765905e-06, "loss": 0.2912, "step": 2800 }, { "epoch": 1.4356740133264991, "grad_norm": 0.8868924212777257, "learning_rate": 3.894836210801397e-06, "loss": 0.2659, "step": 2801 }, { "epoch": 1.4361865709892363, "grad_norm": 0.8983228456838337, "learning_rate": 3.888262859021508e-06, "loss": 0.2514, "step": 2802 }, { "epoch": 1.4366991286519735, "grad_norm": 0.9347319237646986, "learning_rate": 3.881693719957136e-06, "loss": 0.2661, "step": 2803 }, { "epoch": 1.4372116863147104, "grad_norm": 0.9001664193657124, "learning_rate": 3.875128798136276e-06, "loss": 0.2847, "step": 2804 }, { "epoch": 1.4377242439774474, "grad_norm": 0.9016872537478151, "learning_rate": 3.868568098084017e-06, "loss": 0.2758, "step": 2805 }, { "epoch": 1.4382368016401845, "grad_norm": 0.9165512310525666, "learning_rate": 3.862011624322534e-06, "loss": 0.2731, "step": 2806 }, { "epoch": 1.4387493593029217, "grad_norm": 1.018284013294559, "learning_rate": 3.8554593813710975e-06, "loss": 0.2808, "step": 2807 }, { "epoch": 1.4392619169656586, "grad_norm": 0.8760324703790198, "learning_rate": 3.848911373746051e-06, "loss": 0.2486, "step": 2808 }, { "epoch": 1.4397744746283956, "grad_norm": 0.9708888104269001, "learning_rate": 3.842367605960821e-06, "loss": 0.2914, "step": 2809 }, { "epoch": 1.4402870322911328, "grad_norm": 0.9697239075218904, "learning_rate": 3.835828082525925e-06, "loss": 0.3019, "step": 2810 }, { "epoch": 1.44079958995387, "grad_norm": 0.9229849537858925, "learning_rate": 3.829292807948937e-06, "loss": 0.2677, "step": 2811 }, { "epoch": 1.4413121476166069, "grad_norm": 0.8968643549501264, "learning_rate": 3.822761786734506e-06, "loss": 0.2529, "step": 2812 }, { "epoch": 1.4418247052793438, "grad_norm": 0.9185950009897413, "learning_rate": 3.81623502338436e-06, "loss": 0.277, "step": 2813 }, { "epoch": 1.442337262942081, "grad_norm": 0.9874552253850014, "learning_rate": 3.8097125223972864e-06, "loss": 0.2775, "step": 2814 }, { "epoch": 1.4428498206048181, "grad_norm": 0.9942639463307671, "learning_rate": 3.803194288269122e-06, "loss": 0.2651, "step": 2815 }, { "epoch": 1.443362378267555, "grad_norm": 1.0188014416594826, "learning_rate": 3.7966803254927852e-06, "loss": 0.2832, "step": 2816 }, { "epoch": 1.443874935930292, "grad_norm": 0.9902903716653267, "learning_rate": 3.7901706385582372e-06, "loss": 0.2586, "step": 2817 }, { "epoch": 1.4443874935930292, "grad_norm": 0.9559421720782999, "learning_rate": 3.7836652319524835e-06, "loss": 0.2719, "step": 2818 }, { "epoch": 1.4449000512557664, "grad_norm": 0.909001441976769, "learning_rate": 3.7771641101596e-06, "loss": 0.2772, "step": 2819 }, { "epoch": 1.4454126089185033, "grad_norm": 0.9540445756540809, "learning_rate": 3.770667277660696e-06, "loss": 0.27, "step": 2820 }, { "epoch": 1.4459251665812403, "grad_norm": 1.0146666590341253, "learning_rate": 3.7641747389339232e-06, "loss": 0.3165, "step": 2821 }, { "epoch": 1.4464377242439774, "grad_norm": 0.9688199839115574, "learning_rate": 3.7576864984544814e-06, "loss": 0.2714, "step": 2822 }, { "epoch": 1.4469502819067146, "grad_norm": 0.9205078258689114, "learning_rate": 3.7512025606946e-06, "loss": 0.2897, "step": 2823 }, { "epoch": 1.4474628395694515, "grad_norm": 0.9915546409836087, "learning_rate": 3.7447229301235443e-06, "loss": 0.2829, "step": 2824 }, { "epoch": 1.4479753972321885, "grad_norm": 0.9387207282495666, "learning_rate": 3.7382476112076227e-06, "loss": 0.3122, "step": 2825 }, { "epoch": 1.4484879548949257, "grad_norm": 0.9644167763450204, "learning_rate": 3.73177660841015e-06, "loss": 0.3009, "step": 2826 }, { "epoch": 1.4490005125576628, "grad_norm": 0.8696932674924986, "learning_rate": 3.7253099261914794e-06, "loss": 0.2548, "step": 2827 }, { "epoch": 1.4495130702203998, "grad_norm": 0.8802388240308584, "learning_rate": 3.718847569008992e-06, "loss": 0.2692, "step": 2828 }, { "epoch": 1.450025627883137, "grad_norm": 0.9562519782397637, "learning_rate": 3.7123895413170706e-06, "loss": 0.2959, "step": 2829 }, { "epoch": 1.4505381855458739, "grad_norm": 0.9091325878249434, "learning_rate": 3.7059358475671225e-06, "loss": 0.288, "step": 2830 }, { "epoch": 1.451050743208611, "grad_norm": 0.9089288252243873, "learning_rate": 3.6994864922075756e-06, "loss": 0.2656, "step": 2831 }, { "epoch": 1.451563300871348, "grad_norm": 0.9178509653492251, "learning_rate": 3.6930414796838587e-06, "loss": 0.2745, "step": 2832 }, { "epoch": 1.4520758585340852, "grad_norm": 0.9051347093978458, "learning_rate": 3.686600814438399e-06, "loss": 0.2399, "step": 2833 }, { "epoch": 1.452588416196822, "grad_norm": 0.8819617636083374, "learning_rate": 3.680164500910646e-06, "loss": 0.2586, "step": 2834 }, { "epoch": 1.4531009738595593, "grad_norm": 0.9395696154493071, "learning_rate": 3.6737325435370376e-06, "loss": 0.2663, "step": 2835 }, { "epoch": 1.4536135315222962, "grad_norm": 0.8826903925977674, "learning_rate": 3.667304946751011e-06, "loss": 0.2504, "step": 2836 }, { "epoch": 1.4541260891850334, "grad_norm": 0.9068477209736094, "learning_rate": 3.660881714982999e-06, "loss": 0.2325, "step": 2837 }, { "epoch": 1.4546386468477703, "grad_norm": 0.9608612608998842, "learning_rate": 3.654462852660423e-06, "loss": 0.2968, "step": 2838 }, { "epoch": 1.4551512045105075, "grad_norm": 0.8395421525199853, "learning_rate": 3.648048364207698e-06, "loss": 0.238, "step": 2839 }, { "epoch": 1.4556637621732444, "grad_norm": 0.8331831219004401, "learning_rate": 3.641638254046218e-06, "loss": 0.2621, "step": 2840 }, { "epoch": 1.4561763198359816, "grad_norm": 0.9745392297525114, "learning_rate": 3.6352325265943634e-06, "loss": 0.2721, "step": 2841 }, { "epoch": 1.4566888774987186, "grad_norm": 0.8595437081753077, "learning_rate": 3.6288311862674885e-06, "loss": 0.2336, "step": 2842 }, { "epoch": 1.4572014351614557, "grad_norm": 1.012659489990274, "learning_rate": 3.6224342374779365e-06, "loss": 0.2976, "step": 2843 }, { "epoch": 1.4577139928241927, "grad_norm": 0.9104168910845732, "learning_rate": 3.6160416846350056e-06, "loss": 0.2498, "step": 2844 }, { "epoch": 1.4582265504869298, "grad_norm": 0.9352737427257128, "learning_rate": 3.609653532144972e-06, "loss": 0.2725, "step": 2845 }, { "epoch": 1.4587391081496668, "grad_norm": 0.8607766908283789, "learning_rate": 3.6032697844110896e-06, "loss": 0.2565, "step": 2846 }, { "epoch": 1.459251665812404, "grad_norm": 0.8899219097047468, "learning_rate": 3.5968904458335573e-06, "loss": 0.2635, "step": 2847 }, { "epoch": 1.459764223475141, "grad_norm": 0.9049474854151809, "learning_rate": 3.590515520809542e-06, "loss": 0.2685, "step": 2848 }, { "epoch": 1.460276781137878, "grad_norm": 0.8714010471141992, "learning_rate": 3.584145013733178e-06, "loss": 0.2636, "step": 2849 }, { "epoch": 1.460789338800615, "grad_norm": 0.9014370396650262, "learning_rate": 3.5777789289955454e-06, "loss": 0.2842, "step": 2850 }, { "epoch": 1.4613018964633522, "grad_norm": 0.9354405984253228, "learning_rate": 3.5714172709846686e-06, "loss": 0.2548, "step": 2851 }, { "epoch": 1.4618144541260891, "grad_norm": 0.899687386231357, "learning_rate": 3.5650600440855375e-06, "loss": 0.2416, "step": 2852 }, { "epoch": 1.4623270117888263, "grad_norm": 1.0518192161004938, "learning_rate": 3.558707252680078e-06, "loss": 0.3165, "step": 2853 }, { "epoch": 1.4628395694515632, "grad_norm": 0.9696176589008871, "learning_rate": 3.5523589011471592e-06, "loss": 0.3058, "step": 2854 }, { "epoch": 1.4633521271143004, "grad_norm": 1.0108027599797875, "learning_rate": 3.546014993862591e-06, "loss": 0.2936, "step": 2855 }, { "epoch": 1.4638646847770374, "grad_norm": 0.8878339700607274, "learning_rate": 3.5396755351991197e-06, "loss": 0.2732, "step": 2856 }, { "epoch": 1.4643772424397745, "grad_norm": 0.9692902278611001, "learning_rate": 3.5333405295264255e-06, "loss": 0.2793, "step": 2857 }, { "epoch": 1.4648898001025115, "grad_norm": 0.9435142945995173, "learning_rate": 3.527009981211119e-06, "loss": 0.2929, "step": 2858 }, { "epoch": 1.4654023577652486, "grad_norm": 0.9401762021125248, "learning_rate": 3.520683894616739e-06, "loss": 0.2755, "step": 2859 }, { "epoch": 1.4659149154279856, "grad_norm": 0.899926184953504, "learning_rate": 3.5143622741037432e-06, "loss": 0.2783, "step": 2860 }, { "epoch": 1.4664274730907227, "grad_norm": 0.9154782293816467, "learning_rate": 3.5080451240295288e-06, "loss": 0.2568, "step": 2861 }, { "epoch": 1.4669400307534597, "grad_norm": 0.9191627394769522, "learning_rate": 3.5017324487483873e-06, "loss": 0.255, "step": 2862 }, { "epoch": 1.4674525884161969, "grad_norm": 0.8640960697354293, "learning_rate": 3.495424252611538e-06, "loss": 0.2532, "step": 2863 }, { "epoch": 1.4679651460789338, "grad_norm": 0.8642312285232415, "learning_rate": 3.4891205399671226e-06, "loss": 0.2547, "step": 2864 }, { "epoch": 1.468477703741671, "grad_norm": 0.9855269053220901, "learning_rate": 3.4828213151601695e-06, "loss": 0.2789, "step": 2865 }, { "epoch": 1.468990261404408, "grad_norm": 0.9559995478517753, "learning_rate": 3.47652658253263e-06, "loss": 0.2636, "step": 2866 }, { "epoch": 1.469502819067145, "grad_norm": 1.015199369991651, "learning_rate": 3.4702363464233604e-06, "loss": 0.2755, "step": 2867 }, { "epoch": 1.470015376729882, "grad_norm": 0.9507617033081368, "learning_rate": 3.463950611168111e-06, "loss": 0.2696, "step": 2868 }, { "epoch": 1.4705279343926192, "grad_norm": 0.955436101853441, "learning_rate": 3.457669381099523e-06, "loss": 0.2578, "step": 2869 }, { "epoch": 1.4710404920553564, "grad_norm": 0.8634765973549983, "learning_rate": 3.4513926605471504e-06, "loss": 0.2606, "step": 2870 }, { "epoch": 1.4715530497180933, "grad_norm": 0.9607192821491113, "learning_rate": 3.445120453837424e-06, "loss": 0.2775, "step": 2871 }, { "epoch": 1.4720656073808303, "grad_norm": 0.9668960785390576, "learning_rate": 3.438852765293671e-06, "loss": 0.2938, "step": 2872 }, { "epoch": 1.4725781650435674, "grad_norm": 0.930415446335996, "learning_rate": 3.432589599236099e-06, "loss": 0.2664, "step": 2873 }, { "epoch": 1.4730907227063046, "grad_norm": 1.0156246744318056, "learning_rate": 3.4263309599818017e-06, "loss": 0.2848, "step": 2874 }, { "epoch": 1.4736032803690415, "grad_norm": 0.9020062729831978, "learning_rate": 3.4200768518447524e-06, "loss": 0.2617, "step": 2875 }, { "epoch": 1.4741158380317785, "grad_norm": 1.0243794422415022, "learning_rate": 3.4138272791357995e-06, "loss": 0.2665, "step": 2876 }, { "epoch": 1.4746283956945156, "grad_norm": 0.9026537144863527, "learning_rate": 3.4075822461626663e-06, "loss": 0.2351, "step": 2877 }, { "epoch": 1.4751409533572528, "grad_norm": 0.8800888119696689, "learning_rate": 3.4013417572299446e-06, "loss": 0.2443, "step": 2878 }, { "epoch": 1.4756535110199898, "grad_norm": 0.8210924077883569, "learning_rate": 3.395105816639106e-06, "loss": 0.2218, "step": 2879 }, { "epoch": 1.4761660686827267, "grad_norm": 0.9757033032883972, "learning_rate": 3.3888744286884657e-06, "loss": 0.3016, "step": 2880 }, { "epoch": 1.4766786263454639, "grad_norm": 0.9879207425836162, "learning_rate": 3.3826475976732144e-06, "loss": 0.2604, "step": 2881 }, { "epoch": 1.477191184008201, "grad_norm": 0.9758543660114087, "learning_rate": 3.37642532788541e-06, "loss": 0.2814, "step": 2882 }, { "epoch": 1.477703741670938, "grad_norm": 0.9300067881657181, "learning_rate": 3.3702076236139402e-06, "loss": 0.2475, "step": 2883 }, { "epoch": 1.478216299333675, "grad_norm": 0.9255764420933944, "learning_rate": 3.363994489144575e-06, "loss": 0.2592, "step": 2884 }, { "epoch": 1.478728856996412, "grad_norm": 0.9140993017029834, "learning_rate": 3.357785928759917e-06, "loss": 0.2527, "step": 2885 }, { "epoch": 1.4792414146591493, "grad_norm": 0.9970941612927864, "learning_rate": 3.3515819467394184e-06, "loss": 0.2764, "step": 2886 }, { "epoch": 1.4797539723218862, "grad_norm": 0.9994220019423996, "learning_rate": 3.3453825473593805e-06, "loss": 0.2769, "step": 2887 }, { "epoch": 1.4802665299846232, "grad_norm": 0.9357982698030626, "learning_rate": 3.339187734892941e-06, "loss": 0.2797, "step": 2888 }, { "epoch": 1.4807790876473603, "grad_norm": 0.8840200154138708, "learning_rate": 3.3329975136100787e-06, "loss": 0.2323, "step": 2889 }, { "epoch": 1.4812916453100975, "grad_norm": 0.9282165850628465, "learning_rate": 3.326811887777607e-06, "loss": 0.2715, "step": 2890 }, { "epoch": 1.4818042029728344, "grad_norm": 0.9162239612194758, "learning_rate": 3.320630861659171e-06, "loss": 0.2436, "step": 2891 }, { "epoch": 1.4823167606355714, "grad_norm": 0.893708074690087, "learning_rate": 3.3144544395152457e-06, "loss": 0.2967, "step": 2892 }, { "epoch": 1.4828293182983086, "grad_norm": 0.9062668091235521, "learning_rate": 3.308282625603134e-06, "loss": 0.2828, "step": 2893 }, { "epoch": 1.4833418759610457, "grad_norm": 0.940990526082368, "learning_rate": 3.3021154241769606e-06, "loss": 0.2866, "step": 2894 }, { "epoch": 1.4838544336237827, "grad_norm": 0.9072132118026028, "learning_rate": 3.2959528394876703e-06, "loss": 0.2632, "step": 2895 }, { "epoch": 1.4843669912865196, "grad_norm": 0.8867677645281544, "learning_rate": 3.2897948757830256e-06, "loss": 0.2534, "step": 2896 }, { "epoch": 1.4848795489492568, "grad_norm": 0.8699592327971473, "learning_rate": 3.283641537307615e-06, "loss": 0.2482, "step": 2897 }, { "epoch": 1.485392106611994, "grad_norm": 0.7952595939451567, "learning_rate": 3.2774928283028153e-06, "loss": 0.2266, "step": 2898 }, { "epoch": 1.485904664274731, "grad_norm": 0.8799661852191036, "learning_rate": 3.2713487530068356e-06, "loss": 0.2387, "step": 2899 }, { "epoch": 1.4864172219374678, "grad_norm": 0.9830347381040068, "learning_rate": 3.2652093156546795e-06, "loss": 0.3002, "step": 2900 }, { "epoch": 1.486929779600205, "grad_norm": 0.949524743529141, "learning_rate": 3.2590745204781537e-06, "loss": 0.2739, "step": 2901 }, { "epoch": 1.4874423372629422, "grad_norm": 0.9972775612314055, "learning_rate": 3.2529443717058693e-06, "loss": 0.2845, "step": 2902 }, { "epoch": 1.4879548949256791, "grad_norm": 0.9371708541711541, "learning_rate": 3.246818873563232e-06, "loss": 0.2676, "step": 2903 }, { "epoch": 1.488467452588416, "grad_norm": 0.9589691098159994, "learning_rate": 3.2406980302724422e-06, "loss": 0.2792, "step": 2904 }, { "epoch": 1.4889800102511532, "grad_norm": 0.9036067442741085, "learning_rate": 3.2345818460524927e-06, "loss": 0.2517, "step": 2905 }, { "epoch": 1.4894925679138904, "grad_norm": 0.9266448319119157, "learning_rate": 3.228470325119164e-06, "loss": 0.2737, "step": 2906 }, { "epoch": 1.4900051255766273, "grad_norm": 0.9245763426161948, "learning_rate": 3.2223634716850227e-06, "loss": 0.2332, "step": 2907 }, { "epoch": 1.4905176832393645, "grad_norm": 0.9290119455841875, "learning_rate": 3.216261289959417e-06, "loss": 0.2386, "step": 2908 }, { "epoch": 1.4910302409021015, "grad_norm": 0.9536568233264159, "learning_rate": 3.210163784148478e-06, "loss": 0.291, "step": 2909 }, { "epoch": 1.4915427985648386, "grad_norm": 0.9573308308145381, "learning_rate": 3.20407095845511e-06, "loss": 0.2813, "step": 2910 }, { "epoch": 1.4920553562275756, "grad_norm": 0.9673666848878478, "learning_rate": 3.197982817078994e-06, "loss": 0.2755, "step": 2911 }, { "epoch": 1.4925679138903127, "grad_norm": 0.9763050265808352, "learning_rate": 3.191899364216581e-06, "loss": 0.3332, "step": 2912 }, { "epoch": 1.4930804715530497, "grad_norm": 0.9318035832522025, "learning_rate": 3.1858206040610883e-06, "loss": 0.2747, "step": 2913 }, { "epoch": 1.4935930292157868, "grad_norm": 0.9911227330373943, "learning_rate": 3.179746540802506e-06, "loss": 0.294, "step": 2914 }, { "epoch": 1.4941055868785238, "grad_norm": 0.9782382450831053, "learning_rate": 3.1736771786275834e-06, "loss": 0.2893, "step": 2915 }, { "epoch": 1.494618144541261, "grad_norm": 5.10310724223168, "learning_rate": 3.1676125217198175e-06, "loss": 0.3985, "step": 2916 }, { "epoch": 1.495130702203998, "grad_norm": 0.8826933668156117, "learning_rate": 3.16155257425948e-06, "loss": 0.2679, "step": 2917 }, { "epoch": 1.495643259866735, "grad_norm": 0.8539058570681772, "learning_rate": 3.155497340423588e-06, "loss": 0.2657, "step": 2918 }, { "epoch": 1.496155817529472, "grad_norm": 0.917587495079918, "learning_rate": 3.149446824385909e-06, "loss": 0.2771, "step": 2919 }, { "epoch": 1.4966683751922092, "grad_norm": 0.8250197052496947, "learning_rate": 3.1434010303169593e-06, "loss": 0.2435, "step": 2920 }, { "epoch": 1.4971809328549461, "grad_norm": 0.9210829123866997, "learning_rate": 3.1373599623840024e-06, "loss": 0.2538, "step": 2921 }, { "epoch": 1.4976934905176833, "grad_norm": 0.9585754605322543, "learning_rate": 3.1313236247510414e-06, "loss": 0.2803, "step": 2922 }, { "epoch": 1.4982060481804202, "grad_norm": 0.9276047863973957, "learning_rate": 3.125292021578822e-06, "loss": 0.2794, "step": 2923 }, { "epoch": 1.4987186058431574, "grad_norm": 0.882675647159133, "learning_rate": 3.1192651570248232e-06, "loss": 0.2592, "step": 2924 }, { "epoch": 1.4992311635058944, "grad_norm": 1.019867847324855, "learning_rate": 3.11324303524326e-06, "loss": 0.3023, "step": 2925 }, { "epoch": 1.4997437211686315, "grad_norm": 0.9557394996868389, "learning_rate": 3.107225660385077e-06, "loss": 0.2531, "step": 2926 }, { "epoch": 1.5002562788313685, "grad_norm": 0.913944765907408, "learning_rate": 3.1012130365979476e-06, "loss": 0.2565, "step": 2927 }, { "epoch": 1.5007688364941056, "grad_norm": 0.9801551665784533, "learning_rate": 3.0952051680262675e-06, "loss": 0.2938, "step": 2928 }, { "epoch": 1.5012813941568428, "grad_norm": 0.9269471344720053, "learning_rate": 3.089202058811166e-06, "loss": 0.2487, "step": 2929 }, { "epoch": 1.5017939518195798, "grad_norm": 0.8501345807912941, "learning_rate": 3.0832037130904748e-06, "loss": 0.2074, "step": 2930 }, { "epoch": 1.5023065094823167, "grad_norm": 0.9414892873344449, "learning_rate": 3.0772101349987506e-06, "loss": 0.2765, "step": 2931 }, { "epoch": 1.5028190671450539, "grad_norm": 0.9287178030850809, "learning_rate": 3.0712213286672686e-06, "loss": 0.2712, "step": 2932 }, { "epoch": 1.503331624807791, "grad_norm": 1.0231415375594635, "learning_rate": 3.065237298224013e-06, "loss": 0.2977, "step": 2933 }, { "epoch": 1.503844182470528, "grad_norm": 1.0000996046388937, "learning_rate": 3.0592580477936606e-06, "loss": 0.2748, "step": 2934 }, { "epoch": 1.504356740133265, "grad_norm": 0.9290128740482224, "learning_rate": 3.0532835814976193e-06, "loss": 0.2376, "step": 2935 }, { "epoch": 1.504869297796002, "grad_norm": 0.9333506782644444, "learning_rate": 3.047313903453979e-06, "loss": 0.2562, "step": 2936 }, { "epoch": 1.5053818554587393, "grad_norm": 0.9045204828524452, "learning_rate": 3.0413490177775397e-06, "loss": 0.2479, "step": 2937 }, { "epoch": 1.5058944131214762, "grad_norm": 0.9415156955363759, "learning_rate": 3.035388928579792e-06, "loss": 0.2528, "step": 2938 }, { "epoch": 1.5064069707842132, "grad_norm": 0.9204945075017148, "learning_rate": 3.029433639968925e-06, "loss": 0.2524, "step": 2939 }, { "epoch": 1.5069195284469503, "grad_norm": 0.9554516875935021, "learning_rate": 3.023483156049817e-06, "loss": 0.2684, "step": 2940 }, { "epoch": 1.5074320861096875, "grad_norm": 0.9822834080727725, "learning_rate": 3.017537480924032e-06, "loss": 0.2937, "step": 2941 }, { "epoch": 1.5079446437724244, "grad_norm": 1.0155645821723298, "learning_rate": 3.011596618689825e-06, "loss": 0.2897, "step": 2942 }, { "epoch": 1.5084572014351614, "grad_norm": 0.9318102145434137, "learning_rate": 3.0056605734421253e-06, "loss": 0.2764, "step": 2943 }, { "epoch": 1.5089697590978985, "grad_norm": 0.9309495097113157, "learning_rate": 2.9997293492725577e-06, "loss": 0.2756, "step": 2944 }, { "epoch": 1.5094823167606357, "grad_norm": 0.9227307479384412, "learning_rate": 2.993802950269402e-06, "loss": 0.2518, "step": 2945 }, { "epoch": 1.5099948744233727, "grad_norm": 0.9184605300362723, "learning_rate": 2.9878813805176252e-06, "loss": 0.2668, "step": 2946 }, { "epoch": 1.5105074320861096, "grad_norm": 0.9958515870153322, "learning_rate": 2.9819646440988713e-06, "loss": 0.2981, "step": 2947 }, { "epoch": 1.5110199897488468, "grad_norm": 0.8800824090081159, "learning_rate": 2.976052745091438e-06, "loss": 0.2686, "step": 2948 }, { "epoch": 1.511532547411584, "grad_norm": 0.9104728318676808, "learning_rate": 2.9701456875702937e-06, "loss": 0.2701, "step": 2949 }, { "epoch": 1.5120451050743209, "grad_norm": 0.933818564399082, "learning_rate": 2.9642434756070793e-06, "loss": 0.2625, "step": 2950 }, { "epoch": 1.5125576627370578, "grad_norm": 1.0973103159085176, "learning_rate": 2.9583461132700875e-06, "loss": 0.2988, "step": 2951 }, { "epoch": 1.513070220399795, "grad_norm": 0.8600416171964511, "learning_rate": 2.9524536046242603e-06, "loss": 0.2527, "step": 2952 }, { "epoch": 1.5135827780625322, "grad_norm": 0.884314269228955, "learning_rate": 2.946565953731211e-06, "loss": 0.2466, "step": 2953 }, { "epoch": 1.514095335725269, "grad_norm": 0.8896955358389309, "learning_rate": 2.940683164649194e-06, "loss": 0.2686, "step": 2954 }, { "epoch": 1.514607893388006, "grad_norm": 0.9957554409128909, "learning_rate": 2.934805241433115e-06, "loss": 0.2851, "step": 2955 }, { "epoch": 1.5151204510507432, "grad_norm": 0.8681724299023543, "learning_rate": 2.9289321881345257e-06, "loss": 0.236, "step": 2956 }, { "epoch": 1.5156330087134804, "grad_norm": 0.9687261641733917, "learning_rate": 2.9230640088016204e-06, "loss": 0.2942, "step": 2957 }, { "epoch": 1.5161455663762173, "grad_norm": 0.911059551653334, "learning_rate": 2.9172007074792342e-06, "loss": 0.2657, "step": 2958 }, { "epoch": 1.5166581240389543, "grad_norm": 0.7809715290697508, "learning_rate": 2.9113422882088403e-06, "loss": 0.2213, "step": 2959 }, { "epoch": 1.5171706817016914, "grad_norm": 1.0212543255603315, "learning_rate": 2.9054887550285483e-06, "loss": 0.2544, "step": 2960 }, { "epoch": 1.5176832393644286, "grad_norm": 0.943234476065869, "learning_rate": 2.8996401119730923e-06, "loss": 0.2846, "step": 2961 }, { "epoch": 1.5181957970271656, "grad_norm": 1.0190864063951752, "learning_rate": 2.8937963630738517e-06, "loss": 0.306, "step": 2962 }, { "epoch": 1.5187083546899025, "grad_norm": 0.9048316183249655, "learning_rate": 2.887957512358813e-06, "loss": 0.2701, "step": 2963 }, { "epoch": 1.5192209123526397, "grad_norm": 0.9655036333328602, "learning_rate": 2.882123563852598e-06, "loss": 0.2986, "step": 2964 }, { "epoch": 1.5197334700153768, "grad_norm": 0.8084801291079287, "learning_rate": 2.87629452157645e-06, "loss": 0.2107, "step": 2965 }, { "epoch": 1.5202460276781138, "grad_norm": 0.9050726538211187, "learning_rate": 2.87047038954823e-06, "loss": 0.2402, "step": 2966 }, { "epoch": 1.5207585853408507, "grad_norm": 0.9675364922453477, "learning_rate": 2.864651171782402e-06, "loss": 0.2694, "step": 2967 }, { "epoch": 1.521271143003588, "grad_norm": 0.9449892757739133, "learning_rate": 2.858836872290064e-06, "loss": 0.2766, "step": 2968 }, { "epoch": 1.521783700666325, "grad_norm": 0.950242491766011, "learning_rate": 2.8530274950789095e-06, "loss": 0.3004, "step": 2969 }, { "epoch": 1.522296258329062, "grad_norm": 0.9097289822741039, "learning_rate": 2.8472230441532365e-06, "loss": 0.2527, "step": 2970 }, { "epoch": 1.522808815991799, "grad_norm": 0.9907053873371402, "learning_rate": 2.841423523513961e-06, "loss": 0.2913, "step": 2971 }, { "epoch": 1.5233213736545361, "grad_norm": 1.040284125933358, "learning_rate": 2.835628937158591e-06, "loss": 0.2799, "step": 2972 }, { "epoch": 1.5238339313172733, "grad_norm": 0.940316349729763, "learning_rate": 2.8298392890812344e-06, "loss": 0.271, "step": 2973 }, { "epoch": 1.5243464889800102, "grad_norm": 0.9474378915346555, "learning_rate": 2.8240545832725963e-06, "loss": 0.2454, "step": 2974 }, { "epoch": 1.5248590466427472, "grad_norm": 1.0047018774542393, "learning_rate": 2.8182748237199755e-06, "loss": 0.3012, "step": 2975 }, { "epoch": 1.5253716043054844, "grad_norm": 0.9952396006202386, "learning_rate": 2.8125000144072602e-06, "loss": 0.2769, "step": 2976 }, { "epoch": 1.5258841619682215, "grad_norm": 0.9565037985529744, "learning_rate": 2.806730159314929e-06, "loss": 0.2569, "step": 2977 }, { "epoch": 1.5263967196309585, "grad_norm": 0.9155717564722826, "learning_rate": 2.8009652624200436e-06, "loss": 0.2801, "step": 2978 }, { "epoch": 1.5269092772936954, "grad_norm": 0.9082848917377812, "learning_rate": 2.7952053276962444e-06, "loss": 0.2593, "step": 2979 }, { "epoch": 1.5274218349564326, "grad_norm": 0.8993580727996137, "learning_rate": 2.7894503591137656e-06, "loss": 0.2823, "step": 2980 }, { "epoch": 1.5279343926191697, "grad_norm": 0.9600175789543349, "learning_rate": 2.7837003606393987e-06, "loss": 0.3104, "step": 2981 }, { "epoch": 1.5284469502819067, "grad_norm": 0.9079822511665991, "learning_rate": 2.7779553362365184e-06, "loss": 0.2839, "step": 2982 }, { "epoch": 1.5289595079446436, "grad_norm": 0.9690264900301948, "learning_rate": 2.7722152898650788e-06, "loss": 0.2962, "step": 2983 }, { "epoch": 1.5294720656073808, "grad_norm": 0.8828788495131955, "learning_rate": 2.766480225481595e-06, "loss": 0.2418, "step": 2984 }, { "epoch": 1.529984623270118, "grad_norm": 0.9017825344460292, "learning_rate": 2.7607501470391384e-06, "loss": 0.2695, "step": 2985 }, { "epoch": 1.530497180932855, "grad_norm": 0.8421281978916403, "learning_rate": 2.755025058487364e-06, "loss": 0.2509, "step": 2986 }, { "epoch": 1.5310097385955919, "grad_norm": 1.0063604797824715, "learning_rate": 2.7493049637724724e-06, "loss": 0.3082, "step": 2987 }, { "epoch": 1.531522296258329, "grad_norm": 0.8713640453910338, "learning_rate": 2.7435898668372296e-06, "loss": 0.2655, "step": 2988 }, { "epoch": 1.5320348539210662, "grad_norm": 0.989610421705511, "learning_rate": 2.7378797716209506e-06, "loss": 0.2768, "step": 2989 }, { "epoch": 1.5325474115838031, "grad_norm": 0.9824804813299796, "learning_rate": 2.7321746820595084e-06, "loss": 0.2746, "step": 2990 }, { "epoch": 1.53305996924654, "grad_norm": 0.9118341175526858, "learning_rate": 2.7264746020853217e-06, "loss": 0.2514, "step": 2991 }, { "epoch": 1.5335725269092773, "grad_norm": 0.9162398064029887, "learning_rate": 2.720779535627359e-06, "loss": 0.2627, "step": 2992 }, { "epoch": 1.5340850845720144, "grad_norm": 0.9357152546286692, "learning_rate": 2.715089486611132e-06, "loss": 0.2734, "step": 2993 }, { "epoch": 1.5345976422347514, "grad_norm": 0.95561128512342, "learning_rate": 2.709404458958693e-06, "loss": 0.2861, "step": 2994 }, { "epoch": 1.5351101998974883, "grad_norm": 0.9985967595505099, "learning_rate": 2.7037244565886335e-06, "loss": 0.2734, "step": 2995 }, { "epoch": 1.5356227575602255, "grad_norm": 0.9144512923175704, "learning_rate": 2.698049483416083e-06, "loss": 0.2814, "step": 2996 }, { "epoch": 1.5361353152229626, "grad_norm": 0.9931053682346808, "learning_rate": 2.692379543352699e-06, "loss": 0.3076, "step": 2997 }, { "epoch": 1.5366478728856996, "grad_norm": 0.9260600737953205, "learning_rate": 2.6867146403066833e-06, "loss": 0.294, "step": 2998 }, { "epoch": 1.5371604305484365, "grad_norm": 1.0391609046982593, "learning_rate": 2.681054778182748e-06, "loss": 0.2921, "step": 2999 }, { "epoch": 1.5376729882111737, "grad_norm": 0.913838152431741, "learning_rate": 2.675399960882138e-06, "loss": 0.2822, "step": 3000 }, { "epoch": 1.5381855458739109, "grad_norm": 0.9731633002587958, "learning_rate": 2.669750192302628e-06, "loss": 0.2851, "step": 3001 }, { "epoch": 1.5386981035366478, "grad_norm": 0.9172510381525253, "learning_rate": 2.6641054763385044e-06, "loss": 0.2879, "step": 3002 }, { "epoch": 1.5392106611993848, "grad_norm": 0.9235213013584276, "learning_rate": 2.658465816880572e-06, "loss": 0.244, "step": 3003 }, { "epoch": 1.539723218862122, "grad_norm": 0.9498429613481342, "learning_rate": 2.652831217816151e-06, "loss": 0.2747, "step": 3004 }, { "epoch": 1.540235776524859, "grad_norm": 0.9785170066760988, "learning_rate": 2.647201683029075e-06, "loss": 0.2642, "step": 3005 }, { "epoch": 1.540748334187596, "grad_norm": 1.0097349127598378, "learning_rate": 2.6415772163996845e-06, "loss": 0.2752, "step": 3006 }, { "epoch": 1.5412608918503332, "grad_norm": 0.877344609475811, "learning_rate": 2.6359578218048287e-06, "loss": 0.2465, "step": 3007 }, { "epoch": 1.5417734495130704, "grad_norm": 0.9347152539403311, "learning_rate": 2.630343503117859e-06, "loss": 0.2567, "step": 3008 }, { "epoch": 1.5422860071758073, "grad_norm": 0.9052020007369581, "learning_rate": 2.6247342642086304e-06, "loss": 0.2582, "step": 3009 }, { "epoch": 1.5427985648385443, "grad_norm": 0.9373432438718463, "learning_rate": 2.619130108943494e-06, "loss": 0.2734, "step": 3010 }, { "epoch": 1.5433111225012814, "grad_norm": 0.9452541677468378, "learning_rate": 2.6135310411852977e-06, "loss": 0.2723, "step": 3011 }, { "epoch": 1.5438236801640186, "grad_norm": 1.047587426752156, "learning_rate": 2.6079370647933834e-06, "loss": 0.29, "step": 3012 }, { "epoch": 1.5443362378267556, "grad_norm": 1.0217701580008727, "learning_rate": 2.602348183623582e-06, "loss": 0.3095, "step": 3013 }, { "epoch": 1.5448487954894925, "grad_norm": 0.8935868798609808, "learning_rate": 2.5967644015282146e-06, "loss": 0.2455, "step": 3014 }, { "epoch": 1.5453613531522297, "grad_norm": 0.9587789171310719, "learning_rate": 2.591185722356082e-06, "loss": 0.2798, "step": 3015 }, { "epoch": 1.5458739108149668, "grad_norm": 0.9263206768774349, "learning_rate": 2.5856121499524832e-06, "loss": 0.2968, "step": 3016 }, { "epoch": 1.5463864684777038, "grad_norm": 1.0284367509841057, "learning_rate": 2.580043688159172e-06, "loss": 0.2965, "step": 3017 }, { "epoch": 1.5468990261404407, "grad_norm": 0.8984319214057288, "learning_rate": 2.5744803408144026e-06, "loss": 0.2615, "step": 3018 }, { "epoch": 1.547411583803178, "grad_norm": 1.0173145287705285, "learning_rate": 2.568922111752893e-06, "loss": 0.2724, "step": 3019 }, { "epoch": 1.547924141465915, "grad_norm": 0.9621962886142117, "learning_rate": 2.5633690048058346e-06, "loss": 0.2653, "step": 3020 }, { "epoch": 1.548436699128652, "grad_norm": 0.9531258174390543, "learning_rate": 2.5578210238008883e-06, "loss": 0.2498, "step": 3021 }, { "epoch": 1.548949256791389, "grad_norm": 0.9950416231109651, "learning_rate": 2.5522781725621814e-06, "loss": 0.2873, "step": 3022 }, { "epoch": 1.5494618144541261, "grad_norm": 0.9082772688776998, "learning_rate": 2.5467404549103094e-06, "loss": 0.2505, "step": 3023 }, { "epoch": 1.5499743721168633, "grad_norm": 0.9053109292475181, "learning_rate": 2.5412078746623225e-06, "loss": 0.2478, "step": 3024 }, { "epoch": 1.5504869297796002, "grad_norm": 0.9334436768352585, "learning_rate": 2.5356804356317345e-06, "loss": 0.2538, "step": 3025 }, { "epoch": 1.5509994874423372, "grad_norm": 0.8778683217025612, "learning_rate": 2.530158141628515e-06, "loss": 0.2316, "step": 3026 }, { "epoch": 1.5515120451050743, "grad_norm": 0.9889675775723976, "learning_rate": 2.5246409964590855e-06, "loss": 0.2781, "step": 3027 }, { "epoch": 1.5520246027678115, "grad_norm": 0.96449546193123, "learning_rate": 2.5191290039263203e-06, "loss": 0.2707, "step": 3028 }, { "epoch": 1.5525371604305485, "grad_norm": 0.9328426088326218, "learning_rate": 2.5136221678295413e-06, "loss": 0.2958, "step": 3029 }, { "epoch": 1.5530497180932854, "grad_norm": 0.9766759509262092, "learning_rate": 2.508120491964512e-06, "loss": 0.2769, "step": 3030 }, { "epoch": 1.5535622757560226, "grad_norm": 0.9231236899259703, "learning_rate": 2.502623980123453e-06, "loss": 0.2439, "step": 3031 }, { "epoch": 1.5540748334187597, "grad_norm": 0.9047867451578613, "learning_rate": 2.4971326360950032e-06, "loss": 0.2791, "step": 3032 }, { "epoch": 1.5545873910814967, "grad_norm": 0.9102734916209887, "learning_rate": 2.491646463664261e-06, "loss": 0.2765, "step": 3033 }, { "epoch": 1.5550999487442336, "grad_norm": 0.877422319897847, "learning_rate": 2.486165466612751e-06, "loss": 0.2437, "step": 3034 }, { "epoch": 1.5556125064069708, "grad_norm": 0.9122307897328242, "learning_rate": 2.4806896487184207e-06, "loss": 0.2681, "step": 3035 }, { "epoch": 1.556125064069708, "grad_norm": 0.951371074545274, "learning_rate": 2.4752190137556676e-06, "loss": 0.2936, "step": 3036 }, { "epoch": 1.556637621732445, "grad_norm": 0.9030495313602752, "learning_rate": 2.469753565495303e-06, "loss": 0.2538, "step": 3037 }, { "epoch": 1.5571501793951819, "grad_norm": 0.9077297011581276, "learning_rate": 2.464293307704566e-06, "loss": 0.2637, "step": 3038 }, { "epoch": 1.557662737057919, "grad_norm": 0.8630503216257582, "learning_rate": 2.4588382441471193e-06, "loss": 0.2537, "step": 3039 }, { "epoch": 1.5581752947206562, "grad_norm": 0.8887278924968038, "learning_rate": 2.4533883785830438e-06, "loss": 0.2672, "step": 3040 }, { "epoch": 1.5586878523833931, "grad_norm": 0.9206380988110152, "learning_rate": 2.447943714768839e-06, "loss": 0.2729, "step": 3041 }, { "epoch": 1.55920041004613, "grad_norm": 0.9154725139975625, "learning_rate": 2.4425042564574186e-06, "loss": 0.2681, "step": 3042 }, { "epoch": 1.5597129677088672, "grad_norm": 0.8971101123379861, "learning_rate": 2.437070007398107e-06, "loss": 0.2517, "step": 3043 }, { "epoch": 1.5602255253716044, "grad_norm": 0.9003764854284794, "learning_rate": 2.4316409713366353e-06, "loss": 0.2752, "step": 3044 }, { "epoch": 1.5607380830343414, "grad_norm": 0.9627180536238881, "learning_rate": 2.4262171520151557e-06, "loss": 0.2738, "step": 3045 }, { "epoch": 1.5612506406970783, "grad_norm": 0.8860684429923137, "learning_rate": 2.4207985531722034e-06, "loss": 0.2648, "step": 3046 }, { "epoch": 1.5617631983598155, "grad_norm": 0.9375631629336872, "learning_rate": 2.415385178542725e-06, "loss": 0.2756, "step": 3047 }, { "epoch": 1.5622757560225526, "grad_norm": 0.9885132616105727, "learning_rate": 2.409977031858075e-06, "loss": 0.2647, "step": 3048 }, { "epoch": 1.5627883136852896, "grad_norm": 0.9270464381751471, "learning_rate": 2.4045741168459944e-06, "loss": 0.2807, "step": 3049 }, { "epoch": 1.5633008713480265, "grad_norm": 0.8993916766171696, "learning_rate": 2.3991764372306113e-06, "loss": 0.2575, "step": 3050 }, { "epoch": 1.5638134290107637, "grad_norm": 0.9459196263864654, "learning_rate": 2.393783996732462e-06, "loss": 0.2791, "step": 3051 }, { "epoch": 1.5643259866735009, "grad_norm": 0.9649274725986697, "learning_rate": 2.388396799068463e-06, "loss": 0.274, "step": 3052 }, { "epoch": 1.5648385443362378, "grad_norm": 0.9939619815214988, "learning_rate": 2.38301484795191e-06, "loss": 0.301, "step": 3053 }, { "epoch": 1.5653511019989748, "grad_norm": 0.8955376913612115, "learning_rate": 2.377638147092497e-06, "loss": 0.295, "step": 3054 }, { "epoch": 1.565863659661712, "grad_norm": 0.8393677008218019, "learning_rate": 2.3722667001962898e-06, "loss": 0.2152, "step": 3055 }, { "epoch": 1.566376217324449, "grad_norm": 0.9492349406155677, "learning_rate": 2.366900510965733e-06, "loss": 0.2728, "step": 3056 }, { "epoch": 1.566888774987186, "grad_norm": 1.060930828849367, "learning_rate": 2.361539583099649e-06, "loss": 0.2983, "step": 3057 }, { "epoch": 1.567401332649923, "grad_norm": 0.9192216262731638, "learning_rate": 2.3561839202932344e-06, "loss": 0.2834, "step": 3058 }, { "epoch": 1.5679138903126602, "grad_norm": 0.9463338840373295, "learning_rate": 2.3508335262380542e-06, "loss": 0.2565, "step": 3059 }, { "epoch": 1.5684264479753973, "grad_norm": 0.9586334035460569, "learning_rate": 2.3454884046220463e-06, "loss": 0.2803, "step": 3060 }, { "epoch": 1.5689390056381343, "grad_norm": 0.9475992418421213, "learning_rate": 2.340148559129507e-06, "loss": 0.2609, "step": 3061 }, { "epoch": 1.5694515633008712, "grad_norm": 0.9689286266156858, "learning_rate": 2.3348139934411008e-06, "loss": 0.2591, "step": 3062 }, { "epoch": 1.5699641209636084, "grad_norm": 0.9494686614258021, "learning_rate": 2.329484711233858e-06, "loss": 0.2829, "step": 3063 }, { "epoch": 1.5704766786263455, "grad_norm": 0.9488420221117648, "learning_rate": 2.3241607161811553e-06, "loss": 0.2691, "step": 3064 }, { "epoch": 1.5709892362890825, "grad_norm": 0.9679814461713934, "learning_rate": 2.31884201195273e-06, "loss": 0.2932, "step": 3065 }, { "epoch": 1.5715017939518194, "grad_norm": 0.9211675642113568, "learning_rate": 2.3135286022146785e-06, "loss": 0.2398, "step": 3066 }, { "epoch": 1.5720143516145566, "grad_norm": 0.894880227530057, "learning_rate": 2.3082204906294448e-06, "loss": 0.2608, "step": 3067 }, { "epoch": 1.5725269092772938, "grad_norm": 0.853396159966413, "learning_rate": 2.3029176808558097e-06, "loss": 0.2483, "step": 3068 }, { "epoch": 1.5730394669400307, "grad_norm": 0.8521993484535534, "learning_rate": 2.297620176548916e-06, "loss": 0.2243, "step": 3069 }, { "epoch": 1.5735520246027677, "grad_norm": 0.9534911071174021, "learning_rate": 2.292327981360245e-06, "loss": 0.2479, "step": 3070 }, { "epoch": 1.5740645822655048, "grad_norm": 0.9648955006974802, "learning_rate": 2.2870410989376067e-06, "loss": 0.2814, "step": 3071 }, { "epoch": 1.574577139928242, "grad_norm": 0.8995587466640276, "learning_rate": 2.2817595329251663e-06, "loss": 0.2753, "step": 3072 }, { "epoch": 1.575089697590979, "grad_norm": 1.0194859126523725, "learning_rate": 2.2764832869634134e-06, "loss": 0.2851, "step": 3073 }, { "epoch": 1.5756022552537159, "grad_norm": 0.8612194239929883, "learning_rate": 2.271212364689176e-06, "loss": 0.248, "step": 3074 }, { "epoch": 1.576114812916453, "grad_norm": 0.9971079179073871, "learning_rate": 2.2659467697356074e-06, "loss": 0.29, "step": 3075 }, { "epoch": 1.5766273705791902, "grad_norm": 0.8997382257185657, "learning_rate": 2.2606865057321938e-06, "loss": 0.2635, "step": 3076 }, { "epoch": 1.5771399282419272, "grad_norm": 0.8487682815100842, "learning_rate": 2.255431576304744e-06, "loss": 0.2203, "step": 3077 }, { "epoch": 1.5776524859046641, "grad_norm": 1.0199559878639817, "learning_rate": 2.2501819850753925e-06, "loss": 0.2972, "step": 3078 }, { "epoch": 1.5781650435674013, "grad_norm": 0.9299120390382531, "learning_rate": 2.244937735662589e-06, "loss": 0.2323, "step": 3079 }, { "epoch": 1.5786776012301384, "grad_norm": 0.9699493670288922, "learning_rate": 2.239698831681105e-06, "loss": 0.2876, "step": 3080 }, { "epoch": 1.5791901588928754, "grad_norm": 0.9496467414933759, "learning_rate": 2.2344652767420337e-06, "loss": 0.2777, "step": 3081 }, { "epoch": 1.5797027165556123, "grad_norm": 0.9073324754490092, "learning_rate": 2.229237074452768e-06, "loss": 0.2646, "step": 3082 }, { "epoch": 1.5802152742183497, "grad_norm": 0.9112141692014043, "learning_rate": 2.2240142284170165e-06, "loss": 0.2675, "step": 3083 }, { "epoch": 1.5807278318810867, "grad_norm": 0.9045190715675011, "learning_rate": 2.2187967422348023e-06, "loss": 0.2692, "step": 3084 }, { "epoch": 1.5812403895438236, "grad_norm": 0.9690796857046998, "learning_rate": 2.213584619502451e-06, "loss": 0.2618, "step": 3085 }, { "epoch": 1.5817529472065608, "grad_norm": 0.8850365156785829, "learning_rate": 2.2083778638125796e-06, "loss": 0.2671, "step": 3086 }, { "epoch": 1.582265504869298, "grad_norm": 0.908055593752635, "learning_rate": 2.2031764787541244e-06, "loss": 0.2658, "step": 3087 }, { "epoch": 1.582778062532035, "grad_norm": 1.0040542359091775, "learning_rate": 2.1979804679123108e-06, "loss": 0.2998, "step": 3088 }, { "epoch": 1.5832906201947718, "grad_norm": 0.9645791792286269, "learning_rate": 2.1927898348686515e-06, "loss": 0.2668, "step": 3089 }, { "epoch": 1.583803177857509, "grad_norm": 0.9287912252403352, "learning_rate": 2.1876045832009694e-06, "loss": 0.267, "step": 3090 }, { "epoch": 1.5843157355202462, "grad_norm": 0.8989644124074194, "learning_rate": 2.1824247164833656e-06, "loss": 0.2621, "step": 3091 }, { "epoch": 1.5848282931829831, "grad_norm": 1.008764064555916, "learning_rate": 2.177250238286235e-06, "loss": 0.2664, "step": 3092 }, { "epoch": 1.58534085084572, "grad_norm": 1.0255652250724603, "learning_rate": 2.1720811521762554e-06, "loss": 0.3087, "step": 3093 }, { "epoch": 1.5858534085084572, "grad_norm": 0.8810771945091135, "learning_rate": 2.16691746171639e-06, "loss": 0.2483, "step": 3094 }, { "epoch": 1.5863659661711944, "grad_norm": 0.983949538805996, "learning_rate": 2.161759170465878e-06, "loss": 0.2694, "step": 3095 }, { "epoch": 1.5868785238339314, "grad_norm": 0.9882586201256643, "learning_rate": 2.15660628198025e-06, "loss": 0.3079, "step": 3096 }, { "epoch": 1.5873910814966683, "grad_norm": 1.04775789482594, "learning_rate": 2.1514587998112956e-06, "loss": 0.282, "step": 3097 }, { "epoch": 1.5879036391594055, "grad_norm": 1.0072088334257903, "learning_rate": 2.1463167275070863e-06, "loss": 0.2582, "step": 3098 }, { "epoch": 1.5884161968221426, "grad_norm": 1.0165141488354958, "learning_rate": 2.141180068611971e-06, "loss": 0.294, "step": 3099 }, { "epoch": 1.5889287544848796, "grad_norm": 1.00081796532118, "learning_rate": 2.1360488266665534e-06, "loss": 0.2691, "step": 3100 }, { "epoch": 1.5894413121476165, "grad_norm": 0.8863189458592272, "learning_rate": 2.13092300520771e-06, "loss": 0.2715, "step": 3101 }, { "epoch": 1.5899538698103537, "grad_norm": 1.008239103440743, "learning_rate": 2.125802607768588e-06, "loss": 0.2816, "step": 3102 }, { "epoch": 1.5904664274730909, "grad_norm": 1.0343431321506387, "learning_rate": 2.1206876378785877e-06, "loss": 0.2686, "step": 3103 }, { "epoch": 1.5909789851358278, "grad_norm": 0.901043225810011, "learning_rate": 2.1155780990633644e-06, "loss": 0.2579, "step": 3104 }, { "epoch": 1.5914915427985648, "grad_norm": 0.9601716306350234, "learning_rate": 2.1104739948448415e-06, "loss": 0.2757, "step": 3105 }, { "epoch": 1.592004100461302, "grad_norm": 0.9810957208512532, "learning_rate": 2.1053753287411895e-06, "loss": 0.2563, "step": 3106 }, { "epoch": 1.592516658124039, "grad_norm": 0.8481583073377855, "learning_rate": 2.100282104266829e-06, "loss": 0.2549, "step": 3107 }, { "epoch": 1.593029215786776, "grad_norm": 0.8568026442937204, "learning_rate": 2.0951943249324334e-06, "loss": 0.2469, "step": 3108 }, { "epoch": 1.593541773449513, "grad_norm": 0.8991431905071954, "learning_rate": 2.090111994244923e-06, "loss": 0.254, "step": 3109 }, { "epoch": 1.5940543311122501, "grad_norm": 0.9769802948181475, "learning_rate": 2.08503511570746e-06, "loss": 0.2909, "step": 3110 }, { "epoch": 1.5945668887749873, "grad_norm": 0.9637904185850478, "learning_rate": 2.07996369281945e-06, "loss": 0.2778, "step": 3111 }, { "epoch": 1.5950794464377243, "grad_norm": 0.9579128339983826, "learning_rate": 2.074897729076536e-06, "loss": 0.2404, "step": 3112 }, { "epoch": 1.5955920041004612, "grad_norm": 0.9456664538157867, "learning_rate": 2.069837227970599e-06, "loss": 0.2788, "step": 3113 }, { "epoch": 1.5961045617631984, "grad_norm": 0.9610720668877951, "learning_rate": 2.064782192989765e-06, "loss": 0.2754, "step": 3114 }, { "epoch": 1.5966171194259355, "grad_norm": 0.9058689412081958, "learning_rate": 2.0597326276183716e-06, "loss": 0.2458, "step": 3115 }, { "epoch": 1.5971296770886725, "grad_norm": 0.9655637205635111, "learning_rate": 2.0546885353369984e-06, "loss": 0.2897, "step": 3116 }, { "epoch": 1.5976422347514094, "grad_norm": 0.9147140508935572, "learning_rate": 2.0496499196224605e-06, "loss": 0.2592, "step": 3117 }, { "epoch": 1.5981547924141466, "grad_norm": 0.888612875017725, "learning_rate": 2.0446167839477815e-06, "loss": 0.2584, "step": 3118 }, { "epoch": 1.5986673500768838, "grad_norm": 0.9194925466120846, "learning_rate": 2.0395891317822146e-06, "loss": 0.3072, "step": 3119 }, { "epoch": 1.5991799077396207, "grad_norm": 0.8727247347538305, "learning_rate": 2.0345669665912402e-06, "loss": 0.2364, "step": 3120 }, { "epoch": 1.5996924654023577, "grad_norm": 0.9162486171053389, "learning_rate": 2.0295502918365473e-06, "loss": 0.2545, "step": 3121 }, { "epoch": 1.6002050230650948, "grad_norm": 0.9433702129187357, "learning_rate": 2.0245391109760437e-06, "loss": 0.2862, "step": 3122 }, { "epoch": 1.600717580727832, "grad_norm": 0.8714165155930964, "learning_rate": 2.019533427463851e-06, "loss": 0.2316, "step": 3123 }, { "epoch": 1.601230138390569, "grad_norm": 0.9775391519048258, "learning_rate": 2.0145332447502995e-06, "loss": 0.2959, "step": 3124 }, { "epoch": 1.6017426960533059, "grad_norm": 1.0075764746405056, "learning_rate": 2.009538566281931e-06, "loss": 0.2778, "step": 3125 }, { "epoch": 1.602255253716043, "grad_norm": 0.9275190029532079, "learning_rate": 2.0045493955014915e-06, "loss": 0.2662, "step": 3126 }, { "epoch": 1.6027678113787802, "grad_norm": 0.8812947514256354, "learning_rate": 1.9995657358479293e-06, "loss": 0.2375, "step": 3127 }, { "epoch": 1.6032803690415172, "grad_norm": 0.9799812095268969, "learning_rate": 1.994587590756397e-06, "loss": 0.3109, "step": 3128 }, { "epoch": 1.603792926704254, "grad_norm": 0.8879365705308202, "learning_rate": 1.989614963658244e-06, "loss": 0.2513, "step": 3129 }, { "epoch": 1.6043054843669913, "grad_norm": 0.9081768309404348, "learning_rate": 1.984647857981017e-06, "loss": 0.2743, "step": 3130 }, { "epoch": 1.6048180420297284, "grad_norm": 0.9510644836835995, "learning_rate": 1.9796862771484526e-06, "loss": 0.2784, "step": 3131 }, { "epoch": 1.6053305996924654, "grad_norm": 0.9034042171008027, "learning_rate": 1.9747302245804944e-06, "loss": 0.2474, "step": 3132 }, { "epoch": 1.6058431573552023, "grad_norm": 0.9699750099122548, "learning_rate": 1.969779703693253e-06, "loss": 0.2772, "step": 3133 }, { "epoch": 1.6063557150179395, "grad_norm": 0.9095814703827861, "learning_rate": 1.96483471789904e-06, "loss": 0.2713, "step": 3134 }, { "epoch": 1.6068682726806767, "grad_norm": 0.9517784156657687, "learning_rate": 1.959895270606356e-06, "loss": 0.2883, "step": 3135 }, { "epoch": 1.6073808303434136, "grad_norm": 0.9551581100782973, "learning_rate": 1.9549613652198663e-06, "loss": 0.2652, "step": 3136 }, { "epoch": 1.6078933880061506, "grad_norm": 0.9460640973438449, "learning_rate": 1.950033005140436e-06, "loss": 0.2574, "step": 3137 }, { "epoch": 1.6084059456688877, "grad_norm": 0.9969214221295366, "learning_rate": 1.9451101937650963e-06, "loss": 0.2618, "step": 3138 }, { "epoch": 1.608918503331625, "grad_norm": 0.9409027983707807, "learning_rate": 1.9401929344870563e-06, "loss": 0.2785, "step": 3139 }, { "epoch": 1.6094310609943618, "grad_norm": 0.770506226028296, "learning_rate": 1.9352812306956993e-06, "loss": 0.2034, "step": 3140 }, { "epoch": 1.6099436186570988, "grad_norm": 0.833657770885862, "learning_rate": 1.9303750857765767e-06, "loss": 0.2456, "step": 3141 }, { "epoch": 1.610456176319836, "grad_norm": 0.9972731097185675, "learning_rate": 1.925474503111412e-06, "loss": 0.2798, "step": 3142 }, { "epoch": 1.6109687339825731, "grad_norm": 1.0099147760353655, "learning_rate": 1.920579486078091e-06, "loss": 0.2837, "step": 3143 }, { "epoch": 1.61148129164531, "grad_norm": 0.9541436925203696, "learning_rate": 1.915690038050666e-06, "loss": 0.2565, "step": 3144 }, { "epoch": 1.611993849308047, "grad_norm": 0.8754689728703783, "learning_rate": 1.910806162399349e-06, "loss": 0.2592, "step": 3145 }, { "epoch": 1.6125064069707842, "grad_norm": 0.9826619257835063, "learning_rate": 1.905927862490512e-06, "loss": 0.2405, "step": 3146 }, { "epoch": 1.6130189646335213, "grad_norm": 0.9208596066452231, "learning_rate": 1.901055141686684e-06, "loss": 0.2896, "step": 3147 }, { "epoch": 1.6135315222962583, "grad_norm": 0.99066652197049, "learning_rate": 1.896188003346544e-06, "loss": 0.2861, "step": 3148 }, { "epoch": 1.6140440799589952, "grad_norm": 0.9303405379890741, "learning_rate": 1.891326450824933e-06, "loss": 0.2666, "step": 3149 }, { "epoch": 1.6145566376217324, "grad_norm": 0.9316778829590802, "learning_rate": 1.8864704874728346e-06, "loss": 0.2781, "step": 3150 }, { "epoch": 1.6150691952844696, "grad_norm": 0.9781343315056406, "learning_rate": 1.881620116637375e-06, "loss": 0.2523, "step": 3151 }, { "epoch": 1.6155817529472065, "grad_norm": 0.8533739982369352, "learning_rate": 1.8767753416618372e-06, "loss": 0.2401, "step": 3152 }, { "epoch": 1.6160943106099435, "grad_norm": 0.9681148414073961, "learning_rate": 1.871936165885644e-06, "loss": 0.2816, "step": 3153 }, { "epoch": 1.6166068682726806, "grad_norm": 0.8939919339664452, "learning_rate": 1.8671025926443464e-06, "loss": 0.2459, "step": 3154 }, { "epoch": 1.6171194259354178, "grad_norm": 0.9808086678555905, "learning_rate": 1.8622746252696522e-06, "loss": 0.2929, "step": 3155 }, { "epoch": 1.6176319835981547, "grad_norm": 0.8996379814875792, "learning_rate": 1.8574522670893935e-06, "loss": 0.2636, "step": 3156 }, { "epoch": 1.6181445412608917, "grad_norm": 0.9041383631966607, "learning_rate": 1.8526355214275382e-06, "loss": 0.2382, "step": 3157 }, { "epoch": 1.6186570989236289, "grad_norm": 0.914372236651057, "learning_rate": 1.8478243916041882e-06, "loss": 0.2415, "step": 3158 }, { "epoch": 1.619169656586366, "grad_norm": 1.002783946718652, "learning_rate": 1.843018880935572e-06, "loss": 0.2679, "step": 3159 }, { "epoch": 1.619682214249103, "grad_norm": 0.9094723058950007, "learning_rate": 1.8382189927340454e-06, "loss": 0.2527, "step": 3160 }, { "epoch": 1.6201947719118401, "grad_norm": 0.982893134819631, "learning_rate": 1.8334247303080888e-06, "loss": 0.2774, "step": 3161 }, { "epoch": 1.6207073295745773, "grad_norm": 0.9750823915235637, "learning_rate": 1.828636096962304e-06, "loss": 0.2835, "step": 3162 }, { "epoch": 1.6212198872373142, "grad_norm": 0.9016742148448629, "learning_rate": 1.8238530959974143e-06, "loss": 0.2759, "step": 3163 }, { "epoch": 1.6217324449000512, "grad_norm": 0.9970263145485582, "learning_rate": 1.8190757307102646e-06, "loss": 0.2789, "step": 3164 }, { "epoch": 1.6222450025627884, "grad_norm": 0.9558660461583711, "learning_rate": 1.8143040043938054e-06, "loss": 0.2669, "step": 3165 }, { "epoch": 1.6227575602255255, "grad_norm": 0.9745111120228908, "learning_rate": 1.8095379203371044e-06, "loss": 0.2489, "step": 3166 }, { "epoch": 1.6232701178882625, "grad_norm": 0.9570856980406919, "learning_rate": 1.8047774818253483e-06, "loss": 0.2637, "step": 3167 }, { "epoch": 1.6237826755509994, "grad_norm": 0.8894140444607461, "learning_rate": 1.8000226921398234e-06, "loss": 0.2765, "step": 3168 }, { "epoch": 1.6242952332137366, "grad_norm": 0.8355184737440584, "learning_rate": 1.7952735545579181e-06, "loss": 0.2344, "step": 3169 }, { "epoch": 1.6248077908764738, "grad_norm": 0.8872580356719425, "learning_rate": 1.7905300723531393e-06, "loss": 0.2469, "step": 3170 }, { "epoch": 1.6253203485392107, "grad_norm": 0.9107087038959913, "learning_rate": 1.7857922487950873e-06, "loss": 0.2572, "step": 3171 }, { "epoch": 1.6258329062019476, "grad_norm": 0.8893455479565809, "learning_rate": 1.7810600871494555e-06, "loss": 0.2667, "step": 3172 }, { "epoch": 1.6263454638646848, "grad_norm": 0.9840021640461148, "learning_rate": 1.7763335906780487e-06, "loss": 0.2808, "step": 3173 }, { "epoch": 1.626858021527422, "grad_norm": 0.9705149924282843, "learning_rate": 1.771612762638758e-06, "loss": 0.2789, "step": 3174 }, { "epoch": 1.627370579190159, "grad_norm": 0.9619519210576687, "learning_rate": 1.766897606285568e-06, "loss": 0.2765, "step": 3175 }, { "epoch": 1.6278831368528959, "grad_norm": 0.9531910928582852, "learning_rate": 1.7621881248685569e-06, "loss": 0.2533, "step": 3176 }, { "epoch": 1.628395694515633, "grad_norm": 0.939043412720377, "learning_rate": 1.7574843216338878e-06, "loss": 0.2712, "step": 3177 }, { "epoch": 1.6289082521783702, "grad_norm": 0.9765935525468941, "learning_rate": 1.7527861998238094e-06, "loss": 0.2999, "step": 3178 }, { "epoch": 1.6294208098411072, "grad_norm": 0.8939284671820672, "learning_rate": 1.7480937626766648e-06, "loss": 0.2563, "step": 3179 }, { "epoch": 1.629933367503844, "grad_norm": 0.8591816421469937, "learning_rate": 1.743407013426861e-06, "loss": 0.242, "step": 3180 }, { "epoch": 1.6304459251665813, "grad_norm": 0.9369256870294853, "learning_rate": 1.7387259553048963e-06, "loss": 0.2889, "step": 3181 }, { "epoch": 1.6309584828293184, "grad_norm": 0.8749001276303573, "learning_rate": 1.7340505915373495e-06, "loss": 0.2525, "step": 3182 }, { "epoch": 1.6314710404920554, "grad_norm": 0.9694176045426842, "learning_rate": 1.7293809253468607e-06, "loss": 0.2569, "step": 3183 }, { "epoch": 1.6319835981547923, "grad_norm": 0.9469269357258935, "learning_rate": 1.7247169599521529e-06, "loss": 0.2654, "step": 3184 }, { "epoch": 1.6324961558175295, "grad_norm": 0.9300117325261057, "learning_rate": 1.72005869856802e-06, "loss": 0.2603, "step": 3185 }, { "epoch": 1.6330087134802667, "grad_norm": 0.9782469999446256, "learning_rate": 1.7154061444053239e-06, "loss": 0.2889, "step": 3186 }, { "epoch": 1.6335212711430036, "grad_norm": 0.8862554862215284, "learning_rate": 1.7107593006709799e-06, "loss": 0.2456, "step": 3187 }, { "epoch": 1.6340338288057406, "grad_norm": 0.9567482436926655, "learning_rate": 1.7061181705679874e-06, "loss": 0.2689, "step": 3188 }, { "epoch": 1.6345463864684777, "grad_norm": 0.911251669528268, "learning_rate": 1.7014827572953974e-06, "loss": 0.2556, "step": 3189 }, { "epoch": 1.6350589441312149, "grad_norm": 0.8971641701878208, "learning_rate": 1.6968530640483126e-06, "loss": 0.2689, "step": 3190 }, { "epoch": 1.6355715017939518, "grad_norm": 0.9227134435052149, "learning_rate": 1.6922290940179098e-06, "loss": 0.2562, "step": 3191 }, { "epoch": 1.6360840594566888, "grad_norm": 0.9567036461743617, "learning_rate": 1.687610850391408e-06, "loss": 0.2961, "step": 3192 }, { "epoch": 1.636596617119426, "grad_norm": 0.9242081562777479, "learning_rate": 1.6829983363520842e-06, "loss": 0.2627, "step": 3193 }, { "epoch": 1.6371091747821631, "grad_norm": 0.9458571281337012, "learning_rate": 1.6783915550792652e-06, "loss": 0.2706, "step": 3194 }, { "epoch": 1.6376217324449, "grad_norm": 0.9740537922769512, "learning_rate": 1.6737905097483254e-06, "loss": 0.2653, "step": 3195 }, { "epoch": 1.638134290107637, "grad_norm": 0.9100612409658702, "learning_rate": 1.6691952035306857e-06, "loss": 0.2496, "step": 3196 }, { "epoch": 1.6386468477703742, "grad_norm": 0.8929916315619036, "learning_rate": 1.664605639593818e-06, "loss": 0.2824, "step": 3197 }, { "epoch": 1.6391594054331113, "grad_norm": 0.8998588200818383, "learning_rate": 1.660021821101222e-06, "loss": 0.2662, "step": 3198 }, { "epoch": 1.6396719630958483, "grad_norm": 1.0152500364358343, "learning_rate": 1.6554437512124465e-06, "loss": 0.2466, "step": 3199 }, { "epoch": 1.6401845207585852, "grad_norm": 0.8888032380251969, "learning_rate": 1.6508714330830823e-06, "loss": 0.251, "step": 3200 }, { "epoch": 1.6406970784213224, "grad_norm": 0.8299566193075496, "learning_rate": 1.6463048698647444e-06, "loss": 0.2204, "step": 3201 }, { "epoch": 1.6412096360840596, "grad_norm": 1.0038553750344748, "learning_rate": 1.6417440647050853e-06, "loss": 0.2565, "step": 3202 }, { "epoch": 1.6417221937467965, "grad_norm": 0.9295171178040287, "learning_rate": 1.6371890207477937e-06, "loss": 0.2801, "step": 3203 }, { "epoch": 1.6422347514095335, "grad_norm": 0.9022637788708087, "learning_rate": 1.6326397411325845e-06, "loss": 0.2594, "step": 3204 }, { "epoch": 1.6427473090722706, "grad_norm": 1.0364555744182928, "learning_rate": 1.6280962289951907e-06, "loss": 0.296, "step": 3205 }, { "epoch": 1.6432598667350078, "grad_norm": 0.9423044267634012, "learning_rate": 1.6235584874673848e-06, "loss": 0.2555, "step": 3206 }, { "epoch": 1.6437724243977447, "grad_norm": 1.0264309403681444, "learning_rate": 1.6190265196769539e-06, "loss": 0.2839, "step": 3207 }, { "epoch": 1.6442849820604817, "grad_norm": 0.874637696196113, "learning_rate": 1.6145003287476968e-06, "loss": 0.26, "step": 3208 }, { "epoch": 1.6447975397232188, "grad_norm": 0.9344936632801041, "learning_rate": 1.6099799177994491e-06, "loss": 0.2684, "step": 3209 }, { "epoch": 1.645310097385956, "grad_norm": 0.988204837090154, "learning_rate": 1.6054652899480472e-06, "loss": 0.2721, "step": 3210 }, { "epoch": 1.645822655048693, "grad_norm": 1.025371674254226, "learning_rate": 1.6009564483053496e-06, "loss": 0.2887, "step": 3211 }, { "epoch": 1.64633521271143, "grad_norm": 1.030923741965234, "learning_rate": 1.5964533959792217e-06, "loss": 0.3022, "step": 3212 }, { "epoch": 1.646847770374167, "grad_norm": 0.9742026303451554, "learning_rate": 1.5919561360735403e-06, "loss": 0.2747, "step": 3213 }, { "epoch": 1.6473603280369042, "grad_norm": 0.9348129964482134, "learning_rate": 1.587464671688187e-06, "loss": 0.2455, "step": 3214 }, { "epoch": 1.6478728856996412, "grad_norm": 0.918631119147141, "learning_rate": 1.5829790059190586e-06, "loss": 0.2226, "step": 3215 }, { "epoch": 1.6483854433623781, "grad_norm": 0.9127362416694705, "learning_rate": 1.578499141858041e-06, "loss": 0.256, "step": 3216 }, { "epoch": 1.6488980010251153, "grad_norm": 0.8731566677022883, "learning_rate": 1.574025082593027e-06, "loss": 0.2631, "step": 3217 }, { "epoch": 1.6494105586878525, "grad_norm": 0.9136533573563841, "learning_rate": 1.5695568312079156e-06, "loss": 0.2395, "step": 3218 }, { "epoch": 1.6499231163505894, "grad_norm": 0.9272962536884427, "learning_rate": 1.5650943907825888e-06, "loss": 0.2587, "step": 3219 }, { "epoch": 1.6504356740133264, "grad_norm": 0.9800505082511323, "learning_rate": 1.5606377643929305e-06, "loss": 0.2812, "step": 3220 }, { "epoch": 1.6509482316760635, "grad_norm": 0.9527963159615785, "learning_rate": 1.5561869551108212e-06, "loss": 0.2601, "step": 3221 }, { "epoch": 1.6514607893388007, "grad_norm": 0.9500153405765602, "learning_rate": 1.5517419660041277e-06, "loss": 0.2809, "step": 3222 }, { "epoch": 1.6519733470015376, "grad_norm": 1.0263995420071257, "learning_rate": 1.5473028001366973e-06, "loss": 0.2954, "step": 3223 }, { "epoch": 1.6524859046642746, "grad_norm": 0.9371503679599781, "learning_rate": 1.542869460568379e-06, "loss": 0.2841, "step": 3224 }, { "epoch": 1.6529984623270118, "grad_norm": 0.8874385848764302, "learning_rate": 1.5384419503549941e-06, "loss": 0.2594, "step": 3225 }, { "epoch": 1.653511019989749, "grad_norm": 0.8752578299085996, "learning_rate": 1.534020272548349e-06, "loss": 0.2718, "step": 3226 }, { "epoch": 1.6540235776524859, "grad_norm": 0.9724397920432665, "learning_rate": 1.529604430196232e-06, "loss": 0.2708, "step": 3227 }, { "epoch": 1.6545361353152228, "grad_norm": 0.9967361629242951, "learning_rate": 1.5251944263424056e-06, "loss": 0.2883, "step": 3228 }, { "epoch": 1.65504869297796, "grad_norm": 0.9063332404633097, "learning_rate": 1.5207902640266114e-06, "loss": 0.252, "step": 3229 }, { "epoch": 1.6555612506406971, "grad_norm": 0.9572457968227188, "learning_rate": 1.5163919462845622e-06, "loss": 0.2789, "step": 3230 }, { "epoch": 1.656073808303434, "grad_norm": 0.9037915384021974, "learning_rate": 1.5119994761479429e-06, "loss": 0.2544, "step": 3231 }, { "epoch": 1.656586365966171, "grad_norm": 1.0351241041022674, "learning_rate": 1.507612856644405e-06, "loss": 0.2819, "step": 3232 }, { "epoch": 1.6570989236289082, "grad_norm": 0.9073386099214382, "learning_rate": 1.5032320907975772e-06, "loss": 0.2638, "step": 3233 }, { "epoch": 1.6576114812916454, "grad_norm": 0.9236016688384575, "learning_rate": 1.4988571816270402e-06, "loss": 0.254, "step": 3234 }, { "epoch": 1.6581240389543823, "grad_norm": 0.8442107025566216, "learning_rate": 1.4944881321483428e-06, "loss": 0.2776, "step": 3235 }, { "epoch": 1.6586365966171193, "grad_norm": 0.9740148323153305, "learning_rate": 1.4901249453730038e-06, "loss": 0.2932, "step": 3236 }, { "epoch": 1.6591491542798564, "grad_norm": 0.9175468799273273, "learning_rate": 1.485767624308485e-06, "loss": 0.2685, "step": 3237 }, { "epoch": 1.6596617119425936, "grad_norm": 0.9606152174521757, "learning_rate": 1.4814161719582132e-06, "loss": 0.26, "step": 3238 }, { "epoch": 1.6601742696053305, "grad_norm": 0.9956700902201131, "learning_rate": 1.4770705913215743e-06, "loss": 0.271, "step": 3239 }, { "epoch": 1.6606868272680677, "grad_norm": 0.8380072204057675, "learning_rate": 1.4727308853939004e-06, "loss": 0.2435, "step": 3240 }, { "epoch": 1.6611993849308049, "grad_norm": 0.9257464567140736, "learning_rate": 1.4683970571664763e-06, "loss": 0.2583, "step": 3241 }, { "epoch": 1.6617119425935418, "grad_norm": 0.9313359003227876, "learning_rate": 1.4640691096265358e-06, "loss": 0.2555, "step": 3242 }, { "epoch": 1.6622245002562788, "grad_norm": 0.9249573207438228, "learning_rate": 1.4597470457572583e-06, "loss": 0.2605, "step": 3243 }, { "epoch": 1.662737057919016, "grad_norm": 0.8913185638947456, "learning_rate": 1.4554308685377694e-06, "loss": 0.2329, "step": 3244 }, { "epoch": 1.663249615581753, "grad_norm": 0.8967296427212132, "learning_rate": 1.451120580943134e-06, "loss": 0.2333, "step": 3245 }, { "epoch": 1.66376217324449, "grad_norm": 1.0321154106778188, "learning_rate": 1.4468161859443609e-06, "loss": 0.3012, "step": 3246 }, { "epoch": 1.664274730907227, "grad_norm": 0.8564597422022016, "learning_rate": 1.4425176865083946e-06, "loss": 0.2377, "step": 3247 }, { "epoch": 1.6647872885699642, "grad_norm": 0.9386440862478387, "learning_rate": 1.4382250855981172e-06, "loss": 0.2686, "step": 3248 }, { "epoch": 1.6652998462327013, "grad_norm": 1.0642270713625863, "learning_rate": 1.4339383861723465e-06, "loss": 0.3311, "step": 3249 }, { "epoch": 1.6658124038954383, "grad_norm": 0.8648343141035764, "learning_rate": 1.4296575911858268e-06, "loss": 0.2453, "step": 3250 }, { "epoch": 1.6663249615581752, "grad_norm": 0.9681179577319693, "learning_rate": 1.4253827035892442e-06, "loss": 0.2715, "step": 3251 }, { "epoch": 1.6668375192209124, "grad_norm": 0.9470272470616632, "learning_rate": 1.4211137263291952e-06, "loss": 0.2787, "step": 3252 }, { "epoch": 1.6673500768836496, "grad_norm": 0.9313808451775324, "learning_rate": 1.4168506623482202e-06, "loss": 0.2624, "step": 3253 }, { "epoch": 1.6678626345463865, "grad_norm": 0.9207378608504385, "learning_rate": 1.412593514584777e-06, "loss": 0.2637, "step": 3254 }, { "epoch": 1.6683751922091234, "grad_norm": 1.0868985483402365, "learning_rate": 1.4083422859732365e-06, "loss": 0.2807, "step": 3255 }, { "epoch": 1.6688877498718606, "grad_norm": 0.9650389942902585, "learning_rate": 1.4040969794439041e-06, "loss": 0.2693, "step": 3256 }, { "epoch": 1.6694003075345978, "grad_norm": 1.0011093944047795, "learning_rate": 1.3998575979229944e-06, "loss": 0.282, "step": 3257 }, { "epoch": 1.6699128651973347, "grad_norm": 0.8383321183592988, "learning_rate": 1.3956241443326423e-06, "loss": 0.2263, "step": 3258 }, { "epoch": 1.6704254228600717, "grad_norm": 0.9082052982803144, "learning_rate": 1.3913966215908926e-06, "loss": 0.2624, "step": 3259 }, { "epoch": 1.6709379805228088, "grad_norm": 0.972153309909388, "learning_rate": 1.387175032611705e-06, "loss": 0.2688, "step": 3260 }, { "epoch": 1.671450538185546, "grad_norm": 0.8598662342333584, "learning_rate": 1.382959380304948e-06, "loss": 0.2257, "step": 3261 }, { "epoch": 1.671963095848283, "grad_norm": 0.9625400507046095, "learning_rate": 1.378749667576399e-06, "loss": 0.2591, "step": 3262 }, { "epoch": 1.67247565351102, "grad_norm": 0.8867222007510539, "learning_rate": 1.3745458973277392e-06, "loss": 0.2179, "step": 3263 }, { "epoch": 1.672988211173757, "grad_norm": 0.9144954907809139, "learning_rate": 1.3703480724565577e-06, "loss": 0.2509, "step": 3264 }, { "epoch": 1.6735007688364942, "grad_norm": 0.8670161296716792, "learning_rate": 1.3661561958563407e-06, "loss": 0.2354, "step": 3265 }, { "epoch": 1.6740133264992312, "grad_norm": 1.0083036169948558, "learning_rate": 1.3619702704164783e-06, "loss": 0.2955, "step": 3266 }, { "epoch": 1.6745258841619681, "grad_norm": 0.9862531982761011, "learning_rate": 1.3577902990222547e-06, "loss": 0.3063, "step": 3267 }, { "epoch": 1.6750384418247053, "grad_norm": 0.9441453184469218, "learning_rate": 1.3536162845548572e-06, "loss": 0.2552, "step": 3268 }, { "epoch": 1.6755509994874425, "grad_norm": 0.991295344913683, "learning_rate": 1.3494482298913614e-06, "loss": 0.2761, "step": 3269 }, { "epoch": 1.6760635571501794, "grad_norm": 0.9433570078827307, "learning_rate": 1.3452861379047289e-06, "loss": 0.2572, "step": 3270 }, { "epoch": 1.6765761148129164, "grad_norm": 1.100235833964348, "learning_rate": 1.341130011463826e-06, "loss": 0.3245, "step": 3271 }, { "epoch": 1.6770886724756535, "grad_norm": 1.0421879520438662, "learning_rate": 1.336979853433399e-06, "loss": 0.2717, "step": 3272 }, { "epoch": 1.6776012301383907, "grad_norm": 0.9761829650206835, "learning_rate": 1.3328356666740739e-06, "loss": 0.2573, "step": 3273 }, { "epoch": 1.6781137878011276, "grad_norm": 1.0338202263024279, "learning_rate": 1.3286974540423747e-06, "loss": 0.3029, "step": 3274 }, { "epoch": 1.6786263454638646, "grad_norm": 0.8842726609398026, "learning_rate": 1.3245652183906965e-06, "loss": 0.2594, "step": 3275 }, { "epoch": 1.6791389031266017, "grad_norm": 0.936521297877573, "learning_rate": 1.3204389625673208e-06, "loss": 0.2437, "step": 3276 }, { "epoch": 1.679651460789339, "grad_norm": 1.0109941980373898, "learning_rate": 1.3163186894164026e-06, "loss": 0.3089, "step": 3277 }, { "epoch": 1.6801640184520759, "grad_norm": 1.0209498336630263, "learning_rate": 1.3122044017779768e-06, "loss": 0.2872, "step": 3278 }, { "epoch": 1.6806765761148128, "grad_norm": 0.875541896344077, "learning_rate": 1.3080961024879501e-06, "loss": 0.246, "step": 3279 }, { "epoch": 1.68118913377755, "grad_norm": 0.9238134294689387, "learning_rate": 1.3039937943781046e-06, "loss": 0.2646, "step": 3280 }, { "epoch": 1.6817016914402871, "grad_norm": 0.9316795200545039, "learning_rate": 1.29989748027609e-06, "loss": 0.2513, "step": 3281 }, { "epoch": 1.682214249103024, "grad_norm": 0.9228178908521906, "learning_rate": 1.2958071630054214e-06, "loss": 0.2753, "step": 3282 }, { "epoch": 1.682726806765761, "grad_norm": 0.8558041582634142, "learning_rate": 1.2917228453854936e-06, "loss": 0.243, "step": 3283 }, { "epoch": 1.6832393644284982, "grad_norm": 0.9173146378898307, "learning_rate": 1.2876445302315489e-06, "loss": 0.2455, "step": 3284 }, { "epoch": 1.6837519220912354, "grad_norm": 1.0297523732879397, "learning_rate": 1.2835722203546996e-06, "loss": 0.2848, "step": 3285 }, { "epoch": 1.6842644797539723, "grad_norm": 0.8610944996856925, "learning_rate": 1.279505918561923e-06, "loss": 0.2331, "step": 3286 }, { "epoch": 1.6847770374167093, "grad_norm": 0.8241076891477895, "learning_rate": 1.2754456276560534e-06, "loss": 0.2212, "step": 3287 }, { "epoch": 1.6852895950794464, "grad_norm": 0.983356794364147, "learning_rate": 1.27139135043577e-06, "loss": 0.2599, "step": 3288 }, { "epoch": 1.6858021527421836, "grad_norm": 0.993788532304922, "learning_rate": 1.2673430896956252e-06, "loss": 0.273, "step": 3289 }, { "epoch": 1.6863147104049205, "grad_norm": 0.8947846787640026, "learning_rate": 1.2633008482260146e-06, "loss": 0.2298, "step": 3290 }, { "epoch": 1.6868272680676575, "grad_norm": 0.9916426862383211, "learning_rate": 1.2592646288131804e-06, "loss": 0.2729, "step": 3291 }, { "epoch": 1.6873398257303946, "grad_norm": 0.9610394024758123, "learning_rate": 1.2552344342392242e-06, "loss": 0.2934, "step": 3292 }, { "epoch": 1.6878523833931318, "grad_norm": 0.9183056505694085, "learning_rate": 1.2512102672820902e-06, "loss": 0.2374, "step": 3293 }, { "epoch": 1.6883649410558688, "grad_norm": 0.8801163597791286, "learning_rate": 1.2471921307155655e-06, "loss": 0.2357, "step": 3294 }, { "epoch": 1.6888774987186057, "grad_norm": 0.891912962810415, "learning_rate": 1.2431800273092832e-06, "loss": 0.2512, "step": 3295 }, { "epoch": 1.6893900563813429, "grad_norm": 0.9765366889695666, "learning_rate": 1.2391739598287167e-06, "loss": 0.2747, "step": 3296 }, { "epoch": 1.68990261404408, "grad_norm": 0.9792475213148942, "learning_rate": 1.2351739310351796e-06, "loss": 0.2559, "step": 3297 }, { "epoch": 1.690415171706817, "grad_norm": 1.0029327408639106, "learning_rate": 1.2311799436858275e-06, "loss": 0.2708, "step": 3298 }, { "epoch": 1.690927729369554, "grad_norm": 1.1475069498200694, "learning_rate": 1.2271920005336425e-06, "loss": 0.2685, "step": 3299 }, { "epoch": 1.691440287032291, "grad_norm": 0.9040546513169084, "learning_rate": 1.2232101043274437e-06, "loss": 0.2524, "step": 3300 }, { "epoch": 1.6919528446950283, "grad_norm": 0.9369473231229185, "learning_rate": 1.21923425781189e-06, "loss": 0.2671, "step": 3301 }, { "epoch": 1.6924654023577652, "grad_norm": 0.8601384064360054, "learning_rate": 1.2152644637274603e-06, "loss": 0.2351, "step": 3302 }, { "epoch": 1.6929779600205022, "grad_norm": 0.8893336547767065, "learning_rate": 1.2113007248104625e-06, "loss": 0.2657, "step": 3303 }, { "epoch": 1.6934905176832393, "grad_norm": 1.0205790136036064, "learning_rate": 1.2073430437930412e-06, "loss": 0.2837, "step": 3304 }, { "epoch": 1.6940030753459765, "grad_norm": 0.8463678429893782, "learning_rate": 1.2033914234031552e-06, "loss": 0.2407, "step": 3305 }, { "epoch": 1.6945156330087134, "grad_norm": 0.9977117560678709, "learning_rate": 1.1994458663645836e-06, "loss": 0.2907, "step": 3306 }, { "epoch": 1.6950281906714504, "grad_norm": 0.9973343379474174, "learning_rate": 1.1955063753969365e-06, "loss": 0.2974, "step": 3307 }, { "epoch": 1.6955407483341876, "grad_norm": 0.8682679907057067, "learning_rate": 1.1915729532156372e-06, "loss": 0.2412, "step": 3308 }, { "epoch": 1.6960533059969247, "grad_norm": 0.9153466509226819, "learning_rate": 1.187645602531925e-06, "loss": 0.2615, "step": 3309 }, { "epoch": 1.6965658636596617, "grad_norm": 0.9475803953141617, "learning_rate": 1.1837243260528542e-06, "loss": 0.2554, "step": 3310 }, { "epoch": 1.6970784213223986, "grad_norm": 0.9314877251047728, "learning_rate": 1.179809126481295e-06, "loss": 0.2646, "step": 3311 }, { "epoch": 1.6975909789851358, "grad_norm": 0.8719456068121784, "learning_rate": 1.1759000065159276e-06, "loss": 0.2247, "step": 3312 }, { "epoch": 1.698103536647873, "grad_norm": 0.9769826028147698, "learning_rate": 1.1719969688512389e-06, "loss": 0.2667, "step": 3313 }, { "epoch": 1.69861609431061, "grad_norm": 0.9055346749070462, "learning_rate": 1.168100016177528e-06, "loss": 0.2364, "step": 3314 }, { "epoch": 1.6991286519733468, "grad_norm": 0.980191505678718, "learning_rate": 1.164209151180895e-06, "loss": 0.262, "step": 3315 }, { "epoch": 1.699641209636084, "grad_norm": 0.920737429597357, "learning_rate": 1.1603243765432527e-06, "loss": 0.2551, "step": 3316 }, { "epoch": 1.7001537672988212, "grad_norm": 0.938860890483465, "learning_rate": 1.156445694942302e-06, "loss": 0.27, "step": 3317 }, { "epoch": 1.7006663249615581, "grad_norm": 0.9566585385082185, "learning_rate": 1.1525731090515536e-06, "loss": 0.2749, "step": 3318 }, { "epoch": 1.7011788826242953, "grad_norm": 0.8755492606303149, "learning_rate": 1.1487066215403186e-06, "loss": 0.262, "step": 3319 }, { "epoch": 1.7016914402870325, "grad_norm": 0.8643958443630427, "learning_rate": 1.1448462350736956e-06, "loss": 0.2548, "step": 3320 }, { "epoch": 1.7022039979497694, "grad_norm": 0.9008085922587624, "learning_rate": 1.1409919523125823e-06, "loss": 0.266, "step": 3321 }, { "epoch": 1.7027165556125063, "grad_norm": 0.8997696996914653, "learning_rate": 1.137143775913675e-06, "loss": 0.2488, "step": 3322 }, { "epoch": 1.7032291132752435, "grad_norm": 0.9946525448211596, "learning_rate": 1.1333017085294528e-06, "loss": 0.2809, "step": 3323 }, { "epoch": 1.7037416709379807, "grad_norm": 0.9230874084662739, "learning_rate": 1.1294657528081842e-06, "loss": 0.2325, "step": 3324 }, { "epoch": 1.7042542286007176, "grad_norm": 0.9316698264535008, "learning_rate": 1.1256359113939309e-06, "loss": 0.2694, "step": 3325 }, { "epoch": 1.7047667862634546, "grad_norm": 0.943205087580268, "learning_rate": 1.1218121869265365e-06, "loss": 0.2552, "step": 3326 }, { "epoch": 1.7052793439261917, "grad_norm": 0.9097384215149541, "learning_rate": 1.1179945820416282e-06, "loss": 0.2427, "step": 3327 }, { "epoch": 1.705791901588929, "grad_norm": 0.9138664427926769, "learning_rate": 1.1141830993706149e-06, "loss": 0.2687, "step": 3328 }, { "epoch": 1.7063044592516659, "grad_norm": 0.9390411701739587, "learning_rate": 1.110377741540687e-06, "loss": 0.269, "step": 3329 }, { "epoch": 1.7068170169144028, "grad_norm": 0.8570589273880005, "learning_rate": 1.1065785111748117e-06, "loss": 0.2607, "step": 3330 }, { "epoch": 1.70732957457714, "grad_norm": 0.9106731991771216, "learning_rate": 1.1027854108917335e-06, "loss": 0.2529, "step": 3331 }, { "epoch": 1.7078421322398771, "grad_norm": 0.9439768356084133, "learning_rate": 1.098998443305972e-06, "loss": 0.2521, "step": 3332 }, { "epoch": 1.708354689902614, "grad_norm": 0.9412047002932634, "learning_rate": 1.0952176110278167e-06, "loss": 0.2754, "step": 3333 }, { "epoch": 1.708867247565351, "grad_norm": 0.88734189186438, "learning_rate": 1.0914429166633355e-06, "loss": 0.217, "step": 3334 }, { "epoch": 1.7093798052280882, "grad_norm": 0.9284617306306621, "learning_rate": 1.087674362814355e-06, "loss": 0.2498, "step": 3335 }, { "epoch": 1.7098923628908254, "grad_norm": 0.9458875091505011, "learning_rate": 1.0839119520784758e-06, "loss": 0.2964, "step": 3336 }, { "epoch": 1.7104049205535623, "grad_norm": 0.9328459044790826, "learning_rate": 1.080155687049067e-06, "loss": 0.2458, "step": 3337 }, { "epoch": 1.7109174782162992, "grad_norm": 0.9480040895536035, "learning_rate": 1.076405570315252e-06, "loss": 0.2841, "step": 3338 }, { "epoch": 1.7114300358790364, "grad_norm": 0.911215198577433, "learning_rate": 1.0726616044619243e-06, "loss": 0.2537, "step": 3339 }, { "epoch": 1.7119425935417736, "grad_norm": 0.9804177019124803, "learning_rate": 1.068923792069736e-06, "loss": 0.2683, "step": 3340 }, { "epoch": 1.7124551512045105, "grad_norm": 0.8632527831209511, "learning_rate": 1.0651921357150997e-06, "loss": 0.2237, "step": 3341 }, { "epoch": 1.7129677088672475, "grad_norm": 0.9694574217111905, "learning_rate": 1.0614666379701732e-06, "loss": 0.2613, "step": 3342 }, { "epoch": 1.7134802665299846, "grad_norm": 0.9581634350116524, "learning_rate": 1.0577473014028872e-06, "loss": 0.2559, "step": 3343 }, { "epoch": 1.7139928241927218, "grad_norm": 0.9159379401702887, "learning_rate": 1.054034128576913e-06, "loss": 0.263, "step": 3344 }, { "epoch": 1.7145053818554588, "grad_norm": 0.8985020417869729, "learning_rate": 1.0503271220516775e-06, "loss": 0.24, "step": 3345 }, { "epoch": 1.7150179395181957, "grad_norm": 0.883933860431439, "learning_rate": 1.046626284382356e-06, "loss": 0.2549, "step": 3346 }, { "epoch": 1.7155304971809329, "grad_norm": 0.9582041323677251, "learning_rate": 1.0429316181198735e-06, "loss": 0.2495, "step": 3347 }, { "epoch": 1.71604305484367, "grad_norm": 0.9202452786006317, "learning_rate": 1.039243125810898e-06, "loss": 0.2502, "step": 3348 }, { "epoch": 1.716555612506407, "grad_norm": 0.8507572974670568, "learning_rate": 1.0355608099978454e-06, "loss": 0.2099, "step": 3349 }, { "epoch": 1.717068170169144, "grad_norm": 0.898101947374253, "learning_rate": 1.0318846732188737e-06, "loss": 0.2465, "step": 3350 }, { "epoch": 1.717580727831881, "grad_norm": 0.9189386782346648, "learning_rate": 1.0282147180078761e-06, "loss": 0.2682, "step": 3351 }, { "epoch": 1.7180932854946183, "grad_norm": 0.9104160356119377, "learning_rate": 1.0245509468944992e-06, "loss": 0.2252, "step": 3352 }, { "epoch": 1.7186058431573552, "grad_norm": 1.0007842036406729, "learning_rate": 1.0208933624041085e-06, "loss": 0.2784, "step": 3353 }, { "epoch": 1.7191184008200922, "grad_norm": 1.039963632855599, "learning_rate": 1.017241967057816e-06, "loss": 0.3024, "step": 3354 }, { "epoch": 1.7196309584828293, "grad_norm": 0.9536354817966282, "learning_rate": 1.0135967633724708e-06, "loss": 0.2643, "step": 3355 }, { "epoch": 1.7201435161455665, "grad_norm": 0.9569085355888527, "learning_rate": 1.009957753860642e-06, "loss": 0.2917, "step": 3356 }, { "epoch": 1.7206560738083034, "grad_norm": 1.0058633756974766, "learning_rate": 1.006324941030643e-06, "loss": 0.28, "step": 3357 }, { "epoch": 1.7211686314710404, "grad_norm": 1.0186699721666828, "learning_rate": 1.0026983273865055e-06, "loss": 0.2739, "step": 3358 }, { "epoch": 1.7216811891337775, "grad_norm": 0.8648103317939124, "learning_rate": 9.99077915427994e-07, "loss": 0.2514, "step": 3359 }, { "epoch": 1.7221937467965147, "grad_norm": 0.9265609193095169, "learning_rate": 9.954637076505946e-07, "loss": 0.2558, "step": 3360 }, { "epoch": 1.7227063044592517, "grad_norm": 0.910557960115441, "learning_rate": 9.918557065455193e-07, "loss": 0.268, "step": 3361 }, { "epoch": 1.7232188621219886, "grad_norm": 1.0328230262486275, "learning_rate": 9.882539145997027e-07, "loss": 0.2962, "step": 3362 }, { "epoch": 1.7237314197847258, "grad_norm": 0.9540987834913089, "learning_rate": 9.84658334295796e-07, "loss": 0.2671, "step": 3363 }, { "epoch": 1.724243977447463, "grad_norm": 0.901157551455359, "learning_rate": 9.81068968112172e-07, "loss": 0.2418, "step": 3364 }, { "epoch": 1.7247565351101999, "grad_norm": 0.9858996833002232, "learning_rate": 9.774858185229198e-07, "loss": 0.2707, "step": 3365 }, { "epoch": 1.7252690927729368, "grad_norm": 0.901271920030777, "learning_rate": 9.739088879978409e-07, "loss": 0.2509, "step": 3366 }, { "epoch": 1.725781650435674, "grad_norm": 0.9688781796380161, "learning_rate": 9.703381790024547e-07, "loss": 0.2597, "step": 3367 }, { "epoch": 1.7262942080984112, "grad_norm": 0.9153193145391124, "learning_rate": 9.667736939979888e-07, "loss": 0.2645, "step": 3368 }, { "epoch": 1.726806765761148, "grad_norm": 0.9786730433835441, "learning_rate": 9.63215435441378e-07, "loss": 0.2711, "step": 3369 }, { "epoch": 1.727319323423885, "grad_norm": 1.0144701852939662, "learning_rate": 9.59663405785277e-07, "loss": 0.2745, "step": 3370 }, { "epoch": 1.7278318810866222, "grad_norm": 0.8779791060660727, "learning_rate": 9.561176074780299e-07, "loss": 0.248, "step": 3371 }, { "epoch": 1.7283444387493594, "grad_norm": 0.9687801857834366, "learning_rate": 9.525780429636999e-07, "loss": 0.2719, "step": 3372 }, { "epoch": 1.7288569964120963, "grad_norm": 0.9712939484597154, "learning_rate": 9.49044714682047e-07, "loss": 0.2902, "step": 3373 }, { "epoch": 1.7293695540748333, "grad_norm": 0.9311914016417668, "learning_rate": 9.455176250685338e-07, "loss": 0.2711, "step": 3374 }, { "epoch": 1.7298821117375704, "grad_norm": 0.9161272001162302, "learning_rate": 9.419967765543225e-07, "loss": 0.2532, "step": 3375 }, { "epoch": 1.7303946694003076, "grad_norm": 0.8921725244372315, "learning_rate": 9.384821715662729e-07, "loss": 0.2376, "step": 3376 }, { "epoch": 1.7309072270630446, "grad_norm": 0.9736978512848874, "learning_rate": 9.349738125269425e-07, "loss": 0.2777, "step": 3377 }, { "epoch": 1.7314197847257815, "grad_norm": 0.9144954580902419, "learning_rate": 9.314717018545838e-07, "loss": 0.2665, "step": 3378 }, { "epoch": 1.7319323423885187, "grad_norm": 0.839300963795033, "learning_rate": 9.279758419631402e-07, "loss": 0.2417, "step": 3379 }, { "epoch": 1.7324449000512558, "grad_norm": 0.943397196084179, "learning_rate": 9.244862352622485e-07, "loss": 0.2436, "step": 3380 }, { "epoch": 1.7329574577139928, "grad_norm": 0.8898763616861449, "learning_rate": 9.210028841572338e-07, "loss": 0.2282, "step": 3381 }, { "epoch": 1.7334700153767297, "grad_norm": 0.9561311398133451, "learning_rate": 9.17525791049112e-07, "loss": 0.2477, "step": 3382 }, { "epoch": 1.733982573039467, "grad_norm": 0.9404409627079312, "learning_rate": 9.140549583345826e-07, "loss": 0.2752, "step": 3383 }, { "epoch": 1.734495130702204, "grad_norm": 0.9018746990251563, "learning_rate": 9.105903884060307e-07, "loss": 0.2547, "step": 3384 }, { "epoch": 1.735007688364941, "grad_norm": 1.0409253722399865, "learning_rate": 9.071320836515263e-07, "loss": 0.2936, "step": 3385 }, { "epoch": 1.735520246027678, "grad_norm": 0.9510364035011942, "learning_rate": 9.036800464548157e-07, "loss": 0.2665, "step": 3386 }, { "epoch": 1.7360328036904151, "grad_norm": 0.9735037038142765, "learning_rate": 9.002342791953345e-07, "loss": 0.2519, "step": 3387 }, { "epoch": 1.7365453613531523, "grad_norm": 0.9195501507955506, "learning_rate": 8.967947842481894e-07, "loss": 0.2617, "step": 3388 }, { "epoch": 1.7370579190158892, "grad_norm": 0.8971970275607596, "learning_rate": 8.933615639841608e-07, "loss": 0.2611, "step": 3389 }, { "epoch": 1.7375704766786262, "grad_norm": 0.9560973422468377, "learning_rate": 8.899346207697135e-07, "loss": 0.2825, "step": 3390 }, { "epoch": 1.7380830343413634, "grad_norm": 0.8807160106350305, "learning_rate": 8.865139569669778e-07, "loss": 0.2334, "step": 3391 }, { "epoch": 1.7385955920041005, "grad_norm": 0.946336359698024, "learning_rate": 8.830995749337612e-07, "loss": 0.2566, "step": 3392 }, { "epoch": 1.7391081496668375, "grad_norm": 0.9988072292365786, "learning_rate": 8.796914770235365e-07, "loss": 0.2582, "step": 3393 }, { "epoch": 1.7396207073295744, "grad_norm": 0.9953904545945437, "learning_rate": 8.762896655854481e-07, "loss": 0.2728, "step": 3394 }, { "epoch": 1.7401332649923118, "grad_norm": 0.8953900368222404, "learning_rate": 8.728941429643045e-07, "loss": 0.2642, "step": 3395 }, { "epoch": 1.7406458226550487, "grad_norm": 0.9346120299501987, "learning_rate": 8.695049115005838e-07, "loss": 0.2717, "step": 3396 }, { "epoch": 1.7411583803177857, "grad_norm": 0.9905158162586136, "learning_rate": 8.661219735304238e-07, "loss": 0.2713, "step": 3397 }, { "epoch": 1.7416709379805229, "grad_norm": 0.9354407486472941, "learning_rate": 8.627453313856249e-07, "loss": 0.2681, "step": 3398 }, { "epoch": 1.74218349564326, "grad_norm": 0.918954830802086, "learning_rate": 8.593749873936497e-07, "loss": 0.2567, "step": 3399 }, { "epoch": 1.742696053305997, "grad_norm": 0.9540256740472671, "learning_rate": 8.560109438776176e-07, "loss": 0.2817, "step": 3400 }, { "epoch": 1.743208610968734, "grad_norm": 0.9118834488432686, "learning_rate": 8.526532031563051e-07, "loss": 0.2261, "step": 3401 }, { "epoch": 1.743721168631471, "grad_norm": 0.9583533730527749, "learning_rate": 8.493017675441495e-07, "loss": 0.2663, "step": 3402 }, { "epoch": 1.7442337262942083, "grad_norm": 0.9221171199014688, "learning_rate": 8.459566393512331e-07, "loss": 0.2442, "step": 3403 }, { "epoch": 1.7447462839569452, "grad_norm": 0.8764505706273363, "learning_rate": 8.426178208832958e-07, "loss": 0.2552, "step": 3404 }, { "epoch": 1.7452588416196821, "grad_norm": 0.9077792514692847, "learning_rate": 8.392853144417312e-07, "loss": 0.2529, "step": 3405 }, { "epoch": 1.7457713992824193, "grad_norm": 0.9712111878985997, "learning_rate": 8.359591223235785e-07, "loss": 0.2714, "step": 3406 }, { "epoch": 1.7462839569451565, "grad_norm": 0.956555045522214, "learning_rate": 8.326392468215206e-07, "loss": 0.2768, "step": 3407 }, { "epoch": 1.7467965146078934, "grad_norm": 0.8878013141681744, "learning_rate": 8.293256902238933e-07, "loss": 0.2545, "step": 3408 }, { "epoch": 1.7473090722706304, "grad_norm": 0.9176455311991804, "learning_rate": 8.260184548146755e-07, "loss": 0.2288, "step": 3409 }, { "epoch": 1.7478216299333675, "grad_norm": 0.9707152702349596, "learning_rate": 8.227175428734868e-07, "loss": 0.2813, "step": 3410 }, { "epoch": 1.7483341875961047, "grad_norm": 1.0024054064284338, "learning_rate": 8.194229566755885e-07, "loss": 0.305, "step": 3411 }, { "epoch": 1.7488467452588417, "grad_norm": 0.9968285559764463, "learning_rate": 8.161346984918827e-07, "loss": 0.2775, "step": 3412 }, { "epoch": 1.7493593029215786, "grad_norm": 0.9270178993978996, "learning_rate": 8.128527705889088e-07, "loss": 0.2363, "step": 3413 }, { "epoch": 1.7498718605843158, "grad_norm": 0.9200206857752891, "learning_rate": 8.095771752288451e-07, "loss": 0.2746, "step": 3414 }, { "epoch": 1.750384418247053, "grad_norm": 0.955754478472521, "learning_rate": 8.063079146695019e-07, "loss": 0.2719, "step": 3415 }, { "epoch": 1.7508969759097899, "grad_norm": 0.9558126205066089, "learning_rate": 8.030449911643223e-07, "loss": 0.2725, "step": 3416 }, { "epoch": 1.7514095335725268, "grad_norm": 0.9189740473452994, "learning_rate": 7.997884069623907e-07, "loss": 0.2612, "step": 3417 }, { "epoch": 1.751922091235264, "grad_norm": 0.9781173871349186, "learning_rate": 7.965381643084069e-07, "loss": 0.2751, "step": 3418 }, { "epoch": 1.7524346488980012, "grad_norm": 1.041297531138794, "learning_rate": 7.932942654427089e-07, "loss": 0.2867, "step": 3419 }, { "epoch": 1.752947206560738, "grad_norm": 0.9152970138300961, "learning_rate": 7.900567126012648e-07, "loss": 0.2304, "step": 3420 }, { "epoch": 1.753459764223475, "grad_norm": 1.0004833424180724, "learning_rate": 7.868255080156606e-07, "loss": 0.2632, "step": 3421 }, { "epoch": 1.7539723218862122, "grad_norm": 0.951371921776324, "learning_rate": 7.83600653913108e-07, "loss": 0.2945, "step": 3422 }, { "epoch": 1.7544848795489494, "grad_norm": 1.0034645061174254, "learning_rate": 7.80382152516449e-07, "loss": 0.3152, "step": 3423 }, { "epoch": 1.7549974372116863, "grad_norm": 0.9190105256021547, "learning_rate": 7.7717000604414e-07, "loss": 0.2694, "step": 3424 }, { "epoch": 1.7555099948744233, "grad_norm": 0.9653057734182166, "learning_rate": 7.739642167102546e-07, "loss": 0.2837, "step": 3425 }, { "epoch": 1.7560225525371604, "grad_norm": 0.9162567444484662, "learning_rate": 7.707647867244927e-07, "loss": 0.2725, "step": 3426 }, { "epoch": 1.7565351101998976, "grad_norm": 0.9683886416910099, "learning_rate": 7.675717182921649e-07, "loss": 0.2702, "step": 3427 }, { "epoch": 1.7570476678626346, "grad_norm": 0.9302161104332706, "learning_rate": 7.643850136141972e-07, "loss": 0.2895, "step": 3428 }, { "epoch": 1.7575602255253715, "grad_norm": 0.9897945209751209, "learning_rate": 7.612046748871327e-07, "loss": 0.2643, "step": 3429 }, { "epoch": 1.7580727831881087, "grad_norm": 0.9011252418229828, "learning_rate": 7.580307043031232e-07, "loss": 0.2535, "step": 3430 }, { "epoch": 1.7585853408508458, "grad_norm": 1.0372003271164882, "learning_rate": 7.548631040499321e-07, "loss": 0.2711, "step": 3431 }, { "epoch": 1.7590978985135828, "grad_norm": 0.854867691228195, "learning_rate": 7.517018763109318e-07, "loss": 0.2485, "step": 3432 }, { "epoch": 1.7596104561763197, "grad_norm": 1.011607383644128, "learning_rate": 7.485470232651027e-07, "loss": 0.276, "step": 3433 }, { "epoch": 1.760123013839057, "grad_norm": 0.9582021858440716, "learning_rate": 7.453985470870284e-07, "loss": 0.2738, "step": 3434 }, { "epoch": 1.760635571501794, "grad_norm": 0.8810109843771594, "learning_rate": 7.422564499469065e-07, "loss": 0.2416, "step": 3435 }, { "epoch": 1.761148129164531, "grad_norm": 0.8792201825623225, "learning_rate": 7.391207340105233e-07, "loss": 0.2335, "step": 3436 }, { "epoch": 1.761660686827268, "grad_norm": 0.8717901204415639, "learning_rate": 7.359914014392744e-07, "loss": 0.2726, "step": 3437 }, { "epoch": 1.7621732444900051, "grad_norm": 0.9016999045335367, "learning_rate": 7.328684543901598e-07, "loss": 0.2587, "step": 3438 }, { "epoch": 1.7626858021527423, "grad_norm": 0.97996392889955, "learning_rate": 7.297518950157712e-07, "loss": 0.2589, "step": 3439 }, { "epoch": 1.7631983598154792, "grad_norm": 0.9256548918086365, "learning_rate": 7.266417254642966e-07, "loss": 0.2678, "step": 3440 }, { "epoch": 1.7637109174782162, "grad_norm": 0.9031886912739947, "learning_rate": 7.235379478795268e-07, "loss": 0.2541, "step": 3441 }, { "epoch": 1.7642234751409533, "grad_norm": 0.9384367792901543, "learning_rate": 7.204405644008416e-07, "loss": 0.2412, "step": 3442 }, { "epoch": 1.7647360328036905, "grad_norm": 0.8917965854173963, "learning_rate": 7.1734957716321e-07, "loss": 0.2277, "step": 3443 }, { "epoch": 1.7652485904664275, "grad_norm": 0.9535895848603921, "learning_rate": 7.142649882972008e-07, "loss": 0.2724, "step": 3444 }, { "epoch": 1.7657611481291644, "grad_norm": 0.9957575827151925, "learning_rate": 7.111867999289679e-07, "loss": 0.2805, "step": 3445 }, { "epoch": 1.7662737057919016, "grad_norm": 0.9202917181029235, "learning_rate": 7.081150141802518e-07, "loss": 0.2518, "step": 3446 }, { "epoch": 1.7667862634546387, "grad_norm": 0.9520682860867564, "learning_rate": 7.050496331683843e-07, "loss": 0.2653, "step": 3447 }, { "epoch": 1.7672988211173757, "grad_norm": 0.9662501486366268, "learning_rate": 7.019906590062764e-07, "loss": 0.2863, "step": 3448 }, { "epoch": 1.7678113787801126, "grad_norm": 0.9677830103869915, "learning_rate": 6.989380938024293e-07, "loss": 0.2629, "step": 3449 }, { "epoch": 1.7683239364428498, "grad_norm": 1.0197263995219925, "learning_rate": 6.958919396609231e-07, "loss": 0.2692, "step": 3450 }, { "epoch": 1.768836494105587, "grad_norm": 0.8725021585759287, "learning_rate": 6.928521986814196e-07, "loss": 0.2432, "step": 3451 }, { "epoch": 1.769349051768324, "grad_norm": 0.9038538186948312, "learning_rate": 6.898188729591582e-07, "loss": 0.2538, "step": 3452 }, { "epoch": 1.7698616094310609, "grad_norm": 0.9257910457631245, "learning_rate": 6.867919645849619e-07, "loss": 0.2764, "step": 3453 }, { "epoch": 1.770374167093798, "grad_norm": 0.9463844677966398, "learning_rate": 6.837714756452241e-07, "loss": 0.2723, "step": 3454 }, { "epoch": 1.7708867247565352, "grad_norm": 0.8820259557211299, "learning_rate": 6.807574082219148e-07, "loss": 0.2588, "step": 3455 }, { "epoch": 1.7713992824192721, "grad_norm": 0.9011271592739772, "learning_rate": 6.777497643925834e-07, "loss": 0.2428, "step": 3456 }, { "epoch": 1.771911840082009, "grad_norm": 0.9200867460205983, "learning_rate": 6.747485462303449e-07, "loss": 0.2507, "step": 3457 }, { "epoch": 1.7724243977447462, "grad_norm": 0.9830248839497174, "learning_rate": 6.717537558038845e-07, "loss": 0.3023, "step": 3458 }, { "epoch": 1.7729369554074834, "grad_norm": 0.9622691177977688, "learning_rate": 6.687653951774642e-07, "loss": 0.2553, "step": 3459 }, { "epoch": 1.7734495130702204, "grad_norm": 0.9861195569746516, "learning_rate": 6.657834664109075e-07, "loss": 0.2911, "step": 3460 }, { "epoch": 1.7739620707329573, "grad_norm": 0.953231511254557, "learning_rate": 6.62807971559607e-07, "loss": 0.2542, "step": 3461 }, { "epoch": 1.7744746283956945, "grad_norm": 0.9438355732521317, "learning_rate": 6.598389126745209e-07, "loss": 0.2787, "step": 3462 }, { "epoch": 1.7749871860584316, "grad_norm": 0.8819102073515017, "learning_rate": 6.568762918021688e-07, "loss": 0.2169, "step": 3463 }, { "epoch": 1.7754997437211686, "grad_norm": 1.0125188411822776, "learning_rate": 6.539201109846372e-07, "loss": 0.2768, "step": 3464 }, { "epoch": 1.7760123013839055, "grad_norm": 0.9575171266072264, "learning_rate": 6.509703722595684e-07, "loss": 0.2784, "step": 3465 }, { "epoch": 1.7765248590466427, "grad_norm": 0.9056084012276979, "learning_rate": 6.480270776601682e-07, "loss": 0.2636, "step": 3466 }, { "epoch": 1.7770374167093799, "grad_norm": 0.9853590764062488, "learning_rate": 6.450902292151973e-07, "loss": 0.2639, "step": 3467 }, { "epoch": 1.7775499743721168, "grad_norm": 0.9778503093528208, "learning_rate": 6.421598289489772e-07, "loss": 0.2518, "step": 3468 }, { "epoch": 1.7780625320348538, "grad_norm": 0.9435095873628014, "learning_rate": 6.392358788813802e-07, "loss": 0.2854, "step": 3469 }, { "epoch": 1.778575089697591, "grad_norm": 0.9073999713370671, "learning_rate": 6.36318381027835e-07, "loss": 0.2519, "step": 3470 }, { "epoch": 1.779087647360328, "grad_norm": 0.9225855078165848, "learning_rate": 6.334073373993266e-07, "loss": 0.2503, "step": 3471 }, { "epoch": 1.779600205023065, "grad_norm": 0.8922190506687359, "learning_rate": 6.305027500023841e-07, "loss": 0.2459, "step": 3472 }, { "epoch": 1.7801127626858022, "grad_norm": 0.9275755404264794, "learning_rate": 6.276046208390873e-07, "loss": 0.2533, "step": 3473 }, { "epoch": 1.7806253203485394, "grad_norm": 0.971170249701887, "learning_rate": 6.247129519070728e-07, "loss": 0.2665, "step": 3474 }, { "epoch": 1.7811378780112763, "grad_norm": 0.9359352668776516, "learning_rate": 6.218277451995147e-07, "loss": 0.2868, "step": 3475 }, { "epoch": 1.7816504356740133, "grad_norm": 0.9339072654260773, "learning_rate": 6.18949002705137e-07, "loss": 0.266, "step": 3476 }, { "epoch": 1.7821629933367504, "grad_norm": 1.0009174055100178, "learning_rate": 6.160767264082079e-07, "loss": 0.282, "step": 3477 }, { "epoch": 1.7826755509994876, "grad_norm": 0.8689231130360066, "learning_rate": 6.132109182885382e-07, "loss": 0.2223, "step": 3478 }, { "epoch": 1.7831881086622245, "grad_norm": 0.9681030907127263, "learning_rate": 6.103515803214799e-07, "loss": 0.2474, "step": 3479 }, { "epoch": 1.7837006663249615, "grad_norm": 0.9594618499951618, "learning_rate": 6.074987144779254e-07, "loss": 0.2456, "step": 3480 }, { "epoch": 1.7842132239876987, "grad_norm": 0.9799805949714097, "learning_rate": 6.046523227243062e-07, "loss": 0.2582, "step": 3481 }, { "epoch": 1.7847257816504358, "grad_norm": 0.9106230674453358, "learning_rate": 6.018124070225928e-07, "loss": 0.2428, "step": 3482 }, { "epoch": 1.7852383393131728, "grad_norm": 1.0107480749022895, "learning_rate": 5.989789693302872e-07, "loss": 0.2757, "step": 3483 }, { "epoch": 1.7857508969759097, "grad_norm": 0.9928287083920011, "learning_rate": 5.961520116004326e-07, "loss": 0.2737, "step": 3484 }, { "epoch": 1.7862634546386469, "grad_norm": 0.9001014915732752, "learning_rate": 5.933315357816005e-07, "loss": 0.2714, "step": 3485 }, { "epoch": 1.786776012301384, "grad_norm": 0.9311964632147954, "learning_rate": 5.905175438178979e-07, "loss": 0.2388, "step": 3486 }, { "epoch": 1.787288569964121, "grad_norm": 0.9682805123541688, "learning_rate": 5.877100376489597e-07, "loss": 0.2864, "step": 3487 }, { "epoch": 1.787801127626858, "grad_norm": 0.8842186486342052, "learning_rate": 5.849090192099505e-07, "loss": 0.2801, "step": 3488 }, { "epoch": 1.7883136852895951, "grad_norm": 0.9079294706197943, "learning_rate": 5.821144904315701e-07, "loss": 0.2421, "step": 3489 }, { "epoch": 1.7888262429523323, "grad_norm": 0.9599469056441334, "learning_rate": 5.793264532400311e-07, "loss": 0.2505, "step": 3490 }, { "epoch": 1.7893388006150692, "grad_norm": 0.8604507474100307, "learning_rate": 5.765449095570863e-07, "loss": 0.2164, "step": 3491 }, { "epoch": 1.7898513582778062, "grad_norm": 0.9729008320780633, "learning_rate": 5.737698613000031e-07, "loss": 0.2723, "step": 3492 }, { "epoch": 1.7903639159405433, "grad_norm": 0.9390960456191846, "learning_rate": 5.710013103815748e-07, "loss": 0.2699, "step": 3493 }, { "epoch": 1.7908764736032805, "grad_norm": 0.9872724813564165, "learning_rate": 5.68239258710116e-07, "loss": 0.2845, "step": 3494 }, { "epoch": 1.7913890312660175, "grad_norm": 0.9539614317812878, "learning_rate": 5.654837081894626e-07, "loss": 0.2643, "step": 3495 }, { "epoch": 1.7919015889287544, "grad_norm": 0.9879324931040756, "learning_rate": 5.627346607189665e-07, "loss": 0.2444, "step": 3496 }, { "epoch": 1.7924141465914916, "grad_norm": 0.9341171989889656, "learning_rate": 5.599921181935009e-07, "loss": 0.2382, "step": 3497 }, { "epoch": 1.7929267042542287, "grad_norm": 0.9424786094293157, "learning_rate": 5.572560825034523e-07, "loss": 0.2636, "step": 3498 }, { "epoch": 1.7934392619169657, "grad_norm": 0.9404178584246844, "learning_rate": 5.545265555347235e-07, "loss": 0.2536, "step": 3499 }, { "epoch": 1.7939518195797026, "grad_norm": 0.9489384658220863, "learning_rate": 5.518035391687293e-07, "loss": 0.2916, "step": 3500 }, { "epoch": 1.7944643772424398, "grad_norm": 0.9562113286959427, "learning_rate": 5.490870352823996e-07, "loss": 0.2667, "step": 3501 }, { "epoch": 1.794976934905177, "grad_norm": 0.8950722537874035, "learning_rate": 5.463770457481732e-07, "loss": 0.2412, "step": 3502 }, { "epoch": 1.795489492567914, "grad_norm": 0.8738378255658583, "learning_rate": 5.436735724339981e-07, "loss": 0.2314, "step": 3503 }, { "epoch": 1.7960020502306508, "grad_norm": 0.921914234079399, "learning_rate": 5.40976617203336e-07, "loss": 0.251, "step": 3504 }, { "epoch": 1.796514607893388, "grad_norm": 0.8941145946308622, "learning_rate": 5.382861819151475e-07, "loss": 0.2284, "step": 3505 }, { "epoch": 1.7970271655561252, "grad_norm": 0.859576088262569, "learning_rate": 5.35602268423906e-07, "loss": 0.26, "step": 3506 }, { "epoch": 1.7975397232188621, "grad_norm": 0.8486882875504718, "learning_rate": 5.329248785795882e-07, "loss": 0.2367, "step": 3507 }, { "epoch": 1.798052280881599, "grad_norm": 0.9635252283992384, "learning_rate": 5.302540142276679e-07, "loss": 0.2823, "step": 3508 }, { "epoch": 1.7985648385443362, "grad_norm": 0.8981648161616475, "learning_rate": 5.275896772091316e-07, "loss": 0.2569, "step": 3509 }, { "epoch": 1.7990773962070734, "grad_norm": 0.89308090255185, "learning_rate": 5.249318693604577e-07, "loss": 0.2453, "step": 3510 }, { "epoch": 1.7995899538698104, "grad_norm": 0.8741872823264234, "learning_rate": 5.222805925136298e-07, "loss": 0.2345, "step": 3511 }, { "epoch": 1.8001025115325473, "grad_norm": 1.0099472487487506, "learning_rate": 5.196358484961251e-07, "loss": 0.2952, "step": 3512 }, { "epoch": 1.8006150691952845, "grad_norm": 0.995591193956091, "learning_rate": 5.169976391309228e-07, "loss": 0.2799, "step": 3513 }, { "epoch": 1.8011276268580216, "grad_norm": 1.0152427299163669, "learning_rate": 5.143659662364931e-07, "loss": 0.2878, "step": 3514 }, { "epoch": 1.8016401845207586, "grad_norm": 0.9788525050102902, "learning_rate": 5.117408316268047e-07, "loss": 0.2809, "step": 3515 }, { "epoch": 1.8021527421834955, "grad_norm": 0.9786597313029931, "learning_rate": 5.091222371113158e-07, "loss": 0.2761, "step": 3516 }, { "epoch": 1.8026652998462327, "grad_norm": 0.9491747889853029, "learning_rate": 5.065101844949793e-07, "loss": 0.2567, "step": 3517 }, { "epoch": 1.8031778575089699, "grad_norm": 0.9161342883938242, "learning_rate": 5.039046755782417e-07, "loss": 0.2339, "step": 3518 }, { "epoch": 1.8036904151717068, "grad_norm": 0.9972668694582063, "learning_rate": 5.013057121570308e-07, "loss": 0.2917, "step": 3519 }, { "epoch": 1.8042029728344438, "grad_norm": 0.9841181196166862, "learning_rate": 4.987132960227681e-07, "loss": 0.2525, "step": 3520 }, { "epoch": 1.804715530497181, "grad_norm": 0.9057089392198157, "learning_rate": 4.961274289623641e-07, "loss": 0.2321, "step": 3521 }, { "epoch": 1.805228088159918, "grad_norm": 0.9701221619668697, "learning_rate": 4.935481127582131e-07, "loss": 0.2602, "step": 3522 }, { "epoch": 1.805740645822655, "grad_norm": 0.8669949713430235, "learning_rate": 4.909753491881896e-07, "loss": 0.2588, "step": 3523 }, { "epoch": 1.806253203485392, "grad_norm": 1.014649666273488, "learning_rate": 4.884091400256574e-07, "loss": 0.2666, "step": 3524 }, { "epoch": 1.8067657611481291, "grad_norm": 0.8965651817796947, "learning_rate": 4.858494870394625e-07, "loss": 0.2577, "step": 3525 }, { "epoch": 1.8072783188108663, "grad_norm": 0.878616229439757, "learning_rate": 4.83296391993926e-07, "loss": 0.2317, "step": 3526 }, { "epoch": 1.8077908764736033, "grad_norm": 0.9316750157283387, "learning_rate": 4.807498566488545e-07, "loss": 0.2992, "step": 3527 }, { "epoch": 1.8083034341363402, "grad_norm": 1.0147619764597657, "learning_rate": 4.782098827595305e-07, "loss": 0.2888, "step": 3528 }, { "epoch": 1.8088159917990774, "grad_norm": 0.9034035006577285, "learning_rate": 4.75676472076716e-07, "loss": 0.2726, "step": 3529 }, { "epoch": 1.8093285494618145, "grad_norm": 0.9326393941701158, "learning_rate": 4.7314962634664616e-07, "loss": 0.2575, "step": 3530 }, { "epoch": 1.8098411071245515, "grad_norm": 0.9012935384917579, "learning_rate": 4.706293473110313e-07, "loss": 0.2651, "step": 3531 }, { "epoch": 1.8103536647872884, "grad_norm": 0.9546231317530083, "learning_rate": 4.681156367070594e-07, "loss": 0.2498, "step": 3532 }, { "epoch": 1.8108662224500256, "grad_norm": 1.005443550992538, "learning_rate": 4.656084962673857e-07, "loss": 0.2939, "step": 3533 }, { "epoch": 1.8113787801127628, "grad_norm": 0.9486713280635969, "learning_rate": 4.631079277201389e-07, "loss": 0.2554, "step": 3534 }, { "epoch": 1.8118913377754997, "grad_norm": 0.9479769225140852, "learning_rate": 4.606139327889181e-07, "loss": 0.2671, "step": 3535 }, { "epoch": 1.8124038954382367, "grad_norm": 0.9040663454377388, "learning_rate": 4.581265131927948e-07, "loss": 0.2724, "step": 3536 }, { "epoch": 1.8129164531009738, "grad_norm": 1.0255120426279034, "learning_rate": 4.5564567064630085e-07, "loss": 0.2826, "step": 3537 }, { "epoch": 1.813429010763711, "grad_norm": 0.955175168135886, "learning_rate": 4.5317140685943726e-07, "loss": 0.2614, "step": 3538 }, { "epoch": 1.813941568426448, "grad_norm": 0.8737475823622528, "learning_rate": 4.5070372353767543e-07, "loss": 0.266, "step": 3539 }, { "epoch": 1.8144541260891849, "grad_norm": 0.7813837485890262, "learning_rate": 4.482426223819458e-07, "loss": 0.2265, "step": 3540 }, { "epoch": 1.814966683751922, "grad_norm": 0.945799256004759, "learning_rate": 4.4578810508864145e-07, "loss": 0.2566, "step": 3541 }, { "epoch": 1.8154792414146592, "grad_norm": 0.9453619755598277, "learning_rate": 4.433401733496201e-07, "loss": 0.2734, "step": 3542 }, { "epoch": 1.8159917990773962, "grad_norm": 0.9453677823968424, "learning_rate": 4.4089882885220204e-07, "loss": 0.2579, "step": 3543 }, { "epoch": 1.816504356740133, "grad_norm": 0.9185752961478644, "learning_rate": 4.3846407327915784e-07, "loss": 0.2845, "step": 3544 }, { "epoch": 1.8170169144028703, "grad_norm": 0.8692593528299799, "learning_rate": 4.3603590830872843e-07, "loss": 0.2247, "step": 3545 }, { "epoch": 1.8175294720656074, "grad_norm": 0.985155931883852, "learning_rate": 4.3361433561460274e-07, "loss": 0.2806, "step": 3546 }, { "epoch": 1.8180420297283444, "grad_norm": 0.9184123716147655, "learning_rate": 4.311993568659323e-07, "loss": 0.2535, "step": 3547 }, { "epoch": 1.8185545873910813, "grad_norm": 0.9868152056183892, "learning_rate": 4.287909737273177e-07, "loss": 0.2733, "step": 3548 }, { "epoch": 1.8190671450538185, "grad_norm": 0.9662968234277599, "learning_rate": 4.263891878588178e-07, "loss": 0.2694, "step": 3549 }, { "epoch": 1.8195797027165557, "grad_norm": 0.9514831018511865, "learning_rate": 4.2399400091594154e-07, "loss": 0.2695, "step": 3550 }, { "epoch": 1.8200922603792926, "grad_norm": 0.9862127519461608, "learning_rate": 4.2160541454964954e-07, "loss": 0.284, "step": 3551 }, { "epoch": 1.8206048180420298, "grad_norm": 0.9124410928929055, "learning_rate": 4.192234304063558e-07, "loss": 0.2654, "step": 3552 }, { "epoch": 1.821117375704767, "grad_norm": 0.9033350418886033, "learning_rate": 4.1684805012791706e-07, "loss": 0.2466, "step": 3553 }, { "epoch": 1.821629933367504, "grad_norm": 0.866321623367656, "learning_rate": 4.14479275351648e-07, "loss": 0.2404, "step": 3554 }, { "epoch": 1.8221424910302408, "grad_norm": 0.8935524092942655, "learning_rate": 4.1211710771030034e-07, "loss": 0.2673, "step": 3555 }, { "epoch": 1.822655048692978, "grad_norm": 0.8991249213053149, "learning_rate": 4.0976154883207497e-07, "loss": 0.2385, "step": 3556 }, { "epoch": 1.8231676063557152, "grad_norm": 0.9326914629846936, "learning_rate": 4.074126003406198e-07, "loss": 0.2443, "step": 3557 }, { "epoch": 1.8236801640184521, "grad_norm": 0.8942939421939811, "learning_rate": 4.0507026385502747e-07, "loss": 0.2442, "step": 3558 }, { "epoch": 1.824192721681189, "grad_norm": 0.9775984878524471, "learning_rate": 4.0273454098982424e-07, "loss": 0.2985, "step": 3559 }, { "epoch": 1.8247052793439262, "grad_norm": 0.9921369126643905, "learning_rate": 4.0040543335498783e-07, "loss": 0.2881, "step": 3560 }, { "epoch": 1.8252178370066634, "grad_norm": 0.9458786960844708, "learning_rate": 3.9808294255593293e-07, "loss": 0.267, "step": 3561 }, { "epoch": 1.8257303946694003, "grad_norm": 0.966441769596834, "learning_rate": 3.9576707019350903e-07, "loss": 0.2617, "step": 3562 }, { "epoch": 1.8262429523321373, "grad_norm": 0.8941879150140184, "learning_rate": 3.934578178640103e-07, "loss": 0.2639, "step": 3563 }, { "epoch": 1.8267555099948745, "grad_norm": 0.9435050990144617, "learning_rate": 3.9115518715916345e-07, "loss": 0.2397, "step": 3564 }, { "epoch": 1.8272680676576116, "grad_norm": 0.9494736134870749, "learning_rate": 3.8885917966613227e-07, "loss": 0.2735, "step": 3565 }, { "epoch": 1.8277806253203486, "grad_norm": 0.9210963088477848, "learning_rate": 3.865697969675164e-07, "loss": 0.2628, "step": 3566 }, { "epoch": 1.8282931829830855, "grad_norm": 0.9289859114444105, "learning_rate": 3.842870406413457e-07, "loss": 0.2784, "step": 3567 }, { "epoch": 1.8288057406458227, "grad_norm": 0.9530665961748327, "learning_rate": 3.82010912261086e-07, "loss": 0.2411, "step": 3568 }, { "epoch": 1.8293182983085599, "grad_norm": 0.9462652789768361, "learning_rate": 3.7974141339563673e-07, "loss": 0.2469, "step": 3569 }, { "epoch": 1.8298308559712968, "grad_norm": 1.0118698239771862, "learning_rate": 3.7747854560931996e-07, "loss": 0.2628, "step": 3570 }, { "epoch": 1.8303434136340337, "grad_norm": 1.0529711857683304, "learning_rate": 3.7522231046189237e-07, "loss": 0.292, "step": 3571 }, { "epoch": 1.830855971296771, "grad_norm": 0.9320228518632996, "learning_rate": 3.7297270950854224e-07, "loss": 0.2622, "step": 3572 }, { "epoch": 1.831368528959508, "grad_norm": 0.8863769793616735, "learning_rate": 3.707297442998758e-07, "loss": 0.2837, "step": 3573 }, { "epoch": 1.831881086622245, "grad_norm": 0.8516337559365789, "learning_rate": 3.684934163819309e-07, "loss": 0.2334, "step": 3574 }, { "epoch": 1.832393644284982, "grad_norm": 0.8478741829025722, "learning_rate": 3.6626372729617335e-07, "loss": 0.2249, "step": 3575 }, { "epoch": 1.8329062019477191, "grad_norm": 0.8690928013645371, "learning_rate": 3.640406785794892e-07, "loss": 0.2463, "step": 3576 }, { "epoch": 1.8334187596104563, "grad_norm": 0.8606043471345215, "learning_rate": 3.618242717641829e-07, "loss": 0.2267, "step": 3577 }, { "epoch": 1.8339313172731933, "grad_norm": 0.876715259177772, "learning_rate": 3.596145083779912e-07, "loss": 0.2386, "step": 3578 }, { "epoch": 1.8344438749359302, "grad_norm": 0.9162037936664028, "learning_rate": 3.5741138994406346e-07, "loss": 0.2436, "step": 3579 }, { "epoch": 1.8349564325986674, "grad_norm": 1.0055038771090437, "learning_rate": 3.5521491798097163e-07, "loss": 0.3098, "step": 3580 }, { "epoch": 1.8354689902614045, "grad_norm": 0.9181066966040119, "learning_rate": 3.5302509400270693e-07, "loss": 0.2489, "step": 3581 }, { "epoch": 1.8359815479241415, "grad_norm": 0.8950940960583177, "learning_rate": 3.508419195186774e-07, "loss": 0.2589, "step": 3582 }, { "epoch": 1.8364941055868784, "grad_norm": 0.9121583732888542, "learning_rate": 3.486653960337061e-07, "loss": 0.2273, "step": 3583 }, { "epoch": 1.8370066632496156, "grad_norm": 0.8687256375194969, "learning_rate": 3.464955250480351e-07, "loss": 0.2239, "step": 3584 }, { "epoch": 1.8375192209123528, "grad_norm": 0.9420912117757714, "learning_rate": 3.443323080573191e-07, "loss": 0.2733, "step": 3585 }, { "epoch": 1.8380317785750897, "grad_norm": 0.8622287147213876, "learning_rate": 3.421757465526243e-07, "loss": 0.2103, "step": 3586 }, { "epoch": 1.8385443362378266, "grad_norm": 0.8982597202447109, "learning_rate": 3.40025842020435e-07, "loss": 0.2541, "step": 3587 }, { "epoch": 1.8390568939005638, "grad_norm": 1.0543409315166243, "learning_rate": 3.3788259594264126e-07, "loss": 0.285, "step": 3588 }, { "epoch": 1.839569451563301, "grad_norm": 0.8768646841554056, "learning_rate": 3.357460097965448e-07, "loss": 0.2627, "step": 3589 }, { "epoch": 1.840082009226038, "grad_norm": 0.9809323701080077, "learning_rate": 3.33616085054862e-07, "loss": 0.2583, "step": 3590 }, { "epoch": 1.8405945668887749, "grad_norm": 0.8734550852785142, "learning_rate": 3.314928231857084e-07, "loss": 0.2297, "step": 3591 }, { "epoch": 1.841107124551512, "grad_norm": 0.8621895190679607, "learning_rate": 3.293762256526134e-07, "loss": 0.2404, "step": 3592 }, { "epoch": 1.8416196822142492, "grad_norm": 0.903400573186691, "learning_rate": 3.2726629391451436e-07, "loss": 0.2468, "step": 3593 }, { "epoch": 1.8421322398769862, "grad_norm": 1.0194092807297004, "learning_rate": 3.2516302942574794e-07, "loss": 0.2667, "step": 3594 }, { "epoch": 1.842644797539723, "grad_norm": 0.9685302999969773, "learning_rate": 3.2306643363605893e-07, "loss": 0.2586, "step": 3595 }, { "epoch": 1.8431573552024603, "grad_norm": 0.9863043687473774, "learning_rate": 3.2097650799059464e-07, "loss": 0.2764, "step": 3596 }, { "epoch": 1.8436699128651974, "grad_norm": 0.9574633510985229, "learning_rate": 3.1889325392990613e-07, "loss": 0.2785, "step": 3597 }, { "epoch": 1.8441824705279344, "grad_norm": 0.9451717244541422, "learning_rate": 3.1681667288994353e-07, "loss": 0.2493, "step": 3598 }, { "epoch": 1.8446950281906713, "grad_norm": 0.9954398238307275, "learning_rate": 3.147467663020598e-07, "loss": 0.273, "step": 3599 }, { "epoch": 1.8452075858534085, "grad_norm": 0.9597600139374586, "learning_rate": 3.1268353559300467e-07, "loss": 0.2702, "step": 3600 }, { "epoch": 1.8457201435161457, "grad_norm": 0.9151130320126769, "learning_rate": 3.106269821849273e-07, "loss": 0.2825, "step": 3601 }, { "epoch": 1.8462327011788826, "grad_norm": 0.8903592194037608, "learning_rate": 3.0857710749537585e-07, "loss": 0.2392, "step": 3602 }, { "epoch": 1.8467452588416196, "grad_norm": 0.9394880376950947, "learning_rate": 3.065339129372935e-07, "loss": 0.2692, "step": 3603 }, { "epoch": 1.8472578165043567, "grad_norm": 0.8356230784057495, "learning_rate": 3.0449739991901703e-07, "loss": 0.2267, "step": 3604 }, { "epoch": 1.8477703741670939, "grad_norm": 0.9918258852405846, "learning_rate": 3.024675698442858e-07, "loss": 0.2691, "step": 3605 }, { "epoch": 1.8482829318298308, "grad_norm": 0.8991557492805653, "learning_rate": 3.0044442411222066e-07, "loss": 0.256, "step": 3606 }, { "epoch": 1.8487954894925678, "grad_norm": 0.9819407030192625, "learning_rate": 2.9842796411734263e-07, "loss": 0.2523, "step": 3607 }, { "epoch": 1.849308047155305, "grad_norm": 0.9145450881522216, "learning_rate": 2.964181912495656e-07, "loss": 0.2519, "step": 3608 }, { "epoch": 1.8498206048180421, "grad_norm": 0.9277427757667279, "learning_rate": 2.9441510689418806e-07, "loss": 0.2649, "step": 3609 }, { "epoch": 1.850333162480779, "grad_norm": 0.979328981510254, "learning_rate": 2.9241871243190555e-07, "loss": 0.2922, "step": 3610 }, { "epoch": 1.850845720143516, "grad_norm": 0.9291460391612071, "learning_rate": 2.9042900923879515e-07, "loss": 0.254, "step": 3611 }, { "epoch": 1.8513582778062532, "grad_norm": 0.9411454656695925, "learning_rate": 2.884459986863264e-07, "loss": 0.2534, "step": 3612 }, { "epoch": 1.8518708354689903, "grad_norm": 0.9752144650751763, "learning_rate": 2.864696821413548e-07, "loss": 0.2914, "step": 3613 }, { "epoch": 1.8523833931317273, "grad_norm": 0.9664209178536676, "learning_rate": 2.845000609661208e-07, "loss": 0.3009, "step": 3614 }, { "epoch": 1.8528959507944642, "grad_norm": 0.9151208656728105, "learning_rate": 2.825371365182505e-07, "loss": 0.2348, "step": 3615 }, { "epoch": 1.8534085084572014, "grad_norm": 0.9796676247167413, "learning_rate": 2.805809101507539e-07, "loss": 0.2581, "step": 3616 }, { "epoch": 1.8539210661199386, "grad_norm": 0.9859546391678442, "learning_rate": 2.786313832120258e-07, "loss": 0.2775, "step": 3617 }, { "epoch": 1.8544336237826755, "grad_norm": 1.0133235110388856, "learning_rate": 2.7668855704583997e-07, "loss": 0.2689, "step": 3618 }, { "epoch": 1.8549461814454125, "grad_norm": 0.96756894223705, "learning_rate": 2.7475243299135315e-07, "loss": 0.2706, "step": 3619 }, { "epoch": 1.8554587391081496, "grad_norm": 0.8506673734657793, "learning_rate": 2.7282301238310326e-07, "loss": 0.2354, "step": 3620 }, { "epoch": 1.8559712967708868, "grad_norm": 0.9443953781149766, "learning_rate": 2.7090029655100434e-07, "loss": 0.2852, "step": 3621 }, { "epoch": 1.8564838544336237, "grad_norm": 0.9196935384189636, "learning_rate": 2.689842868203563e-07, "loss": 0.2447, "step": 3622 }, { "epoch": 1.8569964120963607, "grad_norm": 0.9503084937116443, "learning_rate": 2.670749845118281e-07, "loss": 0.2734, "step": 3623 }, { "epoch": 1.8575089697590978, "grad_norm": 0.8826903447449403, "learning_rate": 2.651723909414694e-07, "loss": 0.2751, "step": 3624 }, { "epoch": 1.858021527421835, "grad_norm": 0.9045932576161188, "learning_rate": 2.6327650742070665e-07, "loss": 0.2593, "step": 3625 }, { "epoch": 1.858534085084572, "grad_norm": 0.9730685442849686, "learning_rate": 2.6138733525633896e-07, "loss": 0.2711, "step": 3626 }, { "epoch": 1.859046642747309, "grad_norm": 0.9377126836806541, "learning_rate": 2.595048757505392e-07, "loss": 0.2643, "step": 3627 }, { "epoch": 1.859559200410046, "grad_norm": 0.8771200959466298, "learning_rate": 2.576291302008571e-07, "loss": 0.2539, "step": 3628 }, { "epoch": 1.8600717580727832, "grad_norm": 0.9770756786917077, "learning_rate": 2.5576009990020856e-07, "loss": 0.2825, "step": 3629 }, { "epoch": 1.8605843157355202, "grad_norm": 0.9390503020265398, "learning_rate": 2.5389778613688744e-07, "loss": 0.255, "step": 3630 }, { "epoch": 1.8610968733982574, "grad_norm": 0.9638122885155476, "learning_rate": 2.5204219019455243e-07, "loss": 0.2699, "step": 3631 }, { "epoch": 1.8616094310609945, "grad_norm": 0.954308824894963, "learning_rate": 2.5019331335223383e-07, "loss": 0.2554, "step": 3632 }, { "epoch": 1.8621219887237315, "grad_norm": 0.9242900082108088, "learning_rate": 2.483511568843322e-07, "loss": 0.2525, "step": 3633 }, { "epoch": 1.8626345463864684, "grad_norm": 0.8395019019560666, "learning_rate": 2.46515722060614e-07, "loss": 0.1978, "step": 3634 }, { "epoch": 1.8631471040492056, "grad_norm": 0.9198532557866488, "learning_rate": 2.4468701014621177e-07, "loss": 0.2526, "step": 3635 }, { "epoch": 1.8636596617119427, "grad_norm": 0.9090235918637206, "learning_rate": 2.42865022401626e-07, "loss": 0.2514, "step": 3636 }, { "epoch": 1.8641722193746797, "grad_norm": 0.8799231774267293, "learning_rate": 2.4104976008272217e-07, "loss": 0.2423, "step": 3637 }, { "epoch": 1.8646847770374166, "grad_norm": 0.9357683317265568, "learning_rate": 2.392412244407294e-07, "loss": 0.2739, "step": 3638 }, { "epoch": 1.8651973347001538, "grad_norm": 1.0816808436596659, "learning_rate": 2.3743941672223825e-07, "loss": 0.2907, "step": 3639 }, { "epoch": 1.865709892362891, "grad_norm": 0.9234297846832527, "learning_rate": 2.3564433816920528e-07, "loss": 0.2642, "step": 3640 }, { "epoch": 1.866222450025628, "grad_norm": 0.9799057423639022, "learning_rate": 2.3385599001894965e-07, "loss": 0.2983, "step": 3641 }, { "epoch": 1.8667350076883649, "grad_norm": 0.900437584859864, "learning_rate": 2.3207437350414418e-07, "loss": 0.2486, "step": 3642 }, { "epoch": 1.867247565351102, "grad_norm": 0.9692689047868095, "learning_rate": 2.30299489852831e-07, "loss": 0.2734, "step": 3643 }, { "epoch": 1.8677601230138392, "grad_norm": 0.9254991904570841, "learning_rate": 2.2853134028840594e-07, "loss": 0.2506, "step": 3644 }, { "epoch": 1.8682726806765761, "grad_norm": 0.9849096104709659, "learning_rate": 2.2676992602962078e-07, "loss": 0.2818, "step": 3645 }, { "epoch": 1.868785238339313, "grad_norm": 1.0529618228533524, "learning_rate": 2.2501524829059208e-07, "loss": 0.2733, "step": 3646 }, { "epoch": 1.8692977960020503, "grad_norm": 0.9586331805564784, "learning_rate": 2.2326730828078792e-07, "loss": 0.2772, "step": 3647 }, { "epoch": 1.8698103536647874, "grad_norm": 0.919833481831629, "learning_rate": 2.215261072050323e-07, "loss": 0.2488, "step": 3648 }, { "epoch": 1.8703229113275244, "grad_norm": 0.9374266992277548, "learning_rate": 2.1979164626350745e-07, "loss": 0.2774, "step": 3649 }, { "epoch": 1.8708354689902613, "grad_norm": 0.9698063293458825, "learning_rate": 2.180639266517448e-07, "loss": 0.295, "step": 3650 }, { "epoch": 1.8713480266529985, "grad_norm": 0.8611761486309093, "learning_rate": 2.1634294956063284e-07, "loss": 0.2312, "step": 3651 }, { "epoch": 1.8718605843157357, "grad_norm": 0.9456197368818521, "learning_rate": 2.1462871617641379e-07, "loss": 0.2729, "step": 3652 }, { "epoch": 1.8723731419784726, "grad_norm": 0.913729130390938, "learning_rate": 2.129212276806769e-07, "loss": 0.2467, "step": 3653 }, { "epoch": 1.8728856996412095, "grad_norm": 0.9512780684330451, "learning_rate": 2.1122048525036409e-07, "loss": 0.2805, "step": 3654 }, { "epoch": 1.8733982573039467, "grad_norm": 0.9692484677165567, "learning_rate": 2.0952649005777093e-07, "loss": 0.2882, "step": 3655 }, { "epoch": 1.8739108149666839, "grad_norm": 0.9627095741211192, "learning_rate": 2.0783924327053562e-07, "loss": 0.2588, "step": 3656 }, { "epoch": 1.8744233726294208, "grad_norm": 0.9364594316568086, "learning_rate": 2.061587460516501e-07, "loss": 0.2575, "step": 3657 }, { "epoch": 1.8749359302921578, "grad_norm": 0.9850686678527139, "learning_rate": 2.0448499955945223e-07, "loss": 0.2652, "step": 3658 }, { "epoch": 1.875448487954895, "grad_norm": 2.649791265139643, "learning_rate": 2.0281800494762803e-07, "loss": 0.2559, "step": 3659 }, { "epoch": 1.875961045617632, "grad_norm": 0.8708341107096714, "learning_rate": 2.0115776336520622e-07, "loss": 0.2226, "step": 3660 }, { "epoch": 1.876473603280369, "grad_norm": 0.9681905080407486, "learning_rate": 1.9950427595656352e-07, "loss": 0.2595, "step": 3661 }, { "epoch": 1.876986160943106, "grad_norm": 0.938369303171435, "learning_rate": 1.9785754386142164e-07, "loss": 0.2674, "step": 3662 }, { "epoch": 1.8774987186058432, "grad_norm": 0.9991768926988145, "learning_rate": 1.962175682148426e-07, "loss": 0.256, "step": 3663 }, { "epoch": 1.8780112762685803, "grad_norm": 0.9278832516503077, "learning_rate": 1.9458435014723443e-07, "loss": 0.2507, "step": 3664 }, { "epoch": 1.8785238339313173, "grad_norm": 0.9273334097128542, "learning_rate": 1.9295789078434658e-07, "loss": 0.2418, "step": 3665 }, { "epoch": 1.8790363915940542, "grad_norm": 0.8771971362773185, "learning_rate": 1.9133819124727003e-07, "loss": 0.2532, "step": 3666 }, { "epoch": 1.8795489492567914, "grad_norm": 0.9387915028510723, "learning_rate": 1.8972525265243512e-07, "loss": 0.2652, "step": 3667 }, { "epoch": 1.8800615069195286, "grad_norm": 0.9200178025125502, "learning_rate": 1.881190761116125e-07, "loss": 0.2609, "step": 3668 }, { "epoch": 1.8805740645822655, "grad_norm": 0.892449071142075, "learning_rate": 1.8651966273191103e-07, "loss": 0.2411, "step": 3669 }, { "epoch": 1.8810866222450024, "grad_norm": 0.9167261972037986, "learning_rate": 1.8492701361578326e-07, "loss": 0.2549, "step": 3670 }, { "epoch": 1.8815991799077396, "grad_norm": 0.9507091981472987, "learning_rate": 1.8334112986100994e-07, "loss": 0.2808, "step": 3671 }, { "epoch": 1.8821117375704768, "grad_norm": 0.8913908421327487, "learning_rate": 1.8176201256071556e-07, "loss": 0.2343, "step": 3672 }, { "epoch": 1.8826242952332137, "grad_norm": 0.9231454903143947, "learning_rate": 1.8018966280336058e-07, "loss": 0.2646, "step": 3673 }, { "epoch": 1.8831368528959507, "grad_norm": 0.8564384385776808, "learning_rate": 1.7862408167273472e-07, "loss": 0.221, "step": 3674 }, { "epoch": 1.8836494105586878, "grad_norm": 0.9159434576090891, "learning_rate": 1.7706527024796805e-07, "loss": 0.267, "step": 3675 }, { "epoch": 1.884161968221425, "grad_norm": 0.9453172251772071, "learning_rate": 1.7551322960352334e-07, "loss": 0.2693, "step": 3676 }, { "epoch": 1.884674525884162, "grad_norm": 0.8964844433021213, "learning_rate": 1.7396796080919599e-07, "loss": 0.2541, "step": 3677 }, { "epoch": 1.885187083546899, "grad_norm": 0.9701853173175721, "learning_rate": 1.724294649301095e-07, "loss": 0.289, "step": 3678 }, { "epoch": 1.885699641209636, "grad_norm": 0.9149558274481937, "learning_rate": 1.708977430267278e-07, "loss": 0.253, "step": 3679 }, { "epoch": 1.8862121988723732, "grad_norm": 0.9268011706823843, "learning_rate": 1.6937279615483748e-07, "loss": 0.2578, "step": 3680 }, { "epoch": 1.8867247565351102, "grad_norm": 0.8974627536960192, "learning_rate": 1.6785462536555775e-07, "loss": 0.2642, "step": 3681 }, { "epoch": 1.8872373141978471, "grad_norm": 0.8522044870792566, "learning_rate": 1.6634323170533928e-07, "loss": 0.2404, "step": 3682 }, { "epoch": 1.8877498718605843, "grad_norm": 0.9105887902476856, "learning_rate": 1.6483861621595875e-07, "loss": 0.2566, "step": 3683 }, { "epoch": 1.8882624295233215, "grad_norm": 0.9824044891209252, "learning_rate": 1.6334077993452324e-07, "loss": 0.2625, "step": 3684 }, { "epoch": 1.8887749871860584, "grad_norm": 0.9617065349492755, "learning_rate": 1.6184972389346355e-07, "loss": 0.2448, "step": 3685 }, { "epoch": 1.8892875448487954, "grad_norm": 0.8228349910763592, "learning_rate": 1.6036544912054087e-07, "loss": 0.232, "step": 3686 }, { "epoch": 1.8898001025115325, "grad_norm": 0.9400250161498134, "learning_rate": 1.5888795663883904e-07, "loss": 0.2758, "step": 3687 }, { "epoch": 1.8903126601742697, "grad_norm": 0.9277137828567127, "learning_rate": 1.57417247466769e-07, "loss": 0.2796, "step": 3688 }, { "epoch": 1.8908252178370066, "grad_norm": 0.9511291951282229, "learning_rate": 1.559533226180654e-07, "loss": 0.2735, "step": 3689 }, { "epoch": 1.8913377754997436, "grad_norm": 0.9900042329708354, "learning_rate": 1.544961831017855e-07, "loss": 0.2504, "step": 3690 }, { "epoch": 1.8918503331624807, "grad_norm": 0.937834932606782, "learning_rate": 1.5304582992231255e-07, "loss": 0.2486, "step": 3691 }, { "epoch": 1.892362890825218, "grad_norm": 0.9675662439335486, "learning_rate": 1.5160226407934797e-07, "loss": 0.2744, "step": 3692 }, { "epoch": 1.8928754484879549, "grad_norm": 0.8741024224519184, "learning_rate": 1.5016548656791697e-07, "loss": 0.2473, "step": 3693 }, { "epoch": 1.8933880061506918, "grad_norm": 0.9882091129726905, "learning_rate": 1.487354983783673e-07, "loss": 0.2637, "step": 3694 }, { "epoch": 1.893900563813429, "grad_norm": 0.9447759397814305, "learning_rate": 1.4731230049636503e-07, "loss": 0.2746, "step": 3695 }, { "epoch": 1.8944131214761661, "grad_norm": 0.9023477230840312, "learning_rate": 1.4589589390289427e-07, "loss": 0.2327, "step": 3696 }, { "epoch": 1.894925679138903, "grad_norm": 0.9829201826225131, "learning_rate": 1.4448627957426186e-07, "loss": 0.2701, "step": 3697 }, { "epoch": 1.89543823680164, "grad_norm": 0.9402858026671327, "learning_rate": 1.430834584820895e-07, "loss": 0.2547, "step": 3698 }, { "epoch": 1.8959507944643772, "grad_norm": 0.9874500032539495, "learning_rate": 1.41687431593317e-07, "loss": 0.2818, "step": 3699 }, { "epoch": 1.8964633521271144, "grad_norm": 0.9052587549422164, "learning_rate": 1.4029819987020466e-07, "loss": 0.2645, "step": 3700 }, { "epoch": 1.8969759097898513, "grad_norm": 0.9625127445264544, "learning_rate": 1.3891576427032317e-07, "loss": 0.3019, "step": 3701 }, { "epoch": 1.8974884674525883, "grad_norm": 0.8848624928613569, "learning_rate": 1.375401257465625e-07, "loss": 0.2436, "step": 3702 }, { "epoch": 1.8980010251153254, "grad_norm": 0.9315806971590554, "learning_rate": 1.3617128524712643e-07, "loss": 0.2823, "step": 3703 }, { "epoch": 1.8985135827780626, "grad_norm": 1.0147030939700703, "learning_rate": 1.348092437155346e-07, "loss": 0.2852, "step": 3704 }, { "epoch": 1.8990261404407995, "grad_norm": 0.9215282524010056, "learning_rate": 1.334540020906172e-07, "loss": 0.2591, "step": 3705 }, { "epoch": 1.8995386981035365, "grad_norm": 0.9578038746159558, "learning_rate": 1.3210556130652031e-07, "loss": 0.256, "step": 3706 }, { "epoch": 1.9000512557662739, "grad_norm": 0.8893698416711485, "learning_rate": 1.3076392229270041e-07, "loss": 0.2633, "step": 3707 }, { "epoch": 1.9005638134290108, "grad_norm": 0.9631250944071408, "learning_rate": 1.2942908597392555e-07, "loss": 0.2679, "step": 3708 }, { "epoch": 1.9010763710917478, "grad_norm": 0.9753431801116019, "learning_rate": 1.2810105327027754e-07, "loss": 0.2877, "step": 3709 }, { "epoch": 1.901588928754485, "grad_norm": 0.9366501989859286, "learning_rate": 1.2677982509714415e-07, "loss": 0.2625, "step": 3710 }, { "epoch": 1.902101486417222, "grad_norm": 0.8971412941202875, "learning_rate": 1.2546540236522575e-07, "loss": 0.254, "step": 3711 }, { "epoch": 1.902614044079959, "grad_norm": 1.0820275137969273, "learning_rate": 1.2415778598053207e-07, "loss": 0.2888, "step": 3712 }, { "epoch": 1.903126601742696, "grad_norm": 0.8765116552450722, "learning_rate": 1.2285697684438104e-07, "loss": 0.2284, "step": 3713 }, { "epoch": 1.9036391594054332, "grad_norm": 0.9180408677705975, "learning_rate": 1.2156297585339872e-07, "loss": 0.2607, "step": 3714 }, { "epoch": 1.9041517170681703, "grad_norm": 0.9062959380557909, "learning_rate": 1.20275783899515e-07, "loss": 0.2701, "step": 3715 }, { "epoch": 1.9046642747309073, "grad_norm": 1.0246097017950202, "learning_rate": 1.1899540186997127e-07, "loss": 0.2762, "step": 3716 }, { "epoch": 1.9051768323936442, "grad_norm": 0.9436312794864099, "learning_rate": 1.1772183064731268e-07, "loss": 0.2699, "step": 3717 }, { "epoch": 1.9056893900563814, "grad_norm": 0.8994937720670283, "learning_rate": 1.1645507110938925e-07, "loss": 0.244, "step": 3718 }, { "epoch": 1.9062019477191186, "grad_norm": 0.8684813104032907, "learning_rate": 1.1519512412935807e-07, "loss": 0.2529, "step": 3719 }, { "epoch": 1.9067145053818555, "grad_norm": 0.8873342445079927, "learning_rate": 1.1394199057567667e-07, "loss": 0.2362, "step": 3720 }, { "epoch": 1.9072270630445924, "grad_norm": 0.9084041834219726, "learning_rate": 1.1269567131211079e-07, "loss": 0.2566, "step": 3721 }, { "epoch": 1.9077396207073296, "grad_norm": 0.9235734894446314, "learning_rate": 1.1145616719772545e-07, "loss": 0.2453, "step": 3722 }, { "epoch": 1.9082521783700668, "grad_norm": 0.9337400165059935, "learning_rate": 1.1022347908689057e-07, "loss": 0.2739, "step": 3723 }, { "epoch": 1.9087647360328037, "grad_norm": 0.9102770160900139, "learning_rate": 1.0899760782927649e-07, "loss": 0.2582, "step": 3724 }, { "epoch": 1.9092772936955407, "grad_norm": 0.9421851300910742, "learning_rate": 1.0777855426985395e-07, "loss": 0.261, "step": 3725 }, { "epoch": 1.9097898513582778, "grad_norm": 0.9128991450031978, "learning_rate": 1.0656631924889749e-07, "loss": 0.2462, "step": 3726 }, { "epoch": 1.910302409021015, "grad_norm": 0.8811719413121996, "learning_rate": 1.0536090360198092e-07, "loss": 0.2498, "step": 3727 }, { "epoch": 1.910814966683752, "grad_norm": 0.8648007751229417, "learning_rate": 1.0416230815997297e-07, "loss": 0.2303, "step": 3728 }, { "epoch": 1.911327524346489, "grad_norm": 0.9797978047799294, "learning_rate": 1.029705337490472e-07, "loss": 0.2833, "step": 3729 }, { "epoch": 1.911840082009226, "grad_norm": 0.8411513286141747, "learning_rate": 1.0178558119067316e-07, "loss": 0.2397, "step": 3730 }, { "epoch": 1.9123526396719632, "grad_norm": 0.961168753730892, "learning_rate": 1.0060745130161753e-07, "loss": 0.2553, "step": 3731 }, { "epoch": 1.9128651973347002, "grad_norm": 0.878277063268526, "learning_rate": 9.943614489394626e-08, "loss": 0.2497, "step": 3732 }, { "epoch": 1.9133777549974371, "grad_norm": 0.9677425782164956, "learning_rate": 9.827166277501909e-08, "loss": 0.2946, "step": 3733 }, { "epoch": 1.9138903126601743, "grad_norm": 0.9427464634884898, "learning_rate": 9.711400574749507e-08, "loss": 0.2697, "step": 3734 }, { "epoch": 1.9144028703229115, "grad_norm": 0.9058517443012387, "learning_rate": 9.596317460932703e-08, "loss": 0.2462, "step": 3735 }, { "epoch": 1.9149154279856484, "grad_norm": 0.8065174422274445, "learning_rate": 9.481917015376262e-08, "loss": 0.2239, "step": 3736 }, { "epoch": 1.9154279856483853, "grad_norm": 0.949037019666526, "learning_rate": 9.368199316934446e-08, "loss": 0.2431, "step": 3737 }, { "epoch": 1.9159405433111225, "grad_norm": 1.0260905063284265, "learning_rate": 9.255164443990994e-08, "loss": 0.2725, "step": 3738 }, { "epoch": 1.9164531009738597, "grad_norm": 0.913828027915596, "learning_rate": 9.142812474458806e-08, "loss": 0.2555, "step": 3739 }, { "epoch": 1.9169656586365966, "grad_norm": 0.9169836622914946, "learning_rate": 9.031143485780159e-08, "loss": 0.2963, "step": 3740 }, { "epoch": 1.9174782162993336, "grad_norm": 0.9312814784726389, "learning_rate": 8.92015755492659e-08, "loss": 0.2393, "step": 3741 }, { "epoch": 1.9179907739620707, "grad_norm": 0.9681371907486106, "learning_rate": 8.809854758399017e-08, "loss": 0.2217, "step": 3742 }, { "epoch": 1.918503331624808, "grad_norm": 1.0463784686547244, "learning_rate": 8.700235172226845e-08, "loss": 0.2806, "step": 3743 }, { "epoch": 1.9190158892875449, "grad_norm": 0.9654895994145357, "learning_rate": 8.591298871969411e-08, "loss": 0.2662, "step": 3744 }, { "epoch": 1.9195284469502818, "grad_norm": 0.9483088176832972, "learning_rate": 8.483045932714318e-08, "loss": 0.2894, "step": 3745 }, { "epoch": 1.920041004613019, "grad_norm": 0.855350028663181, "learning_rate": 8.375476429078543e-08, "loss": 0.2067, "step": 3746 }, { "epoch": 1.9205535622757561, "grad_norm": 0.9334443301465276, "learning_rate": 8.268590435208002e-08, "loss": 0.2694, "step": 3747 }, { "epoch": 1.921066119938493, "grad_norm": 1.0673415870567362, "learning_rate": 8.162388024777202e-08, "loss": 0.3118, "step": 3748 }, { "epoch": 1.92157867760123, "grad_norm": 1.0161060518932876, "learning_rate": 8.05686927098981e-08, "loss": 0.2899, "step": 3749 }, { "epoch": 1.9220912352639672, "grad_norm": 0.9280385157992878, "learning_rate": 7.952034246577977e-08, "loss": 0.257, "step": 3750 }, { "epoch": 1.9226037929267044, "grad_norm": 1.0135852504443006, "learning_rate": 7.847883023802793e-08, "loss": 0.2964, "step": 3751 }, { "epoch": 1.9231163505894413, "grad_norm": 0.9088230247513264, "learning_rate": 7.744415674453721e-08, "loss": 0.2344, "step": 3752 }, { "epoch": 1.9236289082521782, "grad_norm": 0.9821422342699351, "learning_rate": 7.641632269849263e-08, "loss": 0.275, "step": 3753 }, { "epoch": 1.9241414659149154, "grad_norm": 0.8515730865853282, "learning_rate": 7.539532880836087e-08, "loss": 0.2317, "step": 3754 }, { "epoch": 1.9246540235776526, "grad_norm": 0.8717423723419416, "learning_rate": 7.438117577789561e-08, "loss": 0.2374, "step": 3755 }, { "epoch": 1.9251665812403895, "grad_norm": 0.9146377131059429, "learning_rate": 7.33738643061377e-08, "loss": 0.2392, "step": 3756 }, { "epoch": 1.9256791389031265, "grad_norm": 0.898491182193106, "learning_rate": 7.237339508740615e-08, "loss": 0.2469, "step": 3757 }, { "epoch": 1.9261916965658636, "grad_norm": 0.9335819528735578, "learning_rate": 7.137976881130826e-08, "loss": 0.2515, "step": 3758 }, { "epoch": 1.9267042542286008, "grad_norm": 0.8941069871073567, "learning_rate": 7.039298616273393e-08, "loss": 0.2477, "step": 3759 }, { "epoch": 1.9272168118913378, "grad_norm": 0.9346162530926431, "learning_rate": 6.941304782185577e-08, "loss": 0.2894, "step": 3760 }, { "epoch": 1.9277293695540747, "grad_norm": 0.9994377748941553, "learning_rate": 6.843995446412788e-08, "loss": 0.2634, "step": 3761 }, { "epoch": 1.9282419272168119, "grad_norm": 0.9781158192088066, "learning_rate": 6.747370676028819e-08, "loss": 0.2694, "step": 3762 }, { "epoch": 1.928754484879549, "grad_norm": 0.9166192353210597, "learning_rate": 6.651430537635284e-08, "loss": 0.2459, "step": 3763 }, { "epoch": 1.929267042542286, "grad_norm": 1.0075769092443663, "learning_rate": 6.556175097362061e-08, "loss": 0.2793, "step": 3764 }, { "epoch": 1.929779600205023, "grad_norm": 0.8573831808736763, "learning_rate": 6.461604420867296e-08, "loss": 0.213, "step": 3765 }, { "epoch": 1.93029215786776, "grad_norm": 0.9459600432046313, "learning_rate": 6.367718573336845e-08, "loss": 0.2553, "step": 3766 }, { "epoch": 1.9308047155304973, "grad_norm": 0.9521799139668821, "learning_rate": 6.274517619484499e-08, "loss": 0.2889, "step": 3767 }, { "epoch": 1.9313172731932342, "grad_norm": 0.9103414274335606, "learning_rate": 6.182001623552202e-08, "loss": 0.2427, "step": 3768 }, { "epoch": 1.9318298308559712, "grad_norm": 0.9054161627311178, "learning_rate": 6.090170649309502e-08, "loss": 0.259, "step": 3769 }, { "epoch": 1.9323423885187083, "grad_norm": 0.9354012909308758, "learning_rate": 5.999024760054095e-08, "loss": 0.2644, "step": 3770 }, { "epoch": 1.9328549461814455, "grad_norm": 0.9592421030336351, "learning_rate": 5.9085640186111735e-08, "loss": 0.2733, "step": 3771 }, { "epoch": 1.9333675038441824, "grad_norm": 0.9301931056559379, "learning_rate": 5.8187884873338595e-08, "loss": 0.2415, "step": 3772 }, { "epoch": 1.9338800615069194, "grad_norm": 0.9863450853107147, "learning_rate": 5.7296982281026534e-08, "loss": 0.2613, "step": 3773 }, { "epoch": 1.9343926191696565, "grad_norm": 1.0432806467850182, "learning_rate": 5.641293302326323e-08, "loss": 0.2504, "step": 3774 }, { "epoch": 1.9349051768323937, "grad_norm": 0.976346170028349, "learning_rate": 5.5535737709404566e-08, "loss": 0.2711, "step": 3775 }, { "epoch": 1.9354177344951307, "grad_norm": 0.9446847909962358, "learning_rate": 5.466539694408912e-08, "loss": 0.2622, "step": 3776 }, { "epoch": 1.9359302921578676, "grad_norm": 0.9020538119164037, "learning_rate": 5.380191132722701e-08, "loss": 0.2551, "step": 3777 }, { "epoch": 1.9364428498206048, "grad_norm": 0.9285326023638922, "learning_rate": 5.2945281454003236e-08, "loss": 0.2561, "step": 3778 }, { "epoch": 1.936955407483342, "grad_norm": 0.820718341276233, "learning_rate": 5.209550791487883e-08, "loss": 0.2161, "step": 3779 }, { "epoch": 1.9374679651460789, "grad_norm": 0.8896103211781814, "learning_rate": 5.125259129558857e-08, "loss": 0.2424, "step": 3780 }, { "epoch": 1.9379805228088158, "grad_norm": 0.9744199132418813, "learning_rate": 5.041653217713993e-08, "loss": 0.2752, "step": 3781 }, { "epoch": 1.938493080471553, "grad_norm": 0.9020370605576424, "learning_rate": 4.958733113581415e-08, "loss": 0.2496, "step": 3782 }, { "epoch": 1.9390056381342902, "grad_norm": 1.0159875473804574, "learning_rate": 4.876498874316515e-08, "loss": 0.2988, "step": 3783 }, { "epoch": 1.9395181957970271, "grad_norm": 0.9684638969472307, "learning_rate": 4.7949505566018405e-08, "loss": 0.272, "step": 3784 }, { "epoch": 1.9400307534597643, "grad_norm": 0.885079032206187, "learning_rate": 4.714088216647428e-08, "loss": 0.2681, "step": 3785 }, { "epoch": 1.9405433111225014, "grad_norm": 0.9067243720537734, "learning_rate": 4.6339119101902475e-08, "loss": 0.2777, "step": 3786 }, { "epoch": 1.9410558687852384, "grad_norm": 0.9322995029093617, "learning_rate": 4.5544216924944265e-08, "loss": 0.2694, "step": 3787 }, { "epoch": 1.9415684264479753, "grad_norm": 0.9851234414846604, "learning_rate": 4.475617618351136e-08, "loss": 0.2572, "step": 3788 }, { "epoch": 1.9420809841107125, "grad_norm": 0.8934757337476446, "learning_rate": 4.3974997420789257e-08, "loss": 0.2283, "step": 3789 }, { "epoch": 1.9425935417734497, "grad_norm": 0.9259339995781978, "learning_rate": 4.320068117522835e-08, "loss": 0.2589, "step": 3790 }, { "epoch": 1.9431060994361866, "grad_norm": 0.9319151402220102, "learning_rate": 4.243322798055172e-08, "loss": 0.2538, "step": 3791 }, { "epoch": 1.9436186570989236, "grad_norm": 0.97170379267299, "learning_rate": 4.167263836575286e-08, "loss": 0.2746, "step": 3792 }, { "epoch": 1.9441312147616607, "grad_norm": 1.044057476843851, "learning_rate": 4.091891285509353e-08, "loss": 0.307, "step": 3793 }, { "epoch": 1.944643772424398, "grad_norm": 0.9462202347528488, "learning_rate": 4.0172051968101474e-08, "loss": 0.2802, "step": 3794 }, { "epoch": 1.9451563300871348, "grad_norm": 0.9626404012978463, "learning_rate": 3.943205621957713e-08, "loss": 0.2597, "step": 3795 }, { "epoch": 1.9456688877498718, "grad_norm": 0.9755356315900292, "learning_rate": 3.869892611958692e-08, "loss": 0.2945, "step": 3796 }, { "epoch": 1.946181445412609, "grad_norm": 0.8992898071014911, "learning_rate": 3.797266217346329e-08, "loss": 0.2247, "step": 3797 }, { "epoch": 1.9466940030753461, "grad_norm": 0.9355163851757783, "learning_rate": 3.7253264881809137e-08, "loss": 0.2394, "step": 3798 }, { "epoch": 1.947206560738083, "grad_norm": 0.9306489160032287, "learning_rate": 3.654073474049225e-08, "loss": 0.2662, "step": 3799 }, { "epoch": 1.94771911840082, "grad_norm": 0.9378694106361027, "learning_rate": 3.583507224064642e-08, "loss": 0.2635, "step": 3800 }, { "epoch": 1.9482316760635572, "grad_norm": 0.942887991409426, "learning_rate": 3.5136277868674795e-08, "loss": 0.2454, "step": 3801 }, { "epoch": 1.9487442337262944, "grad_norm": 0.8275031263442904, "learning_rate": 3.4444352106242086e-08, "loss": 0.2306, "step": 3802 }, { "epoch": 1.9492567913890313, "grad_norm": 0.9867651478651726, "learning_rate": 3.3759295430281226e-08, "loss": 0.3018, "step": 3803 }, { "epoch": 1.9497693490517682, "grad_norm": 0.9104146298727223, "learning_rate": 3.308110831299116e-08, "loss": 0.2477, "step": 3804 }, { "epoch": 1.9502819067145054, "grad_norm": 0.9011080485982329, "learning_rate": 3.240979122183352e-08, "loss": 0.2456, "step": 3805 }, { "epoch": 1.9507944643772426, "grad_norm": 0.9459921514872032, "learning_rate": 3.174534461953593e-08, "loss": 0.2564, "step": 3806 }, { "epoch": 1.9513070220399795, "grad_norm": 1.0118811948085606, "learning_rate": 3.1087768964092046e-08, "loss": 0.2614, "step": 3807 }, { "epoch": 1.9518195797027165, "grad_norm": 0.8979941015640462, "learning_rate": 3.043706470875374e-08, "loss": 0.2552, "step": 3808 }, { "epoch": 1.9523321373654536, "grad_norm": 0.9275262688321971, "learning_rate": 2.9793232302043342e-08, "loss": 0.2473, "step": 3809 }, { "epoch": 1.9528446950281908, "grad_norm": 0.960505140144728, "learning_rate": 2.915627218774142e-08, "loss": 0.2635, "step": 3810 }, { "epoch": 1.9533572526909277, "grad_norm": 0.905203512491497, "learning_rate": 2.852618480489566e-08, "loss": 0.2728, "step": 3811 }, { "epoch": 1.9538698103536647, "grad_norm": 0.9644294477089852, "learning_rate": 2.7902970587813104e-08, "loss": 0.2603, "step": 3812 }, { "epoch": 1.9543823680164019, "grad_norm": 1.010665033628912, "learning_rate": 2.728662996606457e-08, "loss": 0.2696, "step": 3813 }, { "epoch": 1.954894925679139, "grad_norm": 0.9031767197982937, "learning_rate": 2.667716336448356e-08, "loss": 0.2319, "step": 3814 }, { "epoch": 1.955407483341876, "grad_norm": 0.8734133313331182, "learning_rate": 2.6074571203165143e-08, "loss": 0.2398, "step": 3815 }, { "epoch": 1.955920041004613, "grad_norm": 0.9948513476718462, "learning_rate": 2.547885389746485e-08, "loss": 0.2979, "step": 3816 }, { "epoch": 1.95643259866735, "grad_norm": 1.040777269958223, "learning_rate": 2.4890011858000886e-08, "loss": 0.274, "step": 3817 }, { "epoch": 1.9569451563300873, "grad_norm": 0.893625460633952, "learning_rate": 2.430804549065302e-08, "loss": 0.2226, "step": 3818 }, { "epoch": 1.9574577139928242, "grad_norm": 0.9194914801037795, "learning_rate": 2.373295519655927e-08, "loss": 0.2631, "step": 3819 }, { "epoch": 1.9579702716555611, "grad_norm": 0.9870874230957, "learning_rate": 2.3164741372119215e-08, "loss": 0.2673, "step": 3820 }, { "epoch": 1.9584828293182983, "grad_norm": 0.9652147135879607, "learning_rate": 2.2603404408994e-08, "loss": 0.271, "step": 3821 }, { "epoch": 1.9589953869810355, "grad_norm": 0.940137089174412, "learning_rate": 2.2048944694104123e-08, "loss": 0.2851, "step": 3822 }, { "epoch": 1.9595079446437724, "grad_norm": 0.9726183239319907, "learning_rate": 2.150136260962721e-08, "loss": 0.277, "step": 3823 }, { "epoch": 1.9600205023065094, "grad_norm": 0.832472326881468, "learning_rate": 2.0960658533003554e-08, "loss": 0.2249, "step": 3824 }, { "epoch": 1.9605330599692465, "grad_norm": 0.9354160371379765, "learning_rate": 2.0426832836930588e-08, "loss": 0.2707, "step": 3825 }, { "epoch": 1.9610456176319837, "grad_norm": 0.9420752844992638, "learning_rate": 1.989988588936509e-08, "loss": 0.2561, "step": 3826 }, { "epoch": 1.9615581752947207, "grad_norm": 0.940121436852677, "learning_rate": 1.9379818053523182e-08, "loss": 0.2594, "step": 3827 }, { "epoch": 1.9620707329574576, "grad_norm": 0.9604850450734231, "learning_rate": 1.8866629687879222e-08, "loss": 0.278, "step": 3828 }, { "epoch": 1.9625832906201948, "grad_norm": 1.032815403375389, "learning_rate": 1.8360321146163595e-08, "loss": 0.2868, "step": 3829 }, { "epoch": 1.963095848282932, "grad_norm": 0.9802063103509342, "learning_rate": 1.7860892777367133e-08, "loss": 0.2861, "step": 3830 }, { "epoch": 1.9636084059456689, "grad_norm": 1.0032838312684467, "learning_rate": 1.7368344925736692e-08, "loss": 0.2721, "step": 3831 }, { "epoch": 1.9641209636084058, "grad_norm": 0.9242721486754426, "learning_rate": 1.6882677930777357e-08, "loss": 0.2585, "step": 3832 }, { "epoch": 1.964633521271143, "grad_norm": 0.9040355900224326, "learning_rate": 1.6403892127251352e-08, "loss": 0.2645, "step": 3833 }, { "epoch": 1.9651460789338802, "grad_norm": 0.9858101024408833, "learning_rate": 1.5931987845176912e-08, "loss": 0.2181, "step": 3834 }, { "epoch": 1.965658636596617, "grad_norm": 0.860633293708345, "learning_rate": 1.546696540982939e-08, "loss": 0.2276, "step": 3835 }, { "epoch": 1.966171194259354, "grad_norm": 0.8899363807271942, "learning_rate": 1.50088251417424e-08, "loss": 0.2203, "step": 3836 }, { "epoch": 1.9666837519220912, "grad_norm": 1.0048006093991506, "learning_rate": 1.4557567356702218e-08, "loss": 0.2911, "step": 3837 }, { "epoch": 1.9671963095848284, "grad_norm": 1.0097161850398657, "learning_rate": 1.411319236575337e-08, "loss": 0.2818, "step": 3838 }, { "epoch": 1.9677088672475653, "grad_norm": 0.8536308261330493, "learning_rate": 1.3675700475196395e-08, "loss": 0.2149, "step": 3839 }, { "epoch": 1.9682214249103023, "grad_norm": 0.9625320718398078, "learning_rate": 1.3245091986587855e-08, "loss": 0.2434, "step": 3840 }, { "epoch": 1.9687339825730394, "grad_norm": 0.9767613388087862, "learning_rate": 1.2821367196736988e-08, "loss": 0.2616, "step": 3841 }, { "epoch": 1.9692465402357766, "grad_norm": 0.922688435021621, "learning_rate": 1.2404526397711281e-08, "loss": 0.2522, "step": 3842 }, { "epoch": 1.9697590978985136, "grad_norm": 0.9286320791285935, "learning_rate": 1.1994569876830897e-08, "loss": 0.2584, "step": 3843 }, { "epoch": 1.9702716555612505, "grad_norm": 0.9593988411764477, "learning_rate": 1.1591497916673133e-08, "loss": 0.2764, "step": 3844 }, { "epoch": 1.9707842132239877, "grad_norm": 0.9221520286246826, "learning_rate": 1.119531079506797e-08, "loss": 0.2624, "step": 3845 }, { "epoch": 1.9712967708867248, "grad_norm": 0.9295080816037022, "learning_rate": 1.0806008785100297e-08, "loss": 0.2554, "step": 3846 }, { "epoch": 1.9718093285494618, "grad_norm": 0.8662035046919062, "learning_rate": 1.0423592155108798e-08, "loss": 0.2286, "step": 3847 }, { "epoch": 1.9723218862121987, "grad_norm": 0.8113879253105497, "learning_rate": 1.0048061168688173e-08, "loss": 0.2528, "step": 3848 }, { "epoch": 1.972834443874936, "grad_norm": 0.9074765882051262, "learning_rate": 9.67941608468359e-09, "loss": 0.2452, "step": 3849 }, { "epoch": 1.973347001537673, "grad_norm": 0.9421139811907703, "learning_rate": 9.317657157197347e-09, "loss": 0.2676, "step": 3850 }, { "epoch": 1.97385955920041, "grad_norm": 1.036516403879858, "learning_rate": 8.96278463558331e-09, "loss": 0.2913, "step": 3851 }, { "epoch": 1.974372116863147, "grad_norm": 0.9404965483596931, "learning_rate": 8.61479876444804e-09, "loss": 0.2554, "step": 3852 }, { "epoch": 1.9748846745258841, "grad_norm": 0.9552292073587391, "learning_rate": 8.273699783652999e-09, "loss": 0.2643, "step": 3853 }, { "epoch": 1.9753972321886213, "grad_norm": 0.8575619821694388, "learning_rate": 7.93948792831234e-09, "loss": 0.2288, "step": 3854 }, { "epoch": 1.9759097898513582, "grad_norm": 0.9567571109419976, "learning_rate": 7.612163428791786e-09, "loss": 0.2681, "step": 3855 }, { "epoch": 1.9764223475140952, "grad_norm": 1.0280288495148306, "learning_rate": 7.291726510710861e-09, "loss": 0.2841, "step": 3856 }, { "epoch": 1.9769349051768323, "grad_norm": 0.9731044123925876, "learning_rate": 6.978177394940666e-09, "loss": 0.2694, "step": 3857 }, { "epoch": 1.9774474628395695, "grad_norm": 1.0133365387188478, "learning_rate": 6.671516297606095e-09, "loss": 0.2877, "step": 3858 }, { "epoch": 1.9779600205023065, "grad_norm": 0.965886765731586, "learning_rate": 6.371743430082511e-09, "loss": 0.2631, "step": 3859 }, { "epoch": 1.9784725781650434, "grad_norm": 0.9065559185024261, "learning_rate": 6.078858998999071e-09, "loss": 0.2762, "step": 3860 }, { "epoch": 1.9789851358277806, "grad_norm": 0.902806022604907, "learning_rate": 5.79286320623651e-09, "loss": 0.2671, "step": 3861 }, { "epoch": 1.9794976934905177, "grad_norm": 0.9641885927834338, "learning_rate": 5.513756248924917e-09, "loss": 0.2696, "step": 3862 }, { "epoch": 1.9800102511532547, "grad_norm": 1.0015509112437777, "learning_rate": 5.241538319448181e-09, "loss": 0.2887, "step": 3863 }, { "epoch": 1.9805228088159919, "grad_norm": 0.9783178698037491, "learning_rate": 4.9762096054428724e-09, "loss": 0.3053, "step": 3864 }, { "epoch": 1.981035366478729, "grad_norm": 0.9774891393509072, "learning_rate": 4.717770289794921e-09, "loss": 0.2549, "step": 3865 }, { "epoch": 1.981547924141466, "grad_norm": 0.9447967517493798, "learning_rate": 4.466220550641831e-09, "loss": 0.2564, "step": 3866 }, { "epoch": 1.982060481804203, "grad_norm": 0.9726432203288189, "learning_rate": 4.221560561372684e-09, "loss": 0.2945, "step": 3867 }, { "epoch": 1.98257303946694, "grad_norm": 0.9422816524879933, "learning_rate": 3.983790490628137e-09, "loss": 0.2774, "step": 3868 }, { "epoch": 1.9830855971296772, "grad_norm": 0.9287234419820072, "learning_rate": 3.7529105022970915e-09, "loss": 0.2689, "step": 3869 }, { "epoch": 1.9835981547924142, "grad_norm": 1.0218006827607353, "learning_rate": 3.528920755523357e-09, "loss": 0.2854, "step": 3870 }, { "epoch": 1.9841107124551511, "grad_norm": 0.9082890663174827, "learning_rate": 3.311821404697879e-09, "loss": 0.2347, "step": 3871 }, { "epoch": 1.9846232701178883, "grad_norm": 0.9509043373940446, "learning_rate": 3.1016125994631776e-09, "loss": 0.244, "step": 3872 }, { "epoch": 1.9851358277806255, "grad_norm": 0.8992664427468472, "learning_rate": 2.898294484714459e-09, "loss": 0.23, "step": 3873 }, { "epoch": 1.9856483854433624, "grad_norm": 0.8201347877858165, "learning_rate": 2.701867200592956e-09, "loss": 0.2329, "step": 3874 }, { "epoch": 1.9861609431060994, "grad_norm": 0.8553737967658408, "learning_rate": 2.512330882494807e-09, "loss": 0.2339, "step": 3875 }, { "epoch": 1.9866735007688365, "grad_norm": 0.9227705360106256, "learning_rate": 2.329685661063286e-09, "loss": 0.2542, "step": 3876 }, { "epoch": 1.9871860584315737, "grad_norm": 0.894225640771016, "learning_rate": 2.1539316621921326e-09, "loss": 0.2464, "step": 3877 }, { "epoch": 1.9876986160943106, "grad_norm": 0.9111348527966869, "learning_rate": 1.9850690070266633e-09, "loss": 0.2612, "step": 3878 }, { "epoch": 1.9882111737570476, "grad_norm": 0.9400359251402866, "learning_rate": 1.823097811959329e-09, "loss": 0.2943, "step": 3879 }, { "epoch": 1.9887237314197848, "grad_norm": 0.8478599368012838, "learning_rate": 1.6680181886352676e-09, "loss": 0.2215, "step": 3880 }, { "epoch": 1.989236289082522, "grad_norm": 0.9103256207415227, "learning_rate": 1.519830243948972e-09, "loss": 0.246, "step": 3881 }, { "epoch": 1.9897488467452589, "grad_norm": 0.8592853628117347, "learning_rate": 1.378534080042071e-09, "loss": 0.2304, "step": 3882 }, { "epoch": 1.9902614044079958, "grad_norm": 0.9771945543058378, "learning_rate": 1.244129794307769e-09, "loss": 0.2577, "step": 3883 }, { "epoch": 1.990773962070733, "grad_norm": 1.0100783805455715, "learning_rate": 1.1166174793908468e-09, "loss": 0.2852, "step": 3884 }, { "epoch": 1.9912865197334702, "grad_norm": 1.0264271702183903, "learning_rate": 9.959972231798898e-10, "loss": 0.2774, "step": 3885 }, { "epoch": 1.991799077396207, "grad_norm": 0.8660522253379558, "learning_rate": 8.822691088195001e-10, "loss": 0.2464, "step": 3886 }, { "epoch": 1.992311635058944, "grad_norm": 0.9020137085440756, "learning_rate": 7.75433214699195e-10, "loss": 0.2525, "step": 3887 }, { "epoch": 1.9928241927216812, "grad_norm": 0.9047708273801515, "learning_rate": 6.754896144589573e-10, "loss": 0.2614, "step": 3888 }, { "epoch": 1.9933367503844184, "grad_norm": 0.8906108559874527, "learning_rate": 5.824383769892361e-10, "loss": 0.2312, "step": 3889 }, { "epoch": 1.9938493080471553, "grad_norm": 0.9537497487977468, "learning_rate": 4.962795664265052e-10, "loss": 0.2478, "step": 3890 }, { "epoch": 1.9943618657098923, "grad_norm": 0.9818019532402315, "learning_rate": 4.170132421610351e-10, "loss": 0.2728, "step": 3891 }, { "epoch": 1.9948744233726294, "grad_norm": 1.009441152030565, "learning_rate": 3.446394588280111e-10, "loss": 0.2754, "step": 3892 }, { "epoch": 1.9953869810353666, "grad_norm": 0.9505726454276209, "learning_rate": 2.791582663141945e-10, "loss": 0.2343, "step": 3893 }, { "epoch": 1.9958995386981035, "grad_norm": 0.9690525561485118, "learning_rate": 2.2056970975459223e-10, "loss": 0.2407, "step": 3894 }, { "epoch": 1.9964120963608405, "grad_norm": 0.9133451998370993, "learning_rate": 1.688738295335668e-10, "loss": 0.2296, "step": 3895 }, { "epoch": 1.9969246540235777, "grad_norm": 0.9518716368692047, "learning_rate": 1.2407066128372613e-10, "loss": 0.266, "step": 3896 }, { "epoch": 1.9974372116863148, "grad_norm": 0.9265254511607127, "learning_rate": 8.61602358859237e-11, "loss": 0.2683, "step": 3897 }, { "epoch": 1.9979497693490518, "grad_norm": 0.9366098859499025, "learning_rate": 5.514257947369928e-11, "loss": 0.276, "step": 3898 }, { "epoch": 1.9984623270117887, "grad_norm": 0.9875458808290456, "learning_rate": 3.101771342550741e-11, "loss": 0.259, "step": 3899 }, { "epoch": 1.9989748846745259, "grad_norm": 0.8855172903134064, "learning_rate": 1.3785654370268575e-11, "loss": 0.2426, "step": 3900 }, { "epoch": 1.999487442337263, "grad_norm": 0.853207703347426, "learning_rate": 3.4464141862589064e-12, "loss": 0.2326, "step": 3901 }, { "epoch": 2.0, "grad_norm": 0.9279656812881262, "learning_rate": 0.0, "loss": 0.2621, "step": 3902 }, { "epoch": 2.0, "step": 3902, "total_flos": 1.1427703377362944e+16, "train_loss": 0.38839334746711013, "train_runtime": 37429.8041, "train_samples_per_second": 13.343, "train_steps_per_second": 0.104 } ], "logging_steps": 1.0, "max_steps": 3902, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1427703377362944e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }